summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/overlayfs.txt106
-rw-r--r--fs/dcache.c88
-rw-r--r--fs/overlayfs/Kconfig31
-rw-r--r--fs/overlayfs/Makefile3
-rw-r--r--fs/overlayfs/copy_up.c188
-rw-r--r--fs/overlayfs/dir.c175
-rw-r--r--fs/overlayfs/export.c715
-rw-r--r--fs/overlayfs/inode.c106
-rw-r--r--fs/overlayfs/namei.c533
-rw-r--r--fs/overlayfs/overlayfs.h66
-rw-r--r--fs/overlayfs/ovl_entry.h11
-rw-r--r--fs/overlayfs/readdir.c57
-rw-r--r--fs/overlayfs/super.c125
-rw-r--r--fs/overlayfs/util.c108
-rw-r--r--include/linux/dcache.h2
15 files changed, 1905 insertions, 409 deletions
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index e6a5f4912b6d..6ea1e64d1464 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -190,6 +190,20 @@ Mount options:
Redirects are not created and not followed (equivalent to "redirect_dir=off"
if "redirect_always_follow" feature is not enabled).
+When the NFS export feature is enabled, every copied up directory is
+indexed by the file handle of the lower inode and a file handle of the
+upper directory is stored in a "trusted.overlay.upper" extended attribute
+on the index entry. On lookup of a merged directory, if the upper
+directory does not match the file handle stores in the index, that is an
+indication that multiple upper directories may be redirected to the same
+lower directory. In that case, lookup returns an error and warns about
+a possible inconsistency.
+
+Because lower layer redirects cannot be verified with the index, enabling
+NFS export support on an overlay filesystem with no upper layer requires
+turning off redirect follow (e.g. "redirect_dir=nofollow").
+
+
Non-directories
---------------
@@ -281,9 +295,9 @@ filesystem, so both st_dev and st_ino of the file may change.
Any open files referring to this inode will access the old data.
-If a file with multiple hard links is copied up, then this will
-"break" the link. Changes will not be propagated to other names
-referring to the same inode.
+Unless "inode index" feature is enabled, if a file with multiple hard
+links is copied up, then this will "break" the link. Changes will not be
+propagated to other names referring to the same inode.
Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged
directory will fail with EXDEV.
@@ -299,6 +313,92 @@ filesystem are not allowed. If the underlying filesystem is changed,
the behavior of the overlay is undefined, though it will not result in
a crash or deadlock.
+When the overlay NFS export feature is enabled, overlay filesystems
+behavior on offline changes of the underlying lower layer is different
+than the behavior when NFS export is disabled.
+
+On every copy_up, an NFS file handle of the lower inode, along with the
+UUID of the lower filesystem, are encoded and stored in an extended
+attribute "trusted.overlay.origin" on the upper inode.
+
+When the NFS export feature is enabled, a lookup of a merged directory,
+that found a lower directory at the lookup path or at the path pointed
+to by the "trusted.overlay.redirect" extended attribute, will verify
+that the found lower directory file handle and lower filesystem UUID
+match the origin file handle that was stored at copy_up time. If a
+found lower directory does not match the stored origin, that directory
+will not be merged with the upper directory.
+
+
+
+NFS export
+----------
+
+When the underlying filesystems supports NFS export and the "nfs_export"
+feature is enabled, an overlay filesystem may be exported to NFS.
+
+With the "nfs_export" feature, on copy_up of any lower object, an index
+entry is created under the index directory. The index entry name is the
+hexadecimal representation of the copy up origin file handle. For a
+non-directory object, the index entry is a hard link to the upper inode.
+For a directory object, the index entry has an extended attribute
+"trusted.overlay.upper" with an encoded file handle of the upper
+directory inode.
+
+When encoding a file handle from an overlay filesystem object, the
+following rules apply:
+
+1. For a non-upper object, encode a lower file handle from lower inode
+2. For an indexed object, encode a lower file handle from copy_up origin
+3. For a pure-upper object and for an existing non-indexed upper object,
+ encode an upper file handle from upper inode
+
+The encoded overlay file handle includes:
+ - Header including path type information (e.g. lower/upper)
+ - UUID of the underlying filesystem
+ - Underlying filesystem encoding of underlying inode
+
+This encoding format is identical to the encoding format file handles that
+are stored in extended attribute "trusted.overlay.origin".
+
+When decoding an overlay file handle, the following steps are followed:
+
+1. Find underlying layer by UUID and path type information.
+2. Decode the underlying filesystem file handle to underlying dentry.
+3. For a lower file handle, lookup the handle in index directory by name.
+4. If a whiteout is found in index, return ESTALE. This represents an
+ overlay object that was deleted after its file handle was encoded.
+5. For a non-directory, instantiate a disconnected overlay dentry from the
+ decoded underlying dentry, the path type and index inode, if found.
+6. For a directory, use the connected underlying decoded dentry, path type
+ and index, to lookup a connected overlay dentry.
+
+Decoding a non-directory file handle may return a disconnected dentry.
+copy_up of that disconnected dentry will create an upper index entry with
+no upper alias.
+
+When overlay filesystem has multiple lower layers, a middle layer
+directory may have a "redirect" to lower directory. Because middle layer
+"redirects" are not indexed, a lower file handle that was encoded from the
+"redirect" origin directory, cannot be used to find the middle or upper
+layer directory. Similarly, a lower file handle that was encoded from a
+descendant of the "redirect" origin directory, cannot be used to
+reconstruct a connected overlay path. To mitigate the cases of
+directories that cannot be decoded from a lower file handle, these
+directories are copied up on encode and encoded as an upper file handle.
+On an overlay filesystem with no upper layer this mitigation cannot be
+used NFS export in this setup requires turning off redirect follow (e.g.
+"redirect_dir=nofollow").
+
+The overlay filesystem does not support non-directory connectable file
+handles, so exporting with the 'subtree_check' exportfs configuration will
+cause failures to lookup files over NFS.
+
+When the NFS export feature is enabled, all directory index entries are
+verified on mount time to check that upper file handles are not stale.
+This verification may cause significant overhead in some cases.
+
+
Testsuite
---------
diff --git a/fs/dcache.c b/fs/dcache.c
index cca2b377ff0a..7c38f39958bc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1698,9 +1698,15 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
}
EXPORT_SYMBOL(d_alloc);
+struct dentry *d_alloc_anon(struct super_block *sb)
+{
+ return __d_alloc(sb, NULL);
+}
+EXPORT_SYMBOL(d_alloc_anon);
+
struct dentry *d_alloc_cursor(struct dentry * parent)
{
- struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+ struct dentry *dentry = d_alloc_anon(parent->d_sb);
if (dentry) {
dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
dentry->d_parent = dget(parent);
@@ -1886,7 +1892,7 @@ struct dentry *d_make_root(struct inode *root_inode)
struct dentry *res = NULL;
if (root_inode) {
- res = __d_alloc(root_inode->i_sb, NULL);
+ res = d_alloc_anon(root_inode->i_sb);
if (res)
d_instantiate(res, root_inode);
else
@@ -1925,33 +1931,19 @@ struct dentry *d_find_any_alias(struct inode *inode)
}
EXPORT_SYMBOL(d_find_any_alias);
-static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
+static struct dentry *__d_instantiate_anon(struct dentry *dentry,
+ struct inode *inode,
+ bool disconnected)
{
- struct dentry *tmp;
struct dentry *res;
unsigned add_flags;
- if (!inode)
- return ERR_PTR(-ESTALE);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- res = d_find_any_alias(inode);
- if (res)
- goto out_iput;
-
- tmp = __d_alloc(inode->i_sb, NULL);
- if (!tmp) {
- res = ERR_PTR(-ENOMEM);
- goto out_iput;
- }
-
- security_d_instantiate(tmp, inode);
+ security_d_instantiate(dentry, inode);
spin_lock(&inode->i_lock);
res = __d_find_any_alias(inode);
if (res) {
spin_unlock(&inode->i_lock);
- dput(tmp);
+ dput(dentry);
goto out_iput;
}
@@ -1961,24 +1953,57 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
if (disconnected)
add_flags |= DCACHE_DISCONNECTED;
- spin_lock(&tmp->d_lock);
- __d_set_inode_and_type(tmp, inode, add_flags);
- hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
+ spin_lock(&dentry->d_lock);
+ __d_set_inode_and_type(dentry, inode, add_flags);
+ hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
if (!disconnected) {
- hlist_bl_lock(&tmp->d_sb->s_roots);
- hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_roots);
- hlist_bl_unlock(&tmp->d_sb->s_roots);
+ hlist_bl_lock(&dentry->d_sb->s_roots);
+ hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots);
+ hlist_bl_unlock(&dentry->d_sb->s_roots);
}
- spin_unlock(&tmp->d_lock);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
- return tmp;
+ return dentry;
out_iput:
iput(inode);
return res;
}
+struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
+{
+ return __d_instantiate_anon(dentry, inode, true);
+}
+EXPORT_SYMBOL(d_instantiate_anon);
+
+static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
+{
+ struct dentry *tmp;
+ struct dentry *res;
+
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ res = d_find_any_alias(inode);
+ if (res)
+ goto out_iput;
+
+ tmp = d_alloc_anon(inode->i_sb);
+ if (!tmp) {
+ res = ERR_PTR(-ENOMEM);
+ goto out_iput;
+ }
+
+ return __d_instantiate_anon(tmp, inode, disconnected);
+
+out_iput:
+ iput(inode);
+ return res;
+}
+
/**
* d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1999,7 +2024,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
*/
struct dentry *d_obtain_alias(struct inode *inode)
{
- return __d_obtain_alias(inode, 1);
+ return __d_obtain_alias(inode, true);
}
EXPORT_SYMBOL(d_obtain_alias);
@@ -2020,7 +2045,7 @@ EXPORT_SYMBOL(d_obtain_alias);
*/
struct dentry *d_obtain_root(struct inode *inode)
{
- return __d_obtain_alias(inode, 0);
+ return __d_obtain_alias(inode, false);
}
EXPORT_SYMBOL(d_obtain_root);
@@ -3527,6 +3552,7 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
return result;
}
+EXPORT_SYMBOL(is_subdir);
static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
{
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 5ac415466861..406e72de88f6 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -47,9 +47,28 @@ config OVERLAY_FS_INDEX
The inodes index feature prevents breaking of lower hardlinks on copy
up.
- Note, that the inodes index feature is read-only backward compatible.
- That is, mounting an overlay which has an index dir on a kernel that
- doesn't support this feature read-only, will not have any negative
- outcomes. However, mounting the same overlay with an old kernel
- read-write and then mounting it again with a new kernel, will have
- unexpected results.
+ Note, that the inodes index feature is not backward compatible.
+ That is, mounting an overlay which has an inodes index on a kernel
+ that doesn't support this feature will have unexpected results.
+
+config OVERLAY_FS_NFS_EXPORT
+ bool "Overlayfs: turn on NFS export feature by default"
+ depends on OVERLAY_FS
+ depends on OVERLAY_FS_INDEX
+ help
+ If this config option is enabled then overlay filesystems will use
+ the inodes index dir to decode overlay NFS file handles by default.
+ In this case, it is still possible to turn off NFS export support
+ globally with the "nfs_export=off" module option or on a filesystem
+ instance basis with the "nfs_export=off" mount option.
+
+ The NFS export feature creates an index on copy up of every file and
+ directory. This full index is used to detect overlay filesystems
+ inconsistencies on lookup, like redirect from multiple upper dirs to
+ the same lower dir. The full index may incur some overhead on mount
+ time, especially when verifying that directory file handles are not
+ stale.
+
+ Note, that the NFS export feature is not backward compatible.
+ That is, mounting an overlay which has a full index on a kernel
+ that doesn't support this feature will have unexpected results.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 99373bbc1478..30802347a020 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,4 +4,5 @@
obj-$(CONFIG_OVERLAY_FS) += overlay.o
-overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o
+overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \
+ export.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index eb3b8d39fb61..d855f508fa20 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -232,13 +232,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
-struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
+struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper)
{
struct ovl_fh *fh;
int fh_type, fh_len, dwords;
void *buf;
int buflen = MAX_HANDLE_SZ;
- uuid_t *uuid = &lower->d_sb->s_uuid;
+ uuid_t *uuid = &real->d_sb->s_uuid;
buf = kmalloc(buflen, GFP_KERNEL);
if (!buf)
@@ -250,7 +250,7 @@ struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
* the price or reconnecting the dentry.
*/
dwords = buflen >> 2;
- fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
+ fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
buflen = (dwords << 2);
fh = ERR_PTR(-EIO);
@@ -288,8 +288,8 @@ out:
return fh;
}
-static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
- struct dentry *upper)
+int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+ struct dentry *upper)
{
const struct ovl_fh *fh = NULL;
int err;
@@ -315,6 +315,94 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
return err;
}
+/* Store file handle of @upper dir in @index dir entry */
+static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
+{
+ const struct ovl_fh *fh;
+ int err;
+
+ fh = ovl_encode_fh(upper, true);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+
+ err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
+
+ kfree(fh);
+ return err;
+}
+
+/*
+ * Create and install index entry.
+ *
+ * Caller must hold i_mutex on indexdir.
+ */
+static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
+ struct dentry *upper)
+{
+ struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+ struct inode *dir = d_inode(indexdir);
+ struct dentry *index = NULL;
+ struct dentry *temp = NULL;
+ struct qstr name = { };
+ int err;
+
+ /*
+ * For now this is only used for creating index entry for directories,
+ * because non-dir are copied up directly to index and then hardlinked
+ * to upper dir.
+ *
+ * TODO: implement create index for non-dir, so we can call it when
+ * encoding file handle for non-dir in case index does not exist.
+ */
+ if (WARN_ON(!d_is_dir(dentry)))
+ return -EIO;
+
+ /* Directory not expected to be indexed before copy up */
+ if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
+ return -EIO;
+
+ err = ovl_get_index_name(origin, &name);
+ if (err)
+ return err;
+
+ temp = ovl_lookup_temp(indexdir);
+ if (IS_ERR(temp))
+ goto temp_err;
+
+ err = ovl_do_mkdir(dir, temp, S_IFDIR, true);
+ if (err)
+ goto out;
+
+ err = ovl_set_upper_fh(upper, temp);
+ if (err)
+ goto out_cleanup;
+
+ index = lookup_one_len(name.name, indexdir, name.len);
+ if (IS_ERR(index)) {
+ err = PTR_ERR(index);
+ } else {
+ err = ovl_do_rename(dir, temp, dir, index, 0);
+ dput(index);
+ }
+
+ if (err)
+ goto out_cleanup;
+
+out:
+ dput(temp);
+ kfree(name.name);
+ return err;
+
+temp_err:
+ err = PTR_ERR(temp);
+ temp = NULL;
+ goto out;
+
+out_cleanup:
+ ovl_cleanup(dir, temp);
+ goto out;
+}
+
struct ovl_copy_up_ctx {
struct dentry *parent;
struct dentry *dentry;
@@ -327,6 +415,7 @@ struct ovl_copy_up_ctx {
struct dentry *workdir;
bool tmpfile;
bool origin;
+ bool indexed;
};
static int ovl_link_up(struct ovl_copy_up_ctx *c)
@@ -361,7 +450,10 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
}
}
inode_unlock(udir);
- ovl_set_nlink_upper(c->dentry);
+ if (err)
+ return err;
+
+ err = ovl_set_nlink_upper(c->dentry);
return err;
}
@@ -498,6 +590,12 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
if (err)
goto out_cleanup;
+ if (S_ISDIR(c->stat.mode) && c->indexed) {
+ err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
+ if (err)
+ goto out_cleanup;
+ }
+
if (c->tmpfile) {
inode_lock_nested(udir, I_MUTEX_PARENT);
err = ovl_install_temp(c, temp, &newdentry);
@@ -536,20 +634,33 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
- bool indexed = false;
+ bool to_index = false;
- if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
- c->stat.nlink > 1)
- indexed = true;
+ /*
+ * Indexed non-dir is copied up directly to the index entry and then
+ * hardlinked to upper dir. Indexed dir is copied up to indexdir,
+ * then index entry is created and then copied up dir installed.
+ * Copying dir up to indexdir instead of workdir simplifies locking.
+ */
+ if (ovl_need_index(c->dentry)) {
+ c->indexed = true;
+ if (S_ISDIR(c->stat.mode))
+ c->workdir = ovl_indexdir(c->dentry->d_sb);
+ else
+ to_index = true;
+ }
- if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
+ if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
c->origin = true;
- if (indexed) {
+ if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
if (err)
return err;
+ } else if (WARN_ON(!c->parent)) {
+ /* Disconnected dentry must be copied up to index dir */
+ return -EIO;
} else {
/*
* Mark parent "impure" because it may now contain non-pure
@@ -572,11 +683,17 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
}
}
- if (indexed) {
- if (!err)
- ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
- kfree(c->destname.name);
- } else if (!err) {
+
+ if (err)
+ goto out;
+
+ if (c->indexed)
+ ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+
+ if (to_index) {
+ /* Initialize nlink for copy up of disconnected dentry */
+ err = ovl_set_nlink_upper(c->dentry);
+ } else {
struct inode *udir = d_inode(c->destdir);
/* Restore timestamps on parent (best effort) */
@@ -587,6 +704,9 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
ovl_dentry_set_upper_alias(c->dentry);
}
+out:
+ if (to_index)
+ kfree(c->destname.name);
return err;
}
@@ -611,14 +731,17 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (err)
return err;
- ovl_path_upper(parent, &parentpath);
- ctx.destdir = parentpath.dentry;
- ctx.destname = dentry->d_name;
+ if (parent) {
+ ovl_path_upper(parent, &parentpath);
+ ctx.destdir = parentpath.dentry;
+ ctx.destname = dentry->d_name;
- err = vfs_getattr(&parentpath, &ctx.pstat,
- STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
- if (err)
- return err;
+ err = vfs_getattr(&parentpath, &ctx.pstat,
+ STATX_ATIME | STATX_MTIME,
+ AT_STATX_SYNC_AS_STAT);
+ if (err)
+ return err;
+ }
/* maybe truncate regular file. this has no effect on dirs */
if (flags & O_TRUNC)
@@ -639,7 +762,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
} else {
if (!ovl_dentry_upper(dentry))
err = ovl_do_copy_up(&ctx);
- if (!err && !ovl_dentry_has_upper_alias(dentry))
+ if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
err = ovl_link_up(&ctx);
ovl_copy_up_end(dentry);
}
@@ -652,10 +775,19 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
{
int err = 0;
const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
+ bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
+
+ /*
+ * With NFS export, copy up can get called for a disconnected non-dir.
+ * In this case, we will copy up lower inode to index dir without
+ * linking it to upper dir.
+ */
+ if (WARN_ON(disconnected && d_is_dir(dentry)))
+ return -EIO;
while (!err) {
struct dentry *next;
- struct dentry *parent;
+ struct dentry *parent = NULL;
/*
* Check if copy-up has happened as well as for upper alias (in
@@ -671,12 +803,12 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
* with rename.
*/
if (ovl_dentry_upper(dentry) &&
- ovl_dentry_has_upper_alias(dentry))
+ (ovl_dentry_has_upper_alias(dentry) || disconnected))
break;
next = dget(dentry);
/* find the topmost dentry not yet copied up */
- for (;;) {
+ for (; !disconnected;) {
parent = dget_parent(next);
if (ovl_dentry_upper(parent))
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f9788bc116a8..839709c7803a 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -63,8 +63,7 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
}
/* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir,
- struct dentry *dentry)
+static struct dentry *ovl_whiteout(struct dentry *workdir)
{
int err;
struct dentry *whiteout;
@@ -83,6 +82,38 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
return whiteout;
}
+/* Caller must hold i_mutex on both workdir and dir */
+int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+ struct dentry *dentry)
+{
+ struct inode *wdir = workdir->d_inode;
+ struct dentry *whiteout;
+ int err;
+ int flags = 0;
+
+ whiteout = ovl_whiteout(workdir);
+ err = PTR_ERR(whiteout);
+ if (IS_ERR(whiteout))
+ return err;
+
+ if (d_is_dir(dentry))
+ flags = RENAME_EXCHANGE;
+
+ err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
+ if (err)
+ goto kill_whiteout;
+ if (flags)
+ ovl_cleanup(wdir, dentry);
+
+out:
+ dput(whiteout);
+ return err;
+
+kill_whiteout:
+ ovl_cleanup(wdir, whiteout);
+ goto out;
+}
+
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
struct cattr *attr, struct dentry *hardlink, bool debug)
{
@@ -181,11 +212,6 @@ static bool ovl_type_origin(struct dentry *dentry)
return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
}
-static bool ovl_may_have_whiteouts(struct dentry *dentry)
-{
- return ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
-}
-
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct cattr *attr, struct dentry *hardlink)
{
@@ -301,37 +327,6 @@ out:
return ERR_PTR(err);
}
-static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
-{
- int err;
- struct dentry *ret = NULL;
- LIST_HEAD(list);
-
- err = ovl_check_empty_dir(dentry, &list);
- if (err) {
- ret = ERR_PTR(err);
- goto out_free;
- }
-
- /*
- * When removing an empty opaque directory, then it makes no sense to
- * replace it with an exact replica of itself.
- *
- * If upperdentry has whiteouts, clear them.
- *
- * Can race with copy-up, since we don't hold the upperdir mutex.
- * Doesn't matter, since copy-up can't create a non-empty directory
- * from an empty one.
- */
- if (!list_empty(&list))
- ret = ovl_clear_empty(dentry, &list);
-
-out_free:
- ovl_cache_free(&list);
-
- return ret;
-}
-
static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
const struct posix_acl *acl)
{
@@ -623,23 +618,20 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
}
-static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
+static int ovl_remove_and_whiteout(struct dentry *dentry,
+ struct list_head *list)
{
struct dentry *workdir = ovl_workdir(dentry);
- struct inode *wdir = workdir->d_inode;
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
- struct inode *udir = upperdir->d_inode;
- struct dentry *whiteout;
struct dentry *upper;
struct dentry *opaquedir = NULL;
int err;
- int flags = 0;
if (WARN_ON(!workdir))
return -EROFS;
- if (is_dir) {
- opaquedir = ovl_check_empty_and_clear(dentry);
+ if (!list_empty(list)) {
+ opaquedir = ovl_clear_empty(dentry, list);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
goto out;
@@ -662,24 +654,13 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
goto out_dput_upper;
}
- whiteout = ovl_whiteout(workdir, dentry);
- err = PTR_ERR(whiteout);
- if (IS_ERR(whiteout))
- goto out_dput_upper;
-
- if (d_is_dir(upper))
- flags = RENAME_EXCHANGE;
-
- err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+ err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
if (err)
- goto kill_whiteout;
- if (flags)
- ovl_cleanup(wdir, upper);
+ goto out_d_drop;
ovl_dentry_version_inc(dentry->d_parent, true);
out_d_drop:
d_drop(dentry);
- dput(whiteout);
out_dput_upper:
dput(upper);
out_unlock:
@@ -688,13 +669,10 @@ out_dput:
dput(opaquedir);
out:
return err;
-
-kill_whiteout:
- ovl_cleanup(wdir, whiteout);
- goto out_d_drop;
}
-static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
+static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
+ struct list_head *list)
{
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct inode *dir = upperdir->d_inode;
@@ -702,10 +680,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
struct dentry *opaquedir = NULL;
int err;
- /* Redirect/origin dir can be !ovl_lower_positive && not clean */
- if (is_dir && (ovl_dentry_get_redirect(dentry) ||
- ovl_may_have_whiteouts(dentry))) {
- opaquedir = ovl_check_empty_and_clear(dentry);
+ if (!list_empty(list)) {
+ opaquedir = ovl_clear_empty(dentry, list);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
goto out;
@@ -746,11 +722,26 @@ out:
return err;
}
+static bool ovl_pure_upper(struct dentry *dentry)
+{
+ return !ovl_dentry_lower(dentry) &&
+ !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
+}
+
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
int err;
bool locked = false;
const struct cred *old_cred;
+ bool lower_positive = ovl_lower_positive(dentry);
+ LIST_HEAD(list);
+
+ /* No need to clean pure upper removed by vfs_rmdir() */
+ if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
+ err = ovl_check_empty_dir(dentry, &list);
+ if (err)
+ goto out;
+ }
err = ovl_want_write(dentry);
if (err)
@@ -765,10 +756,10 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
goto out_drop_write;
old_cred = ovl_override_creds(dentry->d_sb);
- if (!ovl_lower_positive(dentry))
- err = ovl_remove_upper(dentry, is_dir);
+ if (!lower_positive)
+ err = ovl_remove_upper(dentry, is_dir, &list);
else
- err = ovl_remove_and_whiteout(dentry, is_dir);
+ err = ovl_remove_and_whiteout(dentry, &list);
revert_creds(old_cred);
if (!err) {
if (is_dir)
@@ -780,6 +771,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
out_drop_write:
ovl_drop_write(dentry);
out:
+ ovl_cache_free(&list);
return err;
}
@@ -915,6 +907,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
bool samedir = olddir == newdir;
struct dentry *opaquedir = NULL;
const struct cred *old_cred = NULL;
+ LIST_HEAD(list);
err = -EINVAL;
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
@@ -929,6 +922,27 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (!overwrite && !ovl_can_move(new))
goto out;
+ if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
+ err = ovl_check_empty_dir(new, &list);
+ if (err)
+ goto out;
+ }
+
+ if (overwrite) {
+ if (ovl_lower_positive(old)) {
+ if (!ovl_dentry_is_whiteout(new)) {
+ /* Whiteout source */
+ flags |= RENAME_WHITEOUT;
+ } else {
+ /* Switch whiteouts */
+ flags |= RENAME_EXCHANGE;
+ }
+ } else if (is_dir && ovl_dentry_is_whiteout(new)) {
+ flags |= RENAME_EXCHANGE;
+ cleanup_whiteout = true;
+ }
+ }
+
err = ovl_want_write(old);
if (err)
goto out;
@@ -952,9 +966,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
old_cred = ovl_override_creds(old->d_sb);
- if (overwrite && new_is_dir && (ovl_type_merge_or_lower(new) ||
- ovl_may_have_whiteouts(new))) {
- opaquedir = ovl_check_empty_and_clear(new);
+ if (!list_empty(&list)) {
+ opaquedir = ovl_clear_empty(new, &list);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir)) {
opaquedir = NULL;
@@ -962,21 +975,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
}
}
- if (overwrite) {
- if (ovl_lower_positive(old)) {
- if (!ovl_dentry_is_whiteout(new)) {
- /* Whiteout source */
- flags |= RENAME_WHITEOUT;
- } else {
- /* Switch whiteouts */
- flags |= RENAME_EXCHANGE;
- }
- } else if (is_dir && ovl_dentry_is_whiteout(new)) {
- flags |= RENAME_EXCHANGE;
- cleanup_whiteout = true;
- }
- }
-
old_upperdir = ovl_dentry_upper(old->d_parent);
new_upperdir = ovl_dentry_upper(new->d_parent);
@@ -1094,6 +1092,7 @@ out_drop_write:
ovl_drop_write(old);
out:
dput(opaquedir);
+ ovl_cache_free(&list);
return err;
}
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
new file mode 100644
index 000000000000..bb94ce9da5c8
--- /dev/null
+++ b/fs/overlayfs/export.c
@@ -0,0 +1,715 @@
+/*
+ * Overlayfs NFS export support.
+ *
+ * Amir Goldstein <amir73il@gmail.com>
+ *
+ * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/cred.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/ratelimit.h>
+#include "overlayfs.h"
+
+/*
+ * We only need to encode origin if there is a chance that the same object was
+ * encoded pre copy up and then we need to stay consistent with the same
+ * encoding also after copy up. If non-pure upper is not indexed, then it was
+ * copied up before NFS export was enabled. In that case we don't need to worry
+ * about staying consistent with pre copy up encoding and we encode an upper
+ * file handle. Overlay root dentry is a private case of non-indexed upper.
+ *
+ * The following table summarizes the different file handle encodings used for
+ * different overlay object types:
+ *
+ * Object type | Encoding
+ * --------------------------------
+ * Pure upper | U
+ * Non-indexed upper | U
+ * Indexed upper | L (*)
+ * Non-upper | L (*)
+ *
+ * U = upper file handle
+ * L = lower file handle
+ *
+ * (*) Connecting an overlay dir from real lower dentry is not always
+ * possible when there are redirects in lower layers. To mitigate this case,
+ * we copy up the lower dir first and then encode an upper dir file handle.
+ */
+static bool ovl_should_encode_origin(struct dentry *dentry)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+ if (!ovl_dentry_lower(dentry))
+ return false;
+
+ /*
+ * Decoding a merge dir, whose origin's parent is under a redirected
+ * lower dir is not always possible. As a simple aproximation, we do
+ * not encode lower dir file handles when overlay has multiple lower
+ * layers and origin is below the topmost lower layer.
+ *
+ * TODO: copy up only the parent that is under redirected lower.
+ */
+ if (d_is_dir(dentry) && ofs->upper_mnt &&
+ OVL_E(dentry)->lowerstack[0].layer->idx > 1)
+ return false;
+
+ /* Decoding a non-indexed upper from origin is not implemented */
+ if (ovl_dentry_upper(dentry) &&
+ !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ return false;
+
+ return true;
+}
+
+static int ovl_encode_maybe_copy_up(struct dentry *dentry)
+{
+ int err;
+
+ if (ovl_dentry_upper(dentry))
+ return 0;
+
+ err = ovl_want_write(dentry);
+ if (err)
+ return err;
+
+ err = ovl_copy_up(dentry);
+
+ ovl_drop_write(dentry);
+ return err;
+}
+
+static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
+{
+ struct dentry *origin = ovl_dentry_lower(dentry);
+ struct ovl_fh *fh = NULL;
+ int err;
+
+ /*
+ * If we should not encode a lower dir file handle, copy up and encode
+ * an upper dir file handle.
+ */
+ if (!ovl_should_encode_origin(dentry)) {
+ err = ovl_encode_maybe_copy_up(dentry);
+ if (err)
+ goto fail;
+
+ origin = NULL;
+ }
+
+ /* Encode an upper or origin file handle */
+ fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin);
+ err = PTR_ERR(fh);
+ if (IS_ERR(fh))
+ goto fail;
+
+ err = -EOVERFLOW;
+ if (fh->len > buflen)
+ goto fail;
+
+ memcpy(buf, (char *)fh, fh->len);
+ err = fh->len;
+
+out:
+ kfree(fh);
+ return err;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
+ dentry, err, buflen, fh ? (int)fh->len : 0,
+ fh ? fh->type : 0);
+ goto out;
+}
+
+static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
+{
+ int res, len = *max_len << 2;
+
+ res = ovl_d_to_fh(dentry, (char *)fid, len);
+ if (res <= 0)
+ return FILEID_INVALID;
+
+ len = res;
+
+ /* Round up to dwords */
+ *max_len = (len + 3) >> 2;
+ return OVL_FILEID;
+}
+
+static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len,
+ struct inode *parent)
+{
+ struct dentry *dentry;
+ int type;
+
+ /* TODO: encode connectable file handles */
+ if (parent)
+ return FILEID_INVALID;
+
+ dentry = d_find_any_alias(inode);
+ if (WARN_ON(!dentry))
+ return FILEID_INVALID;
+
+ type = ovl_dentry_to_fh(dentry, fid, max_len);
+
+ dput(dentry);
+ return type;
+}
+
+/*
+ * Find or instantiate an overlay dentry from real dentries and index.
+ */
+static struct dentry *ovl_obtain_alias(struct super_block *sb,
+ struct dentry *upper_alias,
+ struct ovl_path *lowerpath,
+ struct dentry *index)
+{
+ struct dentry *lower = lowerpath ? lowerpath->dentry : NULL;
+ struct dentry *upper = upper_alias ?: index;
+ struct dentry *dentry;
+ struct inode *inode;
+ struct ovl_entry *oe;
+
+ /* We get overlay directory dentries with ovl_lookup_real() */
+ if (d_is_dir(upper ?: lower))
+ return ERR_PTR(-EIO);
+
+ inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower);
+ if (IS_ERR(inode)) {
+ dput(upper);
+ return ERR_CAST(inode);
+ }
+
+ if (index)
+ ovl_set_flag(OVL_INDEX, inode);
+
+ dentry = d_find_any_alias(inode);
+ if (!dentry) {
+ dentry = d_alloc_anon(inode->i_sb);
+ if (!dentry)
+ goto nomem;
+ oe = ovl_alloc_entry(lower ? 1 : 0);
+ if (!oe)
+ goto nomem;
+
+ if (lower) {
+ oe->lowerstack->dentry = dget(lower);
+ oe->lowerstack->layer = lowerpath->layer;
+ }
+ dentry->d_fsdata = oe;
+ if (upper_alias)
+ ovl_dentry_set_upper_alias(dentry);
+ }
+
+ return d_instantiate_anon(dentry, inode);
+
+nomem:
+ iput(inode);
+ dput(dentry);
+ return ERR_PTR(-ENOMEM);
+}
+
+/* Get the upper or lower dentry in stach whose on layer @idx */
+static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+ int i;
+
+ if (!idx)
+ return ovl_dentry_upper(dentry);
+
+ for (i = 0; i < oe->numlower; i++) {
+ if (oe->lowerstack[i].layer->idx == idx)
+ return oe->lowerstack[i].dentry;
+ }
+
+ return NULL;
+}
+
+/*
+ * Lookup a child overlay dentry to get a connected overlay dentry whose real
+ * dentry is @real. If @real is on upper layer, we lookup a child overlay
+ * dentry with the same name as the real dentry. Otherwise, we need to consult
+ * index for lookup.
+ */
+static struct dentry *ovl_lookup_real_one(struct dentry *connected,
+ struct dentry *real,
+ struct ovl_layer *layer)
+{
+ struct inode *dir = d_inode(connected);
+ struct dentry *this, *parent = NULL;
+ struct name_snapshot name;
+ int err;
+
+ /*
+ * Lookup child overlay dentry by real name. The dir mutex protects us
+ * from racing with overlay rename. If the overlay dentry that is above
+ * real has already been moved to a parent that is not under the
+ * connected overlay dir, we return -ECHILD and restart the lookup of
+ * connected real path from the top.
+ */
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ err = -ECHILD;
+ parent = dget_parent(real);
+ if (ovl_dentry_real_at(connected, layer->idx) != parent)
+ goto fail;
+
+ /*
+ * We also need to take a snapshot of real dentry name to protect us
+ * from racing with underlying layer rename. In this case, we don't
+ * care about returning ESTALE, only from dereferencing a free name
+ * pointer because we hold no lock on the real dentry.
+ */
+ take_dentry_name_snapshot(&name, real);
+ this = lookup_one_len(name.name, connected, strlen(name.name));
+ err = PTR_ERR(this);
+ if (IS_ERR(this)) {
+ goto fail;
+ } else if (!this || !this->d_inode) {
+ dput(this);
+ err = -ENOENT;
+ goto fail;
+ } else if (ovl_dentry_real_at(this, layer->idx) != real) {
+ dput(this);
+ err = -ESTALE;
+ goto fail;
+ }
+
+out:
+ release_dentry_name_snapshot(&name);
+ dput(parent);
+ inode_unlock(dir);
+ return this;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ real, layer->idx, connected, err);
+ this = ERR_PTR(err);
+ goto out;
+}
+
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+ struct dentry *real,
+ struct ovl_layer *layer);
+
+/*
+ * Lookup an indexed or hashed overlay dentry by real inode.
+ */
+static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
+ struct dentry *real,
+ struct ovl_layer *layer)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+ struct dentry *index = NULL;
+ struct dentry *this = NULL;
+ struct inode *inode;
+
+ /*
+ * Decoding upper dir from index is expensive, so first try to lookup
+ * overlay dentry in inode/dcache.
+ */
+ inode = ovl_lookup_inode(sb, real, !layer->idx);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ if (inode) {
+ this = d_find_any_alias(inode);
+ iput(inode);
+ }
+
+ /*
+ * For decoded lower dir file handle, lookup index by origin to check
+ * if lower dir was copied up and and/or removed.
+ */
+ if (!this && layer->idx && ofs->indexdir && !WARN_ON(!d_is_dir(real))) {
+ index = ovl_lookup_index(ofs, NULL, real, false);
+ if (IS_ERR(index))
+ return index;
+ }
+
+ /* Get connected upper overlay dir from index */
+ if (index) {
+ struct dentry *upper = ovl_index_upper(ofs, index);
+
+ dput(index);
+ if (IS_ERR_OR_NULL(upper))
+ return upper;
+
+ /*
+ * ovl_lookup_real() in lower layer may call recursively once to
+ * ovl_lookup_real() in upper layer. The first level call walks
+ * back lower parents to the topmost indexed parent. The second
+ * recursive call walks back from indexed upper to the topmost
+ * connected/hashed upper parent (or up to root).
+ */
+ this = ovl_lookup_real(sb, upper, &upper_layer);
+ dput(upper);
+ }
+
+ if (!this)
+ return NULL;
+
+ if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
+ dput(this);
+ this = ERR_PTR(-EIO);
+ }
+
+ return this;
+}
+
+/*
+ * Lookup an indexed or hashed overlay dentry, whose real dentry is an
+ * ancestor of @real.
+ */
+static struct dentry *ovl_lookup_real_ancestor(struct super_block *sb,
+ struct dentry *real,
+ struct ovl_layer *layer)
+{
+ struct dentry *next, *parent = NULL;
+ struct dentry *ancestor = ERR_PTR(-EIO);
+
+ if (real == layer->mnt->mnt_root)
+ return dget(sb->s_root);
+
+ /* Find the topmost indexed or hashed ancestor */
+ next = dget(real);
+ for (;;) {
+ parent = dget_parent(next);
+
+ /*
+ * Lookup a matching overlay dentry in inode/dentry
+ * cache or in index by real inode.
+ */
+ ancestor = ovl_lookup_real_inode(sb, next, layer);
+ if (ancestor)
+ break;
+
+ if (parent == layer->mnt->mnt_root) {
+ ancestor = dget(sb->s_root);
+ break;
+ }
+
+ /*
+ * If @real has been moved out of the layer root directory,
+ * we will eventully hit the real fs root. This cannot happen
+ * by legit overlay rename, so we return error in that case.
+ */
+ if (parent == next) {
+ ancestor = ERR_PTR(-EXDEV);
+ break;
+ }
+
+ dput(next);
+ next = parent;
+ }
+
+ dput(parent);
+ dput(next);
+
+ return ancestor;
+}
+
+/*
+ * Lookup a connected overlay dentry whose real dentry is @real.
+ * If @real is on upper layer, we lookup a child overlay dentry with the same
+ * path the real dentry. Otherwise, we need to consult index for lookup.
+ */
+static struct dentry *ovl_lookup_real(struct super_block *sb,
+ struct dentry *real,
+ struct ovl_layer *layer)
+{
+ struct dentry *connected;
+ int err = 0;
+
+ connected = ovl_lookup_real_ancestor(sb, real, layer);
+ if (IS_ERR(connected))
+ return connected;
+
+ while (!err) {
+ struct dentry *next, *this;
+ struct dentry *parent = NULL;
+ struct dentry *real_connected = ovl_dentry_real_at(connected,
+ layer->idx);
+
+ if (real_connected == real)
+ break;
+
+ /* Find the topmost dentry not yet connected */
+ next = dget(real);
+ for (;;) {
+ parent = dget_parent(next);
+
+ if (parent == real_connected)
+ break;
+
+ /*
+ * If real has been moved out of 'real_connected',
+ * we will not find 'real_connected' and hit the layer
+ * root. In that case, we need to restart connecting.
+ * This game can go on forever in the worst case. We
+ * may want to consider taking s_vfs_rename_mutex if
+ * this happens more than once.
+ */
+ if (parent == layer->mnt->mnt_root) {
+ dput(connected);
+ connected = dget(sb->s_root);
+ break;
+ }
+
+ /*
+ * If real file has been moved out of the layer root
+ * directory, we will eventully hit the real fs root.
+ * This cannot happen by legit overlay rename, so we
+ * return error in that case.
+ */
+ if (parent == next) {
+ err = -EXDEV;
+ break;
+ }
+
+ dput(next);
+ next = parent;
+ }
+
+ if (!err) {
+ this = ovl_lookup_real_one(connected, next, layer);
+ if (IS_ERR(this))
+ err = PTR_ERR(this);
+
+ /*
+ * Lookup of child in overlay can fail when racing with
+ * overlay rename of child away from 'connected' parent.
+ * In this case, we need to restart the lookup from the
+ * top, because we cannot trust that 'real_connected' is
+ * still an ancestor of 'real'. There is a good chance
+ * that the renamed overlay ancestor is now in cache, so
+ * ovl_lookup_real_ancestor() will find it and we can
+ * continue to connect exactly from where lookup failed.
+ */
+ if (err == -ECHILD) {
+ this = ovl_lookup_real_ancestor(sb, real,
+ layer);
+ err = IS_ERR(this) ? PTR_ERR(this) : 0;
+ }
+ if (!err) {
+ dput(connected);
+ connected = this;
+ }
+ }
+
+ dput(parent);
+ dput(next);
+ }
+
+ if (err)
+ goto fail;
+
+ return connected;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to lookup by real (%pd2, layer=%d, connected=%pd2, err=%i)\n",
+ real, layer->idx, connected, err);
+ dput(connected);
+ return ERR_PTR(err);
+}
+
+/*
+ * Get an overlay dentry from upper/lower real dentries and index.
+ */
+static struct dentry *ovl_get_dentry(struct super_block *sb,
+ struct dentry *upper,
+ struct ovl_path *lowerpath,
+ struct dentry *index)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_layer upper_layer = { .mnt = ofs->upper_mnt };
+ struct ovl_layer *layer = upper ? &upper_layer : lowerpath->layer;
+ struct dentry *real = upper ?: (index ?: lowerpath->dentry);
+
+ /*
+ * Obtain a disconnected overlay dentry from a non-dir real dentry
+ * and index.
+ */
+ if (!d_is_dir(real))
+ return ovl_obtain_alias(sb, upper, lowerpath, index);
+
+ /* Removed empty directory? */
+ if ((real->d_flags & DCACHE_DISCONNECTED) || d_unhashed(real))
+ return ERR_PTR(-ENOENT);
+
+ /*
+ * If real dentry is connected and hashed, get a connected overlay
+ * dentry whose real dentry is @real.
+ */
+ return ovl_lookup_real(sb, real, layer);
+}
+
+static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
+ struct ovl_fh *fh)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+ struct dentry *dentry;
+ struct dentry *upper;
+
+ if (!ofs->upper_mnt)
+ return ERR_PTR(-EACCES);
+
+ upper = ovl_decode_fh(fh, ofs->upper_mnt);
+ if (IS_ERR_OR_NULL(upper))
+ return upper;
+
+ dentry = ovl_get_dentry(sb, upper, NULL, NULL);
+ dput(upper);
+
+ return dentry;
+}
+
+static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
+ struct ovl_fh *fh)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_path origin = { };
+ struct ovl_path *stack = &origin;
+ struct dentry *dentry = NULL;
+ struct dentry *index = NULL;
+ struct inode *inode = NULL;
+ bool is_deleted = false;
+ int err;
+
+ /* First lookup indexed upper by fh */
+ if (ofs->indexdir) {
+ index = ovl_get_index_fh(ofs, fh);
+ err = PTR_ERR(index);
+ if (IS_ERR(index)) {
+ if (err != -ESTALE)
+ return ERR_PTR(err);
+
+ /* Found a whiteout index - treat as deleted inode */
+ is_deleted = true;
+ index = NULL;
+ }
+ }
+
+ /* Then try to get upper dir by index */
+ if (index && d_is_dir(index)) {
+ struct dentry *upper = ovl_index_upper(ofs, index);
+
+ err = PTR_ERR(upper);
+ if (IS_ERR_OR_NULL(upper))
+ goto out_err;
+
+ dentry = ovl_get_dentry(sb, upper, NULL, NULL);
+ dput(upper);
+ goto out;
+ }
+
+ /* Then lookup origin by fh */
+ err = ovl_check_origin_fh(ofs, fh, NULL, &stack);
+ if (err) {
+ goto out_err;
+ } else if (index) {
+ err = ovl_verify_origin(index, origin.dentry, false);
+ if (err)
+ goto out_err;
+ } else if (is_deleted) {
+ /* Lookup deleted non-dir by origin inode */
+ if (!d_is_dir(origin.dentry))
+ inode = ovl_lookup_inode(sb, origin.dentry, false);
+ err = -ESTALE;
+ if (!inode || atomic_read(&inode->i_count) == 1)
+ goto out_err;
+
+ /* Deleted but still open? */
+ index = dget(ovl_i_dentry_upper(inode));
+ }
+
+ dentry = ovl_get_dentry(sb, NULL, &origin, index);
+
+out:
+ dput(origin.dentry);
+ dput(index);
+ iput(inode);
+ return dentry;
+
+out_err:
+ dentry = ERR_PTR(err);
+ goto out;
+}
+
+static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ struct dentry *dentry = NULL;
+ struct ovl_fh *fh = (struct ovl_fh *) fid;
+ int len = fh_len << 2;
+ unsigned int flags = 0;
+ int err;
+
+ err = -EINVAL;
+ if (fh_type != OVL_FILEID)
+ goto out_err;
+
+ err = ovl_check_fh_len(fh, len);
+ if (err)
+ goto out_err;
+
+ flags = fh->flags;
+ dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
+ ovl_upper_fh_to_d(sb, fh) :
+ ovl_lower_fh_to_d(sb, fh);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry) && err != -ESTALE)
+ goto out_err;
+
+ return dentry;
+
+out_err:
+ pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
+ len, fh_type, flags, err);
+ return ERR_PTR(err);
+}
+
+static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ pr_warn_ratelimited("overlayfs: connectable file handles not supported; use 'no_subtree_check' exportfs option.\n");
+ return ERR_PTR(-EACCES);
+}
+
+static int ovl_get_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ /*
+ * ovl_fh_to_dentry() returns connected dir overlay dentries and
+ * ovl_fh_to_parent() is not implemented, so we should not get here.
+ */
+ WARN_ON_ONCE(1);
+ return -EIO;
+}
+
+static struct dentry *ovl_get_parent(struct dentry *dentry)
+{
+ /*
+ * ovl_fh_to_dentry() returns connected dir overlay dentries, so we
+ * should not get here.
+ */
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EIO);
+}
+
+const struct export_operations ovl_export_operations = {
+ .encode_fh = ovl_encode_inode_fh,
+ .fh_to_dentry = ovl_fh_to_dentry,
+ .fh_to_parent = ovl_fh_to_parent,
+ .get_name = ovl_get_name,
+ .get_parent = ovl_get_parent,
+};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 00b6b294272a..fcd97b783fa1 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -105,12 +105,20 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
* Lower hardlinks may be broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
* for those different files, even for the same fs case.
+ *
+ * Similarly, several redirected dirs can point to the
+ * same dir on a lower layer. With the "verify_lower"
+ * feature, we do not use the lower origin st_ino, if
+ * we haven't verified that this redirect is unique.
+ *
* With inodes index enabled, it is safe to use st_ino
- * of an indexed hardlinked origin. The index validates
- * that the upper hardlink is not broken.
+ * of an indexed origin. The index validates that the
+ * upper hardlink is not broken and that a redirected
+ * dir is the only redirect to that origin.
*/
- if (is_dir || lowerstat.nlink == 1 ||
- ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
+ (!ovl_verify_lower(dentry->d_sb) &&
+ (is_dir || lowerstat.nlink == 1)))
stat->ino = lowerstat.ino;
if (samefs)
@@ -343,8 +351,10 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
+ /* Copy up of disconnected dentry does not set upper alias */
if (ovl_dentry_upper(dentry) &&
- ovl_dentry_has_upper_alias(dentry))
+ (ovl_dentry_has_upper_alias(dentry) ||
+ (dentry->d_flags & DCACHE_DISCONNECTED)))
return false;
if (special_file(d_inode(dentry)->i_mode))
@@ -604,9 +614,25 @@ static int ovl_inode_set(struct inode *inode, void *data)
}
static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
- struct dentry *upperdentry)
+ struct dentry *upperdentry, bool strict)
{
/*
+ * For directories, @strict verify from lookup path performs consistency
+ * checks, so NULL lower/upper in dentry must match NULL lower/upper in
+ * inode. Non @strict verify from NFS handle decode path passes NULL for
+ * 'unknown' lower/upper.
+ */
+ if (S_ISDIR(inode->i_mode) && strict) {
+ /* Real lower dir moved to upper layer under us? */
+ if (!lowerdentry && ovl_inode_lower(inode))
+ return false;
+
+ /* Lookup of an uncovered redirect origin? */
+ if (!upperdentry && ovl_inode_upper(inode))
+ return false;
+ }
+
+ /*
* Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
* This happens when finding a copied up overlay inode for a renamed
* or hardlinked overlay dentry and lower dentry cannot be followed
@@ -625,14 +651,35 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
return true;
}
-struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
- struct dentry *index)
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+ bool is_upper)
{
- struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+ struct inode *inode, *key = d_inode(real);
+
+ inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+ if (!inode)
+ return NULL;
+
+ if (!ovl_verify_inode(inode, is_upper ? NULL : real,
+ is_upper ? real : NULL, false)) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+
+ return inode;
+}
+
+struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
+ struct dentry *lowerdentry, struct dentry *index,
+ unsigned int numlower)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
/* Already indexed or could be indexed on copy up? */
- bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry));
+ bool indexed = (index || (ovl_indexdir(sb) && !upperdentry));
+ struct dentry *origin = indexed ? lowerdentry : NULL;
+ bool is_dir;
if (WARN_ON(upperdentry && indexed && !lowerdentry))
return ERR_PTR(-EIO);
@@ -641,17 +688,22 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
realinode = d_inode(lowerdentry);
/*
- * Copy up origin (lower) may exist for non-indexed upper, but we must
- * not use lower as hash key in that case.
- * Hash inodes that are or could be indexed by origin inode and
- * non-indexed upper inodes that could be hard linked by upper inode.
+ * Copy up origin (lower) may exist for non-indexed non-dir upper, but
+ * we must not use lower as hash key in that case.
+ * Hash non-dir that is or could be indexed by origin inode.
+ * Hash dir that is or could be merged by origin inode.
+ * Hash pure upper and non-indexed non-dir by upper inode.
+ * Hash non-indexed dir by upper inode for NFS export.
*/
- if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) {
- struct inode *key = d_inode(indexed ? lowerdentry :
- upperdentry);
- unsigned int nlink;
+ is_dir = S_ISDIR(realinode->i_mode);
+ if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt))
+ origin = lowerdentry;
+
+ if (upperdentry || origin) {
+ struct inode *key = d_inode(origin ?: upperdentry);
+ unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
- inode = iget5_locked(dentry->d_sb, (unsigned long) key,
+ inode = iget5_locked(sb, (unsigned long) key,
ovl_inode_test, ovl_inode_set, key);
if (!inode)
goto out_nomem;
@@ -660,7 +712,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
* Verify that the underlying files stored in the inode
* match those in the dentry.
*/
- if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) {
+ if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
+ true)) {
iput(inode);
inode = ERR_PTR(-ESTALE);
goto out;
@@ -670,11 +723,12 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
goto out;
}
- nlink = ovl_get_nlink(lowerdentry, upperdentry,
- realinode->i_nlink);
+ /* Recalculate nlink for non-dir due to indexing */
+ if (!is_dir)
+ nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
set_nlink(inode, nlink);
} else {
- inode = new_inode(dentry->d_sb);
+ inode = new_inode(sb);
if (!inode)
goto out_nomem;
}
@@ -685,10 +739,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
ovl_set_flag(OVL_IMPURE, inode);
/* Check for non-merge dir that may have whiteouts */
- if (S_ISDIR(realinode->i_mode)) {
- struct ovl_entry *oe = dentry->d_fsdata;
-
- if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
+ if (is_dir) {
+ if (((upperdentry && lowerdentry) || numlower > 1) ||
ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
ovl_set_flag(OVL_WHITEOUTS, inode);
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index beb945e1963c..de3e6da1d5a5 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -9,6 +9,7 @@
#include <linux/fs.h>
#include <linux/cred.h>
+#include <linux/ctype.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/ratelimit.h>
@@ -84,15 +85,54 @@ invalid:
static int ovl_acceptable(void *ctx, struct dentry *dentry)
{
- return 1;
+ /*
+ * A non-dir origin may be disconnected, which is fine, because
+ * we only need it for its unique inode number.
+ */
+ if (!d_is_dir(dentry))
+ return 1;
+
+ /* Don't decode a deleted empty directory */
+ if (d_unhashed(dentry))
+ return 0;
+
+ /* Check if directory belongs to the layer we are decoding from */
+ return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
}
-static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
+/*
+ * Check validity of an overlay file handle buffer.
+ *
+ * Return 0 for a valid file handle.
+ * Return -ENODATA for "origin unknown".
+ * Return <0 for an invalid file handle.
+ */
+int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
{
- int res;
+ if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
+ return -EINVAL;
+
+ if (fh->magic != OVL_FH_MAGIC)
+ return -EINVAL;
+
+ /* Treat larger version and unknown flags as "origin unknown" */
+ if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+ return -ENODATA;
+
+ /* Treat endianness mismatch as "origin unknown" */
+ if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ return -ENODATA;
+
+ return 0;
+}
+
+static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
+{
+ int res, err;
struct ovl_fh *fh = NULL;
- res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+ res = vfs_getxattr(dentry, name, NULL, 0);
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return NULL;
@@ -102,28 +142,20 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
if (res == 0)
return NULL;
- fh = kzalloc(res, GFP_KERNEL);
+ fh = kzalloc(res, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
+ res = vfs_getxattr(dentry, name, fh, res);
if (res < 0)
goto fail;
- if (res < sizeof(struct ovl_fh) || res < fh->len)
- goto invalid;
-
- if (fh->magic != OVL_FH_MAGIC)
+ err = ovl_check_fh_len(fh, res);
+ if (err < 0) {
+ if (err == -ENODATA)
+ goto out;
goto invalid;
-
- /* Treat larger version and unknown flags as "origin unknown" */
- if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
- goto out;
-
- /* Treat endianness mismatch as "origin unknown" */
- if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
- (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
- goto out;
+ }
return fh;
@@ -139,47 +171,41 @@ invalid:
goto out;
}
-static struct dentry *ovl_get_origin(struct dentry *dentry,
- struct vfsmount *mnt)
+struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
{
- struct dentry *origin = NULL;
- struct ovl_fh *fh = ovl_get_origin_fh(dentry);
+ struct dentry *real;
int bytes;
- if (IS_ERR_OR_NULL(fh))
- return (struct dentry *)fh;
-
/*
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
*/
if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
- goto out;
+ return NULL;
bytes = (fh->len - offsetof(struct ovl_fh, fid));
- origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
- bytes >> 2, (int)fh->type,
- ovl_acceptable, NULL);
- if (IS_ERR(origin)) {
- /* Treat stale file handle as "origin unknown" */
- if (origin == ERR_PTR(-ESTALE))
- origin = NULL;
- goto out;
+ real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
+ bytes >> 2, (int)fh->type,
+ ovl_acceptable, mnt);
+ if (IS_ERR(real)) {
+ /*
+ * Treat stale file handle to lower file as "origin unknown".
+ * upper file handle could become stale when upper file is
+ * unlinked and this information is needed to handle stale
+ * index entries correctly.
+ */
+ if (real == ERR_PTR(-ESTALE) &&
+ !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
+ real = NULL;
+ return real;
}
- if (ovl_dentry_weird(origin) ||
- ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
- goto invalid;
-
-out:
- kfree(fh);
- return origin;
+ if (ovl_dentry_weird(real)) {
+ dput(real);
+ return NULL;
+ }
-invalid:
- pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
- dput(origin);
- origin = NULL;
- goto out;
+ return real;
}
static bool ovl_is_opaquedir(struct dentry *dentry)
@@ -284,47 +310,81 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
}
-static int ovl_check_origin(struct dentry *upperdentry,
- struct ovl_path *lower, unsigned int numlower,
- struct ovl_path **stackp, unsigned int *ctrp)
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ struct dentry *upperdentry, struct ovl_path **stackp)
{
- struct vfsmount *mnt;
struct dentry *origin = NULL;
int i;
- for (i = 0; i < numlower; i++) {
- mnt = lower[i].layer->mnt;
- origin = ovl_get_origin(upperdentry, mnt);
- if (IS_ERR(origin))
- return PTR_ERR(origin);
-
+ for (i = 0; i < ofs->numlower; i++) {
+ origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt);
if (origin)
break;
}
if (!origin)
- return 0;
+ return -ESTALE;
+ else if (IS_ERR(origin))
+ return PTR_ERR(origin);
+
+ if (upperdentry && !ovl_is_whiteout(upperdentry) &&
+ ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
+ goto invalid;
- BUG_ON(*ctrp);
if (!*stackp)
*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
if (!*stackp) {
dput(origin);
return -ENOMEM;
}
- **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer};
- *ctrp = 1;
+ **stackp = (struct ovl_path){
+ .dentry = origin,
+ .layer = &ofs->lower_layers[i]
+ };
+
+ return 0;
+
+invalid:
+ pr_warn_ratelimited("overlayfs: invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
+ upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
+ d_inode(origin)->i_mode & S_IFMT);
+ dput(origin);
+ return -EIO;
+}
+
+static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
+ struct ovl_path **stackp, unsigned int *ctrp)
+{
+ struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
+ int err;
+
+ if (IS_ERR_OR_NULL(fh))
+ return PTR_ERR(fh);
+
+ err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp);
+ kfree(fh);
+
+ if (err) {
+ if (err == -ESTALE)
+ return 0;
+ return err;
+ }
+
+ if (WARN_ON(*ctrp))
+ return -EIO;
+ *ctrp = 1;
return 0;
}
/*
- * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
+ * Verify that @fh matches the file handle stored in xattr @name.
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
-static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
+static int ovl_verify_fh(struct dentry *dentry, const char *name,
+ const struct ovl_fh *fh)
{
- struct ovl_fh *ofh = ovl_get_origin_fh(dentry);
+ struct ovl_fh *ofh = ovl_get_fh(dentry, name);
int err = 0;
if (!ofh)
@@ -341,28 +401,28 @@ static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
}
/*
- * Verify that an inode matches the origin file handle stored in upper inode.
+ * Verify that @real dentry matches the file handle stored in xattr @name.
*
- * If @set is true and there is no stored file handle, encode and store origin
- * file handle in OVL_XATTR_ORIGIN.
+ * If @set is true and there is no stored file handle, encode @real and store
+ * file handle in xattr @name.
*
- * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
*/
-int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
- bool is_upper, bool set)
+int ovl_verify_set_fh(struct dentry *dentry, const char *name,
+ struct dentry *real, bool is_upper, bool set)
{
struct inode *inode;
struct ovl_fh *fh;
int err;
- fh = ovl_encode_fh(origin, is_upper);
+ fh = ovl_encode_fh(real, is_upper);
err = PTR_ERR(fh);
if (IS_ERR(fh))
goto fail;
- err = ovl_verify_origin_fh(dentry, fh);
+ err = ovl_verify_fh(dentry, name, fh);
if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
+ err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
if (err)
goto fail;
@@ -371,45 +431,71 @@ out:
return err;
fail:
- inode = d_inode(origin);
- pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
- origin, inode ? inode->i_ino : 0, err);
+ inode = d_inode(real);
+ pr_warn_ratelimited("overlayfs: failed to verify %s (%pd2, ino=%lu, err=%i)\n",
+ is_upper ? "upper" : "origin", real,
+ inode ? inode->i_ino : 0, err);
goto out;
}
+/* Get upper dentry from index */
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
+{
+ struct ovl_fh *fh;
+ struct dentry *upper;
+
+ if (!d_is_dir(index))
+ return dget(index);
+
+ fh = ovl_get_fh(index, OVL_XATTR_UPPER);
+ if (IS_ERR_OR_NULL(fh))
+ return ERR_CAST(fh);
+
+ upper = ovl_decode_fh(fh, ofs->upper_mnt);
+ kfree(fh);
+
+ if (IS_ERR_OR_NULL(upper))
+ return upper ?: ERR_PTR(-ESTALE);
+
+ if (!d_is_dir(upper)) {
+ pr_warn_ratelimited("overlayfs: invalid index upper (%pd2, upper=%pd2).\n",
+ index, upper);
+ dput(upper);
+ return ERR_PTR(-EIO);
+ }
+
+ return upper;
+}
+
+/* Is this a leftover from create/whiteout of directory index entry? */
+static bool ovl_is_temp_index(struct dentry *index)
+{
+ return index->d_name.name[0] == '#';
+}
+
/*
* Verify that an index entry name matches the origin file handle stored in
* OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
* Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
*/
-int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
- unsigned int numlower)
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
struct ovl_fh *fh = NULL;
size_t len;
struct ovl_path origin = { };
struct ovl_path *stack = &origin;
- unsigned int ctr = 0;
+ struct dentry *upper = NULL;
int err;
if (!d_inode(index))
return 0;
- /*
- * Directory index entries are going to be used for looking up
- * redirected upper dirs by lower dir fh when decoding an overlay
- * file handle of a merge dir. Whiteout index entries are going to be
- * used as an indication that an exported overlay file handle should
- * be treated as stale (i.e. after unlink of the overlay inode).
- * We don't know the verification rules for directory and whiteout
- * index entries, because they have not been implemented yet, so return
- * EINVAL if those entries are found to abort the mount to avoid
- * corrupting an index that was created by a newer kernel.
- */
- err = -EINVAL;
- if (d_is_dir(index) || ovl_is_whiteout(index))
+ /* Cleanup leftover from index create/cleanup attempt */
+ err = -ESTALE;
+ if (ovl_is_temp_index(index))
goto fail;
+ err = -EINVAL;
if (index->d_name.len < sizeof(struct ovl_fh)*2)
goto fail;
@@ -420,26 +506,68 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
goto fail;
err = -EINVAL;
- if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
+ if (hex2bin((u8 *)fh, index->d_name.name, len))
goto fail;
- err = ovl_verify_origin_fh(index, fh);
+ err = ovl_check_fh_len(fh, len);
if (err)
goto fail;
- err = ovl_check_origin(index, lower, numlower, &stack, &ctr);
- if (!err && !ctr)
- err = -ESTALE;
+ /*
+ * Whiteout index entries are used as an indication that an exported
+ * overlay file handle should be treated as stale (i.e. after unlink
+ * of the overlay inode). These entries contain no origin xattr.
+ */
+ if (ovl_is_whiteout(index))
+ goto out;
+
+ /*
+ * Verifying directory index entries are not stale is expensive, so
+ * only verify stale dir index if NFS export is enabled.
+ */
+ if (d_is_dir(index) && !ofs->config.nfs_export)
+ goto out;
+
+ /*
+ * Directory index entries should have 'upper' xattr pointing to the
+ * real upper dir. Non-dir index entries are hardlinks to the upper
+ * real inode. For non-dir index, we can read the copy up origin xattr
+ * directly from the index dentry, but for dir index we first need to
+ * decode the upper directory.
+ */
+ upper = ovl_index_upper(ofs, index);
+ if (IS_ERR_OR_NULL(upper)) {
+ err = PTR_ERR(upper);
+ /*
+ * Directory index entries with no 'upper' xattr need to be
+ * removed. When dir index entry has a stale 'upper' xattr,
+ * we assume that upper dir was removed and we treat the dir
+ * index as orphan entry that needs to be whited out.
+ */
+ if (err == -ESTALE)
+ goto orphan;
+ else if (!err)
+ err = -ESTALE;
+ goto fail;
+ }
+
+ err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
+ dput(upper);
if (err)
goto fail;
- /* Check if index is orphan and don't warn before cleaning it */
- if (d_inode(index)->i_nlink == 1 &&
- ovl_get_nlink(origin.dentry, index, 0) == 0)
- err = -ENOENT;
+ /* Check if non-dir index is orphan and don't warn before cleaning it */
+ if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
+ err = ovl_check_origin_fh(ofs, fh, index, &stack);
+ if (err)
+ goto fail;
+
+ if (ovl_get_nlink(origin.dentry, index, 0) == 0)
+ goto orphan;
+ }
- dput(origin.dentry);
out:
+ dput(origin.dentry);
kfree(fh);
return err;
@@ -447,6 +575,28 @@ fail:
pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
index, d_inode(index)->i_mode & S_IFMT, err);
goto out;
+
+orphan:
+ pr_warn_ratelimited("overlayfs: orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
+ index, d_inode(index)->i_mode & S_IFMT,
+ d_inode(index)->i_nlink);
+ err = -ENOENT;
+ goto out;
+}
+
+static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
+{
+ char *n, *s;
+
+ n = kzalloc(fh->len * 2, GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+
+ s = bin2hex(n, fh, fh->len);
+ *name = (struct qstr) QSTR_INIT(n, s - n);
+
+ return 0;
+
}
/*
@@ -466,35 +616,58 @@ fail:
*/
int ovl_get_index_name(struct dentry *origin, struct qstr *name)
{
- int err;
struct ovl_fh *fh;
- char *n, *s;
+ int err;
fh = ovl_encode_fh(origin, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = -ENOMEM;
- n = kzalloc(fh->len * 2, GFP_KERNEL);
- if (n) {
- s = bin2hex(n, fh, fh->len);
- *name = (struct qstr) QSTR_INIT(n, s - n);
- err = 0;
- }
- kfree(fh);
+ err = ovl_get_index_name_fh(fh, name);
+ kfree(fh);
return err;
+}
+
+/* Lookup index by file handle for NFS export */
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
+{
+ struct dentry *index;
+ struct qstr name;
+ int err;
+
+ err = ovl_get_index_name_fh(fh, &name);
+ if (err)
+ return ERR_PTR(err);
+
+ index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ kfree(name.name);
+ if (IS_ERR(index)) {
+ if (PTR_ERR(index) == -ENOENT)
+ index = NULL;
+ return index;
+ }
+ if (d_is_negative(index))
+ err = 0;
+ else if (ovl_is_whiteout(index))
+ err = -ESTALE;
+ else if (ovl_dentry_weird(index))
+ err = -EIO;
+ else
+ return index;
+
+ dput(index);
+ return ERR_PTR(err);
}
-static struct dentry *ovl_lookup_index(struct dentry *dentry,
- struct dentry *upper,
- struct dentry *origin)
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+ struct dentry *origin, bool verify)
{
- struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct dentry *index;
struct inode *inode;
struct qstr name;
+ bool is_dir = d_is_dir(origin);
int err;
err = ovl_get_index_name(origin, &name);
@@ -518,8 +691,16 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
inode = d_inode(index);
if (d_is_negative(index)) {
goto out_dput;
- } else if (upper && d_inode(upper) != inode) {
- goto out_dput;
+ } else if (ovl_is_whiteout(index) && !verify) {
+ /*
+ * When index lookup is called with !verify for decoding an
+ * overlay file handle, a whiteout index implies that decode
+ * should treat file handle as stale and no need to print a
+ * warning about it.
+ */
+ dput(index);
+ index = ERR_PTR(-ESTALE);
+ goto out;
} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
/*
@@ -533,8 +714,25 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
index, d_inode(index)->i_mode & S_IFMT,
d_inode(origin)->i_mode & S_IFMT);
goto fail;
- }
+ } else if (is_dir && verify) {
+ if (!upper) {
+ pr_warn_ratelimited("overlayfs: suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
+ origin, index);
+ goto fail;
+ }
+ /* Verify that dir index 'upper' xattr points to upper dir */
+ err = ovl_verify_upper(index, upper, false);
+ if (err) {
+ if (err == -ESTALE) {
+ pr_warn_ratelimited("overlayfs: suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
+ upper, origin, index);
+ }
+ goto fail;
+ }
+ } else if (upper && d_inode(upper) != inode) {
+ goto out_dput;
+ }
out:
kfree(name.name);
return index;
@@ -572,16 +770,25 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
return (idx < oe->numlower) ? idx + 1 : -1;
}
-static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path)
+/* Fix missing 'origin' xattr */
+static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
+ struct dentry *upper)
{
- int i;
+ int err;
- for (i = 0; i < ofs->numlower; i++) {
- if (ofs->lower_layers[i].mnt == path->layer->mnt)
- break;
- }
+ if (ovl_check_origin_xattr(upper))
+ return 0;
+
+ err = ovl_want_write(dentry);
+ if (err)
+ return err;
+
+ err = ovl_set_origin(dentry, lower, upper);
+ if (!err)
+ err = ovl_set_impure(dentry->d_parent, upper->d_parent);
- return i;
+ ovl_drop_write(dentry);
+ return err;
}
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
@@ -594,6 +801,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
struct ovl_path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
+ struct dentry *origin = NULL;
struct dentry *index = NULL;
unsigned int ctr = 0;
struct inode *inode = NULL;
@@ -638,8 +846,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
- err = ovl_check_origin(upperdentry, roe->lowerstack,
- roe->numlower, &stack, &ctr);
+ err = ovl_check_origin(ofs, upperdentry, &stack, &ctr);
if (err)
goto out_put_upper;
}
@@ -674,6 +881,34 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (!this)
continue;
+ /*
+ * If no origin fh is stored in upper of a merge dir, store fh
+ * of lower dir and set upper parent "impure".
+ */
+ if (upperdentry && !ctr && !ofs->noxattr) {
+ err = ovl_fix_origin(dentry, this, upperdentry);
+ if (err) {
+ dput(this);
+ goto out_put;
+ }
+ }
+
+ /*
+ * When "verify_lower" feature is enabled, do not merge with a
+ * lower dir that does not match a stored origin xattr. In any
+ * case, only verified origin is used for index lookup.
+ */
+ if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
+ err = ovl_verify_origin(upperdentry, this, false);
+ if (err) {
+ dput(this);
+ break;
+ }
+
+ /* Bless lower dir as verified origin */
+ origin = this;
+ }
+
stack[ctr].dentry = this;
stack[ctr].layer = lower.layer;
ctr++;
@@ -693,25 +928,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
*/
err = -EPERM;
if (d.redirect && !ofs->config.redirect_follow) {
- pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry);
+ pr_warn_ratelimited("overlayfs: refusing to follow redirect for (%pd2)\n",
+ dentry);
goto out_put;
}
if (d.redirect && d.redirect[0] == '/' && poe != roe) {
poe = roe;
-
/* Find the current layer on the root dentry */
- i = ovl_find_layer(ofs, &lower);
- if (WARN_ON(i == ofs->numlower))
- break;
+ i = lower.layer->idx - 1;
}
}
- /* Lookup index by lower inode and verify it matches upper inode */
- if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
- struct dentry *origin = stack[0].dentry;
+ /*
+ * Lookup index by lower inode and verify it matches upper inode.
+ * We only trust dir index if we verified that lower dir matches
+ * origin, otherwise dir index entries may be inconsistent and we
+ * ignore them. Always lookup index of non-dir and non-upper.
+ */
+ if (ctr && (!upperdentry || !d.is_dir))
+ origin = stack[0].dentry;
- index = ovl_lookup_index(dentry, upperdentry, origin);
+ if (origin && ovl_indexdir(dentry->d_sb) &&
+ (!d.is_dir || ovl_index_all(dentry->d_sb))) {
+ index = ovl_lookup_index(ofs, upperdentry, origin, true);
if (IS_ERR(index)) {
err = PTR_ERR(index);
index = NULL;
@@ -724,17 +964,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (!oe)
goto out_put;
- oe->opaque = upperopaque;
memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
dentry->d_fsdata = oe;
+ if (upperopaque)
+ ovl_dentry_set_opaque(dentry);
+
if (upperdentry)
ovl_dentry_set_upper_alias(dentry);
else if (index)
upperdentry = dget(index);
if (upperdentry || ctr) {
- inode = ovl_get_inode(dentry, upperdentry, index);
+ if (ctr)
+ origin = stack[0].dentry;
+ inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index,
+ ctr);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_free_oe;
@@ -748,9 +993,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
dput(index);
kfree(stack);
kfree(d.redirect);
- d_add(dentry, inode);
-
- return NULL;
+ return d_splice_alias(inode, dentry);
out_free_oe:
dentry->d_fsdata = NULL;
@@ -771,9 +1014,9 @@ out:
bool ovl_lower_positive(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
const struct qstr *name = &dentry->d_name;
+ const struct cred *old_cred;
unsigned int i;
bool positive = false;
bool done = false;
@@ -783,12 +1026,13 @@ bool ovl_lower_positive(struct dentry *dentry)
* whiteout.
*/
if (!dentry->d_inode)
- return oe->opaque;
+ return ovl_dentry_is_opaque(dentry);
/* Negative upper -> positive lower */
if (!ovl_dentry_upper(dentry))
return true;
+ old_cred = ovl_override_creds(dentry->d_sb);
/* Positive upper -> have to look up lower to see whether it exists */
for (i = 0; !done && !positive && i < poe->numlower; i++) {
struct dentry *this;
@@ -818,6 +1062,7 @@ bool ovl_lower_positive(struct dentry *dentry)
dput(this);
}
}
+ revert_creds(old_cred);
return positive;
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b489099ccd49..0df25a9c94bd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -27,8 +27,9 @@ enum ovl_path_type {
#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
+#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
-enum ovl_flag {
+enum ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
OVL_IMPURE,
/* Non-merge dir that may contain whiteout entries */
@@ -36,6 +37,11 @@ enum ovl_flag {
OVL_INDEX,
};
+enum ovl_entry_flag {
+ OVL_E_UPPER_ALIAS,
+ OVL_E_OPAQUE,
+};
+
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
@@ -62,6 +68,9 @@ enum ovl_flag {
#error Endianness not defined
#endif
+/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
+#define OVL_FILEID 0xfb
+
/* On-disk and in-memeory format for redirect by file handle */
struct ovl_fh {
u8 version; /* 0 */
@@ -194,6 +203,8 @@ const struct cred *ovl_override_creds(struct super_block *sb);
struct super_block *ovl_same_sb(struct super_block *sb);
bool ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
+bool ovl_index_all(struct super_block *sb);
+bool ovl_verify_lower(struct super_block *sb);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
bool ovl_dentry_remote(struct dentry *dentry);
bool ovl_dentry_weird(struct dentry *dentry);
@@ -210,6 +221,9 @@ struct inode *ovl_inode_lower(struct inode *inode);
struct inode *ovl_inode_real(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry);
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
bool ovl_dentry_is_opaque(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
@@ -238,6 +252,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode);
bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
+bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
@@ -249,15 +264,35 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
/* namei.c */
-int ovl_verify_origin(struct dentry *dentry, struct dentry *origin,
- bool is_upper, bool set);
-int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
- unsigned int numlower);
+int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
+struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt);
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+ struct dentry *upperdentry, struct ovl_path **stackp);
+int ovl_verify_set_fh(struct dentry *dentry, const char *name,
+ struct dentry *real, bool is_upper, bool set);
+struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
+int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
int ovl_get_index_name(struct dentry *origin, struct qstr *name);
+struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
+struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
+ struct dentry *origin, bool verify);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
-struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
+static inline int ovl_verify_origin(struct dentry *upper,
+ struct dentry *origin, bool set)
+{
+ return ovl_verify_set_fh(upper, OVL_XATTR_ORIGIN, origin, false, set);
+}
+
+static inline int ovl_verify_upper(struct dentry *index,
+ struct dentry *upper, bool set)
+{
+ return ovl_verify_set_fh(index, OVL_XATTR_UPPER, upper, true, set);
+}
+
/* readdir.c */
extern const struct file_operations ovl_dir_operations;
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
@@ -267,8 +302,7 @@ void ovl_dir_cache_free(struct inode *inode);
int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
-int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
- struct ovl_path *lower, unsigned int numlower);
+int ovl_indexdir_cleanup(struct ovl_fs *ofs);
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
@@ -291,8 +325,11 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
bool ovl_is_private_xattr(const char *name);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry,
- struct dentry *index);
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
+ bool is_upper);
+struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
+ struct dentry *lowerdentry, struct dentry *index,
+ unsigned int numlower);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
@@ -306,6 +343,8 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
struct dentry *ovl_lookup_temp(struct dentry *workdir);
+int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+ struct dentry *dentry);
struct cattr {
dev_t rdev;
umode_t mode;
@@ -321,4 +360,9 @@ int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);
+struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper);
+int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+ struct dentry *upper);
+
+/* export.c */
+extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 9d0bc03bf6e4..bfef6edcc111 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,11 +17,14 @@ struct ovl_config {
bool redirect_follow;
const char *redirect_mode;
bool index;
+ bool nfs_export;
};
struct ovl_layer {
struct vfsmount *mnt;
dev_t pseudo_dev;
+ /* Index of this layer in fs root (upper == 0) */
+ int idx;
};
struct ovl_path {
@@ -58,8 +61,7 @@ struct ovl_fs {
struct ovl_entry {
union {
struct {
- unsigned long has_upper;
- bool opaque;
+ unsigned long flags;
};
struct rcu_head rcu;
};
@@ -69,6 +71,11 @@ struct ovl_entry {
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
+static inline struct ovl_entry *OVL_E(struct dentry *dentry)
+{
+ return (struct ovl_entry *) dentry->d_fsdata;
+}
+
struct ovl_inode {
struct ovl_dir_cache *cache;
const char *redirect;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 8c98578d27a1..c11f5c0906c3 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -593,8 +593,15 @@ static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
return ERR_PTR(res);
}
if (list_empty(&cache->entries)) {
- /* Good oportunity to get rid of an unnecessary "impure" flag */
- ovl_do_removexattr(ovl_dentry_upper(dentry), OVL_XATTR_IMPURE);
+ /*
+ * A good opportunity to get rid of an unneeded "impure" flag.
+ * Removing the "impure" xattr is best effort.
+ */
+ if (!ovl_want_write(dentry)) {
+ ovl_do_removexattr(ovl_dentry_upper(dentry),
+ OVL_XATTR_IMPURE);
+ ovl_drop_write(dentry);
+ }
ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
kfree(cache);
return NULL;
@@ -769,10 +776,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
struct dentry *dentry = file->f_path.dentry;
struct file *realfile = od->realfile;
+ /* Nothing to sync for lower */
+ if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
+ return 0;
+
/*
* Need to check if we started out being a lower dir, but got copied up
*/
- if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
+ if (!od->is_upper) {
struct inode *inode = file_inode(file);
realfile = READ_ONCE(od->upperfile);
@@ -858,8 +869,11 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
int err;
struct ovl_cache_entry *p, *n;
struct rb_root root = RB_ROOT;
+ const struct cred *old_cred;
+ old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_dir_read_merged(dentry, list, &root);
+ revert_creds(old_cred);
if (err)
return err;
@@ -1016,13 +1030,13 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
}
}
-int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
- struct ovl_path *lower, unsigned int numlower)
+int ovl_indexdir_cleanup(struct ovl_fs *ofs)
{
int err;
+ struct dentry *indexdir = ofs->indexdir;
struct dentry *index = NULL;
- struct inode *dir = dentry->d_inode;
- struct path path = { .mnt = mnt, .dentry = dentry };
+ struct inode *dir = indexdir->d_inode;
+ struct path path = { .mnt = ofs->upper_mnt, .dentry = indexdir };
LIST_HEAD(list);
struct rb_root root = RB_ROOT;
struct ovl_cache_entry *p;
@@ -1046,19 +1060,40 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
if (p->len == 2 && p->name[1] == '.')
continue;
}
- index = lookup_one_len(p->name, dentry, p->len);
+ index = lookup_one_len(p->name, indexdir, p->len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
index = NULL;
break;
}
- err = ovl_verify_index(index, lower, numlower);
- /* Cleanup stale and orphan index entries */
- if (err && (err == -ESTALE || err == -ENOENT))
+ err = ovl_verify_index(ofs, index);
+ if (!err) {
+ goto next;
+ } else if (err == -ESTALE) {
+ /* Cleanup stale index entries */
+ err = ovl_cleanup(dir, index);
+ } else if (err != -ENOENT) {
+ /*
+ * Abort mount to avoid corrupting the index if
+ * an incompatible index entry was found or on out
+ * of memory.
+ */
+ break;
+ } else if (ofs->config.nfs_export) {
+ /*
+ * Whiteout orphan index to block future open by
+ * handle after overlay nlink dropped to zero.
+ */
+ err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ } else {
+ /* Cleanup orphan index entries */
err = ovl_cleanup(dir, index);
+ }
+
if (err)
break;
+next:
dput(index);
index = NULL;
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 76440feb79f6..9ee37c76091d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -45,6 +45,11 @@ module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(ovl_index_def,
"Default to on or off for the inodes index feature");
+static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
+module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
+MODULE_PARM_DESC(ovl_nfs_export_def,
+ "Default to on or off for the NFS export feature");
+
static void ovl_entry_stack_free(struct ovl_entry *oe)
{
unsigned int i;
@@ -211,6 +216,7 @@ static void ovl_destroy_inode(struct inode *inode)
struct ovl_inode *oi = OVL_I(inode);
dput(oi->__upperdentry);
+ iput(oi->lower);
kfree(oi->redirect);
ovl_dir_cache_free(inode);
mutex_destroy(&oi->lock);
@@ -341,6 +347,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
if (ofs->config.index != ovl_index_def)
seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+ if (ofs->config.nfs_export != ovl_nfs_export_def)
+ seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
+ "on" : "off");
return 0;
}
@@ -373,6 +382,8 @@ enum {
OPT_REDIRECT_DIR,
OPT_INDEX_ON,
OPT_INDEX_OFF,
+ OPT_NFS_EXPORT_ON,
+ OPT_NFS_EXPORT_OFF,
OPT_ERR,
};
@@ -384,6 +395,8 @@ static const match_table_t ovl_tokens = {
{OPT_REDIRECT_DIR, "redirect_dir=%s"},
{OPT_INDEX_ON, "index=on"},
{OPT_INDEX_OFF, "index=off"},
+ {OPT_NFS_EXPORT_ON, "nfs_export=on"},
+ {OPT_NFS_EXPORT_OFF, "nfs_export=off"},
{OPT_ERR, NULL}
};
@@ -490,6 +503,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->index = false;
break;
+ case OPT_NFS_EXPORT_ON:
+ config->nfs_export = true;
+ break;
+
+ case OPT_NFS_EXPORT_OFF:
+ config->nfs_export = false;
+ break;
+
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
@@ -520,10 +541,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
bool retried = false;
bool locked = false;
- err = mnt_want_write(mnt);
- if (err)
- goto out_err;
-
inode_lock_nested(dir, I_MUTEX_PARENT);
locked = true;
@@ -588,7 +605,6 @@ retry:
goto out_err;
}
out_unlock:
- mnt_drop_write(mnt);
if (locked)
inode_unlock(dir);
@@ -700,12 +716,16 @@ static int ovl_lower_dir(const char *name, struct path *path,
*remote = true;
/*
- * The inodes index feature needs to encode and decode file
- * handles, so it requires that all layers support them.
+ * The inodes index feature and NFS export need to encode and decode
+ * file handles, so they require that all layers support them.
*/
- if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
+ if ((ofs->config.nfs_export ||
+ (ofs->config.index && ofs->config.upperdir)) &&
+ !ovl_can_decode_fh(path->dentry->d_sb)) {
ofs->config.index = false;
- pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
+ ofs->config.nfs_export = false;
+ pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
+ name);
}
return 0;
@@ -929,12 +949,17 @@ out:
static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
{
+ struct vfsmount *mnt = ofs->upper_mnt;
struct dentry *temp;
int err;
+ err = mnt_want_write(mnt);
+ if (err)
+ return err;
+
ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
if (!ofs->workdir)
- return 0;
+ goto out;
/*
* Upper should support d_type, else whiteouts are visible. Given
@@ -944,7 +969,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
*/
err = ovl_check_d_type_supported(workpath);
if (err < 0)
- return err;
+ goto out;
/*
* We allowed this configuration and don't want to break users over
@@ -967,7 +992,9 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
if (err) {
ofs->noxattr = true;
- pr_warn("overlayfs: upper fs does not support xattr.\n");
+ ofs->config.index = false;
+ pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
+ err = 0;
} else {
vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
}
@@ -979,7 +1006,15 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
}
- return 0;
+ /* NFS export of r/w mount depends on index */
+ if (ofs->config.nfs_export && !ofs->config.index) {
+ pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
+ ofs->config.nfs_export = false;
+ }
+
+out:
+ mnt_drop_write(mnt);
+ return err;
}
static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
@@ -1026,11 +1061,16 @@ out:
static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
struct path *upperpath)
{
+ struct vfsmount *mnt = ofs->upper_mnt;
int err;
+ err = mnt_want_write(mnt);
+ if (err)
+ return err;
+
/* Verify lower root is upper root origin */
err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
- false, true);
+ true);
if (err) {
pr_err("overlayfs: failed to verify upper root origin\n");
goto out;
@@ -1038,23 +1078,33 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
if (ofs->indexdir) {
- /* Verify upper root is index dir origin */
- err = ovl_verify_origin(ofs->indexdir, upperpath->dentry,
- true, true);
+ /*
+ * Verify upper root is exclusively associated with index dir.
+ * Older kernels stored upper fh in "trusted.overlay.origin"
+ * xattr. If that xattr exists, verify that it is a match to
+ * upper dir file handle. In any case, verify or set xattr
+ * "trusted.overlay.upper" to indicate that index may have
+ * directory entries.
+ */
+ if (ovl_check_origin_xattr(ofs->indexdir)) {
+ err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
+ upperpath->dentry, true, false);
+ if (err)
+ pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
+ }
+ err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
if (err)
- pr_err("overlayfs: failed to verify index dir origin\n");
+ pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
/* Cleanup bad/stale/orphan index entries */
if (!err)
- err = ovl_indexdir_cleanup(ofs->indexdir,
- ofs->upper_mnt,
- oe->lowerstack,
- oe->numlower);
+ err = ovl_indexdir_cleanup(ofs);
}
if (err || !ofs->indexdir)
pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
out:
+ mnt_drop_write(mnt);
return err;
}
@@ -1094,6 +1144,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
ofs->lower_layers[ofs->numlower].mnt = mnt;
ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
+ ofs->lower_layers[ofs->numlower].idx = i + 1;
ofs->numlower++;
/* Check if all lower layers are on same sb */
@@ -1131,6 +1182,10 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
} else if (!ofs->config.upperdir && stacklen == 1) {
pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
goto out_err;
+ } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
+ ofs->config.redirect_follow) {
+ pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+ ofs->config.nfs_export = false;
}
err = -ENOMEM;
@@ -1207,6 +1262,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
goto out_err;
ofs->config.index = ovl_index_def;
+ ofs->config.nfs_export = ovl_nfs_export_def;
err = ovl_parse_opt((char *) data, &ofs->config);
if (err)
goto out_err;
@@ -1257,13 +1313,26 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto out_free_oe;
- if (!ofs->indexdir)
+ /* Force r/o mount with no index dir */
+ if (!ofs->indexdir) {
+ dput(ofs->workdir);
+ ofs->workdir = NULL;
sb->s_flags |= SB_RDONLY;
+ }
+
}
- /* Show index=off/on in /proc/mounts for any of the reasons above */
- if (!ofs->indexdir)
+ /* Show index=off in /proc/mounts for forced r/o mount */
+ if (!ofs->indexdir) {
ofs->config.index = false;
+ if (ofs->upper_mnt && ofs->config.nfs_export) {
+ pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
+ ofs->config.nfs_export = false;
+ }
+ }
+
+ if (ofs->config.nfs_export)
+ sb->s_export_op = &ovl_export_operations;
/* Never override disk quota limits or use reserved space */
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
@@ -1279,15 +1348,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!root_dentry)
goto out_free_oe;
+ root_dentry->d_fsdata = oe;
+
mntput(upperpath.mnt);
if (upperpath.dentry) {
- oe->has_upper = true;
+ ovl_dentry_set_upper_alias(root_dentry);
if (ovl_is_impuredir(upperpath.dentry))
ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
}
- root_dentry->d_fsdata = oe;
-
/* Root is always merge -> can have whiteouts */
ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index d6bb1c9f5e7a..930784a26623 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -63,6 +63,22 @@ struct dentry *ovl_indexdir(struct super_block *sb)
return ofs->indexdir;
}
+/* Index all files on copy up. For now only enabled for NFS export */
+bool ovl_index_all(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ return ofs->config.nfs_export && ofs->config.index;
+}
+
+/* Verify lower origin on lookup. For now only enabled for NFS export */
+bool ovl_verify_lower(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ return ofs->config.nfs_export && ofs->config.index;
+}
+
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -194,10 +210,24 @@ void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache)
OVL_I(inode)->cache = cache;
}
+void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry)
+{
+ set_bit(flag, &OVL_E(dentry)->flags);
+}
+
+void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry)
+{
+ clear_bit(flag, &OVL_E(dentry)->flags);
+}
+
+bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry)
+{
+ return test_bit(flag, &OVL_E(dentry)->flags);
+}
+
bool ovl_dentry_is_opaque(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
- return oe->opaque;
+ return ovl_dentry_test_flag(OVL_E_OPAQUE, dentry);
}
bool ovl_dentry_is_whiteout(struct dentry *dentry)
@@ -207,28 +237,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry)
void ovl_dentry_set_opaque(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- oe->opaque = true;
+ ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
}
/*
- * For hard links it's possible for ovl_dentry_upper() to return positive, while
- * there's no actual upper alias for the inode. Copy up code needs to know
- * about the existence of the upper alias, so it can't use ovl_dentry_upper().
+ * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
+ * to return positive, while there's no actual upper alias for the inode.
+ * Copy up code needs to know about the existence of the upper alias, so it
+ * can't use ovl_dentry_upper().
*/
bool ovl_dentry_has_upper_alias(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- return oe->has_upper;
+ return ovl_dentry_test_flag(OVL_E_UPPER_ALIAS, dentry);
}
void ovl_dentry_set_upper_alias(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- oe->has_upper = true;
+ ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
}
bool ovl_redirect_dir(struct super_block *sb)
@@ -257,7 +282,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
if (upperdentry)
OVL_I(inode)->__upperdentry = upperdentry;
if (lowerdentry)
- OVL_I(inode)->lower = d_inode(lowerdentry);
+ OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
}
@@ -273,7 +298,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
*/
smp_wmb();
OVL_I(inode)->__upperdentry = upperdentry;
- if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
+ if (inode_unhashed(inode)) {
inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
}
@@ -447,10 +472,32 @@ void ovl_inuse_unlock(struct dentry *dentry)
}
}
+/*
+ * Does this overlay dentry need to be indexed on copy up?
+ */
+bool ovl_need_index(struct dentry *dentry)
+{
+ struct dentry *lower = ovl_dentry_lower(dentry);
+
+ if (!lower || !ovl_indexdir(dentry->d_sb))
+ return false;
+
+ /* Index all files for NFS export and consistency verification */
+ if (ovl_index_all(dentry->d_sb))
+ return true;
+
+ /* Index only lower hardlinks on copy up */
+ if (!d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
+ return true;
+
+ return false;
+}
+
/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
- struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
+ struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
+ struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *index = NULL;
@@ -463,7 +510,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
goto fail;
inode = d_inode(upperdentry);
- if (inode->i_nlink != 1) {
+ if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) {
pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
upperdentry, inode->i_ino, inode->i_nlink);
/*
@@ -481,13 +528,17 @@ static void ovl_cleanup_index(struct dentry *dentry)
}
inode_lock_nested(dir, I_MUTEX_PARENT);
- /* TODO: whiteout instead of cleanup to block future open by handle */
- index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
+ index = lookup_one_len(name.name, indexdir, name.len);
err = PTR_ERR(index);
- if (!IS_ERR(index))
- err = ovl_cleanup(dir, index);
- else
+ if (IS_ERR(index)) {
index = NULL;
+ } else if (ovl_index_all(dentry->d_sb)) {
+ /* Whiteout orphan index to block future open by handle */
+ err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ } else {
+ /* Cleanup orphan index entries */
+ err = ovl_cleanup(dir, index);
+ }
inode_unlock(dir);
if (err)
@@ -512,16 +563,16 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
const struct cred *old_cred;
int err;
- if (!d_inode(dentry) || d_is_dir(dentry))
+ if (!d_inode(dentry))
return 0;
/*
* With inodes index is enabled, we store the union overlay nlink
- * in an xattr on the index inode. When whiting out lower hardlinks
+ * in an xattr on the index inode. When whiting out an indexed lower,
* we need to decrement the overlay persistent nlink, but before the
* first copy up, we have no upper index inode to store the xattr.
*
- * As a workaround, before whiteout/rename over of a lower hardlink,
+ * As a workaround, before whiteout/rename over an indexed lower,
* copy up to create the upper index. Creating the upper index will
* initialize the overlay nlink, so it could be dropped if unlink
* or rename succeeds.
@@ -529,8 +580,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
* TODO: implement metadata only index copy up when called with
* ovl_copy_up_flags(dentry, O_PATH).
*/
- if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) &&
- d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
+ if (ovl_need_index(dentry) && !ovl_dentry_has_upper_alias(dentry)) {
err = ovl_copy_up(dentry);
if (err)
return err;
@@ -540,7 +590,7 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
if (err)
return err;
- if (!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
goto out;
old_cred = ovl_override_creds(dentry->d_sb);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 65cd8ab60b7a..82a99d366aec 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -227,6 +227,7 @@ extern seqlock_t rename_lock;
*/
extern void d_instantiate(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
+extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
extern void __d_drop(struct dentry *dentry);
extern void d_drop(struct dentry *dentry);
@@ -235,6 +236,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
/* allocate/de-allocate */
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
+extern struct dentry * d_alloc_anon(struct super_block *);
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
wait_queue_head_t *);