summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2020-12-16 04:11:47 +0100
committer Linus Torvalds <torvalds@linux-foundation.org> 2020-12-16 04:11:47 +0100
commit 345d4ab5e0a226e0e27219bef9ad150504666b0d (patch)
tree 627b6ed33aba89581b8b24ddbc69e74e96d9f623
parent Merge branch 'regset.followup' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff)
parent selftests: core: add tests for CLOSE_RANGE_CLOEXEC (diff)
download linux-345d4ab5e0a226e0e27219bef9ad150504666b0d.tar.xz
linux-345d4ab5e0a226e0e27219bef9ad150504666b0d.zip
Merge tag 'close-range-openat2-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull close_range/openat2 updates from Christian Brauner:
 "This contains a fix for openat2() to make RESOLVE_BENEATH and
  RESOLVE_IN_ROOT mutually exclusive. It doesn't make sense to specify
  both at the same time. The openat2() selftests have been extended to
  verify that these two flags can't be specified together.

  This also adds the CLOSE_RANGE_CLOEXEC flag to close_range() which
  allows to mark a range of file descriptors as close-on-exec without
  actually closing them.

  This is useful in general but the use-case that triggered the patch is
  installing a seccomp profile in the calling task before exec. If the
  seccomp profile wants to block the close_range() syscall it obviously
  can't use it to close all fds before exec. If it calls close_range()
  before installing the seccomp profile it needs to take care not to
  close fds that it will still need before the exec meaning it would
  have to call close_range() multiple times on different ranges and then
  still fall back to closing fds one by one right before the exec.

  CLOSE_RANGE_CLOEXEC allows to solve this problem relying on the exec
  codepath to get rid of the unwanted fds. The close_range() tests have
  been expanded to verify that CLOSE_RANGE_CLOEXEC works"

* tag 'close-range-openat2-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  selftests: core: add tests for CLOSE_RANGE_CLOEXEC
  fs, close_range: add flag CLOSE_RANGE_CLOEXEC
  selftests: openat2: add RESOLVE_ conflict test
  openat2: reject RESOLVE_BENEATH|RESOLVE_IN_ROOT
-rw-r--r-- fs/file.c 44
-rw-r--r-- fs/open.c 4
-rw-r--r-- include/uapi/linux/close_range.h 3
-rw-r--r-- tools/testing/selftests/core/close_range_test.c 74
-rw-r--r-- tools/testing/selftests/openat2/openat2_test.c 8
5 files changed, 122 insertions, 11 deletions
diff --git a/fs/file.c b/fs/file.c
index 4559b5fec3bd..e08e4daccac3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -674,6 +674,35 @@ int __close_fd(struct files_struct *files, unsigned fd)
}
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
+static inline void __range_cloexec(struct files_struct *cur_fds,
+ unsigned int fd, unsigned int max_fd)
+{
+ struct fdtable *fdt;
+
+ if (fd > max_fd)
+ return;
+
+ spin_lock(&cur_fds->file_lock);
+ fdt = files_fdtable(cur_fds);
+ bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
+ spin_unlock(&cur_fds->file_lock);
+}
+
+static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
+ unsigned int max_fd)
+{
+ while (fd <= max_fd) {
+ struct file *file;
+
+ file = pick_file(cur_fds, fd++);
+ if (!file)
+ continue;
+
+ filp_close(file, cur_fds);
+ cond_resched();
+ }
+}
+
/**
* __close_range() - Close all file descriptors in a given range.
*
@@ -689,7 +718,7 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
struct task_struct *me = current;
struct files_struct *cur_fds = me->files, *fds = NULL;
- if (flags & ~CLOSE_RANGE_UNSHARE)
+ if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC))
return -EINVAL;
if (fd > max_fd)
@@ -727,16 +756,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
}
max_fd = min(max_fd, cur_max);
- while (fd <= max_fd) {
- struct file *file;
- file = pick_file(cur_fds, fd++);
- if (!file)
- continue;
-
- filp_close(file, cur_fds);
- cond_resched();
- }
+ if (flags & CLOSE_RANGE_CLOEXEC)
+ __range_cloexec(cur_fds, fd, max_fd);
+ else
+ __range_close(cur_fds, fd, max_fd);
if (fds) {
/*
diff --git a/fs/open.c b/fs/open.c
index 9af548fb841b..4d7537ae59df 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1010,6 +1010,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
if (how->resolve & ~VALID_RESOLVE_FLAGS)
return -EINVAL;
+ /* Scoping flags are mutually exclusive. */
+ if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
+ return -EINVAL;
+
/* Deal with the mode. */
if (WILL_CREATE(flags)) {
if (how->mode & ~S_IALLUGO)
diff --git a/include/uapi/linux/close_range.h b/include/uapi/linux/close_range.h
index 6928a9fdee3c..2d804281554c 100644
--- a/include/uapi/linux/close_range.h
+++ b/include/uapi/linux/close_range.h
@@ -5,5 +5,8 @@
/* Unshare the file descriptor table before closing file descriptors. */
#define CLOSE_RANGE_UNSHARE (1U << 1)
+/* Set the FD_CLOEXEC bit instead of closing the file descriptor. */
+#define CLOSE_RANGE_CLOEXEC (1U << 2)
+
#endif /* _UAPI_LINUX_CLOSE_RANGE_H */
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 575b391ddc78..87e16d65d9d7 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -11,6 +11,7 @@
#include <string.h>
#include <syscall.h>
#include <unistd.h>
+#include <sys/resource.h>
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
@@ -23,6 +24,10 @@
#define CLOSE_RANGE_UNSHARE (1U << 1)
#endif
+#ifndef CLOSE_RANGE_CLOEXEC
+#define CLOSE_RANGE_CLOEXEC (1U << 2)
+#endif
+
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@@ -224,4 +229,73 @@ TEST(close_range_unshare_capped)
EXPECT_EQ(0, WEXITSTATUS(status));
}
+TEST(close_range_cloexec)
+{
+ int i, ret;
+ int open_fds[101];
+ struct rlimit rlimit;
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ XFAIL(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ XFAIL(return, "close_range() syscall not supported");
+ if (errno == EINVAL)
+ XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+ }
+
+ /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
+ ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+ rlimit.rlim_cur = 25;
+ ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
+
+ /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
+ ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
+ ASSERT_EQ(0, ret);
+ ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
+ ASSERT_EQ(0, ret);
+
+ for (i = 0; i <= 50; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+
+ for (i = 51; i <= 74; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+ }
+
+ for (i = 75; i <= 100; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+
+ /* Test a common pattern. */
+ ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
+ for (i = 0; i <= 100; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+}
+
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index b386367c606b..381d874cce99 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -155,7 +155,7 @@ struct flag_test {
int err;
};
-#define NUM_OPENAT2_FLAG_TESTS 23
+#define NUM_OPENAT2_FLAG_TESTS 24
void test_openat2_flags(void)
{
@@ -210,6 +210,12 @@ void test_openat2_flags(void)
.how.flags = O_TMPFILE | O_RDWR,
.how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
+ /* ->resolve flags must not conflict. */
+ { .name = "incompatible resolve flags (BENEATH | IN_ROOT)",
+ .how.flags = O_RDONLY,
+ .how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT,
+ .err = -EINVAL },
+
/* ->resolve must only contain RESOLVE_* flags. */
{ .name = "invalid how.resolve and O_RDONLY",
.how.flags = O_RDONLY,