diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-02 05:06:18 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-02 05:06:18 +0100 |
commit | cdab10bf3285ee354e8f50254aa799631b7a95e0 (patch) | |
tree | e0b622a649d301346132b4e5ae91966856449fad /kernel/auditsc.c | |
parent | Merge tag 'rcu.2021.11.01a' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff) | |
parent | security: Return xattr name from security_dentry_init_security() (diff) | |
download | linux-cdab10bf3285ee354e8f50254aa799631b7a95e0.tar.xz linux-cdab10bf3285ee354e8f50254aa799631b7a95e0.zip |
Merge tag 'selinux-pr-20211101' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux
Pull selinux updates from Paul Moore:
- Add LSM/SELinux/Smack controls and auditing for io-uring.
As usual, the individual commit descriptions have more detail, but we
were basically missing two things which we're adding here:
+ establishment of a proper audit context so that auditing of
io-uring ops works similarly to how it does for syscalls (with
some io-uring additions because io-uring ops are *not* syscalls)
+ additional LSM hooks to enable access control points for some of
the more unusual io-uring features, e.g. credential overrides.
The additional audit callouts and LSM hooks were done in conjunction
with the io-uring folks, based on conversations and RFC patches
earlier in the year.
- Fixup the binder credential handling so that the proper credentials
are used in the LSM hooks; the commit description and the code
comment which is removed in these patches are helpful to understand
the background and why this is the proper fix.
- Enable SELinux genfscon policy support for securityfs, allowing
improved SELinux filesystem labeling for other subsystems which make
use of securityfs, e.g. IMA.
* tag 'selinux-pr-20211101' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux:
security: Return xattr name from security_dentry_init_security()
selinux: fix a sock regression in selinux_ip_postroute_compat()
binder: use cred instead of task for getsecid
binder: use cred instead of task for selinux checks
binder: use euid from cred instead of using task
LSM: Avoid warnings about potentially unused hook variables
selinux: fix all of the W=1 build warnings
selinux: make better use of the nf_hook_state passed to the NF hooks
selinux: fix race condition when computing ocontext SIDs
selinux: remove unneeded ipv6 hook wrappers
selinux: remove the SELinux lockdown implementation
selinux: enable genfscon labeling for securityfs
Smack: Brutalist io_uring support
selinux: add support for the io_uring access controls
lsm,io_uring: add LSM hooks to io_uring
io_uring: convert io_uring to the secure anon inode interface
fs: add anon_inode_getfile_secure() similar to anon_inode_getfd_secure()
audit: add filtering for io_uring records
audit,io_uring,io-wq: add some basic audit support to io_uring
audit: prepare audit_context for use in calling contexts beyond syscalls
Diffstat (limited to 'kernel/auditsc.c')
-rw-r--r-- | kernel/auditsc.c | 468 |
1 files changed, 368 insertions, 100 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b1cb1dbf7417..c131985c3e6d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -805,6 +805,34 @@ static int audit_in_mask(const struct audit_krule *rule, unsigned long val) return rule->mask[word] & bit; } +/** + * audit_filter_uring - apply filters to an io_uring operation + * @tsk: associated task + * @ctx: audit context + */ +static void audit_filter_uring(struct task_struct *tsk, + struct audit_context *ctx) +{ + struct audit_entry *e; + enum audit_state state; + + if (auditd_test_task(tsk)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_URING_EXIT], + list) { + if (audit_in_mask(&e->rule, ctx->uring_op) && + audit_filter_rules(tsk, &e->rule, ctx, NULL, &state, + false)) { + rcu_read_unlock(); + ctx->current_state = state; + return; + } + } + rcu_read_unlock(); +} + /* At syscall exit time, this filter is called if the audit_state is * not low enough that auditing cannot take place, but is also not * high enough that we already know we have to write an audit record @@ -915,10 +943,81 @@ static inline void audit_free_aux(struct audit_context *context) context->aux = aux->next; kfree(aux); } + context->aux = NULL; while ((aux = context->aux_pids)) { context->aux_pids = aux->next; kfree(aux); } + context->aux_pids = NULL; +} + +/** + * audit_reset_context - reset a audit_context structure + * @ctx: the audit_context to reset + * + * All fields in the audit_context will be reset to an initial state, all + * references held by fields will be dropped, and private memory will be + * released. When this function returns the audit_context will be suitable + * for reuse, so long as the passed context is not NULL or a dummy context. + */ +static void audit_reset_context(struct audit_context *ctx) +{ + if (!ctx) + return; + + /* if ctx is non-null, reset the "ctx->state" regardless */ + ctx->context = AUDIT_CTX_UNUSED; + if (ctx->dummy) + return; + + /* + * NOTE: It shouldn't matter in what order we release the fields, so + * release them in the order in which they appear in the struct; + * this gives us some hope of quickly making sure we are + * resetting the audit_context properly. + * + * Other things worth mentioning: + * - we don't reset "dummy" + * - we don't reset "state", we do reset "current_state" + * - we preserve "filterkey" if "state" is AUDIT_STATE_RECORD + * - much of this is likely overkill, but play it safe for now + * - we really need to work on improving the audit_context struct + */ + + ctx->current_state = ctx->state; + ctx->serial = 0; + ctx->major = 0; + ctx->uring_op = 0; + ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 }; + memset(ctx->argv, 0, sizeof(ctx->argv)); + ctx->return_code = 0; + ctx->prio = (ctx->state == AUDIT_STATE_RECORD ? ~0ULL : 0); + ctx->return_valid = AUDITSC_INVALID; + audit_free_names(ctx); + if (ctx->state != AUDIT_STATE_RECORD) { + kfree(ctx->filterkey); + ctx->filterkey = NULL; + } + audit_free_aux(ctx); + kfree(ctx->sockaddr); + ctx->sockaddr = NULL; + ctx->sockaddr_len = 0; + ctx->pid = ctx->ppid = 0; + ctx->uid = ctx->euid = ctx->suid = ctx->fsuid = KUIDT_INIT(0); + ctx->gid = ctx->egid = ctx->sgid = ctx->fsgid = KGIDT_INIT(0); + ctx->personality = 0; + ctx->arch = 0; + ctx->target_pid = 0; + ctx->target_auid = ctx->target_uid = KUIDT_INIT(0); + ctx->target_sessionid = 0; + ctx->target_sid = 0; + ctx->target_comm[0] = '\0'; + unroll_tree_refs(ctx, NULL, 0); + WARN_ON(!list_empty(&ctx->killed_trees)); + ctx->type = 0; + audit_free_module(ctx); + ctx->fds[0] = -1; + audit_proctitle_free(ctx); } static inline struct audit_context *audit_alloc_context(enum audit_state state) @@ -928,6 +1027,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state) context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return NULL; + context->context = AUDIT_CTX_UNUSED; context->state = state; context->prio = state == AUDIT_STATE_RECORD ? ~0ULL : 0; INIT_LIST_HEAD(&context->killed_trees); @@ -953,7 +1053,7 @@ int audit_alloc(struct task_struct *tsk) char *key = NULL; if (likely(!audit_ever_enabled)) - return 0; /* Return if not auditing. */ + return 0; state = audit_filter_task(tsk, &key); if (state == AUDIT_STATE_DISABLED) { @@ -973,16 +1073,37 @@ int audit_alloc(struct task_struct *tsk) return 0; } +/** + * audit_alloc_kernel - allocate an audit_context for a kernel task + * @tsk: the kernel task + * + * Similar to the audit_alloc() function, but intended for kernel private + * threads. Returns zero on success, negative values on failure. + */ +int audit_alloc_kernel(struct task_struct *tsk) +{ + /* + * At the moment we are just going to call into audit_alloc() to + * simplify the code, but there two things to keep in mind with this + * approach: + * + * 1. Filtering internal kernel tasks is a bit laughable in almost all + * cases, but there is at least one case where there is a benefit: + * the '-a task,never' case allows the admin to effectively disable + * task auditing at runtime. + * + * 2. The {set,clear}_task_syscall_work() ops likely have zero effect + * on these internal kernel tasks, but they probably don't hurt either. + */ + return audit_alloc(tsk); +} + static inline void audit_free_context(struct audit_context *context) { - audit_free_module(context); - audit_free_names(context); - unroll_tree_refs(context, NULL, 0); + /* resetting is extra work, but it is likely just noise */ + audit_reset_context(context); free_tree_refs(context); - audit_free_aux(context); kfree(context->filterkey); - kfree(context->sockaddr); - audit_proctitle_free(context); kfree(context); } @@ -1479,6 +1600,44 @@ out: audit_log_end(ab); } +/** + * audit_log_uring - generate a AUDIT_URINGOP record + * @ctx: the audit context + */ +static void audit_log_uring(struct audit_context *ctx) +{ + struct audit_buffer *ab; + const struct cred *cred; + + ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_URINGOP); + if (!ab) + return; + cred = current_cred(); + audit_log_format(ab, "uring_op=%d", ctx->uring_op); + if (ctx->return_valid != AUDITSC_INVALID) + audit_log_format(ab, " success=%s exit=%ld", + (ctx->return_valid == AUDITSC_SUCCESS ? + "yes" : "no"), + ctx->return_code); + audit_log_format(ab, + " items=%d" + " ppid=%d pid=%d uid=%u gid=%u euid=%u suid=%u" + " fsuid=%u egid=%u sgid=%u fsgid=%u", + ctx->name_count, + task_ppid_nr(current), task_tgid_nr(current), + from_kuid(&init_user_ns, cred->uid), + from_kgid(&init_user_ns, cred->gid), + from_kuid(&init_user_ns, cred->euid), + from_kuid(&init_user_ns, cred->suid), + from_kuid(&init_user_ns, cred->fsuid), + from_kgid(&init_user_ns, cred->egid), + from_kgid(&init_user_ns, cred->sgid), + from_kgid(&init_user_ns, cred->fsgid)); + audit_log_task_context(ab); + audit_log_key(ab, ctx->filterkey); + audit_log_end(ab); +} + static void audit_log_exit(void) { int i, call_panic = 0; @@ -1489,29 +1648,38 @@ static void audit_log_exit(void) context->personality = current->personality; - ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); - if (!ab) - return; /* audit_panic has been called */ - audit_log_format(ab, "arch=%x syscall=%d", - context->arch, context->major); - if (context->personality != PER_LINUX) - audit_log_format(ab, " per=%lx", context->personality); - if (context->return_valid != AUDITSC_INVALID) - audit_log_format(ab, " success=%s exit=%ld", - (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", - context->return_code); - - audit_log_format(ab, - " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", - context->argv[0], - context->argv[1], - context->argv[2], - context->argv[3], - context->name_count); - - audit_log_task_info(ab); - audit_log_key(ab, context->filterkey); - audit_log_end(ab); + switch (context->context) { + case AUDIT_CTX_SYSCALL: + ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); + if (!ab) + return; + audit_log_format(ab, "arch=%x syscall=%d", + context->arch, context->major); + if (context->personality != PER_LINUX) + audit_log_format(ab, " per=%lx", context->personality); + if (context->return_valid != AUDITSC_INVALID) + audit_log_format(ab, " success=%s exit=%ld", + (context->return_valid == AUDITSC_SUCCESS ? + "yes" : "no"), + context->return_code); + audit_log_format(ab, + " a0=%lx a1=%lx a2=%lx a3=%lx items=%d", + context->argv[0], + context->argv[1], + context->argv[2], + context->argv[3], + context->name_count); + audit_log_task_info(ab); + audit_log_key(ab, context->filterkey); + audit_log_end(ab); + break; + case AUDIT_CTX_URING: + audit_log_uring(context); + break; + default: + BUG(); + break; + } for (aux = context->aux; aux; aux = aux->next) { @@ -1602,21 +1770,22 @@ static void audit_log_exit(void) audit_log_name(context, n, NULL, i++, &call_panic); } - audit_log_proctitle(); + if (context->context == AUDIT_CTX_SYSCALL) + audit_log_proctitle(); /* Send end of event record to help user space know we are finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE); if (ab) audit_log_end(ab); if (call_panic) - audit_panic("error converting sid to string"); + audit_panic("error in audit_log_exit()"); } /** * __audit_free - free a per-task audit context * @tsk: task whose audit context block to free * - * Called from copy_process and do_exit + * Called from copy_process, do_exit, and the io_uring code */ void __audit_free(struct task_struct *tsk) { @@ -1625,6 +1794,7 @@ void __audit_free(struct task_struct *tsk) if (!context) return; + /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); @@ -1633,14 +1803,21 @@ void __audit_free(struct task_struct *tsk) * random task_struct that doesn't doesn't have any meaningful data we * need to log via audit_log_exit(). */ - if (tsk == current && !context->dummy && context->in_syscall) { + if (tsk == current && !context->dummy) { context->return_valid = AUDITSC_INVALID; context->return_code = 0; - - audit_filter_syscall(tsk, context); - audit_filter_inodes(tsk, context); - if (context->current_state == AUDIT_STATE_RECORD) - audit_log_exit(); + if (context->context == AUDIT_CTX_SYSCALL) { + audit_filter_syscall(tsk, context); + audit_filter_inodes(tsk, context); + if (context->current_state == AUDIT_STATE_RECORD) + audit_log_exit(); + } else if (context->context == AUDIT_CTX_URING) { + /* TODO: verify this case is real and valid */ + audit_filter_uring(tsk, context); + audit_filter_inodes(tsk, context); + if (context->current_state == AUDIT_STATE_RECORD) + audit_log_uring(context); + } } audit_set_context(tsk, NULL); @@ -1648,6 +1825,131 @@ void __audit_free(struct task_struct *tsk) } /** + * audit_return_fixup - fixup the return codes in the audit_context + * @ctx: the audit_context + * @success: true/false value to indicate if the operation succeeded or not + * @code: operation return code + * + * We need to fixup the return code in the audit logs if the actual return + * codes are later going to be fixed by the arch specific signal handlers. + */ +static void audit_return_fixup(struct audit_context *ctx, + int success, long code) +{ + /* + * This is actually a test for: + * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || + * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) + * + * but is faster than a bunch of || + */ + if (unlikely(code <= -ERESTARTSYS) && + (code >= -ERESTART_RESTARTBLOCK) && + (code != -ENOIOCTLCMD)) + ctx->return_code = -EINTR; + else + ctx->return_code = code; + ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE); +} + +/** + * __audit_uring_entry - prepare the kernel task's audit context for io_uring + * @op: the io_uring opcode + * + * This is similar to audit_syscall_entry() but is intended for use by io_uring + * operations. This function should only ever be called from + * audit_uring_entry() as we rely on the audit context checking present in that + * function. + */ +void __audit_uring_entry(u8 op) +{ + struct audit_context *ctx = audit_context(); + + if (ctx->state == AUDIT_STATE_DISABLED) + return; + + /* + * NOTE: It's possible that we can be called from the process' context + * before it returns to userspace, and before audit_syscall_exit() + * is called. In this case there is not much to do, just record + * the io_uring details and return. + */ + ctx->uring_op = op; + if (ctx->context == AUDIT_CTX_SYSCALL) + return; + + ctx->dummy = !audit_n_rules; + if (!ctx->dummy && ctx->state == AUDIT_STATE_BUILD) + ctx->prio = 0; + + ctx->context = AUDIT_CTX_URING; + ctx->current_state = ctx->state; + ktime_get_coarse_real_ts64(&ctx->ctime); +} + +/** + * __audit_uring_exit - wrap up the kernel task's audit context after io_uring + * @success: true/false value to indicate if the operation succeeded or not + * @code: operation return code + * + * This is similar to audit_syscall_exit() but is intended for use by io_uring + * operations. This function should only ever be called from + * audit_uring_exit() as we rely on the audit context checking present in that + * function. + */ +void __audit_uring_exit(int success, long code) +{ + struct audit_context *ctx = audit_context(); + + if (ctx->context == AUDIT_CTX_SYSCALL) { + /* + * NOTE: See the note in __audit_uring_entry() about the case + * where we may be called from process context before we + * return to userspace via audit_syscall_exit(). In this + * case we simply emit a URINGOP record and bail, the + * normal syscall exit handling will take care of + * everything else. + * It is also worth mentioning that when we are called, + * the current process creds may differ from the creds + * used during the normal syscall processing; keep that + * in mind if/when we move the record generation code. + */ + + /* + * We need to filter on the syscall info here to decide if we + * should emit a URINGOP record. I know it seems odd but this + * solves the problem where users have a filter to block *all* + * syscall records in the "exit" filter; we want to preserve + * the behavior here. + */ + audit_filter_syscall(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + audit_filter_uring(current, ctx); + audit_filter_inodes(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + return; + + audit_log_uring(ctx); + return; + } + + /* this may generate CONFIG_CHANGE records */ + if (!list_empty(&ctx->killed_trees)) + audit_kill_trees(ctx); + + /* run through both filters to ensure we set the filterkey properly */ + audit_filter_uring(current, ctx); + audit_filter_inodes(current, ctx); + if (ctx->current_state != AUDIT_STATE_RECORD) + goto out; + audit_return_fixup(ctx, success, code); + audit_log_exit(); + +out: + audit_reset_context(ctx); +} + +/** * __audit_syscall_entry - fill in an audit record at syscall entry * @major: major syscall type (function) * @a1: additional syscall register 1 @@ -1672,7 +1974,12 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, if (!audit_enabled || !context) return; - BUG_ON(context->in_syscall || context->name_count); + WARN_ON(context->context != AUDIT_CTX_UNUSED); + WARN_ON(context->name_count); + if (context->context != AUDIT_CTX_UNUSED || context->name_count) { + audit_panic("unrecoverable error in audit_syscall_entry()"); + return; + } state = context->state; if (state == AUDIT_STATE_DISABLED) @@ -1691,10 +1998,8 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, context->argv[1] = a2; context->argv[2] = a3; context->argv[3] = a4; - context->serial = 0; - context->in_syscall = 1; + context->context = AUDIT_CTX_SYSCALL; context->current_state = state; - context->ppid = 0; ktime_get_coarse_real_ts64(&context->ctime); } @@ -1711,63 +2016,27 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, */ void __audit_syscall_exit(int success, long return_code) { - struct audit_context *context; + struct audit_context *context = audit_context(); - context = audit_context(); - if (!context) - return; + if (!context || context->dummy || + context->context != AUDIT_CTX_SYSCALL) + goto out; + /* this may generate CONFIG_CHANGE records */ if (!list_empty(&context->killed_trees)) audit_kill_trees(context); - if (!context->dummy && context->in_syscall) { - if (success) - context->return_valid = AUDITSC_SUCCESS; - else - context->return_valid = AUDITSC_FAILURE; + /* run through both filters to ensure we set the filterkey properly */ + audit_filter_syscall(current, context); + audit_filter_inodes(current, context); + if (context->current_state < AUDIT_STATE_RECORD) + goto out; - /* - * we need to fix up the return code in the audit logs if the - * actual return codes are later going to be fixed up by the - * arch specific signal handlers - * - * This is actually a test for: - * (rc == ERESTARTSYS ) || (rc == ERESTARTNOINTR) || - * (rc == ERESTARTNOHAND) || (rc == ERESTART_RESTARTBLOCK) - * - * but is faster than a bunch of || - */ - if (unlikely(return_code <= -ERESTARTSYS) && - (return_code >= -ERESTART_RESTARTBLOCK) && - (return_code != -ENOIOCTLCMD)) - context->return_code = -EINTR; - else - context->return_code = return_code; - - audit_filter_syscall(current, context); - audit_filter_inodes(current, context); - if (context->current_state == AUDIT_STATE_RECORD) - audit_log_exit(); - } - - context->in_syscall = 0; - context->prio = context->state == AUDIT_STATE_RECORD ? ~0ULL : 0; - - audit_free_module(context); - audit_free_names(context); - unroll_tree_refs(context, NULL, 0); - audit_free_aux(context); - context->aux = NULL; - context->aux_pids = NULL; - context->target_pid = 0; - context->target_sid = 0; - context->sockaddr_len = 0; - context->type = 0; - context->fds[0] = -1; - if (context->state != AUDIT_STATE_RECORD) { - kfree(context->filterkey); - context->filterkey = NULL; - } + audit_return_fixup(context, success, return_code); + audit_log_exit(); + +out: + audit_reset_context(context); } static inline void handle_one(const struct inode *inode) @@ -1919,7 +2188,7 @@ void __audit_getname(struct filename *name) struct audit_context *context = audit_context(); struct audit_names *n; - if (!context->in_syscall) + if (context->context == AUDIT_CTX_UNUSED) return; n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); @@ -1991,7 +2260,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; - if (!context->in_syscall) + if (context->context == AUDIT_CTX_UNUSED) return; rcu_read_lock(); @@ -2109,7 +2378,7 @@ void __audit_inode_child(struct inode *parent, struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; int i; - if (!context->in_syscall) + if (context->context == AUDIT_CTX_UNUSED) return; rcu_read_lock(); @@ -2208,7 +2477,7 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); int auditsc_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial) { - if (!ctx->in_syscall) + if (ctx->context == AUDIT_CTX_UNUSED) return 0; if (!ctx->serial) ctx->serial = audit_serial(); @@ -2706,8 +2975,7 @@ void audit_seccomp_actions_logged(const char *names, const char *old_names, struct list_head *audit_killed_trees(void) { struct audit_context *ctx = audit_context(); - - if (likely(!ctx || !ctx->in_syscall)) + if (likely(!ctx || ctx->context == AUDIT_CTX_UNUSED)) return NULL; return &ctx->killed_trees; } |