summaryrefslogtreecommitdiffstats
path: root/kernel/sys.c
diff options
context:
space:
mode:
authorBarret Rhoden <brho@google.com>2022-01-06 18:20:41 +0100
committerEric W. Biederman <ebiederm@xmission.com>2022-03-08 21:33:36 +0100
commit18c91bb2d87268d23868bf13508f5bc9cf04e89a (patch)
treee081820aafeec498f7d81513d6ec7411972ab1f9 /kernel/sys.c
parentprlimit: make do_prlimit() static (diff)
downloadlinux-18c91bb2d87268d23868bf13508f5bc9cf04e89a.tar.xz
linux-18c91bb2d87268d23868bf13508f5bc9cf04e89a.zip
prlimit: do not grab the tasklist_lock
Unnecessarily grabbing the tasklist_lock can be a scalability bottleneck for workloads that also must grab the tasklist_lock for waiting, killing, and cloning. The tasklist_lock was grabbed to protect tsk->sighand from disappearing (becoming NULL). tsk->signal was already protected by holding a reference to tsk. update_rlimit_cpu() assumed tsk->sighand != NULL. With this commit, it attempts to lock_task_sighand(). However, this means that update_rlimit_cpu() can fail. This only happens when a task is exiting. Note that during exec, sighand may *change*, but it will not be NULL. Prior to this commit, the do_prlimit() ensured that update_rlimit_cpu() would not fail by read locking the tasklist_lock and checking tsk->sighand != NULL. If update_rlimit_cpu() fails, there may be other tasks that are not exiting that share tsk->signal. However, the group_leader is the last task to be released, so if we cannot update_rlimit_cpu(group_leader), then the entire process is exiting. The only other caller of update_rlimit_cpu() is selinux_bprm_committing_creds(). It has tsk == current, so update_rlimit_cpu() cannot fail (current->sighand cannot disappear until current exits). This change resulted in a 14% speedup on a microbenchmark where parents kill and wait on their children, and children getpriority, setpriority, and getrlimit. Signed-off-by: Barret Rhoden <brho@google.com> Link: https://lkml.kernel.org/r/20220106172041.522167-4-brho@google.com Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'kernel/sys.c')
-rw-r--r--kernel/sys.c25
1 files changed, 14 insertions, 11 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index d37d1f882b7d..374f83e95239 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1441,13 +1441,7 @@ static int do_prlimit(struct task_struct *tsk, unsigned int resource,
return -EPERM;
}
- /* protect tsk->signal and tsk->sighand from disappearing */
- read_lock(&tasklist_lock);
- if (!tsk->sighand) {
- retval = -ESRCH;
- goto out;
- }
-
+ /* Holding a refcount on tsk protects tsk->signal from disappearing. */
rlim = tsk->signal->rlim + resource;
task_lock(tsk->group_leader);
if (new_rlim) {
@@ -1476,10 +1470,19 @@ static int do_prlimit(struct task_struct *tsk, unsigned int resource,
*/
if (!retval && new_rlim && resource == RLIMIT_CPU &&
new_rlim->rlim_cur != RLIM_INFINITY &&
- IS_ENABLED(CONFIG_POSIX_TIMERS))
- update_rlimit_cpu(tsk, new_rlim->rlim_cur);
-out:
- read_unlock(&tasklist_lock);
+ IS_ENABLED(CONFIG_POSIX_TIMERS)) {
+ /*
+ * update_rlimit_cpu can fail if the task is exiting, but there
+ * may be other tasks in the thread group that are not exiting,
+ * and they need their cpu timers adjusted.
+ *
+ * The group_leader is the last task to be released, so if we
+ * cannot update_rlimit_cpu on it, then the entire process is
+ * exiting and we do not need to update at all.
+ */
+ update_rlimit_cpu(tsk->group_leader, new_rlim->rlim_cur);
+ }
+
return retval;
}