author     Kinsey Ho <kinseyho@google.com>            2024-09-05 02:30:53 +0200
committer  Andrew Morton <akpm@linux-foundation.org>  2024-09-10 01:39:16 +0200
commit     ec0db74b4b1f249ffca4df450f54c17573114045 (patch)
tree       70c06f61013d633a5de5a2b532c4e7f34597b270 /mm/memcontrol.c
parent     mm: increment gen # before restarting traversal (diff)
mm: restart if multiple traversals raced
Currently, if multiple reclaimers race on the same position, the
reclaimers which detect the race will still reclaim from the same memcg.
Instead, the reclaimers which detect the race should move on to the next
memcg in the hierarchy.

So, in the case where multiple traversals race, jump back to the start of
the mem_cgroup_iter() function to find the next memcg in the hierarchy to
reclaim from.

Link: https://lkml.kernel.org/r/20240905003058.1859929-5-kinseyho@google.com
Reported-by: syzbot+e099d407346c45275ce9@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/000000000000817cf10620e20d33@google.com/
Signed-off-by: Kinsey Ho <kinseyho@google.com>
Reviewed-by: T.J. Mercier <tjmercier@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
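For readers outside the kernel tree, the following is a minimal userspace sketch of the
race-detect-and-restart pattern this patch adopts, using C11 atomics in place of the
kernel's cmpxchg(), atomic_t and css reference counting. The struct node, struct
shared_iter and iter_next() names, and the flat next-pointer "hierarchy", are invented
for illustration only and are not part of the patch.

/*
 * Minimal sketch (not kernel code) of the shared-iterator restart pattern:
 * several walkers share one cached position; each reads it, picks the next
 * node, and publishes the new position with a compare-and-swap.  A walker
 * that loses the race restarts instead of handling the node that another
 * walker already claimed.
 */
#include <stdatomic.h>
#include <stddef.h>

struct node {
	struct node *next;			/* hypothetical flat "hierarchy" */
};

struct shared_iter {
	_Atomic(struct node *) position;	/* last node handed out */
	atomic_int generation;			/* bumped when a walk completes */
};

/* Return the next node to work on, or NULL when the walk has finished. */
struct node *iter_next(struct shared_iter *it, struct node *head)
{
	struct node *pos, *next;

restart:
	pos = atomic_load(&it->position);
	next = pos ? pos->next : head;		/* NULL marks the end of a walk */

	/*
	 * Publish the new position.  If a competing walker updated it since
	 * 'pos' was read, back off and restart so that both walkers do not
	 * process the same node twice -- the analogue of the patch's
	 * cmpxchg() check followed by "goto restart".
	 */
	if (!atomic_compare_exchange_strong(&it->position, &pos, next))
		goto restart;

	if (!next)
		atomic_fetch_add(&it->generation, 1);	/* a full walk completed */

	return next;
}

A walker that loses the compare-and-swap here simply retries; the kernel patch
additionally drops the css reference it just acquired before jumping back to the
restart label.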
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 38e3251ed482..36cbc7730745 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -986,8 +986,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup_reclaim_iter *iter;
-	struct cgroup_subsys_state *css = NULL;
-	struct mem_cgroup *memcg = NULL;
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg;
 	struct mem_cgroup *pos = NULL;
 
 	if (mem_cgroup_disabled())
@@ -998,19 +998,23 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 
 	rcu_read_lock();
 restart:
+	memcg = NULL;
+
 	if (reclaim) {
+		int gen;
 		struct mem_cgroup_per_node *mz;
 
 		mz = root->nodeinfo[reclaim->pgdat->node_id];
 		iter = &mz->iter;
+		gen = atomic_read(&iter->generation);
 
 		/*
 		 * On start, join the current reclaim iteration cycle.
 		 * Exit when a concurrent walker completes it.
 		 */
 		if (!prev)
-			reclaim->generation = iter->generation;
-		else if (reclaim->generation != iter->generation)
+			reclaim->generation = gen;
+		else if (reclaim->generation != gen)
 			goto out_unlock;
 
 		pos = READ_ONCE(iter->position);
@@ -1018,8 +1022,7 @@ restart:
 		pos = prev;
 	}
 
-	if (pos)
-		css = &pos->css;
+	css = pos ? &pos->css : NULL;
 
 	for (;;) {
 		css = css_next_descendant_pre(css, &root->css);
@@ -1033,21 +1036,26 @@ restart:
 		 * and kicking, and don't take an extra reference.
 		 */
 		if (css == &root->css || css_tryget(css)) {
-			memcg = mem_cgroup_from_css(css);
 			break;
 		}
 	}
 
+	memcg = mem_cgroup_from_css(css);
+
 	if (reclaim) {
 		/*
 		 * The position could have already been updated by a competing
 		 * thread, so check that the value hasn't changed since we read
 		 * it to avoid reclaiming from the same cgroup twice.
 		 */
-		(void)cmpxchg(&iter->position, pos, memcg);
+		if (cmpxchg(&iter->position, pos, memcg) != pos) {
+			if (css && css != &root->css)
+				css_put(css);
+			goto restart;
+		}
 
 		if (!memcg) {
-			iter->generation++;
+			atomic_inc(&iter->generation);
 			/*
 			 * Reclaimers share the hierarchy walk, and a