summaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
authorOmar Sandoval <osandov@fb.com>2018-06-08 02:07:02 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-08 02:34:35 +0200
commit93781325da6e07af57a21f111d1f2b9f128fa397 (patch)
tree35542710adcf686003dadb55d3e6ae20036a3d36 /mm/vmscan.c
parentmm: shmem: make stat.st_blksize return huge page size if THP is on (diff)
downloadlinux-93781325da6e07af57a21f111d1f2b9f128fa397.tar.xz
linux-93781325da6e07af57a21f111d1f2b9f128fa397.zip
lockdep: fix fs_reclaim annotation
While revisiting my Btrfs swapfile series [1], I introduced a situation in which reclaim would lock i_rwsem, and even though the swapon() path clearly made GFP_KERNEL allocations while holding i_rwsem, I got no complaints from lockdep. It turns out that the rework of the fs_reclaim annotation was broken: if the current task has PF_MEMALLOC set, we don't acquire the dummy fs_reclaim lock, but when reclaiming we always check this _after_ we've just set the PF_MEMALLOC flag. In most cases, we can fix this by moving the fs_reclaim_{acquire,release}() outside of the memalloc_noreclaim_{save,restore}(), althought kswapd is slightly different. After applying this, I got the expected lockdep splats. 1: https://lwn.net/Articles/625412/ Link: http://lkml.kernel.org/r/9f8aa70652a98e98d7c4de0fc96a4addcee13efe.1523778026.git.osandov@fb.com Fixes: d92a8cfcb37e ("locking/lockdep: Rework FS_RECLAIM annotation") Signed-off-by: Omar Sandoval <osandov@fb.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to '')
-rw-r--r--mm/vmscan.c20
1 files changed, 13 insertions, 7 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9270a4370d54..0e67b477ecef 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3318,11 +3318,15 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
.may_unmap = 1,
.may_swap = 1,
};
+
+ __fs_reclaim_acquire();
+
count_vm_event(PAGEOUTRUN);
do {
unsigned long nr_reclaimed = sc.nr_reclaimed;
bool raise_priority = true;
+ bool ret;
sc.reclaim_idx = classzone_idx;
@@ -3395,7 +3399,10 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
wake_up_all(&pgdat->pfmemalloc_wait);
/* Check if kswapd should be suspending */
- if (try_to_freeze() || kthread_should_stop())
+ __fs_reclaim_release();
+ ret = try_to_freeze();
+ __fs_reclaim_acquire();
+ if (ret || kthread_should_stop())
break;
/*
@@ -3412,6 +3419,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
out:
snapshot_refaults(NULL, pgdat);
+ __fs_reclaim_release();
/*
* Return the order kswapd stopped reclaiming at as
* prepare_kswapd_sleep() takes it into account. If another caller
@@ -3600,9 +3608,7 @@ kswapd_try_sleep:
*/
trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
alloc_order);
- fs_reclaim_acquire(GFP_KERNEL);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
- fs_reclaim_release(GFP_KERNEL);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
}
@@ -3684,16 +3690,16 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
- noreclaim_flag = memalloc_noreclaim_save();
fs_reclaim_acquire(sc.gfp_mask);
+ noreclaim_flag = memalloc_noreclaim_save();
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
p->reclaim_state = NULL;
- fs_reclaim_release(sc.gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
+ fs_reclaim_release(sc.gfp_mask);
return nr_reclaimed;
}
@@ -3870,6 +3876,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
};
cond_resched();
+ fs_reclaim_acquire(sc.gfp_mask);
/*
* We need to be able to allocate from the reserves for RECLAIM_UNMAP
* and we also need to be able to write out pages for RECLAIM_WRITE
@@ -3877,7 +3884,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
*/
noreclaim_flag = memalloc_noreclaim_save();
p->flags |= PF_SWAPWRITE;
- fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3892,9 +3898,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
}
p->reclaim_state = NULL;
- fs_reclaim_release(gfp_mask);
current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
+ fs_reclaim_release(sc.gfp_mask);
return sc.nr_reclaimed >= nr_pages;
}