summaryrefslogtreecommitdiffstats
path: root/block/blk-cgroup.h
diff options
context:
space:
mode:
author: Waiman Long <longman@redhat.com> 2022-11-05 01:59:01 +0100
committer: Jens Axboe <axboe@kernel.dk> 2022-11-17 00:58:44 +0100
commit: 3b8cc6298724021da845f2f9fd7dd4b6829a6817 (patch)
tree: 23b6bb2853fa72d3d9d648d66bd63ef5d4b74926 /block/blk-cgroup.h
parent: blk-cgroup: Return -ENOMEM directly in blkcg_css_alloc() error path (diff)
downloadlinux-3b8cc6298724021da845f2f9fd7dd4b6829a6817.tar.xz
linux-3b8cc6298724021da845f2f9fd7dd4b6829a6817.zip
blk-cgroup: Optimize blkcg_rstat_flush()
For a system with many CPUs and block devices, the time to do blkcg_rstat_flush() from cgroup_rstat_flush() can be rather long. It can be especially problematic as interrupts are disabled during the flush. It was reported that it might take seconds to complete in some extreme cases, leading to hard lockup messages. As it is likely that not all the percpu blkg_iostat_set's have been updated since the last flush, those stale blkg_iostat_set's don't need to be flushed in this case. This patch optimizes blkcg_rstat_flush() by keeping a lockless list of recently updated blkg_iostat_set's in a newly added percpu blkcg->lhead pointer. The blkg_iostat_set is added to a lockless list on the update side in blk_cgroup_bio_start(). It is removed from the lockless list when flushed in blkcg_rstat_flush(). Due to racing, it is possible that blkg_iostat_set's in the lockless list may have no new IO stats to be flushed, but that is OK. To protect against destruction of the blkg, a percpu reference is taken when a blkg_iostat_set is put onto the lockless list and dropped when it is removed. When booting up an instrumented test kernel with this patch on a 2-socket 96-thread system with cgroup v2, out of the 2051 calls to cgroup_rstat_flush() after bootup, 1788 of the calls exited immediately because of an empty lockless list. After an all-cpu kernel build, the ratio became 6295424/6340513. That was more than 99%. Signed-off-by: Waiman Long <longman@redhat.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20221105005902.407297-3-longman@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-cgroup.h')
-rw-r--r-- block/blk-cgroup.h 10
1 files changed, 10 insertions, 0 deletions
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index aa2b286bc825..1e94e404eaa8 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -18,6 +18,7 @@
#include <linux/cgroup.h>
#include <linux/kthread.h>
#include <linux/blk-mq.h>
+#include <linux/llist.h>
struct blkcg_gq;
struct blkg_policy_data;
@@ -43,6 +44,9 @@ struct blkg_iostat {
struct blkg_iostat_set {
struct u64_stats_sync sync;
+ struct blkcg_gq *blkg;
+ struct llist_node lnode;
+ int lqueued; /* queued in llist */
struct blkg_iostat cur;
struct blkg_iostat last;
};
@@ -97,6 +101,12 @@ struct blkcg {
struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
struct list_head all_blkcgs_node;
+
+ /*
+ * List of updated percpu blkg_iostat_set's since the last flush.
+ */
+ struct llist_head __percpu *lhead;
+
#ifdef CONFIG_BLK_CGROUP_FC_APPID
char fc_app_id[FC_APPID_LEN];
#endif