summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/f2fs/data.c19
-rw-r--r--fs/f2fs/f2fs.h4
-rw-r--r--fs/f2fs/iostat.c133
-rw-r--r--fs/f2fs/iostat.h57
-rw-r--r--fs/f2fs/super.c13
-rw-r--r--include/trace/events/f2fs.h95
6 files changed, 315 insertions, 6 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index fd16c4fc4507..5e4120b92f59 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -271,7 +271,10 @@ static void f2fs_post_read_work(struct work_struct *work)
static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
- struct bio_post_read_ctx *ctx = bio->bi_private;
+ struct bio_post_read_ctx *ctx;
+
+ iostat_update_and_unbind_ctx(bio, 0);
+ ctx = bio->bi_private;
if (time_to_inject(sbi, FAULT_READ_IO)) {
f2fs_show_injection_info(sbi, FAULT_READ_IO);
@@ -293,10 +296,13 @@ static void f2fs_read_end_io(struct bio *bio)
static void f2fs_write_end_io(struct bio *bio)
{
- struct f2fs_sb_info *sbi = bio->bi_private;
+ struct f2fs_sb_info *sbi;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
+ iostat_update_and_unbind_ctx(bio, 1);
+ sbi = bio->bi_private;
+
if (time_to_inject(sbi, FAULT_WRITE_IO)) {
f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
bio->bi_status = BLK_STS_IOERR;
@@ -400,6 +406,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
fio->type, fio->temp);
}
+ iostat_alloc_and_bind_ctx(sbi, bio, NULL);
+
if (fio->io_wbc)
wbc_init_bio(fio->io_wbc, bio);
@@ -481,6 +489,8 @@ submit_io:
trace_f2fs_submit_read_bio(sbi->sb, type, bio);
else
trace_f2fs_submit_write_bio(sbi->sb, type, bio);
+
+ iostat_update_submit_ctx(bio, type);
submit_bio(bio);
}
@@ -972,7 +982,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
- struct bio_post_read_ctx *ctx;
+ struct bio_post_read_ctx *ctx = NULL;
unsigned int post_read_steps = 0;
bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
@@ -1008,6 +1018,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
ctx->fs_blkaddr = blkaddr;
bio->bi_private = ctx;
}
+ iostat_alloc_and_bind_ctx(sbi, bio, ctx);
return bio;
}
@@ -2253,7 +2264,7 @@ submit_and_realloc:
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
- ctx = bio->bi_private;
+ ctx = get_post_read_ctx(bio);
ctx->enabled_steps |= STEP_DECOMPRESS;
refcount_inc(&dic->refcnt);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 12ecf6ee9cb5..26d084a1fea8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1781,6 +1781,10 @@ struct f2fs_sb_info {
bool iostat_enable;
unsigned long iostat_next_period;
unsigned int iostat_period_ms;
+
+ /* For io latency related statistics info in one iostat period */
+ spinlock_t iostat_lat_lock;
+ struct iostat_lat_info *iostat_io_lat;
#endif
};
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index 21c29e121a86..cdcf54ae0db8 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -14,6 +14,10 @@
#include "iostat.h"
#include <trace/events/f2fs.h>
+#define NUM_PREALLOC_IOSTAT_CTXS 128
+static struct kmem_cache *bio_iostat_ctx_cache;
+static mempool_t *bio_iostat_ctx_pool;
+
int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
@@ -81,6 +85,32 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
+static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
+{
+ int io, idx = 0;
+ unsigned int cnt;
+ struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
+ struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+
+ spin_lock_irq(&sbi->iostat_lat_lock);
+ for (idx = 0; idx < MAX_IO_TYPE; idx++) {
+ for (io = 0; io < NR_PAGE_TYPE; io++) {
+ cnt = io_lat->bio_cnt[idx][io];
+ iostat_lat[idx][io].peak_lat =
+ jiffies_to_msecs(io_lat->peak_lat[idx][io]);
+ iostat_lat[idx][io].cnt = cnt;
+ iostat_lat[idx][io].avg_lat = cnt ?
+ jiffies_to_msecs(io_lat->sum_lat[idx][io]) / cnt : 0;
+ io_lat->sum_lat[idx][io] = 0;
+ io_lat->peak_lat[idx][io] = 0;
+ io_lat->bio_cnt[idx][io] = 0;
+ }
+ }
+ spin_unlock_irq(&sbi->iostat_lat_lock);
+
+ trace_f2fs_iostat_latency(sbi, iostat_lat);
+}
+
static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
{
unsigned long long iostat_diff[NR_IO_TYPE];
@@ -106,10 +136,13 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->iostat_lock);
trace_f2fs_iostat(sbi, iostat_diff);
+
+ __record_iostat_latency(sbi);
}
void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
+ struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int i;
spin_lock(&sbi->iostat_lock);
@@ -118,6 +151,10 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
sbi->prev_rw_iostat[i] = 0;
}
spin_unlock(&sbi->iostat_lock);
+
+ spin_lock_irq(&sbi->iostat_lat_lock);
+ memset(io_lat, 0, sizeof(struct iostat_lat_info));
+ spin_unlock_irq(&sbi->iostat_lat_lock);
}
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
@@ -143,12 +180,108 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
f2fs_record_iostat(sbi);
}
+static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
+ int rw, bool is_sync)
+{
+ unsigned long ts_diff;
+ unsigned int iotype = iostat_ctx->type;
+ unsigned long flags;
+ struct f2fs_sb_info *sbi = iostat_ctx->sbi;
+ struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+ int idx;
+
+ if (!sbi->iostat_enable)
+ return;
+
+ ts_diff = jiffies - iostat_ctx->submit_ts;
+ if (iotype >= META_FLUSH)
+ iotype = META;
+
+ if (rw == 0) {
+ idx = READ_IO;
+ } else {
+ if (is_sync)
+ idx = WRITE_SYNC_IO;
+ else
+ idx = WRITE_ASYNC_IO;
+ }
+
+ spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+ io_lat->sum_lat[idx][iotype] += ts_diff;
+ io_lat->bio_cnt[idx][iotype]++;
+ if (ts_diff > io_lat->peak_lat[idx][iotype])
+ io_lat->peak_lat[idx][iotype] = ts_diff;
+ spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+}
+
+void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+ bool is_sync = bio->bi_opf & REQ_SYNC;
+
+ if (rw == 0)
+ bio->bi_private = iostat_ctx->post_read_ctx;
+ else
+ bio->bi_private = iostat_ctx->sbi;
+ __update_iostat_latency(iostat_ctx, rw, is_sync);
+ mempool_free(iostat_ctx, bio_iostat_ctx_pool);
+}
+
+void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
+ struct bio *bio, struct bio_post_read_ctx *ctx)
+{
+ struct bio_iostat_ctx *iostat_ctx;
+ /* Due to the mempool, this never fails. */
+ iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
+ iostat_ctx->sbi = sbi;
+ iostat_ctx->submit_ts = 0;
+ iostat_ctx->type = 0;
+ iostat_ctx->post_read_ctx = ctx;
+ bio->bi_private = iostat_ctx;
+}
+
+int __init f2fs_init_iostat_processing(void)
+{
+ bio_iostat_ctx_cache =
+ kmem_cache_create("f2fs_bio_iostat_ctx",
+ sizeof(struct bio_iostat_ctx), 0, 0, NULL);
+ if (!bio_iostat_ctx_cache)
+ goto fail;
+ bio_iostat_ctx_pool =
+ mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
+ bio_iostat_ctx_cache);
+ if (!bio_iostat_ctx_pool)
+ goto fail_free_cache;
+ return 0;
+
+fail_free_cache:
+ kmem_cache_destroy(bio_iostat_ctx_cache);
+fail:
+ return -ENOMEM;
+}
+
+void f2fs_destroy_iostat_processing(void)
+{
+ mempool_destroy(bio_iostat_ctx_pool);
+ kmem_cache_destroy(bio_iostat_ctx_cache);
+}
+
int f2fs_init_iostat(struct f2fs_sb_info *sbi)
{
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
+ spin_lock_init(&sbi->iostat_lat_lock);
sbi->iostat_enable = false;
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
+ sbi->iostat_io_lat = f2fs_kzalloc(sbi, sizeof(struct iostat_lat_info),
+ GFP_KERNEL);
+ if (!sbi->iostat_io_lat)
+ return -ENOMEM;
return 0;
}
+
+void f2fs_destroy_iostat(struct f2fs_sb_info *sbi)
+{
+ kfree(sbi->iostat_io_lat);
+}
diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
index 46e4a36fc8e9..22a2d01f57ef 100644
--- a/fs/f2fs/iostat.h
+++ b/fs/f2fs/iostat.h
@@ -6,6 +6,8 @@
#ifndef __F2FS_IOSTAT_H__
#define __F2FS_IOSTAT_H__
+struct bio_post_read_ctx;
+
#ifdef CONFIG_F2FS_IOSTAT
#define DEFAULT_IOSTAT_PERIOD_MS 3000
@@ -13,15 +15,70 @@
/* maximum period of iostat tracing is 1 day */
#define MAX_IOSTAT_PERIOD_MS 8640000
+enum {
+ READ_IO,
+ WRITE_SYNC_IO,
+ WRITE_ASYNC_IO,
+ MAX_IO_TYPE,
+};
+
+struct iostat_lat_info {
+ unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* sum of io latencies */
+ unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* peak io latency */
+ unsigned int bio_cnt[MAX_IO_TYPE][NR_PAGE_TYPE]; /* bio count */
+};
+
extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset);
extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi);
extern void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes);
+
+struct bio_iostat_ctx {
+ struct f2fs_sb_info *sbi;
+ unsigned long submit_ts;
+ enum page_type type;
+ struct bio_post_read_ctx *post_read_ctx;
+};
+
+static inline void iostat_update_submit_ctx(struct bio *bio,
+ enum page_type type)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+ iostat_ctx->submit_ts = jiffies;
+ iostat_ctx->type = type;
+}
+
+static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+ return iostat_ctx->post_read_ctx;
+}
+
+extern void iostat_update_and_unbind_ctx(struct bio *bio, int rw);
+extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
+ struct bio *bio, struct bio_post_read_ctx *ctx);
+extern int f2fs_init_iostat_processing(void);
+extern void f2fs_destroy_iostat_processing(void);
extern int f2fs_init_iostat(struct f2fs_sb_info *sbi);
+extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
#else
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes) {}
+static inline void iostat_update_and_unbind_ctx(struct bio *bio, int rw) {}
+static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
+ struct bio *bio, struct bio_post_read_ctx *ctx) {}
+static inline void iostat_update_submit_ctx(struct bio *bio,
+ enum page_type type) {}
+static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
+{
+ return bio->bi_private;
+}
+static inline int f2fs_init_iostat_processing(void) { return 0; }
+static inline void f2fs_destroy_iostat_processing(void) {}
static inline int f2fs_init_iostat(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_iostat(struct f2fs_sb_info *sbi) {}
#endif
#endif /* __F2FS_IOSTAT_H__ */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a23926d1a77b..f5148f2fd884 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1574,6 +1574,7 @@ static void f2fs_put_super(struct super_block *sb)
#endif
fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
destroy_percpu_info(sbi);
+ f2fs_destroy_iostat(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
@@ -4001,7 +4002,7 @@ try_onemore:
err = init_percpu_info(sbi);
if (err)
- goto free_bio_info;
+ goto free_iostat;
if (F2FS_IO_ALIGNED(sbi)) {
sbi->write_io_dummy =
@@ -4334,6 +4335,8 @@ free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
+free_iostat:
+ f2fs_destroy_iostat(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
@@ -4476,9 +4479,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
- err = f2fs_init_bio_entry_cache();
+ err = f2fs_init_iostat_processing();
if (err)
goto free_post_read;
+ err = f2fs_init_bio_entry_cache();
+ if (err)
+ goto free_iostat;
err = f2fs_init_bioset();
if (err)
goto free_bio_enrty_cache;
@@ -4500,6 +4506,8 @@ free_bioset:
f2fs_destroy_bioset();
free_bio_enrty_cache:
f2fs_destroy_bio_entry_cache();
+free_iostat:
+ f2fs_destroy_iostat_processing();
free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
@@ -4534,6 +4542,7 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
+ f2fs_destroy_iostat_processing();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 3eaf19aa89af..4e881d91c874 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1894,6 +1894,101 @@ TRACE_EVENT(f2fs_iostat,
__entry->app_mrio, __entry->fs_drio, __entry->fs_gdrio,
__entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
);
+
+#ifndef __F2FS_IOSTAT_LATENCY_TYPE
+#define __F2FS_IOSTAT_LATENCY_TYPE
+struct f2fs_iostat_latency {
+ unsigned int peak_lat;
+ unsigned int avg_lat;
+ unsigned int cnt;
+};
+#endif /* __F2FS_IOSTAT_LATENCY_TYPE */
+
+TRACE_EVENT(f2fs_iostat_latency,
+
+ TP_PROTO(struct f2fs_sb_info *sbi, struct f2fs_iostat_latency (*iostat_lat)[NR_PAGE_TYPE]),
+
+ TP_ARGS(sbi, iostat_lat),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, d_rd_peak)
+ __field(unsigned int, d_rd_avg)
+ __field(unsigned int, d_rd_cnt)
+ __field(unsigned int, n_rd_peak)
+ __field(unsigned int, n_rd_avg)
+ __field(unsigned int, n_rd_cnt)
+ __field(unsigned int, m_rd_peak)
+ __field(unsigned int, m_rd_avg)
+ __field(unsigned int, m_rd_cnt)
+ __field(unsigned int, d_wr_s_peak)
+ __field(unsigned int, d_wr_s_avg)
+ __field(unsigned int, d_wr_s_cnt)
+ __field(unsigned int, n_wr_s_peak)
+ __field(unsigned int, n_wr_s_avg)
+ __field(unsigned int, n_wr_s_cnt)
+ __field(unsigned int, m_wr_s_peak)
+ __field(unsigned int, m_wr_s_avg)
+ __field(unsigned int, m_wr_s_cnt)
+ __field(unsigned int, d_wr_as_peak)
+ __field(unsigned int, d_wr_as_avg)
+ __field(unsigned int, d_wr_as_cnt)
+ __field(unsigned int, n_wr_as_peak)
+ __field(unsigned int, n_wr_as_avg)
+ __field(unsigned int, n_wr_as_cnt)
+ __field(unsigned int, m_wr_as_peak)
+ __field(unsigned int, m_wr_as_avg)
+ __field(unsigned int, m_wr_as_cnt)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sbi->sb->s_dev;
+ __entry->d_rd_peak = iostat_lat[0][DATA].peak_lat;
+ __entry->d_rd_avg = iostat_lat[0][DATA].avg_lat;
+ __entry->d_rd_cnt = iostat_lat[0][DATA].cnt;
+ __entry->n_rd_peak = iostat_lat[0][NODE].peak_lat;
+ __entry->n_rd_avg = iostat_lat[0][NODE].avg_lat;
+ __entry->n_rd_cnt = iostat_lat[0][NODE].cnt;
+ __entry->m_rd_peak = iostat_lat[0][META].peak_lat;
+ __entry->m_rd_avg = iostat_lat[0][META].avg_lat;
+ __entry->m_rd_cnt = iostat_lat[0][META].cnt;
+ __entry->d_wr_s_peak = iostat_lat[1][DATA].peak_lat;
+ __entry->d_wr_s_avg = iostat_lat[1][DATA].avg_lat;
+ __entry->d_wr_s_cnt = iostat_lat[1][DATA].cnt;
+ __entry->n_wr_s_peak = iostat_lat[1][NODE].peak_lat;
+ __entry->n_wr_s_avg = iostat_lat[1][NODE].avg_lat;
+ __entry->n_wr_s_cnt = iostat_lat[1][NODE].cnt;
+ __entry->m_wr_s_peak = iostat_lat[1][META].peak_lat;
+ __entry->m_wr_s_avg = iostat_lat[1][META].avg_lat;
+ __entry->m_wr_s_cnt = iostat_lat[1][META].cnt;
+ __entry->d_wr_as_peak = iostat_lat[2][DATA].peak_lat;
+ __entry->d_wr_as_avg = iostat_lat[2][DATA].avg_lat;
+ __entry->d_wr_as_cnt = iostat_lat[2][DATA].cnt;
+ __entry->n_wr_as_peak = iostat_lat[2][NODE].peak_lat;
+ __entry->n_wr_as_avg = iostat_lat[2][NODE].avg_lat;
+ __entry->n_wr_as_cnt = iostat_lat[2][NODE].cnt;
+ __entry->m_wr_as_peak = iostat_lat[2][META].peak_lat;
+ __entry->m_wr_as_avg = iostat_lat[2][META].avg_lat;
+ __entry->m_wr_as_cnt = iostat_lat[2][META].cnt;
+ ),
+
+ TP_printk("dev = (%d,%d), "
+ "iotype [peak lat.(ms)/avg lat.(ms)/count], "
+ "rd_data [%u/%u/%u], rd_node [%u/%u/%u], rd_meta [%u/%u/%u], "
+ "wr_sync_data [%u/%u/%u], wr_sync_node [%u/%u/%u], "
+ "wr_sync_meta [%u/%u/%u], wr_async_data [%u/%u/%u], "
+ "wr_async_node [%u/%u/%u], wr_async_meta [%u/%u/%u]",
+ show_dev(__entry->dev),
+ __entry->d_rd_peak, __entry->d_rd_avg, __entry->d_rd_cnt,
+ __entry->n_rd_peak, __entry->n_rd_avg, __entry->n_rd_cnt,
+ __entry->m_rd_peak, __entry->m_rd_avg, __entry->m_rd_cnt,
+ __entry->d_wr_s_peak, __entry->d_wr_s_avg, __entry->d_wr_s_cnt,
+ __entry->n_wr_s_peak, __entry->n_wr_s_avg, __entry->n_wr_s_cnt,
+ __entry->m_wr_s_peak, __entry->m_wr_s_avg, __entry->m_wr_s_cnt,
+ __entry->d_wr_as_peak, __entry->d_wr_as_avg, __entry->d_wr_as_cnt,
+ __entry->n_wr_as_peak, __entry->n_wr_as_avg, __entry->n_wr_as_cnt,
+ __entry->m_wr_as_peak, __entry->m_wr_as_avg, __entry->m_wr_as_cnt)
+);
#endif
TRACE_EVENT(f2fs_bmap,