From e447a0151f7ce8dd884fea48279274bd64434c29 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:32:56 -0800 Subject: zram: set BDI_CAP_STABLE_WRITES once With fast swap storage, the platform wants to use swap more aggressively and swap-in is crucial to application latency. The rw_page() based synchronous devices like zram, pmem and btt are such fast storage. When I profile swapin performance with zram lz4 decompress test, S/W overhead is more than 70%. Maybe, it would be bigger in nvdimm. This patchset reduces swap-in latency by skipping swapcache if the swap device is a synchronous device like a rw_page() based device. It enhances by 45% my swapin test (5G sequential swapin, no readahead) from 2.41sec to 1.64sec. This patch (of 4): Commit 19b7ccf8651d ("block: get rid of blk_integrity_revalidate()") fixed a weird thing (i.e., reset BDI_CAP_STABLE_WRITES flag unconditionally whenever revalidat_disk is called) so zram doesn't need to reset the flag any more when revalidating the bdev. Instead, set the flag just once when the zram device is created. It shouldn't change any behavior. Link: http://lkml.kernel.org/r/1505886205-9671-2-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Ilya Dryomov Cc: Christoph Hellwig Cc: Dan Williams Cc: Ross Zwisler Cc: Jens Axboe Cc: Hugh Dickins Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'drivers/block/zram/zram_drv.c') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f149d3e61234..d95bb8ce5092 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -122,14 +122,6 @@ static inline bool is_partial_io(struct bio_vec *bvec) } #endif -static void zram_revalidate_disk(struct zram *zram) -{ - revalidate_disk(zram->disk); - /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ - zram->disk->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; -} - /* * Check if request is within bounds and aligned on zram logical blocks. */ @@ -1373,7 +1365,8 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_revalidate_disk(zram); + + revalidate_disk(zram->disk); up_write(&zram->init_lock); return len; @@ -1420,7 +1413,7 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - zram_revalidate_disk(zram); + revalidate_disk(zram->disk); bdput(bdev); mutex_lock(&bdev->bd_mutex); @@ -1539,6 +1532,7 @@ static int zram_add(void) /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); + /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. @@ -1563,6 +1557,8 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); + zram->disk->queue->backing_dev_info->capabilities |= + BDI_CAP_STABLE_WRITES; add_disk(zram->disk); ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, -- cgit v1.2.3 From 23c47d2ada9f96731492a67b28c0072715075baa Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:00 -0800 Subject: bdi: introduce BDI_CAP_SYNCHRONOUS_IO As discussed at https://lkml.kernel.org/r/<20170728165604.10455-1-ross.zwisler@linux.intel.com> someday we will remove rw_page(). If so, we need something to detect such super-fast storage on which synchronous IO operations like the current rw_page are always a win. Introduces BDI_CAP_SYNCHRONOUS_IO to indicate such devices. With it, we could use various optimization techniques. Link: http://lkml.kernel.org/r/1505886205-9671-3-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Christoph Hellwig Cc: Dan Williams Cc: Ross Zwisler Cc: Hugh Dickins Cc: Ilya Dryomov Cc: Jens Axboe Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/brd.c | 2 ++ drivers/block/zram/zram_drv.c | 2 +- drivers/nvdimm/btt.c | 3 +++ drivers/nvdimm/pmem.c | 2 ++ include/linux/backing-dev.h | 8 ++++++++ 5 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/block/zram/zram_drv.c') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c1cf87718c2e..588360d79fca 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_BLK_DEV_RAM_DAX #include #include @@ -448,6 +449,7 @@ static struct brd_device *brd_alloc(int i) disk->flags = GENHD_FL_EXT_DEVT; sprintf(disk->disk_name, "ram%d", i); set_capacity(disk, rd_size * 2); + disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; #ifdef CONFIG_BLK_DEV_RAM_DAX queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d95bb8ce5092..88564222f473 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1558,7 +1558,7 @@ static int zram_add(void) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); zram->disk->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); add_disk(zram->disk); ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index d5612bd1cc81..e949e3302af4 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "btt.h" #include "nd.h" @@ -1402,6 +1403,8 @@ static int btt_blk_init(struct btt *btt) btt->btt_disk->private_data = btt; btt->btt_disk->queue = btt->btt_queue; btt->btt_disk->flags = GENHD_FL_EXT_DEVT; + btt->btt_disk->queue->backing_dev_info->capabilities |= + BDI_CAP_SYNCHRONOUS_IO; blk_queue_make_request(btt->btt_queue, btt_make_request); blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 39dfd7affa31..7fbc5c5dc8e1 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "pmem.h" #include "pfn.h" #include "nd.h" @@ -394,6 +395,7 @@ static int pmem_attach_disk(struct device *dev, disk->fops = &pmem_fops; disk->queue = q; disk->flags = GENHD_FL_EXT_DEVT; + disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; nvdimm_namespace_disk_name(ndns, disk->disk_name); set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) / 512); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f41ca8486e02..7360e79e9a2f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -122,6 +122,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. * * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. + * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be + * inefficient. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 @@ -129,6 +131,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 +#define BDI_CAP_SYNCHRONOUS_IO 0x00000040 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) @@ -174,6 +177,11 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); +static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) +{ + return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO; +} + static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) { return bdi->capabilities & BDI_CAP_STABLE_WRITES; -- cgit v1.2.3 From 384bc41fc064bd8b12b7081aa3e81d26f3407045 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 15 Nov 2017 17:37:08 -0800 Subject: drivers/block/zram/zram_drv.c: make zram_page_end_io() static zram_page_end_io() is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'zram_page_end_io' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20171016173336.20320-1-colin.king@canonical.com Signed-off-by: Colin Ian King Reviewed-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/zram/zram_drv.c') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 88564222f473..d70eba30003a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -428,7 +428,7 @@ static void put_entry_bdev(struct zram *zram, unsigned long entry) WARN_ON_ONCE(!was_set); } -void zram_page_end_io(struct bio *bio) +static void zram_page_end_io(struct bio *bio) { struct page *page = bio->bi_io_vec[0].bv_page; -- cgit v1.2.3