diff options
author | Lars Ellenberg <lars@linbit.com> | 2016-06-14 00:26:31 +0200 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2016-06-14 05:43:07 +0200 |
commit | 9104d31a759fbade8505f38f92f4dde719957826 (patch) | |
tree | ad71e3ed1cbb0a096fae43d8a48db469b8ff4761 /drivers/block/drbd/drbd_nl.c | |
parent | drbd: report sizes if rejecting too small peer disk (diff) | |
download | linux-9104d31a759fbade8505f38f92f4dde719957826.tar.xz linux-9104d31a759fbade8505f38f92f4dde719957826.zip |
drbd: introduce WRITE_SAME support
We will support WRITE_SAME, if
* all peers support WRITE_SAME (both in kernel and DRBD version),
* all peer devices support WRITE_SAME
* logical_block_size is identical on all peers.
We may at some point introduce a fallback on the receiving side
for devices/kernels that do not support WRITE_SAME,
by open-coding a submit loop. But not yet.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 88 |
1 files changed, 79 insertions, 9 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 169e3e15a30e..9a45c80239ba 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1174,6 +1174,17 @@ static void blk_queue_discard_granularity(struct request_queue *q, unsigned int { q->limits.discard_granularity = granularity; } + +static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) +{ + /* when we introduced REQ_WRITE_SAME support, we also bumped + * our maximum supported batch bio size used for discards. */ + if (connection->agreed_features & DRBD_FF_WSAME) + return DRBD_MAX_BBIO_SECTORS; + /* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */ + return AL_EXTENT_SIZE >> 9; +} + static void decide_on_discard_support(struct drbd_device *device, struct request_queue *q, struct request_queue *b, @@ -1190,7 +1201,7 @@ static void decide_on_discard_support(struct drbd_device *device, can_do = false; drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n"); } - if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & FF_TRIM)) { + if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { can_do = false; drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); } @@ -1202,7 +1213,7 @@ static void decide_on_discard_support(struct drbd_device *device, * you care, you need to use devices with similar * topology on all peers. */ blk_queue_discard_granularity(q, 512); - q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS; + q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } else { queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); @@ -1223,8 +1234,67 @@ static void fixup_discard_if_not_supported(struct request_queue *q) } } +static void decide_on_write_same_support(struct drbd_device *device, + struct request_queue *q, + struct request_queue *b, struct o_qlim *o) +{ + struct drbd_peer_device *peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device->connection; + bool can_do = b ? b->limits.max_write_same_sectors : true; + + if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) { + can_do = false; + drbd_info(peer_device, "peer does not support WRITE_SAME\n"); + } + + if (o) { + /* logical block size; queue_logical_block_size(NULL) is 512 */ + unsigned int peer_lbs = be32_to_cpu(o->logical_block_size); + unsigned int me_lbs_b = queue_logical_block_size(b); + unsigned int me_lbs = queue_logical_block_size(q); + + if (me_lbs_b != me_lbs) { + drbd_warn(device, + "logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n", + me_lbs, me_lbs_b); + /* rather disable write same than trigger some BUG_ON later in the scsi layer. */ + can_do = false; + } + if (me_lbs_b != peer_lbs) { + drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n", + me_lbs, peer_lbs); + if (can_do) { + drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n"); + can_do = false; + } + me_lbs = max(me_lbs, me_lbs_b); + /* We cannot change the logical block size of an in-use queue. + * We can only hope that access happens to be properly aligned. + * If not, the peer will likely produce an IO error, and detach. */ + if (peer_lbs > me_lbs) { + if (device->state.role != R_PRIMARY) { + blk_queue_logical_block_size(q, peer_lbs); + drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs); + } else { + drbd_warn(peer_device, + "current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n", + me_lbs, peer_lbs); + } + } + } + if (can_do && !o->write_same_capable) { + /* If we introduce an open-coded write-same loop on the receiving side, + * the peer would present itself as "capable". */ + drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n"); + can_do = false; + } + } + + blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0); +} + static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, - unsigned int max_bio_size) + unsigned int max_bio_size, struct o_qlim *o) { struct request_queue * const q = device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; @@ -1244,15 +1314,15 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi rcu_read_unlock(); blk_set_stacking_limits(&q->limits); - blk_queue_max_write_same_sectors(q, 0); } - blk_queue_logical_block_size(q, 512); blk_queue_max_hw_sectors(q, max_hw_sectors); /* This is the workaround for "bio would need to, but cannot, be split" */ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); blk_queue_segment_boundary(q, PAGE_SIZE-1); decide_on_discard_support(device, q, b, discard_zeroes_if_aligned); + decide_on_write_same_support(device, q, b, o); + if (b) { blk_queue_stack_limits(q, b); @@ -1266,7 +1336,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi fixup_discard_if_not_supported(q); } -void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev) +void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) { unsigned int now, new, local, peer; @@ -1309,7 +1379,7 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_ba if (new != now) drbd_info(device, "max BIO size = %u\n", new); - drbd_setup_queue_param(device, bdev, new); + drbd_setup_queue_param(device, bdev, new, o); } /* Starts the worker thread */ @@ -1542,7 +1612,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned) - drbd_reconsider_queue_parameters(device, device->ldev); + drbd_reconsider_queue_parameters(device, device->ldev, NULL); drbd_md_sync(device); @@ -1922,7 +1992,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) device->read_cnt = 0; device->writ_cnt = 0; - drbd_reconsider_queue_parameters(device, device->ldev); + drbd_reconsider_queue_parameters(device, device->ldev, NULL); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, |