summaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_nl.c
diff options
context:
space:
mode:
authorLars Ellenberg <lars@linbit.com>2016-06-14 00:26:31 +0200
committerJens Axboe <axboe@fb.com>2016-06-14 05:43:07 +0200
commit9104d31a759fbade8505f38f92f4dde719957826 (patch)
treead71e3ed1cbb0a096fae43d8a48db469b8ff4761 /drivers/block/drbd/drbd_nl.c
parentdrbd: report sizes if rejecting too small peer disk (diff)
downloadlinux-9104d31a759fbade8505f38f92f4dde719957826.tar.xz
linux-9104d31a759fbade8505f38f92f4dde719957826.zip
drbd: introduce WRITE_SAME support
We will support WRITE_SAME, if * all peers support WRITE_SAME (both in kernel and DRBD version), * all peer devices support WRITE_SAME * logical_block_size is identical on all peers. We may at some point introduce a fallback on the receiving side for devices/kernels that do not support WRITE_SAME, by open-coding a submit loop. But not yet. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r--drivers/block/drbd/drbd_nl.c88
1 files changed, 79 insertions, 9 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 169e3e15a30e..9a45c80239ba 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1174,6 +1174,17 @@ static void blk_queue_discard_granularity(struct request_queue *q, unsigned int
{
q->limits.discard_granularity = granularity;
}
+
+static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
+{
+ /* when we introduced REQ_WRITE_SAME support, we also bumped
+ * our maximum supported batch bio size used for discards. */
+ if (connection->agreed_features & DRBD_FF_WSAME)
+ return DRBD_MAX_BBIO_SECTORS;
+ /* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */
+ return AL_EXTENT_SIZE >> 9;
+}
+
static void decide_on_discard_support(struct drbd_device *device,
struct request_queue *q,
struct request_queue *b,
@@ -1190,7 +1201,7 @@ static void decide_on_discard_support(struct drbd_device *device,
can_do = false;
drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n");
}
- if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & FF_TRIM)) {
+ if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
can_do = false;
drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
}
@@ -1202,7 +1213,7 @@ static void decide_on_discard_support(struct drbd_device *device,
* you care, you need to use devices with similar
* topology on all peers. */
blk_queue_discard_granularity(q, 512);
- q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
+ q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
} else {
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
@@ -1223,8 +1234,67 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
}
}
+static void decide_on_write_same_support(struct drbd_device *device,
+ struct request_queue *q,
+ struct request_queue *b, struct o_qlim *o)
+{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
+ struct drbd_connection *connection = peer_device->connection;
+ bool can_do = b ? b->limits.max_write_same_sectors : true;
+
+ if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
+ can_do = false;
+ drbd_info(peer_device, "peer does not support WRITE_SAME\n");
+ }
+
+ if (o) {
+ /* logical block size; queue_logical_block_size(NULL) is 512 */
+ unsigned int peer_lbs = be32_to_cpu(o->logical_block_size);
+ unsigned int me_lbs_b = queue_logical_block_size(b);
+ unsigned int me_lbs = queue_logical_block_size(q);
+
+ if (me_lbs_b != me_lbs) {
+ drbd_warn(device,
+ "logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n",
+ me_lbs, me_lbs_b);
+ /* rather disable write same than trigger some BUG_ON later in the scsi layer. */
+ can_do = false;
+ }
+ if (me_lbs_b != peer_lbs) {
+ drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n",
+ me_lbs, peer_lbs);
+ if (can_do) {
+ drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n");
+ can_do = false;
+ }
+ me_lbs = max(me_lbs, me_lbs_b);
+ /* We cannot change the logical block size of an in-use queue.
+ * We can only hope that access happens to be properly aligned.
+ * If not, the peer will likely produce an IO error, and detach. */
+ if (peer_lbs > me_lbs) {
+ if (device->state.role != R_PRIMARY) {
+ blk_queue_logical_block_size(q, peer_lbs);
+ drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs);
+ } else {
+ drbd_warn(peer_device,
+ "current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n",
+ me_lbs, peer_lbs);
+ }
+ }
+ }
+ if (can_do && !o->write_same_capable) {
+ /* If we introduce an open-coded write-same loop on the receiving side,
+ * the peer would present itself as "capable". */
+ drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n");
+ can_do = false;
+ }
+ }
+
+ blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0);
+}
+
static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
- unsigned int max_bio_size)
+ unsigned int max_bio_size, struct o_qlim *o)
{
struct request_queue * const q = device->rq_queue;
unsigned int max_hw_sectors = max_bio_size >> 9;
@@ -1244,15 +1314,15 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
rcu_read_unlock();
blk_set_stacking_limits(&q->limits);
- blk_queue_max_write_same_sectors(q, 0);
}
- blk_queue_logical_block_size(q, 512);
blk_queue_max_hw_sectors(q, max_hw_sectors);
/* This is the workaround for "bio would need to, but cannot, be split" */
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_SIZE-1);
decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
+ decide_on_write_same_support(device, q, b, o);
+
if (b) {
blk_queue_stack_limits(q, b);
@@ -1266,7 +1336,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
fixup_discard_if_not_supported(q);
}
-void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev)
+void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
{
unsigned int now, new, local, peer;
@@ -1309,7 +1379,7 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_ba
if (new != now)
drbd_info(device, "max BIO size = %u\n", new);
- drbd_setup_queue_param(device, bdev, new);
+ drbd_setup_queue_param(device, bdev, new, o);
}
/* Starts the worker thread */
@@ -1542,7 +1612,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned)
- drbd_reconsider_queue_parameters(device, device->ldev);
+ drbd_reconsider_queue_parameters(device, device->ldev, NULL);
drbd_md_sync(device);
@@ -1922,7 +1992,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
device->read_cnt = 0;
device->writ_cnt = 0;
- drbd_reconsider_queue_parameters(device, device->ldev);
+ drbd_reconsider_queue_parameters(device, device->ldev, NULL);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,