summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2014-04-28 18:43:13 +0200
committerJens Axboe <axboe@fb.com>2014-04-30 21:46:54 +0200
commitd7fe69c6a1940c3f9c5bed08634b6dd868612cdf (patch)
treea58b3e1e0bd3c420e94b85d963e73e33d915dfeb
parentdrbd: Break a deadlock while concurrent fencing and establishing a connection (diff)
downloadlinux-d7fe69c6a1940c3f9c5bed08634b6dd868612cdf.tar.xz
linux-d7fe69c6a1940c3f9c5bed08634b6dd868612cdf.zip
drbd: Leave IO suspended if the fence handler find the peer primary
Actually we are clearing the susp_fen flag if we are not going to call a fencing handler. For setting the susp_fen flag needs to be edge-triggerd, and not level triggered. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--drivers/block/drbd/drbd_nl.c8
-rw-r--r--drivers/block/drbd/drbd_state.c24
2 files changed, 21 insertions, 11 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 526414bc2cab..5ccbd7164c78 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -426,6 +426,14 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connec
}
rcu_read_unlock();
+ if (fp == FP_NOT_AVAIL) {
+ /* IO Suspending works on the whole resource.
+ Do it only for one device. */
+ vnr = 0;
+ peer_device = idr_get_next(&connection->peer_devices, &vnr);
+ drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
+ }
+
return fp;
}
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 1a84345a3868..379666246569 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -54,8 +54,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
- enum sanitize_state_warnings *warn);
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+ union drbd_state ns, enum sanitize_state_warnings *warn);
static inline bool is_susp(union drbd_state s)
{
@@ -287,7 +287,7 @@ _req_st_cond(struct drbd_device *device, union drbd_state mask,
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
- ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+ ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
rv = is_valid_transition(os, ns);
if (rv >= SS_SUCCESS)
rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
@@ -333,7 +333,7 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
- ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+ ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
rv = is_valid_transition(os, ns);
if (rv < SS_SUCCESS) {
spin_unlock_irqrestore(&device->resource->req_lock, flags);
@@ -740,8 +740,8 @@ static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_st
* When we loose connection, we have to set the state of the peers disk (pdsk)
* to D_UNKNOWN. This rule and many more along those lines are in this function.
*/
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
- enum sanitize_state_warnings *warn)
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+ union drbd_state ns, enum sanitize_state_warnings *warn)
{
enum drbd_fencing_p fp;
enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
@@ -882,11 +882,13 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
}
if (fp == FP_STONITH &&
- (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
+ (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
+ !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
- (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+ (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
+ !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
@@ -958,7 +960,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
os = drbd_read_state(device);
- ns = sanitize_state(device, ns, &ssw);
+ ns = sanitize_state(device, os, ns, &ssw);
if (ns.i == os.i)
return SS_NOTHING_TO_DO;
@@ -1656,7 +1658,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
os = drbd_read_state(device);
- ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+ ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;
@@ -1718,7 +1720,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
number_of_volumes++;
os = drbd_read_state(device);
ns = apply_mask_val(os, mask, val);
- ns = sanitize_state(device, ns, NULL);
+ ns = sanitize_state(device, os, ns, NULL);
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;