From c1fd29a11f433ca8ae37723768016ffe6cdd487b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 21 Aug 2012 20:34:07 +0200 Subject: drbd: Fix a race condition that can lead to a BUG() If the preconditions for a state change change after the wait_event() we might hit the BUG() statement in conn_set_state(). By holding the spin_lock while evaluating the condition AND until the actual state change we ensure that the preconditions cannot change anymore. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 27 +++++++++++++++++++++++++++ drivers/block/drbd/drbd_state.c | 14 ++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 37ae87e468ae..1c1576b942b6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2301,3 +2301,30 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) } #endif + +/* This is defined in drivers/md/md.h as well. 
Should go into wait.h */ +#define __wait_event_lock_irq(wq, condition, lock, cmd) \ +do { \ + wait_queue_t __wait; \ + init_waitqueue_entry(&__wait, current); \ + \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock); \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ +} while (0) + +#define wait_event_lock_irq(wq, condition, lock, cmd) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, cmd); \ +} while (0) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4fda4e2024ec..ce1495187f02 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1710,7 +1710,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) return SS_CW_FAILED_BY_PEER; - spin_lock_irq(&tconn->req_lock); rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; if (rv == SS_UNKNOWN_ERROR) @@ -1719,8 +1718,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. 
*/ - spin_unlock_irq(&tconn->req_lock); - return rv; } @@ -1736,21 +1733,22 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); if (conn_send_state_req(tconn, mask, val)) { clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); - rv = SS_CW_FAILED_BY_PEER; /* if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); */ - goto abort; + mutex_unlock(&tconn->cstate_mutex); + spin_lock_irq(&tconn->req_lock); + return SS_CW_FAILED_BY_PEER; } if (val.conn == C_DISCONNECTING) set_bit(DISCONNECT_SENT, &tconn->flags); - wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); + spin_lock_irq(&tconn->req_lock); + + wait_event_lock_irq(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)), tconn->req_lock,); clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); -abort: mutex_unlock(&tconn->cstate_mutex); - spin_lock_irq(&tconn->req_lock); return rv; } -- cgit v1.2.3