From 44a4d551846b8c61aa430b9432c1fcdf88444708 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 22 Nov 2013 12:40:58 +0100 Subject: drbd: refactor use of first_peer_device() Reduce the number of calls to first_peer_device(). Instead, call first_peer_device() just once to assign a local variable peer_device. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 65 +++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 32 deletions(-) (limited to 'drivers/block/drbd/drbd_state.c') diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a5d8aae00e04..19da7c7590cd 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -952,6 +952,8 @@ enum drbd_state_rv __drbd_set_state(struct drbd_device *device, union drbd_state ns, enum chg_state_flags flags, struct completion *done) { + struct drbd_peer_device *peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; union drbd_state os; enum drbd_state_rv rv = SS_SUCCESS; enum sanitize_state_warnings ssw; @@ -978,9 +980,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, this happen...*/ if (is_valid_state(device, os) == rv) - rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); + rv = is_valid_soft_transition(os, ns, connection); } else - rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); + rv = is_valid_soft_transition(os, ns, connection); } if (rv < SS_SUCCESS) { @@ -997,7 +999,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, sanitize_state(). Only display it here if we where not called from _conn_request_state() */ if (!(flags & CS_DC_SUSP)) - conn_pr_state_change(first_peer_device(device)->connection, os, ns, + conn_pr_state_change(connection, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP); /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference @@ -1017,19 +1019,19 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, /* put replicated vs not-replicated requests in seperate epochs */ if (did_remote != should_do_remote) - start_new_tl_epoch(first_peer_device(device)->connection); + start_new_tl_epoch(connection); if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) drbd_print_uuids(device, "attached to UUIDs"); /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && - no_peer_wf_report_params(first_peer_device(device)->connection)) - clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags); + no_peer_wf_report_params(connection)) + clear_bit(STATE_SENT, &connection->flags); wake_up(&device->misc_wait); wake_up(&device->state_wait); - wake_up(&first_peer_device(device)->connection->ping_wait); + wake_up(&connection->ping_wait); /* Aborted verify run, or we reached the stop sector. * Log the last position, unless end-of-device. */ @@ -1118,21 +1120,21 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, /* Receiver should clean up itself */ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); + drbd_thread_stop_nowait(&connection->receiver); /* Now the receiver finished cleaning up itself, it should die */ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); + drbd_thread_stop_nowait(&connection->receiver); /* Upon network failure, we need to restart the receiver. */ if (os.conn > C_WF_CONNECTION && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver); + drbd_thread_restart_nowait(&connection->receiver); /* Resume AL writing if we get a connection */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { drbd_resume_al(device); - first_peer_device(device)->connection->connect_cnt++; + connection->connect_cnt++; } /* remember last attach time so request_timer_fn() won't @@ -1150,7 +1152,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, ascw->w.cb = w_after_state_ch; ascw->device = device; ascw->done = done; - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&connection->sender_work, &ascw->w); } else { drbd_err(device, "Could not kmalloc an ascw\n"); @@ -1222,6 +1224,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { struct drbd_resource *resource = device->resource; + struct drbd_peer_device *peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; struct sib_info sib; sib.sib_reason = SIB_STATE_CHANGE; @@ -1245,7 +1249,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, state change. This function might sleep */ if (ns.susp_nod) { - struct drbd_connection *connection = first_peer_device(device)->connection; enum drbd_req_event what = NOTHING; spin_lock_irq(&device->resource->req_lock); @@ -1267,8 +1270,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, } if (ns.susp_fen) { - struct drbd_connection *connection = first_peer_device(device)->connection; - spin_lock_irq(&device->resource->req_lock); if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) { /* case2: The connection was established again: */ @@ -1294,8 +1295,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, * which is unexpected. */ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && - first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) { - drbd_gen_and_send_sync_uuid(first_peer_device(device)); + connection->agreed_pro_version >= 96 && get_ldev(device)) { + drbd_gen_and_send_sync_uuid(peer_device); put_ldev(device); } @@ -1309,8 +1310,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, atomic_set(&device->rs_pending_cnt, 0); drbd_rs_cancel_all(device); - drbd_send_uuids(first_peer_device(device)); - drbd_send_state(first_peer_device(device), ns); + drbd_send_uuids(peer_device); + drbd_send_state(peer_device, ns); } /* No point in queuing send_bitmap if we don't have a connection * anymore, so check also the _current_ state, not only the new state @@ -1335,7 +1336,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, set_bit(NEW_CUR_UUID, &device->flags); } else { drbd_uuid_new_current(device); - drbd_send_uuids(first_peer_device(device)); + drbd_send_uuids(peer_device); } } put_ldev(device); @@ -1346,7 +1347,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { drbd_uuid_new_current(device); - drbd_send_uuids(first_peer_device(device)); + drbd_send_uuids(peer_device); } /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) @@ -1373,16 +1374,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* Last part of the attaching process ... */ if (ns.conn >= C_CONNECTED && os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - drbd_send_sizes(first_peer_device(device), 0, 0); /* to start sync... */ - drbd_send_uuids(first_peer_device(device)); - drbd_send_state(first_peer_device(device), ns); + drbd_send_sizes(peer_device, 0, 0); /* to start sync... */ + drbd_send_uuids(peer_device); + drbd_send_state(peer_device, ns); } /* We want to pause/continue resync, tell peer. */ if (ns.conn >= C_CONNECTED && ((os.aftr_isp != ns.aftr_isp) || (os.user_isp != ns.user_isp))) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* In case one of the isp bits got set, suspend other devices. */ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && @@ -1392,10 +1393,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* Make sure the peer gets informed about eventual state changes (ISP bits) while we were in WFReportParams. */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* We are in the progress to start a full sync... */ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || @@ -1449,7 +1450,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, drbd_disk_str(device->state.disk)); if (ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); drbd_rs_cancel_all(device); @@ -1473,7 +1474,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, drbd_disk_str(device->state.disk)); if (ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. */ put_ldev(device); @@ -1481,7 +1482,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* Notify peer that I had a local IO error, and did not detached.. */ if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && @@ -1499,14 +1500,14 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* sync target done with resync. Explicitly notify peer, even though * it should (at least for non-empty resyncs) already know itself. */ if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* Verify finished, or reached stop sector. Peer did not know about * the stop sector, and we may even have changed the stop sector during * verify to interrupt/stop early. Send the new state. */ if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED && verify_can_do_stop_sector(device)) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk -- cgit v1.2.3 From 5ab7d2c005135849cf0bb1485d954c98f2cca57c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 27 Jan 2014 15:58:22 +0100 Subject: drbd: fix resync finished detection This fixes one recent regresion, and one long existing bug. The bug: drbd_try_clear_on_disk_bm() assumed that all "count" bits have to be accounted in the resync extent corresponding to the start sector. Since we allow application requests to cross our "extent" boundaries, this assumption is no longer true, resulting in possible misaccounting, scary messages ("BAD! sector=12345s enr=6 rs_left=-7 rs_failed=0 count=58 cstate=..."), and potentially, if the last bit to be cleared during resync would reside in previously misaccounted resync extent, the resync would never be recognized as finished, but would be "stalled" forever, even though all blocks are in sync again and all bits have been cleared... The regression was introduced by drbd: get rid of atomic update on disk bitmap works For an "empty" resync (rs_total == 0), we must not "finish" the resync on the SyncSource before the SyncTarget knows all relevant information (sync uuid). We need to wait for the full round-trip, the SyncTarget will then explicitly notify us. Also for normal, non-empty resyncs (rs_total > 0), the resync-finished condition needs to be tested before the schedule() in wait_for_work, or it is likely to be missed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 315 ++++++++++++++++++--------------------- drivers/block/drbd/drbd_int.h | 42 ++++-- drivers/block/drbd/drbd_state.c | 3 + drivers/block/drbd/drbd_worker.c | 42 +++--- 4 files changed, 197 insertions(+), 205 deletions(-) (limited to 'drivers/block/drbd/drbd_state.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9c42edf4871b..278c31f24639 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -667,36 +667,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer) return 0; } +static const char *drbd_change_sync_fname[] = { + [RECORD_RS_FAILED] = "drbd_rs_failed_io", + [SET_IN_SYNC] = "drbd_set_in_sync", + [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync" +}; + /* ATTENTION. The AL's extents are 4MB each, while the extents in the * resync LRU-cache are 16MB each. * The caller of this function has to hold an get_ldev() reference. * + * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success), + * potentially pulling in (and recounting the corresponding bits) + * this resync extent into the resync extent lru cache. + * + * Returns whether all bits have been cleared for this resync extent, + * precisely: (rs_left <= rs_failed) + * * TODO will be obsoleted once we have a caching lru of the on disk bitmap */ -static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector, - int count, int success) +static bool update_rs_extent(struct drbd_device *device, + unsigned int enr, int count, + enum update_sync_bits_mode mode) { struct lc_element *e; - unsigned int enr; D_ASSERT(device, atomic_read(&device->local_cnt)); - /* I simply assume that a sector/size pair never crosses - * a 16 MB extent border. (Currently this is true...) */ - enr = BM_SECT_TO_EXT(sector); - - e = lc_get(device->resync, enr); + /* When setting out-of-sync bits, + * we don't need it cached (lc_find). + * But if it is present in the cache, + * we should update the cached bit count. + * Otherwise, that extent should be in the resync extent lru cache + * already -- or we want to pull it in if necessary -- (lc_get), + * then update and check rs_left and rs_failed. */ + if (mode == SET_OUT_OF_SYNC) + e = lc_find(device->resync, enr); + else + e = lc_get(device->resync, enr); if (e) { struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); if (ext->lce.lc_number == enr) { - if (success) + if (mode == SET_IN_SYNC) ext->rs_left -= count; + else if (mode == SET_OUT_OF_SYNC) + ext->rs_left += count; else ext->rs_failed += count; if (ext->rs_left < ext->rs_failed) { - drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d " + drbd_warn(device, "BAD! enr=%u rs_left=%d " "rs_failed=%d count=%d cstate=%s\n", - (unsigned long long)sector, ext->lce.lc_number, ext->rs_left, ext->rs_failed, count, drbd_conn_str(device->state.conn)); @@ -730,24 +750,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto ext->lce.lc_number, ext->rs_failed); } ext->rs_left = rs_left; - ext->rs_failed = success ? 0 : count; + ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0; /* we don't keep a persistent log of the resync lru, * we can commit any change right away. */ lc_committed(device->resync); } - lc_put(device->resync, &ext->lce); + if (mode != SET_OUT_OF_SYNC) + lc_put(device->resync, &ext->lce); /* no race, we are within the al_lock! */ - if (ext->rs_left == ext->rs_failed) { + if (ext->rs_left <= ext->rs_failed) { ext->rs_failed = 0; - wake_up(&first_peer_device(device)->connection->sender_work.q_wait); + return true; } - } else { + } else if (mode != SET_OUT_OF_SYNC) { + /* be quiet if lc_find() did not find it. */ drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", device->resync_locked, device->resync->nr_elements, device->resync->flags); } + return false; } void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) @@ -766,105 +789,112 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go } } -/* clear the bit corresponding to the piece of storage in question: - * size byte of data starting from sector. Only clear a bits of the affected - * one ore more _aligned_ BM_BLOCK_SIZE blocks. - * - * called by worker on C_SYNC_TARGET and receiver on SyncSource. - * - */ -void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size, - const char *file, const unsigned int line) +/* It is called lazy update, so don't do write-out too often. */ +static bool lazy_bitmap_update_due(struct drbd_device *device) { - /* Is called from worker and receiver context _only_ */ - unsigned long sbnr, ebnr, lbnr; - unsigned long count = 0; - sector_t esector, nr_sectors; - int wake_up = 0; - unsigned long flags; + return time_after(jiffies, device->rs_last_bcast + 2*HZ); +} - if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { - drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", - (unsigned long long)sector, size); +static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done) +{ + struct drbd_connection *connection; + if (rs_done) + set_bit(RS_DONE, &device->flags); + /* and also set RS_PROGRESS below */ + else if (!lazy_bitmap_update_due(device)) return; - } - if (!get_ldev(device)) - return; /* no disk, no metadata, no bitmap to clear bits in */ - - nr_sectors = drbd_get_capacity(device->this_bdev); - esector = sector + (size >> 9) - 1; - - if (!expect(sector < nr_sectors)) - goto out; - if (!expect(esector < nr_sectors)) - esector = nr_sectors - 1; - - lbnr = BM_SECT_TO_BIT(nr_sectors-1); - - /* we clear it (in sync). - * round up start sector, round down end sector. we make sure we only - * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */ - if (unlikely(esector < BM_SECT_PER_BIT-1)) - goto out; - if (unlikely(esector == (nr_sectors-1))) - ebnr = lbnr; - else - ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); - sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - - if (sbnr > ebnr) - goto out; + /* compare with test_and_clear_bit() calls in and above + * try_update_all_on_disk_bitmaps() from the drbd_worker(). */ + if (test_and_set_bit(RS_PROGRESS, &device->flags)) + return; + connection = first_peer_device(device)->connection; + if (!test_and_set_bit(CONN_RS_PROGRESS, &connection->flags)) + wake_up(&connection->sender_work.q_wait); +} +static int update_sync_bits(struct drbd_device *device, + unsigned long sbnr, unsigned long ebnr, + enum update_sync_bits_mode mode) +{ /* - * ok, (capacity & 7) != 0 sometimes, but who cares... - * we count rs_{total,left} in bits, not sectors. + * We keep a count of set bits per resync-extent in the ->rs_left + * caching member, so we need to loop and work within the resync extent + * alignment. Typically this loop will execute exactly once. */ - count = drbd_bm_clear_bits(device, sbnr, ebnr); - if (count) { - drbd_advance_rs_marks(device, drbd_bm_total_weight(device)); - spin_lock_irqsave(&device->al_lock, flags); - drbd_try_clear_on_disk_bm(device, sector, count, true); - spin_unlock_irqrestore(&device->al_lock, flags); - - /* just wake_up unconditional now, various lc_chaged(), - * lc_put() in drbd_try_clear_on_disk_bm(). */ - wake_up = 1; + unsigned long flags; + unsigned long count = 0; + unsigned int cleared = 0; + while (sbnr <= ebnr) { + /* set temporary boundary bit number to last bit number within + * the resync extent of the current start bit number, + * but cap at provided end bit number */ + unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK); + unsigned long c; + + if (mode == RECORD_RS_FAILED) + /* Only called from drbd_rs_failed_io(), bits + * supposedly still set. Recount, maybe some + * of the bits have been successfully cleared + * by application IO meanwhile. + */ + c = drbd_bm_count_bits(device, sbnr, tbnr); + else if (mode == SET_IN_SYNC) + c = drbd_bm_clear_bits(device, sbnr, tbnr); + else /* if (mode == SET_OUT_OF_SYNC) */ + c = drbd_bm_set_bits(device, sbnr, tbnr); + + if (c) { + spin_lock_irqsave(&device->al_lock, flags); + cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode); + spin_unlock_irqrestore(&device->al_lock, flags); + count += c; + } + sbnr = tbnr + 1; } -out: - put_ldev(device); - if (wake_up) + if (count) { + if (mode == SET_IN_SYNC) { + unsigned long still_to_go = drbd_bm_total_weight(device); + bool rs_is_done = (still_to_go <= device->rs_failed); + drbd_advance_rs_marks(device, still_to_go); + if (cleared || rs_is_done) + maybe_schedule_on_disk_bitmap_update(device, rs_is_done); + } else if (mode == RECORD_RS_FAILED) + device->rs_failed += count; wake_up(&device->al_wait); + } + return count; } -/* - * this is intended to set one request worth of data out of sync. - * affects at least 1 bit, - * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. +/* clear the bit corresponding to the piece of storage in question: + * size byte of data starting from sector. Only clear a bits of the affected + * one ore more _aligned_ BM_BLOCK_SIZE blocks. + * + * called by worker on C_SYNC_TARGET and receiver on SyncSource. * - * called by tl_clear and drbd_send_dblock (==drbd_make_request). - * so this can be _any_ process. */ -int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, - const char *file, const unsigned int line) +int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, + enum update_sync_bits_mode mode, + const char *file, const unsigned int line) { - unsigned long sbnr, ebnr, flags; + /* Is called from worker and receiver context _only_ */ + unsigned long sbnr, ebnr, lbnr; + unsigned long count = 0; sector_t esector, nr_sectors; - unsigned int enr, count = 0; - struct lc_element *e; - /* this should be an empty REQ_FLUSH */ - if (size == 0) + /* This would be an empty REQ_FLUSH, be silent. */ + if ((mode == SET_OUT_OF_SYNC) && size == 0) return 0; - if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { - drbd_err(device, "sector: %llus, size: %d\n", - (unsigned long long)sector, size); + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { + drbd_err(device, "%s: sector=%llus size=%d nonsense!\n", + drbd_change_sync_fname[mode], + (unsigned long long)sector, size); return 0; } if (!get_ldev(device)) - return 0; /* no disk, no metadata, no bitmap to set bits in */ + return 0; /* no disk, no metadata, no bitmap to manipulate bits in */ nr_sectors = drbd_get_capacity(device->this_bdev); esector = sector + (size >> 9) - 1; @@ -874,25 +904,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size if (!expect(esector < nr_sectors)) esector = nr_sectors - 1; - /* we set it out of sync, - * we do not need to round anything here */ - sbnr = BM_SECT_TO_BIT(sector); - ebnr = BM_SECT_TO_BIT(esector); - - /* ok, (capacity & 7) != 0 sometimes, but who cares... - * we count rs_{total,left} in bits, not sectors. */ - spin_lock_irqsave(&device->al_lock, flags); - count = drbd_bm_set_bits(device, sbnr, ebnr); + lbnr = BM_SECT_TO_BIT(nr_sectors-1); - enr = BM_SECT_TO_EXT(sector); - e = lc_find(device->resync, enr); - if (e) - lc_entry(e, struct bm_extent, lce)->rs_left += count; - spin_unlock_irqrestore(&device->al_lock, flags); + if (mode == SET_IN_SYNC) { + /* Round up start sector, round down end sector. We make sure + * we only clear full, aligned, BM_BLOCK_SIZE blocks. */ + if (unlikely(esector < BM_SECT_PER_BIT-1)) + goto out; + if (unlikely(esector == (nr_sectors-1))) + ebnr = lbnr; + else + ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); + sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); + } else { + /* We set it out of sync, or record resync failure. + * Should not round anything here. */ + sbnr = BM_SECT_TO_BIT(sector); + ebnr = BM_SECT_TO_BIT(esector); + } + count = update_sync_bits(device, sbnr, ebnr, mode); out: put_ldev(device); - return count; } @@ -1209,69 +1242,3 @@ int drbd_rs_del_all(struct drbd_device *device) return 0; } - -/** - * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks - * @device: DRBD device. - * @sector: The sector number. - * @size: Size of failed IO operation, in byte. - */ -void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size) -{ - /* Is called from worker and receiver context _only_ */ - unsigned long sbnr, ebnr, lbnr; - unsigned long count; - sector_t esector, nr_sectors; - int wake_up = 0; - - if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { - drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", - (unsigned long long)sector, size); - return; - } - nr_sectors = drbd_get_capacity(device->this_bdev); - esector = sector + (size >> 9) - 1; - - if (!expect(sector < nr_sectors)) - return; - if (!expect(esector < nr_sectors)) - esector = nr_sectors - 1; - - lbnr = BM_SECT_TO_BIT(nr_sectors-1); - - /* - * round up start sector, round down end sector. we make sure we only - * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */ - if (unlikely(esector < BM_SECT_PER_BIT-1)) - return; - if (unlikely(esector == (nr_sectors-1))) - ebnr = lbnr; - else - ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); - sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - - if (sbnr > ebnr) - return; - - /* - * ok, (capacity & 7) != 0 sometimes, but who cares... - * we count rs_{total,left} in bits, not sectors. - */ - spin_lock_irq(&device->al_lock); - count = drbd_bm_count_bits(device, sbnr, ebnr); - if (count) { - device->rs_failed += count; - - if (get_ldev(device)) { - drbd_try_clear_on_disk_bm(device, sector, count, false); - put_ldev(device); - } - - /* just wake_up unconditional now, various lc_chaged(), - * lc_put() in drbd_try_clear_on_disk_bm(). */ - wake_up = 1; - } - spin_unlock_irq(&device->al_lock); - if (wake_up) - wake_up(&device->al_wait); -} diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eb002a7656af..a16f9ae3c98a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -432,7 +432,11 @@ enum { * goes into C_CONNECTED state. */ CONSIDER_RESYNC, + RS_PROGRESS, /* tell worker that resync made significant progress */ + RS_DONE, /* tell worker that resync is done */ + MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ + SUSPEND_IO, /* suspend application io */ BITMAP_IO, /* suspend application io; once no more io in flight, start bitmap io */ @@ -577,6 +581,7 @@ enum { * and potentially deadlock on, this drbd worker. */ DISCONNECT_SENT, + CONN_RS_PROGRESS, /* tell worker that resync made significant progress */ }; struct drbd_resource { @@ -1106,17 +1111,21 @@ struct bm_extent { /* in which _bitmap_ extent (resp. sector) the bit for a certain * _storage_ sector is located in */ #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) +#define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) -/* how much _storage_ sectors we have per bitmap sector */ +/* first storage sector a bitmap extent corresponds to */ #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) +/* how much _storage_ sectors we have per bitmap extent */ #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) +/* how many bits are covered by one bitmap extent (resync extent) */ +#define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) + +#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1) + /* in one sector of the bitmap, we have this many activity_log extents. */ #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) -#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) -#define BM_BLOCKS_PER_BM_EXT_MASK ((1<ldev. Returns 0 if there is no ldev * @M: DRBD device. diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 19da7c7590cd..1bddd6cf8ac7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1011,6 +1011,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, atomic_inc(&device->local_cnt); did_remote = drbd_should_do_remote(device->state); + if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) + clear_bit(RS_DONE, &device->flags); + device->state.i = ns.i; should_do_remote = drbd_should_do_remote(device->state); device->resource->susp = ns.susp; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 47bc84017b5b..bafb62eb22c9 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1740,11 +1740,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) device->rs_mark_time[i] = now; } _drbd_pause_after(device); + /* Forget potentially stale cached per resync extent bit-counts. + * Open coded drbd_rs_cancel_all(device), we already have IRQs + * disabled, and know the disk state is ok. */ + spin_lock(&device->al_lock); + lc_reset(device->resync); + device->resync_locked = 0; + device->resync_wenr = LC_FREE; + spin_unlock(&device->al_lock); } write_unlock(&global_state_lock); spin_unlock_irq(&device->resource->req_lock); if (r == SS_SUCCESS) { + wake_up(&device->al_wait); /* for lc_reset() above */ /* reset rs_last_bcast when a resync or verify is started, * to deal with potential jiffies wrap. */ device->rs_last_bcast = jiffies - HZ; @@ -1807,36 +1816,22 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) static void update_on_disk_bitmap(struct drbd_device *device) { struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; + bool resync_done = test_and_clear_bit(RS_DONE, &device->flags); device->rs_last_bcast = jiffies; if (!get_ldev(device)) return; drbd_bm_write_lazy(device, 0); - if (drbd_bm_total_weight(device) <= device->rs_failed) + if (resync_done && is_sync_state(device->state.conn)) drbd_resync_finished(device); + drbd_bcast_event(device, &sib); /* update timestamp, in case it took a while to write out stuff */ device->rs_last_bcast = jiffies; put_ldev(device); } -bool wants_lazy_bitmap_update(struct drbd_device *device) -{ - enum drbd_conns connection_state = device->state.conn; - return - /* only do a lazy writeout, if device is in some resync state */ - (connection_state == C_SYNC_SOURCE - || connection_state == C_SYNC_TARGET - || connection_state == C_PAUSED_SYNC_S - || connection_state == C_PAUSED_SYNC_T) && - /* AND - * either we just finished, or the last lazy update - * was some time ago already. */ - (drbd_bm_total_weight(device) <= device->rs_failed - || time_after(jiffies, device->rs_last_bcast + 2*HZ)); -} - static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection) { struct drbd_peer_device *peer_device; @@ -1845,8 +1840,9 @@ static void try_update_all_on_disk_bitmaps(struct drbd_connection *connection) rcu_read_lock(); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { struct drbd_device *device = peer_device->device; - if (!wants_lazy_bitmap_update(device)) + if (!test_and_clear_bit(RS_PROGRESS, &device->flags)) continue; + kref_get(&device->kref); rcu_read_unlock(); update_on_disk_bitmap(device); @@ -1930,15 +1926,18 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * if (send_barrier) maybe_send_barrier(connection, connection->send.current_epoch_nr + 1); + + if (test_bit(CONN_RS_PROGRESS, &connection->flags)) + break; + /* drbd_send() may have called flush_signals() */ if (get_t_state(&connection->worker) != RUNNING) break; + schedule(); /* may be woken up for other things but new work, too, * e.g. if the current epoch got closed. * In which case we send the barrier above. */ - - try_update_all_on_disk_bitmaps(connection); } finish_wait(&connection->sender_work.q_wait, &wait); @@ -1973,6 +1972,9 @@ int drbd_worker(struct drbd_thread *thi) if (list_empty(&work_list)) wait_for_work(connection, &work_list); + if (test_and_clear_bit(CONN_RS_PROGRESS, &connection->flags)) + try_update_all_on_disk_bitmaps(connection); + if (signal_pending(current)) { flush_signals(current); if (get_t_state(thi) == RUNNING) { -- cgit v1.2.3 From ba3c6fb87d2df008eed8faaf01bb198e512fa72f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 11 Feb 2014 08:57:18 +0100 Subject: drbd: close race when detaching from disk BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: bd_release+0x21/0x70 Process drbd_w_t7146 Call Trace: close_bdev_exclusive drbd_free_ldev [drbd] drbd_ldev_destroy [drbd] w_after_state_ch [drbd] Race probably went like this: state.disk = D_FAILED ... first one to hit zero during D_FAILED: put_ldev() /* ----------------> 0 */ i = atomic_dec_return() if (i == 0) if (state.disk == D_FAILED) schedule_work(go_diskless) /* 1 <------ */ get_ldev_if_state() go_diskless() do_some_pre_cleanup() corresponding put_ldev(): force_state(D_DISKLESS) /* 0 <------ */ i = atomic_dec_return() if (i == 0) atomic_inc() /* ---------> 1 */ state.disk = D_DISKLESS schedule_work(after_state_ch) /* execution pre-empted by IRQ ? */ after_state_ch() put_ldev() i = atomic_dec_return() /* 0 */ if (i == 0) if (state.disk == D_DISKLESS) if (state.disk == D_DISKLESS) drbd_ldev_destroy() drbd_ldev_destroy(); Trying to fix this by checking the disk state *before* the atomic_dec_return(), which implies memory barriers, and by inserting extra memory barriers around the state assignment in __drbd_set_state(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 +++++++-- drivers/block/drbd/drbd_state.c | 11 +++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'drivers/block/drbd/drbd_state.c') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a16f9ae3c98a..a0ffc19ccf0e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1946,6 +1946,11 @@ static inline bool is_sync_state(enum drbd_conns connection_state) static inline void put_ldev(struct drbd_device *device) { + enum drbd_disk_state ds = device->state.disk; + /* We must check the state *before* the atomic_dec becomes visible, + * or we have a theoretical race where someone hitting zero, + * while state still D_FAILED, will then see D_DISKLESS in the + * condition below and calling into destroy, where he must not, yet. */ int i = atomic_dec_return(&device->local_cnt); /* This may be called from some endio handler, @@ -1954,10 +1959,10 @@ static inline void put_ldev(struct drbd_device *device) __release(local); D_ASSERT(device, i >= 0); if (i == 0) { - if (device->state.disk == D_DISKLESS) + if (ds == D_DISKLESS) /* even internal references gone, safe to destroy */ drbd_ldev_destroy(device); - if (device->state.disk == D_FAILED) { + if (ds == D_FAILED) { /* all application IO references gone. */ if (!test_and_set_bit(GO_DISKLESS, &device->flags)) drbd_queue_work(&first_peer_device(device)->connection->sender_work, diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 1bddd6cf8ac7..6629f4668102 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -958,7 +958,6 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, enum drbd_state_rv rv = SS_SUCCESS; enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; - bool did_remote, should_do_remote; os = drbd_read_state(device); @@ -1010,18 +1009,22 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) atomic_inc(&device->local_cnt); - did_remote = drbd_should_do_remote(device->state); if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) clear_bit(RS_DONE, &device->flags); + /* changes to local_cnt and device flags should be visible before + * changes to state, which again should be visible before anything else + * depending on that change happens. */ + smp_wmb(); device->state.i = ns.i; - should_do_remote = drbd_should_do_remote(device->state); device->resource->susp = ns.susp; device->resource->susp_nod = ns.susp_nod; device->resource->susp_fen = ns.susp_fen; + smp_wmb(); /* put replicated vs not-replicated requests in seperate epochs */ - if (did_remote != should_do_remote) + if (drbd_should_do_remote((union drbd_dev_state)os.i) != + drbd_should_do_remote((union drbd_dev_state)ns.i)) start_new_tl_epoch(connection); if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) -- cgit v1.2.3 From 8ce953aa39e2bfd66036a27abdf761c2cb93f02c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Feb 2014 09:46:18 +0100 Subject: drbd: silence -Wmissing-prototypes warnings Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 3 +-- drivers/block/drbd/drbd_req.h | 1 + drivers/block/drbd/drbd_state.c | 6 +++--- lib/lru_cache.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/block/drbd/drbd_state.c') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ed35d52b4763..68ec774d52ea 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2877,7 +2877,7 @@ void drbd_delete_device(struct drbd_device *device) kref_sub(&device->kref, refs, drbd_destroy_device); } -int __init drbd_init(void) +static int __init drbd_init(void) { int err; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 628167db673b..81949d65fad0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2913,7 +2913,7 @@ static struct drbd_connection *the_only_connection(struct drbd_resource *resourc return list_first_entry(&resource->connections, struct drbd_connection, connections); } -int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, +static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, const struct sib_info *sib) { struct drbd_resource *resource = device->resource; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cfc3d43ca43f..2fcc3af03bd8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3371,8 +3371,7 @@ disconnect: * return: NULL (alg name was "") * ERR_PTR(error) if something goes wrong * or the crypto hash ptr, if it worked out ok. */ -static -struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, +static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, const char *alg, const char *name) { struct crypto_hash *tfm; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 8566cd5866b4..9f6a04080e9f 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -288,6 +288,7 @@ extern void complete_master_bio(struct drbd_device *device, extern void request_timer_fn(unsigned long data); extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what); extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what); +extern void tl_abort_disk_io(struct drbd_device *device); /* this is in drbd_main.c */ extern void drbd_restart_request(struct drbd_request *req); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 6629f4668102..a7772e2aba7e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -410,7 +410,7 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask, return rv; } -static void print_st(struct drbd_device *device, char *name, union drbd_state ns) +static void print_st(struct drbd_device *device, const char *name, union drbd_state ns) { drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", name, @@ -1606,7 +1606,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) return 0; } -void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) +static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) { enum chg_state_flags flags = ~0; struct drbd_peer_device *peer_device; @@ -1695,7 +1695,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma return rv; } -void +static void conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) { diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 4a83ecd03650..6111cd19762d 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -169,7 +169,7 @@ out_fail: return NULL; } -void lc_free_by_index(struct lru_cache *lc, unsigned i) +static void lc_free_by_index(struct lru_cache *lc, unsigned i) { void *p = lc->lc_element[i]; WARN_ON(!p); -- cgit v1.2.3 From ccdd6a93ee0c4a68cfaeddc80a87fc092ff35b96 Mon Sep 17 00:00:00 2001 From: Monam Agarwal Date: Sun, 23 Mar 2014 02:02:29 +0530 Subject: drivers/block: Use RCU_INIT_POINTER(x, NULL) in drbd/drbd_state.c This patch replaces rcu_assign_pointer(x, NULL) with RCU_INIT_POINTER(x, NULL) The rcu_assign_pointer() ensures that the initialization of a structure is carried out before storing a pointer to that structure. And in the case of the NULL pointer, there is no structure to initialize. So, rcu_assign_pointer(p, NULL) can be safely converted to RCU_INIT_POINTER(p, NULL) Signed-off-by: Monam Agarwal Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_state.c') diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a7772e2aba7e..4a75b7a03b9d 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1570,7 +1570,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) old_conf = connection->net_conf; connection->my_addr_len = 0; connection->peer_addr_len = 0; - rcu_assign_pointer(connection->net_conf, NULL); + RCU_INIT_POINTER(connection->net_conf, NULL); conn_free_crypto(connection); mutex_unlock(&connection->resource->conf_update); -- cgit v1.2.3 From 2f2abeae3cd139fb3581249388bf2018612a87e5 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Mar 2014 10:34:13 +0100 Subject: drbd: clear CRASHED_PRIMARY only after successful resync If we lost a disk during the first resync after primary crash, we could have prematurely cleared the CRASHED_PRIMARY flag. Testing on C_CONNECTED is not what we meant there, but testing for both peers to become D_UP_TO_DATE. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_state.c') diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4a75b7a03b9d..c35c0f001bb7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1238,7 +1238,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, sib.os = os; sib.ns = ns; - if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { + if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE) + && (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) { clear_bit(CRASHED_PRIMARY, &device->flags); if (device->p_uuid) device->p_uuid[UI_FLAGS] &= ~((u64)2); -- cgit v1.2.3