diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-25 18:15:35 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-25 18:15:35 +0200 |
commit | 929cfdd5d3bdc772aff32e5a3fb4e3894394aa75 (patch) | |
tree | f67202d079eaf1f8d65b2e1bfac70b768ae34bc4 /drivers/block/drbd | |
parent | Merge branch 'for-2.6.40/core' of git://git.kernel.dk/linux-2.6-block (diff) | |
parent | loop: handle on-demand devices correctly (diff) | |
download | linux-929cfdd5d3bdc772aff32e5a3fb4e3894394aa75.tar.xz linux-929cfdd5d3bdc772aff32e5a3fb4e3894394aa75.zip |
Merge branch 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.40/drivers' of git://git.kernel.dk/linux-2.6-block: (110 commits)
loop: handle on-demand devices correctly
loop: limit 'max_part' module param to DISK_MAX_PARTS
drbd: fix warning
drbd: fix warning
drbd: Fix spelling
drbd: fix schedule in atomic
drbd: Take a more conservative approach when deciding max_bio_size
drbd: Fixed state transitions after async outdate-peer-handler returned
drbd: Disallow the peer_disk_state to be D_OUTDATED while connected
drbd: Fix for the connection problems on high latency links
drbd: fix potential activity log refcount imbalance in error path
drbd: Only downgrade the disk state in case of disk failures
drbd: fix disconnect/reconnect loop, if ping-timeout == ping-int
drbd: fix potential distributed deadlock
lru_cache.h: fix comments referring to ts_ instead of lc_
drbd: Fix for application IO with the on-io-error=pass-on policy
xen/p2m: Add EXPORT_SYMBOL_GPL to the M2P override functions.
xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override.
xen/blkback: don't fail empty barrier requests
xen/blkback: fix xenbus_transaction_start() hang caused by double xenbus_transaction_end()
...
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 6 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 19 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 37 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 127 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 68 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 20 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 5 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 98 |
9 files changed, 243 insertions, 139 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c6828b68d77b..09ef9a878ef0 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -28,7 +28,7 @@ #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial check sum in our on disk activity log. +/* We maintain a trivial checksum in our on disk activity log. * With that we can ensure correct operation even when the storage * device might do a partial (last) sector write while losing power. */ diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 76210ba401ac..f440a02dfdb1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -74,7 +74,7 @@ * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage * seems excessive. * - * We plan to reduce the amount of in-core bitmap pages by pageing them in + * We plan to reduce the amount of in-core bitmap pages by paging them in * and out against their on-disk location as necessary, but need to make * sure we don't cause too much meta data IO, and must not deadlock in * tight memory situations. This needs some more work. @@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) * we if bits have been cleared since last IO. */ #define BM_PAGE_LAZY_WRITEOUT 28 -/* store_page_idx uses non-atomic assingment. It is only used directly after +/* store_page_idx uses non-atomic assignment. It is only used directly after * allocating the page. All other bm_set_page_* and bm_clear_page_* need to * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap * changes) may happen from various contexts, and wait_on_bit/wake_up_bit @@ -318,7 +318,7 @@ static void bm_unmap(unsigned long *p_addr) /* word offset from start of bitmap to word number _in_page_ * modulo longs per page #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) - hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) + hm, well, Philipp thinks gcc might not optimize the % into & (... - 1) so do it explicitly: */ #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d871b14ed5a1..ef2ceed3be4b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -700,7 +700,7 @@ struct drbd_request { * see drbd_endio_pri(). */ struct bio *private_bio; - struct hlist_node colision; + struct hlist_node collision; sector_t sector; unsigned int size; unsigned int epoch; /* barrier_nr */ @@ -766,7 +766,7 @@ struct digest_info { struct drbd_epoch_entry { struct drbd_work w; - struct hlist_node colision; + struct hlist_node collision; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; struct page *pages; @@ -1129,6 +1129,8 @@ struct drbd_conf { int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ int rs_planed; /* resync sectors already planned */ atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ + int peer_max_bio_size; + int local_max_bio_size; }; static inline struct drbd_conf *minor_to_mdev(unsigned int minor) @@ -1218,8 +1220,6 @@ extern void drbd_free_resources(struct drbd_conf *mdev); extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_conf *mdev); -enum drbd_req_event; -extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_conf *mdev); extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, @@ -1434,6 +1434,7 @@ struct bm_extent { * hash table. */ #define HT_SHIFT 8 #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) +#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ @@ -1518,9 +1519,9 @@ extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); +extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); -extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); +extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force); @@ -1828,6 +1829,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, if (!forcedetach) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); + if (mdev->state.disk > D_INCONSISTENT) + _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); break; } /* NOTE fall through to detach case if forcedetach set */ @@ -2153,6 +2156,10 @@ static inline int get_net_conf(struct drbd_conf *mdev) static inline void put_ldev(struct drbd_conf *mdev) { int i = atomic_dec_return(&mdev->local_cnt); + + /* This may be called from some endio handler, + * so we must not sleep here. */ + __release(local); D_ASSERT(i >= 0); if (i == 0) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5b525c179f39..0358e55356c8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -745,6 +745,9 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) mdev->agreed_pro_version < 88) rv = SS_NOT_SUPPORTED; + else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) + rv = SS_CONNECTED_OUTDATES; + return rv; } @@ -1565,6 +1568,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, put_ldev(mdev); } + /* Notify peer that I had a local IO error, and did not detached.. */ + if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) + drbd_send_state(mdev); + /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { @@ -2064,7 +2071,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl { struct p_sizes p; sector_t d_size, u_size; - int q_order_type; + int q_order_type, max_bio_size; int ok; if (get_ldev_if_state(mdev, D_NEGOTIATING)) { @@ -2072,17 +2079,20 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl d_size = drbd_get_max_capacity(mdev->ldev); u_size = mdev->ldev->dc.disk_size; q_order_type = drbd_queue_order_type(mdev); + max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE); put_ldev(mdev); } else { d_size = 0; u_size = 0; q_order_type = QUEUE_ORDERED_NONE; + max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ } p.d_size = cpu_to_be64(d_size); p.u_size = cpu_to_be64(u_size); p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); - p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9); + p.max_bio_size = cpu_to_be32(max_bio_size); p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); @@ -2722,7 +2732,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* double check digest, sometimes buffers have been modified in flight. */ if (dgs > 0 && dgs <= 64) { - /* 64 byte, 512 bit, is the larges digest size + /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); @@ -3041,6 +3051,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) mdev->agreed_pro_version = PRO_VERSION_MAX; mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; + mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; + mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; } void drbd_mdev_cleanup(struct drbd_conf *mdev) @@ -3275,7 +3287,7 @@ static void drbd_delete_device(unsigned int minor) drbd_release_ee_lists(mdev); - /* should be free'd on disconnect? */ + /* should be freed on disconnect? */ kfree(mdev->ee_hash); /* mdev->ee_hash_s = 0; @@ -3415,7 +3427,9 @@ struct drbd_conf *drbd_new_device(unsigned int minor) q->backing_dev_info.congested_data = mdev; blk_queue_make_request(q, drbd_make_request); - blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9); + /* Setting the max_hw_sectors to an odd value of 8kibyte here + This triggers a max_bio_size message upon first attach or connect */ + blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); q->queue_lock = &mdev->req_lock; @@ -3627,7 +3641,8 @@ struct meta_data_on_disk { /* `-- act_log->nr_elements <-- sync_conf.al_extents */ u32 bm_offset; /* offset to the bitmap, from here */ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ - u32 reserved_u32[4]; + u32 la_peer_max_bio_size; /* last peer max_bio_size */ + u32 reserved_u32[3]; } __packed; @@ -3668,6 +3683,7 @@ void drbd_md_sync(struct drbd_conf *mdev) buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); + buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); sector = mdev->ldev->md.md_offset; @@ -3751,6 +3767,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); + spin_lock_irq(&mdev->req_lock); + if (mdev->state.conn < C_CONNECTED) { + int peer; + peer = be32_to_cpu(buffer->la_peer_max_bio_size); + peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); + mdev->peer_max_bio_size = peer; + } + spin_unlock_irq(&mdev->req_lock); + if (mdev->sync_conf.al_extents < 7) mdev->sync_conf.al_extents = 127; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 03b29f78a37d..515bcd948a43 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -272,9 +272,28 @@ static int _try_outdate_peer_async(void *data) { struct drbd_conf *mdev = (struct drbd_conf *)data; enum drbd_disk_state nps; + union drbd_state ns; nps = drbd_try_outdate_peer(mdev); - drbd_request_state(mdev, NS(pdsk, nps)); + + /* Not using + drbd_request_state(mdev, NS(pdsk, nps)); + here, because we might were able to re-establish the connection + in the meantime. This can only partially be solved in the state's + engine is_valid_state() and is_valid_state_transition() + functions. + + nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN. + pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, + therefore we have to have the pre state change check here. + */ + spin_lock_irq(&mdev->req_lock); + ns = mdev->state; + if (ns.conn < C_WF_REPORT_PARAMS) { + ns.pdsk = nps; + _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); + } + spin_unlock_irq(&mdev->req_lock); return 0; } @@ -577,7 +596,7 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ -enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) +enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size; @@ -773,30 +792,78 @@ static int drbd_check_al_size(struct drbd_conf *mdev) return 0; } -void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local) +static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) { struct request_queue * const q = mdev->rq_queue; - struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; - int max_segments = mdev->ldev->dc.max_bio_bvecs; - int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); + int max_hw_sectors = max_bio_size >> 9; + int max_segments = 0; + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; + + max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); + max_segments = mdev->ldev->dc.max_bio_bvecs; + put_ldev(mdev); + } blk_queue_logical_block_size(q, 512); blk_queue_max_hw_sectors(q, max_hw_sectors); /* This is the workaround for "bio would need to, but cannot, be split" */ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); - blk_queue_stack_limits(q, b); - dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9); + if (get_ldev_if_state(mdev, D_ATTACHING)) { + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; + + blk_queue_stack_limits(q, b); - if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { - dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info.ra_pages, - b->backing_dev_info.ra_pages); - q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", + q->backing_dev_info.ra_pages, + b->backing_dev_info.ra_pages); + q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + } + put_ldev(mdev); } } +void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) +{ + int now, new, local, peer; + + now = queue_max_hw_sectors(mdev->rq_queue) << 9; + local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */ + peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */ + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; + mdev->local_max_bio_size = local; + put_ldev(mdev); + } + + /* We may ignore peer limits if the peer is modern enough. + Because new from 8.3.8 onwards the peer can use multiple + BIOs for a single peer_request */ + if (mdev->state.conn >= C_CONNECTED) { + if (mdev->agreed_pro_version < 94) + peer = mdev->peer_max_bio_size; + else if (mdev->agreed_pro_version == 94) + peer = DRBD_MAX_SIZE_H80_PACKET; + else /* drbd 8.3.8 onwards */ + peer = DRBD_MAX_BIO_SIZE; + } + + new = min_t(int, local, peer); + + if (mdev->state.role == R_PRIMARY && new < now) + dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now); + + if (new != now) + dev_info(DEV, "max BIO size = %u\n", new); + + drbd_setup_queue_param(mdev, new); +} + /* serialize deconfig (worker exiting, doing cleanup) * and reconfig (drbdsetup disk, drbdsetup net) * @@ -865,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp struct block_device *bdev; struct lru_cache *resync_lru = NULL; union drbd_state ns, os; - unsigned int max_bio_size; enum drbd_state_rv rv; int cp_discovered = 0; int logical_block_size; @@ -1117,20 +1183,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp mdev->read_cnt = 0; mdev->writ_cnt = 0; - max_bio_size = DRBD_MAX_BIO_SIZE; - if (mdev->state.conn == C_CONNECTED) { - /* We are Primary, Connected, and now attach a new local - * backing store. We must not increase the user visible maximum - * bio size on this device to something the peer may not be - * able to handle. */ - if (mdev->agreed_pro_version < 94) - max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; - else if (mdev->agreed_pro_version == 94) - max_bio_size = DRBD_MAX_SIZE_H80_PACKET; - /* else: drbd 8.3.9 and later, stay with default */ - } - - drbd_setup_queue_param(mdev, max_bio_size); + drbd_reconsider_max_bio_size(mdev); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, @@ -1152,7 +1205,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determin_dev_size(mdev, 0); + dd = drbd_determine_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; @@ -1281,11 +1334,19 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { + enum drbd_ret_code retcode; + int ret; drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ - reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - if (mdev->state.disk == D_DISKLESS) - wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); + /* D_FAILED will transition to DISKLESS. */ + ret = wait_event_interruptible(mdev->misc_wait, + mdev->state.disk != D_FAILED); drbd_resume_io(mdev); + if ((int)retcode == (int)SS_IS_DISKLESS) + retcode = SS_NOTHING_TO_DO; + if (ret) + retcode = ERR_INTR; + reply->ret_code = retcode; return 0; } @@ -1658,7 +1719,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); drbd_md_sync(mdev); put_ldev(mdev); if (dd == dev_size_error) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fd26666c0b08..25d32c5aa50a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -333,7 +333,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, if (!page) goto fail; - INIT_HLIST_NODE(&e->colision); + INIT_HLIST_NODE(&e->collision); e->epoch = NULL; e->mdev = mdev; e->pages = page; @@ -356,7 +356,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i kfree(e->digest); drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); mempool_free(e, drbd_ee_mempool); } @@ -787,7 +787,7 @@ static int drbd_connect(struct drbd_conf *mdev) } if (sock && msock) { - schedule_timeout_interruptible(HZ / 10); + schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10); ok = drbd_socket_okay(mdev, &sock); ok = drbd_socket_okay(mdev, &msock) && ok; if (ok) @@ -899,11 +899,6 @@ retry: drbd_thread_start(&mdev->asender); - if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) { - drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET); - put_ldev(mdev); - } - if (drbd_send_protocol(mdev) == -1) return -1; drbd_send_sync_param(mdev, &mdev->sync_conf); @@ -1418,7 +1413,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u sector_t sector = e->sector; int ok; - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, e->size); @@ -1487,7 +1482,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi return false; } - /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid + /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ ok = recv_dless_read(mdev, req, sector, data_size); @@ -1558,11 +1553,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); } else { - D_ASSERT(hlist_unhashed(&e->colision)); + D_ASSERT(hlist_unhashed(&e->collision)); } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); @@ -1579,8 +1574,8 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); + D_ASSERT(!hlist_unhashed(&e->collision)); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); dec_unacked(mdev); @@ -1755,7 +1750,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); - hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); + hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); @@ -1765,7 +1760,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { /* only ALERT on first iteration, * we may be woken up early... */ @@ -1804,7 +1799,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (signal_pending(current)) { - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); @@ -1862,7 +1857,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); - hlist_del_init(&e->colision); + hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->sector); @@ -2916,12 +2911,6 @@ disconnect: return false; } -static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) -{ - /* sorry, we currently have no working implementation - * of distributed TCQ */ -} - /* warn if the arguments differ by more than 12.5% */ static void warn_if_differ_considerably(struct drbd_conf *mdev, const char *s, sector_t a, sector_t b) @@ -2939,7 +2928,6 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned { struct p_sizes *p = &mdev->data.rbuf.sizes; enum determine_dev_size dd = unchanged; - unsigned int max_bio_size; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -2994,7 +2982,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(mdev)) { - dd = drbd_determin_dev_size(mdev, ddsf); + dd = drbd_determine_dev_size(mdev, ddsf); put_ldev(mdev); if (dd == dev_size_error) return false; @@ -3004,23 +2992,15 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned drbd_set_my_capacity(mdev, p_size); } + mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size); + drbd_reconsider_max_bio_size(mdev); + if (get_ldev(mdev)) { if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); ldsc = 1; } - if (mdev->agreed_pro_version < 94) - max_bio_size = be32_to_cpu(p->max_bio_size); - else if (mdev->agreed_pro_version == 94) - max_bio_size = DRBD_MAX_SIZE_H80_PACKET; - else /* drbd 8.3.8 onwards */ - max_bio_size = DRBD_MAX_BIO_SIZE; - - if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9) - drbd_setup_queue_param(mdev, max_bio_size); - - drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); put_ldev(mdev); } @@ -4275,7 +4255,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { if (req->sector != sector) { dev_err(DEV, "_ack_id_to_req: found req %p but it has " @@ -4554,6 +4534,7 @@ int drbd_asender(struct drbd_thread *thi) int received = 0; int expect = sizeof(struct p_header80); int empty; + int ping_timeout_active = 0; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4566,6 +4547,7 @@ int drbd_asender(struct drbd_thread *thi) ERR_IF(!drbd_send_ping(mdev)) goto reconnect; mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*HZ/10; + ping_timeout_active = 1; } /* conditionally cork; @@ -4620,8 +4602,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { - if (mdev->meta.socket->sk->sk_rcvtimeo == - mdev->net_conf->ping_timeo*HZ/10) { + if (ping_timeout_active) { dev_err(DEV, "PingAck did not arrive in time.\n"); goto reconnect; } @@ -4660,6 +4641,11 @@ int drbd_asender(struct drbd_thread *thi) if (!cmd->process(mdev, h)) goto reconnect; + /* the idle_timeout (ping-int) + * has been restored in got_PingAck() */ + if (cmd == get_asender_cmd(P_PING_ACK)) + ping_timeout_active = 0; + buf = h; received = 0; expect = sizeof(struct p_header80); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5c0c8be1bb0a..3424d675b769 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -163,7 +163,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * they must have been failed on the spot */ #define OVERLAPS overlaps(sector, size, i->sector, i->size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " "other: %p %llus +%u\n", @@ -187,7 +187,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, #undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) slot = ee_hash_slot(mdev, req->sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { wake_up(&mdev->misc_wait); break; @@ -260,8 +260,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->colision)) - hlist_del(&req->colision); + if (!hlist_unhashed(&req->collision)) + hlist_del(&req->collision); else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); @@ -329,7 +329,7 @@ static int _req_conflicts(struct drbd_request *req) struct hlist_node *n; struct hlist_head *slot; - D_ASSERT(hlist_unhashed(&req->colision)); + D_ASSERT(hlist_unhashed(&req->collision)); if (!get_net_conf(mdev)) return 0; @@ -341,7 +341,7 @@ static int _req_conflicts(struct drbd_request *req) #define OVERLAPS overlaps(i->sector, i->size, sector, size) slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { + hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent local write detected! " "[DISCARD L] new: %llus +%u; " @@ -359,7 +359,7 @@ static int _req_conflicts(struct drbd_request *req) #undef OVERLAPS #define OVERLAPS overlaps(e->sector, e->size, sector, size) slot = ee_hash_slot(mdev, sector); - hlist_for_each_entry(e, n, slot, colision) { + hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "%s[%u] Concurrent remote write detected!" " [DISCARD L] new: %llus +%u; " @@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector)); /* corresponding hlist_del is in _req_may_be_done() */ /* NOTE @@ -1033,7 +1033,7 @@ fail_conflicting: err = 0; fail_free_complete: - if (rw == WRITE && local) + if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, sector); fail_and_free_req: if (local) { diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 32e2c3e6a813..68a234a5fdc5 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -256,7 +256,7 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, colision) { + hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { D_ASSERT(req->sector == sector); return req; @@ -291,7 +291,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->epoch = 0; req->sector = bio_src->bi_sector; req->size = bio_src->bi_size; - INIT_HLIST_NODE(&req->colision); + INIT_HLIST_NODE(&req->collision); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); } @@ -323,6 +323,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, extern void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m); extern void request_timer_fn(unsigned long data); +extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f7e6c92f8d03..4d76b06b6b20 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -126,7 +126,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, + /* No hlist_del_init(&e->collision) here, we did not send the Ack yet, * neither did we wake possibly waiting conflicting requests. * done from "drbd_process_done_ee" within the appropriate w.cb * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ @@ -297,42 +297,48 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * crypto_hash_final(&desc, digest); } -static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +/* TODO merge common code with w_e_end_ov_req */ +int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); int digest_size; void *digest; - int ok; + int ok = 1; D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); - if (unlikely(cancel)) { - drbd_free_ee(mdev, e); - return 1; - } + if (unlikely(cancel)) + goto out; - if (likely((e->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); - digest = kmalloc(digest_size, GFP_NOIO); - if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + if (likely((e->flags & EE_WAS_ERROR) != 0)) + goto out; - inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, - e->sector, - e->size, - digest, - digest_size, - P_CSUM_RS_REQUEST); - kfree(digest); - } else { - dev_err(DEV, "kmalloc() of digest failed.\n"); - ok = 0; - } - } else - ok = 1; + digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest = kmalloc(digest_size, GFP_NOIO); + if (digest) { + sector_t sector = e->sector; + unsigned int size = e->size; + drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + /* Free e and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. */ + drbd_free_ee(mdev, e); + e = NULL; + inc_rs_pending(mdev); + ok = drbd_send_drequest_csum(mdev, sector, size, + digest, digest_size, + P_CSUM_RS_REQUEST); + kfree(digest); + } else { + dev_err(DEV, "kmalloc() of digest failed.\n"); + ok = 0; + } - drbd_free_ee(mdev, e); +out: + if (e) + drbd_free_ee(mdev, e); if (unlikely(!ok)) dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); @@ -834,7 +840,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) const int ratio = (t == 0) ? 0 : (t < 100000) ? ((s*100)/t) : (s/(t/100)); - dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " + dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " "transferred %luK total %luK\n", ratio, Bit2KB(mdev->rs_same_csum), @@ -1071,9 +1077,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } +/* TODO merge common code with w_e_send_csum */ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + sector_t sector = e->sector; + unsigned int size = e->size; int digest_size; void *digest; int ok = 1; @@ -1093,17 +1102,25 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) else memset(digest, 0, digest_size); + /* Free e and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. */ + drbd_free_ee(mdev, e); + e = NULL; inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, e->sector, e->size, - digest, digest_size, P_OV_REPLY); + ok = drbd_send_drequest_csum(mdev, sector, size, + digest, digest_size, + P_OV_REPLY); if (!ok) dec_rs_pending(mdev); kfree(digest); out: - drbd_free_ee(mdev, e); + if (e) + drbd_free_ee(mdev, e); dec_unacked(mdev); - return ok; } @@ -1122,8 +1139,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); struct digest_info *di; - int digest_size; void *digest; + sector_t sector = e->sector; + unsigned int size = e->size; + int digest_size; int ok, eq = 0; if (unlikely(cancel)) { @@ -1153,16 +1172,21 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } } - dec_unacked(mdev); + /* Free e and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. */ + drbd_free_ee(mdev, e); if (!eq) - drbd_ov_oos_found(mdev, e->sector, e->size); + drbd_ov_oos_found(mdev, sector, size); else ov_oos_print(mdev); - ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size, + ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); - drbd_free_ee(mdev, e); + dec_unacked(mdev); --mdev->ov_left; |