summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/cciss_scsi.c11
-rw-r--r--drivers/block/drbd/drbd_actlog.c8
-rw-r--r--drivers/block/drbd/drbd_bitmap.c30
-rw-r--r--drivers/block/drbd/drbd_int.h45
-rw-r--r--drivers/block/drbd/drbd_main.c97
-rw-r--r--drivers/block/drbd/drbd_nl.c40
-rw-r--r--drivers/block/drbd/drbd_proc.c3
-rw-r--r--drivers/block/drbd/drbd_receiver.c38
-rw-r--r--drivers/block/drbd/drbd_req.c111
-rw-r--r--drivers/block/drbd/drbd_worker.c12
-rw-r--r--drivers/block/floppy.c25
-rw-r--r--drivers/block/loop.c8
-rw-r--r--drivers/block/mg_disk.c13
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c166
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h5
-rw-r--r--drivers/block/nbd.c8
-rw-r--r--drivers/block/rbd.c820
-rw-r--r--drivers/block/rbd_types.h1
-rw-r--r--drivers/block/umem.c17
-rw-r--r--drivers/block/virtio_blk.c115
-rw-r--r--drivers/block/xen-blkback/common.h2
-rw-r--r--drivers/block/xen-blkfront.c63
22 files changed, 1052 insertions, 586 deletions
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index acda773b3720..38aa6dda6b81 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -763,16 +763,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
{
case CMD_TARGET_STATUS:
/* Pass it up to the upper layers... */
- if( ei->ScsiStatus)
- {
-#if 0
- printk(KERN_WARNING "cciss: cmd %p "
- "has SCSI Status = %x\n",
- c, ei->ScsiStatus);
-#endif
- cmd->result |= (ei->ScsiStatus << 1);
- }
- else { /* scsi status is zero??? How??? */
+ if (!ei->ScsiStatus) {
/* Ordinarily, this case should never happen, but there is a bug
in some released firmware revisions that allows it to happen
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e54e31b02b88..3fbef018ce55 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -411,7 +411,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
if (++mdev->al_tr_pos >
div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
@@ -876,7 +876,11 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
unsigned int enr, count = 0;
struct lc_element *e;
- if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+ /* this should be an empty REQ_FLUSH */
+ if (size == 0)
+ return 0;
+
+ if (size < 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
return 0;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index b5c5ff53cb57..d84566496746 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -889,6 +889,7 @@ struct bm_aio_ctx {
unsigned int done;
unsigned flags;
#define BM_AIO_COPY_PAGES 1
+#define BM_WRITE_ALL_PAGES 2
int error;
struct kref kref;
};
@@ -1059,7 +1060,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
if (rw & WRITE) {
- if (bm_test_page_unchanged(b->bm_pages[i])) {
+ if (!(flags & BM_WRITE_ALL_PAGES) &&
+ bm_test_page_unchanged(b->bm_pages[i])) {
dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
continue;
}
@@ -1096,7 +1098,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
if (ctx->error) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
err = -EIO; /* ctx->error ? */
}
@@ -1141,6 +1143,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
}
/**
+ * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
+ *
+ * Will write all pages.
+ */
+int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0);
+}
+
+/**
* drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
* @mdev: DRBD device.
* @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
@@ -1212,7 +1225,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
if (ctx->error)
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
/* that should force detach, so the in memory bitmap will be
* gone in a moment as well. */
@@ -1475,10 +1488,17 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
first_word = 0;
spin_lock_irq(&b->bm_lock);
}
-
/* last page (respectively only page, for first page == last page) */
last_word = MLPP(el >> LN2_BPL);
- bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+ /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples).
+ * ==> e = 32767, el = 32768, last_page = 2,
+ * and now last_word = 0.
+ * We do not want to touch last_page in this case,
+ * as we did not allocate it, it is not present in bitmap->bm_pages.
+ */
+ if (last_word)
+ bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
/* possibly trailing bits.
* example: (e & 63) == 63, el will be e+1.
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 02f013a073a7..b953cc7c9c00 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -813,7 +813,6 @@ enum {
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
SEND_PING, /* whether asender should send a ping asap */
- UNPLUG_QUEUED, /* only relevant with kernel 2.4 */
UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
MD_DIRTY, /* current uuids and flags not yet on disk */
DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */
@@ -824,7 +823,6 @@ enum {
CRASHED_PRIMARY, /* This node was a crashed primary.
* Gets cleared when the state.conn
* goes into C_CONNECTED state. */
- NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */
CONSIDER_RESYNC,
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
@@ -834,6 +832,7 @@ enum {
BITMAP_IO_QUEUED, /* Started bitmap IO */
GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
WAS_IO_ERROR, /* Local disk failed returned IO error */
+ FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
@@ -851,6 +850,13 @@ enum {
AL_SUSPENDED, /* Activity logging is currently suspended. */
AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */
STATE_SENT, /* Do not change state/UUIDs while this is set */
+
+ CALLBACK_PENDING, /* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
+ * pending, from drbd worker context.
+ * If set, bdi_write_congested() returns true,
+ * so shrink_page_list() would not recurse into,
+ * and potentially deadlock on, this drbd worker.
+ */
};
struct drbd_bitmap; /* opaque for drbd_conf */
@@ -1130,8 +1136,8 @@ struct drbd_conf {
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
int rs_planed; /* resync sectors already planned */
atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
- int peer_max_bio_size;
- int local_max_bio_size;
+ unsigned int peer_max_bio_size;
+ unsigned int local_max_bio_size;
};
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1435,9 +1441,9 @@ struct bm_extent {
* hash table. */
#define HT_SHIFT 8
#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
-#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
+#define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */
-#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
+#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* The old header only allows packets up to 32Kib data */
/* Number of elements in the app_reads_hash */
#define APP_R_HSIZE 15
@@ -1463,6 +1469,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
unsigned long al_enr);
@@ -1840,12 +1847,20 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
}
+enum drbd_force_detach_flags {
+ DRBD_IO_ERROR,
+ DRBD_META_IO_ERROR,
+ DRBD_FORCE_DETACH,
+};
+
#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
-static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
+static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
+ enum drbd_force_detach_flags forcedetach,
+ const char *where)
{
switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON:
- if (!forcedetach) {
+ if (forcedetach == DRBD_IO_ERROR) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s.\n", where);
if (mdev->state.disk > D_INCONSISTENT)
@@ -1856,6 +1871,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
case EP_DETACH:
case EP_CALL_HELPER:
set_bit(WAS_IO_ERROR, &mdev->flags);
+ if (forcedetach == DRBD_FORCE_DETACH)
+ set_bit(FORCE_DETACH, &mdev->flags);
if (mdev->state.disk > D_FAILED) {
_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
dev_err(DEV,
@@ -1875,7 +1892,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
*/
#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
- int error, int forcedetach, const char *where)
+ int error, enum drbd_force_detach_flags forcedetach, const char *where)
{
if (error) {
unsigned long flags;
@@ -2405,15 +2422,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
D_ASSERT(ap_bio >= 0);
+
+ if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
+ if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
+ drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+ }
+
/* this currently does wake_up for every dec_ap_bio!
* maybe rather introduce some type of hysteresis?
* e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
if (ap_bio < mxb)
wake_up(&mdev->misc_wait);
- if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
- if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
- drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
- }
}
static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 920ede2829d6..f93a0320e952 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void _tl_clear(struct drbd_conf *mdev);
MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
"Lars Ellenberg <lars@linbit.com>");
@@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
/* Actions operating on the disk state, also want to work on
requests that got barrier acked. */
- switch (what) {
- case fail_frozen_disk_io:
- case restart_frozen_disk_io:
- list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
- req = list_entry(le, struct drbd_request, tl_requests);
- _req_mod(req, what);
- }
- case connection_lost_while_pending:
- case resend:
- break;
- default:
- dev_err(DEV, "what = %d in _tl_restart()\n", what);
+ list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
+ req = list_entry(le, struct drbd_request, tl_requests);
+ _req_mod(req, what);
}
}
@@ -459,11 +451,16 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
*/
void tl_clear(struct drbd_conf *mdev)
{
+ spin_lock_irq(&mdev->req_lock);
+ _tl_clear(mdev);
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+static void _tl_clear(struct drbd_conf *mdev)
+{
struct list_head *le, *tle;
struct drbd_request *r;
- spin_lock_irq(&mdev->req_lock);
-
_tl_restart(mdev, connection_lost_while_pending);
/* we expect this list to be empty. */
@@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev)
memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
- spin_unlock_irq(&mdev->req_lock);
}
void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
@@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
if (ns.susp_fen) {
/* case1: The outdate peer handler is successful: */
if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
- tl_clear(mdev);
if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
drbd_uuid_new_current(mdev);
clear_bit(NEW_CUR_UUID, &mdev->flags);
}
spin_lock_irq(&mdev->req_lock);
+ _tl_clear(mdev);
_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
spin_unlock_irq(&mdev->req_lock);
}
@@ -1514,6 +1510,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* Do not change the order of the if above and the two below... */
if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
+ /* we probably will start a resync soon.
+ * make sure those things are properly reset. */
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+ drbd_rs_cancel_all(mdev);
+
drbd_send_uuids(mdev);
drbd_send_state(mdev, ns);
}
@@ -1630,9 +1633,24 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
eh = mdev->ldev->dc.on_io_error;
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
- /* Immediately allow completion of all application IO, that waits
- for completion from the local disk. */
- tl_abort_disk_io(mdev);
+ if (was_io_error && eh == EP_CALL_HELPER)
+ drbd_khelper(mdev, "local-io-error");
+
+ /* Immediately allow completion of all application IO,
+ * that waits for completion from the local disk,
+ * if this was a force-detach due to disk_timeout
+ * or administrator request (drbdsetup detach --force).
+ * Do NOT abort otherwise.
+ * Aborting local requests may cause serious problems,
+ * if requests are completed to upper layers already,
+ * and then later the already submitted local bio completes.
+ * This can cause DMA into former bio pages that meanwhile
+ * have been re-used for other things.
+ * So aborting local requests may cause crashes,
+ * or even worse, silent data corruption.
+ */
+ if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
+ tl_abort_disk_io(mdev);
/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
@@ -1653,9 +1671,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
drbd_md_sync(mdev);
}
put_ldev(mdev);
-
- if (was_io_error && eh == EP_CALL_HELPER)
- drbd_khelper(mdev, "local-io-error");
}
/* second half of local IO error, failure to attach,
@@ -1669,10 +1684,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
"ASSERT FAILED: disk is %s while going diskless\n",
drbd_disk_str(mdev->state.disk));
- mdev->rs_total = 0;
- mdev->rs_failed = 0;
- atomic_set(&mdev->rs_pending_cnt, 0);
-
if (ns.conn >= C_CONNECTED)
drbd_send_state(mdev, ns);
@@ -2194,7 +2205,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
{
struct p_sizes p;
sector_t d_size, u_size;
- int q_order_type, max_bio_size;
+ int q_order_type;
+ unsigned int max_bio_size;
int ok;
if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -2203,7 +2215,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
u_size = mdev->ldev->dc.disk_size;
q_order_type = drbd_queue_order_type(mdev);
max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
- max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
+ max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
put_ldev(mdev);
} else {
d_size = 0;
@@ -2214,7 +2226,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
/* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
if (mdev->agreed_pro_version <= 94)
- max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+ max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
p.d_size = cpu_to_be64(d_size);
p.u_size = cpu_to_be64(u_size);
@@ -3521,9 +3533,9 @@ static void drbd_cleanup(void)
}
/**
- * drbd_congested() - Callback for pdflush
+ * drbd_congested() - Callback for the flusher thread
* @congested_data: User data
- * @bdi_bits: Bits pdflush is currently interested in
+ * @bdi_bits: Bits the BDI flusher thread is currently interested in
*
* Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
*/
@@ -3541,6 +3553,22 @@ static int drbd_congested(void *congested_data, int bdi_bits)
goto out;
}
+ if (test_bit(CALLBACK_PENDING, &mdev->flags)) {
+ r |= (1 << BDI_async_congested);
+ /* Without good local data, we would need to read from remote,
+ * and that would need the worker thread as well, which is
+ * currently blocked waiting for that usermode helper to
+ * finish.
+ */
+ if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
+ r |= (1 << BDI_sync_congested);
+ else
+ put_ldev(mdev);
+ r &= bdi_bits;
+ reason = 'c';
+ goto out;
+ }
+
if (get_ldev(mdev)) {
q = bdev_get_queue(mdev->ldev->backing_bdev);
r = bdi_congested(&q->backing_dev_info, bdi_bits);
@@ -3604,6 +3632,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
q->backing_dev_info.congested_data = mdev;
blk_queue_make_request(q, drbd_make_request);
+ blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
@@ -3870,7 +3899,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
/* this was a try anyways ... */
dev_err(DEV, "meta data update failed!\n");
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
}
/* Update mdev->ldev->md.la_size_sect,
@@ -3950,9 +3979,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
spin_lock_irq(&mdev->req_lock);
if (mdev->state.conn < C_CONNECTED) {
- int peer;
+ unsigned int peer;
peer = be32_to_cpu(buffer->la_peer_max_bio_size);
- peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
+ peer = max(peer, DRBD_MAX_BIO_SIZE_SAFE);
mdev->peer_max_bio_size = peer;
}
spin_unlock_irq(&mdev->req_lock);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 6d4de6a72e80..edb490aad8b4 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -147,6 +147,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
char *argv[] = {usermode_helper, cmd, mb, NULL };
int ret;
+ if (current == mdev->worker.task)
+ set_bit(CALLBACK_PENDING, &mdev->flags);
+
snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
if (get_net_conf(mdev)) {
@@ -189,6 +192,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
+ if (current == mdev->worker.task)
+ clear_bit(CALLBACK_PENDING, &mdev->flags);
+
if (ret < 0) /* Ignore any ERRNOs we got. */
ret = 0;
@@ -668,8 +674,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
la_size_changed && md_moved ? "size changed and md moved" :
la_size_changed ? "size changed" : "md moved");
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
- err = drbd_bitmap_io(mdev, &drbd_bm_write,
- "size changed", BM_LOCKED_MASK);
+ err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+ "size changed", BM_LOCKED_MASK);
if (err) {
rv = dev_size_error;
goto out;
@@ -795,8 +801,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
{
struct request_queue * const q = mdev->rq_queue;
- int max_hw_sectors = max_bio_size >> 9;
- int max_segments = 0;
+ unsigned int max_hw_sectors = max_bio_size >> 9;
+ unsigned int max_segments = 0;
if (get_ldev_if_state(mdev, D_ATTACHING)) {
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
@@ -829,7 +835,7 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_
void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
{
- int now, new, local, peer;
+ unsigned int now, new, local, peer;
now = queue_max_hw_sectors(mdev->rq_queue) << 9;
local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
@@ -840,13 +846,14 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
mdev->local_max_bio_size = local;
put_ldev(mdev);
}
+ local = min(local, DRBD_MAX_BIO_SIZE);
/* We may ignore peer limits if the peer is modern enough.
Because new from 8.3.8 onwards the peer can use multiple
BIOs for a single peer_request */
if (mdev->state.conn >= C_CONNECTED) {
if (mdev->agreed_pro_version < 94) {
- peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+ peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
} else if (mdev->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET;
@@ -854,10 +861,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
peer = DRBD_MAX_BIO_SIZE;
}
- new = min_t(int, local, peer);
+ new = min(local, peer);
if (mdev->state.role == R_PRIMARY && new < now)
- dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+ dev_err(DEV, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
if (new != now)
dev_info(DEV, "max BIO size = %u\n", new);
@@ -950,6 +957,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
* to realize a "hot spare" feature (not that I'd recommend that) */
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+ /* make sure there is no leftover from previous force-detach attempts */
+ clear_bit(FORCE_DETACH, &mdev->flags);
+
+ /* and no leftover from previously aborted resync or verify, either */
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+
/* allocation not in the IO path, cqueue thread context */
nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
if (!nbc) {
@@ -1345,6 +1360,7 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
}
if (dt.detach_force) {
+ set_bit(FORCE_DETACH, &mdev->flags);
drbd_force_state(mdev, NS(disk, D_FAILED));
reply->ret_code = SS_SUCCESS;
goto out;
@@ -1962,9 +1978,11 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
int retcode;
/* If there is still bitmap IO pending, probably because of a previous
- * resync just being finished, wait for it before requesting a new resync. */
+ * resync just being finished, wait for it before requesting a new resync.
+ * Also wait for it's after_state_ch(). */
drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+ drbd_flush_workqueue(mdev);
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
@@ -2003,9 +2021,11 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
int retcode;
/* If there is still bitmap IO pending, probably because of a previous
- * resync just being finished, wait for it before requesting a new resync. */
+ * resync just being finished, wait for it before requesting a new resync.
+ * Also wait for it's after_state_ch(). */
drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+ drbd_flush_workqueue(mdev);
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 869bada2ed06..5496104f90b9 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -245,6 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
mdev->state.role == R_SECONDARY) {
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
} else {
+ /* reset mdev->congestion_reason */
+ bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
+
seq_printf(seq,
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ea4836e0ae98..c74ca2df7431 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -277,6 +277,9 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
int i;
+ if (page == NULL)
+ return;
+
if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
i = page_chain_free(page);
else {
@@ -316,7 +319,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
gfp_t gfp_mask) __must_hold(local)
{
struct drbd_epoch_entry *e;
- struct page *page;
+ struct page *page = NULL;
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
@@ -329,9 +332,11 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
return NULL;
}
- page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
- if (!page)
- goto fail;
+ if (data_size) {
+ page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
+ if (!page)
+ goto fail;
+ }
INIT_HLIST_NODE(&e->collision);
e->epoch = NULL;
@@ -1270,7 +1275,6 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
data_size -= dgs;
- ERR_IF(data_size == 0) return NULL;
ERR_IF(data_size & 0x1ff) return NULL;
ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL;
@@ -1291,6 +1295,9 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __
if (!e)
return NULL;
+ if (!data_size)
+ return e;
+
ds = data_size;
page = e->pages;
page_chain_for_each(page) {
@@ -1715,6 +1722,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
dp_flags = be32_to_cpu(p->dp_flags);
rw |= wire_flags_to_bio(mdev, dp_flags);
+ if (e->pages == NULL) {
+ D_ASSERT(e->size == 0);
+ D_ASSERT(dp_flags & DP_FLUSH);
+ }
if (dp_flags & DP_MAY_SET_IN_SYNC)
e->flags |= EE_MAY_SET_IN_SYNC;
@@ -3801,11 +3812,18 @@ void drbd_free_tl_hash(struct drbd_conf *mdev)
mdev->ee_hash = NULL;
mdev->ee_hash_s = 0;
- /* paranoia code */
- for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
- if (h->first)
- dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
- (int)(h - mdev->tl_hash), h->first);
+ /* We may not have had the chance to wait for all locally pending
+ * application requests. The hlist_add_fake() prevents access after
+ * free on master bio completion. */
+ for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) {
+ struct drbd_request *req;
+ struct hlist_node *pos, *n;
+ hlist_for_each_entry_safe(req, pos, n, h, collision) {
+ hlist_del_init(&req->collision);
+ hlist_add_fake(&req->collision);
+ }
+ }
+
kfree(mdev->tl_hash);
mdev->tl_hash = NULL;
mdev->tl_hash_s = 0;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9c5c84946b05..01b2ac641c7b 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
_req_may_be_done_not_susp(req, m);
break;
@@ -472,12 +472,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+ if (req->rq_state & RQ_LOCAL_ABORTED) {
+ _req_may_be_done(req, m);
+ break;
+ }
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
goto_queue_for_net_read:
+ D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+
/* no point in retrying if there is no good remote data,
* or we have no connection. */
if (mdev->state.pdsk != D_UP_TO_DATE) {
@@ -690,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case resend:
+ /* Simply complete (local only) READs. */
+ if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
+ _req_may_be_done(req, m);
+ break;
+ }
+
/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
before the connection loss (B&C only); only P_BARRIER_ACK was missing.
Trowing them out of the TL here by pretending we got a BARRIER_ACK
@@ -765,6 +776,40 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s
return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
}
+static void maybe_pull_ahead(struct drbd_conf *mdev)
+{
+ int congested = 0;
+
+ /* If I don't even have good local storage, we can not reasonably try
+ * to pull ahead of the peer. We also need the local reference to make
+ * sure mdev->act_log is there.
+ * Note: caller has to make sure that net_conf is there.
+ */
+ if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
+ return;
+
+ if (mdev->net_conf->cong_fill &&
+ atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
+ dev_info(DEV, "Congestion-fill threshold reached\n");
+ congested = 1;
+ }
+
+ if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
+ dev_info(DEV, "Congestion-extents threshold reached\n");
+ congested = 1;
+ }
+
+ if (congested) {
+ queue_barrier(mdev); /* last barrier, after mirrored writes */
+
+ if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
+ _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
+ else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
+ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
+ }
+ put_ldev(mdev);
+}
+
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
{
const int rw = bio_rw(bio);
@@ -795,7 +840,15 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
req->private_bio = NULL;
}
if (rw == WRITE) {
- remote = 1;
+ /* Need to replicate writes. Unless it is an empty flush,
+ * which is better mapped to a DRBD P_BARRIER packet,
+ * also for drbd wire protocol compatibility reasons. */
+ if (unlikely(size == 0)) {
+ /* The only size==0 bios we expect are empty flushes. */
+ D_ASSERT(bio->bi_rw & REQ_FLUSH);
+ remote = 0;
+ } else
+ remote = 1;
} else {
/* READ || READA */
if (local) {
@@ -831,8 +884,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
* extent. This waits for any resync activity in the corresponding
* resync extent to finish, and, if necessary, pulls in the target
* extent into the activity log, which involves further disk io because
- * of transactional on-disk meta data updates. */
- if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+ * of transactional on-disk meta data updates.
+ * Empty flushes don't need to go into the activity log, they can only
+ * flush data for pending writes which are already in there. */
+ if (rw == WRITE && local && size
+ && !test_bit(AL_SUSPENDED, &mdev->flags)) {
req->rq_state |= RQ_IN_ACT_LOG;
drbd_al_begin_io(mdev, sector);
}
@@ -955,7 +1011,10 @@ allocate_barrier:
if (rw == WRITE && _req_conflicts(req))
goto fail_conflicting;
- list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
+ /* no point in adding empty flushes to the transfer log,
+ * they are mapped to drbd barriers already. */
+ if (likely(size!=0))
+ list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
/* NOTE remote first: to get the concurrent write detection right,
* we must register the request before start of local IO. */
@@ -972,29 +1031,16 @@ allocate_barrier:
_req_mod(req, queue_for_send_oos);
if (remote &&
- mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
- int congested = 0;
-
- if (mdev->net_conf->cong_fill &&
- atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
- dev_info(DEV, "Congestion-fill threshold reached\n");
- congested = 1;
- }
-
- if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
- dev_info(DEV, "Congestion-extents threshold reached\n");
- congested = 1;
- }
-
- if (congested) {
- queue_barrier(mdev); /* last barrier, after mirrored writes */
-
- if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
- _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
- else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
- _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
- }
- }
+ mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96)
+ maybe_pull_ahead(mdev);
+
+ /* If this was a flush, queue a drbd barrier/start a new epoch.
+ * Unless the current epoch was empty anyways, or we are not currently
+ * replicating, in which case there is no point. */
+ if (unlikely(bio->bi_rw & REQ_FLUSH)
+ && mdev->newest_tle->n_writes
+ && drbd_should_do_remote(mdev->state))
+ queue_barrier(mdev);
spin_unlock_irq(&mdev->req_lock);
kfree(b); /* if someone else has beaten us to it... */
@@ -1093,13 +1139,12 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
/*
* what we "blindly" assume:
*/
- D_ASSERT(bio->bi_size > 0);
D_ASSERT((bio->bi_size & 0x1ff) == 0);
/* to make some things easier, force alignment of requests within the
* granularity of our hash tables */
s_enr = bio->bi_sector >> HT_SHIFT;
- e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
+ e_enr = bio->bi_size ? (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT : s_enr;
if (likely(s_enr == e_enr)) {
do {
@@ -1257,7 +1302,7 @@ void request_timer_fn(unsigned long data)
time_after(now, req->start_time + dt) &&
!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
- __drbd_chk_io_error(mdev, 1);
+ __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH);
}
nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
spin_unlock_irq(&mdev->req_lock);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 620c70ff2231..6bce2cc179d4 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
if (list_empty(&mdev->read_ee))
wake_up(&mdev->ee_wait);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
drbd_queue_work(&mdev->data.work, &e->w);
@@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
: list_empty(&mdev->active_ee);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (is_syncer_req)
@@ -1501,14 +1501,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
return;
}
- if (mdev->state.conn < C_AHEAD) {
- /* In case a previous resync run was aborted by an IO error/detach on the peer. */
- drbd_rs_cancel_all(mdev);
- /* This should be done when we abort the resync. We definitely do not
- want to have this for connections going back and forth between
- Ahead/Behind and SyncSource/SyncTarget */
- }
-
if (side == C_SYNC_TARGET) {
/* Since application IO was locked out during C_WF_BITMAP_T and
C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index cce7df367b79..a7d6347aaa79 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -191,6 +191,7 @@ static int print_unex = 1;
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/uaccess.h>
+#include <linux/async.h>
/*
* PS/2 floppies have much slower step rates than regular floppies.
@@ -671,6 +672,7 @@ static void __reschedule_timeout(int drive, const char *message)
if (drive == current_reqD)
drive = current_drive;
+ __cancel_delayed_work(&fd_timeout);
if (drive < 0 || drive >= N_DRIVE) {
delay = 20UL * HZ;
@@ -2515,8 +2517,7 @@ static int make_raw_rw_request(void)
set_fdc((long)current_req->rq_disk->private_data);
raw_cmd = &default_raw_cmd;
- raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_DISK |
- FD_RAW_NEED_SEEK;
+ raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK;
raw_cmd->cmd_count = NR_RW;
if (rq_data_dir(current_req) == READ) {
raw_cmd->flags |= FD_RAW_READ;
@@ -4122,7 +4123,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
return get_disk(disks[drive]);
}
-static int __init floppy_init(void)
+static int __init do_floppy_init(void)
{
int i, unit, drive;
int err, dr;
@@ -4337,6 +4338,24 @@ out_put_disk:
return err;
}
+#ifndef MODULE
+static __init void floppy_async_init(void *data, async_cookie_t cookie)
+{
+ do_floppy_init();
+}
+#endif
+
+static int __init floppy_init(void)
+{
+#ifdef MODULE
+ return do_floppy_init();
+#else
+ /* Don't hold up the bootup by the floppy initialization */
+ async_schedule(floppy_async_init, NULL);
+ return 0;
+#endif
+}
+
static const struct io_region {
int offset;
int size;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bbca966f8f66..3bba65510d23 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1597,14 +1597,12 @@ static int loop_add(struct loop_device **l, int i)
struct gendisk *disk;
int err;
+ err = -ENOMEM;
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
- if (!lo) {
- err = -ENOMEM;
+ if (!lo)
goto out;
- }
- err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
- if (err < 0)
+ if (!idr_pre_get(&loop_index_idr, GFP_KERNEL))
goto out_free_dev;
if (i >= 0) {
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 76fa3deaee84..1788f491e0fb 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -780,9 +780,9 @@ static const struct block_device_operations mg_disk_ops = {
.getgeo = mg_getgeo
};
-static int mg_suspend(struct platform_device *plat_dev, pm_message_t state)
+static int mg_suspend(struct device *dev)
{
- struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ struct mg_drv_data *prv_data = dev->platform_data;
struct mg_host *host = prv_data->host;
if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
@@ -804,9 +804,9 @@ static int mg_suspend(struct platform_device *plat_dev, pm_message_t state)
return 0;
}
-static int mg_resume(struct platform_device *plat_dev)
+static int mg_resume(struct device *dev)
{
- struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ struct mg_drv_data *prv_data = dev->platform_data;
struct mg_host *host = prv_data->host;
if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
@@ -825,6 +825,8 @@ static int mg_resume(struct platform_device *plat_dev)
return 0;
}
+static SIMPLE_DEV_PM_OPS(mg_pm, mg_suspend, mg_resume);
+
static int mg_probe(struct platform_device *plat_dev)
{
struct mg_host *host;
@@ -1074,11 +1076,10 @@ static int mg_remove(struct platform_device *plat_dev)
static struct platform_driver mg_disk_driver = {
.probe = mg_probe,
.remove = mg_remove,
- .suspend = mg_suspend,
- .resume = mg_resume,
.driver = {
.name = MG_DEV_NAME,
.owner = THIS_MODULE,
+ .pm = &mg_pm,
}
};
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 264bc77dcb91..a8fddeb3d638 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -37,6 +37,7 @@
#include <linux/kthread.h>
#include <../drivers/ata/ahci.h>
#include <linux/export.h>
+#include <linux/debugfs.h>
#include "mtip32xx.h"
#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
@@ -85,6 +86,7 @@ static int instance;
* allocated in mtip_init().
*/
static int mtip_major;
+static struct dentry *dfs_parent;
static DEFINE_SPINLOCK(rssd_index_lock);
static DEFINE_IDA(rssd_index_ida);
@@ -2546,7 +2548,7 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
}
/*
- * Sysfs register/status dump.
+ * Sysfs status dump.
*
* @dev Pointer to the device structure, passed by the kernrel.
* @attr Pointer to the device_attribute structure passed by the kernel.
@@ -2555,45 +2557,68 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
* return value
* The size, in bytes, of the data copied into buf.
*/
-static ssize_t mtip_hw_show_registers(struct device *dev,
+static ssize_t mtip_hw_show_status(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- u32 group_allocated;
struct driver_data *dd = dev_to_disk(dev)->private_data;
int size = 0;
+
+ if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
+ size += sprintf(buf, "%s", "thermal_shutdown\n");
+ else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
+ size += sprintf(buf, "%s", "write_protect\n");
+ else
+ size += sprintf(buf, "%s", "online\n");
+
+ return size;
+}
+
+static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+
+static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
+ size_t len, loff_t *offset)
+{
+ struct driver_data *dd = (struct driver_data *)f->private_data;
+ char buf[MTIP_DFS_MAX_BUF_SIZE];
+ u32 group_allocated;
+ int size = *offset;
int n;
- size += sprintf(&buf[size], "Hardware\n--------\n");
- size += sprintf(&buf[size], "S ACTive : [ 0x");
+ if (!len || size)
+ return 0;
+
+ if (size < 0)
+ return -EINVAL;
+
+ size += sprintf(&buf[size], "H/ S ACTive : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--)
size += sprintf(&buf[size], "%08X ",
readl(dd->port->s_active[n]));
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Command Issue : [ 0x");
+ size += sprintf(&buf[size], "H/ Command Issue : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--)
size += sprintf(&buf[size], "%08X ",
readl(dd->port->cmd_issue[n]));
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Completed : [ 0x");
+ size += sprintf(&buf[size], "H/ Completed : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--)
size += sprintf(&buf[size], "%08X ",
readl(dd->port->completed[n]));
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "PORT IRQ STAT : [ 0x%08X ]\n",
+ size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n",
readl(dd->port->mmio + PORT_IRQ_STAT));
- size += sprintf(&buf[size], "HOST IRQ STAT : [ 0x%08X ]\n",
+ size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n",
readl(dd->mmio + HOST_IRQ_STAT));
size += sprintf(&buf[size], "\n");
- size += sprintf(&buf[size], "Local\n-----\n");
- size += sprintf(&buf[size], "Allocated : [ 0x");
+ size += sprintf(&buf[size], "L/ Allocated : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--) {
if (sizeof(long) > sizeof(u32))
@@ -2605,7 +2630,7 @@ static ssize_t mtip_hw_show_registers(struct device *dev,
}
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Commands in Q: [ 0x");
+ size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--) {
if (sizeof(long) > sizeof(u32))
@@ -2617,44 +2642,53 @@ static ssize_t mtip_hw_show_registers(struct device *dev,
}
size += sprintf(&buf[size], "]\n");
- return size;
+ *offset = size <= len ? size : len;
+ size = copy_to_user(ubuf, buf, *offset);
+ if (size)
+ return -EFAULT;
+
+ return *offset;
}
-static ssize_t mtip_hw_show_status(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
+ size_t len, loff_t *offset)
{
- struct driver_data *dd = dev_to_disk(dev)->private_data;
- int size = 0;
+ struct driver_data *dd = (struct driver_data *)f->private_data;
+ char buf[MTIP_DFS_MAX_BUF_SIZE];
+ int size = *offset;
- if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
- size += sprintf(buf, "%s", "thermal_shutdown\n");
- else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
- size += sprintf(buf, "%s", "write_protect\n");
- else
- size += sprintf(buf, "%s", "online\n");
-
- return size;
-}
+ if (!len || size)
+ return 0;
-static ssize_t mtip_hw_show_flags(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct driver_data *dd = dev_to_disk(dev)->private_data;
- int size = 0;
+ if (size < 0)
+ return -EINVAL;
- size += sprintf(&buf[size], "Flag in port struct : [ %08lX ]\n",
+ size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
dd->port->flags);
- size += sprintf(&buf[size], "Flag in dd struct : [ %08lX ]\n",
+ size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n",
dd->dd_flag);
- return size;
+ *offset = size <= len ? size : len;
+ size = copy_to_user(ubuf, buf, *offset);
+ if (size)
+ return -EFAULT;
+
+ return *offset;
}
-static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL);
-static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
-static DEVICE_ATTR(flags, S_IRUGO, mtip_hw_show_flags, NULL);
+static const struct file_operations mtip_regs_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = mtip_hw_read_registers,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations mtip_flags_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = mtip_hw_read_flags,
+ .llseek = no_llseek,
+};
/*
* Create the sysfs related attributes.
@@ -2671,15 +2705,9 @@ static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
if (!kobj || !dd)
return -EINVAL;
- if (sysfs_create_file(kobj, &dev_attr_registers.attr))
- dev_warn(&dd->pdev->dev,
- "Error creating 'registers' sysfs entry\n");
if (sysfs_create_file(kobj, &dev_attr_status.attr))
dev_warn(&dd->pdev->dev,
"Error creating 'status' sysfs entry\n");
- if (sysfs_create_file(kobj, &dev_attr_flags.attr))
- dev_warn(&dd->pdev->dev,
- "Error creating 'flags' sysfs entry\n");
return 0;
}
@@ -2698,13 +2726,39 @@ static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
if (!kobj || !dd)
return -EINVAL;
- sysfs_remove_file(kobj, &dev_attr_registers.attr);
sysfs_remove_file(kobj, &dev_attr_status.attr);
- sysfs_remove_file(kobj, &dev_attr_flags.attr);
return 0;
}
+static int mtip_hw_debugfs_init(struct driver_data *dd)
+{
+ if (!dfs_parent)
+ return -1;
+
+ dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent);
+ if (IS_ERR_OR_NULL(dd->dfs_node)) {
+ dev_warn(&dd->pdev->dev,
+ "Error creating node %s under debugfs\n",
+ dd->disk->disk_name);
+ dd->dfs_node = NULL;
+ return -1;
+ }
+
+ debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
+ &mtip_flags_fops);
+ debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
+ &mtip_regs_fops);
+
+ return 0;
+}
+
+static void mtip_hw_debugfs_exit(struct driver_data *dd)
+{
+ debugfs_remove_recursive(dd->dfs_node);
+}
+
+
/*
* Perform any init/resume time hardware setup
*
@@ -3730,6 +3784,7 @@ skip_create_disk:
mtip_hw_sysfs_init(dd, kobj);
kobject_put(kobj);
}
+ mtip_hw_debugfs_init(dd);
if (dd->mtip_svc_handler) {
set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
@@ -3755,6 +3810,8 @@ start_service_thread:
return rv;
kthread_run_error:
+ mtip_hw_debugfs_exit(dd);
+
/* Delete our gendisk. This also removes the device from /dev */
del_gendisk(dd->disk);
@@ -3805,6 +3862,7 @@ static int mtip_block_remove(struct driver_data *dd)
kobject_put(kobj);
}
}
+ mtip_hw_debugfs_exit(dd);
/*
* Delete our gendisk structure. This also removes the device
@@ -4152,10 +4210,20 @@ static int __init mtip_init(void)
}
mtip_major = error;
+ if (!dfs_parent) {
+ dfs_parent = debugfs_create_dir("rssd", NULL);
+ if (IS_ERR_OR_NULL(dfs_parent)) {
+ printk(KERN_WARNING "Error creating debugfs parent\n");
+ dfs_parent = NULL;
+ }
+ }
+
/* Register our PCI operations. */
error = pci_register_driver(&mtip_pci_driver);
- if (error)
+ if (error) {
+ debugfs_remove(dfs_parent);
unregister_blkdev(mtip_major, MTIP_DRV_NAME);
+ }
return error;
}
@@ -4172,6 +4240,8 @@ static int __init mtip_init(void)
*/
static void __exit mtip_exit(void)
{
+ debugfs_remove_recursive(dfs_parent);
+
/* Release the allocated major block device number. */
unregister_blkdev(mtip_major, MTIP_DRV_NAME);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index b2c88da26b2a..f51fc23d17bb 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -26,7 +26,6 @@
#include <linux/ata.h>
#include <linux/interrupt.h>
#include <linux/genhd.h>
-#include <linux/version.h>
/* Offset of Subsystem Device ID in pci confoguration space */
#define PCI_SUBSYSTEM_DEVICEID 0x2E
@@ -111,6 +110,8 @@
#define dbg_printk(format, arg...)
#endif
+#define MTIP_DFS_MAX_BUF_SIZE 1024
+
#define __force_bit2int (unsigned int __force)
enum {
@@ -447,6 +448,8 @@ struct driver_data {
unsigned long dd_flag; /* NOTE: use atomic bit operations on this */
struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
+
+ struct dentry *dfs_node;
};
#endif
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 061427a75d37..d07c9f7fded6 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -154,6 +154,7 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
struct msghdr msg;
struct kvec iov;
sigset_t blocked, oldset;
+ unsigned long pflags = current->flags;
if (unlikely(!sock)) {
dev_err(disk_to_dev(nbd->disk),
@@ -167,8 +168,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
siginitsetinv(&blocked, sigmask(SIGKILL));
sigprocmask(SIG_SETMASK, &blocked, &oldset);
+ current->flags |= PF_MEMALLOC;
do {
- sock->sk->sk_allocation = GFP_NOIO;
+ sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
iov.iov_base = buf;
iov.iov_len = size;
msg.msg_name = NULL;
@@ -214,6 +216,7 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
} while (size > 0);
sigprocmask(SIG_SETMASK, &oldset, NULL);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
return result;
}
@@ -405,6 +408,7 @@ static int nbd_do_it(struct nbd_device *nbd)
BUG_ON(nbd->magic != NBD_MAGIC);
+ sk_set_memalloc(nbd->sock->sk);
nbd->pid = task_pid_nr(current);
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
@@ -481,7 +485,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
nbd_end_request(req);
} else {
spin_lock(&nbd->queue_lock);
- list_add(&req->queuelist, &nbd->queue_head);
+ list_add_tail(&req->queuelist, &nbd->queue_head);
spin_unlock(&nbd->queue_lock);
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 65665c9c42c6..9917943a3572 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -55,8 +55,6 @@
#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
-#define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
-#define RBD_MAX_POOL_NAME_LEN 64
#define RBD_MAX_SNAP_NAME_LEN 32
#define RBD_MAX_OPT_LEN 1024
@@ -78,13 +76,12 @@
*/
struct rbd_image_header {
u64 image_size;
- char block_name[32];
+ char *object_prefix;
__u8 obj_order;
__u8 crypt_type;
__u8 comp_type;
struct ceph_snap_context *snapc;
size_t snap_names_len;
- u64 snap_seq;
u32 total_snaps;
char *snap_names;
@@ -150,7 +147,7 @@ struct rbd_snap {
* a single device
*/
struct rbd_device {
- int id; /* blkdev unique id */
+ int dev_id; /* blkdev unique id */
int major; /* blkdev assigned major */
struct gendisk *disk; /* blkdev's gendisk and rq */
@@ -163,20 +160,24 @@ struct rbd_device {
spinlock_t lock; /* queue lock */
struct rbd_image_header header;
- char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
- int obj_len;
- char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
- char pool_name[RBD_MAX_POOL_NAME_LEN];
- int poolid;
+ char *image_name;
+ size_t image_name_len;
+ char *header_name;
+ char *pool_name;
+ int pool_id;
struct ceph_osd_event *watch_event;
struct ceph_osd_request *watch_request;
/* protects updating the header */
struct rw_semaphore header_rwsem;
- char snap_name[RBD_MAX_SNAP_NAME_LEN];
+ /* name of the snapshot this device reads from */
+ char *snap_name;
+ /* id of the snapshot this device reads from */
u64 snap_id; /* current snapshot id */
- int read_only;
+ /* whether the snap_id this device reads from still exists */
+ bool snap_exists;
+ int read_only;
struct list_head node;
@@ -201,8 +202,7 @@ static ssize_t rbd_snap_add(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count);
-static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
- struct rbd_snap *snap);
+static void __rbd_remove_snap_dev(struct rbd_snap *snap);
static ssize_t rbd_add(struct bus_type *bus, const char *buf,
size_t count);
@@ -240,7 +240,7 @@ static void rbd_put_dev(struct rbd_device *rbd_dev)
put_device(&rbd_dev->dev);
}
-static int __rbd_refresh_header(struct rbd_device *rbd_dev);
+static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver);
static int rbd_open(struct block_device *bdev, fmode_t mode)
{
@@ -273,9 +273,9 @@ static const struct block_device_operations rbd_bd_ops = {
/*
* Initialize an rbd client instance.
- * We own *opt.
+ * We own *ceph_opts.
*/
-static struct rbd_client *rbd_client_create(struct ceph_options *opt,
+static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts,
struct rbd_options *rbd_opts)
{
struct rbd_client *rbdc;
@@ -291,10 +291,10 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt,
mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rbdc->client = ceph_create_client(opt, rbdc, 0, 0);
+ rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0);
if (IS_ERR(rbdc->client))
goto out_mutex;
- opt = NULL; /* Now rbdc->client is responsible for opt */
+ ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
ret = ceph_open_session(rbdc->client);
if (ret < 0)
@@ -317,23 +317,23 @@ out_mutex:
mutex_unlock(&ctl_mutex);
kfree(rbdc);
out_opt:
- if (opt)
- ceph_destroy_options(opt);
+ if (ceph_opts)
+ ceph_destroy_options(ceph_opts);
return ERR_PTR(ret);
}
/*
* Find a ceph client with specific addr and configuration.
*/
-static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
+static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts)
{
struct rbd_client *client_node;
- if (opt->flags & CEPH_OPT_NOSHARE)
+ if (ceph_opts->flags & CEPH_OPT_NOSHARE)
return NULL;
list_for_each_entry(client_node, &rbd_client_list, node)
- if (ceph_compare_options(opt, client_node->client) == 0)
+ if (!ceph_compare_options(ceph_opts, client_node->client))
return client_node;
return NULL;
}
@@ -349,7 +349,7 @@ enum {
/* string args above */
};
-static match_table_t rbdopt_tokens = {
+static match_table_t rbd_opts_tokens = {
{Opt_notify_timeout, "notify_timeout=%d"},
/* int args above */
/* string args above */
@@ -358,11 +358,11 @@ static match_table_t rbdopt_tokens = {
static int parse_rbd_opts_token(char *c, void *private)
{
- struct rbd_options *rbdopt = private;
+ struct rbd_options *rbd_opts = private;
substring_t argstr[MAX_OPT_ARGS];
int token, intval, ret;
- token = match_token(c, rbdopt_tokens, argstr);
+ token = match_token(c, rbd_opts_tokens, argstr);
if (token < 0)
return -EINVAL;
@@ -383,7 +383,7 @@ static int parse_rbd_opts_token(char *c, void *private)
switch (token) {
case Opt_notify_timeout:
- rbdopt->notify_timeout = intval;
+ rbd_opts->notify_timeout = intval;
break;
default:
BUG_ON(token);
@@ -400,7 +400,7 @@ static struct rbd_client *rbd_get_client(const char *mon_addr,
char *options)
{
struct rbd_client *rbdc;
- struct ceph_options *opt;
+ struct ceph_options *ceph_opts;
struct rbd_options *rbd_opts;
rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
@@ -409,29 +409,29 @@ static struct rbd_client *rbd_get_client(const char *mon_addr,
rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
- opt = ceph_parse_options(options, mon_addr,
- mon_addr + mon_addr_len,
- parse_rbd_opts_token, rbd_opts);
- if (IS_ERR(opt)) {
+ ceph_opts = ceph_parse_options(options, mon_addr,
+ mon_addr + mon_addr_len,
+ parse_rbd_opts_token, rbd_opts);
+ if (IS_ERR(ceph_opts)) {
kfree(rbd_opts);
- return ERR_CAST(opt);
+ return ERR_CAST(ceph_opts);
}
spin_lock(&rbd_client_list_lock);
- rbdc = __rbd_client_find(opt);
+ rbdc = __rbd_client_find(ceph_opts);
if (rbdc) {
/* using an existing client */
kref_get(&rbdc->kref);
spin_unlock(&rbd_client_list_lock);
- ceph_destroy_options(opt);
+ ceph_destroy_options(ceph_opts);
kfree(rbd_opts);
return rbdc;
}
spin_unlock(&rbd_client_list_lock);
- rbdc = rbd_client_create(opt, rbd_opts);
+ rbdc = rbd_client_create(ceph_opts, rbd_opts);
if (IS_ERR(rbdc))
kfree(rbd_opts);
@@ -480,46 +480,60 @@ static void rbd_coll_release(struct kref *kref)
kfree(coll);
}
+static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
+{
+ return !memcmp(&ondisk->text,
+ RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT));
+}
+
/*
* Create a new header structure, translate header format from the on-disk
* header.
*/
static int rbd_header_from_disk(struct rbd_image_header *header,
struct rbd_image_header_ondisk *ondisk,
- u32 allocated_snaps,
- gfp_t gfp_flags)
+ u32 allocated_snaps)
{
- u32 i, snap_count;
+ u32 snap_count;
- if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)))
+ if (!rbd_dev_ondisk_valid(ondisk))
return -ENXIO;
snap_count = le32_to_cpu(ondisk->snap_count);
- if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context))
- / sizeof (*ondisk))
+ if (snap_count > (SIZE_MAX - sizeof(struct ceph_snap_context))
+ / sizeof (u64))
return -EINVAL;
header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
- snap_count * sizeof (*ondisk),
- gfp_flags);
+ snap_count * sizeof(u64),
+ GFP_KERNEL);
if (!header->snapc)
return -ENOMEM;
- header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
if (snap_count) {
+ header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
header->snap_names = kmalloc(header->snap_names_len,
- gfp_flags);
+ GFP_KERNEL);
if (!header->snap_names)
goto err_snapc;
header->snap_sizes = kmalloc(snap_count * sizeof(u64),
- gfp_flags);
+ GFP_KERNEL);
if (!header->snap_sizes)
goto err_names;
} else {
+ WARN_ON(ondisk->snap_names_len);
+ header->snap_names_len = 0;
header->snap_names = NULL;
header->snap_sizes = NULL;
}
- memcpy(header->block_name, ondisk->block_name,
+
+ header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1,
+ GFP_KERNEL);
+ if (!header->object_prefix)
+ goto err_sizes;
+
+ memcpy(header->object_prefix, ondisk->block_name,
sizeof(ondisk->block_name));
+ header->object_prefix[sizeof (ondisk->block_name)] = '\0';
header->image_size = le64_to_cpu(ondisk->image_size);
header->obj_order = ondisk->options.order;
@@ -527,11 +541,13 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
header->comp_type = ondisk->options.comp_type;
atomic_set(&header->snapc->nref, 1);
- header->snap_seq = le64_to_cpu(ondisk->snap_seq);
+ header->snapc->seq = le64_to_cpu(ondisk->snap_seq);
header->snapc->num_snaps = snap_count;
header->total_snaps = snap_count;
if (snap_count && allocated_snaps == snap_count) {
+ int i;
+
for (i = 0; i < snap_count; i++) {
header->snapc->snaps[i] =
le64_to_cpu(ondisk->snaps[i].id);
@@ -540,16 +556,22 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
}
/* copy snapshot names */
- memcpy(header->snap_names, &ondisk->snaps[i],
+ memcpy(header->snap_names, &ondisk->snaps[snap_count],
header->snap_names_len);
}
return 0;
+err_sizes:
+ kfree(header->snap_sizes);
+ header->snap_sizes = NULL;
err_names:
kfree(header->snap_names);
+ header->snap_names = NULL;
err_snapc:
kfree(header->snapc);
+ header->snapc = NULL;
+
return -ENOMEM;
}
@@ -575,52 +597,50 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
return -ENOENT;
}
-static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
+static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size)
{
- struct rbd_image_header *header = &dev->header;
- struct ceph_snap_context *snapc = header->snapc;
- int ret = -ENOENT;
-
- BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME));
+ int ret;
- down_write(&dev->header_rwsem);
+ down_write(&rbd_dev->header_rwsem);
- if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME,
+ if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
sizeof (RBD_SNAP_HEAD_NAME))) {
- if (header->total_snaps)
- snapc->seq = header->snap_seq;
- else
- snapc->seq = 0;
- dev->snap_id = CEPH_NOSNAP;
- dev->read_only = 0;
+ rbd_dev->snap_id = CEPH_NOSNAP;
+ rbd_dev->snap_exists = false;
+ rbd_dev->read_only = 0;
if (size)
- *size = header->image_size;
+ *size = rbd_dev->header.image_size;
} else {
- ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
+ u64 snap_id = 0;
+
+ ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name,
+ &snap_id, size);
if (ret < 0)
goto done;
- dev->snap_id = snapc->seq;
- dev->read_only = 1;
+ rbd_dev->snap_id = snap_id;
+ rbd_dev->snap_exists = true;
+ rbd_dev->read_only = 1;
}
ret = 0;
done:
- up_write(&dev->header_rwsem);
+ up_write(&rbd_dev->header_rwsem);
return ret;
}
static void rbd_header_free(struct rbd_image_header *header)
{
- kfree(header->snapc);
- kfree(header->snap_names);
+ kfree(header->object_prefix);
kfree(header->snap_sizes);
+ kfree(header->snap_names);
+ ceph_put_snap_context(header->snapc);
}
/*
* get the actual striped segment name, offset and length
*/
static u64 rbd_get_segment(struct rbd_image_header *header,
- const char *block_name,
+ const char *object_prefix,
u64 ofs, u64 len,
char *seg_name, u64 *segofs)
{
@@ -628,7 +648,7 @@ static u64 rbd_get_segment(struct rbd_image_header *header,
if (seg_name)
snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
- "%s.%012llx", block_name, seg);
+ "%s.%012llx", object_prefix, seg);
ofs = ofs & ((1 << header->obj_order) - 1);
len = min_t(u64, len, (1 << header->obj_order) - ofs);
@@ -726,9 +746,8 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
* split_bio will BUG_ON if this is not the case
*/
dout("bio_chain_clone split! total=%d remaining=%d"
- "bi_size=%d\n",
- (int)total, (int)len-total,
- (int)old_chain->bi_size);
+ "bi_size=%u\n",
+ total, len - total, old_chain->bi_size);
/* split the bio. We'll release it either in the next
call, or it will have to be released outside */
@@ -777,22 +796,24 @@ err_out:
/*
* helpers for osd request op vectors.
*/
-static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
- int num_ops,
- int opcode,
- u32 payload_len)
-{
- *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
- GFP_NOIO);
- if (!*ops)
- return -ENOMEM;
- (*ops)[0].op = opcode;
+static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops,
+ int opcode, u32 payload_len)
+{
+ struct ceph_osd_req_op *ops;
+
+ ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO);
+ if (!ops)
+ return NULL;
+
+ ops[0].op = opcode;
+
/*
* op extent offset and length will be set later on
* in calc_raw_layout()
*/
- (*ops)[0].payload_len = payload_len;
- return 0;
+ ops[0].payload_len = payload_len;
+
+ return ops;
}
static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
@@ -808,8 +829,8 @@ static void rbd_coll_end_req_index(struct request *rq,
struct request_queue *q;
int min, max, i;
- dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n",
- coll, index, ret, len);
+ dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n",
+ coll, index, ret, (unsigned long long) len);
if (!rq)
return;
@@ -848,16 +869,15 @@ static void rbd_coll_end_req(struct rbd_request *req,
* Send ceph osd request
*/
static int rbd_do_request(struct request *rq,
- struct rbd_device *dev,
+ struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- const char *obj, u64 ofs, u64 len,
+ const char *object_name, u64 ofs, u64 len,
struct bio *bio,
struct page **pages,
int num_pages,
int flags,
struct ceph_osd_req_op *ops,
- int num_reply,
struct rbd_req_coll *coll,
int coll_index,
void (*rbd_cb)(struct ceph_osd_request *req,
@@ -887,15 +907,13 @@ static int rbd_do_request(struct request *rq,
req_data->coll_index = coll_index;
}
- dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs);
-
- down_read(&dev->header_rwsem);
+ dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name,
+ (unsigned long long) ofs, (unsigned long long) len);
- osdc = &dev->rbd_client->client->osdc;
+ osdc = &rbd_dev->rbd_client->client->osdc;
req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
false, GFP_NOIO, pages, bio);
if (!req) {
- up_read(&dev->header_rwsem);
ret = -ENOMEM;
goto done_pages;
}
@@ -912,7 +930,7 @@ static int rbd_do_request(struct request *rq,
reqhead = req->r_request->front.iov_base;
reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
- strncpy(req->r_oid, obj, sizeof(req->r_oid));
+ strncpy(req->r_oid, object_name, sizeof(req->r_oid));
req->r_oid_len = strlen(req->r_oid);
layout = &req->r_file_layout;
@@ -920,7 +938,7 @@ static int rbd_do_request(struct request *rq,
layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
layout->fl_stripe_count = cpu_to_le32(1);
layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
- layout->fl_pg_pool = cpu_to_le32(dev->poolid);
+ layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id);
ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
req, ops);
@@ -929,7 +947,6 @@ static int rbd_do_request(struct request *rq,
snapc,
&mtime,
req->r_oid, req->r_oid_len);
- up_read(&dev->header_rwsem);
if (linger_req) {
ceph_osdc_set_request_linger(osdc, req);
@@ -944,8 +961,9 @@ static int rbd_do_request(struct request *rq,
ret = ceph_osdc_wait_request(osdc, req);
if (ver)
*ver = le64_to_cpu(req->r_reassert_version.version);
- dout("reassert_ver=%lld\n",
- le64_to_cpu(req->r_reassert_version.version));
+ dout("reassert_ver=%llu\n",
+ (unsigned long long)
+ le64_to_cpu(req->r_reassert_version.version));
ceph_osdc_put_request(req);
}
return ret;
@@ -977,9 +995,10 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
op = (void *)(replyhead + 1);
rc = le32_to_cpu(replyhead->result);
bytes = le64_to_cpu(op->extent.length);
- read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+ read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
- dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
+ dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n",
+ (unsigned long long) bytes, read_op, (int) rc);
if (rc == -ENOENT && read_op) {
zero_bio_chain(req_data->bio, 0);
@@ -1006,14 +1025,12 @@ static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg
/*
* Do a synchronous ceph osd operation
*/
-static int rbd_req_sync_op(struct rbd_device *dev,
+static int rbd_req_sync_op(struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- int opcode,
int flags,
- struct ceph_osd_req_op *orig_ops,
- int num_reply,
- const char *obj,
+ struct ceph_osd_req_op *ops,
+ const char *object_name,
u64 ofs, u64 len,
char *buf,
struct ceph_osd_request **linger_req,
@@ -1022,45 +1039,28 @@ static int rbd_req_sync_op(struct rbd_device *dev,
int ret;
struct page **pages;
int num_pages;
- struct ceph_osd_req_op *ops = orig_ops;
- u32 payload_len;
+
+ BUG_ON(ops == NULL);
num_pages = calc_pages_for(ofs , len);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages))
return PTR_ERR(pages);
- if (!orig_ops) {
- payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
- ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
- if (ret < 0)
- goto done;
-
- if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
- ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
- if (ret < 0)
- goto done_ops;
- }
- }
-
- ret = rbd_do_request(NULL, dev, snapc, snapid,
- obj, ofs, len, NULL,
+ ret = rbd_do_request(NULL, rbd_dev, snapc, snapid,
+ object_name, ofs, len, NULL,
pages, num_pages,
flags,
ops,
- 2,
NULL, 0,
NULL,
linger_req, ver);
if (ret < 0)
- goto done_ops;
+ goto done;
if ((flags & CEPH_OSD_FLAG_READ) && buf)
ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
-done_ops:
- if (!orig_ops)
- rbd_destroy_ops(ops);
done:
ceph_release_page_vector(pages, num_pages);
return ret;
@@ -1070,10 +1070,10 @@ done:
* Do an asynchronous ceph osd operation
*/
static int rbd_do_op(struct request *rq,
- struct rbd_device *rbd_dev ,
+ struct rbd_device *rbd_dev,
struct ceph_snap_context *snapc,
u64 snapid,
- int opcode, int flags, int num_reply,
+ int opcode, int flags,
u64 ofs, u64 len,
struct bio *bio,
struct rbd_req_coll *coll,
@@ -1091,14 +1091,15 @@ static int rbd_do_op(struct request *rq,
return -ENOMEM;
seg_len = rbd_get_segment(&rbd_dev->header,
- rbd_dev->header.block_name,
+ rbd_dev->header.object_prefix,
ofs, len,
seg_name, &seg_ofs);
payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
- ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
- if (ret < 0)
+ ret = -ENOMEM;
+ ops = rbd_create_rw_ops(1, opcode, payload_len);
+ if (!ops)
goto done;
/* we've taken care of segment sizes earlier when we
@@ -1112,7 +1113,6 @@ static int rbd_do_op(struct request *rq,
NULL, 0,
flags,
ops,
- num_reply,
coll, coll_index,
rbd_req_cb, 0, NULL);
@@ -1136,7 +1136,6 @@ static int rbd_req_write(struct request *rq,
return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
- 2,
ofs, len, bio, coll, coll_index);
}
@@ -1155,55 +1154,58 @@ static int rbd_req_read(struct request *rq,
snapid,
CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
- 2,
ofs, len, bio, coll, coll_index);
}
/*
* Request sync osd read
*/
-static int rbd_req_sync_read(struct rbd_device *dev,
- struct ceph_snap_context *snapc,
+static int rbd_req_sync_read(struct rbd_device *rbd_dev,
u64 snapid,
- const char *obj,
+ const char *object_name,
u64 ofs, u64 len,
char *buf,
u64 *ver)
{
- return rbd_req_sync_op(dev, NULL,
+ struct ceph_osd_req_op *ops;
+ int ret;
+
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0);
+ if (!ops)
+ return -ENOMEM;
+
+ ret = rbd_req_sync_op(rbd_dev, NULL,
snapid,
- CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
- NULL,
- 1, obj, ofs, len, buf, NULL, ver);
+ ops, object_name, ofs, len, buf, NULL, ver);
+ rbd_destroy_ops(ops);
+
+ return ret;
}
/*
* Request sync osd watch
*/
-static int rbd_req_sync_notify_ack(struct rbd_device *dev,
+static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev,
u64 ver,
- u64 notify_id,
- const char *obj)
+ u64 notify_id)
{
struct ceph_osd_req_op *ops;
- struct page **pages = NULL;
int ret;
- ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0);
+ if (!ops)
+ return -ENOMEM;
- ops[0].watch.ver = cpu_to_le64(dev->header.obj_version);
+ ops[0].watch.ver = cpu_to_le64(ver);
ops[0].watch.cookie = notify_id;
ops[0].watch.flag = 0;
- ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP,
- obj, 0, 0, NULL,
- pages, 0,
+ ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP,
+ rbd_dev->header_name, 0, 0, NULL,
+ NULL, 0,
CEPH_OSD_FLAG_READ,
ops,
- 1,
NULL, 0,
rbd_simple_req_cb, 0, NULL);
@@ -1213,54 +1215,53 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev,
static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
- struct rbd_device *dev = (struct rbd_device *)data;
+ struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ u64 hver;
int rc;
- if (!dev)
+ if (!rbd_dev)
return;
- dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
- notify_id, (int)opcode);
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_refresh_header(dev);
- mutex_unlock(&ctl_mutex);
+ dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n",
+ rbd_dev->header_name, (unsigned long long) notify_id,
+ (unsigned int) opcode);
+ rc = rbd_refresh_header(rbd_dev, &hver);
if (rc)
pr_warning(RBD_DRV_NAME "%d got notification but failed to "
- " update snaps: %d\n", dev->major, rc);
+ " update snaps: %d\n", rbd_dev->major, rc);
- rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
+ rbd_req_sync_notify_ack(rbd_dev, hver, notify_id);
}
/*
* Request sync osd watch
*/
-static int rbd_req_sync_watch(struct rbd_device *dev,
- const char *obj,
- u64 ver)
+static int rbd_req_sync_watch(struct rbd_device *rbd_dev)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ int ret;
- int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
+ if (!ops)
+ return -ENOMEM;
ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
- (void *)dev, &dev->watch_event);
+ (void *)rbd_dev, &rbd_dev->watch_event);
if (ret < 0)
goto fail;
- ops[0].watch.ver = cpu_to_le64(ver);
- ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
+ ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version);
+ ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
ops[0].watch.flag = 1;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL,
- &dev->watch_request, NULL);
+ rbd_dev->header_name,
+ 0, 0, NULL,
+ &rbd_dev->watch_request, NULL);
if (ret < 0)
goto fail_event;
@@ -1269,8 +1270,8 @@ static int rbd_req_sync_watch(struct rbd_device *dev,
return 0;
fail_event:
- ceph_osdc_cancel_event(dev->watch_event);
- dev->watch_event = NULL;
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
fail:
rbd_destroy_ops(ops);
return ret;
@@ -1279,64 +1280,65 @@ fail:
/*
* Request sync osd unwatch
*/
-static int rbd_req_sync_unwatch(struct rbd_device *dev,
- const char *obj)
+static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev)
{
struct ceph_osd_req_op *ops;
+ int ret;
- int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
+ if (!ops)
+ return -ENOMEM;
ops[0].watch.ver = 0;
- ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
+ ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
ops[0].watch.flag = 0;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, NULL);
+ rbd_dev->header_name,
+ 0, 0, NULL, NULL, NULL);
+
rbd_destroy_ops(ops);
- ceph_osdc_cancel_event(dev->watch_event);
- dev->watch_event = NULL;
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+ rbd_dev->watch_event = NULL;
return ret;
}
struct rbd_notify_info {
- struct rbd_device *dev;
+ struct rbd_device *rbd_dev;
};
static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
{
- struct rbd_device *dev = (struct rbd_device *)data;
- if (!dev)
+ struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ if (!rbd_dev)
return;
- dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
- notify_id, (int)opcode);
+ dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n",
+ rbd_dev->header_name, (unsigned long long) notify_id,
+ (unsigned int) opcode);
}
/*
* Request sync osd notify
*/
-static int rbd_req_sync_notify(struct rbd_device *dev,
- const char *obj)
+static int rbd_req_sync_notify(struct rbd_device *rbd_dev)
{
struct ceph_osd_req_op *ops;
- struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct ceph_osd_event *event;
struct rbd_notify_info info;
int payload_len = sizeof(u32) + sizeof(u32);
int ret;
- ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len);
- if (ret < 0)
- return ret;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len);
+ if (!ops)
+ return -ENOMEM;
- info.dev = dev;
+ info.rbd_dev = rbd_dev;
ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
(void *)&info, &event);
@@ -1349,12 +1351,12 @@ static int rbd_req_sync_notify(struct rbd_device *dev,
ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
ops[0].watch.timeout = 12;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, NULL);
+ rbd_dev->header_name,
+ 0, 0, NULL, NULL, NULL);
if (ret < 0)
goto fail_event;
@@ -1373,36 +1375,37 @@ fail:
/*
* Request sync osd read
*/
-static int rbd_req_sync_exec(struct rbd_device *dev,
- const char *obj,
- const char *cls,
- const char *method,
+static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
+ const char *object_name,
+ const char *class_name,
+ const char *method_name,
const char *data,
int len,
u64 *ver)
{
struct ceph_osd_req_op *ops;
- int cls_len = strlen(cls);
- int method_len = strlen(method);
- int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
- cls_len + method_len + len);
- if (ret < 0)
- return ret;
+ int class_name_len = strlen(class_name);
+ int method_name_len = strlen(method_name);
+ int ret;
- ops[0].cls.class_name = cls;
- ops[0].cls.class_len = (__u8)cls_len;
- ops[0].cls.method_name = method;
- ops[0].cls.method_len = (__u8)method_len;
+ ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL,
+ class_name_len + method_name_len + len);
+ if (!ops)
+ return -ENOMEM;
+
+ ops[0].cls.class_name = class_name;
+ ops[0].cls.class_len = (__u8) class_name_len;
+ ops[0].cls.method_name = method_name;
+ ops[0].cls.method_len = (__u8) method_name_len;
ops[0].cls.argc = 0;
ops[0].cls.indata = data;
ops[0].cls.indata_len = len;
- ret = rbd_req_sync_op(dev, NULL,
+ ret = rbd_req_sync_op(rbd_dev, NULL,
CEPH_NOSNAP,
- 0,
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
ops,
- 1, obj, 0, 0, NULL, NULL, ver);
+ object_name, 0, 0, NULL, NULL, ver);
rbd_destroy_ops(ops);
@@ -1437,10 +1440,12 @@ static void rbd_rq_fn(struct request_queue *q)
struct bio *bio;
struct bio *rq_bio, *next_bio = NULL;
bool do_write;
- int size, op_size = 0;
+ unsigned int size;
+ u64 op_size = 0;
u64 ofs;
int num_segs, cur_seg = 0;
struct rbd_req_coll *coll;
+ struct ceph_snap_context *snapc;
/* peek at request from block layer */
if (!rq)
@@ -1467,23 +1472,38 @@ static void rbd_rq_fn(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
+ down_read(&rbd_dev->header_rwsem);
+
+ if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) {
+ up_read(&rbd_dev->header_rwsem);
+ dout("request for non-existent snapshot");
+ spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(rq, -ENXIO);
+ continue;
+ }
+
+ snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+
+ up_read(&rbd_dev->header_rwsem);
+
dout("%s 0x%x bytes at 0x%llx\n",
do_write ? "write" : "read",
- size, blk_rq_pos(rq) * SECTOR_SIZE);
+ size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
coll = rbd_alloc_coll(num_segs);
if (!coll) {
spin_lock_irq(q->queue_lock);
__blk_end_request_all(rq, -ENOMEM);
+ ceph_put_snap_context(snapc);
continue;
}
do {
/* a bio clone to be passed down to OSD req */
- dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
+ dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
op_size = rbd_get_segment(&rbd_dev->header,
- rbd_dev->header.block_name,
+ rbd_dev->header.object_prefix,
ofs, size,
NULL, NULL);
kref_get(&coll->kref);
@@ -1499,7 +1519,7 @@ static void rbd_rq_fn(struct request_queue *q)
/* init OSD command: write or read */
if (do_write)
rbd_req_write(rq, rbd_dev,
- rbd_dev->header.snapc,
+ snapc,
ofs,
op_size, bio,
coll, cur_seg);
@@ -1522,6 +1542,8 @@ next_seg:
if (bp)
bio_pair_release(bp);
spin_lock_irq(q->queue_lock);
+
+ ceph_put_snap_context(snapc);
}
}
@@ -1592,18 +1614,19 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
return -ENOMEM;
rc = rbd_req_sync_read(rbd_dev,
- NULL, CEPH_NOSNAP,
- rbd_dev->obj_md_name,
+ CEPH_NOSNAP,
+ rbd_dev->header_name,
0, len,
(char *)dh, &ver);
if (rc < 0)
goto out_dh;
- rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
+ rc = rbd_header_from_disk(header, dh, snap_count);
if (rc < 0) {
if (rc == -ENXIO)
pr_warning("unrecognized header format"
- " for image %s", rbd_dev->obj);
+ " for image %s\n",
+ rbd_dev->image_name);
goto out_dh;
}
@@ -1628,7 +1651,7 @@ out_dh:
/*
* create a snapshot
*/
-static int rbd_header_add_snap(struct rbd_device *dev,
+static int rbd_header_add_snap(struct rbd_device *rbd_dev,
const char *snap_name,
gfp_t gfp_flags)
{
@@ -1636,16 +1659,15 @@ static int rbd_header_add_snap(struct rbd_device *dev,
u64 new_snapid;
int ret;
void *data, *p, *e;
- u64 ver;
struct ceph_mon_client *monc;
/* we should create a snapshot only if we're pointing at the head */
- if (dev->snap_id != CEPH_NOSNAP)
+ if (rbd_dev->snap_id != CEPH_NOSNAP)
return -EINVAL;
- monc = &dev->rbd_client->client->monc;
- ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
- dout("created snapid=%lld\n", new_snapid);
+ monc = &rbd_dev->rbd_client->client->monc;
+ ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid);
+ dout("created snapid=%llu\n", (unsigned long long) new_snapid);
if (ret < 0)
return ret;
@@ -1659,19 +1681,13 @@ static int rbd_header_add_snap(struct rbd_device *dev,
ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
ceph_encode_64_safe(&p, e, new_snapid, bad);
- ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
- data, p - data, &ver);
+ ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+ "rbd", "snap_add",
+ data, p - data, NULL);
kfree(data);
- if (ret < 0)
- return ret;
-
- down_write(&dev->header_rwsem);
- dev->header.snapc->seq = new_snapid;
- up_write(&dev->header_rwsem);
-
- return 0;
+ return ret < 0 ? ret : 0;
bad:
return -ERANGE;
}
@@ -1679,52 +1695,52 @@ bad:
static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
{
struct rbd_snap *snap;
+ struct rbd_snap *next;
- while (!list_empty(&rbd_dev->snaps)) {
- snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
- __rbd_remove_snap_dev(rbd_dev, snap);
- }
+ list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node)
+ __rbd_remove_snap_dev(snap);
}
/*
* only read the first part of the ondisk header, without the snaps info
*/
-static int __rbd_refresh_header(struct rbd_device *rbd_dev)
+static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
{
int ret;
struct rbd_image_header h;
- u64 snap_seq;
- int follow_seq = 0;
ret = rbd_read_header(rbd_dev, &h);
if (ret < 0)
return ret;
- /* resized? */
- set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE);
-
down_write(&rbd_dev->header_rwsem);
- snap_seq = rbd_dev->header.snapc->seq;
- if (rbd_dev->header.total_snaps &&
- rbd_dev->header.snapc->snaps[0] == snap_seq)
- /* pointing at the head, will need to follow that
- if head moves */
- follow_seq = 1;
+ /* resized? */
+ if (rbd_dev->snap_id == CEPH_NOSNAP) {
+ sector_t size = (sector_t) h.image_size / SECTOR_SIZE;
- kfree(rbd_dev->header.snapc);
- kfree(rbd_dev->header.snap_names);
+ dout("setting size to %llu sectors", (unsigned long long) size);
+ set_capacity(rbd_dev->disk, size);
+ }
+
+ /* rbd_dev->header.object_prefix shouldn't change */
kfree(rbd_dev->header.snap_sizes);
+ kfree(rbd_dev->header.snap_names);
+ /* osd requests may still refer to snapc */
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ if (hver)
+ *hver = h.obj_version;
+ rbd_dev->header.obj_version = h.obj_version;
+ rbd_dev->header.image_size = h.image_size;
rbd_dev->header.total_snaps = h.total_snaps;
rbd_dev->header.snapc = h.snapc;
rbd_dev->header.snap_names = h.snap_names;
rbd_dev->header.snap_names_len = h.snap_names_len;
rbd_dev->header.snap_sizes = h.snap_sizes;
- if (follow_seq)
- rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0];
- else
- rbd_dev->header.snapc->seq = snap_seq;
+ /* Free the extra copy of the object prefix */
+ WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix));
+ kfree(h.object_prefix);
ret = __rbd_init_snaps_header(rbd_dev);
@@ -1733,6 +1749,17 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev)
return ret;
}
+static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
+{
+ int ret;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ ret = __rbd_refresh_header(rbd_dev, hver);
+ mutex_unlock(&ctl_mutex);
+
+ return ret;
+}
+
static int rbd_init_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk;
@@ -1762,7 +1789,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
goto out;
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
- rbd_dev->id);
+ rbd_dev->dev_id);
disk->major = rbd_dev->major;
disk->first_minor = 0;
disk->fops = &rbd_bd_ops;
@@ -1819,8 +1846,13 @@ static ssize_t rbd_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+ sector_t size;
+
+ down_read(&rbd_dev->header_rwsem);
+ size = get_capacity(rbd_dev->disk);
+ up_read(&rbd_dev->header_rwsem);
- return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
+ return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
}
static ssize_t rbd_major_show(struct device *dev,
@@ -1848,12 +1880,20 @@ static ssize_t rbd_pool_show(struct device *dev,
return sprintf(buf, "%s\n", rbd_dev->pool_name);
}
+static ssize_t rbd_pool_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+
+ return sprintf(buf, "%d\n", rbd_dev->pool_id);
+}
+
static ssize_t rbd_name_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- return sprintf(buf, "%s\n", rbd_dev->obj);
+ return sprintf(buf, "%s\n", rbd_dev->image_name);
}
static ssize_t rbd_snap_show(struct device *dev,
@@ -1871,23 +1911,18 @@ static ssize_t rbd_image_refresh(struct device *dev,
size_t size)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
- int rc;
- int ret = size;
-
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ int ret;
- rc = __rbd_refresh_header(rbd_dev);
- if (rc < 0)
- ret = rc;
+ ret = rbd_refresh_header(rbd_dev, NULL);
- mutex_unlock(&ctl_mutex);
- return ret;
+ return ret < 0 ? ret : size;
}
static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
@@ -1898,6 +1933,7 @@ static struct attribute *rbd_attrs[] = {
&dev_attr_major.attr,
&dev_attr_client_id.attr,
&dev_attr_pool.attr,
+ &dev_attr_pool_id.attr,
&dev_attr_name.attr,
&dev_attr_current_snap.attr,
&dev_attr_refresh.attr,
@@ -1977,15 +2013,13 @@ static struct device_type rbd_snap_device_type = {
.release = rbd_snap_dev_release,
};
-static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
- struct rbd_snap *snap)
+static void __rbd_remove_snap_dev(struct rbd_snap *snap)
{
list_del(&snap->node);
device_unregister(&snap->dev);
}
-static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
- struct rbd_snap *snap,
+static int rbd_register_snap_dev(struct rbd_snap *snap,
struct device *parent)
{
struct device *dev = &snap->dev;
@@ -2000,29 +2034,36 @@ static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
return ret;
}
-static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
- int i, const char *name,
- struct rbd_snap **snapp)
+static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev,
+ int i, const char *name)
{
+ struct rbd_snap *snap;
int ret;
- struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
+
+ snap = kzalloc(sizeof (*snap), GFP_KERNEL);
if (!snap)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
+
+ ret = -ENOMEM;
snap->name = kstrdup(name, GFP_KERNEL);
+ if (!snap->name)
+ goto err;
+
snap->size = rbd_dev->header.snap_sizes[i];
snap->id = rbd_dev->header.snapc->snaps[i];
if (device_is_registered(&rbd_dev->dev)) {
- ret = rbd_register_snap_dev(rbd_dev, snap,
- &rbd_dev->dev);
+ ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
if (ret < 0)
goto err;
}
- *snapp = snap;
- return 0;
+
+ return snap;
+
err:
kfree(snap->name);
kfree(snap);
- return ret;
+
+ return ERR_PTR(ret);
}
/*
@@ -2055,7 +2096,6 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
const char *name, *first_name;
int i = rbd_dev->header.total_snaps;
struct rbd_snap *snap, *old_snap = NULL;
- int ret;
struct list_head *p, *n;
first_name = rbd_dev->header.snap_names;
@@ -2070,8 +2110,15 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
cur_id = rbd_dev->header.snapc->snaps[i - 1];
if (!i || old_snap->id < cur_id) {
- /* old_snap->id was skipped, thus was removed */
- __rbd_remove_snap_dev(rbd_dev, old_snap);
+ /*
+ * old_snap->id was skipped, thus was
+ * removed. If this rbd_dev is mapped to
+ * the removed snapshot, record that it no
+ * longer exists, to prevent further I/O.
+ */
+ if (rbd_dev->snap_id == old_snap->id)
+ rbd_dev->snap_exists = false;
+ __rbd_remove_snap_dev(old_snap);
continue;
}
if (old_snap->id == cur_id) {
@@ -2091,9 +2138,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
if (cur_id >= old_snap->id)
break;
/* a new snapshot */
- ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
- if (ret < 0)
- return ret;
+ snap = __rbd_add_snap_dev(rbd_dev, i - 1, name);
+ if (IS_ERR(snap))
+ return PTR_ERR(snap);
/* note that we add it backward so using n and not p */
list_add(&snap->node, n);
@@ -2107,9 +2154,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
WARN_ON(1);
return -EINVAL;
}
- ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
- if (ret < 0)
- return ret;
+ snap = __rbd_add_snap_dev(rbd_dev, i - 1, name);
+ if (IS_ERR(snap))
+ return PTR_ERR(snap);
list_add(&snap->node, &rbd_dev->snaps);
}
@@ -2129,14 +2176,13 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
dev->type = &rbd_device_type;
dev->parent = &rbd_root_dev;
dev->release = rbd_dev_release;
- dev_set_name(dev, "%d", rbd_dev->id);
+ dev_set_name(dev, "%d", rbd_dev->dev_id);
ret = device_register(dev);
if (ret < 0)
goto out;
list_for_each_entry(snap, &rbd_dev->snaps, node) {
- ret = rbd_register_snap_dev(rbd_dev, snap,
- &rbd_dev->dev);
+ ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
if (ret < 0)
break;
}
@@ -2155,12 +2201,9 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
int ret, rc;
do {
- ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
- rbd_dev->header.obj_version);
+ ret = rbd_req_sync_watch(rbd_dev);
if (ret == -ERANGE) {
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
- rc = __rbd_refresh_header(rbd_dev);
- mutex_unlock(&ctl_mutex);
+ rc = rbd_refresh_header(rbd_dev, NULL);
if (rc < 0)
return rc;
}
@@ -2177,7 +2220,7 @@ static atomic64_t rbd_id_max = ATOMIC64_INIT(0);
*/
static void rbd_id_get(struct rbd_device *rbd_dev)
{
- rbd_dev->id = atomic64_inc_return(&rbd_id_max);
+ rbd_dev->dev_id = atomic64_inc_return(&rbd_id_max);
spin_lock(&rbd_dev_list_lock);
list_add_tail(&rbd_dev->node, &rbd_dev_list);
@@ -2191,7 +2234,7 @@ static void rbd_id_get(struct rbd_device *rbd_dev)
static void rbd_id_put(struct rbd_device *rbd_dev)
{
struct list_head *tmp;
- int rbd_id = rbd_dev->id;
+ int rbd_id = rbd_dev->dev_id;
int max_id;
BUG_ON(rbd_id < 1);
@@ -2282,19 +2325,58 @@ static inline size_t copy_token(const char **buf,
}
/*
- * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
+ * Finds the next token in *buf, dynamically allocates a buffer big
+ * enough to hold a copy of it, and copies the token into the new
+ * buffer. The copy is guaranteed to be terminated with '\0'. Note
+ * that a duplicate buffer is created even for a zero-length token.
+ *
+ * Returns a pointer to the newly-allocated duplicate, or a null
+ * pointer if memory for the duplicate was not available. If
+ * the lenp argument is a non-null pointer, the length of the token
+ * (not including the '\0') is returned in *lenp.
+ *
+ * If successful, the *buf pointer will be updated to point beyond
+ * the end of the found token.
+ *
+ * Note: uses GFP_KERNEL for allocation.
+ */
+static inline char *dup_token(const char **buf, size_t *lenp)
+{
+ char *dup;
+ size_t len;
+
+ len = next_token(buf);
+ dup = kmalloc(len + 1, GFP_KERNEL);
+ if (!dup)
+ return NULL;
+
+ memcpy(dup, *buf, len);
+ *(dup + len) = '\0';
+ *buf += len;
+
+ if (lenp)
+ *lenp = len;
+
+ return dup;
+}
+
+/*
+ * This fills in the pool_name, image_name, image_name_len, snap_name,
* rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
* on the list of monitor addresses and other options provided via
* /sys/bus/rbd/add.
+ *
+ * Note: rbd_dev is assumed to have been initially zero-filled.
*/
static int rbd_add_parse_args(struct rbd_device *rbd_dev,
const char *buf,
const char **mon_addrs,
size_t *mon_addrs_size,
char *options,
- size_t options_size)
+ size_t options_size)
{
- size_t len;
+ size_t len;
+ int ret;
/* The first four tokens are required */
@@ -2310,56 +2392,74 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev,
if (!len || len >= options_size)
return -EINVAL;
- len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
- if (!len || len >= sizeof (rbd_dev->pool_name))
- return -EINVAL;
-
- len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
- if (!len || len >= sizeof (rbd_dev->obj))
- return -EINVAL;
+ ret = -ENOMEM;
+ rbd_dev->pool_name = dup_token(&buf, NULL);
+ if (!rbd_dev->pool_name)
+ goto out_err;
- /* We have the object length in hand, save it. */
+ rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len);
+ if (!rbd_dev->image_name)
+ goto out_err;
- rbd_dev->obj_len = len;
+ /* Create the name of the header object */
- BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN
- < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX));
- sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX);
+ rbd_dev->header_name = kmalloc(rbd_dev->image_name_len
+ + sizeof (RBD_SUFFIX),
+ GFP_KERNEL);
+ if (!rbd_dev->header_name)
+ goto out_err;
+ sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX);
/*
- * The snapshot name is optional, but it's an error if it's
- * too long. If no snapshot is supplied, fill in the default.
+ * The snapshot name is optional. If none is is supplied,
+ * we use the default value.
*/
- len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name));
- if (!len)
+ rbd_dev->snap_name = dup_token(&buf, &len);
+ if (!rbd_dev->snap_name)
+ goto out_err;
+ if (!len) {
+ /* Replace the empty name with the default */
+ kfree(rbd_dev->snap_name);
+ rbd_dev->snap_name
+ = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL);
+ if (!rbd_dev->snap_name)
+ goto out_err;
+
memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
sizeof (RBD_SNAP_HEAD_NAME));
- else if (len >= sizeof (rbd_dev->snap_name))
- return -EINVAL;
+ }
return 0;
+
+out_err:
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->image_name);
+ kfree(rbd_dev->pool_name);
+ rbd_dev->pool_name = NULL;
+
+ return ret;
}
static ssize_t rbd_add(struct bus_type *bus,
const char *buf,
size_t count)
{
- struct rbd_device *rbd_dev;
+ char *options;
+ struct rbd_device *rbd_dev = NULL;
const char *mon_addrs = NULL;
size_t mon_addrs_size = 0;
- char *options = NULL;
struct ceph_osd_client *osdc;
int rc = -ENOMEM;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
- rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
- if (!rbd_dev)
- goto err_nomem;
options = kmalloc(count, GFP_KERNEL);
if (!options)
goto err_nomem;
+ rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
+ if (!rbd_dev)
+ goto err_nomem;
/* static rbd_device initialization */
spin_lock_init(&rbd_dev->lock);
@@ -2367,15 +2467,13 @@ static ssize_t rbd_add(struct bus_type *bus,
INIT_LIST_HEAD(&rbd_dev->snaps);
init_rwsem(&rbd_dev->header_rwsem);
- init_rwsem(&rbd_dev->header_rwsem);
-
/* generate unique id: find highest unique id, add one */
rbd_id_get(rbd_dev);
/* Fill in the device name, now that we have its id. */
BUILD_BUG_ON(DEV_NAME_LEN
< sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
- sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
+ sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
/* parse add command */
rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size,
@@ -2395,7 +2493,7 @@ static ssize_t rbd_add(struct bus_type *bus,
rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
if (rc < 0)
goto err_out_client;
- rbd_dev->poolid = rc;
+ rbd_dev->pool_id = rc;
/* register our block device */
rc = register_blkdev(0, rbd_dev->name);
@@ -2435,10 +2533,16 @@ err_out_blkdev:
err_out_client:
rbd_put_client(rbd_dev);
err_put_id:
+ if (rbd_dev->pool_name) {
+ kfree(rbd_dev->snap_name);
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->image_name);
+ kfree(rbd_dev->pool_name);
+ }
rbd_id_put(rbd_dev);
err_nomem:
- kfree(options);
kfree(rbd_dev);
+ kfree(options);
dout("Error adding device %s\n", buf);
module_put(THIS_MODULE);
@@ -2446,7 +2550,7 @@ err_nomem:
return (ssize_t) rc;
}
-static struct rbd_device *__rbd_get_dev(unsigned long id)
+static struct rbd_device *__rbd_get_dev(unsigned long dev_id)
{
struct list_head *tmp;
struct rbd_device *rbd_dev;
@@ -2454,7 +2558,7 @@ static struct rbd_device *__rbd_get_dev(unsigned long id)
spin_lock(&rbd_dev_list_lock);
list_for_each(tmp, &rbd_dev_list) {
rbd_dev = list_entry(tmp, struct rbd_device, node);
- if (rbd_dev->id == id) {
+ if (rbd_dev->dev_id == dev_id) {
spin_unlock(&rbd_dev_list_lock);
return rbd_dev;
}
@@ -2474,7 +2578,7 @@ static void rbd_dev_release(struct device *dev)
rbd_dev->watch_request);
}
if (rbd_dev->watch_event)
- rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name);
+ rbd_req_sync_unwatch(rbd_dev);
rbd_put_client(rbd_dev);
@@ -2483,6 +2587,10 @@ static void rbd_dev_release(struct device *dev)
unregister_blkdev(rbd_dev->major, rbd_dev->name);
/* done with the id, and with the rbd_dev */
+ kfree(rbd_dev->snap_name);
+ kfree(rbd_dev->header_name);
+ kfree(rbd_dev->pool_name);
+ kfree(rbd_dev->image_name);
rbd_id_put(rbd_dev);
kfree(rbd_dev);
@@ -2544,7 +2652,7 @@ static ssize_t rbd_snap_add(struct device *dev,
if (ret < 0)
goto err_unlock;
- ret = __rbd_refresh_header(rbd_dev);
+ ret = __rbd_refresh_header(rbd_dev, NULL);
if (ret < 0)
goto err_unlock;
@@ -2553,7 +2661,7 @@ static ssize_t rbd_snap_add(struct device *dev,
mutex_unlock(&ctl_mutex);
/* make a best effort, don't error if failed */
- rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name);
+ rbd_req_sync_notify(rbd_dev);
ret = count;
kfree(name);
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
index 950708688f17..0924e9e41a60 100644
--- a/drivers/block/rbd_types.h
+++ b/drivers/block/rbd_types.h
@@ -31,7 +31,6 @@
#define RBD_MIN_OBJ_ORDER 16
#define RBD_MAX_OBJ_ORDER 30
-#define RBD_MAX_OBJ_NAME_LEN 96
#define RBD_MAX_SEG_NAME_LEN 128
#define RBD_COMP_NONE 0
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index aa2712060bfb..eb0d8216f557 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -513,6 +513,21 @@ static void process_page(unsigned long data)
}
}
+static void mm_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+ struct cardinfo *card = cb->data;
+
+ spin_lock_irq(&card->lock);
+ activate(card);
+ spin_unlock_irq(&card->lock);
+ kfree(cb);
+}
+
+static int mm_check_plugged(struct cardinfo *card)
+{
+ return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb));
+}
+
static void mm_make_request(struct request_queue *q, struct bio *bio)
{
struct cardinfo *card = q->queuedata;
@@ -523,6 +538,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio)
*card->biotail = bio;
bio->bi_next = NULL;
card->biotail = &bio->bi_next;
+ if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card))
+ activate(card);
spin_unlock_irq(&card->lock);
return;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 693187df7601..c0bbeb470754 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
struct virtio_blk
{
- spinlock_t lock;
-
struct virtio_device *vdev;
struct virtqueue *vq;
@@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
unsigned int len;
unsigned long flags;
- spin_lock_irqsave(&vblk->lock, flags);
+ spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
int error;
@@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
}
/* In case queue is stopped waiting for more buffers. */
blk_start_queue(vblk->disk->queue);
- spin_unlock_irqrestore(&vblk->lock, flags);
+ spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
}
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -397,6 +395,83 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
return 0;
}
+static int virtblk_get_cache_mode(struct virtio_device *vdev)
+{
+ u8 writeback;
+ int err;
+
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
+ offsetof(struct virtio_blk_config, wce),
+ &writeback);
+ if (err)
+ writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
+
+ return writeback;
+}
+
+static void virtblk_update_cache_mode(struct virtio_device *vdev)
+{
+ u8 writeback = virtblk_get_cache_mode(vdev);
+ struct virtio_blk *vblk = vdev->priv;
+
+ if (writeback)
+ blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
+ else
+ blk_queue_flush(vblk->disk->queue, 0);
+
+ revalidate_disk(vblk->disk);
+}
+
+static const char *const virtblk_cache_types[] = {
+ "write through", "write back"
+};
+
+static ssize_t
+virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct virtio_blk *vblk = disk->private_data;
+ struct virtio_device *vdev = vblk->vdev;
+ int i;
+ u8 writeback;
+
+ BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
+ for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
+ if (sysfs_streq(buf, virtblk_cache_types[i]))
+ break;
+
+ if (i < 0)
+ return -EINVAL;
+
+ writeback = i;
+ vdev->config->set(vdev,
+ offsetof(struct virtio_blk_config, wce),
+ &writeback, sizeof(writeback));
+
+ virtblk_update_cache_mode(vdev);
+ return count;
+}
+
+static ssize_t
+virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct virtio_blk *vblk = disk->private_data;
+ u8 writeback = virtblk_get_cache_mode(vblk->vdev);
+
+ BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
+ return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
+}
+
+static const struct device_attribute dev_attr_cache_type_ro =
+ __ATTR(cache_type, S_IRUGO,
+ virtblk_cache_type_show, NULL);
+static const struct device_attribute dev_attr_cache_type_rw =
+ __ATTR(cache_type, S_IRUGO|S_IWUSR,
+ virtblk_cache_type_show, virtblk_cache_type_store);
+
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
@@ -431,7 +506,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
- spin_lock_init(&vblk->lock);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
@@ -456,7 +530,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_mempool;
}
- q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+ q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
@@ -474,8 +548,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
vblk->index = index;
/* configure queue flush support */
- if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
- blk_queue_flush(q, REQ_FLUSH);
+ virtblk_update_cache_mode(vdev);
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
@@ -553,6 +626,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_del_disk;
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
+ err = device_create_file(disk_to_dev(vblk->disk),
+ &dev_attr_cache_type_rw);
+ else
+ err = device_create_file(disk_to_dev(vblk->disk),
+ &dev_attr_cache_type_ro);
+ if (err)
+ goto out_del_disk;
return 0;
out_del_disk:
@@ -576,30 +657,20 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
int index = vblk->index;
- struct virtblk_req *vbr;
- unsigned long flags;
/* Prevent config work handler from accessing the device. */
mutex_lock(&vblk->config_lock);
vblk->config_enable = false;
mutex_unlock(&vblk->config_lock);
+ del_gendisk(vblk->disk);
+ blk_cleanup_queue(vblk->disk->queue);
+
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
flush_work(&vblk->config_work);
- del_gendisk(vblk->disk);
-
- /* Abort requests dispatched to driver. */
- spin_lock_irqsave(&vblk->lock, flags);
- while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) {
- __blk_end_request_all(vbr->req, -EIO);
- mempool_free(vbr, vblk->pool);
- }
- spin_unlock_irqrestore(&vblk->lock, flags);
-
- blk_cleanup_queue(vblk->disk->queue);
put_disk(vblk->disk);
mempool_destroy(vblk->pool);
vdev->config->del_vqs(vdev);
@@ -655,7 +726,7 @@ static const struct virtio_device_id id_table[] = {
static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
- VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
+ VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
};
/*
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 773cf27dc23f..9ad3b5ec1dc1 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -257,6 +257,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
@@ -287,6 +288,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 60eed4bdd2e4..2c2d2e5c1597 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -141,14 +141,36 @@ static int get_id_from_freelist(struct blkfront_info *info)
return free;
}
-static void add_id_to_freelist(struct blkfront_info *info,
+static int add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
+ if (info->shadow[id].req.u.rw.id != id)
+ return -EINVAL;
+ if (info->shadow[id].request == NULL)
+ return -EINVAL;
info->shadow[id].req.u.rw.id = info->shadow_free;
info->shadow[id].request = NULL;
info->shadow_free = id;
+ return 0;
}
+static const char *op_name(int op)
+{
+ static const char *const names[] = {
+ [BLKIF_OP_READ] = "read",
+ [BLKIF_OP_WRITE] = "write",
+ [BLKIF_OP_WRITE_BARRIER] = "barrier",
+ [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
+ [BLKIF_OP_DISCARD] = "discard" };
+
+ if (op < 0 || op >= ARRAY_SIZE(names))
+ return "unknown";
+
+ if (!names[op])
+ return "reserved";
+
+ return names[op];
+}
static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
{
unsigned int end = minor + nr;
@@ -746,20 +768,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
+ /*
+ * The backend has messed up and given us an id that we would
+ * never have given to it (we stamp it up to BLK_RING_SIZE -
+ * look in get_id_from_freelist.
+ */
+ if (id >= BLK_RING_SIZE) {
+ WARN(1, "%s: response to %s has incorrect id (%ld)\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ /* We can't safely get the 'struct request' as
+ * the id is busted. */
+ continue;
+ }
req = info->shadow[id].request;
if (bret->operation != BLKIF_OP_DISCARD)
blkif_completion(&info->shadow[id]);
- add_id_to_freelist(info, id);
+ if (add_id_to_freelist(info, id)) {
+ WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ continue;
+ }
error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
- printk(KERN_WARNING "blkfront: %s: discard op failed\n",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
@@ -771,18 +809,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
- printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
info->shadow[id].req.u.rw.nr_segments == 0)) {
- printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(error)) {
@@ -854,9 +888,8 @@ static int setup_blkring(struct xenbus_device *dev,
if (err)
goto fail;
- err = bind_evtchn_to_irqhandler(info->evtchn,
- blkif_interrupt,
- IRQF_SAMPLE_RANDOM, "blkif", info);
+ err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0,
+ "blkif", info);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");