summaryrefslogtreecommitdiffstats
path: root/drivers/block/rbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r--drivers/block/rbd.c231
1 files changed, 152 insertions, 79 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c800047f5835..cc06c55875b9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -826,77 +826,144 @@ static void zero_bio_chain(struct bio *chain, int start_ofs)
}
/*
- * bio_chain_clone - clone a chain of bios up to a certain length.
- * might return a bio_pair that will need to be released.
+ * Clone a portion of a bio, starting at the given byte offset
+ * and continuing for the number of bytes indicated.
*/
-static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
- struct bio_pair **bp,
- int len, gfp_t gfpmask)
-{
- struct bio *old_chain = *old;
- struct bio *new_chain = NULL;
- struct bio *tail;
- int total = 0;
-
- if (*bp) {
- bio_pair_release(*bp);
- *bp = NULL;
- }
+static struct bio *bio_clone_range(struct bio *bio_src,
+ unsigned int offset,
+ unsigned int len,
+ gfp_t gfpmask)
+{
+ struct bio_vec *bv;
+ unsigned int resid;
+ unsigned short idx;
+ unsigned int voff;
+ unsigned short end_idx;
+ unsigned short vcnt;
+ struct bio *bio;
- while (old_chain && (total < len)) {
- struct bio *tmp;
+ /* Handle the easy case for the caller */
- tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
- if (!tmp)
- goto err_out;
- gfpmask &= ~__GFP_WAIT; /* can't wait after the first */
+ if (!offset && len == bio_src->bi_size)
+ return bio_clone(bio_src, gfpmask);
- if (total + old_chain->bi_size > len) {
- struct bio_pair *bp;
+ if (WARN_ON_ONCE(!len))
+ return NULL;
+ if (WARN_ON_ONCE(len > bio_src->bi_size))
+ return NULL;
+ if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
+ return NULL;
- /*
- * this split can only happen with a single paged bio,
- * split_bio will BUG_ON if this is not the case
- */
- dout("bio_chain_clone split! total=%d remaining=%d"
- "bi_size=%u\n",
- total, len - total, old_chain->bi_size);
+ /* Find first affected segment... */
- /* split the bio. We'll release it either in the next
- call, or it will have to be released outside */
- bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
- if (!bp)
- goto err_out;
+ resid = offset;
+ __bio_for_each_segment(bv, bio_src, idx, 0) {
+ if (resid < bv->bv_len)
+ break;
+ resid -= bv->bv_len;
+ }
+ voff = resid;
- __bio_clone(tmp, &bp->bio1);
+ /* ...and the last affected segment */
- *next = &bp->bio2;
- } else {
- __bio_clone(tmp, old_chain);
- *next = old_chain->bi_next;
- }
+ resid += len;
+ __bio_for_each_segment(bv, bio_src, end_idx, idx) {
+ if (resid <= bv->bv_len)
+ break;
+ resid -= bv->bv_len;
+ }
+ vcnt = end_idx - idx + 1;
+
+ /* Build the clone */
+
+ bio = bio_alloc(gfpmask, (unsigned int) vcnt);
+ if (!bio)
+ return NULL; /* ENOMEM */
- tmp->bi_bdev = NULL;
- tmp->bi_next = NULL;
- if (new_chain)
- tail->bi_next = tmp;
- else
- new_chain = tmp;
- tail = tmp;
- old_chain = old_chain->bi_next;
+ bio->bi_bdev = bio_src->bi_bdev;
+ bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT);
+ bio->bi_rw = bio_src->bi_rw;
+ bio->bi_flags |= 1 << BIO_CLONED;
- total += tmp->bi_size;
+ /*
+ * Copy over our part of the bio_vec, then update the first
+ * and last (or only) entries.
+ */
+ memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
+ vcnt * sizeof (struct bio_vec));
+ bio->bi_io_vec[0].bv_offset += voff;
+ if (vcnt > 1) {
+ bio->bi_io_vec[0].bv_len -= voff;
+ bio->bi_io_vec[vcnt - 1].bv_len = resid;
+ } else {
+ bio->bi_io_vec[0].bv_len = len;
}
- rbd_assert(total == len);
+ bio->bi_vcnt = vcnt;
+ bio->bi_size = len;
+ bio->bi_idx = 0;
+
+ return bio;
+}
+
+/*
+ * Clone a portion of a bio chain, starting at the given byte offset
+ * into the first bio in the source chain and continuing for the
+ * number of bytes indicated. The result is another bio chain of
+ * exactly the given length, or a null pointer on error.
+ *
+ * The bio_src and offset parameters are both in-out. On entry they
+ * refer to the first source bio and the offset into that bio where
+ * the start of data to be cloned is located.
+ *
+ * On return, bio_src is updated to refer to the bio in the source
+ * chain that contains first un-cloned byte, and *offset will
+ * contain the offset of that byte within that bio.
+ */
+static struct bio *bio_chain_clone_range(struct bio **bio_src,
+ unsigned int *offset,
+ unsigned int len,
+ gfp_t gfpmask)
+{
+ struct bio *bi = *bio_src;
+ unsigned int off = *offset;
+ struct bio *chain = NULL;
+ struct bio **end;
+
+ /* Build up a chain of clone bios up to the limit */
+
+ if (!bi || off >= bi->bi_size || !len)
+ return NULL; /* Nothing to clone */
- *old = old_chain;
+ end = &chain;
+ while (len) {
+ unsigned int bi_size;
+ struct bio *bio;
+
+ if (!bi)
+ goto out_err; /* EINVAL; ran out of bio's */
+ bi_size = min_t(unsigned int, bi->bi_size - off, len);
+ bio = bio_clone_range(bi, off, bi_size, gfpmask);
+ if (!bio)
+ goto out_err; /* ENOMEM */
+
+ *end = bio;
+ end = &bio->bi_next;
+
+ off += bi_size;
+ if (off == bi->bi_size) {
+ bi = bi->bi_next;
+ off = 0;
+ }
+ len -= bi_size;
+ }
+ *bio_src = bi;
+ *offset = off;
- return new_chain;
+ return chain;
+out_err:
+ bio_chain_put(chain);
-err_out:
- dout("bio_chain_clone with err\n");
- bio_chain_put(new_chain);
return NULL;
}
@@ -1014,8 +1081,9 @@ static int rbd_do_request(struct request *rq,
req_data->coll_index = coll_index;
}
- dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name,
- (unsigned long long) ofs, (unsigned long long) len);
+ dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n",
+ object_name, (unsigned long long) ofs,
+ (unsigned long long) len, coll, coll_index);
osdc = &rbd_dev->rbd_client->client->osdc;
req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
@@ -1463,18 +1531,16 @@ static void rbd_rq_fn(struct request_queue *q)
{
struct rbd_device *rbd_dev = q->queuedata;
struct request *rq;
- struct bio_pair *bp = NULL;
while ((rq = blk_fetch_request(q))) {
struct bio *bio;
- struct bio *rq_bio, *next_bio = NULL;
bool do_write;
unsigned int size;
- u64 op_size = 0;
u64 ofs;
int num_segs, cur_seg = 0;
struct rbd_req_coll *coll;
struct ceph_snap_context *snapc;
+ unsigned int bio_offset;
dout("fetched request\n");
@@ -1486,10 +1552,6 @@ static void rbd_rq_fn(struct request_queue *q)
/* deduce our operation (read, write) */
do_write = (rq_data_dir(rq) == WRITE);
-
- size = blk_rq_bytes(rq);
- ofs = blk_rq_pos(rq) * SECTOR_SIZE;
- rq_bio = rq->bio;
if (do_write && rbd_dev->mapping.read_only) {
__blk_end_request_all(rq, -EROFS);
continue;
@@ -1512,6 +1574,10 @@ static void rbd_rq_fn(struct request_queue *q)
up_read(&rbd_dev->header_rwsem);
+ size = blk_rq_bytes(rq);
+ ofs = blk_rq_pos(rq) * SECTOR_SIZE;
+ bio = rq->bio;
+
dout("%s 0x%x bytes at 0x%llx\n",
do_write ? "write" : "read",
size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
@@ -1531,30 +1597,37 @@ static void rbd_rq_fn(struct request_queue *q)
continue;
}
+ bio_offset = 0;
do {
- /* a bio clone to be passed down to OSD req */
+ u64 limit = rbd_segment_length(rbd_dev, ofs, size);
+ unsigned int chain_size;
+ struct bio *bio_chain;
+
+ BUG_ON(limit > (u64) UINT_MAX);
+ chain_size = (unsigned int) limit;
dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
- op_size = rbd_segment_length(rbd_dev, ofs, size);
+
kref_get(&coll->kref);
- bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
- op_size, GFP_ATOMIC);
- if (bio)
+
+ /* Pass a cloned bio chain via an osd request */
+
+ bio_chain = bio_chain_clone_range(&bio,
+ &bio_offset, chain_size,
+ GFP_ATOMIC);
+ if (bio_chain)
(void) rbd_do_op(rq, rbd_dev, snapc,
- ofs, op_size,
- bio, coll, cur_seg);
+ ofs, chain_size,
+ bio_chain, coll, cur_seg);
else
rbd_coll_end_req_index(rq, coll, cur_seg,
- -ENOMEM, op_size);
- size -= op_size;
- ofs += op_size;
+ -ENOMEM, chain_size);
+ size -= chain_size;
+ ofs += chain_size;
cur_seg++;
- rq_bio = next_bio;
} while (size > 0);
kref_put(&coll->kref, rbd_coll_release);
- if (bp)
- bio_pair_release(bp);
spin_lock_irq(q->queue_lock);
ceph_put_snap_context(snapc);
@@ -1564,7 +1637,7 @@ static void rbd_rq_fn(struct request_queue *q)
/*
* a queue callback. Makes sure that we don't create a bio that spans across
* multiple osd objects. One exception would be with a single page bios,
- * which we handle later at bio_chain_clone
+ * which we handle later at bio_chain_clone_range()
*/
static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
struct bio_vec *bvec)