diff options
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r-- | drivers/block/rbd.c | 231 |
1 files changed, 152 insertions, 79 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c800047f5835..cc06c55875b9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -826,77 +826,144 @@ static void zero_bio_chain(struct bio *chain, int start_ofs) } /* - * bio_chain_clone - clone a chain of bios up to a certain length. - * might return a bio_pair that will need to be released. + * Clone a portion of a bio, starting at the given byte offset + * and continuing for the number of bytes indicated. */ -static struct bio *bio_chain_clone(struct bio **old, struct bio **next, - struct bio_pair **bp, - int len, gfp_t gfpmask) -{ - struct bio *old_chain = *old; - struct bio *new_chain = NULL; - struct bio *tail; - int total = 0; - - if (*bp) { - bio_pair_release(*bp); - *bp = NULL; - } +static struct bio *bio_clone_range(struct bio *bio_src, + unsigned int offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio_vec *bv; + unsigned int resid; + unsigned short idx; + unsigned int voff; + unsigned short end_idx; + unsigned short vcnt; + struct bio *bio; - while (old_chain && (total < len)) { - struct bio *tmp; + /* Handle the easy case for the caller */ - tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); - if (!tmp) - goto err_out; - gfpmask &= ~__GFP_WAIT; /* can't wait after the first */ + if (!offset && len == bio_src->bi_size) + return bio_clone(bio_src, gfpmask); - if (total + old_chain->bi_size > len) { - struct bio_pair *bp; + if (WARN_ON_ONCE(!len)) + return NULL; + if (WARN_ON_ONCE(len > bio_src->bi_size)) + return NULL; + if (WARN_ON_ONCE(offset > bio_src->bi_size - len)) + return NULL; - /* - * this split can only happen with a single paged bio, - * split_bio will BUG_ON if this is not the case - */ - dout("bio_chain_clone split! total=%d remaining=%d" - "bi_size=%u\n", - total, len - total, old_chain->bi_size); + /* Find first affected segment... */ - /* split the bio. We'll release it either in the next - call, or it will have to be released outside */ - bp = bio_split(old_chain, (len - total) / SECTOR_SIZE); - if (!bp) - goto err_out; + resid = offset; + __bio_for_each_segment(bv, bio_src, idx, 0) { + if (resid < bv->bv_len) + break; + resid -= bv->bv_len; + } + voff = resid; - __bio_clone(tmp, &bp->bio1); + /* ...and the last affected segment */ - *next = &bp->bio2; - } else { - __bio_clone(tmp, old_chain); - *next = old_chain->bi_next; - } + resid += len; + __bio_for_each_segment(bv, bio_src, end_idx, idx) { + if (resid <= bv->bv_len) + break; + resid -= bv->bv_len; + } + vcnt = end_idx - idx + 1; + + /* Build the clone */ + + bio = bio_alloc(gfpmask, (unsigned int) vcnt); + if (!bio) + return NULL; /* ENOMEM */ - tmp->bi_bdev = NULL; - tmp->bi_next = NULL; - if (new_chain) - tail->bi_next = tmp; - else - new_chain = tmp; - tail = tmp; - old_chain = old_chain->bi_next; + bio->bi_bdev = bio_src->bi_bdev; + bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); + bio->bi_rw = bio_src->bi_rw; + bio->bi_flags |= 1 << BIO_CLONED; - total += tmp->bi_size; + /* + * Copy over our part of the bio_vec, then update the first + * and last (or only) entries. + */ + memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx], + vcnt * sizeof (struct bio_vec)); + bio->bi_io_vec[0].bv_offset += voff; + if (vcnt > 1) { + bio->bi_io_vec[0].bv_len -= voff; + bio->bi_io_vec[vcnt - 1].bv_len = resid; + } else { + bio->bi_io_vec[0].bv_len = len; } - rbd_assert(total == len); + bio->bi_vcnt = vcnt; + bio->bi_size = len; + bio->bi_idx = 0; + + return bio; +} + +/* + * Clone a portion of a bio chain, starting at the given byte offset + * into the first bio in the source chain and continuing for the + * number of bytes indicated. The result is another bio chain of + * exactly the given length, or a null pointer on error. + * + * The bio_src and offset parameters are both in-out. On entry they + * refer to the first source bio and the offset into that bio where + * the start of data to be cloned is located. + * + * On return, bio_src is updated to refer to the bio in the source + * chain that contains first un-cloned byte, and *offset will + * contain the offset of that byte within that bio. + */ +static struct bio *bio_chain_clone_range(struct bio **bio_src, + unsigned int *offset, + unsigned int len, + gfp_t gfpmask) +{ + struct bio *bi = *bio_src; + unsigned int off = *offset; + struct bio *chain = NULL; + struct bio **end; + + /* Build up a chain of clone bios up to the limit */ + + if (!bi || off >= bi->bi_size || !len) + return NULL; /* Nothing to clone */ - *old = old_chain; + end = &chain; + while (len) { + unsigned int bi_size; + struct bio *bio; + + if (!bi) + goto out_err; /* EINVAL; ran out of bio's */ + bi_size = min_t(unsigned int, bi->bi_size - off, len); + bio = bio_clone_range(bi, off, bi_size, gfpmask); + if (!bio) + goto out_err; /* ENOMEM */ + + *end = bio; + end = &bio->bi_next; + + off += bi_size; + if (off == bi->bi_size) { + bi = bi->bi_next; + off = 0; + } + len -= bi_size; + } + *bio_src = bi; + *offset = off; - return new_chain; + return chain; +out_err: + bio_chain_put(chain); -err_out: - dout("bio_chain_clone with err\n"); - bio_chain_put(new_chain); return NULL; } @@ -1014,8 +1081,9 @@ static int rbd_do_request(struct request *rq, req_data->coll_index = coll_index; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, - (unsigned long long) ofs, (unsigned long long) len); + dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", + object_name, (unsigned long long) ofs, + (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, @@ -1463,18 +1531,16 @@ static void rbd_rq_fn(struct request_queue *q) { struct rbd_device *rbd_dev = q->queuedata; struct request *rq; - struct bio_pair *bp = NULL; while ((rq = blk_fetch_request(q))) { struct bio *bio; - struct bio *rq_bio, *next_bio = NULL; bool do_write; unsigned int size; - u64 op_size = 0; u64 ofs; int num_segs, cur_seg = 0; struct rbd_req_coll *coll; struct ceph_snap_context *snapc; + unsigned int bio_offset; dout("fetched request\n"); @@ -1486,10 +1552,6 @@ static void rbd_rq_fn(struct request_queue *q) /* deduce our operation (read, write) */ do_write = (rq_data_dir(rq) == WRITE); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - rq_bio = rq->bio; if (do_write && rbd_dev->mapping.read_only) { __blk_end_request_all(rq, -EROFS); continue; @@ -1512,6 +1574,10 @@ static void rbd_rq_fn(struct request_queue *q) up_read(&rbd_dev->header_rwsem); + size = blk_rq_bytes(rq); + ofs = blk_rq_pos(rq) * SECTOR_SIZE; + bio = rq->bio; + dout("%s 0x%x bytes at 0x%llx\n", do_write ? "write" : "read", size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); @@ -1531,30 +1597,37 @@ static void rbd_rq_fn(struct request_queue *q) continue; } + bio_offset = 0; do { - /* a bio clone to be passed down to OSD req */ + u64 limit = rbd_segment_length(rbd_dev, ofs, size); + unsigned int chain_size; + struct bio *bio_chain; + + BUG_ON(limit > (u64) UINT_MAX); + chain_size = (unsigned int) limit; dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - op_size = rbd_segment_length(rbd_dev, ofs, size); + kref_get(&coll->kref); - bio = bio_chain_clone(&rq_bio, &next_bio, &bp, - op_size, GFP_ATOMIC); - if (bio) + + /* Pass a cloned bio chain via an osd request */ + + bio_chain = bio_chain_clone_range(&bio, + &bio_offset, chain_size, + GFP_ATOMIC); + if (bio_chain) (void) rbd_do_op(rq, rbd_dev, snapc, - ofs, op_size, - bio, coll, cur_seg); + ofs, chain_size, + bio_chain, coll, cur_seg); else rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, op_size); - size -= op_size; - ofs += op_size; + -ENOMEM, chain_size); + size -= chain_size; + ofs += chain_size; cur_seg++; - rq_bio = next_bio; } while (size > 0); kref_put(&coll->kref, rbd_coll_release); - if (bp) - bio_pair_release(bp); spin_lock_irq(q->queue_lock); ceph_put_snap_context(snapc); @@ -1564,7 +1637,7 @@ static void rbd_rq_fn(struct request_queue *q) /* * a queue callback. Makes sure that we don't create a bio that spans across * multiple osd objects. One exception would be with a single page bios, - * which we handle later at bio_chain_clone + * which we handle later at bio_chain_clone_range() */ static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, struct bio_vec *bvec) |