summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-07-23 04:02:52 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-23 04:02:52 +0200
commitd4c90b1b9fe907da0d310008e5a769b591a14399 (patch)
treed37589ab70ada2778d315a0ad24d6e68c8615af6 /include
parentLinux 3.11-rc2 (diff)
parentMerge branch 'bcache-for-3.11' of git://evilpiepirate.org/~kent/linux-bcache ... (diff)
downloadlinux-d4c90b1b9fe907da0d310008e5a769b591a14399.tar.xz
linux-d4c90b1b9fe907da0d310008e5a769b591a14399.zip
Merge branch 'for-3.11/drivers' of git://git.kernel.dk/linux-block
Pull block IO driver bits from Jens Axboe: "As I mentioned in the core block pull request, due to real life circumstances the driver pull request would be late. Now it looks like -rc2 late... On the plus side, apart form the rsxx update, these are all things that I could argue could go in later in the cycle as they are fixes and not features. So even though things are late, it's not ALL bad. The pull request contains: - Updates to bcache, all bug fixes, from Kent. - A pile of drbd bug fixes (no big features this time!). - xen blk front/back fixes. - rsxx driver updates, some of them deferred form 3.10. So should be well cooked by now" * 'for-3.11/drivers' of git://git.kernel.dk/linux-block: (63 commits) bcache: Allocation kthread fixes bcache: Fix GC_SECTORS_USED() calculation bcache: Journal replay fix bcache: Shutdown fix bcache: Fix a sysfs splat on shutdown bcache: Advertise that flushes are supported bcache: check for allocation failures bcache: Fix a dumb race bcache: Use standard utility code bcache: Update email address bcache: Delete fuzz tester bcache: Document shrinker reserve better bcache: FUA fixes drbd: Allow online change of al-stripes and al-stripe-size drbd: Constants should be UPPERCASE drbd: Ignore the exit code of a fence-peer handler if it returns too late drbd: Fix rcu_read_lock balance on error path drbd: fix error return code in drbd_init() drbd: Do not sleep inside rcu bcache: Refresh usage docs ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/drbd.h6
-rw-r--r--include/linux/drbd_genl.h2
-rw-r--r--include/linux/drbd_limits.h9
-rw-r--r--include/trace/events/bcache.h381
-rw-r--r--include/xen/interface/io/blkif.h53
-rw-r--r--include/xen/interface/io/ring.h5
6 files changed, 346 insertions, 110 deletions
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1b4d4ee1168f..de7d74ab3de6 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -177,7 +177,11 @@ enum drbd_ret_code {
ERR_NEED_APV_100 = 163,
ERR_NEED_ALLOW_TWO_PRI = 164,
ERR_MD_UNCLEAN = 165,
-
+ ERR_MD_LAYOUT_CONNECTED = 166,
+ ERR_MD_LAYOUT_TOO_BIG = 167,
+ ERR_MD_LAYOUT_TOO_SMALL = 168,
+ ERR_MD_LAYOUT_NO_FIT = 169,
+ ERR_IMPLICIT_SHRINK = 170,
/* insert new ones above this line */
AFTER_LAST_ERR_CODE
};
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index d0d8fac8a6e4..e8c44572b8cb 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -181,6 +181,8 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
__u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size)
__flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force)
__flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync)
+ __u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF)
+ __u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF)
)
GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 1fedf2b17cc8..17e50bb00521 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -215,4 +215,13 @@
#define DRBD_ALWAYS_ASBP_DEF 0
#define DRBD_USE_RLE_DEF 1
+#define DRBD_AL_STRIPES_MIN 1
+#define DRBD_AL_STRIPES_MAX 1024
+#define DRBD_AL_STRIPES_DEF 1
+#define DRBD_AL_STRIPES_SCALE '1'
+
+#define DRBD_AL_STRIPE_SIZE_MIN 4
+#define DRBD_AL_STRIPE_SIZE_MAX 16777216
+#define DRBD_AL_STRIPE_SIZE_DEF 32
+#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
#endif
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 3cc5a0b278c3..5ebda976ea93 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -9,9 +9,7 @@
struct search;
DECLARE_EVENT_CLASS(bcache_request,
-
TP_PROTO(struct search *s, struct bio *bio),
-
TP_ARGS(s, bio),
TP_STRUCT__entry(
@@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request,
__field(dev_t, orig_sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
- __array(char, comm, TASK_COMM_LEN )
),
TP_fast_assign(
@@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request,
__entry->orig_sector = bio->bi_sector - 16;
__entry->nr_sector = bio->bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
- memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
- TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)",
+ TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->rwbs,
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm,
- __entry->orig_major, __entry->orig_minor,
+ __entry->rwbs, (unsigned long long)__entry->sector,
+ __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
(unsigned long long)__entry->orig_sector)
);
-DEFINE_EVENT(bcache_request, bcache_request_start,
+DECLARE_EVENT_CLASS(bkey,
+ TP_PROTO(struct bkey *k),
+ TP_ARGS(k),
- TP_PROTO(struct search *s, struct bio *bio),
+ TP_STRUCT__entry(
+ __field(u32, size )
+ __field(u32, inode )
+ __field(u64, offset )
+ __field(bool, dirty )
+ ),
- TP_ARGS(s, bio)
+ TP_fast_assign(
+ __entry->inode = KEY_INODE(k);
+ __entry->offset = KEY_OFFSET(k);
+ __entry->size = KEY_SIZE(k);
+ __entry->dirty = KEY_DIRTY(k);
+ ),
+
+ TP_printk("%u:%llu len %u dirty %u", __entry->inode,
+ __entry->offset, __entry->size, __entry->dirty)
);
-DEFINE_EVENT(bcache_request, bcache_request_end,
+DECLARE_EVENT_CLASS(btree_node,
+ TP_PROTO(struct btree *b),
+ TP_ARGS(b),
+
+ TP_STRUCT__entry(
+ __field(size_t, bucket )
+ ),
+ TP_fast_assign(
+ __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+ ),
+
+ TP_printk("bucket %zu", __entry->bucket)
+);
+
+/* request.c */
+
+DEFINE_EVENT(bcache_request, bcache_request_start,
TP_PROTO(struct search *s, struct bio *bio),
+ TP_ARGS(s, bio)
+);
+DEFINE_EVENT(bcache_request, bcache_request_end,
+ TP_PROTO(struct search *s, struct bio *bio),
TP_ARGS(s, bio)
);
DECLARE_EVENT_CLASS(bcache_bio,
-
TP_PROTO(struct bio *bio),
-
TP_ARGS(bio),
TP_STRUCT__entry(
@@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio,
__field(sector_t, sector )
__field(unsigned int, nr_sector )
__array(char, rwbs, 6 )
- __array(char, comm, TASK_COMM_LEN )
),
TP_fast_assign(
@@ -78,191 +104,328 @@ DECLARE_EVENT_CLASS(bcache_bio,
__entry->sector = bio->bi_sector;
__entry->nr_sector = bio->bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
- memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
- TP_printk("%d,%d %s %llu + %u [%s]",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->rwbs,
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm)
+ TP_printk("%d,%d %s %llu + %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+ (unsigned long long)__entry->sector, __entry->nr_sector)
);
-
-DEFINE_EVENT(bcache_bio, bcache_passthrough,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
TP_PROTO(struct bio *bio),
+ TP_ARGS(bio)
+);
+DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
+ TP_PROTO(struct bio *bio),
TP_ARGS(bio)
);
-DEFINE_EVENT(bcache_bio, bcache_cache_hit,
+TRACE_EVENT(bcache_read,
+ TP_PROTO(struct bio *bio, bool hit, bool bypass),
+ TP_ARGS(bio, hit, bypass),
- TP_PROTO(struct bio *bio),
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(sector_t, sector )
+ __field(unsigned int, nr_sector )
+ __array(char, rwbs, 6 )
+ __field(bool, cache_hit )
+ __field(bool, bypass )
+ ),
- TP_ARGS(bio)
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ __entry->cache_hit = hit;
+ __entry->bypass = bypass;
+ ),
+
+ TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rwbs, (unsigned long long)__entry->sector,
+ __entry->nr_sector, __entry->cache_hit, __entry->bypass)
);
-DEFINE_EVENT(bcache_bio, bcache_cache_miss,
+TRACE_EVENT(bcache_write,
+ TP_PROTO(struct bio *bio, bool writeback, bool bypass),
+ TP_ARGS(bio, writeback, bypass),
- TP_PROTO(struct bio *bio),
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(sector_t, sector )
+ __field(unsigned int, nr_sector )
+ __array(char, rwbs, 6 )
+ __field(bool, writeback )
+ __field(bool, bypass )
+ ),
- TP_ARGS(bio)
+ TP_fast_assign(
+ __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->sector = bio->bi_sector;
+ __entry->nr_sector = bio->bi_size >> 9;
+ blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+ __entry->writeback = writeback;
+ __entry->bypass = bypass;
+ ),
+
+ TP_printk("%d,%d %s %llu + %u hit %u bypass %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rwbs, (unsigned long long)__entry->sector,
+ __entry->nr_sector, __entry->writeback, __entry->bypass)
);
DEFINE_EVENT(bcache_bio, bcache_read_retry,
-
TP_PROTO(struct bio *bio),
-
TP_ARGS(bio)
);
-DEFINE_EVENT(bcache_bio, bcache_writethrough,
+DEFINE_EVENT(bkey, bcache_cache_insert,
+ TP_PROTO(struct bkey *k),
+ TP_ARGS(k)
+);
- TP_PROTO(struct bio *bio),
+/* Journal */
- TP_ARGS(bio)
-);
+DECLARE_EVENT_CLASS(cache_set,
+ TP_PROTO(struct cache_set *c),
+ TP_ARGS(c),
-DEFINE_EVENT(bcache_bio, bcache_writeback,
+ TP_STRUCT__entry(
+ __array(char, uuid, 16 )
+ ),
- TP_PROTO(struct bio *bio),
+ TP_fast_assign(
+ memcpy(__entry->uuid, c->sb.set_uuid, 16);
+ ),
- TP_ARGS(bio)
+ TP_printk("%pU", __entry->uuid)
);
-DEFINE_EVENT(bcache_bio, bcache_write_skip,
-
- TP_PROTO(struct bio *bio),
+DEFINE_EVENT(bkey, bcache_journal_replay_key,
+ TP_PROTO(struct bkey *k),
+ TP_ARGS(k)
+);
- TP_ARGS(bio)
+DEFINE_EVENT(cache_set, bcache_journal_full,
+ TP_PROTO(struct cache_set *c),
+ TP_ARGS(c)
);
-DEFINE_EVENT(bcache_bio, bcache_btree_read,
+DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+ TP_PROTO(struct cache_set *c),
+ TP_ARGS(c)
+);
+DEFINE_EVENT(bcache_bio, bcache_journal_write,
TP_PROTO(struct bio *bio),
-
TP_ARGS(bio)
);
-DEFINE_EVENT(bcache_bio, bcache_btree_write,
+/* Btree */
- TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize,
+ TP_PROTO(struct cache_set *c),
+ TP_ARGS(c)
+);
- TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_read,
+ TP_PROTO(struct btree *b),
+ TP_ARGS(b)
);
-DEFINE_EVENT(bcache_bio, bcache_write_dirty,
+TRACE_EVENT(bcache_btree_write,
+ TP_PROTO(struct btree *b),
+ TP_ARGS(b),
- TP_PROTO(struct bio *bio),
+ TP_STRUCT__entry(
+ __field(size_t, bucket )
+ __field(unsigned, block )
+ __field(unsigned, keys )
+ ),
- TP_ARGS(bio)
+ TP_fast_assign(
+ __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+ __entry->block = b->written;
+ __entry->keys = b->sets[b->nsets].data->keys;
+ ),
+
+ TP_printk("bucket %zu", __entry->bucket)
);
-DEFINE_EVENT(bcache_bio, bcache_read_dirty,
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
+ TP_PROTO(struct btree *b),
+ TP_ARGS(b)
+);
- TP_PROTO(struct bio *bio),
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
+ TP_PROTO(struct btree *b),
+ TP_ARGS(b)
+);
- TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_node_free,
+ TP_PROTO(struct btree *b),
+ TP_ARGS(b)
);
-DEFINE_EVENT(bcache_bio, bcache_write_moving,
+TRACE_EVENT(bcache_btree_gc_coalesce,
+ TP_PROTO(unsigned nodes),
+ TP_ARGS(nodes),
- TP_PROTO(struct bio *bio),
+ TP_STRUCT__entry(
+ __field(unsigned, nodes )
+ ),
- TP_ARGS(bio)
+ TP_fast_assign(
+ __entry->nodes = nodes;
+ ),
+
+ TP_printk("coalesced %u nodes", __entry->nodes)
);
-DEFINE_EVENT(bcache_bio, bcache_read_moving,
+DEFINE_EVENT(cache_set, bcache_gc_start,
+ TP_PROTO(struct cache_set *c),
+ TP_ARGS(c)
+);
- TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_gc_end,
+ TP_PROTO(struct cache_set *c),
+ TP_ARGS(c)
+);
- TP_ARGS(bio)
+DEFINE_EVENT(bkey, bcache_gc_copy,
+ TP_PROTO(struct bkey *k),
+ TP_ARGS(k)
);
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
+DEFINE_EVENT(bkey, bcache_gc_copy_collision,
+ TP_PROTO(struct bkey *k),
+ TP_ARGS(k)
+);
- TP_PROTO(struct bio *bio),
+TRACE_EVENT(bcache_btree_insert_key,
+ TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status),
+ TP_ARGS(b, k, op, status),
- TP_ARGS(bio)
-);
+ TP_STRUCT__entry(
+ __field(u64, btree_node )
+ __field(u32, btree_level )
+ __field(u32, inode )
+ __field(u64, offset )
+ __field(u32, size )
+ __field(u8, dirty )
+ __field(u8, op )
+ __field(u8, status )
+ ),
-DECLARE_EVENT_CLASS(bcache_cache_bio,
+ TP_fast_assign(
+ __entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0);
+ __entry->btree_level = b->level;
+ __entry->inode = KEY_INODE(k);
+ __entry->offset = KEY_OFFSET(k);
+ __entry->size = KEY_SIZE(k);
+ __entry->dirty = KEY_DIRTY(k);
+ __entry->op = op;
+ __entry->status = status;
+ ),
- TP_PROTO(struct bio *bio,
- sector_t orig_sector,
- struct block_device* orig_bdev),
+ TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u",
+ __entry->status, __entry->op,
+ __entry->btree_node, __entry->btree_level,
+ __entry->inode, __entry->offset,
+ __entry->size, __entry->dirty)
+);
- TP_ARGS(bio, orig_sector, orig_bdev),
+DECLARE_EVENT_CLASS(btree_split,
+ TP_PROTO(struct btree *b, unsigned keys),
+ TP_ARGS(b, keys),
TP_STRUCT__entry(
- __field(dev_t, dev )
- __field(dev_t, orig_dev )
- __field(sector_t, sector )
- __field(sector_t, orig_sector )
- __field(unsigned int, nr_sector )
- __array(char, rwbs, 6 )
- __array(char, comm, TASK_COMM_LEN )
+ __field(size_t, bucket )
+ __field(unsigned, keys )
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
- __entry->orig_dev = orig_bdev->bd_dev;
- __entry->sector = bio->bi_sector;
- __entry->orig_sector = orig_sector;
- __entry->nr_sector = bio->bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
- memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+ __entry->keys = keys;
),
- TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d %llu)",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->rwbs,
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm,
- MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev),
- (unsigned long long)__entry->orig_sector)
+ TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys)
);
-DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert,
-
- TP_PROTO(struct bio *bio,
- sector_t orig_sector,
- struct block_device *orig_bdev),
+DEFINE_EVENT(btree_split, bcache_btree_node_split,
+ TP_PROTO(struct btree *b, unsigned keys),
+ TP_ARGS(b, keys)
+);
- TP_ARGS(bio, orig_sector, orig_bdev)
+DEFINE_EVENT(btree_split, bcache_btree_node_compact,
+ TP_PROTO(struct btree *b, unsigned keys),
+ TP_ARGS(b, keys)
);
-DECLARE_EVENT_CLASS(bcache_gc,
+DEFINE_EVENT(btree_node, bcache_btree_set_root,
+ TP_PROTO(struct btree *b),
+ TP_ARGS(b)
+);
- TP_PROTO(uint8_t *uuid),
+/* Allocator */
- TP_ARGS(uuid),
+TRACE_EVENT(bcache_alloc_invalidate,
+ TP_PROTO(struct cache *ca),
+ TP_ARGS(ca),
TP_STRUCT__entry(
- __field(uint8_t *, uuid)
+ __field(unsigned, free )
+ __field(unsigned, free_inc )
+ __field(unsigned, free_inc_size )
+ __field(unsigned, unused )
),
TP_fast_assign(
- __entry->uuid = uuid;
+ __entry->free = fifo_used(&ca->free);
+ __entry->free_inc = fifo_used(&ca->free_inc);
+ __entry->free_inc_size = ca->free_inc.size;
+ __entry->unused = fifo_used(&ca->unused);
),
- TP_printk("%pU", __entry->uuid)
+ TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
+ __entry->free_inc, __entry->free_inc_size, __entry->unused)
);
+TRACE_EVENT(bcache_alloc_fail,
+ TP_PROTO(struct cache *ca),
+ TP_ARGS(ca),
-DEFINE_EVENT(bcache_gc, bcache_gc_start,
+ TP_STRUCT__entry(
+ __field(unsigned, free )
+ __field(unsigned, free_inc )
+ __field(unsigned, unused )
+ __field(unsigned, blocked )
+ ),
- TP_PROTO(uint8_t *uuid),
+ TP_fast_assign(
+ __entry->free = fifo_used(&ca->free);
+ __entry->free_inc = fifo_used(&ca->free_inc);
+ __entry->unused = fifo_used(&ca->unused);
+ __entry->blocked = atomic_read(&ca->set->prio_blocked);
+ ),
- TP_ARGS(uuid)
+ TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
+ __entry->free_inc, __entry->unused, __entry->blocked)
);
-DEFINE_EVENT(bcache_gc, bcache_gc_end,
+/* Background writeback */
- TP_PROTO(uint8_t *uuid),
+DEFINE_EVENT(bkey, bcache_writeback,
+ TP_PROTO(struct bkey *k),
+ TP_ARGS(k)
+);
- TP_ARGS(uuid)
+DEFINE_EVENT(bkey, bcache_writeback_collision,
+ TP_PROTO(struct bkey *k),
+ TP_ARGS(k)
);
#endif /* _TRACE_BCACHE_H */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ffd4652de91c..65e12099ef89 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -103,12 +103,46 @@ typedef uint64_t blkif_sector_t;
#define BLKIF_OP_DISCARD 5
/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * This pages are filled with an array of blkif_request_segment_aligned
+ * that hold the information about the segments. The number of indirect
+ * pages to use is determined by the maximum number of segments
+ * a indirect request contains. Every indirect page can contain a maximum
+ * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
+ * so to calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments/512).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT 6
+
+/*
* Maximum scatter/gather segments per request.
* This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
* NB. This could be 12 if the ring indexes weren't stored in the same page.
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+struct blkif_request_segment_aligned {
+ grant_ref_t gref; /* reference to I/O buffer frame */
+ /* @first_sect: first sector in frame to transfer (inclusive). */
+ /* @last_sect: last sector in frame to transfer (inclusive). */
+ uint8_t first_sect, last_sect;
+ uint16_t _pad; /* padding to make it 8 bytes, so it's cache-aligned */
+} __attribute__((__packed__));
+
struct blkif_request_rw {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
@@ -147,12 +181,31 @@ struct blkif_request_other {
uint64_t id; /* private guest value, echoed in resp */
} __attribute__((__packed__));
+struct blkif_request_indirect {
+ uint8_t indirect_op;
+ uint16_t nr_segments;
+#ifdef CONFIG_X86_64
+ uint32_t _pad1; /* offsetof(blkif_...,u.indirect.id) == 8 */
+#endif
+ uint64_t id;
+ blkif_sector_t sector_number;
+ blkif_vdev_t handle;
+ uint16_t _pad2;
+ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef CONFIG_X86_64
+ uint32_t _pad3; /* make it 64 byte aligned */
+#else
+ uint64_t _pad3; /* make it 64 byte aligned */
+#endif
+} __attribute__((__packed__));
+
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
union {
struct blkif_request_rw rw;
struct blkif_request_discard discard;
struct blkif_request_other other;
+ struct blkif_request_indirect indirect;
} u;
} __attribute__((__packed__));
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 75271b9a8f61..7d28aff605c7 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -188,6 +188,11 @@ struct __name##_back_ring { \
#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
(((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \
+ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+
#define RING_PUSH_REQUESTS(_r) do { \
wmb(); /* back sees requests /before/ updated producer index */ \
(_r)->sring->req_prod = (_r)->req_prod_pvt; \