summaryrefslogtreecommitdiffstats
path: root/drivers/block/xen-blkback
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/xen-blkback')
-rw-r--r--drivers/block/xen-blkback/blkback.c301
-rw-r--r--drivers/block/xen-blkback/common.h16
-rw-r--r--drivers/block/xen-blkback/xenbus.c23
3 files changed, 313 insertions, 27 deletions
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 280a13846e6c..74374fb762aa 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,7 @@
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/freezer.h>
+#include <linux/bitmap.h>
#include <xen/events.h>
#include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
unsigned short operation;
int status;
struct list_head free_list;
+ DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
};
#define BLKBACK_INVALID_HANDLE (~0)
@@ -99,6 +101,36 @@ struct xen_blkbk {
static struct xen_blkbk *blkbk;
/*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+ switch (protocol) {
+ case BLKIF_PROTOCOL_NATIVE:
+ return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+ BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ case BLKIF_PROTOCOL_X86_32:
+ return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+ BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ case BLKIF_PROTOCOL_X86_64:
+ return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+ BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+
+/*
* Little helpful macro to figure out the index and virtual address of the
* pending_pages[..]. For each 'pending_req' we have have up to
* BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
@@ -129,6 +161,90 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
static void make_response(struct xen_blkif *blkif, u64 id,
unsigned short op, int st);
+#define foreach_grant(pos, rbtree, node) \
+ for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+ &(pos)->node != NULL; \
+ (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+ struct persistent_gnt *persistent_gnt)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct persistent_gnt *this;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ this = container_of(*new, struct persistent_gnt, node);
+
+ parent = *new;
+ if (persistent_gnt->gnt < this->gnt)
+ new = &((*new)->rb_left);
+ else if (persistent_gnt->gnt > this->gnt)
+ new = &((*new)->rb_right);
+ else {
+ pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+ BUG();
+ }
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&(persistent_gnt->node), parent, new);
+ rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+ grant_ref_t gref)
+{
+ struct persistent_gnt *data;
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ data = container_of(node, struct persistent_gnt, node);
+
+ if (gref < data->gnt)
+ node = node->rb_left;
+ else if (gref > data->gnt)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+{
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct persistent_gnt *persistent_gnt;
+ int ret = 0;
+ int segs_to_unmap = 0;
+
+ foreach_grant(persistent_gnt, root, node) {
+ BUG_ON(persistent_gnt->handle ==
+ BLKBACK_INVALID_HANDLE);
+ gnttab_set_unmap_op(&unmap[segs_to_unmap],
+ (unsigned long) pfn_to_kaddr(page_to_pfn(
+ persistent_gnt->page)),
+ GNTMAP_host_map,
+ persistent_gnt->handle);
+
+ pages[segs_to_unmap] = persistent_gnt->page;
+ rb_erase(&persistent_gnt->node, root);
+ kfree(persistent_gnt);
+ num--;
+
+ if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+ !rb_next(&persistent_gnt->node)) {
+ ret = gnttab_unmap_refs(unmap, NULL, pages,
+ segs_to_unmap);
+ BUG_ON(ret);
+ segs_to_unmap = 0;
+ }
+ }
+ BUG_ON(num != 0);
+}
+
/*
* Retrieve from the 'pending_reqs' a free pending_req structure to be used.
*/
@@ -302,6 +418,14 @@ int xen_blkif_schedule(void *arg)
print_stats(blkif);
}
+ /* Free all persistent grant pages */
+ if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
+ free_persistent_gnts(&blkif->persistent_gnts,
+ blkif->persistent_gnt_c);
+
+ BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+ blkif->persistent_gnt_c = 0;
+
if (log_stats)
print_stats(blkif);
@@ -328,6 +452,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
int ret;
for (i = 0; i < req->nr_pages; i++) {
+ if (!test_bit(i, req->unmap_seg))
+ continue;
handle = pending_handle(req, i);
if (handle == BLKBACK_INVALID_HANDLE)
continue;
@@ -344,12 +470,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
static int xen_blkbk_map(struct blkif_request *req,
struct pending_req *pending_req,
- struct seg_buf seg[])
+ struct seg_buf seg[],
+ struct page *pages[])
{
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- int i;
+ struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct persistent_gnt *persistent_gnt = NULL;
+ struct xen_blkif *blkif = pending_req->blkif;
+ phys_addr_t addr = 0;
+ int i, j;
+ bool new_map;
int nseg = req->u.rw.nr_segments;
+ int segs_to_map = 0;
int ret = 0;
+ int use_persistent_gnts;
+
+ use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+ BUG_ON(blkif->persistent_gnt_c >
+ max_mapped_grant_pages(pending_req->blkif->blk_protocol));
/*
* Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +499,146 @@ static int xen_blkbk_map(struct blkif_request *req,
for (i = 0; i < nseg; i++) {
uint32_t flags;
- flags = GNTMAP_host_map;
- if (pending_req->operation != BLKIF_OP_READ)
- flags |= GNTMAP_readonly;
- gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
- req->u.rw.seg[i].gref,
- pending_req->blkif->domid);
+ if (use_persistent_gnts)
+ persistent_gnt = get_persistent_gnt(
+ &blkif->persistent_gnts,
+ req->u.rw.seg[i].gref);
+
+ if (persistent_gnt) {
+ /*
+ * We are using persistent grants and
+ * the grant is already mapped
+ */
+ new_map = false;
+ } else if (use_persistent_gnts &&
+ blkif->persistent_gnt_c <
+ max_mapped_grant_pages(blkif->blk_protocol)) {
+ /*
+ * We are using persistent grants, the grant is
+ * not mapped but we have room for it
+ */
+ new_map = true;
+ persistent_gnt = kmalloc(
+ sizeof(struct persistent_gnt),
+ GFP_KERNEL);
+ if (!persistent_gnt)
+ return -ENOMEM;
+ persistent_gnt->page = alloc_page(GFP_KERNEL);
+ if (!persistent_gnt->page) {
+ kfree(persistent_gnt);
+ return -ENOMEM;
+ }
+ persistent_gnt->gnt = req->u.rw.seg[i].gref;
+ persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
+
+ pages_to_gnt[segs_to_map] =
+ persistent_gnt->page;
+ addr = (unsigned long) pfn_to_kaddr(
+ page_to_pfn(persistent_gnt->page));
+
+ add_persistent_gnt(&blkif->persistent_gnts,
+ persistent_gnt);
+ blkif->persistent_gnt_c++;
+ pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+ persistent_gnt->gnt, blkif->persistent_gnt_c,
+ max_mapped_grant_pages(blkif->blk_protocol));
+ } else {
+ /*
+ * We are either using persistent grants and
+ * hit the maximum limit of grants mapped,
+ * or we are not using persistent grants.
+ */
+ if (use_persistent_gnts &&
+ !blkif->vbd.overflow_max_grants) {
+ blkif->vbd.overflow_max_grants = 1;
+ pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+ blkif->domid, blkif->vbd.handle);
+ }
+ new_map = true;
+ pages[i] = blkbk->pending_page(pending_req, i);
+ addr = vaddr(pending_req, i);
+ pages_to_gnt[segs_to_map] =
+ blkbk->pending_page(pending_req, i);
+ }
+
+ if (persistent_gnt) {
+ pages[i] = persistent_gnt->page;
+ persistent_gnts[i] = persistent_gnt;
+ } else {
+ persistent_gnts[i] = NULL;
+ }
+
+ if (new_map) {
+ flags = GNTMAP_host_map;
+ if (!persistent_gnt &&
+ (pending_req->operation != BLKIF_OP_READ))
+ flags |= GNTMAP_readonly;
+ gnttab_set_map_op(&map[segs_to_map++], addr,
+ flags, req->u.rw.seg[i].gref,
+ blkif->domid);
+ }
}
- ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
- BUG_ON(ret);
+ if (segs_to_map) {
+ ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+ BUG_ON(ret);
+ }
/*
* Now swizzle the MFN in our domain with the MFN from the other domain
* so that when we access vaddr(pending_req,i) it has the contents of
* the page from the other domain.
*/
- for (i = 0; i < nseg; i++) {
- if (unlikely(map[i].status != 0)) {
- pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
- map[i].handle = BLKBACK_INVALID_HANDLE;
- ret |= 1;
+ bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ for (i = 0, j = 0; i < nseg; i++) {
+ if (!persistent_gnts[i] ||
+ persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+ /* This is a newly mapped grant */
+ BUG_ON(j >= segs_to_map);
+ if (unlikely(map[j].status != 0)) {
+ pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+ map[j].handle = BLKBACK_INVALID_HANDLE;
+ ret |= 1;
+ if (persistent_gnts[i]) {
+ rb_erase(&persistent_gnts[i]->node,
+ &blkif->persistent_gnts);
+ blkif->persistent_gnt_c--;
+ kfree(persistent_gnts[i]);
+ persistent_gnts[i] = NULL;
+ }
+ }
+ }
+ if (persistent_gnts[i]) {
+ if (persistent_gnts[i]->handle ==
+ BLKBACK_INVALID_HANDLE) {
+ /*
+ * If this is a new persistent grant
+ * save the handler
+ */
+ persistent_gnts[i]->handle = map[j].handle;
+ persistent_gnts[i]->dev_bus_addr =
+ map[j++].dev_bus_addr;
+ }
+ pending_handle(pending_req, i) =
+ persistent_gnts[i]->handle;
+
+ if (ret)
+ continue;
+
+ seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+ (req->u.rw.seg[i].first_sect << 9);
+ } else {
+ pending_handle(pending_req, i) = map[j].handle;
+ bitmap_set(pending_req->unmap_seg, i, 1);
+
+ if (ret) {
+ j++;
+ continue;
+ }
+
+ seg[i].buf = map[j++].dev_bus_addr |
+ (req->u.rw.seg[i].first_sect << 9);
}
-
- pending_handle(pending_req, i) = map[i].handle;
-
- if (ret)
- continue;
-
- seg[i].buf = map[i].dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
}
return ret;
}
@@ -591,6 +841,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
int operation;
struct blk_plug plug;
bool drain = false;
+ struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
switch (req->operation) {
case BLKIF_OP_READ:
@@ -677,7 +928,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
* the hypercall to unmap the grants - that is all done in
* xen_blkbk_unmap.
*/
- if (xen_blkbk_map(req, pending_req, seg))
+ if (xen_blkbk_map(req, pending_req, seg, pages))
goto fail_flush;
/*
@@ -689,7 +940,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
for (i = 0; i < nseg; i++) {
while ((bio == NULL) ||
(bio_add_page(bio,
- blkbk->pending_page(pending_req, i),
+ pages[i],
seg[i].nsec << 9,
seg[i].buf & ~PAGE_MASK) == 0)) {
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9a54623e52d7..6072390c7f57 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -34,6 +34,7 @@
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
+#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
@@ -160,10 +161,21 @@ struct xen_vbd {
sector_t size;
unsigned int flush_support:1;
unsigned int discard_secure:1;
+ unsigned int feature_gnt_persistent:1;
+ unsigned int overflow_max_grants:1;
};
struct backend_info;
+
+struct persistent_gnt {
+ struct page *page;
+ grant_ref_t gnt;
+ grant_handle_t handle;
+ uint64_t dev_bus_addr;
+ struct rb_node node;
+};
+
struct xen_blkif {
/* Unique identifier for this interface. */
domid_t domid;
@@ -190,6 +202,10 @@ struct xen_blkif {
struct task_struct *xenblkd;
unsigned int waiting_reqs;
+ /* tree to store persistent grants */
+ struct rb_root persistent_gnts;
+ unsigned int persistent_gnt_c;
+
/* statistics */
unsigned long st_print;
int st_rd_req;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index f58434c2617c..63980722db41 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -117,6 +117,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
atomic_set(&blkif->drain, 0);
blkif->st_print = jiffies;
init_waitqueue_head(&blkif->waiting_to_free);
+ blkif->persistent_gnts.rb_node = NULL;
return blkif;
}
@@ -672,6 +673,13 @@ again:
xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+ err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+ dev->nodename);
+ goto abort;
+ }
+
err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
(unsigned long long)vbd_sz(&be->blkif->vbd));
if (err) {
@@ -720,6 +728,7 @@ static int connect_ring(struct backend_info *be)
struct xenbus_device *dev = be->dev;
unsigned long ring_ref;
unsigned int evtchn;
+ unsigned int pers_grants;
char protocol[64] = "";
int err;
@@ -749,8 +758,18 @@ static int connect_ring(struct backend_info *be)
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
return -1;
}
- pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
- ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "feature-persistent", "%u",
+ &pers_grants, NULL);
+ if (err)
+ pers_grants = 0;
+
+ be->blkif->vbd.feature_gnt_persistent = pers_grants;
+ be->blkif->vbd.overflow_max_grants = 0;
+
+ pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+ ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+ pers_grants ? "persistent grants" : "");
/* Map the shared frame, irq etc. */
err = xen_blkif_map(be->blkif, ring_ref, evtchn);