summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/brd.c2
-rw-r--r--drivers/block/drbd/drbd_main.c2
-rw-r--r--drivers/block/drbd/drbd_nl.c28
-rw-r--r--drivers/block/loop.c2
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c12
-rw-r--r--drivers/block/nbd.c4
-rw-r--r--drivers/block/osdblk.c2
-rw-r--r--drivers/block/ps3disk.c2
-rw-r--r--drivers/block/rbd.c305
-rw-r--r--drivers/block/skd_main.c61
-rw-r--r--drivers/block/virtio_blk.c6
-rw-r--r--drivers/block/xen-blkback/xenbus.c2
-rw-r--r--drivers/block/xen-blkfront.c3
-rw-r--r--drivers/block/zram/zcomp.c300
-rw-r--r--drivers/block/zram/zcomp.h14
-rw-r--r--drivers/block/zram/zram_drv.c104
-rw-r--r--drivers/block/zram/zram_drv.h2
18 files changed, 289 insertions, 564 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 437b3a822f44..d597e432e195 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -861,7 +861,7 @@ rqbiocnt(struct request *r)
* discussion.
*
* We cannot use get_page in the workaround, because it insists on a
- * positive page count as a precondition. So we use _count directly.
+ * positive page count as a precondition. So we use _refcount directly.
*/
static void
bio_pageinc(struct bio *bio)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 51a071e32221..c04bd9bc39fd 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -381,7 +381,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
#ifdef CONFIG_BLK_DEV_RAM_DAX
static long brd_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn)
+ void __pmem **kaddr, pfn_t *pfn, long size)
{
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index fa209773d494..2ba1494b2799 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2761,7 +2761,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
q->backing_dev_info.congested_data = device;
blk_queue_make_request(q, drbd_make_request);
- blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
+ blk_queue_write_cache(q, true, true);
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 1fd1dccebb6b..0bac9c8246bc 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -3633,14 +3633,15 @@ static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
goto nla_put_failure;
if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
nla_put_u32(skb, T_current_state, device->state.i) ||
- nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
- nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
- nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
- nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
- nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
- nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
- nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
- nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
+ nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
+ nla_put_u64_0pad(skb, T_capacity,
+ drbd_get_capacity(device->this_bdev)) ||
+ nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
+ nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
+ nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||
+ nla_put_u64_0pad(skb, T_writ_cnt, device->writ_cnt) ||
+ nla_put_u64_0pad(skb, T_al_writ_cnt, device->al_writ_cnt) ||
+ nla_put_u64_0pad(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
@@ -3657,13 +3658,16 @@ static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
goto nla_put_failure;
if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
- nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
- nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
+ nla_put_u64_0pad(skb, T_bits_total, drbd_bm_bits(device)) ||
+ nla_put_u64_0pad(skb, T_bits_oos,
+ drbd_bm_total_weight(device)))
goto nla_put_failure;
if (C_SYNC_SOURCE <= device->state.conn &&
C_PAUSED_SYNC_T >= device->state.conn) {
- if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
- nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
+ if (nla_put_u64_0pad(skb, T_bits_rs_total,
+ device->rs_total) ||
+ nla_put_u64_0pad(skb, T_bits_rs_failed,
+ device->rs_failed))
goto nla_put_failure;
}
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 80cf8add46ff..1fa8cc235977 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -943,7 +943,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
- blk_queue_flush(lo->lo_queue, REQ_FLUSH);
+ blk_queue_write_cache(lo->lo_queue, true, false);
loop_update_dio(lo);
set_capacity(lo->lo_disk, size);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 25824c1697c5..6053e4659fa2 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3000,14 +3000,14 @@ restart_eh:
"Completion workers still active!");
spin_lock(dd->queue->queue_lock);
- blk_mq_all_tag_busy_iter(*dd->tags.tags,
+ blk_mq_tagset_busy_iter(&dd->tags,
mtip_queue_cmd, dd);
spin_unlock(dd->queue->queue_lock);
set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags);
if (mtip_device_reset(dd))
- blk_mq_all_tag_busy_iter(*dd->tags.tags,
+ blk_mq_tagset_busy_iter(&dd->tags,
mtip_abort_cmd, dd);
clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
@@ -4023,12 +4023,6 @@ skip_create_disk:
blk_queue_io_min(dd->queue, 4096);
blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask);
- /*
- * write back cache is not supported in the device. FUA depends on
- * write back cache support, hence setting flush support to zero.
- */
- blk_queue_flush(dd->queue, 0);
-
/* Signal trim support */
if (dd->trim_supp == true) {
set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
@@ -4174,7 +4168,7 @@ static int mtip_block_remove(struct driver_data *dd)
blk_mq_freeze_queue_start(dd->queue);
blk_mq_stop_hw_queues(dd->queue);
- blk_mq_all_tag_busy_iter(dd->tags.tags[0], mtip_no_dev_cleanup, dd);
+ blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
/*
* Delete our gendisk structure. This also removes the device
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 08afbc7a2bb8..31e73a7a40f2 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -693,9 +693,9 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
if (nbd->flags & NBD_FLAG_SEND_TRIM)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
if (nbd->flags & NBD_FLAG_SEND_FLUSH)
- blk_queue_flush(nbd->disk->queue, REQ_FLUSH);
+ blk_queue_write_cache(nbd->disk->queue, true, false);
else
- blk_queue_flush(nbd->disk->queue, 0);
+ blk_queue_write_cache(nbd->disk->queue, false, false);
}
static int nbd_dev_dbg_init(struct nbd_device *nbd);
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 1b709a4e3b5e..c2854a2bfdb0 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -437,7 +437,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
blk_queue_prep_rq(q, blk_queue_start_tag);
- blk_queue_flush(q, REQ_FLUSH);
+ blk_queue_write_cache(q, true, false);
disk->queue = q;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index c120d70d3fb3..4b7e405830d7 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -468,7 +468,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
blk_queue_dma_alignment(queue, dev->blk_size-1);
blk_queue_logical_block_size(queue, dev->blk_size);
- blk_queue_flush(queue, REQ_FLUSH);
+ blk_queue_write_cache(queue, true, false);
blk_queue_max_segments(queue, -1);
blk_queue_max_segment_size(queue, dev->bounce_size);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 0ede6d7e2568..81666a56415e 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -350,12 +350,12 @@ struct rbd_device {
struct rbd_spec *spec;
struct rbd_options *opts;
- char *header_name;
+ struct ceph_object_id header_oid;
+ struct ceph_object_locator header_oloc;
struct ceph_file_layout layout;
- struct ceph_osd_event *watch_event;
- struct rbd_obj_request *watch_request;
+ struct ceph_osd_linger_request *watch_handle;
struct rbd_spec *parent_spec;
u64 parent_overlap;
@@ -1596,12 +1596,6 @@ static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
return __rbd_obj_request_wait(obj_request, 0);
}
-static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request,
- unsigned long timeout)
-{
- return __rbd_obj_request_wait(obj_request, timeout);
-}
-
static void rbd_img_request_complete(struct rbd_img_request *img_request)
{
@@ -1751,12 +1745,6 @@ static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
complete_all(&obj_request->completion);
}
-static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
-{
- dout("%s: obj %p\n", __func__, obj_request);
- obj_request_done_set(obj_request);
-}
-
static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
{
struct rbd_img_request *img_request = NULL;
@@ -1828,13 +1816,12 @@ static void rbd_osd_call_callback(struct rbd_obj_request *obj_request)
obj_request_done_set(obj_request);
}
-static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
- struct ceph_msg *msg)
+static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
{
struct rbd_obj_request *obj_request = osd_req->r_priv;
u16 opcode;
- dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
+ dout("%s: osd_req %p\n", __func__, osd_req);
rbd_assert(osd_req == obj_request->osd_req);
if (obj_request_img_data_test(obj_request)) {
rbd_assert(obj_request->img_request);
@@ -1878,10 +1865,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
case CEPH_OSD_OP_CALL:
rbd_osd_call_callback(obj_request);
break;
- case CEPH_OSD_OP_NOTIFY_ACK:
- case CEPH_OSD_OP_WATCH:
- rbd_osd_trivial_callback(obj_request);
- break;
default:
rbd_warn(NULL, "%s: unsupported op %hu",
obj_request->object_name, (unsigned short) opcode);
@@ -1896,27 +1879,17 @@ static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
{
struct rbd_img_request *img_request = obj_request->img_request;
struct ceph_osd_request *osd_req = obj_request->osd_req;
- u64 snap_id;
- rbd_assert(osd_req != NULL);
-
- snap_id = img_request ? img_request->snap_id : CEPH_NOSNAP;
- ceph_osdc_build_request(osd_req, obj_request->offset,
- NULL, snap_id, NULL);
+ if (img_request)
+ osd_req->r_snapid = img_request->snap_id;
}
static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
{
- struct rbd_img_request *img_request = obj_request->img_request;
struct ceph_osd_request *osd_req = obj_request->osd_req;
- struct ceph_snap_context *snapc;
- struct timespec mtime = CURRENT_TIME;
- rbd_assert(osd_req != NULL);
-
- snapc = img_request ? img_request->snapc : NULL;
- ceph_osdc_build_request(osd_req, obj_request->offset,
- snapc, CEPH_NOSNAP, &mtime);
+ osd_req->r_mtime = CURRENT_TIME;
+ osd_req->r_data_offset = obj_request->offset;
}
/*
@@ -1954,7 +1927,7 @@ static struct ceph_osd_request *rbd_osd_req_create(
osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false,
GFP_NOIO);
if (!osd_req)
- return NULL; /* ENOMEM */
+ goto fail;
if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
@@ -1965,9 +1938,18 @@ static struct ceph_osd_request *rbd_osd_req_create(
osd_req->r_priv = obj_request;
osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
- ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
+ if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s",
+ obj_request->object_name))
+ goto fail;
+
+ if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO))
+ goto fail;
return osd_req;
+
+fail:
+ ceph_osdc_put_request(osd_req);
+ return NULL;
}
/*
@@ -2003,16 +1985,25 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops,
false, GFP_NOIO);
if (!osd_req)
- return NULL; /* ENOMEM */
+ goto fail;
osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
osd_req->r_callback = rbd_osd_req_callback;
osd_req->r_priv = obj_request;
osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
- ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
+ if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s",
+ obj_request->object_name))
+ goto fail;
+
+ if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO))
+ goto fail;
return osd_req;
+
+fail:
+ ceph_osdc_put_request(osd_req);
+ return NULL;
}
@@ -2973,17 +2964,20 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request)
{
struct rbd_obj_request *obj_request;
struct rbd_obj_request *next_obj_request;
+ int ret = 0;
dout("%s: img %p\n", __func__, img_request);
- for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
- int ret;
+ rbd_img_request_get(img_request);
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
ret = rbd_img_obj_request_submit(obj_request);
if (ret)
- return ret;
+ goto out_put_ireq;
}
- return 0;
+out_put_ireq:
+ rbd_img_request_put(img_request);
+ return ret;
}
static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
@@ -3090,45 +3084,18 @@ out_err:
obj_request_done_set(obj_request);
}
-static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id)
-{
- struct rbd_obj_request *obj_request;
- struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
- int ret;
-
- obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
- OBJ_REQUEST_NODATA);
- if (!obj_request)
- return -ENOMEM;
-
- ret = -ENOMEM;
- obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
- obj_request);
- if (!obj_request->osd_req)
- goto out;
-
- osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK,
- notify_id, 0, 0);
- rbd_osd_req_format_read(obj_request);
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev);
+static void __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev);
- ret = rbd_obj_request_submit(osdc, obj_request);
- if (ret)
- goto out;
- ret = rbd_obj_request_wait(obj_request);
-out:
- rbd_obj_request_put(obj_request);
-
- return ret;
-}
-
-static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
+static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
+ u64 notifier_id, void *data, size_t data_len)
{
- struct rbd_device *rbd_dev = (struct rbd_device *)data;
+ struct rbd_device *rbd_dev = arg;
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
int ret;
- dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
- rbd_dev->header_name, (unsigned long long)notify_id,
- (unsigned int)opcode);
+ dout("%s rbd_dev %p cookie %llu notify_id %llu\n", __func__, rbd_dev,
+ cookie, notify_id);
/*
* Until adequate refresh error handling is in place, there is
@@ -3140,63 +3107,31 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
if (ret)
rbd_warn(rbd_dev, "refresh failed: %d", ret);
- ret = rbd_obj_notify_ack_sync(rbd_dev, notify_id);
+ ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, notify_id, cookie,
+ NULL, 0);
if (ret)
rbd_warn(rbd_dev, "notify_ack ret %d", ret);
}
-/*
- * Send a (un)watch request and wait for the ack. Return a request
- * with a ref held on success or error.
- */
-static struct rbd_obj_request *rbd_obj_watch_request_helper(
- struct rbd_device *rbd_dev,
- bool watch)
+static void rbd_watch_errcb(void *arg, u64 cookie, int err)
{
- struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
- struct ceph_options *opts = osdc->client->options;
- struct rbd_obj_request *obj_request;
+ struct rbd_device *rbd_dev = arg;
int ret;
- obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
- OBJ_REQUEST_NODATA);
- if (!obj_request)
- return ERR_PTR(-ENOMEM);
-
- obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_WRITE, 1,
- obj_request);
- if (!obj_request->osd_req) {
- ret = -ENOMEM;
- goto out;
- }
-
- osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
- rbd_dev->watch_event->cookie, 0, watch);
- rbd_osd_req_format_write(obj_request);
+ rbd_warn(rbd_dev, "encountered watch error: %d", err);
- if (watch)
- ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
-
- ret = rbd_obj_request_submit(osdc, obj_request);
- if (ret)
- goto out;
+ __rbd_dev_header_unwatch_sync(rbd_dev);
- ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout);
- if (ret)
- goto out;
-
- ret = obj_request->result;
+ ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret) {
- if (watch)
- rbd_obj_request_end(obj_request);
- goto out;
+ rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
+ return;
}
- return obj_request;
-
-out:
- rbd_obj_request_put(obj_request);
- return ERR_PTR(ret);
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
}
/*
@@ -3205,35 +3140,33 @@ out:
static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
- struct rbd_obj_request *obj_request;
- int ret;
+ struct ceph_osd_linger_request *handle;
- rbd_assert(!rbd_dev->watch_event);
- rbd_assert(!rbd_dev->watch_request);
+ rbd_assert(!rbd_dev->watch_handle);
- ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
- &rbd_dev->watch_event);
- if (ret < 0)
- return ret;
+ handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid,
+ &rbd_dev->header_oloc, rbd_watch_cb,
+ rbd_watch_errcb, rbd_dev);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
- obj_request = rbd_obj_watch_request_helper(rbd_dev, true);
- if (IS_ERR(obj_request)) {
- ceph_osdc_cancel_event(rbd_dev->watch_event);
- rbd_dev->watch_event = NULL;
- return PTR_ERR(obj_request);
- }
+ rbd_dev->watch_handle = handle;
+ return 0;
+}
- /*
- * A watch request is set to linger, so the underlying osd
- * request won't go away until we unregister it. We retain
- * a pointer to the object request during that time (in
- * rbd_dev->watch_request), so we'll keep a reference to it.
- * We'll drop that reference after we've unregistered it in
- * rbd_dev_header_unwatch_sync().
- */
- rbd_dev->watch_request = obj_request;
+static void __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
+{
+ struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ int ret;
- return 0;
+ if (!rbd_dev->watch_handle)
+ return;
+
+ ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle);
+ if (ret)
+ rbd_warn(rbd_dev, "failed to unwatch: %d", ret);
+
+ rbd_dev->watch_handle = NULL;
}
/*
@@ -3241,24 +3174,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
*/
static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
{
- struct rbd_obj_request *obj_request;
-
- rbd_assert(rbd_dev->watch_event);
- rbd_assert(rbd_dev->watch_request);
-
- rbd_obj_request_end(rbd_dev->watch_request);
- rbd_obj_request_put(rbd_dev->watch_request);
- rbd_dev->watch_request = NULL;
-
- obj_request = rbd_obj_watch_request_helper(rbd_dev, false);
- if (!IS_ERR(obj_request))
- rbd_obj_request_put(obj_request);
- else
- rbd_warn(rbd_dev, "unable to tear down watch request (%ld)",
- PTR_ERR(obj_request));
-
- ceph_osdc_cancel_event(rbd_dev->watch_event);
- rbd_dev->watch_event = NULL;
+ __rbd_dev_header_unwatch_sync(rbd_dev);
dout("%s flushing notifies\n", __func__);
ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
@@ -3591,7 +3507,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
if (!ondisk)
return -ENOMEM;
- ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_oid.name,
0, size, ondisk);
if (ret < 0)
goto out;
@@ -4033,6 +3949,8 @@ static void rbd_dev_release(struct device *dev)
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
bool need_put = !!rbd_dev->opts;
+ ceph_oid_destroy(&rbd_dev->header_oid);
+
rbd_put_client(rbd_dev->rbd_client);
rbd_spec_put(rbd_dev->spec);
kfree(rbd_dev->opts);
@@ -4063,6 +3981,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
INIT_LIST_HEAD(&rbd_dev->node);
init_rwsem(&rbd_dev->header_rwsem);
+ ceph_oid_init(&rbd_dev->header_oid);
+ ceph_oloc_init(&rbd_dev->header_oloc);
+
rbd_dev->dev.bus = &rbd_bus_type;
rbd_dev->dev.type = &rbd_device_type;
rbd_dev->dev.parent = &rbd_root_dev;
@@ -4111,7 +4032,7 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
__le64 size;
} __attribute__ ((packed)) size_buf = { 0 };
- ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
"rbd", "get_size",
&snapid, sizeof (snapid),
&size_buf, sizeof (size_buf));
@@ -4151,7 +4072,7 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
if (!reply_buf)
return -ENOMEM;
- ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
"rbd", "get_object_prefix", NULL, 0,
reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
@@ -4186,7 +4107,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
u64 unsup;
int ret;
- ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
"rbd", "get_features",
&snapid, sizeof (snapid),
&features_buf, sizeof (features_buf));
@@ -4248,7 +4169,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
}
snapid = cpu_to_le64(rbd_dev->spec->snap_id);
- ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
"rbd", "get_parent",
&snapid, sizeof (snapid),
reply_buf, size);
@@ -4351,7 +4272,7 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
u64 stripe_count;
int ret;
- ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
"rbd", "get_stripe_unit_count", NULL, 0,
(char *)&striping_info_buf, size);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
@@ -4599,7 +4520,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
if (!reply_buf)
return -ENOMEM;
- ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
"rbd", "get_snapcontext", NULL, 0,
reply_buf, size);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
@@ -4664,7 +4585,7 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
return ERR_PTR(-ENOMEM);
snapid = cpu_to_le64(snap_id);
- ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
+ ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name,
"rbd", "get_snapshot_name",
&snapid, sizeof (snapid),
reply_buf, size);
@@ -4975,13 +4896,13 @@ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
again:
ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
if (ret == -ENOENT && tries++ < 1) {
- ret = ceph_monc_do_get_version(&rbdc->client->monc, "osdmap",
- &newest_epoch);
+ ret = ceph_monc_get_version(&rbdc->client->monc, "osdmap",
+ &newest_epoch);
if (ret < 0)
return ret;
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
- ceph_monc_request_next_osdmap(&rbdc->client->monc);
+ ceph_osdc_maybe_request_map(&rbdc->client->osdc);
(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
newest_epoch,
opts->mount_timeout);
@@ -5260,35 +5181,26 @@ err_out_unlock:
static int rbd_dev_header_name(struct rbd_device *rbd_dev)
{
struct rbd_spec *spec = rbd_dev->spec;
- size_t size;
+ int ret;
/* Record the header object name for this rbd image. */
rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_dev->header_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
if (rbd_dev->image_format == 1)
- size = strlen(spec->image_name) + sizeof (RBD_SUFFIX);
+ ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
+ spec->image_name, RBD_SUFFIX);
else
- size = sizeof (RBD_HEADER_PREFIX) + strlen(spec->image_id);
-
- rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
- if (!rbd_dev->header_name)
- return -ENOMEM;
+ ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
+ RBD_HEADER_PREFIX, spec->image_id);
- if (rbd_dev->image_format == 1)
- sprintf(rbd_dev->header_name, "%s%s",
- spec->image_name, RBD_SUFFIX);
- else
- sprintf(rbd_dev->header_name, "%s%s",
- RBD_HEADER_PREFIX, spec->image_id);
- return 0;
+ return ret;
}
static void rbd_dev_image_release(struct rbd_device *rbd_dev)
{
rbd_dev_unprobe(rbd_dev);
- kfree(rbd_dev->header_name);
- rbd_dev->header_name = NULL;
rbd_dev->image_format = 0;
kfree(rbd_dev->spec->image_id);
rbd_dev->spec->image_id = NULL;
@@ -5327,7 +5239,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
pr_info("image %s/%s does not exist\n",
rbd_dev->spec->pool_name,
rbd_dev->spec->image_name);
- goto out_header_name;
+ goto err_out_format;
}
}
@@ -5373,7 +5285,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
goto err_out_probe;
dout("discovered format %u image, header name is %s\n",
- rbd_dev->image_format, rbd_dev->header_name);
+ rbd_dev->image_format, rbd_dev->header_oid.name);
return 0;
err_out_probe:
@@ -5381,9 +5293,6 @@ err_out_probe:
err_out_watch:
if (!depth)
rbd_dev_header_unwatch_sync(rbd_dev);
-out_header_name:
- kfree(rbd_dev->header_name);
- rbd_dev->header_name = NULL;
err_out_format:
rbd_dev->image_format = 0;
kfree(rbd_dev->spec->image_id);
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 586f9168ffa4..910e065918af 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -133,7 +133,6 @@ MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
#define SKD_TIMER_MINUTES(minutes) ((minutes) * (60))
#define INQ_STD_NBYTES 36
-#define SKD_DISCARD_CDB_LENGTH 24
enum skd_drvr_state {
SKD_DRVR_STATE_LOAD,
@@ -212,7 +211,6 @@ struct skd_request_context {
struct request *req;
u8 flush_cmd;
- u8 discard_page;
u32 timeout_stamp;
u8 sg_data_dir;
@@ -230,7 +228,6 @@ struct skd_request_context {
};
#define SKD_DATA_DIR_HOST_TO_CARD 1
#define SKD_DATA_DIR_CARD_TO_HOST 2
-#define SKD_DATA_DIR_NONE 3 /* especially for DISCARD requests. */
struct skd_special_context {
struct skd_request_context req;
@@ -540,31 +537,6 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
scsi_req->cdb[9] = 0;
}
-static void
-skd_prep_discard_cdb(struct skd_scsi_request *scsi_req,
- struct skd_request_context *skreq,
- struct page *page,
- u32 lba, u32 count)
-{
- char *buf;
- unsigned long len;
- struct request *req;
-
- buf = page_address(page);
- len = SKD_DISCARD_CDB_LENGTH;
-
- scsi_req->cdb[0] = UNMAP;
- scsi_req->cdb[8] = len;
-
- put_unaligned_be16(6 + 16, &buf[0]);
- put_unaligned_be16(16, &buf[2]);
- put_unaligned_be64(lba, &buf[8]);
- put_unaligned_be32(count, &buf[16]);
-
- req = skreq->req;
- blk_add_request_payload(req, page, len);
-}
-
static void skd_request_fn_not_online(struct request_queue *q);
static void skd_request_fn(struct request_queue *q)
@@ -575,7 +547,6 @@ static void skd_request_fn(struct request_queue *q)
struct skd_request_context *skreq;
struct request *req = NULL;
struct skd_scsi_request *scsi_req;
- struct page *page;
unsigned long io_flags;
int error;
u32 lba;
@@ -669,7 +640,6 @@ static void skd_request_fn(struct request_queue *q)
skreq->flush_cmd = 0;
skreq->n_sg = 0;
skreq->sg_byte_count = 0;
- skreq->discard_page = 0;
/*
* OK to now dequeue request from q.
@@ -735,18 +705,7 @@ static void skd_request_fn(struct request_queue *q)
else
skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
- if (io_flags & REQ_DISCARD) {
- page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
- if (!page) {
- pr_err("request_fn:Page allocation failed.\n");
- skd_end_request(skdev, skreq, -ENOMEM);
- break;
- }
- skreq->discard_page = 1;
- req->completion_data = page;
- skd_prep_discard_cdb(scsi_req, skreq, page, lba, count);
-
- } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
+ if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
skd_prep_zerosize_flush_cdb(scsi_req, skreq);
SKD_ASSERT(skreq->flush_cmd == 1);
@@ -851,16 +810,6 @@ skip_sg:
static void skd_end_request(struct skd_device *skdev,
struct skd_request_context *skreq, int error)
{
- struct request *req = skreq->req;
- unsigned int io_flags = req->cmd_flags;
-
- if ((io_flags & REQ_DISCARD) &&
- (skreq->discard_page == 1)) {
- pr_debug("%s:%s:%d, free the page!",
- skdev->name, __func__, __LINE__);
- __free_page(req->completion_data);
- }
-
if (unlikely(error)) {
struct request *req = skreq->req;
char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
@@ -4412,19 +4361,13 @@ static int skd_cons_disk(struct skd_device *skdev)
disk->queue = q;
q->queuedata = skdev;
- blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
+ blk_queue_write_cache(q, true, true);
blk_queue_max_segments(q, skdev->sgs_per_request);
blk_queue_max_hw_sectors(q, SKD_N_MAX_SECTORS);
/* set sysfs ptimal_io_size to 8K */
blk_queue_io_opt(q, 8192);
- /* DISCARD Flag initialization. */
- q->limits.discard_granularity = 8192;
- q->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
- q->limits.discard_zeroes_data = 1;
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 28cff0d23d82..42758b52768c 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -493,11 +493,7 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev)
u8 writeback = virtblk_get_cache_mode(vdev);
struct virtio_blk *vblk = vdev->priv;
- if (writeback)
- blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
- else
- blk_queue_flush(vblk->disk->queue, 0);
-
+ blk_queue_write_cache(vblk->disk->queue, writeback, false);
revalidate_disk(vblk->disk);
}
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 26aa080e243c..3355f1cdd4e5 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -477,7 +477,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
vbd->type |= VDISK_REMOVABLE;
q = bdev_get_queue(bdev);
- if (q && q->flush_flags)
+ if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
vbd->flush_support = true;
if (q && blk_queue_secdiscard(q))
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 6405b6557792..ca13df854639 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -998,7 +998,8 @@ static const char *flush_info(unsigned int feature_flush)
static void xlvbd_flush(struct blkfront_info *info)
{
- blk_queue_flush(info->rq, info->feature_flush);
+ blk_queue_write_cache(info->rq, info->feature_flush & REQ_FLUSH,
+ info->feature_flush & REQ_FUA);
pr_info("blkfront: %s: %s %s %s %s %s\n",
info->gd->disk_name, flush_info(info->feature_flush),
"persistent grants:", info->feature_persistent ?
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 3ef42e563bb5..b51a816d766b 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/cpu.h>
#include "zcomp.h"
#include "zcomp_lzo.h"
@@ -20,29 +21,6 @@
#include "zcomp_lz4.h"
#endif
-/*
- * single zcomp_strm backend
- */
-struct zcomp_strm_single {
- struct mutex strm_lock;
- struct zcomp_strm *zstrm;
-};
-
-/*
- * multi zcomp_strm backend
- */
-struct zcomp_strm_multi {
- /* protect strm list */
- spinlock_t strm_lock;
- /* max possible number of zstrm streams */
- int max_strm;
- /* number of available zstrm streams */
- int avail_strm;
- /* list of available strms */
- struct list_head idle_strm;
- wait_queue_head_t strm_wait;
-};
-
static struct zcomp_backend *backends[] = {
&zcomp_lzo,
#ifdef CONFIG_ZRAM_LZ4_COMPRESS
@@ -93,188 +71,6 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags)
return zstrm;
}
-/*
- * get idle zcomp_strm or wait until other process release
- * (zcomp_strm_release()) one for us
- */
-static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
-{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- while (1) {
- spin_lock(&zs->strm_lock);
- if (!list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- spin_unlock(&zs->strm_lock);
- return zstrm;
- }
- /* zstrm streams limit reached, wait for idle stream */
- if (zs->avail_strm >= zs->max_strm) {
- spin_unlock(&zs->strm_lock);
- wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
- continue;
- }
- /* allocate new zstrm stream */
- zs->avail_strm++;
- spin_unlock(&zs->strm_lock);
- /*
- * This function can be called in swapout/fs write path
- * so we can't use GFP_FS|IO. And it assumes we already
- * have at least one stream in zram initialization so we
- * don't do best effort to allocate more stream in here.
- * A default stream will work well without further multiple
- * streams. That's why we use NORETRY | NOWARN.
- */
- zstrm = zcomp_strm_alloc(comp, GFP_NOIO | __GFP_NORETRY |
- __GFP_NOWARN);
- if (!zstrm) {
- spin_lock(&zs->strm_lock);
- zs->avail_strm--;
- spin_unlock(&zs->strm_lock);
- wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
- continue;
- }
- break;
- }
- return zstrm;
-}
-
-/* add stream back to idle list and wake up waiter or free the stream */
-static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm)
-{
- struct zcomp_strm_multi *zs = comp->stream;
-
- spin_lock(&zs->strm_lock);
- if (zs->avail_strm <= zs->max_strm) {
- list_add(&zstrm->list, &zs->idle_strm);
- spin_unlock(&zs->strm_lock);
- wake_up(&zs->strm_wait);
- return;
- }
-
- zs->avail_strm--;
- spin_unlock(&zs->strm_lock);
- zcomp_strm_free(comp, zstrm);
-}
-
-/* change max_strm limit */
-static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm)
-{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- spin_lock(&zs->strm_lock);
- zs->max_strm = num_strm;
- /*
- * if user has lowered the limit and there are idle streams,
- * immediately free as much streams (and memory) as we can.
- */
- while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- zcomp_strm_free(comp, zstrm);
- zs->avail_strm--;
- }
- spin_unlock(&zs->strm_lock);
- return true;
-}
-
-static void zcomp_strm_multi_destroy(struct zcomp *comp)
-{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- while (!list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- zcomp_strm_free(comp, zstrm);
- }
- kfree(zs);
-}
-
-static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
-{
- struct zcomp_strm *zstrm;
- struct zcomp_strm_multi *zs;
-
- comp->destroy = zcomp_strm_multi_destroy;
- comp->strm_find = zcomp_strm_multi_find;
- comp->strm_release = zcomp_strm_multi_release;
- comp->set_max_streams = zcomp_strm_multi_set_max_streams;
- zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL);
- if (!zs)
- return -ENOMEM;
-
- comp->stream = zs;
- spin_lock_init(&zs->strm_lock);
- INIT_LIST_HEAD(&zs->idle_strm);
- init_waitqueue_head(&zs->strm_wait);
- zs->max_strm = max_strm;
- zs->avail_strm = 1;
-
- zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
- if (!zstrm) {
- kfree(zs);
- return -ENOMEM;
- }
- list_add(&zstrm->list, &zs->idle_strm);
- return 0;
-}
-
-static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp)
-{
- struct zcomp_strm_single *zs = comp->stream;
- mutex_lock(&zs->strm_lock);
- return zs->zstrm;
-}
-
-static void zcomp_strm_single_release(struct zcomp *comp,
- struct zcomp_strm *zstrm)
-{
- struct zcomp_strm_single *zs = comp->stream;
- mutex_unlock(&zs->strm_lock);
-}
-
-static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm)
-{
- /* zcomp_strm_single support only max_comp_streams == 1 */
- return false;
-}
-
-static void zcomp_strm_single_destroy(struct zcomp *comp)
-{
- struct zcomp_strm_single *zs = comp->stream;
- zcomp_strm_free(comp, zs->zstrm);
- kfree(zs);
-}
-
-static int zcomp_strm_single_create(struct zcomp *comp)
-{
- struct zcomp_strm_single *zs;
-
- comp->destroy = zcomp_strm_single_destroy;
- comp->strm_find = zcomp_strm_single_find;
- comp->strm_release = zcomp_strm_single_release;
- comp->set_max_streams = zcomp_strm_single_set_max_streams;
- zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL);
- if (!zs)
- return -ENOMEM;
-
- comp->stream = zs;
- mutex_init(&zs->strm_lock);
- zs->zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
- if (!zs->zstrm) {
- kfree(zs);
- return -ENOMEM;
- }
- return 0;
-}
-
/* show available compressors */
ssize_t zcomp_available_show(const char *comp, char *buf)
{
@@ -299,19 +95,14 @@ bool zcomp_available_algorithm(const char *comp)
return find_backend(comp) != NULL;
}
-bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
-{
- return comp->set_max_streams(comp, num_strm);
-}
-
struct zcomp_strm *zcomp_strm_find(struct zcomp *comp)
{
- return comp->strm_find(comp);
+ return *get_cpu_ptr(comp->stream);
}
void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm)
{
- comp->strm_release(comp, zstrm);
+ put_cpu_ptr(comp->stream);
}
int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
@@ -327,9 +118,83 @@ int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
return comp->backend->decompress(src, src_len, dst);
}
+static int __zcomp_cpu_notifier(struct zcomp *comp,
+ unsigned long action, unsigned long cpu)
+{
+ struct zcomp_strm *zstrm;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
+ break;
+ zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
+ if (IS_ERR_OR_NULL(zstrm)) {
+ pr_err("Can't allocate a compression stream\n");
+ return NOTIFY_BAD;
+ }
+ *per_cpu_ptr(comp->stream, cpu) = zstrm;
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ zstrm = *per_cpu_ptr(comp->stream, cpu);
+ if (!IS_ERR_OR_NULL(zstrm))
+ zcomp_strm_free(comp, zstrm);
+ *per_cpu_ptr(comp->stream, cpu) = NULL;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static int zcomp_cpu_notifier(struct notifier_block *nb,
+ unsigned long action, void *pcpu)
+{
+ unsigned long cpu = (unsigned long)pcpu;
+ struct zcomp *comp = container_of(nb, typeof(*comp), notifier);
+
+ return __zcomp_cpu_notifier(comp, action, cpu);
+}
+
+static int zcomp_init(struct zcomp *comp)
+{
+ unsigned long cpu;
+ int ret;
+
+ comp->notifier.notifier_call = zcomp_cpu_notifier;
+
+ comp->stream = alloc_percpu(struct zcomp_strm *);
+ if (!comp->stream)
+ return -ENOMEM;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu) {
+ ret = __zcomp_cpu_notifier(comp, CPU_UP_PREPARE, cpu);
+ if (ret == NOTIFY_BAD)
+ goto cleanup;
+ }
+ __register_cpu_notifier(&comp->notifier);
+ cpu_notifier_register_done();
+ return 0;
+
+cleanup:
+ for_each_online_cpu(cpu)
+ __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
+ cpu_notifier_register_done();
+ return -ENOMEM;
+}
+
void zcomp_destroy(struct zcomp *comp)
{
- comp->destroy(comp);
+ unsigned long cpu;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
+ __unregister_cpu_notifier(&comp->notifier);
+ cpu_notifier_register_done();
+
+ free_percpu(comp->stream);
kfree(comp);
}
@@ -339,9 +204,9 @@ void zcomp_destroy(struct zcomp *comp)
* backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL)
* if requested algorithm is not supported, ERR_PTR(-ENOMEM) in
* case of allocation error, or any other error potentially
- * returned by functions zcomp_strm_{multi,single}_create.
+ * returned by zcomp_init().
*/
-struct zcomp *zcomp_create(const char *compress, int max_strm)
+struct zcomp *zcomp_create(const char *compress)
{
struct zcomp *comp;
struct zcomp_backend *backend;
@@ -356,10 +221,7 @@ struct zcomp *zcomp_create(const char *compress, int max_strm)
return ERR_PTR(-ENOMEM);
comp->backend = backend;
- if (max_strm > 1)
- error = zcomp_strm_multi_create(comp, max_strm);
- else
- error = zcomp_strm_single_create(comp);
+ error = zcomp_init(comp);
if (error) {
kfree(comp);
return ERR_PTR(error);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index b7d2a4bcae54..ffd88cb747fe 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -10,8 +10,6 @@
#ifndef _ZCOMP_H_
#define _ZCOMP_H_
-#include <linux/mutex.h>
-
struct zcomp_strm {
/* compression/decompression buffer */
void *buffer;
@@ -21,8 +19,6 @@ struct zcomp_strm {
* working memory)
*/
void *private;
- /* used in multi stream backend, protected by backend strm_lock */
- struct list_head list;
};
/* static compression backend */
@@ -41,19 +37,15 @@ struct zcomp_backend {
/* dynamic per-device compression frontend */
struct zcomp {
- void *stream;
+ struct zcomp_strm * __percpu *stream;
struct zcomp_backend *backend;
-
- struct zcomp_strm *(*strm_find)(struct zcomp *comp);
- void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm);
- bool (*set_max_streams)(struct zcomp *comp, int num_strm);
- void (*destroy)(struct zcomp *comp);
+ struct notifier_block notifier;
};
ssize_t zcomp_available_show(const char *comp, char *buf);
bool zcomp_available_algorithm(const char *comp);
-struct zcomp *zcomp_create(const char *comp, int max_strm);
+struct zcomp *zcomp_create(const char *comp);
void zcomp_destroy(struct zcomp *comp);
struct zcomp_strm *zcomp_strm_find(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 370c2f76016d..8fcad8b761f1 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -304,46 +304,25 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+/*
+ * We switched to per-cpu streams and this attr is not needed anymore.
+ * However, we will keep it around for some time, because:
+ * a) we may revert per-cpu streams in the future
+ * b) it's visible to user space and we need to follow our 2 years
+ * retirement rule; but we already have a number of 'soon to be
+ * altered' attrs, so max_comp_streams need to wait for the next
+ * layoff cycle.
+ */
static ssize_t max_comp_streams_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int val;
- struct zram *zram = dev_to_zram(dev);
-
- down_read(&zram->init_lock);
- val = zram->max_comp_streams;
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}
static ssize_t max_comp_streams_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
- int num;
- struct zram *zram = dev_to_zram(dev);
- int ret;
-
- ret = kstrtoint(buf, 0, &num);
- if (ret < 0)
- return ret;
- if (num < 1)
- return -EINVAL;
-
- down_write(&zram->init_lock);
- if (init_done(zram)) {
- if (!zcomp_set_max_streams(zram->comp, num)) {
- pr_info("Cannot change max compression streams\n");
- ret = -EINVAL;
- goto out;
- }
- }
-
- zram->max_comp_streams = num;
- ret = len;
-out:
- up_write(&zram->init_lock);
- return ret;
+ return len;
}
static ssize_t comp_algorithm_show(struct device *dev,
@@ -456,8 +435,26 @@ static ssize_t mm_stat_show(struct device *dev,
return ret;
}
+static ssize_t debug_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int version = 1;
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ ret = scnprintf(buf, PAGE_SIZE,
+ "version: %d\n%8llu\n",
+ version,
+ (u64)atomic64_read(&zram->stats.writestall));
+ up_read(&zram->init_lock);
+
+ return ret;
+}
+
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
+static DEVICE_ATTR_RO(debug_stat);
ZRAM_ATTR_RO(num_reads);
ZRAM_ATTR_RO(num_writes);
ZRAM_ATTR_RO(failed_reads);
@@ -514,7 +511,7 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
goto out_error;
}
- meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
+ meta->mem_pool = zs_create_pool(pool_name);
if (!meta->mem_pool) {
pr_err("Error creating memory pool\n");
goto out_error;
@@ -650,7 +647,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
{
int ret = 0;
size_t clen;
- unsigned long handle;
+ unsigned long handle = 0;
struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
@@ -673,9 +670,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}
- zstrm = zcomp_strm_find(zram->comp);
+compress_again:
user_mem = kmap_atomic(page);
-
if (is_partial_io(bvec)) {
memcpy(uncmem + offset, user_mem + bvec->bv_offset,
bvec->bv_len);
@@ -699,6 +695,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}
+ zstrm = zcomp_strm_find(zram->comp);
ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
if (!is_partial_io(bvec)) {
kunmap_atomic(user_mem);
@@ -710,6 +707,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
pr_err("Compression failed! err=%d\n", ret);
goto out;
}
+
src = zstrm->buffer;
if (unlikely(clen > max_zpage_size)) {
clen = PAGE_SIZE;
@@ -717,8 +715,35 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
src = uncmem;
}
- handle = zs_malloc(meta->mem_pool, clen);
+ /*
+ * handle allocation has 2 paths:
+ * a) fast path is executed with preemption disabled (for
+ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
+ * since we can't sleep;
+ * b) slow path enables preemption and attempts to allocate
+ * the page with __GFP_DIRECT_RECLAIM bit set. we have to
+ * put per-cpu compression stream and, thus, to re-do
+ * the compression once handle is allocated.
+ *
+ * if we have a 'non-null' handle here then we are coming
+ * from the slow path and handle has already been allocated.
+ */
+ if (!handle)
+ handle = zs_malloc(meta->mem_pool, clen,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM);
if (!handle) {
+ zcomp_strm_release(zram->comp, zstrm);
+ zstrm = NULL;
+
+ atomic64_inc(&zram->stats.writestall);
+
+ handle = zs_malloc(meta->mem_pool, clen,
+ GFP_NOIO | __GFP_HIGHMEM);
+ if (handle)
+ goto compress_again;
+
pr_err("Error allocating memory for compressed page: %u, size=%zu\n",
index, clen);
ret = -ENOMEM;
@@ -1009,7 +1034,6 @@ static void zram_reset_device(struct zram *zram)
/* Reset stats */
memset(&zram->stats, 0, sizeof(zram->stats));
zram->disksize = 0;
- zram->max_comp_streams = 1;
set_capacity(zram->disk, 0);
part_stat_set_all(&zram->disk->part0, 0);
@@ -1038,7 +1062,7 @@ static ssize_t disksize_store(struct device *dev,
if (!meta)
return -ENOMEM;
- comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+ comp = zcomp_create(zram->compressor);
if (IS_ERR(comp)) {
pr_err("Cannot initialise %s compressing backend\n",
zram->compressor);
@@ -1177,6 +1201,7 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_comp_algorithm.attr,
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
+ &dev_attr_debug_stat.attr,
NULL,
};
@@ -1273,7 +1298,6 @@ static int zram_add(void)
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram->meta = NULL;
- zram->max_comp_streams = 1;
pr_info("Added device: %s\n", zram->disk->disk_name);
return device_id;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 8e92339686d7..3f5bf66a27e4 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -85,6 +85,7 @@ struct zram_stats {
atomic64_t zero_pages; /* no. of zero filled pages */
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
+ atomic64_t writestall; /* no. of write slow paths */
};
struct zram_meta {
@@ -102,7 +103,6 @@ struct zram {
* the number of pages zram can consume for storing compressed data
*/
unsigned long limit_pages;
- int max_comp_streams;
struct zram_stats stats;
atomic_t refcount; /* refcount for zram_meta */