summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/aoe/aoe.h4
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/aoe/aoecmd.c27
-rw-r--r--drivers/block/aoe/aoedev.c11
-rw-r--r--drivers/block/aoe/aoemain.c2
-rw-r--r--drivers/block/ataflop.c26
-rw-r--r--drivers/block/drbd/drbd_main.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c2
-rw-r--r--drivers/block/floppy.c6
-rw-r--r--drivers/block/loop.c415
-rw-r--r--drivers/block/loop.h1
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c226
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h48
-rw-r--r--drivers/block/nbd.c3
-rw-r--r--drivers/block/null_blk.h1
-rw-r--r--drivers/block/null_blk_main.c21
-rw-r--r--drivers/block/null_blk_zoned.c27
-rw-r--r--drivers/block/paride/pd.c30
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/skd_main.c16
-rw-r--r--drivers/block/sunvdc.c153
-rw-r--r--drivers/block/sx8.c434
-rw-r--r--drivers/block/umem.c3
-rw-r--r--drivers/block/virtio_blk.c17
-rw-r--r--drivers/block/zram/Kconfig5
-rw-r--r--drivers/block/zram/zram_drv.c502
-rw-r--r--drivers/block/zram/zram_drv.h19
27 files changed, 1082 insertions, 922 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 7ca76ed2e71a..84d0fcebd6af 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -100,6 +100,10 @@ enum {
MAX_TAINT = 1000, /* cap on aoetgt taint */
};
+struct aoe_req {
+ unsigned long nr_bios;
+};
+
struct buf {
ulong nframesout;
struct bio *bio;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ed26b7287256..e2c6aae2d636 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -387,6 +387,7 @@ aoeblk_gdalloc(void *vp)
set = &d->tag_set;
set->ops = &aoeblk_mq_ops;
+ set->cmd_size = sizeof(struct aoe_req);
set->nr_hw_queues = 1;
set->queue_depth = 128;
set->numa_node = NUMA_NO_NODE;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index bb2fba651bd2..3cf9bc5d8d95 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -822,17 +822,6 @@ out:
spin_unlock_irqrestore(&d->lock, flags);
}
-static unsigned long
-rqbiocnt(struct request *r)
-{
- struct bio *bio;
- unsigned long n = 0;
-
- __rq_for_each_bio(bio, r)
- n++;
- return n;
-}
-
static void
bufinit(struct buf *buf, struct request *rq, struct bio *bio)
{
@@ -847,6 +836,7 @@ nextbuf(struct aoedev *d)
{
struct request *rq;
struct request_queue *q;
+ struct aoe_req *req;
struct buf *buf;
struct bio *bio;
@@ -865,7 +855,11 @@ nextbuf(struct aoedev *d)
blk_mq_start_request(rq);
d->ip.rq = rq;
d->ip.nxbio = rq->bio;
- rq->special = (void *) rqbiocnt(rq);
+
+ req = blk_mq_rq_to_pdu(rq);
+ req->nr_bios = 0;
+ __rq_for_each_bio(bio, rq)
+ req->nr_bios++;
}
buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
if (buf == NULL) {
@@ -1069,16 +1063,13 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
static void
aoe_end_buf(struct aoedev *d, struct buf *buf)
{
- struct request *rq;
- unsigned long n;
+ struct request *rq = buf->rq;
+ struct aoe_req *req = blk_mq_rq_to_pdu(rq);
if (buf == d->ip.buf)
d->ip.buf = NULL;
- rq = buf->rq;
mempool_free(buf, d->bufpool);
- n = (unsigned long) rq->special;
- rq->special = (void *) --n;
- if (n == 0)
+ if (--req->nr_bios == 0)
aoe_end_request(d, rq, 0);
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 9063f8efbd3b..5b49f1b33ebe 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -160,21 +160,22 @@ static void
aoe_failip(struct aoedev *d)
{
struct request *rq;
+ struct aoe_req *req;
struct bio *bio;
- unsigned long n;
aoe_failbuf(d, d->ip.buf);
-
rq = d->ip.rq;
if (rq == NULL)
return;
+
+ req = blk_mq_rq_to_pdu(rq);
while ((bio = d->ip.nxbio)) {
bio->bi_status = BLK_STS_IOERR;
d->ip.nxbio = bio->bi_next;
- n = (unsigned long) rq->special;
- rq->special = (void *) --n;
+ req->nr_bios--;
}
- if ((unsigned long) rq->special == 0)
+
+ if (!req->nr_bios)
aoe_end_request(d, rq, 0);
}
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 251482066977..1e4e2971171c 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -24,7 +24,7 @@ static void discover_timer(struct timer_list *t)
aoecmd_cfg(0xffff, 0xff);
}
-static void
+static void __exit
aoe_exit(void)
{
del_timer_sync(&timer);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index f88b4c26d422..b0dbbdfeb33e 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1471,6 +1471,15 @@ static void setup_req_params( int drive )
ReqTrack, ReqSector, (unsigned long)ReqData ));
}
+static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+ spin_lock_irq(&ataflop_lock);
+ atari_disable_irq(IRQ_MFP_FDC);
+ finish_fdc();
+ atari_enable_irq(IRQ_MFP_FDC);
+ spin_unlock_irq(&ataflop_lock);
+}
+
static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -1947,6 +1956,7 @@ static const struct block_device_operations floppy_fops = {
static const struct blk_mq_ops ataflop_mq_ops = {
.queue_rq = ataflop_queue_rq,
+ .commit_rqs = ataflop_commit_rqs,
};
static struct kobject *floppy_find(dev_t dev, int *part, void *data)
@@ -1982,6 +1992,7 @@ static int __init atari_floppy_init (void)
&ataflop_mq_ops, 2,
BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(unit[i].disk->queue)) {
+ put_disk(unit[i].disk);
ret = PTR_ERR(unit[i].disk->queue);
unit[i].disk->queue = NULL;
goto err;
@@ -2033,18 +2044,13 @@ static int __init atari_floppy_init (void)
return 0;
err:
- do {
+ while (--i >= 0) {
struct gendisk *disk = unit[i].disk;
- if (disk) {
- if (disk->queue) {
- blk_cleanup_queue(disk->queue);
- disk->queue = NULL;
- }
- blk_mq_free_tag_set(&unit[i].tag_set);
- put_disk(unit[i].disk);
- }
- } while (i--);
+ blk_cleanup_queue(disk->queue);
+ blk_mq_free_tag_set(&unit[i].tag_set);
+ put_disk(unit[i].disk);
+ }
unregister_blkdev(FLOPPY_MAJOR, "fd");
return ret;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index fa8204214ac0..f973a2a845c8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2792,7 +2792,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
if (!q)
goto out_no_q;
device->rq_queue = q;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 61c392752fe4..ccfcf00f2798 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3623,7 +3623,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
* change.
*/
- peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
+ peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
if (IS_ERR(peer_integrity_tfm)) {
peer_integrity_tfm = NULL;
drbd_err(connection, "peer data-integrity-alg %s not supported\n",
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index fb23578e9a41..6f2856c6d0f2 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2231,7 +2231,6 @@ static void request_done(int uptodate)
{
struct request *req = current_req;
struct request_queue *q;
- unsigned long flags;
int block;
char msg[sizeof("request done ") + sizeof(int) * 3];
@@ -2254,10 +2253,7 @@ static void request_done(int uptodate)
if (block > _floppy->sect)
DRS->maxtrack = 1;
- /* unlock chained buffers */
- spin_lock_irqsave(q->queue_lock, flags);
floppy_end_request(req, 0);
- spin_unlock_irqrestore(q->queue_lock, flags);
} else {
if (rq_data_dir(req) == WRITE) {
/* record write error information */
@@ -2269,9 +2265,7 @@ static void request_done(int uptodate)
DRWE->last_error_sector = blk_rq_pos(req);
DRWE->last_error_generation = DRS->generation;
}
- spin_lock_irqsave(q->queue_lock, flags);
floppy_end_request(req, BLK_STS_IOERR);
- spin_unlock_irqrestore(q->queue_lock, flags);
}
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cb0cc8685076..0939f36548c9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,13 +77,14 @@
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/ioprio.h>
+#include <linux/blk-cgroup.h>
#include "loop.h"
#include <linux/uaccess.h>
static DEFINE_IDR(loop_index_idr);
-static DEFINE_MUTEX(loop_index_mutex);
+static DEFINE_MUTEX(loop_ctl_mutex);
static int max_part;
static int part_shift;
@@ -630,18 +631,7 @@ static void loop_reread_partitions(struct loop_device *lo,
{
int rc;
- /*
- * bd_mutex has been held already in release path, so don't
- * acquire it if this function is called in such case.
- *
- * If the reread partition isn't from release path, lo_refcnt
- * must be at least one and it can only become zero when the
- * current holder is released.
- */
- if (!atomic_read(&lo->lo_refcnt))
- rc = __blkdev_reread_part(bdev);
- else
- rc = blkdev_reread_part(bdev);
+ rc = blkdev_reread_part(bdev);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -688,26 +678,30 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
unsigned int arg)
{
- struct file *file, *old_file;
+ struct file *file = NULL, *old_file;
int error;
+ bool partscan;
+ error = mutex_lock_killable(&loop_ctl_mutex);
+ if (error)
+ return error;
error = -ENXIO;
if (lo->lo_state != Lo_bound)
- goto out;
+ goto out_err;
/* the loop device has to be read-only */
error = -EINVAL;
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
- goto out;
+ goto out_err;
error = -EBADF;
file = fget(arg);
if (!file)
- goto out;
+ goto out_err;
error = loop_validate_file(file, bdev);
if (error)
- goto out_putf;
+ goto out_err;
old_file = lo->lo_backing_file;
@@ -715,7 +709,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
/* size of the new backing store needs to be the same */
if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
- goto out_putf;
+ goto out_err;
/* and ... switch */
blk_mq_freeze_queue(lo->lo_queue);
@@ -726,15 +720,22 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
-
+ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
+ mutex_unlock(&loop_ctl_mutex);
+ /*
+ * We must drop file reference outside of loop_ctl_mutex as dropping
+ * the file ref can take bd_mutex which creates circular locking
+ * dependency.
+ */
fput(old_file);
- if (lo->lo_flags & LO_FLAGS_PARTSCAN)
+ if (partscan)
loop_reread_partitions(lo, bdev);
return 0;
- out_putf:
- fput(file);
- out:
+out_err:
+ mutex_unlock(&loop_ctl_mutex);
+ if (file)
+ fput(file);
return error;
}
@@ -909,6 +910,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
int lo_flags = 0;
int error;
loff_t size;
+ bool partscan;
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
@@ -918,13 +920,17 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (!file)
goto out;
+ error = mutex_lock_killable(&loop_ctl_mutex);
+ if (error)
+ goto out_putf;
+
error = -EBUSY;
if (lo->lo_state != Lo_unbound)
- goto out_putf;
+ goto out_unlock;
error = loop_validate_file(file, bdev);
if (error)
- goto out_putf;
+ goto out_unlock;
mapping = file->f_mapping;
inode = mapping->host;
@@ -936,10 +942,10 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
error = -EFBIG;
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
- goto out_putf;
+ goto out_unlock;
error = loop_prepare_queue(lo);
if (error)
- goto out_putf;
+ goto out_unlock;
error = 0;
@@ -971,18 +977,22 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
- if (lo->lo_flags & LO_FLAGS_PARTSCAN)
- loop_reread_partitions(lo, bdev);
+ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
/* Grab the block_device to prevent its destruction after we
- * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
+ * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/
bdgrab(bdev);
+ mutex_unlock(&loop_ctl_mutex);
+ if (partscan)
+ loop_reread_partitions(lo, bdev);
return 0;
- out_putf:
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
+out_putf:
fput(file);
- out:
+out:
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
@@ -1025,39 +1035,31 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
return err;
}
-static int loop_clr_fd(struct loop_device *lo)
+static int __loop_clr_fd(struct loop_device *lo, bool release)
{
- struct file *filp = lo->lo_backing_file;
+ struct file *filp = NULL;
gfp_t gfp = lo->old_gfp_mask;
struct block_device *bdev = lo->lo_device;
+ int err = 0;
+ bool partscan = false;
+ int lo_number;
- if (lo->lo_state != Lo_bound)
- return -ENXIO;
-
- /*
- * If we've explicitly asked to tear down the loop device,
- * and it has an elevated reference count, set it for auto-teardown when
- * the last reference goes away. This stops $!~#$@ udev from
- * preventing teardown because it decided that it needs to run blkid on
- * the loopback device whenever they appear. xfstests is notorious for
- * failing tests because blkid via udev races with a losetup
- * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
- * command to fail with EBUSY.
- */
- if (atomic_read(&lo->lo_refcnt) > 1) {
- lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- mutex_unlock(&lo->lo_ctl_mutex);
- return 0;
+ mutex_lock(&loop_ctl_mutex);
+ if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
+ err = -ENXIO;
+ goto out_unlock;
}
- if (filp == NULL)
- return -EINVAL;
+ filp = lo->lo_backing_file;
+ if (filp == NULL) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
/* freeze request queue during the transition */
blk_mq_freeze_queue(lo->lo_queue);
spin_lock_irq(&lo->lo_lock);
- lo->lo_state = Lo_rundown;
lo->lo_backing_file = NULL;
spin_unlock_irq(&lo->lo_lock);
@@ -1093,21 +1095,73 @@ static int loop_clr_fd(struct loop_device *lo)
module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
- if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
- loop_reread_partitions(lo, bdev);
+ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev;
+ lo_number = lo->lo_number;
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
loop_unprepare_queue(lo);
- mutex_unlock(&lo->lo_ctl_mutex);
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
+ if (partscan) {
+ /*
+ * bd_mutex has been held already in release path, so don't
+ * acquire it if this function is called in such case.
+ *
+ * If the reread partition isn't from release path, lo_refcnt
+ * must be at least one and it can only become zero when the
+ * current holder is released.
+ */
+ if (release)
+ err = __blkdev_reread_part(bdev);
+ else
+ err = blkdev_reread_part(bdev);
+ pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
+ __func__, lo_number, err);
+ /* Device is gone, no point in returning error */
+ err = 0;
+ }
/*
- * Need not hold lo_ctl_mutex to fput backing file.
- * Calling fput holding lo_ctl_mutex triggers a circular
+ * Need not hold loop_ctl_mutex to fput backing file.
+ * Calling fput holding loop_ctl_mutex triggers a circular
* lock dependency possibility warning as fput can take
- * bd_mutex which is usually taken before lo_ctl_mutex.
+ * bd_mutex which is usually taken before loop_ctl_mutex.
*/
- fput(filp);
- return 0;
+ if (filp)
+ fput(filp);
+ return err;
+}
+
+static int loop_clr_fd(struct loop_device *lo)
+{
+ int err;
+
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
+ if (lo->lo_state != Lo_bound) {
+ mutex_unlock(&loop_ctl_mutex);
+ return -ENXIO;
+ }
+ /*
+ * If we've explicitly asked to tear down the loop device,
+ * and it has an elevated reference count, set it for auto-teardown when
+ * the last reference goes away. This stops $!~#$@ udev from
+ * preventing teardown because it decided that it needs to run blkid on
+ * the loopback device whenever they appear. xfstests is notorious for
+ * failing tests because blkid via udev races with a losetup
+ * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
+ * command to fail with EBUSY.
+ */
+ if (atomic_read(&lo->lo_refcnt) > 1) {
+ lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
+ mutex_unlock(&loop_ctl_mutex);
+ return 0;
+ }
+ lo->lo_state = Lo_rundown;
+ mutex_unlock(&loop_ctl_mutex);
+
+ return __loop_clr_fd(lo, false);
}
static int
@@ -1116,47 +1170,58 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
+ struct block_device *bdev;
+ bool partscan = false;
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (lo->lo_state != Lo_bound)
- return -ENXIO;
- if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
- return -EINVAL;
+ !capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ if (lo->lo_state != Lo_bound) {
+ err = -ENXIO;
+ goto out_unlock;
+ }
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
err = loop_release_xfer(lo);
if (err)
- goto exit;
+ goto out_unfreeze;
if (info->lo_encrypt_type) {
unsigned int type = info->lo_encrypt_type;
if (type >= MAX_LO_CRYPT) {
err = -EINVAL;
- goto exit;
+ goto out_unfreeze;
}
xfer = xfer_funcs[type];
if (xfer == NULL) {
err = -EINVAL;
- goto exit;
+ goto out_unfreeze;
}
} else
xfer = NULL;
err = loop_init_xfer(lo, xfer, info);
if (err)
- goto exit;
+ goto out_unfreeze;
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
- goto exit;
+ goto out_unfreeze;
}
}
@@ -1188,15 +1253,20 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
- exit:
+out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
!(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_flags |= LO_FLAGS_PARTSCAN;
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
- loop_reread_partitions(lo, lo->lo_device);
+ bdev = lo->lo_device;
+ partscan = true;
}
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
+ if (partscan)
+ loop_reread_partitions(lo, bdev);
return err;
}
@@ -1204,12 +1274,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
static int
loop_get_status(struct loop_device *lo, struct loop_info64 *info)
{
- struct file *file;
+ struct path path;
struct kstat stat;
int ret;
+ ret = mutex_lock_killable(&loop_ctl_mutex);
+ if (ret)
+ return ret;
if (lo->lo_state != Lo_bound) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return -ENXIO;
}
@@ -1228,17 +1301,17 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
lo->lo_encrypt_key_size);
}
- /* Drop lo_ctl_mutex while we call into the filesystem. */
- file = get_file(lo->lo_backing_file);
- mutex_unlock(&lo->lo_ctl_mutex);
- ret = vfs_getattr(&file->f_path, &stat, STATX_INO,
- AT_STATX_SYNC_AS_STAT);
+ /* Drop loop_ctl_mutex while we call into the filesystem. */
+ path = lo->lo_backing_file->f_path;
+ path_get(&path);
+ mutex_unlock(&loop_ctl_mutex);
+ ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
if (!ret) {
info->lo_device = huge_encode_dev(stat.dev);
info->lo_inode = stat.ino;
info->lo_rdevice = huge_encode_dev(stat.rdev);
}
- fput(file);
+ path_put(&path);
return ret;
}
@@ -1322,10 +1395,8 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
struct loop_info64 info64;
int err;
- if (!arg) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ if (!arg)
return -EINVAL;
- }
err = loop_get_status(lo, &info64);
if (!err)
err = loop_info64_to_old(&info64, &info);
@@ -1340,10 +1411,8 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
struct loop_info64 info64;
int err;
- if (!arg) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ if (!arg)
return -EINVAL;
- }
err = loop_get_status(lo, &info64);
if (!err && copy_to_user(arg, &info64, sizeof(info64)))
err = -EFAULT;
@@ -1393,70 +1462,73 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
return 0;
}
+static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
+ unsigned long arg)
+{
+ int err;
+
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
+ switch (cmd) {
+ case LOOP_SET_CAPACITY:
+ err = loop_set_capacity(lo);
+ break;
+ case LOOP_SET_DIRECT_IO:
+ err = loop_set_dio(lo, arg);
+ break;
+ case LOOP_SET_BLOCK_SIZE:
+ err = loop_set_block_size(lo, arg);
+ break;
+ default:
+ err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
+ }
+ mutex_unlock(&loop_ctl_mutex);
+ return err;
+}
+
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
int err;
- err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1);
- if (err)
- goto out_unlocked;
-
switch (cmd) {
case LOOP_SET_FD:
- err = loop_set_fd(lo, mode, bdev, arg);
- break;
+ return loop_set_fd(lo, mode, bdev, arg);
case LOOP_CHANGE_FD:
- err = loop_change_fd(lo, bdev, arg);
- break;
+ return loop_change_fd(lo, bdev, arg);
case LOOP_CLR_FD:
- /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
- err = loop_clr_fd(lo);
- if (!err)
- goto out_unlocked;
- break;
+ return loop_clr_fd(lo);
case LOOP_SET_STATUS:
err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
err = loop_set_status_old(lo,
(struct loop_info __user *)arg);
+ }
break;
case LOOP_GET_STATUS:
- err = loop_get_status_old(lo, (struct loop_info __user *) arg);
- /* loop_get_status() unlocks lo_ctl_mutex */
- goto out_unlocked;
+ return loop_get_status_old(lo, (struct loop_info __user *) arg);
case LOOP_SET_STATUS64:
err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
err = loop_set_status64(lo,
(struct loop_info64 __user *) arg);
+ }
break;
case LOOP_GET_STATUS64:
- err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
- /* loop_get_status() unlocks lo_ctl_mutex */
- goto out_unlocked;
+ return loop_get_status64(lo, (struct loop_info64 __user *) arg);
case LOOP_SET_CAPACITY:
- err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_capacity(lo);
- break;
case LOOP_SET_DIRECT_IO:
- err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_dio(lo, arg);
- break;
case LOOP_SET_BLOCK_SIZE:
- err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_block_size(lo, arg);
- break;
+ if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /* Fall through */
default:
- err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
+ err = lo_simple_ioctl(lo, cmd, arg);
+ break;
}
- mutex_unlock(&lo->lo_ctl_mutex);
-out_unlocked:
return err;
}
@@ -1570,10 +1642,8 @@ loop_get_status_compat(struct loop_device *lo,
struct loop_info64 info64;
int err;
- if (!arg) {
- mutex_unlock(&lo->lo_ctl_mutex);
+ if (!arg)
return -EINVAL;
- }
err = loop_get_status(lo, &info64);
if (!err)
err = loop_info64_to_compat(&info64, arg);
@@ -1588,20 +1658,12 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
switch(cmd) {
case LOOP_SET_STATUS:
- err = mutex_lock_killable(&lo->lo_ctl_mutex);
- if (!err) {
- err = loop_set_status_compat(lo,
- (const struct compat_loop_info __user *)arg);
- mutex_unlock(&lo->lo_ctl_mutex);
- }
+ err = loop_set_status_compat(lo,
+ (const struct compat_loop_info __user *)arg);
break;
case LOOP_GET_STATUS:
- err = mutex_lock_killable(&lo->lo_ctl_mutex);
- if (!err) {
- err = loop_get_status_compat(lo,
- (struct compat_loop_info __user *)arg);
- /* loop_get_status() unlocks lo_ctl_mutex */
- }
+ err = loop_get_status_compat(lo,
+ (struct compat_loop_info __user *)arg);
break;
case LOOP_SET_CAPACITY:
case LOOP_CLR_FD:
@@ -1625,9 +1687,11 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
static int lo_open(struct block_device *bdev, fmode_t mode)
{
struct loop_device *lo;
- int err = 0;
+ int err;
- mutex_lock(&loop_index_mutex);
+ err = mutex_lock_killable(&loop_ctl_mutex);
+ if (err)
+ return err;
lo = bdev->bd_disk->private_data;
if (!lo) {
err = -ENXIO;
@@ -1636,26 +1700,30 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
atomic_inc(&lo->lo_refcnt);
out:
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return err;
}
-static void __lo_release(struct loop_device *lo)
+static void lo_release(struct gendisk *disk, fmode_t mode)
{
- int err;
+ struct loop_device *lo;
+ mutex_lock(&loop_ctl_mutex);
+ lo = disk->private_data;
if (atomic_dec_return(&lo->lo_refcnt))
- return;
+ goto out_unlock;
- mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
+ if (lo->lo_state != Lo_bound)
+ goto out_unlock;
+ lo->lo_state = Lo_rundown;
+ mutex_unlock(&loop_ctl_mutex);
/*
* In autoclear mode, stop the loop thread
* and remove configuration after last close.
*/
- err = loop_clr_fd(lo);
- if (!err)
- return;
+ __loop_clr_fd(lo, true);
+ return;
} else if (lo->lo_state == Lo_bound) {
/*
* Otherwise keep thread (if running) and config,
@@ -1665,14 +1733,8 @@ static void __lo_release(struct loop_device *lo)
blk_mq_unfreeze_queue(lo->lo_queue);
}
- mutex_unlock(&lo->lo_ctl_mutex);
-}
-
-static void lo_release(struct gendisk *disk, fmode_t mode)
-{
- mutex_lock(&loop_index_mutex);
- __lo_release(disk->private_data);
- mutex_unlock(&loop_index_mutex);
+out_unlock:
+ mutex_unlock(&loop_ctl_mutex);
}
static const struct block_device_operations lo_fops = {
@@ -1711,10 +1773,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
struct loop_device *lo = ptr;
struct loop_func_table *xfer = data;
- mutex_lock(&lo->lo_ctl_mutex);
+ mutex_lock(&loop_ctl_mutex);
if (lo->lo_encryption == xfer)
loop_release_xfer(lo);
- mutex_unlock(&lo->lo_ctl_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return 0;
}
@@ -1759,8 +1821,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
/* always use the first bio's css */
#ifdef CONFIG_BLK_CGROUP
- if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
- cmd->css = rq->bio->bi_css;
+ if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
+ cmd->css = &bio_blkcg(rq->bio)->css;
css_get(cmd->css);
} else
#endif
@@ -1853,7 +1915,7 @@ static int loop_add(struct loop_device **l, int i)
goto out_free_idr;
lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
- if (IS_ERR_OR_NULL(lo->lo_queue)) {
+ if (IS_ERR(lo->lo_queue)) {
err = PTR_ERR(lo->lo_queue);
goto out_cleanup_tags;
}
@@ -1895,7 +1957,6 @@ static int loop_add(struct loop_device **l, int i)
if (!part_shift)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
- mutex_init(&lo->lo_ctl_mutex);
atomic_set(&lo->lo_refcnt, 0);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
@@ -1974,7 +2035,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
struct kobject *kobj;
int err;
- mutex_lock(&loop_index_mutex);
+ mutex_lock(&loop_ctl_mutex);
err = loop_lookup(&lo, MINOR(dev) >> part_shift);
if (err < 0)
err = loop_add(&lo, MINOR(dev) >> part_shift);
@@ -1982,7 +2043,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
kobj = NULL;
else
kobj = get_disk_and_module(lo->lo_disk);
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
*part = 0;
return kobj;
@@ -1992,9 +2053,13 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
unsigned long parm)
{
struct loop_device *lo;
- int ret = -ENOSYS;
+ int ret;
- mutex_lock(&loop_index_mutex);
+ ret = mutex_lock_killable(&loop_ctl_mutex);
+ if (ret)
+ return ret;
+
+ ret = -ENOSYS;
switch (cmd) {
case LOOP_CTL_ADD:
ret = loop_lookup(&lo, parm);
@@ -2008,21 +2073,15 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
ret = loop_lookup(&lo, parm);
if (ret < 0)
break;
- ret = mutex_lock_killable(&lo->lo_ctl_mutex);
- if (ret)
- break;
if (lo->lo_state != Lo_unbound) {
ret = -EBUSY;
- mutex_unlock(&lo->lo_ctl_mutex);
break;
}
if (atomic_read(&lo->lo_refcnt) > 0) {
ret = -EBUSY;
- mutex_unlock(&lo->lo_ctl_mutex);
break;
}
lo->lo_disk->private_data = NULL;
- mutex_unlock(&lo->lo_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo);
break;
@@ -2032,7 +2091,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
break;
ret = loop_add(&lo, -1);
}
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
return ret;
}
@@ -2116,10 +2175,10 @@ static int __init loop_init(void)
THIS_MODULE, loop_probe, NULL, NULL);
/* pre-create number of devices given by config or max_loop */
- mutex_lock(&loop_index_mutex);
+ mutex_lock(&loop_ctl_mutex);
for (i = 0; i < nr; i++)
loop_add(&lo, i);
- mutex_unlock(&loop_index_mutex);
+ mutex_unlock(&loop_ctl_mutex);
printk(KERN_INFO "loop: module loaded\n");
return 0;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 4d42c7af7de7..af75a5ee4094 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -54,7 +54,6 @@ struct loop_device {
spinlock_t lo_lock;
int lo_state;
- struct mutex lo_ctl_mutex;
struct kthread_worker worker;
struct task_struct *worker_task;
bool use_dio;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index a7daa8acbab3..88e8440e75c3 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -168,41 +168,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
return false; /* device present */
}
-/* we have to use runtime tag to setup command header */
-static void mtip_init_cmd_header(struct request *rq)
-{
- struct driver_data *dd = rq->q->queuedata;
- struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-
- /* Point the command headers at the command tables. */
- cmd->command_header = dd->port->command_list +
- (sizeof(struct mtip_cmd_hdr) * rq->tag);
- cmd->command_header_dma = dd->port->command_list_dma +
- (sizeof(struct mtip_cmd_hdr) * rq->tag);
-
- if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
- cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
-
- cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
-}
-
-static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
-{
- struct request *rq;
-
- if (mtip_check_surprise_removal(dd->pdev))
- return NULL;
-
- rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED);
- if (IS_ERR(rq))
- return NULL;
-
- /* Internal cmd isn't submitted via .queue_rq */
- mtip_init_cmd_header(rq);
-
- return blk_mq_rq_to_pdu(rq);
-}
-
static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
unsigned int tag)
{
@@ -1023,13 +988,14 @@ static int mtip_exec_internal_command(struct mtip_port *port,
return -EFAULT;
}
- int_cmd = mtip_get_int_command(dd);
- if (!int_cmd) {
+ if (mtip_check_surprise_removal(dd->pdev))
+ return -EFAULT;
+
+ rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED);
+ if (IS_ERR(rq)) {
dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n");
return -EFAULT;
}
- rq = blk_mq_rq_from_pdu(int_cmd);
- rq->special = &icmd;
set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
@@ -1050,6 +1016,8 @@ static int mtip_exec_internal_command(struct mtip_port *port,
}
/* Copy the command to the command table */
+ int_cmd = blk_mq_rq_to_pdu(rq);
+ int_cmd->icmd = &icmd;
memcpy(int_cmd->command, fis, fis_len*4);
rq->timeout = timeout;
@@ -1423,23 +1391,19 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
* @dd pointer to driver_data structure
* @lba starting lba
* @len # of 512b sectors to trim
- *
- * return value
- * -ENOMEM Out of dma memory
- * -EINVAL Invalid parameters passed in, trim not supported
- * -EIO Error submitting trim request to hw
*/
-static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
- unsigned int len)
+static blk_status_t mtip_send_trim(struct driver_data *dd, unsigned int lba,
+ unsigned int len)
{
- int i, rv = 0;
u64 tlba, tlen, sect_left;
struct mtip_trim_entry *buf;
dma_addr_t dma_addr;
struct host_to_dev_fis fis;
+ blk_status_t ret = BLK_STS_OK;
+ int i;
if (!len || dd->trim_supp == false)
- return -EINVAL;
+ return BLK_STS_IOERR;
/* Trim request too big */
WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
@@ -1454,7 +1418,7 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
GFP_KERNEL);
if (!buf)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
memset(buf, 0, ATA_SECT_SIZE);
for (i = 0, sect_left = len, tlba = lba;
@@ -1463,8 +1427,8 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
MTIP_MAX_TRIM_ENTRY_LEN :
sect_left);
- buf[i].lba = __force_bit2int cpu_to_le32(tlba);
- buf[i].range = __force_bit2int cpu_to_le16(tlen);
+ buf[i].lba = cpu_to_le32(tlba);
+ buf[i].range = cpu_to_le16(tlen);
tlba += tlen;
sect_left -= tlen;
}
@@ -1486,10 +1450,10 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
ATA_SECT_SIZE,
0,
MTIP_TRIM_TIMEOUT_MS) < 0)
- rv = -EIO;
+ ret = BLK_STS_IOERR;
dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
- return rv;
+ return ret;
}
/*
@@ -1585,23 +1549,20 @@ static inline void fill_command_sg(struct driver_data *dd,
int n;
unsigned int dma_len;
struct mtip_cmd_sg *command_sg;
- struct scatterlist *sg = command->sg;
+ struct scatterlist *sg;
command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
- for (n = 0; n < nents; n++) {
+ for_each_sg(command->sg, sg, nents, n) {
dma_len = sg_dma_len(sg);
if (dma_len > 0x400000)
dev_err(&dd->pdev->dev,
"DMA segment length truncated\n");
- command_sg->info = __force_bit2int
- cpu_to_le32((dma_len-1) & 0x3FFFFF);
- command_sg->dba = __force_bit2int
- cpu_to_le32(sg_dma_address(sg));
- command_sg->dba_upper = __force_bit2int
+ command_sg->info = cpu_to_le32((dma_len-1) & 0x3FFFFF);
+ command_sg->dba = cpu_to_le32(sg_dma_address(sg));
+ command_sg->dba_upper =
cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
command_sg++;
- sg++;
}
}
@@ -2171,7 +2132,6 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
* @dd Pointer to the driver data structure.
* @start First sector to read.
* @nsect Number of sectors to read.
- * @nents Number of entries in scatter list for the read command.
* @tag The tag of this read command.
* @callback Pointer to the function that should be called
* when the read completes.
@@ -2183,16 +2143,20 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
* None
*/
static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
- struct mtip_cmd *command, int nents,
+ struct mtip_cmd *command,
struct blk_mq_hw_ctx *hctx)
{
+ struct mtip_cmd_hdr *hdr =
+ dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag;
struct host_to_dev_fis *fis;
struct mtip_port *port = dd->port;
int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
u64 start = blk_rq_pos(rq);
unsigned int nsect = blk_rq_sectors(rq);
+ unsigned int nents;
/* Map the scatter list for DMA access */
+ nents = blk_rq_map_sg(hctx->queue, rq, command->sg);
nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
prefetch(&port->flags);
@@ -2233,10 +2197,11 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
fis->device |= 1 << 7;
/* Populate the command header */
- command->command_header->opts =
- __force_bit2int cpu_to_le32(
- (nents << 16) | 5 | AHCI_CMD_PREFETCH);
- command->command_header->byte_count = 0;
+ hdr->ctba = cpu_to_le32(command->command_dma & 0xFFFFFFFF);
+ if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
+ hdr->ctbau = cpu_to_le32((command->command_dma >> 16) >> 16);
+ hdr->opts = cpu_to_le32((nents << 16) | 5 | AHCI_CMD_PREFETCH);
+ hdr->byte_count = 0;
command->direction = dma_dir;
@@ -2715,12 +2680,12 @@ static void mtip_softirq_done_fn(struct request *rq)
cmd->direction);
if (unlikely(cmd->unaligned))
- up(&dd->port->cmd_slot_unal);
+ atomic_inc(&dd->port->cmd_slot_unal);
blk_mq_end_request(rq, cmd->status);
}
-static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
struct driver_data *dd = data;
@@ -2730,14 +2695,16 @@ static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
clear_bit(req->tag, dd->port->cmds_to_issue);
cmd->status = BLK_STS_IOERR;
mtip_softirq_done_fn(req);
+ return true;
}
-static void mtip_queue_cmd(struct request *req, void *data, bool reserved)
+static bool mtip_queue_cmd(struct request *req, void *data, bool reserved)
{
struct driver_data *dd = data;
set_bit(req->tag, dd->port->cmds_to_issue);
blk_abort_request(req);
+ return true;
}
/*
@@ -2803,10 +2770,7 @@ restart_eh:
blk_mq_quiesce_queue(dd->queue);
- spin_lock(dd->queue->queue_lock);
- blk_mq_tagset_busy_iter(&dd->tags,
- mtip_queue_cmd, dd);
- spin_unlock(dd->queue->queue_lock);
+ blk_mq_tagset_busy_iter(&dd->tags, mtip_queue_cmd, dd);
set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags);
@@ -3026,7 +2990,7 @@ static int mtip_hw_init(struct driver_data *dd)
else
dd->unal_qdepth = 0;
- sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth);
+ atomic_set(&dd->port->cmd_slot_unal, dd->unal_qdepth);
/* Spinlock to prevent concurrent issue */
for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
@@ -3531,58 +3495,24 @@ static inline bool is_se_active(struct driver_data *dd)
return false;
}
-/*
- * Block layer make request function.
- *
- * This function is called by the kernel to process a BIO for
- * the P320 device.
- *
- * @queue Pointer to the request queue. Unused other than to obtain
- * the driver data structure.
- * @rq Pointer to the request.
- *
- */
-static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static inline bool is_stopped(struct driver_data *dd, struct request *rq)
{
- struct driver_data *dd = hctx->queue->queuedata;
- struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
- unsigned int nents;
-
- if (is_se_active(dd))
- return -ENODATA;
-
- if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
- if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
- &dd->dd_flag))) {
- return -ENXIO;
- }
- if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
- return -ENODATA;
- }
- if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
- &dd->dd_flag) &&
- rq_data_dir(rq))) {
- return -ENODATA;
- }
- if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) ||
- test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)))
- return -ENODATA;
- }
-
- if (req_op(rq) == REQ_OP_DISCARD) {
- int err;
-
- err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
- blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
- return 0;
- }
+ if (likely(!(dd->dd_flag & MTIP_DDF_STOP_IO)))
+ return false;
- /* Create the scatter list for this request. */
- nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg);
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
+ return true;
+ if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
+ return true;
+ if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag) &&
+ rq_data_dir(rq))
+ return true;
+ if (test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
+ return true;
+ if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
+ return true;
- /* Issue the read/write. */
- mtip_hw_submit_io(dd, rq, cmd, nents, hctx);
- return 0;
+ return false;
}
static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
@@ -3603,7 +3533,7 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
cmd->unaligned = 1;
}
- if (cmd->unaligned && down_trylock(&dd->port->cmd_slot_unal))
+ if (cmd->unaligned && atomic_dec_if_positive(&dd->port->cmd_slot_unal) >= 0)
return true;
return false;
@@ -3613,32 +3543,33 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
struct driver_data *dd = hctx->queue->queuedata;
- struct mtip_int_cmd *icmd = rq->special;
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct mtip_int_cmd *icmd = cmd->icmd;
+ struct mtip_cmd_hdr *hdr =
+ dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag;
struct mtip_cmd_sg *command_sg;
if (mtip_commands_active(dd->port))
- return BLK_STS_RESOURCE;
+ return BLK_STS_DEV_RESOURCE;
+ hdr->ctba = cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
+ if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
+ hdr->ctbau = cpu_to_le32((cmd->command_dma >> 16) >> 16);
/* Populate the SG list */
- cmd->command_header->opts =
- __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len);
+ hdr->opts = cpu_to_le32(icmd->opts | icmd->fis_len);
if (icmd->buf_len) {
command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ;
- command_sg->info =
- __force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF);
- command_sg->dba =
- __force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF);
+ command_sg->info = cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF);
+ command_sg->dba = cpu_to_le32(icmd->buffer & 0xFFFFFFFF);
command_sg->dba_upper =
- __force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16);
+ cpu_to_le32((icmd->buffer >> 16) >> 16);
- cmd->command_header->opts |=
- __force_bit2int cpu_to_le32((1 << 16));
+ hdr->opts |= cpu_to_le32((1 << 16));
}
/* Populate the command header */
- cmd->command_header->byte_count = 0;
+ hdr->byte_count = 0;
blk_mq_start_request(rq);
mtip_issue_non_ncq_command(dd->port, rq->tag);
@@ -3648,23 +3579,25 @@ static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
+ struct driver_data *dd = hctx->queue->queuedata;
struct request *rq = bd->rq;
- int ret;
-
- mtip_init_cmd_header(rq);
+ struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
if (blk_rq_is_passthrough(rq))
return mtip_issue_reserved_cmd(hctx, rq);
if (unlikely(mtip_check_unal_depth(hctx, rq)))
- return BLK_STS_RESOURCE;
+ return BLK_STS_DEV_RESOURCE;
+
+ if (is_se_active(dd) || is_stopped(dd, rq))
+ return BLK_STS_IOERR;
blk_mq_start_request(rq);
- ret = mtip_submit_request(hctx, rq);
- if (likely(!ret))
- return BLK_STS_OK;
- return BLK_STS_IOERR;
+ if (req_op(rq) == REQ_OP_DISCARD)
+ return mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
+ mtip_hw_submit_io(dd, rq, cmd, hctx);
+ return BLK_STS_OK;
}
static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3920,12 +3853,13 @@ protocol_init_error:
return rv;
}
-static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
+static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(rq);
+ return true;
}
/*
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e20e55dab443..abce25f27f57 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -126,8 +126,6 @@
#define MTIP_DFS_MAX_BUF_SIZE 1024
-#define __force_bit2int (unsigned int __force)
-
enum {
/* below are bit numbers in 'flags' defined in mtip_port */
MTIP_PF_IC_ACTIVE_BIT = 0, /* pio/ioctl */
@@ -174,10 +172,10 @@ enum {
struct smart_attr {
u8 attr_id;
- u16 flags;
+ __le16 flags;
u8 cur;
u8 worst;
- u32 data;
+ __le32 data;
u8 res[3];
} __packed;
@@ -200,9 +198,9 @@ struct mtip_work {
#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8
struct mtip_trim_entry {
- u32 lba; /* starting lba of region */
- u16 rsvd; /* unused */
- u16 range; /* # of 512b blocks to trim */
+ __le32 lba; /* starting lba of region */
+ __le16 rsvd; /* unused */
+ __le16 range; /* # of 512b blocks to trim */
} __packed;
struct mtip_trim {
@@ -278,24 +276,24 @@ struct mtip_cmd_hdr {
* - Bit 5 Unused in this implementation.
* - Bits 4:0 Length of the command FIS in DWords (DWord = 4 bytes).
*/
- unsigned int opts;
+ __le32 opts;
/* This field is unsed when using NCQ. */
union {
- unsigned int byte_count;
- unsigned int status;
+ __le32 byte_count;
+ __le32 status;
};
/*
* Lower 32 bits of the command table address associated with this
* header. The command table addresses must be 128 byte aligned.
*/
- unsigned int ctba;
+ __le32 ctba;
/*
* If 64 bit addressing is used this field is the upper 32 bits
* of the command table address associated with this command.
*/
- unsigned int ctbau;
+ __le32 ctbau;
/* Reserved and unused. */
- unsigned int res[4];
+ u32 res[4];
};
/* Command scatter gather structure (PRD). */
@@ -305,31 +303,28 @@ struct mtip_cmd_sg {
* address must be 8 byte aligned signified by bits 2:0 being
* set to 0.
*/
- unsigned int dba;
+ __le32 dba;
/*
* When 64 bit addressing is used this field is the upper
* 32 bits of the data buffer address.
*/
- unsigned int dba_upper;
+ __le32 dba_upper;
/* Unused. */
- unsigned int reserved;
+ __le32 reserved;
/*
* Bit 31: interrupt when this data block has been transferred.
* Bits 30..22: reserved
* Bits 21..0: byte count (minus 1). For P320 the byte count must be
* 8 byte aligned signified by bits 2:0 being set to 1.
*/
- unsigned int info;
+ __le32 info;
};
struct mtip_port;
+struct mtip_int_cmd;
+
/* Structure used to describe a command. */
struct mtip_cmd {
-
- struct mtip_cmd_hdr *command_header; /* ptr to command header entry */
-
- dma_addr_t command_header_dma; /* corresponding physical address */
-
void *command; /* ptr to command table entry */
dma_addr_t command_dma; /* corresponding physical address */
@@ -338,7 +333,10 @@ struct mtip_cmd {
int unaligned; /* command is unaligned on 4k boundary */
- struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
+ union {
+ struct scatterlist sg[MTIP_MAX_SG]; /* Scatter list entries */
+ struct mtip_int_cmd *icmd;
+ };
int retries; /* The number of retries left for this command. */
@@ -435,8 +433,8 @@ struct mtip_port {
*/
unsigned long ic_pause_timer;
- /* Semaphore to control queue depth of unaligned IOs */
- struct semaphore cmd_slot_unal;
+ /* Counter to control queue depth of unaligned IOs */
+ atomic_t cmd_slot_unal;
/* Spinlock for working around command-issue bug. */
spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS];
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4d4d6129ff66..08696f5f00bb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -734,12 +734,13 @@ static void recv_work(struct work_struct *work)
kfree(args);
}
-static void nbd_clear_req(struct request *req, void *data, bool reserved)
+static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
+ return true;
}
static void nbd_clear_que(struct nbd_device *nbd)
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 7685df43f1ef..b3df2793e7cd 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -49,6 +49,7 @@ struct nullb_device {
unsigned long completion_nsec; /* time in ns to complete a request */
unsigned long cache_size; /* disk cache size in MB */
unsigned long zone_size; /* zone size in MB if device is zoned */
+ unsigned int zone_nr_conv; /* number of conventional zones */
unsigned int submit_queues; /* number of submission queues */
unsigned int home_node; /* home node for the device */
unsigned int queue_mode; /* block interface */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 09339203dfba..62c9654b9ce8 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -188,6 +188,10 @@ static unsigned long g_zone_size = 256;
module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
+static unsigned int g_zone_nr_conv;
+module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
+MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -293,6 +297,7 @@ NULLB_DEVICE_ATTR(mbps, uint);
NULLB_DEVICE_ATTR(cache_size, ulong);
NULLB_DEVICE_ATTR(zoned, bool);
NULLB_DEVICE_ATTR(zone_size, ulong);
+NULLB_DEVICE_ATTR(zone_nr_conv, uint);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -407,6 +412,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_badblocks,
&nullb_device_attr_zoned,
&nullb_device_attr_zone_size,
+ &nullb_device_attr_zone_nr_conv,
NULL,
};
@@ -520,6 +526,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->use_per_node_hctx = g_use_per_node_hctx;
dev->zoned = g_zoned;
dev->zone_size = g_zone_size;
+ dev->zone_nr_conv = g_zone_nr_conv;
return dev;
}
@@ -635,14 +642,9 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
-static void null_softirq_done_fn(struct request *rq)
+static void null_complete_rq(struct request *rq)
{
- struct nullb *nullb = rq->q->queuedata;
-
- if (nullb->dev->queue_mode == NULL_Q_MQ)
- end_cmd(blk_mq_rq_to_pdu(rq));
- else
- end_cmd(rq->special);
+ end_cmd(blk_mq_rq_to_pdu(rq));
}
static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
@@ -1350,7 +1352,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
- .complete = null_softirq_done_fn,
+ .complete = null_complete_rq,
.timeout = null_timeout_rq,
};
@@ -1657,8 +1659,7 @@ static int null_add_dev(struct nullb_device *dev)
}
null_init_queues(nullb);
} else if (dev->queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node,
- NULL);
+ nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index c0b0e4a3fa8f..5d1c261a2cfd 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -29,7 +29,25 @@ int null_zone_init(struct nullb_device *dev)
if (!dev->zones)
return -ENOMEM;
- for (i = 0; i < dev->nr_zones; i++) {
+ if (dev->zone_nr_conv >= dev->nr_zones) {
+ dev->zone_nr_conv = dev->nr_zones - 1;
+ pr_info("null_blk: changed the number of conventional zones to %u",
+ dev->zone_nr_conv);
+ }
+
+ for (i = 0; i < dev->zone_nr_conv; i++) {
+ struct blk_zone *zone = &dev->zones[i];
+
+ zone->start = sector;
+ zone->len = dev->zone_size_sects;
+ zone->wp = zone->start + zone->len;
+ zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
+ zone->cond = BLK_ZONE_COND_NOT_WP;
+
+ sector += dev->zone_size_sects;
+ }
+
+ for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
struct blk_zone *zone = &dev->zones[i];
zone->start = zone->wp = sector;
@@ -98,6 +116,8 @@ void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->wp == zone->start + zone->len)
zone->cond = BLK_ZONE_COND_FULL;
break;
+ case BLK_ZONE_COND_NOT_WP:
+ break;
default:
/* Invalid zone condition */
cmd->error = BLK_STS_IOERR;
@@ -111,6 +131,11 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
unsigned int zno = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zno];
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
+ cmd->error = BLK_STS_IOERR;
+ return;
+ }
+
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index ae4971e5d9a8..0ff9b12d0e35 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -242,6 +242,11 @@ struct pd_unit {
static struct pd_unit pd[PD_UNITS];
+struct pd_req {
+ /* for REQ_OP_DRV_IN: */
+ enum action (*func)(struct pd_unit *disk);
+};
+
static char pd_scratch[512]; /* scratch block buffer */
static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR",
@@ -502,8 +507,9 @@ static enum action do_pd_io_start(void)
static enum action pd_special(void)
{
- enum action (*func)(struct pd_unit *) = pd_req->special;
- return func(pd_current);
+ struct pd_req *req = blk_mq_rq_to_pdu(pd_req);
+
+ return req->func(pd_current);
}
static int pd_next_buf(void)
@@ -767,12 +773,14 @@ static int pd_special_command(struct pd_unit *disk,
enum action (*func)(struct pd_unit *disk))
{
struct request *rq;
+ struct pd_req *req;
rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
+ req = blk_mq_rq_to_pdu(rq);
- rq->special = func;
+ req->func = func;
blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
blk_put_request(rq);
return 0;
@@ -892,9 +900,21 @@ static void pd_probe_drive(struct pd_unit *disk)
disk->gd = p;
p->private_data = disk;
- p->queue = blk_mq_init_sq_queue(&disk->tag_set, &pd_mq_ops, 2,
- BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
+ memset(&disk->tag_set, 0, sizeof(disk->tag_set));
+ disk->tag_set.ops = &pd_mq_ops;
+ disk->tag_set.cmd_size = sizeof(struct pd_req);
+ disk->tag_set.nr_hw_queues = 1;
+ disk->tag_set.nr_maps = 1;
+ disk->tag_set.queue_depth = 2;
+ disk->tag_set.numa_node = NUMA_NO_NODE;
+ disk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
+
+ if (blk_mq_alloc_tag_set(&disk->tag_set))
+ return;
+
+ p->queue = blk_mq_init_queue(&disk->tag_set);
if (IS_ERR(p->queue)) {
+ blk_mq_free_tag_set(&disk->tag_set);
p->queue = NULL;
return;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 9381f4e3b221..f5a71023f76c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2203,9 +2203,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
* Some CDRW drives can not handle writes larger than one packet,
* even if the size is a multiple of the packet size.
*/
- spin_lock_irq(q->queue_lock);
blk_queue_max_hw_sectors(q, pd->settings.size);
- spin_unlock_irq(q->queue_lock);
set_bit(PACKET_WRITABLE, &pd->flags);
} else {
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 2459dcc04b1c..a10d5736d8f7 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -181,6 +181,7 @@ struct skd_request_context {
struct fit_completion_entry_v1 completion;
struct fit_comp_error_info err_info;
+ int retries;
blk_status_t status;
};
@@ -382,11 +383,12 @@ static void skd_log_skreq(struct skd_device *skdev,
* READ/WRITE REQUESTS
*****************************************************************************
*/
-static void skd_inc_in_flight(struct request *rq, void *data, bool reserved)
+static bool skd_inc_in_flight(struct request *rq, void *data, bool reserved)
{
int *count = data;
count++;
+ return true;
}
static int skd_in_flight(struct skd_device *skdev)
@@ -494,6 +496,11 @@ static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE))
return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE;
+ if (!(req->rq_flags & RQF_DONTPREP)) {
+ skreq->retries = 0;
+ req->rq_flags |= RQF_DONTPREP;
+ }
+
blk_mq_start_request(req);
WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n",
@@ -1425,7 +1432,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
break;
case SKD_CHECK_STATUS_REQUEUE_REQUEST:
- if ((unsigned long) ++req->special < SKD_MAX_RETRIES) {
+ if (++skreq->retries < SKD_MAX_RETRIES) {
skd_log_skreq(skdev, skreq, "retry");
blk_mq_requeue_request(req, true);
break;
@@ -1887,13 +1894,13 @@ static void skd_isr_fwstate(struct skd_device *skdev)
skd_skdev_state_to_str(skdev->state), skdev->state);
}
-static void skd_recover_request(struct request *req, void *data, bool reserved)
+static bool skd_recover_request(struct request *req, void *data, bool reserved)
{
struct skd_device *const skdev = data;
struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
if (skreq->state != SKD_REQ_STATE_BUSY)
- return;
+ return true;
skd_log_skreq(skdev, skreq, "recover");
@@ -1904,6 +1911,7 @@ static void skd_recover_request(struct request *req, void *data, bool reserved)
skreq->state = SKD_REQ_STATE_IDLE;
skreq->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
+ return true;
}
static void skd_recover_requests(struct skd_device *skdev)
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index b54fa6726303..9c0553dd13e7 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -6,7 +6,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/cdrom.h>
@@ -45,6 +45,8 @@ MODULE_VERSION(DRV_MODULE_VERSION);
#define WAITING_FOR_GEN_CMD 0x04
#define WAITING_FOR_ANY -1
+#define VDC_MAX_RETRIES 10
+
static struct workqueue_struct *sunvdc_wq;
struct vdc_req_entry {
@@ -66,9 +68,10 @@ struct vdc_port {
u64 max_xfer_size;
u32 vdisk_block_size;
+ u32 drain;
u64 ldc_timeout;
- struct timer_list ldc_reset_timer;
+ struct delayed_work ldc_reset_timer_work;
struct work_struct ldc_reset_work;
/* The server fills these in for us in the disk attribute
@@ -80,12 +83,14 @@ struct vdc_port {
u8 vdisk_mtype;
u32 vdisk_phys_blksz;
+ struct blk_mq_tag_set tag_set;
+
char disk_name[32];
};
static void vdc_ldc_reset(struct vdc_port *port);
static void vdc_ldc_reset_work(struct work_struct *work);
-static void vdc_ldc_reset_timer(struct timer_list *t);
+static void vdc_ldc_reset_timer_work(struct work_struct *work);
static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
{
@@ -175,11 +180,8 @@ static void vdc_blk_queue_start(struct vdc_port *port)
* handshake completes, so check for initial handshake before we've
* allocated a disk.
*/
- if (port->disk && blk_queue_stopped(port->disk->queue) &&
- vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) {
- blk_start_queue(port->disk->queue);
- }
-
+ if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
+ blk_mq_start_hw_queues(port->disk->queue);
}
static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
@@ -197,7 +199,7 @@ static void vdc_handshake_complete(struct vio_driver_state *vio)
{
struct vdc_port *port = to_vdc_port(vio);
- del_timer(&port->ldc_reset_timer);
+ cancel_delayed_work(&port->ldc_reset_timer_work);
vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
vdc_blk_queue_start(port);
}
@@ -320,7 +322,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
rqe->req = NULL;
- __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
+ blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0);
vdc_blk_queue_start(port);
}
@@ -431,6 +433,7 @@ static int __vdc_tx_trigger(struct vdc_port *port)
.end_idx = dr->prod,
};
int err, delay;
+ int retries = 0;
hdr.seq = dr->snd_nxt;
delay = 1;
@@ -443,6 +446,8 @@ static int __vdc_tx_trigger(struct vdc_port *port)
udelay(delay);
if ((delay <<= 1) > 128)
delay = 128;
+ if (retries++ > VDC_MAX_RETRIES)
+ break;
} while (err == -EAGAIN);
if (err == -ENOTCONN)
@@ -525,29 +530,40 @@ static int __send_request(struct request *req)
return err;
}
-static void do_vdc_request(struct request_queue *rq)
+static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct request *req;
+ struct vdc_port *port = hctx->queue->queuedata;
+ struct vio_dring_state *dr;
+ unsigned long flags;
- while ((req = blk_peek_request(rq)) != NULL) {
- struct vdc_port *port;
- struct vio_dring_state *dr;
+ dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- port = req->rq_disk->private_data;
- dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- if (unlikely(vdc_tx_dring_avail(dr) < 1))
- goto wait;
+ blk_mq_start_request(bd->rq);
- blk_start_request(req);
+ spin_lock_irqsave(&port->vio.lock, flags);
- if (__send_request(req) < 0) {
- blk_requeue_request(rq, req);
-wait:
- /* Avoid pointless unplugs. */
- blk_stop_queue(rq);
- break;
- }
+ /*
+ * Doing drain, just end the request in error
+ */
+ if (unlikely(port->drain)) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_IOERR;
+ }
+
+ if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ blk_mq_stop_hw_queue(hctx);
+ return BLK_STS_DEV_RESOURCE;
+ }
+
+ if (__send_request(bd->rq) < 0) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_IOERR;
}
+
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ return BLK_STS_OK;
}
static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
@@ -759,6 +775,31 @@ static void vdc_port_down(struct vdc_port *port)
vio_ldc_free(&port->vio);
}
+static const struct blk_mq_ops vdc_mq_ops = {
+ .queue_rq = vdc_queue_rq,
+};
+
+static void cleanup_queue(struct request_queue *q)
+{
+ struct vdc_port *port = q->queuedata;
+
+ blk_cleanup_queue(q);
+ blk_mq_free_tag_set(&port->tag_set);
+}
+
+static struct request_queue *init_queue(struct vdc_port *port)
+{
+ struct request_queue *q;
+
+ q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE,
+ BLK_MQ_F_SHOULD_MERGE);
+ if (IS_ERR(q))
+ return q;
+
+ q->queuedata = port;
+ return q;
+}
+
static int probe_disk(struct vdc_port *port)
{
struct request_queue *q;
@@ -796,17 +837,17 @@ static int probe_disk(struct vdc_port *port)
(u64)geom.num_sec);
}
- q = blk_init_queue(do_vdc_request, &port->vio.lock);
- if (!q) {
+ q = init_queue(port);
+ if (IS_ERR(q)) {
printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
port->vio.name);
- return -ENOMEM;
+ return PTR_ERR(q);
}
g = alloc_disk(1 << PARTITION_SHIFT);
if (!g) {
printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
port->vio.name);
- blk_cleanup_queue(q);
+ cleanup_queue(q);
return -ENOMEM;
}
@@ -981,7 +1022,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
*/
ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
- timer_setup(&port->ldc_reset_timer, vdc_ldc_reset_timer, 0);
+ INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work);
INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
@@ -1034,18 +1075,14 @@ static int vdc_port_remove(struct vio_dev *vdev)
struct vdc_port *port = dev_get_drvdata(&vdev->dev);
if (port) {
- unsigned long flags;
-
- spin_lock_irqsave(&port->vio.lock, flags);
- blk_stop_queue(port->disk->queue);
- spin_unlock_irqrestore(&port->vio.lock, flags);
+ blk_mq_stop_hw_queues(port->disk->queue);
flush_work(&port->ldc_reset_work);
- del_timer_sync(&port->ldc_reset_timer);
+ cancel_delayed_work_sync(&port->ldc_reset_timer_work);
del_timer_sync(&port->vio.timer);
del_gendisk(port->disk);
- blk_cleanup_queue(port->disk->queue);
+ cleanup_queue(port->disk->queue);
put_disk(port->disk);
port->disk = NULL;
@@ -1080,32 +1117,46 @@ static void vdc_requeue_inflight(struct vdc_port *port)
}
rqe->req = NULL;
- blk_requeue_request(port->disk->queue, req);
+ blk_mq_requeue_request(req, false);
}
}
static void vdc_queue_drain(struct vdc_port *port)
{
- struct request *req;
+ struct request_queue *q = port->disk->queue;
+
+ /*
+ * Mark the queue as draining, then freeze/quiesce to ensure
+ * that all existing requests are seen in ->queue_rq() and killed
+ */
+ port->drain = 1;
+ spin_unlock_irq(&port->vio.lock);
- while ((req = blk_fetch_request(port->disk->queue)) != NULL)
- __blk_end_request_all(req, BLK_STS_IOERR);
+ blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
+
+ spin_lock_irq(&port->vio.lock);
+ port->drain = 0;
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
}
-static void vdc_ldc_reset_timer(struct timer_list *t)
+static void vdc_ldc_reset_timer_work(struct work_struct *work)
{
- struct vdc_port *port = from_timer(port, t, ldc_reset_timer);
- struct vio_driver_state *vio = &port->vio;
- unsigned long flags;
+ struct vdc_port *port;
+ struct vio_driver_state *vio;
- spin_lock_irqsave(&vio->lock, flags);
+ port = container_of(work, struct vdc_port, ldc_reset_timer_work.work);
+ vio = &port->vio;
+
+ spin_lock_irq(&vio->lock);
if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
port->disk_name, port->ldc_timeout);
vdc_queue_drain(port);
vdc_blk_queue_start(port);
}
- spin_unlock_irqrestore(&vio->lock, flags);
+ spin_unlock_irq(&vio->lock);
}
static void vdc_ldc_reset_work(struct work_struct *work)
@@ -1129,7 +1180,7 @@ static void vdc_ldc_reset(struct vdc_port *port)
assert_spin_locked(&port->vio.lock);
pr_warn(PFX "%s ldc link reset\n", port->disk_name);
- blk_stop_queue(port->disk->queue);
+ blk_mq_stop_hw_queues(port->disk->queue);
vdc_requeue_inflight(port);
vdc_port_down(port);
@@ -1146,7 +1197,7 @@ static void vdc_ldc_reset(struct vdc_port *port)
}
if (port->ldc_timeout)
- mod_timer(&port->ldc_reset_timer,
+ mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
round_jiffies(jiffies + HZ * port->ldc_timeout));
mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
return;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 064b8c5c7a32..4478eb7efee0 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -243,7 +243,6 @@ struct carm_port {
unsigned int port_no;
struct gendisk *disk;
struct carm_host *host;
- struct blk_mq_tag_set tag_set;
/* attached device characteristics */
u64 capacity;
@@ -254,13 +253,10 @@ struct carm_port {
};
struct carm_request {
- unsigned int tag;
int n_elem;
unsigned int msg_type;
unsigned int msg_subtype;
unsigned int msg_bucket;
- struct request *rq;
- struct carm_port *port;
struct scatterlist sg[CARM_MAX_REQ_SG];
};
@@ -291,9 +287,6 @@ struct carm_host {
unsigned int wait_q_cons;
struct request_queue *wait_q[CARM_MAX_WAIT_Q];
- unsigned int n_msgs;
- u64 msg_alloc;
- struct carm_request req[CARM_MAX_REQ];
void *msg_base;
dma_addr_t msg_dma;
@@ -478,10 +471,10 @@ static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
}
static int carm_send_msg(struct carm_host *host,
- struct carm_request *crq)
+ struct carm_request *crq, unsigned tag)
{
void __iomem *mmio = host->mmio;
- u32 msg = (u32) carm_ref_msg_dma(host, crq->tag);
+ u32 msg = (u32) carm_ref_msg_dma(host, tag);
u32 cm_bucket = crq->msg_bucket;
u32 tmp;
int rc = 0;
@@ -506,99 +499,24 @@ static int carm_send_msg(struct carm_host *host,
return rc;
}
-static struct carm_request *carm_get_request(struct carm_host *host)
-{
- unsigned int i;
-
- /* obey global hardware limit on S/G entries */
- if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG))
- return NULL;
-
- for (i = 0; i < max_queue; i++)
- if ((host->msg_alloc & (1ULL << i)) == 0) {
- struct carm_request *crq = &host->req[i];
- crq->port = NULL;
- crq->n_elem = 0;
-
- host->msg_alloc |= (1ULL << i);
- host->n_msgs++;
-
- assert(host->n_msgs <= CARM_MAX_REQ);
- sg_init_table(crq->sg, CARM_MAX_REQ_SG);
- return crq;
- }
-
- DPRINTK("no request available, returning NULL\n");
- return NULL;
-}
-
-static int carm_put_request(struct carm_host *host, struct carm_request *crq)
-{
- assert(crq->tag < max_queue);
-
- if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0))
- return -EINVAL; /* tried to clear a tag that was not active */
-
- assert(host->hw_sg_used >= crq->n_elem);
-
- host->msg_alloc &= ~(1ULL << crq->tag);
- host->hw_sg_used -= crq->n_elem;
- host->n_msgs--;
-
- return 0;
-}
-
-static struct carm_request *carm_get_special(struct carm_host *host)
-{
- unsigned long flags;
- struct carm_request *crq = NULL;
- struct request *rq;
- int tries = 5000;
-
- while (tries-- > 0) {
- spin_lock_irqsave(&host->lock, flags);
- crq = carm_get_request(host);
- spin_unlock_irqrestore(&host->lock, flags);
-
- if (crq)
- break;
- msleep(10);
- }
-
- if (!crq)
- return NULL;
-
- rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq)) {
- spin_lock_irqsave(&host->lock, flags);
- carm_put_request(host, crq);
- spin_unlock_irqrestore(&host->lock, flags);
- return NULL;
- }
-
- crq->rq = rq;
- return crq;
-}
-
static int carm_array_info (struct carm_host *host, unsigned int array_idx)
{
struct carm_msg_ioctl *ioc;
- unsigned int idx;
u32 msg_data;
dma_addr_t msg_dma;
struct carm_request *crq;
+ struct request *rq;
int rc;
- crq = carm_get_special(host);
- if (!crq) {
+ rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
+ if (IS_ERR(rq)) {
rc = -ENOMEM;
goto err_out;
}
+ crq = blk_mq_rq_to_pdu(rq);
- idx = crq->tag;
-
- ioc = carm_ref_msg(host, idx);
- msg_dma = carm_ref_msg_dma(host, idx);
+ ioc = carm_ref_msg(host, rq->tag);
+ msg_dma = carm_ref_msg_dma(host, rq->tag);
msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
crq->msg_type = CARM_MSG_ARRAY;
@@ -612,7 +530,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
ioc->type = CARM_MSG_ARRAY;
ioc->subtype = CARM_ARRAY_INFO;
ioc->array_id = (u8) array_idx;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
+ ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
ioc->data_addr = cpu_to_le32(msg_data);
spin_lock_irq(&host->lock);
@@ -620,9 +538,8 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
host->state == HST_DEV_SCAN);
spin_unlock_irq(&host->lock);
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->special = crq;
- blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
+ DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
+ blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
return 0;
@@ -637,21 +554,21 @@ typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
static int carm_send_special (struct carm_host *host, carm_sspc_t func)
{
+ struct request *rq;
struct carm_request *crq;
struct carm_msg_ioctl *ioc;
void *mem;
- unsigned int idx, msg_size;
+ unsigned int msg_size;
int rc;
- crq = carm_get_special(host);
- if (!crq)
+ rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
+ if (IS_ERR(rq))
return -ENOMEM;
+ crq = blk_mq_rq_to_pdu(rq);
- idx = crq->tag;
+ mem = carm_ref_msg(host, rq->tag);
- mem = carm_ref_msg(host, idx);
-
- msg_size = func(host, idx, mem);
+ msg_size = func(host, rq->tag, mem);
ioc = mem;
crq->msg_type = ioc->type;
@@ -660,9 +577,8 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
BUG_ON(rc < 0);
crq->msg_bucket = (u32) rc;
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->special = crq;
- blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
+ DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
+ blk_execute_rq_nowait(host->oob_q, NULL, rq, true, NULL);
return 0;
}
@@ -744,19 +660,6 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
sizeof(struct carm_fw_ver);
}
-static inline void carm_end_request_queued(struct carm_host *host,
- struct carm_request *crq,
- blk_status_t error)
-{
- struct request *req = crq->rq;
- int rc;
-
- blk_mq_end_request(req, error);
-
- rc = carm_put_request(host, crq);
- assert(rc == 0);
-}
-
static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
{
unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
@@ -791,101 +694,50 @@ static inline void carm_round_robin(struct carm_host *host)
}
}
-static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
- blk_status_t error)
-{
- carm_end_request_queued(host, crq, error);
- if (max_queue == 1)
- carm_round_robin(host);
- else if ((host->n_msgs <= CARM_MSG_LOW_WATER) &&
- (host->hw_sg_used <= CARM_SG_LOW_WATER)) {
- carm_round_robin(host);
- }
-}
-
-static blk_status_t carm_oob_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
+static inline enum dma_data_direction carm_rq_dir(struct request *rq)
{
- struct request_queue *q = hctx->queue;
- struct carm_host *host = q->queuedata;
- struct carm_request *crq;
- int rc;
-
- blk_mq_start_request(bd->rq);
-
- spin_lock_irq(&host->lock);
-
- crq = bd->rq->special;
- assert(crq != NULL);
- assert(crq->rq == bd->rq);
-
- crq->n_elem = 0;
-
- DPRINTK("send req\n");
- rc = carm_send_msg(host, crq);
- if (rc) {
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
- }
-
- spin_unlock_irq(&host->lock);
- return BLK_STS_OK;
+ return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
}
static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request_queue *q = hctx->queue;
+ struct request *rq = bd->rq;
struct carm_port *port = q->queuedata;
struct carm_host *host = port->host;
+ struct carm_request *crq = blk_mq_rq_to_pdu(rq);
struct carm_msg_rw *msg;
- struct carm_request *crq;
- struct request *rq = bd->rq;
struct scatterlist *sg;
- int writing = 0, pci_dir, i, n_elem, rc;
- u32 tmp;
+ int i, n_elem = 0, rc;
unsigned int msg_size;
+ u32 tmp;
+
+ crq->n_elem = 0;
+ sg_init_table(crq->sg, CARM_MAX_REQ_SG);
blk_mq_start_request(rq);
spin_lock_irq(&host->lock);
-
- crq = carm_get_request(host);
- if (!crq) {
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
- }
- crq->rq = rq;
-
- if (rq_data_dir(rq) == WRITE) {
- writing = 1;
- pci_dir = DMA_TO_DEVICE;
- } else {
- pci_dir = DMA_FROM_DEVICE;
- }
+ if (req_op(rq) == REQ_OP_DRV_OUT)
+ goto send_msg;
/* get scatterlist from block layer */
sg = &crq->sg[0];
n_elem = blk_rq_map_sg(q, rq, sg);
- if (n_elem <= 0) {
- /* request with no s/g entries? */
- carm_end_rq(host, crq, BLK_STS_IOERR);
- spin_unlock_irq(&host->lock);
- return BLK_STS_IOERR;
- }
+ if (n_elem <= 0)
+ goto out_ioerr;
/* map scatterlist to PCI bus addresses */
- n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, pci_dir);
- if (n_elem <= 0) {
- /* request with no s/g entries? */
- carm_end_rq(host, crq, BLK_STS_IOERR);
- spin_unlock_irq(&host->lock);
- return BLK_STS_IOERR;
- }
+ n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq));
+ if (n_elem <= 0)
+ goto out_ioerr;
+
+ /* obey global hardware limit on S/G entries */
+ if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem)
+ goto out_resource;
+
crq->n_elem = n_elem;
- crq->port = port;
host->hw_sg_used += n_elem;
/*
@@ -893,9 +745,9 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
*/
VPRINTK("build msg\n");
- msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag);
+ msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag);
- if (writing) {
+ if (rq_data_dir(rq) == WRITE) {
msg->type = CARM_MSG_WRITE;
crq->msg_type = CARM_MSG_WRITE;
} else {
@@ -906,7 +758,7 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
msg->id = port->port_no;
msg->sg_count = n_elem;
msg->sg_type = SGT_32BIT;
- msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag));
+ msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
tmp = (blk_rq_pos(rq) >> 16) >> 16;
msg->lba_high = cpu_to_le16( (u16) tmp );
@@ -923,22 +775,28 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
rc = carm_lookup_bucket(msg_size);
BUG_ON(rc < 0);
crq->msg_bucket = (u32) rc;
-
+send_msg:
/*
* queue read/write message to hardware
*/
-
- VPRINTK("send msg, tag == %u\n", crq->tag);
- rc = carm_send_msg(host, crq);
+ VPRINTK("send msg, tag == %u\n", rq->tag);
+ rc = carm_send_msg(host, crq, rq->tag);
if (rc) {
- carm_put_request(host, crq);
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
+ host->hw_sg_used -= n_elem;
+ goto out_resource;
}
spin_unlock_irq(&host->lock);
return BLK_STS_OK;
+out_resource:
+ dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq));
+ carm_push_q(host, q);
+ spin_unlock_irq(&host->lock);
+ return BLK_STS_DEV_RESOURCE;
+out_ioerr:
+ carm_round_robin(host);
+ spin_unlock_irq(&host->lock);
+ return BLK_STS_IOERR;
}
static void carm_handle_array_info(struct carm_host *host,
@@ -954,8 +812,6 @@ static void carm_handle_array_info(struct carm_host *host,
DPRINTK("ENTER\n");
- carm_end_rq(host, crq, error);
-
if (error)
goto out;
if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
@@ -1011,8 +867,6 @@ static void carm_handle_scan_chan(struct carm_host *host,
DPRINTK("ENTER\n");
- carm_end_rq(host, crq, error);
-
if (error) {
new_state = HST_ERROR;
goto out;
@@ -1040,8 +894,6 @@ static void carm_handle_generic(struct carm_host *host,
{
DPRINTK("ENTER\n");
- carm_end_rq(host, crq, error);
-
assert(host->state == cur_state);
if (error)
host->state = HST_ERROR;
@@ -1050,28 +902,12 @@ static void carm_handle_generic(struct carm_host *host,
schedule_work(&host->fsm_task);
}
-static inline void carm_handle_rw(struct carm_host *host,
- struct carm_request *crq, blk_status_t error)
-{
- int pci_dir;
-
- VPRINTK("ENTER\n");
-
- if (rq_data_dir(crq->rq) == WRITE)
- pci_dir = DMA_TO_DEVICE;
- else
- pci_dir = DMA_FROM_DEVICE;
-
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, pci_dir);
-
- carm_end_rq(host, crq, error);
-}
-
static inline void carm_handle_resp(struct carm_host *host,
__le32 ret_handle_le, u32 status)
{
u32 handle = le32_to_cpu(ret_handle_le);
unsigned int msg_idx;
+ struct request *rq;
struct carm_request *crq;
blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
u8 *mem;
@@ -1087,13 +923,15 @@ static inline void carm_handle_resp(struct carm_host *host,
msg_idx = TAG_DECODE(handle);
VPRINTK("tag == %u\n", msg_idx);
- crq = &host->req[msg_idx];
+ rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx);
+ crq = blk_mq_rq_to_pdu(rq);
/* fast path */
if (likely(crq->msg_type == CARM_MSG_READ ||
crq->msg_type == CARM_MSG_WRITE)) {
- carm_handle_rw(host, crq, error);
- return;
+ dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem,
+ carm_rq_dir(rq));
+ goto done;
}
mem = carm_ref_msg(host, msg_idx);
@@ -1103,7 +941,7 @@ static inline void carm_handle_resp(struct carm_host *host,
switch (crq->msg_subtype) {
case CARM_IOC_SCAN_CHAN:
carm_handle_scan_chan(host, crq, mem, error);
- break;
+ goto done;
default:
/* unknown / invalid response */
goto err_out;
@@ -1116,11 +954,11 @@ static inline void carm_handle_resp(struct carm_host *host,
case MISC_ALLOC_MEM:
carm_handle_generic(host, crq, error,
HST_ALLOC_BUF, HST_SYNC_TIME);
- break;
+ goto done;
case MISC_SET_TIME:
carm_handle_generic(host, crq, error,
HST_SYNC_TIME, HST_GET_FW_VER);
- break;
+ goto done;
case MISC_GET_FW_VER: {
struct carm_fw_ver *ver = (struct carm_fw_ver *)
(mem + sizeof(struct carm_msg_get_fw_ver));
@@ -1130,7 +968,7 @@ static inline void carm_handle_resp(struct carm_host *host,
}
carm_handle_generic(host, crq, error,
HST_GET_FW_VER, HST_PORT_SCAN);
- break;
+ goto done;
}
default:
/* unknown / invalid response */
@@ -1161,7 +999,13 @@ static inline void carm_handle_resp(struct carm_host *host,
err_out:
printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- carm_end_rq(host, crq, BLK_STS_IOERR);
+ error = BLK_STS_IOERR;
+done:
+ host->hw_sg_used -= crq->n_elem;
+ blk_mq_end_request(blk_mq_rq_from_pdu(crq), error);
+
+ if (host->hw_sg_used <= CARM_SG_LOW_WATER)
+ carm_round_robin(host);
}
static inline void carm_handle_responses(struct carm_host *host)
@@ -1491,78 +1335,56 @@ static int carm_init_host(struct carm_host *host)
return 0;
}
-static const struct blk_mq_ops carm_oob_mq_ops = {
- .queue_rq = carm_oob_queue_rq,
-};
-
static const struct blk_mq_ops carm_mq_ops = {
.queue_rq = carm_queue_rq,
};
-static int carm_init_disks(struct carm_host *host)
+static int carm_init_disk(struct carm_host *host, unsigned int port_no)
{
- unsigned int i;
- int rc = 0;
+ struct carm_port *port = &host->port[port_no];
+ struct gendisk *disk;
+ struct request_queue *q;
- for (i = 0; i < CARM_MAX_PORTS; i++) {
- struct gendisk *disk;
- struct request_queue *q;
- struct carm_port *port;
+ port->host = host;
+ port->port_no = port_no;
- port = &host->port[i];
- port->host = host;
- port->port_no = i;
+ disk = alloc_disk(CARM_MINORS_PER_MAJOR);
+ if (!disk)
+ return -ENOMEM;
- disk = alloc_disk(CARM_MINORS_PER_MAJOR);
- if (!disk) {
- rc = -ENOMEM;
- break;
- }
+ port->disk = disk;
+ sprintf(disk->disk_name, DRV_NAME "/%u",
+ (unsigned int)host->id * CARM_MAX_PORTS + port_no);
+ disk->major = host->major;
+ disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
+ disk->fops = &carm_bd_ops;
+ disk->private_data = port;
- port->disk = disk;
- sprintf(disk->disk_name, DRV_NAME "/%u",
- (unsigned int) (host->id * CARM_MAX_PORTS) + i);
- disk->major = host->major;
- disk->first_minor = i * CARM_MINORS_PER_MAJOR;
- disk->fops = &carm_bd_ops;
- disk->private_data = port;
-
- q = blk_mq_init_sq_queue(&port->tag_set, &carm_mq_ops,
- max_queue, BLK_MQ_F_SHOULD_MERGE);
- if (IS_ERR(q)) {
- rc = PTR_ERR(q);
- break;
- }
- disk->queue = q;
- blk_queue_max_segments(q, CARM_MAX_REQ_SG);
- blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
+ q = blk_mq_init_queue(&host->tag_set);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
- q->queuedata = port;
- }
+ blk_queue_max_segments(q, CARM_MAX_REQ_SG);
+ blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
- return rc;
+ q->queuedata = port;
+ disk->queue = q;
+ return 0;
}
-static void carm_free_disks(struct carm_host *host)
+static void carm_free_disk(struct carm_host *host, unsigned int port_no)
{
- unsigned int i;
-
- for (i = 0; i < CARM_MAX_PORTS; i++) {
- struct carm_port *port = &host->port[i];
- struct gendisk *disk = port->disk;
+ struct carm_port *port = &host->port[port_no];
+ struct gendisk *disk = port->disk;
- if (disk) {
- struct request_queue *q = disk->queue;
+ if (!disk)
+ return;
- if (disk->flags & GENHD_FL_UP)
- del_gendisk(disk);
- if (q) {
- blk_mq_free_tag_set(&port->tag_set);
- blk_cleanup_queue(q);
- }
- put_disk(disk);
- }
- }
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+ put_disk(disk);
}
static int carm_init_shm(struct carm_host *host)
@@ -1618,9 +1440,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&host->fsm_task, carm_fsm_task);
init_completion(&host->probe_comp);
- for (i = 0; i < ARRAY_SIZE(host->req); i++)
- host->req[i].tag = i;
-
host->mmio = ioremap(pci_resource_start(pdev, 0),
pci_resource_len(pdev, 0));
if (!host->mmio) {
@@ -1637,14 +1456,26 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_iounmap;
}
- q = blk_mq_init_sq_queue(&host->tag_set, &carm_oob_mq_ops, 1,
- BLK_MQ_F_NO_SCHED);
+ memset(&host->tag_set, 0, sizeof(host->tag_set));
+ host->tag_set.ops = &carm_mq_ops;
+ host->tag_set.cmd_size = sizeof(struct carm_request);
+ host->tag_set.nr_hw_queues = 1;
+ host->tag_set.nr_maps = 1;
+ host->tag_set.queue_depth = max_queue;
+ host->tag_set.numa_node = NUMA_NO_NODE;
+ host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+
+ rc = blk_mq_alloc_tag_set(&host->tag_set);
+ if (rc)
+ goto err_out_dma_free;
+
+ q = blk_mq_init_queue(&host->tag_set);
if (IS_ERR(q)) {
- printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n",
- pci_name(pdev));
rc = PTR_ERR(q);
+ blk_mq_free_tag_set(&host->tag_set);
goto err_out_dma_free;
}
+
host->oob_q = q;
q->queuedata = host;
@@ -1667,9 +1498,11 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
if (host->flags & FL_DYN_MAJOR)
host->major = rc;
- rc = carm_init_disks(host);
- if (rc)
- goto err_out_blkdev_disks;
+ for (i = 0; i < CARM_MAX_PORTS; i++) {
+ rc = carm_init_disk(host, i);
+ if (rc)
+ goto err_out_blkdev_disks;
+ }
pci_set_master(pdev);
@@ -1699,7 +1532,8 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
err_out_free_irq:
free_irq(pdev->irq, host);
err_out_blkdev_disks:
- carm_free_disks(host);
+ for (i = 0; i < CARM_MAX_PORTS; i++)
+ carm_free_disk(host, i);
unregister_blkdev(host->major, host->name);
err_out_free_majors:
if (host->major == 160)
@@ -1724,6 +1558,7 @@ err_out:
static void carm_remove_one (struct pci_dev *pdev)
{
struct carm_host *host = pci_get_drvdata(pdev);
+ unsigned int i;
if (!host) {
printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
@@ -1732,7 +1567,8 @@ static void carm_remove_one (struct pci_dev *pdev)
}
free_irq(pdev->irq, host);
- carm_free_disks(host);
+ for (i = 0; i < CARM_MAX_PORTS; i++)
+ carm_free_disk(host, i);
unregister_blkdev(host->major, host->name);
if (host->major == 160)
clear_bit(0, &carm_major_alloc);
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index be3e3ab79950..aa035cf8a51d 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -888,8 +888,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
card->biotail = &card->bio;
spin_lock_init(&card->lock);
- card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
- &card->lock);
+ card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
if (!card->queue)
goto failed_alloc;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 086c6bb12baa..912c4265e592 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -214,6 +214,20 @@ static void virtblk_done(struct virtqueue *vq)
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
+static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+ struct virtio_blk *vblk = hctx->queue->queuedata;
+ struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
+ bool kick;
+
+ spin_lock_irq(&vq->lock);
+ kick = virtqueue_kick_prepare(vq->vq);
+ spin_unlock_irq(&vq->lock);
+
+ if (kick)
+ virtqueue_notify(vq->vq);
+}
+
static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -624,7 +638,7 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
struct virtio_blk *vblk = set->driver_data;
- return blk_mq_virtio_map_queues(set, vblk->vdev, 0);
+ return blk_mq_virtio_map_queues(&set->map[0], vblk->vdev, 0);
}
#ifdef CONFIG_VIRTIO_BLK_SCSI
@@ -638,6 +652,7 @@ static void virtblk_initialize_rq(struct request *req)
static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
+ .commit_rqs = virtio_commit_rqs,
.complete = virtblk_request_done,
.init_request = virtblk_init_request,
#ifdef CONFIG_VIRTIO_BLK_SCSI
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index fcd055457364..1ffc64770643 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -15,7 +15,7 @@ config ZRAM
See Documentation/blockdev/zram.txt for more information.
config ZRAM_WRITEBACK
- bool "Write back incompressible page to backing device"
+ bool "Write back incompressible or idle page to backing device"
depends on ZRAM
help
With incompressible page, there is no memory saving to keep it
@@ -23,6 +23,9 @@ config ZRAM_WRITEBACK
For this feature, admin should set up backing device via
/sys/block/zramX/backing_dev.
+ With /sys/block/zramX/{idle,writeback}, application could ask
+ idle page's writeback to the backing device to save in memory.
+
See Documentation/blockdev/zram.txt for more information.
config ZRAM_MEMORY_TRACKING
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 4879595200e1..33c5cc879f24 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -52,15 +52,23 @@ static unsigned int num_devices = 1;
static size_t huge_class_size;
static void zram_free_page(struct zram *zram, size_t index);
+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+ u32 index, int offset, struct bio *bio);
+
+
+static int zram_slot_trylock(struct zram *zram, u32 index)
+{
+ return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
+}
static void zram_slot_lock(struct zram *zram, u32 index)
{
- bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
+ bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
}
static void zram_slot_unlock(struct zram *zram, u32 index)
{
- bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
+ bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}
static inline bool init_done(struct zram *zram)
@@ -68,13 +76,6 @@ static inline bool init_done(struct zram *zram)
return zram->disksize;
}
-static inline bool zram_allocated(struct zram *zram, u32 index)
-{
-
- return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
- zram->table[index].handle;
-}
-
static inline struct zram *dev_to_zram(struct device *dev)
{
return (struct zram *)dev_to_disk(dev)->private_data;
@@ -94,19 +95,19 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
static bool zram_test_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- return zram->table[index].value & BIT(flag);
+ return zram->table[index].flags & BIT(flag);
}
static void zram_set_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- zram->table[index].value |= BIT(flag);
+ zram->table[index].flags |= BIT(flag);
}
static void zram_clear_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- zram->table[index].value &= ~BIT(flag);
+ zram->table[index].flags &= ~BIT(flag);
}
static inline void zram_set_element(struct zram *zram, u32 index,
@@ -122,15 +123,22 @@ static unsigned long zram_get_element(struct zram *zram, u32 index)
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
- return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
static void zram_set_obj_size(struct zram *zram,
u32 index, size_t size)
{
- unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
+ unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
- zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+ zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
+}
+
+static inline bool zram_allocated(struct zram *zram, u32 index)
+{
+ return zram_get_obj_size(zram, index) ||
+ zram_test_flag(zram, index, ZRAM_SAME) ||
+ zram_test_flag(zram, index, ZRAM_WB);
}
#if PAGE_SIZE != 4096
@@ -276,17 +284,90 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ int index;
+ char mode_buf[8];
+ ssize_t sz;
+
+ sz = strscpy(mode_buf, buf, sizeof(mode_buf));
+ if (sz <= 0)
+ return -EINVAL;
+
+ /* ignore trailing new line */
+ if (mode_buf[sz - 1] == '\n')
+ mode_buf[sz - 1] = 0x00;
+
+ if (strcmp(mode_buf, "all"))
+ return -EINVAL;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ up_read(&zram->init_lock);
+ return -EINVAL;
+ }
+
+ for (index = 0; index < nr_pages; index++) {
+ /*
+ * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
+ * See the comment in writeback_store.
+ */
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index) ||
+ zram_test_flag(zram, index, ZRAM_UNDER_WB))
+ goto next;
+ zram_set_flag(zram, index, ZRAM_IDLE);
+next:
+ zram_slot_unlock(zram, index);
+ }
+
+ up_read(&zram->init_lock);
+
+ return len;
+}
+
#ifdef CONFIG_ZRAM_WRITEBACK
-static bool zram_wb_enabled(struct zram *zram)
+static ssize_t writeback_limit_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ u64 val;
+ ssize_t ret = -EINVAL;
+
+ if (kstrtoull(buf, 10, &val))
+ return ret;
+
+ down_read(&zram->init_lock);
+ atomic64_set(&zram->stats.bd_wb_limit, val);
+ if (val == 0)
+ zram->stop_writeback = false;
+ up_read(&zram->init_lock);
+ ret = len;
+
+ return ret;
+}
+
+static ssize_t writeback_limit_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- return zram->backing_dev;
+ u64 val;
+ struct zram *zram = dev_to_zram(dev);
+
+ down_read(&zram->init_lock);
+ val = atomic64_read(&zram->stats.bd_wb_limit);
+ up_read(&zram->init_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
static void reset_bdev(struct zram *zram)
{
struct block_device *bdev;
- if (!zram_wb_enabled(zram))
+ if (!zram->backing_dev)
return;
bdev = zram->bdev;
@@ -313,7 +394,7 @@ static ssize_t backing_dev_show(struct device *dev,
ssize_t ret;
down_read(&zram->init_lock);
- if (!zram_wb_enabled(zram)) {
+ if (!zram->backing_dev) {
memcpy(buf, "none\n", 5);
up_read(&zram->init_lock);
return 5;
@@ -382,8 +463,10 @@ static ssize_t backing_dev_store(struct device *dev,
bdev = bdgrab(I_BDEV(inode));
err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
- if (err < 0)
+ if (err < 0) {
+ bdev = NULL;
goto out;
+ }
nr_pages = i_size_read(inode) >> PAGE_SHIFT;
bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
@@ -399,7 +482,6 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
reset_bdev(zram);
- spin_lock_init(&zram->bitmap_lock);
zram->old_block_size = old_block_size;
zram->bdev = bdev;
@@ -441,32 +523,29 @@ out:
return err;
}
-static unsigned long get_entry_bdev(struct zram *zram)
+static unsigned long alloc_block_bdev(struct zram *zram)
{
- unsigned long entry;
-
- spin_lock(&zram->bitmap_lock);
+ unsigned long blk_idx = 1;
+retry:
/* skip 0 bit to confuse zram.handle = 0 */
- entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
- if (entry == zram->nr_pages) {
- spin_unlock(&zram->bitmap_lock);
+ blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
+ if (blk_idx == zram->nr_pages)
return 0;
- }
- set_bit(entry, zram->bitmap);
- spin_unlock(&zram->bitmap_lock);
+ if (test_and_set_bit(blk_idx, zram->bitmap))
+ goto retry;
- return entry;
+ atomic64_inc(&zram->stats.bd_count);
+ return blk_idx;
}
-static void put_entry_bdev(struct zram *zram, unsigned long entry)
+static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
int was_set;
- spin_lock(&zram->bitmap_lock);
- was_set = test_and_clear_bit(entry, zram->bitmap);
- spin_unlock(&zram->bitmap_lock);
+ was_set = test_and_clear_bit(blk_idx, zram->bitmap);
WARN_ON_ONCE(!was_set);
+ atomic64_dec(&zram->stats.bd_count);
}
static void zram_page_end_io(struct bio *bio)
@@ -509,6 +588,169 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
return 1;
}
+#define HUGE_WRITEBACK 0x1
+#define IDLE_WRITEBACK 0x2
+
+static ssize_t writeback_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ unsigned long index;
+ struct bio bio;
+ struct bio_vec bio_vec;
+ struct page *page;
+ ssize_t ret, sz;
+ char mode_buf[8];
+ unsigned long mode = -1UL;
+ unsigned long blk_idx = 0;
+
+ sz = strscpy(mode_buf, buf, sizeof(mode_buf));
+ if (sz <= 0)
+ return -EINVAL;
+
+ /* ignore trailing newline */
+ if (mode_buf[sz - 1] == '\n')
+ mode_buf[sz - 1] = 0x00;
+
+ if (!strcmp(mode_buf, "idle"))
+ mode = IDLE_WRITEBACK;
+ else if (!strcmp(mode_buf, "huge"))
+ mode = HUGE_WRITEBACK;
+
+ if (mode == -1UL)
+ return -EINVAL;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ ret = -EINVAL;
+ goto release_init_lock;
+ }
+
+ if (!zram->backing_dev) {
+ ret = -ENODEV;
+ goto release_init_lock;
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto release_init_lock;
+ }
+
+ for (index = 0; index < nr_pages; index++) {
+ struct bio_vec bvec;
+
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+
+ if (zram->stop_writeback) {
+ ret = -EIO;
+ break;
+ }
+
+ if (!blk_idx) {
+ blk_idx = alloc_block_bdev(zram);
+ if (!blk_idx) {
+ ret = -ENOSPC;
+ break;
+ }
+ }
+
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index))
+ goto next;
+
+ if (zram_test_flag(zram, index, ZRAM_WB) ||
+ zram_test_flag(zram, index, ZRAM_SAME) ||
+ zram_test_flag(zram, index, ZRAM_UNDER_WB))
+ goto next;
+
+ if ((mode & IDLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_IDLE)) &&
+ (mode & HUGE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_HUGE)))
+ goto next;
+ /*
+ * Clearing ZRAM_UNDER_WB is duty of caller.
+ * IOW, zram_free_page never clear it.
+ */
+ zram_set_flag(zram, index, ZRAM_UNDER_WB);
+ /* Need for hugepage writeback racing */
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ zram_slot_unlock(zram, index);
+ if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
+ zram_slot_lock(zram, index);
+ zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+ zram_clear_flag(zram, index, ZRAM_IDLE);
+ zram_slot_unlock(zram, index);
+ continue;
+ }
+
+ bio_init(&bio, &bio_vec, 1);
+ bio_set_dev(&bio, zram->bdev);
+ bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
+ bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
+
+ bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
+ bvec.bv_offset);
+ /*
+ * XXX: A single page IO would be inefficient for write
+ * but it would be not bad as starter.
+ */
+ ret = submit_bio_wait(&bio);
+ if (ret) {
+ zram_slot_lock(zram, index);
+ zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+ zram_clear_flag(zram, index, ZRAM_IDLE);
+ zram_slot_unlock(zram, index);
+ continue;
+ }
+
+ atomic64_inc(&zram->stats.bd_writes);
+ /*
+ * We released zram_slot_lock so need to check if the slot was
+ * changed. If there is freeing for the slot, we can catch it
+ * easily by zram_allocated.
+ * A subtle case is the slot is freed/reallocated/marked as
+ * ZRAM_IDLE again. To close the race, idle_store doesn't
+ * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
+ * Thus, we could close the race by checking ZRAM_IDLE bit.
+ */
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index) ||
+ !zram_test_flag(zram, index, ZRAM_IDLE)) {
+ zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+ zram_clear_flag(zram, index, ZRAM_IDLE);
+ goto next;
+ }
+
+ zram_free_page(zram, index);
+ zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+ zram_set_flag(zram, index, ZRAM_WB);
+ zram_set_element(zram, index, blk_idx);
+ blk_idx = 0;
+ atomic64_inc(&zram->stats.pages_stored);
+ if (atomic64_add_unless(&zram->stats.bd_wb_limit,
+ -1 << (PAGE_SHIFT - 12), 0)) {
+ if (atomic64_read(&zram->stats.bd_wb_limit) == 0)
+ zram->stop_writeback = true;
+ }
+next:
+ zram_slot_unlock(zram, index);
+ }
+
+ if (blk_idx)
+ free_block_bdev(zram, blk_idx);
+ ret = len;
+ __free_page(page);
+release_init_lock:
+ up_read(&zram->init_lock);
+
+ return ret;
+}
+
struct zram_work {
struct work_struct work;
struct zram *zram;
@@ -561,79 +803,21 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *parent, bool sync)
{
+ atomic64_inc(&zram->stats.bd_reads);
if (sync)
return read_from_bdev_sync(zram, bvec, entry, parent);
else
return read_from_bdev_async(zram, bvec, entry, parent);
}
-
-static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
- u32 index, struct bio *parent,
- unsigned long *pentry)
-{
- struct bio *bio;
- unsigned long entry;
-
- bio = bio_alloc(GFP_ATOMIC, 1);
- if (!bio)
- return -ENOMEM;
-
- entry = get_entry_bdev(zram);
- if (!entry) {
- bio_put(bio);
- return -ENOSPC;
- }
-
- bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
- bio_set_dev(bio, zram->bdev);
- if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
- bvec->bv_offset)) {
- bio_put(bio);
- put_entry_bdev(zram, entry);
- return -EIO;
- }
-
- if (!parent) {
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
- bio->bi_end_io = zram_page_end_io;
- } else {
- bio->bi_opf = parent->bi_opf;
- bio_chain(bio, parent);
- }
-
- submit_bio(bio);
- *pentry = entry;
-
- return 0;
-}
-
-static void zram_wb_clear(struct zram *zram, u32 index)
-{
- unsigned long entry;
-
- zram_clear_flag(zram, index, ZRAM_WB);
- entry = zram_get_element(zram, index);
- zram_set_element(zram, index, 0);
- put_entry_bdev(zram, entry);
-}
-
#else
-static bool zram_wb_enabled(struct zram *zram) { return false; }
static inline void reset_bdev(struct zram *zram) {};
-static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
- u32 index, struct bio *parent,
- unsigned long *pentry)
-
-{
- return -EIO;
-}
-
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
unsigned long entry, struct bio *parent, bool sync)
{
return -EIO;
}
-static void zram_wb_clear(struct zram *zram, u32 index) {}
+
+static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
@@ -652,14 +836,10 @@ static void zram_debugfs_destroy(void)
static void zram_accessed(struct zram *zram, u32 index)
{
+ zram_clear_flag(zram, index, ZRAM_IDLE);
zram->table[index].ac_time = ktime_get_boottime();
}
-static void zram_reset_access(struct zram *zram, u32 index)
-{
- zram->table[index].ac_time = 0;
-}
-
static ssize_t read_block_state(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -689,12 +869,13 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
ts = ktime_to_timespec64(zram->table[index].ac_time);
copied = snprintf(kbuf + written, count,
- "%12zd %12lld.%06lu %c%c%c\n",
+ "%12zd %12lld.%06lu %c%c%c%c\n",
index, (s64)ts.tv_sec,
ts.tv_nsec / NSEC_PER_USEC,
zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
- zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
+ zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
+ zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
if (count < copied) {
zram_slot_unlock(zram, index);
@@ -739,8 +920,10 @@ static void zram_debugfs_unregister(struct zram *zram)
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
-static void zram_accessed(struct zram *zram, u32 index) {};
-static void zram_reset_access(struct zram *zram, u32 index) {};
+static void zram_accessed(struct zram *zram, u32 index)
+{
+ zram_clear_flag(zram, index, ZRAM_IDLE);
+};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif
@@ -877,6 +1060,26 @@ static ssize_t mm_stat_show(struct device *dev,
return ret;
}
+#ifdef CONFIG_ZRAM_WRITEBACK
+#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
+static ssize_t bd_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ ret = scnprintf(buf, PAGE_SIZE,
+ "%8llu %8llu %8llu\n",
+ FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
+ FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
+ FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
+ up_read(&zram->init_lock);
+
+ return ret;
+}
+#endif
+
static ssize_t debug_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -886,9 +1089,10 @@ static ssize_t debug_stat_show(struct device *dev,
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
- "version: %d\n%8llu\n",
+ "version: %d\n%8llu %8llu\n",
version,
- (u64)atomic64_read(&zram->stats.writestall));
+ (u64)atomic64_read(&zram->stats.writestall),
+ (u64)atomic64_read(&zram->stats.miss_free));
up_read(&zram->init_lock);
return ret;
@@ -896,6 +1100,9 @@ static ssize_t debug_stat_show(struct device *dev,
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RO(bd_stat);
+#endif
static DEVICE_ATTR_RO(debug_stat);
static void zram_meta_free(struct zram *zram, u64 disksize)
@@ -940,17 +1147,21 @@ static void zram_free_page(struct zram *zram, size_t index)
{
unsigned long handle;
- zram_reset_access(zram, index);
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ zram->table[index].ac_time = 0;
+#endif
+ if (zram_test_flag(zram, index, ZRAM_IDLE))
+ zram_clear_flag(zram, index, ZRAM_IDLE);
if (zram_test_flag(zram, index, ZRAM_HUGE)) {
zram_clear_flag(zram, index, ZRAM_HUGE);
atomic64_dec(&zram->stats.huge_pages);
}
- if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
- zram_wb_clear(zram, index);
- atomic64_dec(&zram->stats.pages_stored);
- return;
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_clear_flag(zram, index, ZRAM_WB);
+ free_block_bdev(zram, zram_get_element(zram, index));
+ goto out;
}
/*
@@ -959,10 +1170,8 @@ static void zram_free_page(struct zram *zram, size_t index)
*/
if (zram_test_flag(zram, index, ZRAM_SAME)) {
zram_clear_flag(zram, index, ZRAM_SAME);
- zram_set_element(zram, index, 0);
atomic64_dec(&zram->stats.same_pages);
- atomic64_dec(&zram->stats.pages_stored);
- return;
+ goto out;
}
handle = zram_get_handle(zram, index);
@@ -973,10 +1182,12 @@ static void zram_free_page(struct zram *zram, size_t index)
atomic64_sub(zram_get_obj_size(zram, index),
&zram->stats.compr_data_size);
+out:
atomic64_dec(&zram->stats.pages_stored);
-
zram_set_handle(zram, index, 0);
zram_set_obj_size(zram, index, 0);
+ WARN_ON_ONCE(zram->table[index].flags &
+ ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
@@ -987,24 +1198,20 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
unsigned int size;
void *src, *dst;
- if (zram_wb_enabled(zram)) {
- zram_slot_lock(zram, index);
- if (zram_test_flag(zram, index, ZRAM_WB)) {
- struct bio_vec bvec;
-
- zram_slot_unlock(zram, index);
+ zram_slot_lock(zram, index);
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ struct bio_vec bvec;
- bvec.bv_page = page;
- bvec.bv_len = PAGE_SIZE;
- bvec.bv_offset = 0;
- return read_from_bdev(zram, &bvec,
- zram_get_element(zram, index),
- bio, partial_io);
- }
zram_slot_unlock(zram, index);
+
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+ return read_from_bdev(zram, &bvec,
+ zram_get_element(zram, index),
+ bio, partial_io);
}
- zram_slot_lock(zram, index);
handle = zram_get_handle(zram, index);
if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
unsigned long value;
@@ -1089,7 +1296,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
struct page *page = bvec->bv_page;
unsigned long element = 0;
enum zram_pageflags flags = 0;
- bool allow_wb = true;
mem = kmap_atomic(page);
if (page_same_filled(mem, &element)) {
@@ -1114,21 +1320,8 @@ compress_again:
return ret;
}
- if (unlikely(comp_len >= huge_class_size)) {
+ if (comp_len >= huge_class_size)
comp_len = PAGE_SIZE;
- if (zram_wb_enabled(zram) && allow_wb) {
- zcomp_stream_put(zram->comp);
- ret = write_to_bdev(zram, bvec, index, bio, &element);
- if (!ret) {
- flags = ZRAM_WB;
- ret = 1;
- goto out;
- }
- allow_wb = false;
- goto compress_again;
- }
- }
-
/*
* handle allocation has 2 paths:
* a) fast path is executed with preemption disabled (for
@@ -1400,10 +1593,14 @@ static void zram_slot_free_notify(struct block_device *bdev,
zram = bdev->bd_disk->private_data;
- zram_slot_lock(zram, index);
+ atomic64_inc(&zram->stats.notify_free);
+ if (!zram_slot_trylock(zram, index)) {
+ atomic64_inc(&zram->stats.miss_free);
+ return;
+ }
+
zram_free_page(zram, index);
zram_slot_unlock(zram, index);
- atomic64_inc(&zram->stats.notify_free);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
@@ -1608,10 +1805,13 @@ static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
+static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
+static DEVICE_ATTR_WO(writeback);
+static DEVICE_ATTR_RW(writeback_limit);
#endif
static struct attribute *zram_disk_attrs[] = {
@@ -1621,13 +1821,19 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_compact.attr,
&dev_attr_mem_limit.attr,
&dev_attr_mem_used_max.attr,
+ &dev_attr_idle.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
&dev_attr_backing_dev.attr,
+ &dev_attr_writeback.attr,
+ &dev_attr_writeback_limit.attr,
#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+ &dev_attr_bd_stat.attr,
+#endif
&dev_attr_debug_stat.attr,
NULL,
};
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 72c8584b6dff..4bd3afd15e83 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -30,7 +30,7 @@
/*
- * The lower ZRAM_FLAG_SHIFT bits of table.value is for
+ * The lower ZRAM_FLAG_SHIFT bits of table.flags is for
* object size (excluding header), the higher bits is for
* zram_pageflags.
*
@@ -41,13 +41,15 @@
*/
#define ZRAM_FLAG_SHIFT 24
-/* Flags for zram pages (table[page_no].value) */
+/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
/* zram slot is locked */
ZRAM_LOCK = ZRAM_FLAG_SHIFT,
ZRAM_SAME, /* Page consists the same element */
ZRAM_WB, /* page is stored on backing_device */
+ ZRAM_UNDER_WB, /* page is under writeback */
ZRAM_HUGE, /* Incompressible page */
+ ZRAM_IDLE, /* not accessed page since last idle marking */
__NR_ZRAM_PAGEFLAGS,
};
@@ -60,7 +62,7 @@ struct zram_table_entry {
unsigned long handle;
unsigned long element;
};
- unsigned long value;
+ unsigned long flags;
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
ktime_t ac_time;
#endif
@@ -79,6 +81,13 @@ struct zram_stats {
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
atomic64_t writestall; /* no. of write slow paths */
+ atomic64_t miss_free; /* no. of missed free */
+#ifdef CONFIG_ZRAM_WRITEBACK
+ atomic64_t bd_count; /* no. of pages in backing device */
+ atomic64_t bd_reads; /* no. of reads from backing device */
+ atomic64_t bd_writes; /* no. of writes from backing device */
+ atomic64_t bd_wb_limit; /* writeback limit of backing device */
+#endif
};
struct zram {
@@ -104,13 +113,13 @@ struct zram {
* zram is claimed so open request will be failed
*/
bool claim; /* Protected by bdev->bd_mutex */
-#ifdef CONFIG_ZRAM_WRITEBACK
struct file *backing_dev;
+ bool stop_writeback;
+#ifdef CONFIG_ZRAM_WRITEBACK
struct block_device *bdev;
unsigned int old_block_size;
unsigned long *bitmap;
unsigned long nr_pages;
- spinlock_t bitmap_lock;
#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
struct dentry *debugfs_dir;