summaryrefslogtreecommitdiffstats
path: root/drivers/lightnvm/pblk-write.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 00:32:19 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 00:32:19 +0100
commite2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
treeb97a90170c45211bcc437761653aa8016c34afcd /drivers/lightnvm/pblk-write.c
parentMerge tag 'configfs-for-4.15' of git://git.infradead.org/users/hch/configfs (diff)
parentnvme: fix visibility of "uuid" ns attribute (diff)
downloadlinux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.xz
linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.zip
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe: "This is the main pull request for block storage for 4.15-rc1. Nothing out of the ordinary in here, and no API changes or anything like that. Just various new features for drivers, core changes, etc. In particular, this pull request contains: - A patch series from Bart, closing the hole on blk/scsi-mq queue quiescing. - A series from Christoph, building towards hidden gendisks (for multipath) and ability to move bio chains around. - NVMe - Support for native multipath for NVMe (Christoph). - Userspace notifications for AENs (Keith). - Command side-effects support (Keith). - SGL support (Chaitanya Kulkarni) - FC fixes and improvements (James Smart) - Lots of fixes and tweaks (Various) - bcache - New maintainer (Michael Lyle) - Writeback control improvements (Michael) - Various fixes (Coly, Elena, Eric, Liang, et al) - lightnvm updates, mostly centered around the pblk interface (Javier, Hans, and Rakesh). - Removal of unused bio/bvec kmap atomic interfaces (me, Christoph) - Writeback series that fixes the much discussed hundreds of millions of sync-all units. This goes all the way, as discussed previously (me). - Fix for missing wakeup on writeback timer adjustments (Yafang Shao). - Fix laptop mode on blk-mq (me). - {mq,name} tuple lookup for IO schedulers, allowing us to have alias names. This means you can use 'deadline' on both !mq and on mq (where it's called mq-deadline). (me). - blktrace race fix, oopsing on sg load (me). - blk-mq optimizations (me). - Obscure waitqueue race fix for kyber (Omar). - NBD fixes (Josef). - Disable writeback throttling by default on bfq, like we do on cfq (Luca Miccio). - Series from Ming that enables us to treat flush requests on blk-mq like any other request. This is a really nice cleanup. - Series from Ming that improves merging on blk-mq with schedulers, getting us closer to flipping the switch on scsi-mq again. - BFQ updates (Paolo). 
- blk-mq atomic flags memory ordering fixes (Peter Z). - Loop cgroup support (Shaohua). - Lots of minor fixes from lots of different folks, both for core and driver code" * 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits) nvme: fix visibility of "uuid" ns attribute blk-mq: fixup some comment typos and lengths ide: ide-atapi: fix compile error with defining macro DEBUG blk-mq: improve tag waiting setup for non-shared tags brd: remove unused brd_mutex blk-mq: only run the hardware queue if IO is pending block: avoid null pointer dereference on null disk fs: guard_bio_eod() needs to consider partitions xtensa/simdisk: fix compile error nvme: expose subsys attribute to sysfs nvme: create 'slaves' and 'holders' entries for hidden controllers block: create 'slaves' and 'holders' entries for hidden gendisks nvme: also expose the namespace identification sysfs files for mpath nodes nvme: implement multipath access to nvme subsystems nvme: track shared namespaces nvme: introduce a nvme_ns_ids structure nvme: track subsystems block, nvme: Introduce blk_mq_req_flags_t block, scsi: Make SCSI quiesce and resume work reliably block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag ...
Diffstat (limited to 'drivers/lightnvm/pblk-write.c')
-rw-r--r--drivers/lightnvm/pblk-write.c229
1 files changed, 95 insertions, 134 deletions
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 3ad9e56d2473..6c1cafafef53 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -20,7 +20,6 @@
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
- struct nvm_tgt_dev *dev = pblk->dev;
struct bio *original_bio;
unsigned long ret;
int i;
@@ -33,16 +32,18 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
bio_endio(original_bio);
}
+ if (c_ctx->nr_padded)
+ pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
+ c_ctx->nr_padded);
+
#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
+ atomic_long_add(rqd->nr_ppas, &pblk->sync_writes);
#endif
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
- nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
-
bio_put(rqd->bio);
- pblk_free_rqd(pblk, rqd, WRITE);
+ pblk_free_rqd(pblk, rqd, PBLK_WRITE);
return ret;
}
@@ -107,10 +108,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
ppa_list = &rqd->ppa_addr;
recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
- if (!recovery) {
- pr_err("pblk: could not allocate recovery context\n");
- return;
- }
+
INIT_LIST_HEAD(&recovery->failed);
bit = -1;
@@ -175,7 +173,6 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
static void pblk_end_io_write_meta(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
- struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
struct pblk_line *line = m_ctx->private;
struct pblk_emeta *emeta = line->emeta;
@@ -187,19 +184,13 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
pblk_log_write_err(pblk, rqd);
pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
}
-#ifdef CONFIG_NVM_DEBUG
- else
- WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
-#endif
sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
if (sync == emeta->nr_entries)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
- pblk->close_wq);
+ pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws,
+ GFP_ATOMIC, pblk->close_wq);
- bio_put(rqd->bio);
- nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
- pblk_free_rqd(pblk, rqd, READ);
+ pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
atomic_dec(&pblk->inflight_io);
}
@@ -213,7 +204,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
/* Setup write request */
rqd->opcode = NVM_OP_PWRITE;
rqd->nr_ppas = nr_secs;
- rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+ rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
rqd->private = pblk;
rqd->end_io = end_io;
@@ -229,15 +220,16 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
+ struct ppa_addr *erase_ppa)
{
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line *e_line = pblk_line_get_erase(pblk);
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
unsigned int valid = c_ctx->nr_valid;
unsigned int padded = c_ctx->nr_padded;
unsigned int nr_secs = valid + padded;
unsigned long *lun_bitmap;
- int ret = 0;
+ int ret;
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
if (!lun_bitmap)
@@ -279,7 +271,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
rqd->ppa_status = (u64)0;
- rqd->flags = pblk_set_progr_mode(pblk, WRITE);
+ rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
return ret;
}
@@ -303,55 +295,6 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
return secs_to_sync;
}
-static inline int pblk_valid_meta_ppa(struct pblk *pblk,
- struct pblk_line *meta_line,
- struct ppa_addr *ppa_list, int nr_ppas)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line *data_line;
- struct ppa_addr ppa, ppa_opt;
- u64 paddr;
- int i;
-
- data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
- paddr = pblk_lookup_page(pblk, meta_line);
- ppa = addr_to_gen_ppa(pblk, paddr, 0);
-
- if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
- return 1;
-
- /* Schedule a metadata I/O that is half the distance from the data I/O
- * with regards to the number of LUNs forming the pblk instance. This
- * balances LUN conflicts across every I/O.
- *
- * When the LUN configuration changes (e.g., due to GC), this distance
- * can align, which would result on a LUN deadlock. In this case, modify
- * the distance to not be optimal, but allow metadata I/Os to succeed.
- */
- ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
- if (unlikely(ppa_opt.ppa == ppa.ppa)) {
- data_line->meta_distance--;
- return 0;
- }
-
- for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
- if (ppa_list[i].g.ch == ppa_opt.g.ch &&
- ppa_list[i].g.lun == ppa_opt.g.lun)
- return 1;
-
- if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
- for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
- if (ppa_list[i].g.ch == ppa.g.ch &&
- ppa_list[i].g.lun == ppa.g.lun)
- return 0;
-
- return 1;
- }
-
- return 0;
-}
-
int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -370,11 +313,8 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
int i, j;
int ret;
- rqd = pblk_alloc_rqd(pblk, READ);
- if (IS_ERR(rqd)) {
- pr_err("pblk: cannot allocate write req.\n");
- return PTR_ERR(rqd);
- }
+ rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
+
m_ctx = nvm_rq_to_pdu(rqd);
m_ctx->private = meta_line;
@@ -407,8 +347,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
if (emeta->mem >= lm->emeta_len[0]) {
spin_lock(&l_mg->close_lock);
list_del(&meta_line->list);
- WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
- "pblk: corrupt meta line %d\n", meta_line->id);
spin_unlock(&l_mg->close_lock);
}
@@ -428,18 +366,51 @@ fail_rollback:
pblk_dealloc_page(pblk, meta_line, rq_ppas);
list_add(&meta_line->list, &meta_line->list);
spin_unlock(&l_mg->close_lock);
-
- nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
fail_free_bio:
- if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
- bio_put(bio);
+ bio_put(bio);
fail_free_rqd:
- pblk_free_rqd(pblk, rqd, READ);
+ pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
return ret;
}
-static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
- int prev_n)
+static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
+ struct pblk_line *meta_line,
+ struct nvm_rq *data_rqd)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
+ struct pblk_line *data_line = pblk_line_get_data(pblk);
+ struct ppa_addr ppa, ppa_opt;
+ u64 paddr;
+ int pos_opt;
+
+ /* Schedule a metadata I/O that is half the distance from the data I/O
+ * with regard to the number of LUNs forming the pblk instance. This
+ * balances LUN conflicts across every I/O.
+ *
+ * When the LUN configuration changes (e.g., due to GC), this distance
+ * can align, which would result in metadata and data I/Os colliding. In
+ * this case, modify the distance to not be optimal, but move the
+ * optimal in the right direction.
+ */
+ paddr = pblk_lookup_page(pblk, meta_line);
+ ppa = addr_to_gen_ppa(pblk, paddr, 0);
+ ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+ pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
+
+ if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
+ test_bit(pos_opt, data_line->blk_bitmap))
+ return true;
+
+ if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
+ data_line->meta_distance--;
+
+ return false;
+}
+
+static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
+ struct nvm_rq *data_rqd)
{
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -449,57 +420,45 @@ static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
retry:
if (list_empty(&l_mg->emeta_list)) {
spin_unlock(&l_mg->close_lock);
- return 0;
+ return NULL;
}
meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
- if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+ if (meta_line->emeta->mem >= lm->emeta_len[0])
goto retry;
spin_unlock(&l_mg->close_lock);
- if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
- return 0;
+ if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
+ return NULL;
- return pblk_submit_meta_io(pblk, meta_line);
+ return meta_line;
}
static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
{
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct ppa_addr erase_ppa;
+ struct pblk_line *meta_line;
int err;
ppa_set_empty(&erase_ppa);
/* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+ err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
if (err) {
pr_err("pblk: could not setup write request: %d\n", err);
return NVM_IO_ERR;
}
- if (likely(ppa_empty(erase_ppa))) {
- /* Submit metadata write for previous data line */
- err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
- if (err) {
- pr_err("pblk: metadata I/O submission failed: %d", err);
- return NVM_IO_ERR;
- }
+ meta_line = pblk_should_submit_meta_io(pblk, rqd);
- /* Submit data write for current data line */
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: data I/O submission failed: %d\n", err);
- return NVM_IO_ERR;
- }
- } else {
- /* Submit data write for current data line */
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: data I/O submission failed: %d\n", err);
- return NVM_IO_ERR;
- }
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
- /* Submit available erase for next data line */
+ if (!ppa_empty(erase_ppa)) {
+ /* Submit erase for next data line */
if (pblk_blk_erase_async(pblk, erase_ppa)) {
struct pblk_line *e_line = pblk_line_get_erase(pblk);
struct nvm_tgt_dev *dev = pblk->dev;
@@ -512,6 +471,15 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
}
}
+ if (meta_line) {
+ /* Submit metadata write for previous data line */
+ err = pblk_submit_meta_io(pblk, meta_line);
+ if (err) {
+ pr_err("pblk: metadata I/O submission failed: %d", err);
+ return NVM_IO_ERR;
+ }
+ }
+
return NVM_IO_OK;
}
@@ -521,7 +489,8 @@ static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
struct bio *bio = rqd->bio;
if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+ pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid,
+ c_ctx->nr_padded);
}
static int pblk_submit_write(struct pblk *pblk)
@@ -543,31 +512,24 @@ static int pblk_submit_write(struct pblk *pblk)
if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
return 1;
- rqd = pblk_alloc_rqd(pblk, WRITE);
- if (IS_ERR(rqd)) {
- pr_err("pblk: cannot allocate write req.\n");
- return 1;
- }
-
- bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
- if (!bio) {
- pr_err("pblk: cannot allocate write bio\n");
- goto fail_free_rqd;
- }
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- rqd->bio = bio;
-
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
if (secs_to_sync > pblk->max_write_pgs) {
pr_err("pblk: bad buffer sync calculation\n");
- goto fail_put_bio;
+ return 1;
}
secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+ bio = bio_alloc(GFP_KERNEL, secs_to_sync);
+
+ bio->bi_iter.bi_sector = 0; /* internal bio */
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+ rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
+ rqd->bio = bio;
+
+ if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync,
secs_avail)) {
pr_err("pblk: corrupted write bio\n");
goto fail_put_bio;
@@ -586,8 +548,7 @@ fail_free_bio:
pblk_free_write_rqd(pblk, rqd);
fail_put_bio:
bio_put(bio);
-fail_free_rqd:
- pblk_free_rqd(pblk, rqd, WRITE);
+ pblk_free_rqd(pblk, rqd, PBLK_WRITE);
return 1;
}