diff options
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r-- | drivers/lightnvm/core.c | 10 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-cache.c | 10 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-core.c | 233 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-gc.c | 112 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-init.c | 172 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-map.c | 33 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-rb.c | 48 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-read.c | 146 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-recovery.c | 121 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-rl.c | 29 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-sysfs.c | 15 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-write.c | 269 | ||||
-rw-r--r-- | drivers/lightnvm/pblk.h | 58 |
13 files changed, 730 insertions, 526 deletions
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 63171cdce270..60aa7bc5a630 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -431,7 +431,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return 0; err_sysfs: if (tt->exit) - tt->exit(targetdata); + tt->exit(targetdata, true); err_init: blk_cleanup_queue(tqueue); tdisk->queue = NULL; @@ -446,7 +446,7 @@ err_reserve: return ret; } -static void __nvm_remove_target(struct nvm_target *t) +static void __nvm_remove_target(struct nvm_target *t, bool graceful) { struct nvm_tgt_type *tt = t->type; struct gendisk *tdisk = t->disk; @@ -459,7 +459,7 @@ static void __nvm_remove_target(struct nvm_target *t) tt->sysfs_exit(tdisk); if (tt->exit) - tt->exit(tdisk->private_data); + tt->exit(tdisk->private_data, graceful); nvm_remove_tgt_dev(t->dev, 1); put_disk(tdisk); @@ -489,7 +489,7 @@ static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) mutex_unlock(&dev->mlock); return 1; } - __nvm_remove_target(t); + __nvm_remove_target(t, true); mutex_unlock(&dev->mlock); return 0; @@ -963,7 +963,7 @@ void nvm_unregister(struct nvm_dev *dev) list_for_each_entry_safe(t, tmp, &dev->targets, list) { if (t->dev->parent != dev) continue; - __nvm_remove_target(t); + __nvm_remove_target(t, false); } mutex_unlock(&dev->mlock); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 29a23111b31c..b1c6d7eb6115 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -44,13 +44,15 @@ retry: goto out; } - if (unlikely(!bio_has_data(bio))) - goto out; - pblk_ppa_set_empty(&w_ctx.ppa); w_ctx.flags = flags; - if (bio->bi_opf & REQ_PREFLUSH) + if (bio->bi_opf & REQ_PREFLUSH) { w_ctx.flags |= PBLK_FLUSH_ENTRY; + pblk_write_kick(pblk); + } + + if (unlikely(!bio_has_data(bio))) + goto out; for (i = 0; i < nr_entries; i++) { void *data = bio_data(bio); diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 94d5d97c9d8a..ed9cc977c8b3 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -40,7 +40,7 @@ static void pblk_line_mark_bb(struct work_struct *work) } kfree(ppa); - mempool_free(line_ws, pblk->gen_ws_pool); + mempool_free(line_ws, &pblk->gen_ws_pool); } static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, @@ -102,7 +102,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) struct pblk *pblk = rqd->private; __pblk_end_io_erase(pblk, rqd); - mempool_free(rqd, pblk->e_rq_pool); + mempool_free(rqd, &pblk->e_rq_pool); } /* @@ -237,15 +237,15 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) switch (type) { case PBLK_WRITE: case PBLK_WRITE_INT: - pool = pblk->w_rq_pool; + pool = &pblk->w_rq_pool; rq_size = pblk_w_rq_size; break; case PBLK_READ: - pool = pblk->r_rq_pool; + pool = &pblk->r_rq_pool; rq_size = pblk_g_rq_size; break; default: - pool = pblk->e_rq_pool; + pool = &pblk->e_rq_pool; rq_size = pblk_g_rq_size; } @@ -265,20 +265,22 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) case PBLK_WRITE: kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap); case PBLK_WRITE_INT: - pool = pblk->w_rq_pool; + pool = &pblk->w_rq_pool; break; case PBLK_READ: - pool = pblk->r_rq_pool; + pool = &pblk->r_rq_pool; break; case PBLK_ERASE: - pool = pblk->e_rq_pool; + pool = &pblk->e_rq_pool; break; default: pr_err("pblk: trying to free unknown rqd type\n"); return; } - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); + if (rqd->meta_list) + nvm_dev_dma_free(dev->parent, rqd->meta_list, + rqd->dma_meta_list); mempool_free(rqd, pool); } @@ -292,7 +294,7 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, for (i = off; i < nr_pages + off; i++) { bv = bio->bi_io_vec[i]; - mempool_free(bv.bv_page, pblk->page_bio_pool); + mempool_free(bv.bv_page, &pblk->page_bio_pool); } } @@ -304,23 +306,23 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int i, ret; for (i = 0; i < nr_pages; i++) { - page = mempool_alloc(pblk->page_bio_pool, flags); + page = mempool_alloc(&pblk->page_bio_pool, flags); ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); if (ret != PBLK_EXPOSED_PAGE_SIZE) { pr_err("pblk: could not add page to bio\n"); - mempool_free(page, pblk->page_bio_pool); + mempool_free(page, &pblk->page_bio_pool); goto err; } } return 0; err: - pblk_bio_free_pages(pblk, bio, 0, i - 1); + pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i); return -1; } -static void pblk_write_kick(struct pblk *pblk) +void pblk_write_kick(struct pblk *pblk) { wake_up_process(pblk->writer_ts); mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000)); @@ -342,13 +344,6 @@ void pblk_write_should_kick(struct pblk *pblk) pblk_write_kick(pblk); } -void pblk_end_io_sync(struct nvm_rq *rqd) -{ - struct completion *waiting = rqd->private; - - complete(waiting); -} - static void pblk_wait_for_meta(struct pblk *pblk) { do { @@ -380,7 +375,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) lockdep_assert_held(&line->lock); - if (!vsc) { + if (line->w_err_gc->has_write_err) { + if (line->gc_group != PBLK_LINEGC_WERR) { + line->gc_group = PBLK_LINEGC_WERR; + move_list = &l_mg->gc_werr_list; + pblk_rl_werr_line_in(&pblk->rl); + } + } else if (!vsc) { if (line->gc_group != PBLK_LINEGC_FULL) { line->gc_group = PBLK_LINEGC_FULL; move_list = &l_mg->gc_full_list; @@ -467,16 +468,13 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; -#ifdef CONFIG_NVM_DEBUG - int ret; + atomic_inc(&pblk->inflight_io); - ret = pblk_check_io(pblk, rqd); - if (ret) - return ret; +#ifdef CONFIG_NVM_DEBUG + if (pblk_check_io(pblk, rqd)) + return NVM_IO_ERR; #endif - atomic_inc(&pblk->inflight_io); - return nvm_submit_io(dev, rqd); } @@ -484,16 +482,13 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; -#ifdef CONFIG_NVM_DEBUG - int ret; + atomic_inc(&pblk->inflight_io); - ret = pblk_check_io(pblk, rqd); - if (ret) - return ret; +#ifdef CONFIG_NVM_DEBUG + if (pblk_check_io(pblk, rqd)) + return NVM_IO_ERR; #endif - atomic_inc(&pblk->inflight_io); - return nvm_submit_io_sync(dev, rqd); } @@ -856,9 +851,10 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, atomic_dec(&pblk->inflight_io); if (rqd.error) { - if (dir == PBLK_WRITE) + if (dir == PBLK_WRITE) { pblk_log_write_err(pblk, &rqd); - else if (dir == PBLK_READ) + ret = 1; + } else if (dir == PBLK_READ) pblk_log_read_err(pblk, &rqd); } @@ -1071,6 +1067,25 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, return 1; } +static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + + line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!line->map_bitmap) + return -ENOMEM; + + /* will be initialized using bb info from map_bitmap */ + line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!line->invalid_bitmap) { + kfree(line->map_bitmap); + line->map_bitmap = NULL; + return -ENOMEM; + } + + return 0; +} + /* For now lines are always assumed full lines. Thus, smeta former and current * lun bitmaps are omitted. */ @@ -1108,7 +1123,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) { pr_debug("pblk: line smeta I/O failed. Retry\n"); - return 1; + return 0; } bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); @@ -1174,19 +1189,9 @@ static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line) static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_meta *lm = &pblk->lm; + int blk_in_line = atomic_read(&line->blk_in_line); int blk_to_erase; - line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC); - if (!line->map_bitmap) - return -ENOMEM; - - /* will be initialized using bb info from map_bitmap */ - line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC); - if (!line->invalid_bitmap) { - kfree(line->map_bitmap); - return -ENOMEM; - } - /* Bad blocks do not need to be erased */ bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line); @@ -1199,16 +1204,19 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) blk_to_erase = pblk_prepare_new_line(pblk, line); line->state = PBLK_LINESTATE_FREE; } else { - blk_to_erase = atomic_read(&line->blk_in_line); + blk_to_erase = blk_in_line; } - if (line->state != PBLK_LINESTATE_FREE) { - kfree(line->map_bitmap); - kfree(line->invalid_bitmap); + if (blk_in_line < lm->min_blk_line) { spin_unlock(&line->lock); + return -EAGAIN; + } + + if (line->state != PBLK_LINESTATE_FREE) { WARN(1, "pblk: corrupted line %d, state %d\n", line->id, line->state); - return -EAGAIN; + spin_unlock(&line->lock); + return -EINTR; } line->state = PBLK_LINESTATE_OPEN; @@ -1241,13 +1249,16 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) } spin_unlock(&l_mg->free_lock); - pblk_rl_free_lines_dec(&pblk->rl, line, true); + ret = pblk_line_alloc_bitmaps(pblk, line); + if (ret) + return ret; if (!pblk_line_init_bb(pblk, line, 0)) { list_add(&line->list, &l_mg->free_list); return -EINTR; } + pblk_rl_free_lines_dec(&pblk->rl, line, true); return 0; } @@ -1259,6 +1270,24 @@ void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) line->emeta = NULL; } +static void pblk_line_reinit(struct pblk_line *line) +{ + *line->vsc = cpu_to_le32(EMPTY_ENTRY); + + line->map_bitmap = NULL; + line->invalid_bitmap = NULL; + line->smeta = NULL; + line->emeta = NULL; +} + +void pblk_line_free(struct pblk_line *line) +{ + kfree(line->map_bitmap); + kfree(line->invalid_bitmap); + + pblk_line_reinit(line); +} + struct pblk_line *pblk_line_get(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1292,10 +1321,14 @@ retry: ret = pblk_line_prepare(pblk, line); if (ret) { - if (ret == -EAGAIN) { + switch (ret) { + case -EAGAIN: + list_add(&line->list, &l_mg->bad_list); + goto retry; + case -EINTR: list_add(&line->list, &l_mg->corrupt_list); goto retry; - } else { + default: pr_err("pblk: failed to prepare line %d\n", line->id); list_add(&line->list, &l_mg->free_list); l_mg->nr_free_lines++; @@ -1321,11 +1354,14 @@ retry: return NULL; } + retry_line->map_bitmap = line->map_bitmap; + retry_line->invalid_bitmap = line->invalid_bitmap; retry_line->smeta = line->smeta; retry_line->emeta = line->emeta; retry_line->meta_line = line->meta_line; - pblk_line_free(pblk, line); + pblk_line_reinit(line); + l_mg->data_line = retry_line; spin_unlock(&l_mg->free_lock); @@ -1378,6 +1414,9 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) } spin_unlock(&l_mg->free_lock); + if (pblk_line_alloc_bitmaps(pblk, line)) + return NULL; + if (pblk_line_erase(pblk, line)) { line = pblk_line_retry(pblk, line); if (!line) @@ -1449,7 +1488,7 @@ static void pblk_line_close_meta_sync(struct pblk *pblk) flush_workqueue(pblk->close_wq); } -void pblk_pipeline_stop(struct pblk *pblk) +void __pblk_pipeline_flush(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; int ret; @@ -1474,6 +1513,11 @@ void pblk_pipeline_stop(struct pblk *pblk) flush_workqueue(pblk->bb_wq); pblk_line_close_meta_sync(pblk); +} + +void __pblk_pipeline_stop(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; spin_lock(&l_mg->free_lock); pblk->state = PBLK_STATE_STOPPED; @@ -1482,6 +1526,12 @@ void pblk_pipeline_stop(struct pblk *pblk) spin_unlock(&l_mg->free_lock); } +void pblk_pipeline_stop(struct pblk *pblk) +{ + __pblk_pipeline_flush(pblk); + __pblk_pipeline_stop(pblk); +} + struct pblk_line *pblk_line_replace_data(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1511,6 +1561,9 @@ retry_erase: goto retry_erase; } + if (pblk_line_alloc_bitmaps(pblk, new)) + return NULL; + retry_setup: if (!pblk_line_init_metadata(pblk, new, cur)) { new = pblk_line_retry(pblk, new); @@ -1550,19 +1603,6 @@ out: return new; } -void pblk_line_free(struct pblk *pblk, struct pblk_line *line) -{ - kfree(line->map_bitmap); - kfree(line->invalid_bitmap); - - *line->vsc = cpu_to_le32(EMPTY_ENTRY); - - line->map_bitmap = NULL; - line->invalid_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; -} - static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1572,9 +1612,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_FREE; line->gc_group = PBLK_LINEGC_NONE; - pblk_line_free(pblk, line); - spin_unlock(&line->lock); + pblk_line_free(line); + + if (line->w_err_gc->has_write_err) { + pblk_rl_werr_line_out(&pblk->rl); + line->w_err_gc->has_write_err = 0; + } + spin_unlock(&line->lock); atomic_dec(&gc->pipeline_gc); spin_lock(&l_mg->free_lock); @@ -1593,7 +1638,7 @@ static void pblk_line_put_ws(struct work_struct *work) struct pblk_line *line = line_put_ws->line; __pblk_line_put(pblk, line); - mempool_free(line_put_ws, pblk->gen_ws_pool); + mempool_free(line_put_ws, &pblk->gen_ws_pool); } void pblk_line_put(struct kref *ref) @@ -1610,7 +1655,7 @@ void pblk_line_put_wq(struct kref *ref) struct pblk *pblk = line->pblk; struct pblk_line_ws *line_put_ws; - line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC); + line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC); if (!line_put_ws) return; @@ -1737,11 +1782,34 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) spin_lock(&l_mg->close_lock); spin_lock(&line->lock); + + /* Update the in-memory start address for emeta, in case it has + * shifted due to write errors + */ + if (line->emeta_ssec != line->cur_sec) + line->emeta_ssec = line->cur_sec; + list_add_tail(&line->list, &l_mg->emeta_list); spin_unlock(&line->lock); spin_unlock(&l_mg->close_lock); pblk_line_should_sync_meta(pblk); + + +} + +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + unsigned int lba_list_size = lm->emeta_len[2]; + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; + struct pblk_emeta *emeta = line->emeta; + + w_err_gc->lba_list = pblk_malloc(lba_list_size, + l_mg->emeta_alloc_type, GFP_KERNEL); + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf), + lba_list_size); } void pblk_line_close_ws(struct work_struct *work) @@ -1750,9 +1818,16 @@ void pblk_line_close_ws(struct work_struct *work) ws); struct pblk *pblk = line_ws->pblk; struct pblk_line *line = line_ws->line; + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; + + /* Write errors makes the emeta start address stored in smeta invalid, + * so keep a copy of the lba list until we've gc'd the line + */ + if (w_err_gc->has_write_err) + pblk_save_lba_list(pblk, line); pblk_line_close(pblk, line); - mempool_free(line_ws, pblk->gen_ws_pool); + mempool_free(line_ws, &pblk->gen_ws_pool); } void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, @@ -1761,7 +1836,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, { struct pblk_line_ws *line_ws; - line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask); + line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask); line_ws->pblk = pblk; line_ws->line = line; diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 6851a5c67189..df88f1bdd921 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -129,6 +129,53 @@ out: kfree(gc_rq_ws); } +static __le64 *get_lba_list_from_emeta(struct pblk *pblk, + struct pblk_line *line) +{ + struct line_emeta *emeta_buf; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + unsigned int lba_list_size = lm->emeta_len[2]; + __le64 *lba_list; + int ret; + + emeta_buf = pblk_malloc(lm->emeta_len[0], + l_mg->emeta_alloc_type, GFP_KERNEL); + if (!emeta_buf) + return NULL; + + ret = pblk_line_read_emeta(pblk, line, emeta_buf); + if (ret) { + pr_err("pblk: line %d read emeta failed (%d)\n", + line->id, ret); + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + return NULL; + } + + /* If this read fails, it means that emeta is corrupted. + * For now, leave the line untouched. + * TODO: Implement a recovery routine that scans and moves + * all sectors on the line. + */ + + ret = pblk_recov_check_emeta(pblk, emeta_buf); + if (ret) { + pr_err("pblk: inconsistent emeta (line %d)\n", + line->id); + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + return NULL; + } + + lba_list = pblk_malloc(lba_list_size, + l_mg->emeta_alloc_type, GFP_KERNEL); + if (lba_list) + memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size); + + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + + return lba_list; +} + static void pblk_gc_line_prepare_ws(struct work_struct *work) { struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, @@ -138,46 +185,26 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct pblk_gc *gc = &pblk->gc; - struct line_emeta *emeta_buf; struct pblk_line_ws *gc_rq_ws; struct pblk_gc_rq *gc_rq; __le64 *lba_list; unsigned long *invalid_bitmap; int sec_left, nr_secs, bit; - int ret; invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); if (!invalid_bitmap) goto fail_free_ws; - emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type, - GFP_KERNEL); - if (!emeta_buf) { - pr_err("pblk: cannot use GC emeta\n"); - goto fail_free_bitmap; - } - - ret = pblk_line_read_emeta(pblk, line, emeta_buf); - if (ret) { - pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret); - goto fail_free_emeta; - } - - /* If this read fails, it means that emeta is corrupted. For now, leave - * the line untouched. TODO: Implement a recovery routine that scans and - * moves all sectors on the line. - */ - - ret = pblk_recov_check_emeta(pblk, emeta_buf); - if (ret) { - pr_err("pblk: inconsistent emeta (line %d)\n", line->id); - goto fail_free_emeta; - } - - lba_list = emeta_to_lbas(pblk, emeta_buf); - if (!lba_list) { - pr_err("pblk: could not interpret emeta (line %d)\n", line->id); - goto fail_free_emeta; + if (line->w_err_gc->has_write_err) { + lba_list = line->w_err_gc->lba_list; + line->w_err_gc->lba_list = NULL; + } else { + lba_list = get_lba_list_from_emeta(pblk, line); + if (!lba_list) { + pr_err("pblk: could not interpret emeta (line %d)\n", + line->id); + goto fail_free_ws; + } } spin_lock(&line->lock); @@ -187,14 +214,14 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) if (sec_left < 0) { pr_err("pblk: corrupted GC line (%d)\n", line->id); - goto fail_free_emeta; + goto fail_free_lba_list; } bit = -1; next_rq: gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL); if (!gc_rq) - goto fail_free_emeta; + goto fail_free_lba_list; nr_secs = 0; do { @@ -240,7 +267,7 @@ next_rq: goto next_rq; out: - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); + pblk_mfree(lba_list, l_mg->emeta_alloc_type); kfree(line_ws); kfree(invalid_bitmap); @@ -251,9 +278,8 @@ out: fail_free_gc_rq: kfree(gc_rq); -fail_free_emeta: - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); -fail_free_bitmap: +fail_free_lba_list: + pblk_mfree(lba_list, l_mg->emeta_alloc_type); kfree(invalid_bitmap); fail_free_ws: kfree(line_ws); @@ -349,12 +375,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk, static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) { unsigned int nr_blocks_free, nr_blocks_need; + unsigned int werr_lines = atomic_read(&rl->werr_lines); nr_blocks_need = pblk_rl_high_thrs(rl); nr_blocks_free = pblk_rl_nr_free_blks(rl); /* This is not critical, no need to take lock here */ - return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)); + return ((werr_lines > 0) || + ((gc->gc_active) && (nr_blocks_need > nr_blocks_free))); } void pblk_gc_free_full_lines(struct pblk *pblk) @@ -649,7 +677,7 @@ fail_free_main_kthread: return ret; } -void pblk_gc_exit(struct pblk *pblk) +void pblk_gc_exit(struct pblk *pblk, bool graceful) { struct pblk_gc *gc = &pblk->gc; @@ -663,10 +691,12 @@ void pblk_gc_exit(struct pblk *pblk) if (gc->gc_reader_ts) kthread_stop(gc->gc_reader_ts); - flush_workqueue(gc->gc_reader_wq); - destroy_workqueue(gc->gc_reader_wq); + if (graceful) { + flush_workqueue(gc->gc_reader_wq); + flush_workqueue(gc->gc_line_reader_wq); + } - flush_workqueue(gc->gc_line_reader_wq); + destroy_workqueue(gc->gc_reader_wq); destroy_workqueue(gc->gc_line_reader_wq); if (gc->gc_writer_ts) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 91a5bc2556a3..ce561f5d48ce 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -20,10 +20,15 @@ #include "pblk.h" +unsigned int write_buffer_size; + +module_param(write_buffer_size, uint, 0644); +MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); + static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, *pblk_w_rq_cache; static DECLARE_RWSEM(pblk_lock); -struct bio_set *pblk_bio_set; +struct bio_set pblk_bio_set; static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, struct bio *bio) @@ -127,10 +132,8 @@ static int pblk_l2p_recover(struct pblk *pblk, bool factory_init) if (!line) { /* Configure next line for user data */ line = pblk_line_get_first_data(pblk); - if (!line) { - pr_err("pblk: line list corrupted\n"); + if (!line) return -EFAULT; - } } return 0; @@ -141,6 +144,7 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init) sector_t i; struct ppa_addr ppa; size_t map_size; + int ret = 0; map_size = pblk_trans_map_size(pblk); pblk->trans_map = vmalloc(map_size); @@ -152,7 +156,11 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init) for (i = 0; i < pblk->rl.nr_secs; i++) pblk_trans_map_set(pblk, i, ppa); - return pblk_l2p_recover(pblk, factory_init); + ret = pblk_l2p_recover(pblk, factory_init); + if (ret) + vfree(pblk->trans_map); + + return ret; } static void pblk_rwb_free(struct pblk *pblk) @@ -169,10 +177,15 @@ static int pblk_rwb_init(struct pblk *pblk) struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_rb_entry *entries; - unsigned long nr_entries; + unsigned long nr_entries, buffer_size; unsigned int power_size, power_seg_sz; - nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer); + if (write_buffer_size && (write_buffer_size > pblk->pgs_in_buffer)) + buffer_size = write_buffer_size; + else + buffer_size = pblk->pgs_in_buffer; + + nr_entries = pblk_rb_calculate_size(buffer_size); entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry)); if (!entries) @@ -341,7 +354,7 @@ static int pblk_core_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int max_write_ppas; + int ret, max_write_ppas; atomic64_set(&pblk->user_wa, 0); atomic64_set(&pblk->pad_wa, 0); @@ -375,33 +388,33 @@ static int pblk_core_init(struct pblk *pblk) goto fail_free_pad_dist; /* Internal bios can be at most the sectors signaled by the device. */ - pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0); - if (!pblk->page_bio_pool) + ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0); + if (ret) goto free_global_caches; - pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE, - pblk_ws_cache); - if (!pblk->gen_ws_pool) + ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, + pblk_ws_cache); + if (ret) goto free_page_bio_pool; - pblk->rec_pool = mempool_create_slab_pool(geo->all_luns, - pblk_rec_cache); - if (!pblk->rec_pool) + ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, + pblk_rec_cache); + if (ret) goto free_gen_ws_pool; - pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns, - pblk_g_rq_cache); - if (!pblk->r_rq_pool) + ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, + pblk_g_rq_cache); + if (ret) goto free_rec_pool; - pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns, - pblk_g_rq_cache); - if (!pblk->e_rq_pool) + ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, + pblk_g_rq_cache); + if (ret) goto free_r_rq_pool; - pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns, - pblk_w_rq_cache); - if (!pblk->w_rq_pool) + ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, + pblk_w_rq_cache); + if (ret) goto free_e_rq_pool; pblk->close_wq = alloc_workqueue("pblk-close-wq", @@ -423,6 +436,7 @@ static int pblk_core_init(struct pblk *pblk) goto free_r_end_wq; INIT_LIST_HEAD(&pblk->compl_list); + INIT_LIST_HEAD(&pblk->resubmit_list); return 0; @@ -433,17 +447,17 @@ free_bb_wq: free_close_wq: destroy_workqueue(pblk->close_wq); free_w_rq_pool: - mempool_destroy(pblk->w_rq_pool); + mempool_exit(&pblk->w_rq_pool); free_e_rq_pool: - mempool_destroy(pblk->e_rq_pool); + mempool_exit(&pblk->e_rq_pool); free_r_rq_pool: - mempool_destroy(pblk->r_rq_pool); + mempool_exit(&pblk->r_rq_pool); free_rec_pool: - mempool_destroy(pblk->rec_pool); + mempool_exit(&pblk->rec_pool); free_gen_ws_pool: - mempool_destroy(pblk->gen_ws_pool); + mempool_exit(&pblk->gen_ws_pool); free_page_bio_pool: - mempool_destroy(pblk->page_bio_pool); + mempool_exit(&pblk->page_bio_pool); free_global_caches: pblk_free_global_caches(pblk); fail_free_pad_dist: @@ -462,12 +476,12 @@ static void pblk_core_free(struct pblk *pblk) if (pblk->bb_wq) destroy_workqueue(pblk->bb_wq); - mempool_destroy(pblk->page_bio_pool); - mempool_destroy(pblk->gen_ws_pool); - mempool_destroy(pblk->rec_pool); - mempool_destroy(pblk->r_rq_pool); - mempool_destroy(pblk->e_rq_pool); - mempool_destroy(pblk->w_rq_pool); + mempool_exit(&pblk->page_bio_pool); + mempool_exit(&pblk->gen_ws_pool); + mempool_exit(&pblk->rec_pool); + mempool_exit(&pblk->r_rq_pool); + mempool_exit(&pblk->e_rq_pool); + mempool_exit(&pblk->w_rq_pool); pblk_free_global_caches(pblk); kfree(pblk->pad_dist); @@ -489,11 +503,17 @@ static void pblk_line_mg_free(struct pblk *pblk) } } -static void pblk_line_meta_free(struct pblk_line *line) +static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, + struct pblk_line *line) { + struct pblk_w_err_gc *w_err_gc = line->w_err_gc; + kfree(line->blk_bitmap); kfree(line->erase_bitmap); kfree(line->chks); + + pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type); + kfree(w_err_gc); } static void pblk_lines_free(struct pblk *pblk) @@ -506,8 +526,8 @@ static void pblk_lines_free(struct pblk *pblk) for (i = 0; i < l_mg->nr_lines; i++) { line = &pblk->lines[i]; - pblk_line_free(pblk, line); - pblk_line_meta_free(line); + pblk_line_free(line); + pblk_line_meta_free(l_mg, line); } spin_unlock(&l_mg->free_lock); @@ -748,14 +768,14 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, chunk->cnlb = chunk_meta->cnlb; chunk->wp = chunk_meta->wp; - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - if (chunk->type & NVM_CHK_TP_SZ_SPEC) { WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); continue; } + if (!(chunk->state & NVM_CHK_ST_OFFLINE)) + continue; + set_bit(pos, line->blk_bitmap); nr_bad_chks++; } @@ -809,20 +829,28 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line) return -ENOMEM; line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->erase_bitmap) { - kfree(line->blk_bitmap); - return -ENOMEM; - } + if (!line->erase_bitmap) + goto free_blk_bitmap; + line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta), GFP_KERNEL); - if (!line->chks) { - kfree(line->erase_bitmap); - kfree(line->blk_bitmap); - return -ENOMEM; - } + if (!line->chks) + goto free_erase_bitmap; + + line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL); + if (!line->w_err_gc) + goto free_chks; return 0; + +free_chks: + kfree(line->chks); +free_erase_bitmap: + kfree(line->erase_bitmap); +free_blk_bitmap: + kfree(line->blk_bitmap); + return -ENOMEM; } static int pblk_line_mg_init(struct pblk *pblk) @@ -847,12 +875,14 @@ static int pblk_line_mg_init(struct pblk *pblk) INIT_LIST_HEAD(&l_mg->gc_mid_list); INIT_LIST_HEAD(&l_mg->gc_low_list); INIT_LIST_HEAD(&l_mg->gc_empty_list); + INIT_LIST_HEAD(&l_mg->gc_werr_list); INIT_LIST_HEAD(&l_mg->emeta_list); - l_mg->gc_lists[0] = &l_mg->gc_high_list; - l_mg->gc_lists[1] = &l_mg->gc_mid_list; - l_mg->gc_lists[2] = &l_mg->gc_low_list; + l_mg->gc_lists[0] = &l_mg->gc_werr_list; + l_mg->gc_lists[1] = &l_mg->gc_high_list; + l_mg->gc_lists[2] = &l_mg->gc_mid_list; + l_mg->gc_lists[3] = &l_mg->gc_low_list; spin_lock_init(&l_mg->free_lock); spin_lock_init(&l_mg->close_lock); @@ -1047,6 +1077,11 @@ static int pblk_lines_init(struct pblk *pblk) nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); } + if (!nr_free_chks) { + pr_err("pblk: too many bad blocks prevent for sane instance\n"); + return -EINTR; + } + pblk_set_provision(pblk, nr_free_chks); kfree(chunk_meta); @@ -1054,7 +1089,7 @@ static int pblk_lines_init(struct pblk *pblk) fail_free_lines: while (--i >= 0) - pblk_line_meta_free(&pblk->lines[i]); + pblk_line_meta_free(l_mg, &pblk->lines[i]); kfree(pblk->lines); fail_free_chunk_meta: kfree(chunk_meta); @@ -1110,23 +1145,25 @@ static void pblk_free(struct pblk *pblk) kfree(pblk); } -static void pblk_tear_down(struct pblk *pblk) +static void pblk_tear_down(struct pblk *pblk, bool graceful) { - pblk_pipeline_stop(pblk); + if (graceful) + __pblk_pipeline_flush(pblk); + __pblk_pipeline_stop(pblk); pblk_writer_stop(pblk); pblk_rb_sync_l2p(&pblk->rwb); pblk_rl_free(&pblk->rl); - pr_debug("pblk: consistent tear down\n"); + pr_debug("pblk: consistent tear down (graceful:%d)\n", graceful); } -static void pblk_exit(void *private) +static void pblk_exit(void *private, bool graceful) { struct pblk *pblk = private; down_write(&pblk_lock); - pblk_gc_exit(pblk); - pblk_tear_down(pblk); + pblk_gc_exit(pblk, graceful); + pblk_tear_down(pblk, graceful); #ifdef CONFIG_NVM_DEBUG pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk)); @@ -1175,6 +1212,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->state = PBLK_STATE_RUNNING; pblk->gc.gc_enabled = 0; + spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); @@ -1297,18 +1335,18 @@ static int __init pblk_module_init(void) { int ret; - pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!pblk_bio_set) - return -ENOMEM; + ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0); + if (ret) + return ret; ret = nvm_register_tgt_type(&tt_pblk); if (ret) - bioset_free(pblk_bio_set); + bioset_exit(&pblk_bio_set); return ret; } static void pblk_module_exit(void) { - bioset_free(pblk_bio_set); + bioset_exit(&pblk_bio_set); nvm_unregister_tgt_type(&tt_pblk); } diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 20dbaa89c9df..953ca31dda68 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -18,11 +18,11 @@ #include "pblk.h" -static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, - struct ppa_addr *ppa_list, - unsigned long *lun_bitmap, - struct pblk_sec_meta *meta_list, - unsigned int valid_secs) +static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, + struct ppa_addr *ppa_list, + unsigned long *lun_bitmap, + struct pblk_sec_meta *meta_list, + unsigned int valid_secs) { struct pblk_line *line = pblk_line_get_data(pblk); struct pblk_emeta *emeta; @@ -35,8 +35,14 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, if (pblk_line_is_full(line)) { struct pblk_line *prev_line = line; + /* If we cannot allocate a new line, make sure to store metadata + * on current line and then fail + */ line = pblk_line_replace_data(pblk); pblk_line_close_meta(pblk, prev_line); + + if (!line) + return -EINTR; } emeta = line->emeta; @@ -74,6 +80,7 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, } pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); + return 0; } void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, @@ -87,8 +94,12 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, for (i = off; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], - lun_bitmap, &meta_list[i], map_secs); + if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs)) { + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); + pblk_pipeline_stop(pblk); + } } } @@ -108,8 +119,12 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, for (i = 0; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], - lun_bitmap, &meta_list[i], map_secs); + if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + lun_bitmap, &meta_list[i], map_secs)) { + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); + pblk_pipeline_stop(pblk); + } erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 52fdd85dbc97..00cd1f20a196 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -142,10 +142,9 @@ static void clean_wctx(struct pblk_w_ctx *w_ctx) { int flags; -try: flags = READ_ONCE(w_ctx->flags); - if (!(flags & PBLK_SUBMITTED_ENTRY)) - goto try; + WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY), + "pblk: overwriting unsubmitted data\n"); /* Release flags on context. Protect from writes and reads */ smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); @@ -350,7 +349,7 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, } static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, - unsigned int pos) + unsigned int pos) { struct pblk_rb_entry *entry; unsigned int sync, flush_point; @@ -420,7 +419,7 @@ void pblk_rb_flush(struct pblk_rb *rb) if (pblk_rb_flush_point_set(rb, NULL, mem)) return; - pblk_write_should_kick(pblk); + pblk_write_kick(pblk); } static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, @@ -504,45 +503,6 @@ int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, } /* - * The caller of this function must ensure that the backpointer will not - * overwrite the entries passed on the list. - */ -unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, - struct list_head *list, - unsigned int max) -{ - struct pblk_rb_entry *entry, *tentry; - struct page *page; - unsigned int read = 0; - int ret; - - list_for_each_entry_safe(entry, tentry, list, index) { - if (read > max) { - pr_err("pblk: too many entries on list\n"); - goto out; - } - - page = virt_to_page(entry->data); - if (!page) { - pr_err("pblk: could not allocate write bio page\n"); - goto out; - } - - ret = bio_add_page(bio, page, rb->seg_size, 0); - if (ret != rb->seg_size) { - pr_err("pblk: could not add page to write bio\n"); - goto out; - } - - list_del(&entry->index); - read++; - } - -out: - return read; -} - -/* * Read available entries on rb and add them to the given bio. To avoid a memory * copy, a page reference to the write buffer is used to be added to the bio. * diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 9eee10f69df0..18694694e5f0 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -39,10 +39,10 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, } static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, - sector_t blba, unsigned long *read_bitmap) + struct bio *bio, sector_t blba, + unsigned long *read_bitmap) { struct pblk_sec_meta *meta_list = rqd->meta_list; - struct bio *bio = rqd->bio; struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; int nr_secs = rqd->nr_ppas; bool advanced_bio = false; @@ -102,32 +102,69 @@ next: #endif } -static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd) + +static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, + sector_t blba) { - int err; + struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + int nr_lbas = rqd->nr_ppas; + int i; - err = pblk_submit_io(pblk, rqd); - if (err) - return NVM_IO_ERR; + for (i = 0; i < nr_lbas; i++) { + u64 lba = le64_to_cpu(meta_lba_list[i].lba); + + if (lba == ADDR_EMPTY) + continue; + + if (lba != blba + i) { +#ifdef CONFIG_NVM_DEBUG + struct ppa_addr *p; - return NVM_IO_OK; + p = (nr_lbas == 1) ? &rqd->ppa_list[i] : &rqd->ppa_addr; + print_ppa(&pblk->dev->geo, p, "seq", i); +#endif + pr_err("pblk: corrupted read LBA (%llu/%llu)\n", + lba, (u64)blba + i); + WARN_ON(1); + } + } } -static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd, - sector_t blba) +/* + * There can be holes in the lba list. + */ +static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, + u64 *lba_list, int nr_lbas) { - struct pblk_sec_meta *meta_list = rqd->meta_list; - int nr_lbas = rqd->nr_ppas; - int i; + struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + int i, j; - for (i = 0; i < nr_lbas; i++) { - u64 lba = le64_to_cpu(meta_list[i].lba); + for (i = 0, j = 0; i < nr_lbas; i++) { + u64 lba = lba_list[i]; + u64 meta_lba; if (lba == ADDR_EMPTY) continue; - WARN(lba != blba + i, "pblk: corrupted read LBA\n"); + meta_lba = le64_to_cpu(meta_lba_list[j].lba); + + if (lba != meta_lba) { +#ifdef CONFIG_NVM_DEBUG + struct ppa_addr *p; + int nr_ppas = rqd->nr_ppas; + + p = (nr_ppas == 1) ? &rqd->ppa_list[j] : &rqd->ppa_addr; + print_ppa(&pblk->dev->geo, p, "seq", j); +#endif + pr_err("pblk: corrupted read LBA (%llu/%llu)\n", + lba, meta_lba); + WARN_ON(1); + } + + j++; } + + WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); } static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) @@ -152,7 +189,6 @@ static void pblk_end_user_read(struct bio *bio) WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); #endif bio_endio(bio); - bio_put(bio); } static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, @@ -160,23 +196,18 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = rqd->bio; + struct bio *int_bio = rqd->bio; unsigned long start_time = r_ctx->start_time; generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time); if (rqd->error) pblk_log_read_err(pblk, rqd); -#ifdef CONFIG_NVM_DEBUG - else - WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n"); -#endif - pblk_read_check(pblk, rqd, r_ctx->lba); + pblk_read_check_seq(pblk, rqd, r_ctx->lba); - bio_put(bio); - if (r_ctx->private) - pblk_end_user_read((struct bio *)r_ctx->private); + if (int_bio) + bio_put(int_bio); if (put_line) pblk_read_put_rqd_kref(pblk, rqd); @@ -193,16 +224,19 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, static void pblk_end_io_read(struct nvm_rq *rqd) { struct pblk *pblk = rqd->private; + struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); + struct bio *bio = (struct bio *)r_ctx->private; + pblk_end_user_read(bio); __pblk_end_io_read(pblk, rqd, true); } -static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int bio_init_idx, - unsigned long *read_bitmap) +static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd, + struct bio *orig_bio, unsigned int bio_init_idx, + unsigned long *read_bitmap) { - struct bio *new_bio, *bio = rqd->bio; struct pblk_sec_meta *meta_list = rqd->meta_list; + struct bio *new_bio; struct bio_vec src_bv, dst_bv; void *ppa_ptr = NULL; void *src_p, *dst_p; @@ -219,11 +253,11 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, new_bio = bio_alloc(GFP_KERNEL, nr_holes); if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) - goto err; + goto fail_add_pages; if (nr_holes != new_bio->bi_vcnt) { pr_err("pblk: malformed bio\n"); - goto err; + goto fail; } for (i = 0; i < nr_secs; i++) @@ -246,7 +280,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, if (ret) { bio_put(rqd->bio); pr_err("pblk: sync read IO submission failed\n"); - goto err; + goto fail; } if (rqd->error) { @@ -282,7 +316,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, meta_list[hole].lba = lba_list_media[i]; src_bv = new_bio->bi_io_vec[i++]; - dst_bv = bio->bi_io_vec[bio_init_idx + hole]; + dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole]; src_p = kmap_atomic(src_bv.bv_page); dst_p = kmap_atomic(dst_bv.bv_page); @@ -294,35 +328,33 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, kunmap_atomic(src_p); kunmap_atomic(dst_p); - mempool_free(src_bv.bv_page, pblk->page_bio_pool); + mempool_free(src_bv.bv_page, &pblk->page_bio_pool); hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); } while (hole < nr_secs); bio_put(new_bio); - /* Complete the original bio and associated request */ - bio_endio(bio); - rqd->bio = bio; + /* restore original request */ + rqd->bio = NULL; rqd->nr_ppas = nr_secs; __pblk_end_io_read(pblk, rqd, false); - return NVM_IO_OK; - -err: - pr_err("pblk: failed to perform partial read\n"); + return NVM_IO_DONE; +fail: /* Free allocated pages in new bio */ - pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); + pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt); +fail_add_pages: + pr_err("pblk: failed to perform partial read\n"); __pblk_end_io_read(pblk, rqd, false); return NVM_IO_ERR; } -static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, +static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, sector_t lba, unsigned long *read_bitmap) { struct pblk_sec_meta *meta_list = rqd->meta_list; - struct bio *bio = rqd->bio; struct ppa_addr ppa; pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); @@ -386,14 +418,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd = pblk_alloc_rqd(pblk, PBLK_READ); rqd->opcode = NVM_OP_PREAD; - rqd->bio = bio; rqd->nr_ppas = nr_secs; + rqd->bio = NULL; /* cloned bio if needed */ rqd->private = pblk; rqd->end_io = pblk_end_io_read; r_ctx = nvm_rq_to_pdu(rqd); r_ctx->start_time = jiffies; r_ctx->lba = blba; + r_ctx->private = bio; /* original bio */ /* Save the index for this bio's start. This is needed in case * we need to fill a partial read. @@ -411,17 +444,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; - pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap); + pblk_read_ppalist_rq(pblk, rqd, bio, blba, &read_bitmap); } else { - pblk_read_rq(pblk, rqd, blba, &read_bitmap); + pblk_read_rq(pblk, rqd, bio, blba, &read_bitmap); } - bio_get(bio); if (bitmap_full(&read_bitmap, nr_secs)) { - bio_endio(bio); atomic_inc(&pblk->inflight_io); __pblk_end_io_read(pblk, rqd, false); - return NVM_IO_OK; + return NVM_IO_DONE; } /* All sectors are to be read from the device */ @@ -429,20 +460,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) struct bio *int_bio = NULL; /* Clone read bio to deal with read errors internally */ - int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set); + int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); if (!int_bio) { pr_err("pblk: could not clone read bio\n"); goto fail_end_io; } rqd->bio = int_bio; - r_ctx->private = bio; - ret = pblk_submit_read_io(pblk, rqd); - if (ret) { + if (pblk_submit_io(pblk, rqd)) { pr_err("pblk: read IO submission failed\n"); - if (int_bio) - bio_put(int_bio); + ret = NVM_IO_ERR; goto fail_end_io; } @@ -452,7 +480,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) /* The read bio request could be partially filled by the write buffer, * but there are some holes that need to be read from the drive. */ - return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap); + return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap); fail_rqd_free: pblk_free_rqd(pblk, rqd, PBLK_READ); @@ -585,6 +613,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) goto err_free_bio; } + pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs); + atomic_dec(&pblk->inflight_io); if (rqd.error) { diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 3e079c2afa6e..598342833d0d 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -16,97 +16,6 @@ #include "pblk.h" -void pblk_submit_rec(struct work_struct *work) -{ - struct pblk_rec_ctx *recovery = - container_of(work, struct pblk_rec_ctx, ws_rec); - struct pblk *pblk = recovery->pblk; - struct nvm_rq *rqd = recovery->rqd; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio; - unsigned int nr_rec_secs; - unsigned int pgs_read; - int ret; - - nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status, - NVM_MAX_VLBA); - - bio = bio_alloc(GFP_KERNEL, nr_rec_secs); - - bio->bi_iter.bi_sector = 0; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - rqd->bio = bio; - rqd->nr_ppas = nr_rec_secs; - - pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed, - nr_rec_secs); - if (pgs_read != nr_rec_secs) { - pr_err("pblk: could not read recovery entries\n"); - goto err; - } - - if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) { - pr_err("pblk: could not setup recovery request\n"); - goto err; - } - -#ifdef CONFIG_NVM_DEBUG - atomic_long_add(nr_rec_secs, &pblk->recov_writes); -#endif - - ret = pblk_submit_io(pblk, rqd); - if (ret) { - pr_err("pblk: I/O submission failed: %d\n", ret); - goto err; - } - - mempool_free(recovery, pblk->rec_pool); - return; - -err: - bio_put(bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); -} - -int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, - struct pblk_rec_ctx *recovery, u64 *comp_bits, - unsigned int comp) -{ - struct nvm_rq *rec_rqd; - struct pblk_c_ctx *rec_ctx; - int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; - - rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); - rec_ctx = nvm_rq_to_pdu(rec_rqd); - - /* Copy completion bitmap, but exclude the first X completed entries */ - bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status, - (unsigned long int *)comp_bits, - comp, NVM_MAX_VLBA); - - /* Save the context for the entries that need to be re-written and - * update current context with the completed entries. - */ - rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp); - if (comp >= c_ctx->nr_valid) { - rec_ctx->nr_valid = 0; - rec_ctx->nr_padded = nr_entries - comp; - - c_ctx->nr_padded = comp - c_ctx->nr_valid; - } else { - rec_ctx->nr_valid = c_ctx->nr_valid - comp; - rec_ctx->nr_padded = c_ctx->nr_padded; - - c_ctx->nr_valid = comp; - c_ctx->nr_padded = 0; - } - - recovery->rqd = rec_rqd; - recovery->pblk = pblk; - - return 0; -} - int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) { u32 crc; @@ -865,18 +774,30 @@ static void pblk_recov_wa_counters(struct pblk *pblk, } static int pblk_line_was_written(struct pblk_line *line, - struct pblk_line_meta *lm) + struct pblk *pblk) { - int i; - int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE; + struct pblk_line_meta *lm = &pblk->lm; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_chk_meta *chunk; + struct ppa_addr bppa; + int smeta_blk; - for (i = 0; i < lm->blk_per_line; i++) { - if (!(line->chks[i].state & state_mask)) - return 1; - } + if (line->state == PBLK_LINESTATE_BAD) + return 0; - return 0; + smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + if (smeta_blk >= lm->blk_per_line) + return 0; + + bppa = pblk->luns[smeta_blk].bppa; + chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; + + if (chunk->state & NVM_CHK_ST_FREE) + return 0; + + return 1; } struct pblk_line *pblk_recov_l2p(struct pblk *pblk) @@ -915,7 +836,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta); - if (!pblk_line_was_written(line, lm)) + if (!pblk_line_was_written(line, pblk)) continue; /* Lines that cannot be read are assumed as not written here */ diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 883a7113b19d..6a0616a6fcaf 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) pblk_rl_kick_u_timer(rl); } +void pblk_rl_werr_line_in(struct pblk_rl *rl) +{ + atomic_inc(&rl->werr_lines); +} + +void pblk_rl_werr_line_out(struct pblk_rl *rl) +{ + atomic_dec(&rl->werr_lines); +} + void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries) { atomic_add(nr_entries, &rl->rb_gc_cnt); @@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, { struct pblk *pblk = container_of(rl, struct pblk, rl); int max = rl->rb_budget; + int werr_gc_needed = atomic_read(&rl->werr_lines); if (free_blocks >= rl->high) { - rl->rb_user_max = max; - rl->rb_gc_max = 0; - rl->rb_state = PBLK_RL_HIGH; + if (werr_gc_needed) { + /* Allocate a small budget for recovering + * lines with write errors + */ + rl->rb_gc_max = 1 << rl->rb_windows_pw; + rl->rb_user_max = max - rl->rb_gc_max; + rl->rb_state = PBLK_RL_WERR; + } else { + rl->rb_user_max = max; + rl->rb_gc_max = 0; + rl->rb_state = PBLK_RL_OFF; + } } else if (free_blocks < rl->high) { int shift = rl->high_pw - rl->rb_windows_pw; int user_windows = free_blocks >> shift; @@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, rl->rb_state = PBLK_RL_LOW; } - if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW)) + if (rl->rb_state != PBLK_RL_OFF) pblk_gc_should_start(pblk); else pblk_gc_should_stop(pblk); @@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) atomic_set(&rl->rb_user_cnt, 0); atomic_set(&rl->rb_gc_cnt, 0); atomic_set(&rl->rb_space, -1); + atomic_set(&rl->werr_lines, 0); timer_setup(&rl->u_timer, pblk_rl_u_timer, 0); diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index e61909af23a5..88a0a7c407aa 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0; int d_line_cnt = 0, l_line_cnt = 0; int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; + int gc_werr = 0; + int bad = 0, cor = 0; int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; int map_weight = 0, meta_weight = 0; @@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) gc_empty++; } + list_for_each_entry(line, &l_mg->gc_werr_list, list) { + if (line->type == PBLK_LINETYPE_DATA) + d_line_cnt++; + else if (line->type == PBLK_LINETYPE_LOG) + l_line_cnt++; + closed_line_cnt++; + gc_werr++; + } + list_for_each_entry(line, &l_mg->bad_list, list) bad++; list_for_each_entry(line, &l_mg->corrupt_list, list) @@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) l_mg->nr_lines); sz += snprintf(page + sz, PAGE_SIZE - sz, - "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n", - gc_full, gc_high, gc_mid, gc_low, gc_empty, + "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n", + gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr, atomic_read(&pblk->gc.read_inflight_gc)); sz += snprintf(page + sz, PAGE_SIZE - sz, diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 3e6f1ebd743a..f353e52941f5 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -103,68 +103,150 @@ retry: pblk_rb_sync_end(&pblk->rwb, &flags); } -/* When a write fails, we are not sure whether the block has grown bad or a page - * range is more susceptible to write errors. If a high number of pages fail, we - * assume that the block is bad and we mark it accordingly. In all cases, we - * remap and resubmit the failed entries as fast as possible; if a flush is - * waiting on a completion, the whole stack would stall otherwise. - */ -static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) +/* Map remaining sectors in chunk, starting from ppa */ +static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) { - void *comp_bits = &rqd->ppa_status; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct pblk_rec_ctx *recovery; - struct ppa_addr *ppa_list = rqd->ppa_list; - int nr_ppas = rqd->nr_ppas; - unsigned int c_entries; - int bit, ret; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *line; + struct ppa_addr map_ppa = *ppa; + u64 paddr; + int done = 0; - if (unlikely(nr_ppas == 1)) - ppa_list = &rqd->ppa_addr; + line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + spin_lock(&line->lock); - recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC); + while (!done) { + paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa); - INIT_LIST_HEAD(&recovery->failed); + if (!test_and_set_bit(paddr, line->map_bitmap)) + line->left_msecs--; - bit = -1; - while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) { - struct pblk_rb_entry *entry; - struct ppa_addr ppa; + if (!test_and_set_bit(paddr, line->invalid_bitmap)) + le32_add_cpu(line->vsc, -1); - /* Logic error */ - if (bit > c_ctx->nr_valid) { - WARN_ONCE(1, "pblk: corrupted write request\n"); - mempool_free(recovery, pblk->rec_pool); - goto out; + if (geo->version == NVM_OCSSD_SPEC_12) { + map_ppa.ppa++; + if (map_ppa.g.pg == geo->num_pg) + done = 1; + } else { + map_ppa.m.sec++; + if (map_ppa.m.sec == geo->clba) + done = 1; } + } - ppa = ppa_list[bit]; - entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa); - if (!entry) { - pr_err("pblk: could not scan entry on write failure\n"); - mempool_free(recovery, pblk->rec_pool); - goto out; - } + line->w_err_gc->has_write_err = 1; + spin_unlock(&line->lock); +} - /* The list is filled first and emptied afterwards. No need for - * protecting it with a lock +static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, + unsigned int nr_entries) +{ + struct pblk_rb *rb = &pblk->rwb; + struct pblk_rb_entry *entry; + struct pblk_line *line; + struct pblk_w_ctx *w_ctx; + struct ppa_addr ppa_l2p; + int flags; + unsigned int pos, i; + + spin_lock(&pblk->trans_lock); + pos = sentry; + for (i = 0; i < nr_entries; i++) { + entry = &rb->entries[pos]; + w_ctx = &entry->w_ctx; + + /* Check if the lba has been overwritten */ + ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); + if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) + w_ctx->lba = ADDR_EMPTY; + + /* Mark up the entry as submittable again */ + flags = READ_ONCE(w_ctx->flags); + flags |= PBLK_WRITTEN_DATA; + /* Release flags on write context. Protect from writes */ + smp_store_release(&w_ctx->flags, flags); + + /* Decrese the reference count to the line as we will + * re-map these entries */ - list_add_tail(&entry->index, &recovery->failed); + line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + kref_put(&line->ref, pblk_line_put); + + pos = (pos + 1) & (rb->nr_entries - 1); } + spin_unlock(&pblk->trans_lock); +} - c_entries = find_first_bit(comp_bits, nr_ppas); - ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries); - if (ret) { - pr_err("pblk: could not recover from write failure\n"); - mempool_free(recovery, pblk->rec_pool); - goto out; +static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx) +{ + struct pblk_c_ctx *r_ctx; + + r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL); + if (!r_ctx) + return; + + r_ctx->lun_bitmap = NULL; + r_ctx->sentry = c_ctx->sentry; + r_ctx->nr_valid = c_ctx->nr_valid; + r_ctx->nr_padded = c_ctx->nr_padded; + + spin_lock(&pblk->resubmit_lock); + list_add_tail(&r_ctx->list, &pblk->resubmit_list); + spin_unlock(&pblk->resubmit_lock); + +#ifdef CONFIG_NVM_DEBUG + atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes); +#endif +} + +static void pblk_submit_rec(struct work_struct *work) +{ + struct pblk_rec_ctx *recovery = + container_of(work, struct pblk_rec_ctx, ws_rec); + struct pblk *pblk = recovery->pblk; + struct nvm_rq *rqd = recovery->rqd; + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + struct ppa_addr *ppa_list; + + pblk_log_write_err(pblk, rqd); + + if (rqd->nr_ppas == 1) + ppa_list = &rqd->ppa_addr; + else + ppa_list = rqd->ppa_list; + + pblk_map_remaining(pblk, ppa_list); + pblk_queue_resubmit(pblk, c_ctx); + + pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + if (c_ctx->nr_padded) + pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, + c_ctx->nr_padded); + bio_put(rqd->bio); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); + mempool_free(recovery, &pblk->rec_pool); + + atomic_dec(&pblk->inflight_io); +} + + +static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct pblk_rec_ctx *recovery; + + recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC); + if (!recovery) { + pr_err("pblk: could not allocate recovery work\n"); + return; } + recovery->pblk = pblk; + recovery->rqd = rqd; + INIT_WORK(&recovery->ws_rec, pblk_submit_rec); queue_work(pblk->close_wq, &recovery->ws_rec); - -out: - pblk_complete_write(pblk, rqd, c_ctx); } static void pblk_end_io_write(struct nvm_rq *rqd) @@ -173,8 +255,8 @@ static void pblk_end_io_write(struct nvm_rq *rqd) struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); if (rqd->error) { - pblk_log_write_err(pblk, rqd); - return pblk_end_w_fail(pblk, rqd); + pblk_end_w_fail(pblk, rqd); + return; } #ifdef CONFIG_NVM_DEBUG else @@ -198,6 +280,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) if (rqd->error) { pblk_log_write_err(pblk, rqd); pr_err("pblk: metadata I/O failed. Line %d\n", line->id); + line->w_err_gc->has_write_err = 1; } sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); @@ -266,31 +349,6 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, return 0; } -int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - struct pblk_line_meta *lm = &pblk->lm; - unsigned long *lun_bitmap; - int ret; - - lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); - if (!lun_bitmap) - return -ENOMEM; - - c_ctx->lun_bitmap = lun_bitmap; - - ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write); - if (ret) - return ret; - - pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0); - - rqd->ppa_status = (u64)0; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); - - return ret; -} - static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, unsigned int secs_to_flush) { @@ -339,6 +397,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, l_mg->emeta_alloc_type, GFP_KERNEL); if (IS_ERR(bio)) { + pr_err("pblk: failed to map emeta io"); ret = PTR_ERR(bio); goto fail_free_rqd; } @@ -515,26 +574,54 @@ static int pblk_submit_write(struct pblk *pblk) unsigned int secs_avail, secs_to_sync, secs_to_com; unsigned int secs_to_flush; unsigned long pos; + unsigned int resubmit; - /* If there are no sectors in the cache, flushes (bios without data) - * will be cleared on the cache threads - */ - secs_avail = pblk_rb_read_count(&pblk->rwb); - if (!secs_avail) - return 1; - - secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); - if (!secs_to_flush && secs_avail < pblk->min_write_pgs) - return 1; - - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); - if (secs_to_sync > pblk->max_write_pgs) { - pr_err("pblk: bad buffer sync calculation\n"); - return 1; - } + spin_lock(&pblk->resubmit_lock); + resubmit = !list_empty(&pblk->resubmit_list); + spin_unlock(&pblk->resubmit_lock); + + /* Resubmit failed writes first */ + if (resubmit) { + struct pblk_c_ctx *r_ctx; + + spin_lock(&pblk->resubmit_lock); + r_ctx = list_first_entry(&pblk->resubmit_list, + struct pblk_c_ctx, list); + list_del(&r_ctx->list); + spin_unlock(&pblk->resubmit_lock); + + secs_avail = r_ctx->nr_valid; + pos = r_ctx->sentry; + + pblk_prepare_resubmit(pblk, pos, secs_avail); + secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, + secs_avail); - secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync; - pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); + kfree(r_ctx); + } else { + /* If there are no sectors in the cache, + * flushes (bios without data) will be cleared on + * the cache threads + */ + secs_avail = pblk_rb_read_count(&pblk->rwb); + if (!secs_avail) + return 1; + + secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); + if (!secs_to_flush && secs_avail < pblk->min_write_pgs) + return 1; + + secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, + secs_to_flush); + if (secs_to_sync > pblk->max_write_pgs) { + pr_err("pblk: bad buffer sync calculation\n"); + return 1; + } + + secs_to_com = (secs_to_sync > secs_avail) ? + secs_avail : secs_to_sync; + pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); + } bio = bio_alloc(GFP_KERNEL, secs_to_sync); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 9c682acfc5d1..34cc1d64a9d4 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -89,12 +89,14 @@ struct pblk_sec_meta { /* The number of GC lists and the rate-limiter states go together. This way the * rate-limiter can dictate how much GC is needed based on resource utilization. */ -#define PBLK_GC_NR_LISTS 3 +#define PBLK_GC_NR_LISTS 4 enum { - PBLK_RL_HIGH = 1, - PBLK_RL_MID = 2, - PBLK_RL_LOW = 3, + PBLK_RL_OFF = 0, + PBLK_RL_WERR = 1, + PBLK_RL_HIGH = 2, + PBLK_RL_MID = 3, + PBLK_RL_LOW = 4 }; #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) @@ -128,7 +130,6 @@ struct pblk_pad_rq { struct pblk_rec_ctx { struct pblk *pblk; struct nvm_rq *rqd; - struct list_head failed; struct work_struct ws_rec; }; @@ -279,6 +280,8 @@ struct pblk_rl { int rb_user_active; int rb_gc_active; + atomic_t werr_lines; /* Number of write error lines that needs gc */ + struct timer_list u_timer; unsigned long long nr_secs; @@ -312,6 +315,7 @@ enum { PBLK_LINEGC_MID = 23, PBLK_LINEGC_HIGH = 24, PBLK_LINEGC_FULL = 25, + PBLK_LINEGC_WERR = 26 }; #define PBLK_MAGIC 0x70626c6b /*pblk*/ @@ -413,6 +417,11 @@ struct pblk_smeta { struct line_smeta *buf; /* smeta buffer in persistent format */ }; +struct pblk_w_err_gc { + int has_write_err; + __le64 *lba_list; +}; + struct pblk_line { struct pblk *pblk; unsigned int id; /* Line number corresponds to the @@ -458,6 +467,8 @@ struct pblk_line { struct kref ref; /* Write buffer L2P references */ + struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */ + spinlock_t lock; /* Necessary for invalid_bitmap only */ }; @@ -489,6 +500,8 @@ struct pblk_line_mgmt { struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ struct list_head gc_low_list; /* Full lines ready to GC, low isc */ + struct list_head gc_werr_list; /* Write err recovery list */ + struct list_head gc_full_list; /* Full lines ready to GC, no valid */ struct list_head gc_empty_list; /* Full lines close, all valid */ @@ -664,12 +677,15 @@ struct pblk { struct list_head compl_list; - mempool_t *page_bio_pool; - mempool_t *gen_ws_pool; - mempool_t *rec_pool; - mempool_t *r_rq_pool; - mempool_t *w_rq_pool; - mempool_t *e_rq_pool; + spinlock_t resubmit_lock; /* Resubmit list lock */ + struct list_head resubmit_list; /* Resubmit list for failed writes*/ + + mempool_t page_bio_pool; + mempool_t gen_ws_pool; + mempool_t rec_pool; + mempool_t r_rq_pool; + mempool_t w_rq_pool; + mempool_t e_rq_pool; struct workqueue_struct *close_wq; struct workqueue_struct *bb_wq; @@ -713,9 +729,6 @@ void pblk_rb_sync_l2p(struct pblk_rb *rb); unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, unsigned int pos, unsigned int nr_entries, unsigned int count); -unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, - struct list_head *list, - unsigned int max); int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, struct ppa_addr ppa, int bio_iter, bool advanced_bio); unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); @@ -766,11 +779,13 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk); struct pblk_line *pblk_line_get_erase(struct pblk *pblk); int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); int pblk_line_is_full(struct pblk_line *line); -void pblk_line_free(struct pblk *pblk, struct pblk_line *line); +void pblk_line_free(struct pblk_line *line); void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line); void pblk_line_close(struct pblk *pblk, struct pblk_line *line); void pblk_line_close_ws(struct work_struct *work); void pblk_pipeline_stop(struct pblk *pblk); +void __pblk_pipeline_stop(struct pblk *pblk); +void __pblk_pipeline_flush(struct pblk *pblk); void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq); @@ -794,7 +809,6 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, unsigned long *lun_bitmap); -void pblk_end_io_sync(struct nvm_rq *rqd); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, @@ -837,23 +851,20 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, int pblk_write_ts(void *data); void pblk_write_timer_fn(struct timer_list *t); void pblk_write_should_kick(struct pblk *pblk); +void pblk_write_kick(struct pblk *pblk); /* * pblk read path */ -extern struct bio_set *pblk_bio_set; +extern struct bio_set pblk_bio_set; int pblk_submit_read(struct pblk *pblk, struct bio *bio); int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk recovery */ -void pblk_submit_rec(struct work_struct *work); struct pblk_line *pblk_recov_l2p(struct pblk *pblk); int pblk_recov_pad(struct pblk *pblk); int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); -int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, - struct pblk_rec_ctx *recovery, u64 *comp_bits, - unsigned int comp); /* * pblk gc @@ -864,7 +875,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, #define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */ int pblk_gc_init(struct pblk *pblk); -void pblk_gc_exit(struct pblk *pblk); +void pblk_gc_exit(struct pblk *pblk, bool graceful); void pblk_gc_should_start(struct pblk *pblk); void pblk_gc_should_stop(struct pblk *pblk); void pblk_gc_should_kick(struct pblk *pblk); @@ -894,6 +905,9 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, bool used); int pblk_rl_is_limit(struct pblk_rl *rl); +void pblk_rl_werr_line_in(struct pblk_rl *rl); +void pblk_rl_werr_line_out(struct pblk_rl *rl); + /* * pblk sysfs */ |