summaryrefslogtreecommitdiffstats
path: root/drivers/lightnvm
diff options
context:
space:
mode:
authorJavier González <javier@javigon.com>2018-10-09 13:12:05 +0200
committerJens Axboe <axboe@kernel.dk>2018-10-09 16:25:08 +0200
commit6ad2f619b2b278467242e7a084c4f77d79c31a63 (patch)
tree48aa156fb8f59749ec8cb7a67e10f2b394e5173d /drivers/lightnvm
parentlightnvm: pblk: take write semaphore on metadata (diff)
downloadlinux-6ad2f619b2b278467242e7a084c4f77d79c31a63.tar.xz
linux-6ad2f619b2b278467242e7a084c4f77d79c31a63.zip
lightnvm: pblk: recover open lines on 2.0 devices
In the OCSSD 2.0 spec, each chunk reports its write pointer. This means that pblk does not need to scan open lines to find the write pointer, but instead, it can retrieve it directly (and verify it). This patch uses the write pointer on open lines to (i) recover the line up until the last written lba and (ii) reconstruct the map bitmap and rest of line metadata so that the line can be used for new data. Since the 1.2 path in lightnvm core has been re-implemented to populate the chunk structure and thus recover the write pointer on initialization, this patch removes 1.2 specific recovery, as the 2.0 path can be reused. Signed-off-by: Javier González <javier@cnexlabs.com> Signed-off-by: Matias Bjørling <mb@lightnvm.io> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r--drivers/lightnvm/pblk-recovery.c400
1 files changed, 121 insertions, 279 deletions
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 6c57eb00a7f1..fccf65bc70b3 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -86,15 +86,39 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
return 0;
}
-static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
+static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
+ u64 written_secs)
+{
+ int i;
+
+ for (i = 0; i < written_secs; i += pblk->min_write_pgs)
+ pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+}
+
+static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+ u64 written_secs = 0;
+ int valid_chunks = 0;
+ int i;
+
+ for (i = 0; i < lm->blk_per_line; i++) {
+ struct nvm_chk_meta *chunk = &line->chks[i];
- return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
- nr_bb * geo->clba;
+ if (chunk->state & NVM_CHK_ST_OFFLINE)
+ continue;
+
+ written_secs += chunk->wp;
+ valid_chunks++;
+ }
+
+ if (lm->blk_per_line - nr_bb != valid_chunks)
+ pblk_err(pblk, "recovery line %d is bad\n", line->id);
+
+ pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);
+
+ return written_secs;
}
struct pblk_recov_alloc {
@@ -106,115 +130,6 @@ struct pblk_recov_alloc {
dma_addr_t dma_meta_list;
};
-static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, u64 r_ptr)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- struct nvm_rq *rqd;
- struct bio *bio;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
- u64 r_ptr_int;
- int left_ppas;
- int rq_ppas, rq_len;
- int i, j;
- int ret = 0;
-
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
-
- left_ppas = line->cur_sec - r_ptr;
- if (!left_ppas)
- return 0;
-
- r_ptr_int = r_ptr;
-
-next_read_rq:
- memset(rqd, 0, pblk_g_rq_size);
-
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
- rq_len = rq_ppas * geo->csecs;
-
- bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
-
- rqd->bio = bio;
- rqd->opcode = NVM_OP_PREAD;
- rqd->meta_list = meta_list;
- rqd->nr_ppas = rq_ppas;
- rqd->ppa_list = ppa_list;
- rqd->dma_ppa_list = dma_ppa_list;
- rqd->dma_meta_list = dma_meta_list;
-
- if (pblk_io_aligned(pblk, rq_ppas))
- rqd->is_seq = 1;
-
- ppa_list = nvm_rq_to_ppa_list(rqd);
-
- for (i = 0; i < rqd->nr_ppas; ) {
- struct ppa_addr ppa;
- int pos;
-
- ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
-
- while (test_bit(pos, line->blk_bitmap)) {
- r_ptr_int += pblk->min_write_pgs;
- ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
- ppa_list[i] =
- addr_to_gen_ppa(pblk, r_ptr_int, line->id);
- }
-
- /* If read fails, more padding is needed */
- ret = pblk_submit_io_sync(pblk, rqd);
- if (ret) {
- pblk_err(pblk, "I/O submission failed: %d\n", ret);
- return ret;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- /* At this point, the read should not fail. If it does, it is a problem
- * we cannot recover from here. Need FTL log.
- */
- if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
- pblk_err(pblk, "L2P recovery failed (%d)\n", rqd->error);
- return -EINTR;
- }
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- u64 lba = le64_to_cpu(meta_list[i].lba);
-
- if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
- continue;
-
- pblk_update_map(pblk, lba, ppa_list[i]);
- }
-
- left_ppas -= rq_ppas;
- if (left_ppas > 0)
- goto next_read_rq;
-
- return 0;
-}
-
static void pblk_recov_complete(struct kref *ref)
{
struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
@@ -236,8 +151,9 @@ static void pblk_end_io_recov(struct nvm_rq *rqd)
kref_put(&pad_rq->ref, pblk_recov_complete);
}
-static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
- int left_ppas)
+/* pad line using line bitmap. */
+static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
+ int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
@@ -379,143 +295,42 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
return (distance > line->left_msecs) ? line->left_msecs : distance;
}
-/* When this function is called, it means that not all upper pages have been
- * written in a page that contains valid data. In order to recover this data, we
- * first find the write pointer on the device, then we pad all necessary
- * sectors, and finally attempt to read the valid data
- */
-static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p)
+static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
+ struct pblk_line *line)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- struct nvm_rq *rqd;
- struct bio *bio;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
- u64 w_ptr = 0, r_ptr;
- int rq_ppas, rq_len;
- int i, j;
- int ret = 0;
- int rec_round;
- int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
-
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
-
- /* we could recover up until the line write pointer */
- r_ptr = line->cur_sec;
- rec_round = 0;
-
-next_rq:
- memset(rqd, 0, pblk_g_rq_size);
-
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
- rq_len = rq_ppas * geo->csecs;
-
- bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
-
- rqd->bio = bio;
- rqd->opcode = NVM_OP_PREAD;
- rqd->meta_list = meta_list;
- rqd->nr_ppas = rq_ppas;
- rqd->ppa_list = ppa_list;
- rqd->dma_ppa_list = dma_ppa_list;
- rqd->dma_meta_list = dma_meta_list;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_lun *rlun;
+ struct nvm_chk_meta *chunk;
+ struct ppa_addr ppa;
+ u64 line_wp;
+ int pos, i;
- if (pblk_io_aligned(pblk, rq_ppas))
- rqd->is_seq = 1;
+ rlun = &pblk->luns[0];
+ ppa = rlun->bppa;
+ pos = pblk_ppa_to_pos(geo, ppa);
+ chunk = &line->chks[pos];
- for (i = 0; i < rqd->nr_ppas; ) {
- struct ppa_addr ppa;
- int pos;
+ line_wp = chunk->wp;
- w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
- ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
+ for (i = 1; i < lm->blk_per_line; i++) {
+ rlun = &pblk->luns[i];
+ ppa = rlun->bppa;
pos = pblk_ppa_to_pos(geo, ppa);
+ chunk = &line->chks[pos];
- while (test_bit(pos, line->blk_bitmap)) {
- w_ptr += pblk->min_write_pgs;
- ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
- rqd->ppa_list[i] =
- addr_to_gen_ppa(pblk, w_ptr, line->id);
- }
-
- ret = pblk_submit_io_sync(pblk, rqd);
- if (ret) {
- pblk_err(pblk, "I/O submission failed: %d\n", ret);
- return ret;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- /* This should not happen since the read failed during normal recovery,
- * but the media works funny sometimes...
- */
- if (!rec_round++ && !rqd->error) {
- rec_round = 0;
- for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
- u64 lba = le64_to_cpu(meta_list[i].lba);
-
- if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
- continue;
-
- pblk_update_map(pblk, lba, rqd->ppa_list[i]);
- }
- }
-
- /* Reached the end of the written line */
- if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
- int pad_secs, nr_error_bits, bit;
- int ret;
-
- bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
- nr_error_bits = rqd->nr_ppas - bit;
-
- /* Roll back failed sectors */
- line->cur_sec -= nr_error_bits;
- line->left_msecs += nr_error_bits;
- bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
-
- pad_secs = pblk_pad_distance(pblk, line);
-
- ret = pblk_recov_pad_oob(pblk, line, pad_secs);
- if (ret)
- pblk_err(pblk, "OOB padding failed (err:%d)\n", ret);
-
- ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
- if (ret)
- pblk_err(pblk, "OOB read failed (err:%d)\n", ret);
-
- left_ppas = 0;
+ if (chunk->wp > line_wp)
+ return 1;
+ else if (chunk->wp < line_wp)
+ line_wp = chunk->wp;
}
- left_ppas -= rq_ppas;
- if (left_ppas > 0)
- goto next_rq;
-
- return ret;
+ return 0;
}
static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, int *done)
+ struct pblk_recov_alloc p)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
@@ -525,11 +340,16 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
- u64 paddr;
+ __le64 *lba_list;
+ u64 paddr = 0;
+ bool padded = false;
int rq_ppas, rq_len;
int i, j;
- int ret = 0;
- int left_ppas = pblk_calc_sec_in_line(pblk, line);
+ int ret;
+ u64 left_ppas = pblk_sec_in_open_line(pblk, line);
+
+ if (pblk_line_wp_is_unbalanced(pblk, line))
+ pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
ppa_list = p.ppa_list;
meta_list = p.meta_list;
@@ -538,7 +358,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
dma_ppa_list = p.dma_ppa_list;
dma_meta_list = p.dma_meta_list;
- *done = 1;
+ lba_list = emeta_to_lbas(pblk, line->emeta->buf);
next_rq:
memset(rqd, 0, pblk_g_rq_size);
@@ -566,11 +386,11 @@ next_rq:
if (pblk_io_aligned(pblk, rq_ppas))
rqd->is_seq = 1;
+retry_rq:
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
- paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
ppa = addr_to_gen_ppa(pblk, paddr, line->id);
pos = pblk_ppa_to_pos(geo, ppa);
@@ -580,9 +400,9 @@ next_rq:
pos = pblk_ppa_to_pos(geo, ppa);
}
- for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
+ for (j = 0; j < pblk->min_write_pgs; j++, i++)
rqd->ppa_list[i] =
- addr_to_gen_ppa(pblk, paddr, line->id);
+ addr_to_gen_ppa(pblk, paddr + j, line->id);
}
ret = pblk_submit_io_sync(pblk, rqd);
@@ -594,31 +414,33 @@ next_rq:
atomic_dec(&pblk->inflight_io);
- /* Reached the end of the written line */
+ /* If a read fails, do a best effort by padding the line and retrying */
if (rqd->error) {
- int nr_error_bits, bit;
-
- bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
- nr_error_bits = rqd->nr_ppas - bit;
+ int pad_distance, ret;
- /* Roll back failed sectors */
- line->cur_sec -= nr_error_bits;
- line->left_msecs += nr_error_bits;
- bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
+ if (padded) {
+ pblk_log_read_err(pblk, rqd);
+ return -EINTR;
+ }
- left_ppas = 0;
- rqd->nr_ppas = bit;
+ pad_distance = pblk_pad_distance(pblk, line);
+ ret = pblk_recov_pad_line(pblk, line, pad_distance);
+ if (ret)
+ return ret;
- if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
- *done = 0;
+ padded = true;
+ goto retry_rq;
}
for (i = 0; i < rqd->nr_ppas; i++) {
u64 lba = le64_to_cpu(meta_list[i].lba);
+ lba_list[paddr++] = cpu_to_le64(lba);
+
if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
continue;
+ line->nr_valid_lbas++;
pblk_update_map(pblk, lba, rqd->ppa_list[i]);
}
@@ -626,7 +448,11 @@ next_rq:
if (left_ppas > 0)
goto next_rq;
- return ret;
+#ifdef CONFIG_NVM_PBLK_DEBUG
+ WARN_ON(padded && !pblk_line_is_full(line));
+#endif
+
+ return 0;
}
/* Scan line for lbas on out of bound area */
@@ -640,7 +466,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
struct pblk_recov_alloc p;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
- int done, ret = 0;
+ int ret = 0;
meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
if (!meta_list)
@@ -655,7 +481,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
goto free_meta_list;
}
- rqd = pblk_alloc_rqd(pblk, PBLK_READ);
+ rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
+ memset(rqd, 0, pblk_g_rq_size);
p.ppa_list = ppa_list;
p.meta_list = meta_list;
@@ -664,24 +491,17 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
p.dma_ppa_list = dma_ppa_list;
p.dma_meta_list = dma_meta_list;
- ret = pblk_recov_scan_oob(pblk, line, p, &done);
+ ret = pblk_recov_scan_oob(pblk, line, p);
if (ret) {
- pblk_err(pblk, "could not recover L2P from OOB\n");
+ pblk_err(pblk, "could not recover L2P form OOB\n");
goto out;
}
- if (!done) {
- ret = pblk_recov_scan_all_oob(pblk, line, p);
- if (ret) {
- pblk_err(pblk, "could not recover L2P from OOB\n");
- goto out;
- }
- }
-
if (pblk_line_is_full(line))
pblk_line_recov_close(pblk, line);
out:
+ mempool_free(rqd, &pblk->r_rq_pool);
kfree(data);
free_meta_list:
nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
@@ -770,7 +590,7 @@ static void pblk_recov_wa_counters(struct pblk *pblk,
}
static int pblk_line_was_written(struct pblk_line *line,
- struct pblk *pblk)
+ struct pblk *pblk)
{
struct pblk_line_meta *lm = &pblk->lm;
@@ -796,6 +616,18 @@ static int pblk_line_was_written(struct pblk_line *line,
return 1;
}
+static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i;
+
+ for (i = 0; i < lm->blk_per_line; i++)
+ if (line->chks[i].state & NVM_CHK_ST_OPEN)
+ return true;
+
+ return false;
+}
+
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
{
struct pblk_line_meta *lm = &pblk->lm;
@@ -906,6 +738,11 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
line->emeta = emeta;
memset(line->emeta->buf, 0, lm->emeta_len[0]);
+ if (pblk_line_is_open(pblk, line)) {
+ pblk_recov_l2p_from_oob(pblk, line);
+ goto next;
+ }
+
if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
@@ -944,15 +781,20 @@ next:
line->smeta = NULL;
line->emeta = NULL;
} else {
- if (open_lines > 1)
- pblk_err(pblk, "failed to recover L2P\n");
+ spin_lock(&line->lock);
+ line->state = PBLK_LINESTATE_OPEN;
+ spin_unlock(&line->lock);
+
+ line->emeta->mem = 0;
+ atomic_set(&line->emeta->sync, 0);
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
- open_lines++;
- line->meta_line = meta_line;
data_line = line;
+ line->meta_line = meta_line;
+
+ open_lines++;
}
}
@@ -1000,7 +842,7 @@ int pblk_recov_pad(struct pblk *pblk)
left_msecs = line->left_msecs;
spin_unlock(&l_mg->free_lock);
- ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+ ret = pblk_recov_pad_line(pblk, line, left_msecs);
if (ret) {
pblk_err(pblk, "tear down padding failed (%d)\n", ret);
return ret;