summaryrefslogtreecommitdiffstats
path: root/drivers/lightnvm/pblk-init.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-12-28 22:19:59 +0100
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-12-28 22:19:59 +0100
commit 0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree 2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /drivers/lightnvm/pblk-init.c
parentMerge tag 'y2038-for-4.21' of ssh://gitolite.kernel.org:/pub/scm/linux/kernel... (diff)
parentkyber: use sbitmap add_wait_queue/list_del wait helpers (diff)
downloadlinux-0e9da3fbf7d81f0f913b491c8de1ba7883d4f217.tar.xz
linux-0e9da3fbf7d81f0f913b491c8de1ba7883d4f217.zip
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block/storage for 4.21. Larger than usual, it was a busy round with lots of goodies queued up. Most notable is the removal of the old IO stack, which has been a long time coming. No new features for a while, everything coming in this week has all been fixes for things that were previously merged. This contains: - Use atomic counters instead of semaphores for mtip32xx (Arnd) - Cleanup of the mtip32xx request setup (Christoph) - Fix for circular locking dependency in loop (Jan, Tetsuo) - bcache (Coly, Guoju, Shenghui) * Optimizations for writeback caching * Various fixes and improvements - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith) * host and target support for NVMe over TCP * Error log page support * Support for separate read/write/poll queues * Much improved polling * discard OOM fallback * Tracepoint improvements - lightnvm (Hans, Hua, Igor, Matias, Javier) * Igor added packed metadata to pblk. Now drives without metadata per LBA can be used as well. * Fix from Geert on uninitialized value on chunk metadata reads. * Fixes from Hans and Javier to pblk recovery and write path. * Fix from Hua Su to fix a race condition in the pblk recovery code. * Scan optimization added to pblk recovery from Zhoujie. * Small geometry cleanup from me. - Conversion of the last few drivers that used the legacy path to blk-mq (me) - Removal of legacy IO path in SCSI (me, Christoph) - Removal of legacy IO stack and schedulers (me) - Support for much better polling, now without interrupts at all. blk-mq adds support for multiple queue maps, which enables us to have a map per type. This in turn enables nvme to have separate completion queues for polling, which can then be interrupt-less. Also means we're ready for async polled IO, which is hopefully coming in the next release. 
- Killing of (now) unused block exports (Christoph) - Unification of the blk-rq-qos and blk-wbt wait handling (Josef) - Support for zoned testing with null_blk (Masato) - sx8 conversion to per-host tag sets (Christoph) - IO priority improvements (Damien) - mq-deadline zoned fix (Damien) - Ref count blkcg series (Dennis) - Lots of blk-mq improvements and speedups (me) - sbitmap scalability improvements (me) - Make core inflight IO accounting per-cpu (Mikulas) - Export timeout setting in sysfs (Weiping) - Cleanup the direct issue path (Jianchao) - Export blk-wbt internals in block debugfs for easier debugging (Ming) - Lots of other fixes and improvements" * tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits) kyber: use sbitmap add_wait_queue/list_del wait helpers sbitmap: add helpers for add/del wait queue handling block: save irq state in blkg_lookup_create() dm: don't reuse bio for flushes nvme-pci: trace SQ status on completions nvme-rdma: implement polling queue map nvme-fabrics: allow user to pass in nr_poll_queues nvme-fabrics: allow nvmf_connect_io_queue to poll nvme-core: optionally poll sync commands block: make request_to_qc_t public nvme-tcp: fix spelling mistake "attepmpt" -> "attempt" nvme-tcp: fix endianess annotations nvmet-tcp: fix endianess annotations nvme-pci: refactor nvme_poll_irqdisable to make sparse happy nvme-pci: only set nr_maps to 2 if poll queues are supported nvmet: use a macro for default error location nvmet: fix comparison of a u16 with -1 blk-mq: enable IO poll if .nr_queues of type poll > 0 blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight() blk-mq: skip zero-queue maps in blk_mq_map_swqueue ...
Diffstat (limited to 'drivers/lightnvm/pblk-init.c')
-rw-r--r-- drivers/lightnvm/pblk-init.c 103
1 files changed, 79 insertions, 24 deletions
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 13822594647c..f9a3e47b6a93 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -207,9 +207,6 @@ static int pblk_rwb_init(struct pblk *pblk)
return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs);
}
-/* Minimum pages needed within a lun */
-#define ADDR_POOL_SIZE 64
-
static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo,
struct nvm_addrf_12 *dst)
{
@@ -350,23 +347,19 @@ fail_destroy_ws:
static int pblk_get_global_caches(void)
{
- int ret;
+ int ret = 0;
mutex_lock(&pblk_caches.mutex);
- if (kref_read(&pblk_caches.kref) > 0) {
- kref_get(&pblk_caches.kref);
- mutex_unlock(&pblk_caches.mutex);
- return 0;
- }
+ if (kref_get_unless_zero(&pblk_caches.kref))
+ goto out;
ret = pblk_create_global_caches();
-
if (!ret)
- kref_get(&pblk_caches.kref);
+ kref_init(&pblk_caches.kref);
+out:
mutex_unlock(&pblk_caches.mutex);
-
return ret;
}
@@ -406,12 +399,45 @@ static int pblk_core_init(struct pblk *pblk)
pblk->nr_flush_rst = 0;
pblk->min_write_pgs = geo->ws_opt;
+ pblk->min_write_pgs_data = pblk->min_write_pgs;
max_write_ppas = pblk->min_write_pgs * geo->all_luns;
pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT));
pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+ pblk->oob_meta_size = geo->sos;
+ if (!pblk_is_oob_meta_supported(pblk)) {
+ /* For drives which do not have the OOB metadata feature,
+ * in order to support the recovery feature we need to use
+ * so-called packed metadata. Packed metadata will store
+ * the same information as OOB metadata (l2p table mapping),
+ * but in the form of a single page at the end of
+ * every write request.
+ */
+ if (pblk->min_write_pgs
+ * sizeof(struct pblk_sec_meta) > PAGE_SIZE) {
+ /* We want to keep all the packed metadata on a single
+ * page per write request. So we need to ensure that
+ * it will fit.
+ *
+ * This is more of a sanity check, since there is
+ * no device with such a big minimal write size
+ * (above 1 megabyte).
+ */
+ pblk_err(pblk, "Not supported min write size\n");
+ return -EINVAL;
+ }
+ /* For the packed meta approach we do some simplification.
+ * On the read path we always issue requests whose size
+ * equals max_write_pgs, with all pages filled with
+ * user payload except for the last page, which will be
+ * filled with packed metadata.
+ */
+ pblk->max_write_pgs = pblk->min_write_pgs;
+ pblk->min_write_pgs_data = pblk->min_write_pgs - 1;
+ }
+
pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
GFP_KERNEL);
if (!pblk->pad_dist)
@@ -635,40 +661,61 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
-static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
+static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
struct nvm_geo *geo = &dev->geo;
sector_t provisioned;
- int sec_meta, blk_meta;
+ int sec_meta, blk_meta, clba;
+ int minimum;
if (geo->op == NVM_TARGET_DEFAULT_OP)
pblk->op = PBLK_DEFAULT_OP;
else
pblk->op = geo->op;
- provisioned = nr_free_blks;
+ minimum = pblk_get_min_chks(pblk);
+ provisioned = nr_free_chks;
provisioned *= (100 - pblk->op);
sector_div(provisioned, 100);
- pblk->op_blks = nr_free_blks - provisioned;
+ if ((nr_free_chks - provisioned) < minimum) {
+ if (geo->op != NVM_TARGET_DEFAULT_OP) {
+ pblk_err(pblk, "OP too small to create a sane instance\n");
+ return -EINTR;
+ }
+
+ /* If the user did not specify an OP value, and PBLK_DEFAULT_OP
+ * is not enough, calculate and set a sane value
+ */
+
+ provisioned = nr_free_chks - minimum;
+ pblk->op = (100 * minimum) / nr_free_chks;
+ pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n",
+ pblk->op);
+ }
+
+ pblk->op_blks = nr_free_chks - provisioned;
/* Internally pblk manages all free blocks, but all calculations based
* on user capacity consider only provisioned blocks
*/
- pblk->rl.total_blocks = nr_free_blks;
- pblk->rl.nr_secs = nr_free_blks * geo->clba;
+ pblk->rl.total_blocks = nr_free_chks;
+ pblk->rl.nr_secs = nr_free_chks * geo->clba;
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
- pblk->capacity = (provisioned - blk_meta) * geo->clba;
+ clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data;
+ pblk->capacity = (provisioned - blk_meta) * clba;
- atomic_set(&pblk->rl.free_blocks, nr_free_blks);
- atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
+ atomic_set(&pblk->rl.free_blocks, nr_free_chks);
+ atomic_set(&pblk->rl.free_user_blocks, nr_free_chks);
+
+ return 0;
}
static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
@@ -984,7 +1031,7 @@ static int pblk_lines_init(struct pblk *pblk)
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
void *chunk_meta;
- long nr_free_chks = 0;
+ int nr_free_chks = 0;
int i, ret;
ret = pblk_line_meta_init(pblk);
@@ -1031,7 +1078,9 @@ static int pblk_lines_init(struct pblk *pblk)
goto fail_free_lines;
}
- pblk_set_provision(pblk, nr_free_chks);
+ ret = pblk_set_provision(pblk, nr_free_chks);
+ if (ret)
+ goto fail_free_lines;
vfree(chunk_meta);
return 0;
@@ -1041,7 +1090,7 @@ fail_free_lines:
pblk_line_meta_free(l_mg, &pblk->lines[i]);
kfree(pblk->lines);
fail_free_chunk_meta:
- kfree(chunk_meta);
+ vfree(chunk_meta);
fail_free_luns:
kfree(pblk->luns);
fail_free_meta:
@@ -1154,6 +1203,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
return ERR_PTR(-EINVAL);
}
+ if (geo->ext) {
+ pblk_err(pblk, "extended metadata not supported\n");
+ kfree(pblk);
+ return ERR_PTR(-EINVAL);
+ }
+
spin_lock_init(&pblk->resubmit_lock);
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);