diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 22:19:59 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 22:19:59 +0100 |
commit | 0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch) | |
tree | 2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /drivers/lightnvm/pblk-init.c | |
parent | Merge tag 'y2038-for-4.21' of ssh://gitolite.kernel.org:/pub/scm/linux/kernel... (diff) | |
parent | kyber: use sbitmap add_wait_queue/list_del wait helpers (diff) | |
download | linux-0e9da3fbf7d81f0f913b491c8de1ba7883d4f217.tar.xz linux-0e9da3fbf7d81f0f913b491c8de1ba7883d4f217.zip |
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block/storage for 4.21.
Larger than usual, it was a busy round with lots of goodies queued up.
Most notable is the removal of the old IO stack, which has been a long
time coming. No new features for a while, everything coming in this
week has all been fixes for things that were previously merged.
This contains:
- Use atomic counters instead of semaphores for mtip32xx (Arnd)
- Cleanup of the mtip32xx request setup (Christoph)
- Fix for circular locking dependency in loop (Jan, Tetsuo)
- bcache (Coly, Guoju, Shenghui)
* Optimizations for writeback caching
* Various fixes and improvements
- nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
* host and target support for NVMe over TCP
* Error log page support
* Support for separate read/write/poll queues
* Much improved polling
* discard OOM fallback
* Tracepoint improvements
- lightnvm (Hans, Hua, Igor, Matias, Javier)
* Igor added packed metadata to pblk. Now drives without metadata
per LBA can be used as well.
* Fix from Geert on uninitialized value on chunk metadata reads.
* Fixes from Hans and Javier to pblk recovery and write path.
* Fix from Hua Su to fix a race condition in the pblk recovery
code.
* Scan optimization added to pblk recovery from Zhoujie.
* Small geometry cleanup from me.
- Conversion of the last few drivers that used the legacy path to
blk-mq (me)
- Removal of legacy IO path in SCSI (me, Christoph)
- Removal of legacy IO stack and schedulers (me)
- Support for much better polling, now without interrupts at all.
blk-mq adds support for multiple queue maps, which enables us to
have a map per type. This in turn enables nvme to have separate
completion queues for polling, which can then be interrupt-less.
Also means we're ready for async polled IO, which is hopefully
coming in the next release.
- Killing of (now) unused block exports (Christoph)
- Unification of the blk-rq-qos and blk-wbt wait handling (Josef)
- Support for zoned testing with null_blk (Masato)
- sx8 conversion to per-host tag sets (Christoph)
- IO priority improvements (Damien)
- mq-deadline zoned fix (Damien)
- Ref count blkcg series (Dennis)
- Lots of blk-mq improvements and speedups (me)
- sbitmap scalability improvements (me)
- Make core inflight IO accounting per-cpu (Mikulas)
- Export timeout setting in sysfs (Weiping)
- Cleanup the direct issue path (Jianchao)
- Export blk-wbt internals in block debugfs for easier debugging
(Ming)
- Lots of other fixes and improvements"
* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
kyber: use sbitmap add_wait_queue/list_del wait helpers
sbitmap: add helpers for add/del wait queue handling
block: save irq state in blkg_lookup_create()
dm: don't reuse bio for flushes
nvme-pci: trace SQ status on completions
nvme-rdma: implement polling queue map
nvme-fabrics: allow user to pass in nr_poll_queues
nvme-fabrics: allow nvmf_connect_io_queue to poll
nvme-core: optionally poll sync commands
block: make request_to_qc_t public
nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
nvme-tcp: fix endianess annotations
nvmet-tcp: fix endianess annotations
nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
nvme-pci: only set nr_maps to 2 if poll queues are supported
nvmet: use a macro for default error location
nvmet: fix comparison of a u16 with -1
blk-mq: enable IO poll if .nr_queues of type poll > 0
blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
blk-mq: skip zero-queue maps in blk_mq_map_swqueue
...
Diffstat (limited to 'drivers/lightnvm/pblk-init.c')
-rw-r--r-- | drivers/lightnvm/pblk-init.c | 103 |
1 files changed, 79 insertions, 24 deletions
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 13822594647c..f9a3e47b6a93 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -207,9 +207,6 @@ static int pblk_rwb_init(struct pblk *pblk) return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); } -/* Minimum pages needed within a lun */ -#define ADDR_POOL_SIZE 64 - static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo, struct nvm_addrf_12 *dst) { @@ -350,23 +347,19 @@ fail_destroy_ws: static int pblk_get_global_caches(void) { - int ret; + int ret = 0; mutex_lock(&pblk_caches.mutex); - if (kref_read(&pblk_caches.kref) > 0) { - kref_get(&pblk_caches.kref); - mutex_unlock(&pblk_caches.mutex); - return 0; - } + if (kref_get_unless_zero(&pblk_caches.kref)) + goto out; ret = pblk_create_global_caches(); - if (!ret) - kref_get(&pblk_caches.kref); + kref_init(&pblk_caches.kref); +out: mutex_unlock(&pblk_caches.mutex); - return ret; } @@ -406,12 +399,45 @@ static int pblk_core_init(struct pblk *pblk) pblk->nr_flush_rst = 0; pblk->min_write_pgs = geo->ws_opt; + pblk->min_write_pgs_data = pblk->min_write_pgs; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); + pblk->oob_meta_size = geo->sos; + if (!pblk_is_oob_meta_supported(pblk)) { + /* For drives which does not have OOB metadata feature + * in order to support recovery feature we need to use + * so called packed metadata. Packed metada will store + * the same information as OOB metadata (l2p table mapping, + * but in the form of the single page at the end of + * every write request. + */ + if (pblk->min_write_pgs + * sizeof(struct pblk_sec_meta) > PAGE_SIZE) { + /* We want to keep all the packed metadata on single + * page per write requests. So we need to ensure that + * it will fit. + * + * This is more like sanity check, since there is + * no device with such a big minimal write size + * (above 1 metabytes). + */ + pblk_err(pblk, "Not supported min write size\n"); + return -EINVAL; + } + /* For packed meta approach we do some simplification. + * On read path we always issue requests which size + * equal to max_write_pgs, with all pages filled with + * user payload except of last one page which will be + * filled with packed metadata. + */ + pblk->max_write_pgs = pblk->min_write_pgs; + pblk->min_write_pgs_data = pblk->min_write_pgs - 1; + } + pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), GFP_KERNEL); if (!pblk->pad_dist) @@ -635,40 +661,61 @@ static unsigned int calc_emeta_len(struct pblk *pblk) return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]); } -static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) +static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct nvm_geo *geo = &dev->geo; sector_t provisioned; - int sec_meta, blk_meta; + int sec_meta, blk_meta, clba; + int minimum; if (geo->op == NVM_TARGET_DEFAULT_OP) pblk->op = PBLK_DEFAULT_OP; else pblk->op = geo->op; - provisioned = nr_free_blks; + minimum = pblk_get_min_chks(pblk); + provisioned = nr_free_chks; provisioned *= (100 - pblk->op); sector_div(provisioned, 100); - pblk->op_blks = nr_free_blks - provisioned; + if ((nr_free_chks - provisioned) < minimum) { + if (geo->op != NVM_TARGET_DEFAULT_OP) { + pblk_err(pblk, "OP too small to create a sane instance\n"); + return -EINTR; + } + + /* If the user did not specify an OP value, and PBLK_DEFAULT_OP + * is not enough, calculate and set sane value + */ + + provisioned = nr_free_chks - minimum; + pblk->op = (100 * minimum) / nr_free_chks; + pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n", + pblk->op); + } + + pblk->op_blks = nr_free_chks - provisioned; /* Internally pblk manages all free blocks, but all calculations based * on user capacity consider only provisioned blocks */ - pblk->rl.total_blocks = nr_free_blks; - pblk->rl.nr_secs = nr_free_blks * geo->clba; + pblk->rl.total_blocks = nr_free_chks; + pblk->rl.nr_secs = nr_free_chks * geo->clba; /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - pblk->capacity = (provisioned - blk_meta) * geo->clba; + clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data; + pblk->capacity = (provisioned - blk_meta) * clba; - atomic_set(&pblk->rl.free_blocks, nr_free_blks); - atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); + atomic_set(&pblk->rl.free_blocks, nr_free_chks); + atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); + + return 0; } static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, @@ -984,7 +1031,7 @@ static int pblk_lines_init(struct pblk *pblk) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line; void *chunk_meta; - long nr_free_chks = 0; + int nr_free_chks = 0; int i, ret; ret = pblk_line_meta_init(pblk); @@ -1031,7 +1078,9 @@ static int pblk_lines_init(struct pblk *pblk) goto fail_free_lines; } - pblk_set_provision(pblk, nr_free_chks); + ret = pblk_set_provision(pblk, nr_free_chks); + if (ret) + goto fail_free_lines; vfree(chunk_meta); return 0; @@ -1041,7 +1090,7 @@ fail_free_lines: pblk_line_meta_free(l_mg, &pblk->lines[i]); kfree(pblk->lines); fail_free_chunk_meta: - kfree(chunk_meta); + vfree(chunk_meta); fail_free_luns: kfree(pblk->luns); fail_free_meta: @@ -1154,6 +1203,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, return ERR_PTR(-EINVAL); } + if (geo->ext) { + pblk_err(pblk, "extended metadata not supported\n"); + kfree(pblk); + return ERR_PTR(-EINVAL); + } + spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); |