author    | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 00:32:19 +0100
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 00:32:19 +0100
commit    | e2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch)
tree      | b97a90170c45211bcc437761653aa8016c34afcd /drivers/lightnvm/pblk-init.c
parent    | Merge tag 'configfs-for-4.15' of git://git.infradead.org/users/hch/configfs (diff)
parent    | nvme: fix visibility of "uuid" ns attribute (diff)
download  | linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.xz linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.zip
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe:
"This is the main pull request for block storage for 4.15-rc1.
Nothing out of the ordinary in here, and no API changes or anything
like that. Just various new features for drivers, core changes, etc.
In particular, this pull request contains:
- A patch series from Bart, closing the hole on blk/scsi-mq queue
quiescing.
- A series from Christoph, building towards hidden gendisks (for
multipath) and the ability to move bio chains around.
- NVMe
- Support for native multipath for NVMe (Christoph).
- Userspace notifications for AENs (Keith).
- Command side-effects support (Keith).
- SGL support (Chaitanya Kulkarni)
- FC fixes and improvements (James Smart)
- Lots of fixes and tweaks (Various)
- bcache
- New maintainer (Michael Lyle)
- Writeback control improvements (Michael)
- Various fixes (Coly, Elena, Eric, Liang, et al)
- lightnvm updates, mostly centered around the pblk interface
(Javier, Hans, and Rakesh).
- Removal of unused bio/bvec kmap atomic interfaces (me, Christoph)
- Writeback series that fix the much discussed hundreds of millions
of sync-all units. This goes all the way, as discussed previously
(me).
- Fix for missing wakeup on writeback timer adjustments (Yafang
Shao).
- Fix laptop mode on blk-mq (me).
- {mq,name} tuple lookup for IO schedulers, allowing us to have
alias names. This means you can use 'deadline' on both !mq and mq
(where it's called mq-deadline). (me).
- blktrace race fix, oopsing on sg load (me).
- blk-mq optimizations (me).
- Obscure waitqueue race fix for kyber (Omar).
- NBD fixes (Josef).
- Disable writeback throttling by default on bfq, like we do on cfq
(Luca Miccio).
- Series from Ming that enables us to treat flush requests on blk-mq
like any other request. This is a really nice cleanup.
- Series from Ming that improves merging on blk-mq with schedulers,
getting us closer to flipping the switch on scsi-mq again.
- BFQ updates (Paolo).
- blk-mq atomic flags memory ordering fixes (Peter Z).
- Loop cgroup support (Shaohua).
- Lots of minor fixes from lots of different folks, both for core and
driver code"
* 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits)
nvme: fix visibility of "uuid" ns attribute
blk-mq: fixup some comment typos and lengths
ide: ide-atapi: fix compile error with defining macro DEBUG
blk-mq: improve tag waiting setup for non-shared tags
brd: remove unused brd_mutex
blk-mq: only run the hardware queue if IO is pending
block: avoid null pointer dereference on null disk
fs: guard_bio_eod() needs to consider partitions
xtensa/simdisk: fix compile error
nvme: expose subsys attribute to sysfs
nvme: create 'slaves' and 'holders' entries for hidden controllers
block: create 'slaves' and 'holders' entries for hidden gendisks
nvme: also expose the namespace identification sysfs files for mpath nodes
nvme: implement multipath access to nvme subsystems
nvme: track shared namespaces
nvme: introduce a nvme_ns_ids structure
nvme: track subsystems
block, nvme: Introduce blk_mq_req_flags_t
block, scsi: Make SCSI quiesce and resume work reliably
block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag
...
Diffstat (limited to 'drivers/lightnvm/pblk-init.c')
-rw-r--r-- | drivers/lightnvm/pblk-init.c | 197
1 file changed, 121 insertions, 76 deletions
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 1b0f61233c21..f62112ba5482 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,8 +20,8 @@
 #include "pblk.h"
 
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
-                *pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+                *pblk_w_rq_cache;
 
 static DECLARE_RWSEM(pblk_lock);
 struct bio_set *pblk_bio_set;
@@ -46,7 +46,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
         * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
         * available for user I/O.
         */
-       if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
+       if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
                blk_queue_split(q, &bio);
 
        return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
@@ -76,6 +76,28 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
        return BLK_QC_T_NONE;
 }
 
+static size_t pblk_trans_map_size(struct pblk *pblk)
+{
+       int entry_size = 8;
+
+       if (pblk->ppaf_bitsize < 32)
+               entry_size = 4;
+
+       return entry_size * pblk->rl.nr_secs;
+}
+
+#ifdef CONFIG_NVM_DEBUG
+static u32 pblk_l2p_crc(struct pblk *pblk)
+{
+       size_t map_size;
+       u32 crc = ~(u32)0;
+
+       map_size = pblk_trans_map_size(pblk);
+       crc = crc32_le(crc, pblk->trans_map, map_size);
+       return crc;
+}
+#endif
+
 static void pblk_l2p_free(struct pblk *pblk)
 {
        vfree(pblk->trans_map);
@@ -85,12 +107,10 @@ static int pblk_l2p_init(struct pblk *pblk)
 {
        sector_t i;
        struct ppa_addr ppa;
-       int entry_size = 8;
+       size_t map_size;
 
-       if (pblk->ppaf_bitsize < 32)
-               entry_size = 4;
-
-       pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
+       map_size = pblk_trans_map_size(pblk);
+       pblk->trans_map = vmalloc(map_size);
        if (!pblk->trans_map)
                return -ENOMEM;
@@ -132,7 +152,6 @@ static int pblk_rwb_init(struct pblk *pblk)
 }
 
 /* Minimum pages needed within a lun */
-#define PAGE_POOL_SIZE 16
 #define ADDR_POOL_SIZE 64
 
 static int pblk_set_ppaf(struct pblk *pblk)
@@ -182,12 +201,10 @@ static int pblk_init_global_caches(struct pblk *pblk)
 {
-       char cache_name[PBLK_CACHE_NAME_LEN];
-
        down_write(&pblk_lock);
-       pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
+       pblk_ws_cache = kmem_cache_create("pblk_blk_ws",
                        sizeof(struct pblk_line_ws), 0, 0, NULL);
-       if (!pblk_blk_ws_cache) {
+       if (!pblk_ws_cache) {
                up_write(&pblk_lock);
                return -ENOMEM;
        }
@@ -195,7 +212,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
        pblk_rec_cache = kmem_cache_create("pblk_rec",
                        sizeof(struct pblk_rec_ctx), 0, 0, NULL);
        if (!pblk_rec_cache) {
-               kmem_cache_destroy(pblk_blk_ws_cache);
+               kmem_cache_destroy(pblk_ws_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
        }
@@ -203,7 +220,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
        pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
                        0, 0, NULL);
        if (!pblk_g_rq_cache) {
-               kmem_cache_destroy(pblk_blk_ws_cache);
+               kmem_cache_destroy(pblk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
@@ -212,30 +229,25 @@ static int pblk_init_global_caches(struct pblk *pblk)
        pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
                        0, 0, NULL);
        if (!pblk_w_rq_cache) {
-               kmem_cache_destroy(pblk_blk_ws_cache);
+               kmem_cache_destroy(pblk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
                kmem_cache_destroy(pblk_g_rq_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
        }
-
-       snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
-                       pblk->disk->disk_name);
-       pblk_line_meta_cache = kmem_cache_create(cache_name,
-                       pblk->lm.sec_bitmap_len, 0, 0, NULL);
-       if (!pblk_line_meta_cache) {
-               kmem_cache_destroy(pblk_blk_ws_cache);
-               kmem_cache_destroy(pblk_rec_cache);
-               kmem_cache_destroy(pblk_g_rq_cache);
-               kmem_cache_destroy(pblk_w_rq_cache);
-               up_write(&pblk_lock);
-               return -ENOMEM;
-       }
        up_write(&pblk_lock);
 
        return 0;
 }
 
+static void pblk_free_global_caches(struct pblk *pblk)
+{
+       kmem_cache_destroy(pblk_ws_cache);
+       kmem_cache_destroy(pblk_rec_cache);
+       kmem_cache_destroy(pblk_g_rq_cache);
+       kmem_cache_destroy(pblk_w_rq_cache);
+}
+
 static int pblk_core_init(struct pblk *pblk)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
@@ -247,70 +259,80 @@ static int pblk_core_init(struct pblk *pblk)
        if (pblk_init_global_caches(pblk))
                return -ENOMEM;
 
-       pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
-       if (!pblk->page_pool)
-               return -ENOMEM;
+       /* Internal bios can be at most the sectors signaled by the device. */
+       pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev),
+                       0);
+       if (!pblk->page_bio_pool)
+               goto free_global_caches;
 
-       pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
-                       pblk_blk_ws_cache);
-       if (!pblk->line_ws_pool)
-               goto free_page_pool;
+       pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
+                       pblk_ws_cache);
+       if (!pblk->gen_ws_pool)
+               goto free_page_bio_pool;
 
        pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
        if (!pblk->rec_pool)
-               goto free_blk_ws_pool;
+               goto free_gen_ws_pool;
 
-       pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+       pblk->r_rq_pool = mempool_create_slab_pool(geo->nr_luns,
                        pblk_g_rq_cache);
-       if (!pblk->g_rq_pool)
+       if (!pblk->r_rq_pool)
                goto free_rec_pool;
 
-       pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+       pblk->e_rq_pool = mempool_create_slab_pool(geo->nr_luns,
+                       pblk_g_rq_cache);
+       if (!pblk->e_rq_pool)
+               goto free_r_rq_pool;
+
+       pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns,
                        pblk_w_rq_cache);
        if (!pblk->w_rq_pool)
-               goto free_g_rq_pool;
-
-       pblk->line_meta_pool =
-               mempool_create_slab_pool(PBLK_META_POOL_SIZE,
-                       pblk_line_meta_cache);
-       if (!pblk->line_meta_pool)
-               goto free_w_rq_pool;
+               goto free_e_rq_pool;
 
        pblk->close_wq = alloc_workqueue("pblk-close-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
        if (!pblk->close_wq)
-               goto free_line_meta_pool;
+               goto free_w_rq_pool;
 
        pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
        if (!pblk->bb_wq)
                goto free_close_wq;
 
-       if (pblk_set_ppaf(pblk))
+       pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
+                       WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+       if (!pblk->r_end_wq)
                goto free_bb_wq;
 
+       if (pblk_set_ppaf(pblk))
+               goto free_r_end_wq;
+
        if (pblk_rwb_init(pblk))
-               goto free_bb_wq;
+               goto free_r_end_wq;
 
        INIT_LIST_HEAD(&pblk->compl_list);
 
        return 0;
 
+free_r_end_wq:
+       destroy_workqueue(pblk->r_end_wq);
 free_bb_wq:
        destroy_workqueue(pblk->bb_wq);
 free_close_wq:
        destroy_workqueue(pblk->close_wq);
-free_line_meta_pool:
-       mempool_destroy(pblk->line_meta_pool);
 free_w_rq_pool:
        mempool_destroy(pblk->w_rq_pool);
-free_g_rq_pool:
-       mempool_destroy(pblk->g_rq_pool);
+free_e_rq_pool:
+       mempool_destroy(pblk->e_rq_pool);
+free_r_rq_pool:
+       mempool_destroy(pblk->r_rq_pool);
 free_rec_pool:
        mempool_destroy(pblk->rec_pool);
-free_blk_ws_pool:
-       mempool_destroy(pblk->line_ws_pool);
-free_page_pool:
-       mempool_destroy(pblk->page_pool);
+free_gen_ws_pool:
+       mempool_destroy(pblk->gen_ws_pool);
+free_page_bio_pool:
+       mempool_destroy(pblk->page_bio_pool);
+free_global_caches:
+       pblk_free_global_caches(pblk);
        return -ENOMEM;
 }
@@ -319,21 +341,20 @@ static void pblk_core_free(struct pblk *pblk)
        if (pblk->close_wq)
                destroy_workqueue(pblk->close_wq);
 
+       if (pblk->r_end_wq)
+               destroy_workqueue(pblk->r_end_wq);
+
        if (pblk->bb_wq)
                destroy_workqueue(pblk->bb_wq);
 
-       mempool_destroy(pblk->page_pool);
-       mempool_destroy(pblk->line_ws_pool);
+       mempool_destroy(pblk->page_bio_pool);
+       mempool_destroy(pblk->gen_ws_pool);
        mempool_destroy(pblk->rec_pool);
-       mempool_destroy(pblk->g_rq_pool);
+       mempool_destroy(pblk->r_rq_pool);
+       mempool_destroy(pblk->e_rq_pool);
        mempool_destroy(pblk->w_rq_pool);
-       mempool_destroy(pblk->line_meta_pool);
 
-       kmem_cache_destroy(pblk_blk_ws_cache);
-       kmem_cache_destroy(pblk_rec_cache);
-       kmem_cache_destroy(pblk_g_rq_cache);
-       kmem_cache_destroy(pblk_w_rq_cache);
-       kmem_cache_destroy(pblk_line_meta_cache);
+       pblk_free_global_caches(pblk);
 }
 
 static void pblk_luns_free(struct pblk *pblk)
@@ -372,13 +393,11 @@ static void pblk_line_meta_free(struct pblk *pblk)
        kfree(l_mg->bb_aux);
        kfree(l_mg->vsc_list);
 
-       spin_lock(&l_mg->free_lock);
        for (i = 0; i < PBLK_DATA_LINES; i++) {
                kfree(l_mg->sline_meta[i]);
                pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
                kfree(l_mg->eline_meta[i]);
        }
-       spin_unlock(&l_mg->free_lock);
 
        kfree(pblk->lines);
 }
@@ -507,6 +526,13 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
                }
        }
 
+#ifdef CONFIG_NVM_DEBUG
+       pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
+#endif
+
+       /* Free full lines directly as GC has not been started yet */
+       pblk_gc_free_full_lines(pblk);
+
        if (!line) {
                /* Configure next line for user data */
                line = pblk_line_get_first_data(pblk);
@@ -630,7 +656,10 @@ static int pblk_lines_alloc_metadata(struct pblk *pblk)
 
 fail_free_emeta:
        while (--i >= 0) {
-               vfree(l_mg->eline_meta[i]->buf);
+               if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META)
+                       vfree(l_mg->eline_meta[i]->buf);
+               else
+                       kfree(l_mg->eline_meta[i]->buf);
                kfree(l_mg->eline_meta[i]);
        }
@@ -681,8 +710,8 @@ static int pblk_lines_init(struct pblk *pblk)
        lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
        lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
        lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
-       lm->high_thrs = lm->sec_per_line / 2;
-       lm->mid_thrs = lm->sec_per_line / 4;
+       lm->mid_thrs = lm->sec_per_line / 2;
+       lm->high_thrs = lm->sec_per_line / 4;
        lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
 
        /* Calculate necessary pages for smeta. See comment over struct
@@ -713,9 +742,13 @@ add_emeta_page:
                goto add_emeta_page;
        }
 
-       lm->emeta_bb = geo->nr_luns - i;
-       lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
-                       geo->sec_per_blk);
+       lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0;
+
+       lm->min_blk_line = 1;
+       if (geo->nr_luns > 1)
+               lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
+                       lm->emeta_sec[0], geo->sec_per_blk);
+
        if (lm->min_blk_line > lm->blk_per_line) {
                pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
                        lm->blk_per_line);
@@ -890,6 +923,11 @@ static void pblk_exit(void *private)
        down_write(&pblk_lock);
        pblk_gc_exit(pblk);
        pblk_tear_down(pblk);
+
+#ifdef CONFIG_NVM_DEBUG
+       pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
+#endif
+
        pblk_free(pblk);
        up_write(&pblk_lock);
 }
@@ -911,7 +949,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
        int ret;
 
        if (dev->identity.dom & NVM_RSP_L2P) {
-               pr_err("pblk: device-side L2P table not supported. (%x)\n",
+               pr_err("pblk: host-side L2P table not supported. (%x)\n",
                        dev->identity.dom);
                return ERR_PTR(-EINVAL);
        }
@@ -923,6 +961,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
        pblk->dev = dev;
        pblk->disk = tdisk;
        pblk->state = PBLK_STATE_RUNNING;
+       pblk->gc.gc_enabled = 0;
 
        spin_lock_init(&pblk->trans_lock);
        spin_lock_init(&pblk->lock);
@@ -944,6 +983,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
        atomic_long_set(&pblk->recov_writes, 0);
        atomic_long_set(&pblk->recov_writes, 0);
        atomic_long_set(&pblk->recov_gc_writes, 0);
+       atomic_long_set(&pblk->recov_gc_reads, 0);
 #endif
 
        atomic_long_set(&pblk->read_failed, 0);
@@ -1012,6 +1052,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
                        pblk->rwb.nr_entries);
 
        wake_up_process(pblk->writer_ts);
+
+       /* Check if we need to start GC */
+       pblk_gc_should_kick(pblk);
+
        return pblk;
 
 fail_stop_writer:
@@ -1044,6 +1088,7 @@ static struct nvm_tgt_type tt_pblk = {
        .sysfs_init = pblk_sysfs_init,
        .sysfs_exit = pblk_sysfs_exit,
+       .owner = THIS_MODULE,
 };
 
 static int __init pblk_module_init(void)