diff options
author | Gao Xiang <hsiangkao@redhat.com> | 2021-04-09 21:06:30 +0200 |
---|---|---|
committer | Gao Xiang <hsiangkao@redhat.com> | 2021-04-09 21:19:59 +0200 |
commit | 524887347fcb67faa0a63dd3c4c02ab48d4968d4 (patch) | |
tree | 2a11ca8b2e7c11e74b6f4d1cfb39d2a0c182d7a9 /fs/erofs/pcpubuf.c | |
parent | erofs: reserve physical_clusterbits[] (diff) | |
download | linux-524887347fcb67faa0a63dd3c4c02ab48d4968d4.tar.xz linux-524887347fcb67faa0a63dd3c4c02ab48d4968d4.zip |
erofs: introduce multipage per-CPU buffers
To deal the with the cases which inplace decompression is infeasible
for some inplace I/O. Per-CPU buffers was introduced to get rid of page
allocation latency and thrash for low-latency decompression algorithms
such as lz4.
For the big pcluster feature, introduce multipage per-CPU buffers to
keep such inplace I/O pclusters temporarily as well but note that
per-CPU pages are just consecutive virtually.
When a new big pcluster fs is mounted, its max pclustersize will be
read and per-CPU buffers can be growed if needed. Shrinking adjustable
per-CPU buffers is more complex (because we don't know if such size
is still be used), so currently just release them all when unloading.
Link: https://lore.kernel.org/r/20210409190630.19569-1-xiang@kernel.org
Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
Diffstat (limited to 'fs/erofs/pcpubuf.c')
-rw-r--r-- | fs/erofs/pcpubuf.c | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c new file mode 100644 index 000000000000..6c885575128a --- /dev/null +++ b/fs/erofs/pcpubuf.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) Gao Xiang <xiang@kernel.org> + * + * For low-latency decompression algorithms (e.g. lz4), reserve consecutive + * per-CPU virtual memory (in pages) in advance to store such inplace I/O + * data if inplace decompression is failed (due to unmet inplace margin for + * example). + */ +#include "internal.h" + +struct erofs_pcpubuf { + raw_spinlock_t lock; + void *ptr; + struct page **pages; + unsigned int nrpages; +}; + +static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb); + +void *erofs_get_pcpubuf(unsigned int requiredpages) + __acquires(pcb->lock) +{ + struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb); + + raw_spin_lock(&pcb->lock); + /* check if the per-CPU buffer is too small */ + if (requiredpages > pcb->nrpages) { + raw_spin_unlock(&pcb->lock); + put_cpu_var(erofs_pcb); + /* (for sparse checker) pretend pcb->lock is still taken */ + __acquire(pcb->lock); + return NULL; + } + return pcb->ptr; +} + +void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock) +{ + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id()); + + DBG_BUGON(pcb->ptr != ptr); + raw_spin_unlock(&pcb->lock); + put_cpu_var(erofs_pcb); +} + +/* the next step: support per-CPU page buffers hotplug */ +int erofs_pcpubuf_growsize(unsigned int nrpages) +{ + static DEFINE_MUTEX(pcb_resize_mutex); + static unsigned int pcb_nrpages; + LIST_HEAD(pagepool); + int delta, cpu, ret, i; + + mutex_lock(&pcb_resize_mutex); + delta = nrpages - pcb_nrpages; + ret = 0; + /* avoid shrinking pcpubuf, since no idea how many fses rely on */ + if (delta <= 0) + goto out; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + struct page **pages, **oldpages; + void *ptr, *old_ptr; + + pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + ret = -ENOMEM; + break; + } + + for (i = 0; i < nrpages; ++i) { + pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL); + if (!pages[i]) { + ret = -ENOMEM; + oldpages = pages; + goto free_pagearray; + } + } + ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL); + if (!ptr) { + ret = -ENOMEM; + oldpages = pages; + goto free_pagearray; + } + raw_spin_lock(&pcb->lock); + old_ptr = pcb->ptr; + pcb->ptr = ptr; + oldpages = pcb->pages; + pcb->pages = pages; + i = pcb->nrpages; + pcb->nrpages = nrpages; + raw_spin_unlock(&pcb->lock); + + if (!oldpages) { + DBG_BUGON(old_ptr); + continue; + } + + if (old_ptr) + vunmap(old_ptr); +free_pagearray: + while (i) + list_add(&oldpages[--i]->lru, &pagepool); + kfree(oldpages); + if (ret) + break; + } + pcb_nrpages = nrpages; + put_pages_list(&pagepool); +out: + mutex_unlock(&pcb_resize_mutex); + return ret; +} + +void erofs_pcpubuf_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + + raw_spin_lock_init(&pcb->lock); + } +} + +void erofs_pcpubuf_exit(void) +{ + int cpu, i; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + + if (pcb->ptr) { + vunmap(pcb->ptr); + pcb->ptr = NULL; + } + if (!pcb->pages) + continue; + + for (i = 0; i < pcb->nrpages; ++i) + if (pcb->pages[i]) + put_page(pcb->pages[i]); + kfree(pcb->pages); + pcb->pages = NULL; + } +} |