erofs: introduce multipage per-CPU buffers

To deal the with the cases which inplace decompression is infeasible for some inplace I/O. Per-CPU buffers was introduced to get rid of page allocation latency and thrash for low-latency decompression algorithms such as lz4. For the big pcluster feature, introduce multipage per-CPU buffers to keep such inplace I/O pclusters temporarily as well but note that per-CPU pages are just consecutive virtually. When a new big pcluster fs is mounted, its max pclustersize will be read and per-CPU buffers can be growed if needed. Shrinking adjustable per-CPU buffers is more complex (because we don't know if such size is still be used), so currently just release them all when unloading. Link: https://lore.kernel.org/r/20210409190630.19569-1-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
author: Gao Xiang <hsiangkao@redhat.com> 2021-04-09 21:06:30 +0200
committer: Gao Xiang <hsiangkao@redhat.com> 2021-04-09 21:19:59 +0200
commit: 524887347fcb67faa0a63dd3c4c02ab48d4968d4 (patch)
tree: 2a11ca8b2e7c11e74b6f4d1cfb39d2a0c182d7a9 /fs/erofs/pcpubuf.c
parent: erofs: reserve physical_clusterbits[] (diff)
download: linux-524887347fcb67faa0a63dd3c4c02ab48d4968d4.tar.xz
linux-524887347fcb67faa0a63dd3c4c02ab48d4968d4.zip
1 files changed, 148 insertions, 0 deletions
diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c
new file mode 100644
index 000000000000..6c885575128a
--- /dev/null
+++ b/fs/erofs/pcpubuf.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) Gao Xiang <xiang@kernel.org>
+ *
+ * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
+ * per-CPU virtual memory (in pages) in advance to store such inplace I/O
+ * data if inplace decompression is failed (due to unmet inplace margin for
+ * example).
+ */
+#include "internal.h"
+
+struct erofs_pcpubuf {
+	raw_spinlock_t lock;
+	void *ptr;
+	struct page **pages;
+	unsigned int nrpages;
+};
+
+static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
+
+void *erofs_get_pcpubuf(unsigned int requiredpages)
+	__acquires(pcb->lock)
+{
+	struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
+
+	raw_spin_lock(&pcb->lock);
+	/* check if the per-CPU buffer is too small */
+	if (requiredpages > pcb->nrpages) {
+		raw_spin_unlock(&pcb->lock);
+		put_cpu_var(erofs_pcb);
+		/* (for sparse checker) pretend pcb->lock is still taken */
+		__acquire(pcb->lock);
+		return NULL;
+	}
+	return pcb->ptr;
+}
+
+void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
+{
+	struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
+
+	DBG_BUGON(pcb->ptr != ptr);
+	raw_spin_unlock(&pcb->lock);
+	put_cpu_var(erofs_pcb);
+}
+
+/* the next step: support per-CPU page buffers hotplug */
+int erofs_pcpubuf_growsize(unsigned int nrpages)
+{
+	static DEFINE_MUTEX(pcb_resize_mutex);
+	static unsigned int pcb_nrpages;
+	LIST_HEAD(pagepool);
+	int delta, cpu, ret, i;
+
+	mutex_lock(&pcb_resize_mutex);
+	delta = nrpages - pcb_nrpages;
+	ret = 0;
+	/* avoid shrinking pcpubuf, since no idea how many fses rely on */
+	if (delta <= 0)
+		goto out;
+
+	for_each_possible_cpu(cpu) {
+		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
+		struct page **pages, **oldpages;
+		void *ptr, *old_ptr;
+
+		pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
+		if (!pages) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		for (i = 0; i < nrpages; ++i) {
+			pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
+			if (!pages[i]) {
+				ret = -ENOMEM;
+				oldpages = pages;
+				goto free_pagearray;
+			}
+		}
+		ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
+		if (!ptr) {
+			ret = -ENOMEM;
+			oldpages = pages;
+			goto free_pagearray;
+		}
+		raw_spin_lock(&pcb->lock);
+		old_ptr = pcb->ptr;
+		pcb->ptr = ptr;
+		oldpages = pcb->pages;
+		pcb->pages = pages;
+		i = pcb->nrpages;
+		pcb->nrpages = nrpages;
+		raw_spin_unlock(&pcb->lock);
+
+		if (!oldpages) {
+			DBG_BUGON(old_ptr);
+			continue;
+		}
+
+		if (old_ptr)
+			vunmap(old_ptr);
+free_pagearray:
+		while (i)
+			list_add(&oldpages[--i]->lru, &pagepool);
+		kfree(oldpages);
+		if (ret)
+			break;
+	}
+	pcb_nrpages = nrpages;
+	put_pages_list(&pagepool);
+out:
+	mutex_unlock(&pcb_resize_mutex);
+	return ret;
+}
+
+void erofs_pcpubuf_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
+
+		raw_spin_lock_init(&pcb->lock);
+	}
+}
+
+void erofs_pcpubuf_exit(void)
+{
+	int cpu, i;
+
+	for_each_possible_cpu(cpu) {
+		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
+
+		if (pcb->ptr) {
+			vunmap(pcb->ptr);
+			pcb->ptr = NULL;
+		}
+		if (!pcb->pages)
+			continue;
+
+		for (i = 0; i < pcb->nrpages; ++i)
+			if (pcb->pages[i])
+				put_page(pcb->pages[i]);
+		kfree(pcb->pages);
+		pcb->pages = NULL;
+	}
+}
author	Gao Xiang <hsiangkao@redhat.com>	2021-04-09 21:06:30 +0200
committer	Gao Xiang <hsiangkao@redhat.com>	2021-04-09 21:19:59 +0200
commit	524887347fcb67faa0a63dd3c4c02ab48d4968d4 (patch)
tree	2a11ca8b2e7c11e74b6f4d1cfb39d2a0c182d7a9 /fs/erofs/pcpubuf.c
parent	erofs: reserve physical_clusterbits[] (diff)
download	linux-524887347fcb67faa0a63dd3c4c02ab48d4968d4.tar.xz linux-524887347fcb67faa0a63dd3c4c02ab48d4968d4.zip