Diffstat (limited to 'fs/erofs')
-rw-r--r--  fs/erofs/Makefile             |   2
-rw-r--r--  fs/erofs/compress.h           |   4
-rw-r--r--  fs/erofs/data.c               | 157
-rw-r--r--  fs/erofs/decompressor.c       | 134
-rw-r--r--  fs/erofs/decompressor_lzma.c  |  19
-rw-r--r--  fs/erofs/dir.c                |  21
-rw-r--r--  fs/erofs/erofs_fs.h           |  23
-rw-r--r--  fs/erofs/inode.c              |  72
-rw-r--r--  fs/erofs/internal.h           |  57
-rw-r--r--  fs/erofs/namei.c              |  54
-rw-r--r--  fs/erofs/super.c              | 176
-rw-r--r--  fs/erofs/sysfs.c              | 258
-rw-r--r--  fs/erofs/xattr.c              | 135
-rw-r--r--  fs/erofs/xattr.h              |   1
-rw-r--r--  fs/erofs/zdata.c              | 436
-rw-r--r--  fs/erofs/zdata.h              |  24
-rw-r--r--  fs/erofs/zmap.c               | 237
17 files changed, 1101 insertions(+), 709 deletions(-)
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 756fe2d65272..8a3317e38e5a 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_EROFS_FS) += erofs.o
-erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
+erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 579406504919..19e6c56a9f47 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -12,7 +12,7 @@ struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;
- unsigned short pageofs_out;
+ unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize;
/* indicate the algorithm will be used for decompression */
@@ -87,6 +87,8 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
return page->mapping == MNGD_MAPPING(sbi);
}
+int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
+ unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 0e35ef3f9f3d..780db1e5f4b7 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -9,37 +9,77 @@
#include <linux/dax.h>
#include <trace/events/erofs.h>
-struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr)
+void erofs_unmap_metabuf(struct erofs_buf *buf)
{
- struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
- struct page *page;
-
- page = read_cache_page_gfp(mapping, blkaddr,
- mapping_gfp_constraint(mapping, ~__GFP_FS));
- /* should already be PageUptodate */
- if (!IS_ERR(page))
- lock_page(page);
- return page;
+ if (buf->kmap_type == EROFS_KMAP)
+ kunmap(buf->page);
+ else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
+ kunmap_atomic(buf->base);
+ buf->base = NULL;
+ buf->kmap_type = EROFS_NO_KMAP;
+}
+
+void erofs_put_metabuf(struct erofs_buf *buf)
+{
+ if (!buf->page)
+ return;
+ erofs_unmap_metabuf(buf);
+ put_page(buf->page);
+ buf->page = NULL;
+}
+
+void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
+ erofs_blk_t blkaddr, enum erofs_kmap_type type)
+{
+ struct address_space *const mapping = inode->i_mapping;
+ erofs_off_t offset = blknr_to_addr(blkaddr);
+ pgoff_t index = offset >> PAGE_SHIFT;
+ struct page *page = buf->page;
+
+ if (!page || page->index != index) {
+ erofs_put_metabuf(buf);
+ page = read_cache_page_gfp(mapping, index,
+ mapping_gfp_constraint(mapping, ~__GFP_FS));
+ if (IS_ERR(page))
+ return page;
+ /* should already be PageUptodate, no need to lock page */
+ buf->page = page;
+ }
+ if (buf->kmap_type == EROFS_NO_KMAP) {
+ if (type == EROFS_KMAP)
+ buf->base = kmap(page);
+ else if (type == EROFS_KMAP_ATOMIC)
+ buf->base = kmap_atomic(page);
+ buf->kmap_type = type;
+ } else if (buf->kmap_type != type) {
+ DBG_BUGON(1);
+ return ERR_PTR(-EFAULT);
+ }
+ if (type == EROFS_NO_KMAP)
+ return NULL;
+ return buf->base + (offset & ~PAGE_MASK);
+}
+
+void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
+ erofs_blk_t blkaddr, enum erofs_kmap_type type)
+{
+ return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
}
static int erofs_map_blocks_flatmode(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
{
- int err = 0;
erofs_blk_t nblocks, lastblk;
u64 offset = map->m_la;
struct erofs_inode *vi = EROFS_I(inode);
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
- trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
-
- nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+ nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
lastblk = nblocks - tailendpacking;
/* there is no hole in flatmode */
map->m_flags = EROFS_MAP_MAPPED;
-
if (offset < blknr_to_addr(lastblk)) {
map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
map->m_plen = blknr_to_addr(lastblk) - offset;
@@ -51,30 +91,23 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
vi->xattr_isize + erofs_blkoff(map->m_la);
map->m_plen = inode->i_size - offset;
- /* inline data should be located in one meta block */
- if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
+ /* inline data should be located in the same meta block */
+ if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu",
vi->nid);
DBG_BUGON(1);
- err = -EFSCORRUPTED;
- goto err_out;
+ return -EFSCORRUPTED;
}
-
map->m_flags |= EROFS_MAP_META;
} else {
erofs_err(inode->i_sb,
"internal error @ nid: %llu (size %llu), m_la 0x%llx",
vi->nid, inode->i_size, map->m_la);
DBG_BUGON(1);
- err = -EIO;
- goto err_out;
+ return -EIO;
}
-
- map->m_llen = map->m_plen;
-err_out:
- trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
- return err;
+ return 0;
}
static int erofs_map_blocks(struct inode *inode,
@@ -83,12 +116,14 @@ static int erofs_map_blocks(struct inode *inode,
struct super_block *sb = inode->i_sb;
struct erofs_inode *vi = EROFS_I(inode);
struct erofs_inode_chunk_index *idx;
- struct page *page;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
u64 chunknr;
unsigned int unit;
erofs_off_t pos;
+ void *kaddr;
int err = 0;
+ trace_erofs_map_blocks_enter(inode, map, flags);
map->m_deviceid = 0;
if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */
@@ -97,8 +132,10 @@ static int erofs_map_blocks(struct inode *inode,
goto out;
}
- if (vi->datalayout != EROFS_INODE_CHUNK_BASED)
- return erofs_map_blocks_flatmode(inode, map, flags);
+ if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
+ err = erofs_map_blocks_flatmode(inode, map, flags);
+ goto out;
+ }
if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
unit = sizeof(*idx); /* chunk index */
@@ -109,17 +146,18 @@ static int erofs_map_blocks(struct inode *inode,
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, unit) + unit * chunknr;
- page = erofs_get_meta_page(inode->i_sb, erofs_blknr(pos));
- if (IS_ERR(page))
- return PTR_ERR(page);
-
+ kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
+ if (IS_ERR(kaddr)) {
+ err = PTR_ERR(kaddr);
+ goto out;
+ }
map->m_la = chunknr << vi->chunkbits;
map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
/* handle block map */
if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
- __le32 *blkaddr = page_address(page) + erofs_blkoff(pos);
+ __le32 *blkaddr = kaddr + erofs_blkoff(pos);
if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
map->m_flags = 0;
@@ -130,7 +168,7 @@ static int erofs_map_blocks(struct inode *inode,
goto out_unlock;
}
/* parse chunk indexes */
- idx = page_address(page) + erofs_blkoff(pos);
+ idx = kaddr + erofs_blkoff(pos);
switch (le32_to_cpu(idx->blkaddr)) {
case EROFS_NULL_ADDR:
map->m_flags = 0;
@@ -143,10 +181,11 @@ static int erofs_map_blocks(struct inode *inode,
break;
}
out_unlock:
- unlock_page(page);
- put_page(page);
+ erofs_put_metabuf(&buf);
out:
- map->m_llen = map->m_plen;
+ if (!err)
+ map->m_llen = map->m_plen;
+ trace_erofs_map_blocks_exit(inode, map, flags, 0);
return err;
}
@@ -159,6 +198,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
/* primary device by default */
map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev;
+ map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
if (map->m_deviceid) {
down_read(&devs->rwsem);
@@ -169,6 +209,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
}
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
+ map->m_dax_part_off = dif->dax_part_off;
up_read(&devs->rwsem);
} else if (devs->extra_devices) {
down_read(&devs->rwsem);
@@ -185,6 +226,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
map->m_pa -= startoff;
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
+ map->m_dax_part_off = dif->dax_part_off;
break;
}
}
@@ -215,9 +257,11 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret)
return ret;
- iomap->bdev = mdev.m_bdev;
- iomap->dax_dev = mdev.m_daxdev;
iomap->offset = map.m_la;
+ if (flags & IOMAP_DAX)
+ iomap->dax_dev = mdev.m_daxdev;
+ else
+ iomap->bdev = mdev.m_bdev;
iomap->length = map.m_llen;
iomap->flags = 0;
iomap->private = NULL;
@@ -231,19 +275,21 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
}
if (map.m_flags & EROFS_MAP_META) {
- struct page *ipage;
+ void *ptr;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
iomap->type = IOMAP_INLINE;
- ipage = erofs_get_meta_page(inode->i_sb,
- erofs_blknr(mdev.m_pa));
- if (IS_ERR(ipage))
- return PTR_ERR(ipage);
- iomap->inline_data = page_address(ipage) +
- erofs_blkoff(mdev.m_pa);
- iomap->private = ipage;
+ ptr = erofs_read_metabuf(&buf, inode->i_sb,
+ erofs_blknr(mdev.m_pa), EROFS_KMAP);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
+ iomap->private = buf.base;
} else {
iomap->type = IOMAP_MAPPED;
iomap->addr = mdev.m_pa;
+ if (flags & IOMAP_DAX)
+ iomap->addr += mdev.m_dax_part_off;
}
return 0;
}
@@ -251,12 +297,17 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
ssize_t written, unsigned int flags, struct iomap *iomap)
{
- struct page *ipage = iomap->private;
+ void *ptr = iomap->private;
+
+ if (ptr) {
+ struct erofs_buf buf = {
+ .page = kmap_to_page(ptr),
+ .base = ptr,
+ .kmap_type = EROFS_KMAP,
+ };
- if (ipage) {
DBG_BUGON(iomap->type != IOMAP_INLINE);
- unlock_page(ipage);
- put_page(ipage);
+ erofs_put_metabuf(&buf);
} else {
DBG_BUGON(iomap->type == IOMAP_INLINE);
}
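
Aside (not part of the patch; the helper name below is hypothetical): the data.c changes replace erofs_get_meta_page() with a reusable struct erofs_buf. erofs_bread() reads a block through an arbitrary inode's page cache (a directory, for instance), while erofs_read_metabuf() is the same helper bound to sb->s_bdev->bd_inode. A minimal calling sketch based only on the signatures introduced above:

static int erofs_scan_meta_example(struct super_block *sb,
				   erofs_blk_t blkaddr, unsigned int nblks)
{
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	unsigned int i;
	void *ptr;

	for (i = 0; i < nblks; ++i) {
		/* the cached page is re-read/remapped only when the block changes */
		ptr = erofs_read_metabuf(&buf, sb, blkaddr + i, EROFS_KMAP);
		if (IS_ERR(ptr)) {
			erofs_put_metabuf(&buf);	/* no-op if nothing is held */
			return PTR_ERR(ptr);
		}
		/* ... parse the on-disk metadata at ptr ... */
	}
	erofs_put_metabuf(&buf);	/* kunmap + put_page of the last block */
	return 0;
}
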
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index bf37fc76b182..3efa686c7644 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -16,6 +16,14 @@
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
#endif
+struct z_erofs_lz4_decompress_ctx {
+ struct z_erofs_decompress_req *rq;
+ /* # of encoded, decoded pages */
+ unsigned int inpages, outpages;
+ /* decoded block total length (used for in-place decompression) */
+ unsigned int oend;
+};
+
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size)
@@ -56,11 +64,10 @@ int z_erofs_load_lz4_config(struct super_block *sb,
* Fill all gaps with bounce pages if it's a sparse page list. Also check if
* all physical pages are consecutive, which can be seen for moderate CR.
*/
-static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
+static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
struct page **pagepool)
{
- const unsigned int nr =
- PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ struct z_erofs_decompress_req *rq = ctx->rq;
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
BITS_PER_LONG)] = { 0 };
@@ -70,7 +77,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
unsigned int i, j, top;
top = 0;
- for (i = j = 0; i < nr; ++i, ++j) {
+ for (i = j = 0; i < ctx->outpages; ++i, ++j) {
struct page *const page = rq->out[i];
struct page *victim;
@@ -112,41 +119,36 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
return kaddr ? 1 : 0;
}
-static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
+static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
void *inpage, unsigned int *inputmargin, int *maptype,
- bool support_0padding)
+ bool may_inplace)
{
- unsigned int nrpages_in, nrpages_out;
- unsigned int ofull, oend, inputsize, total, i, j;
+ struct z_erofs_decompress_req *rq = ctx->rq;
+ unsigned int omargin, total, i, j;
struct page **in;
void *src, *tmp;
- inputsize = rq->inputsize;
- nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT;
- oend = rq->pageofs_out + rq->outputsize;
- ofull = PAGE_ALIGN(oend);
- nrpages_out = ofull >> PAGE_SHIFT;
-
if (rq->inplace_io) {
- if (rq->partial_decoding || !support_0padding ||
- ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize))
+ omargin = PAGE_ALIGN(ctx->oend) - ctx->oend;
+ if (rq->partial_decoding || !may_inplace ||
+ omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;
- for (i = 0; i < nrpages_in; ++i) {
+ for (i = 0; i < ctx->inpages; ++i) {
DBG_BUGON(rq->in[i] == NULL);
- for (j = 0; j < nrpages_out - nrpages_in + i; ++j)
+ for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
if (rq->out[j] == rq->in[i])
goto docopy;
}
}
- if (nrpages_in <= 1) {
+ if (ctx->inpages <= 1) {
*maptype = 0;
return inpage;
}
kunmap_atomic(inpage);
might_sleep();
- src = erofs_vm_map_ram(rq->in, nrpages_in);
+ src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
*maptype = 1;
@@ -155,7 +157,7 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
docopy:
/* Or copy compressed data which can be overlapped to per-CPU buffer */
in = rq->in;
- src = erofs_get_pcpubuf(nrpages_in);
+ src = erofs_get_pcpubuf(ctx->inpages);
if (!src) {
DBG_BUGON(1);
kunmap_atomic(inpage);
@@ -182,36 +184,53 @@ docopy:
return src;
}
-static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
+/*
+ * Get the exact inputsize with zero_padding feature.
+ * - For LZ4, it should work if zero_padding feature is on (5.3+);
+ * - For MicroLZMA, it'd be enabled all the time.
+ */
+int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
+ unsigned int padbufsize)
+{
+ const char *padend;
+
+ padend = memchr_inv(padbuf, 0, padbufsize);
+ if (!padend)
+ return -EFSCORRUPTED;
+ rq->inputsize -= padend - padbuf;
+ rq->pageofs_in += padend - padbuf;
+ return 0;
+}
+
+static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
u8 *out)
{
+ struct z_erofs_decompress_req *rq = ctx->rq;
+ bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
u8 *headpage, *src;
- bool support_0padding;
int ret, maptype;
DBG_BUGON(*rq->in == NULL);
headpage = kmap_atomic(*rq->in);
- inputmargin = 0;
- support_0padding = false;
- /* decompression inplace is only safe when 0padding is enabled */
- if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) {
+ /* LZ4 decompression inplace is only safe if zero_padding is enabled */
+ if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) {
support_0padding = true;
-
- while (!headpage[inputmargin & ~PAGE_MASK])
- if (!(++inputmargin & ~PAGE_MASK))
- break;
-
- if (inputmargin >= rq->inputsize) {
+ ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
+ min_t(unsigned int, rq->inputsize,
+ EROFS_BLKSIZ - rq->pageofs_in));
+ if (ret) {
kunmap_atomic(headpage);
- return -EIO;
+ return ret;
}
+ may_inplace = !((rq->pageofs_in + rq->inputsize) &
+ (EROFS_BLKSIZ - 1));
}
- rq->inputsize -= inputmargin;
- src = z_erofs_lz4_handle_inplace_io(rq, headpage, &inputmargin,
- &maptype, support_0padding);
+ inputmargin = rq->pageofs_in;
+ src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
+ &maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);
@@ -240,9 +259,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
}
if (maptype == 0) {
- kunmap_atomic(src);
+ kunmap_atomic(headpage);
} else if (maptype == 1) {
- vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT);
+ vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
} else {
@@ -255,14 +274,18 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
- const unsigned int nrpages_out =
- PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ struct z_erofs_lz4_decompress_ctx ctx;
unsigned int dst_maptype;
void *dst;
int ret;
+ ctx.rq = rq;
+ ctx.oend = rq->pageofs_out + rq->outputsize;
+ ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT;
+ ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+
/* one optimized fast path only for non bigpcluster cases yet */
- if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) {
+ if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out);
dst_maptype = 0;
@@ -270,27 +293,25 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
}
/* general decoding path which can be used for all cases */
- ret = z_erofs_lz4_prepare_dstpages(rq, pagepool);
- if (ret < 0)
+ ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool);
+ if (ret < 0) {
return ret;
- if (ret) {
+ } else if (ret > 0) {
dst = page_address(*rq->out);
dst_maptype = 1;
- goto dstmap_out;
+ } else {
+ dst = erofs_vm_map_ram(rq->out, ctx.outpages);
+ if (!dst)
+ return -ENOMEM;
+ dst_maptype = 2;
}
- dst = erofs_vm_map_ram(rq->out, nrpages_out);
- if (!dst)
- return -ENOMEM;
- dst_maptype = 2;
-
dstmap_out:
- ret = z_erofs_lz4_decompress_mem(rq, dst + rq->pageofs_out);
-
+ ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
if (!dst_maptype)
kunmap_atomic(dst);
else if (dst_maptype == 2)
- vm_unmap_ram(dst, nrpages_out);
+ vm_unmap_ram(dst, ctx.outpages);
return ret;
}
@@ -299,7 +320,8 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
- const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out;
+ const unsigned int righthalf = min_t(unsigned int, rq->outputsize,
+ PAGE_SIZE - rq->pageofs_out);
unsigned char *src, *dst;
if (nrpages_out > 2) {
@@ -312,7 +334,7 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
return 0;
}
- src = kmap_atomic(*rq->in);
+ src = kmap_atomic(*rq->in) + rq->pageofs_in;
if (rq->out[0]) {
dst = kmap_atomic(rq->out[0]);
memcpy(dst + rq->pageofs_out, src, righthalf);
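
Worked illustration (assumed numbers; the wrapper below is hypothetical and merely restates the calling convention used by both the LZ4 and LZMA paths): with zero_padding, compressed data is aligned to the end of its physical block, so the head page may start with zero bytes that must be trimmed before decompression.

static int zero_padding_example(struct z_erofs_decompress_req *rq,
				const char *headpage)
{
	/*
	 * Suppose EROFS_BLKSIZ == 4096, rq->pageofs_in == 0 and the first
	 * 100 bytes of headpage are zero padding: memchr_inv() inside
	 * z_erofs_fixup_insize() finds the first non-zero byte at offset
	 * 100, turning {inputsize = 4096, pageofs_in = 0} into
	 * {inputsize = 3996, pageofs_in = 100}.  An all-zero pad buffer
	 * is reported as -EFSCORRUPTED.
	 */
	return z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
				    min_t(unsigned int, rq->inputsize,
					  EROFS_BLKSIZ - rq->pageofs_in));
}
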
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 50045510a1f4..05a3063cf2bc 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -156,7 +156,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int nrpages_in =
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
- unsigned int inputmargin, inlen, outlen, pageofs;
+ unsigned int inlen, outlen, pageofs;
struct z_erofs_lzma *strm;
u8 *kin;
bool bounced = false;
@@ -164,16 +164,13 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
/* 1. get the exact LZMA compressed size */
kin = kmap(*rq->in);
- inputmargin = 0;
- while (!kin[inputmargin & ~PAGE_MASK])
- if (!(++inputmargin & ~PAGE_MASK))
- break;
-
- if (inputmargin >= PAGE_SIZE) {
+ err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
+ min_t(unsigned int, rq->inputsize,
+ EROFS_BLKSIZ - rq->pageofs_in));
+ if (err) {
kunmap(*rq->in);
- return -EFSCORRUPTED;
+ return err;
}
- rq->inputsize -= inputmargin;
/* 2. get an available lzma context */
again:
@@ -193,9 +190,9 @@ again:
xz_dec_microlzma_reset(strm->state, inlen, outlen,
!rq->partial_decoding);
pageofs = rq->pageofs_out;
- strm->buf.in = kin + inputmargin;
+ strm->buf.in = kin + rq->pageofs_in;
strm->buf.in_pos = 0;
- strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin);
+ strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - rq->pageofs_in);
inlen -= strm->buf.in_size;
strm->buf.out = NULL;
strm->buf.out_pos = 0;
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index eee9b0b31b63..18e59821c597 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2022, Alibaba Cloud
*/
#include "internal.h"
@@ -67,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
static int erofs_readdir(struct file *f, struct dir_context *ctx)
{
struct inode *dir = file_inode(f);
- struct address_space *mapping = dir->i_mapping;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
const size_t dirsize = i_size_read(dir);
unsigned int i = ctx->pos / EROFS_BLKSIZ;
unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
@@ -75,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
bool initial = true;
while (ctx->pos < dirsize) {
- struct page *dentry_page;
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
- dentry_page = read_mapping_page(mapping, i, NULL);
- if (dentry_page == ERR_PTR(-ENOMEM)) {
- err = -ENOMEM;
- break;
- } else if (IS_ERR(dentry_page)) {
+ de = erofs_bread(&buf, dir, i, EROFS_KMAP);
+ if (IS_ERR(de)) {
erofs_err(dir->i_sb,
"fail to readdir of logical block %u of nid %llu",
i, EROFS_I(dir)->nid);
- err = -EFSCORRUPTED;
+ err = PTR_ERR(de);
break;
}
- de = (struct erofs_dirent *)kmap(dentry_page);
-
nameoff = le16_to_cpu(de->nameoff);
-
if (nameoff < sizeof(struct erofs_dirent) ||
nameoff >= PAGE_SIZE) {
erofs_err(dir->i_sb,
@@ -119,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
err = erofs_fill_dentries(dir, ctx, de, &ofs,
nameoff, maxsize);
skip_this:
- kunmap(dentry_page);
-
- put_page(dentry_page);
-
ctx->pos = blknr_to_addr(i) + ofs;
if (err)
@@ -130,6 +120,7 @@ skip_this:
++i;
ofs = 0;
}
+ erofs_put_metabuf(&buf);
return err < 0 ? err : 0;
}
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 083997a034e5..1238ca104f09 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -12,24 +12,27 @@
#define EROFS_SUPER_OFFSET 1024
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
+#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
* be incompatible with this kernel version.
*/
-#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
+#define EROFS_FEATURE_INCOMPAT_ZERO_PADDING 0x00000001
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
+#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
#define EROFS_ALL_FEATURE_INCOMPAT \
- (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
+ (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
- EROFS_FEATURE_INCOMPAT_COMPR_HEAD2)
+ EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
+ EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
#define EROFS_SB_EXTSLOT_SIZE 16
@@ -184,8 +187,8 @@ struct erofs_inode_extended {
__le32 i_uid;
__le32 i_gid;
- __le64 i_ctime;
- __le32 i_ctime_nsec;
+ __le64 i_mtime;
+ __le32 i_mtime_nsec;
__le32 i_nlink;
__u8 i_reserved2[16];
};
@@ -209,7 +212,7 @@ struct erofs_xattr_ibody_header {
__le32 h_reserved;
__u8 h_shared_count;
__u8 h_reserved2[7];
- __le32 h_shared_xattrs[0]; /* shared xattr id array */
+ __le32 h_shared_xattrs[]; /* shared xattr id array */
};
/* Name indexes */
@@ -226,7 +229,7 @@ struct erofs_xattr_entry {
__u8 e_name_index; /* attribute name index */
__le16 e_value_size; /* size of attribute value */
/* followed by e_name and e_value */
- char e_name[0]; /* attribute name */
+ char e_name[]; /* attribute name */
};
static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount)
@@ -292,13 +295,17 @@ struct z_erofs_lzma_cfgs {
* (4B) + 2B + (4B) if compacted 2B is on.
* bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
+ * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
*/
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
+#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
struct z_erofs_map_header {
- __le32 h_reserved1;
+ __le16 h_reserved1;
+ /* indicates the encoded size of tailpacking data */
+ __le16 h_idata_size;
__le16 h_advise;
/*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01);
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 2345f1de438e..e8b37ba5e9ad 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -13,8 +13,8 @@
* the inode payload page if it's an extended inode) in order to fill
* inline data if possible.
*/
-static struct page *erofs_read_inode(struct inode *inode,
- unsigned int *ofs)
+static void *erofs_read_inode(struct erofs_buf *buf,
+ struct inode *inode, unsigned int *ofs)
{
struct super_block *sb = inode->i_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
@@ -22,7 +22,7 @@ static struct page *erofs_read_inode(struct inode *inode,
const erofs_off_t inode_loc = iloc(sbi, vi->nid);
erofs_blk_t blkaddr, nblks = 0;
- struct page *page;
+ void *kaddr;
struct erofs_inode_compact *dic;
struct erofs_inode_extended *die, *copied = NULL;
unsigned int ifmt;
@@ -34,14 +34,14 @@ static struct page *erofs_read_inode(struct inode *inode,
erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u",
__func__, vi->nid, *ofs, blkaddr);
- page = erofs_get_meta_page(sb, blkaddr);
- if (IS_ERR(page)) {
+ kaddr = erofs_read_metabuf(buf, sb, blkaddr, EROFS_KMAP);
+ if (IS_ERR(kaddr)) {
erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld",
- vi->nid, PTR_ERR(page));
- return page;
+ vi->nid, PTR_ERR(kaddr));
+ return kaddr;
}
- dic = page_address(page) + *ofs;
+ dic = kaddr + *ofs;
ifmt = le16_to_cpu(dic->i_format);
if (ifmt & ~EROFS_I_ALL) {
@@ -62,12 +62,12 @@ static struct page *erofs_read_inode(struct inode *inode,
switch (erofs_inode_version(ifmt)) {
case EROFS_INODE_LAYOUT_EXTENDED:
vi->inode_isize = sizeof(struct erofs_inode_extended);
- /* check if the inode acrosses page boundary */
- if (*ofs + vi->inode_isize <= PAGE_SIZE) {
+ /* check if the extended inode crosses block boundary */
+ if (*ofs + vi->inode_isize <= EROFS_BLKSIZ) {
*ofs += vi->inode_isize;
die = (struct erofs_inode_extended *)dic;
} else {
- const unsigned int gotten = PAGE_SIZE - *ofs;
+ const unsigned int gotten = EROFS_BLKSIZ - *ofs;
copied = kmalloc(vi->inode_isize, GFP_NOFS);
if (!copied) {
@@ -75,18 +75,16 @@ static struct page *erofs_read_inode(struct inode *inode,
goto err_out;
}
memcpy(copied, dic, gotten);
- unlock_page(page);
- put_page(page);
-
- page = erofs_get_meta_page(sb, blkaddr + 1);
- if (IS_ERR(page)) {
- erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld",
- vi->nid, PTR_ERR(page));
+ kaddr = erofs_read_metabuf(buf, sb, blkaddr + 1,
+ EROFS_KMAP);
+ if (IS_ERR(kaddr)) {
+ erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld",
+ vi->nid, PTR_ERR(kaddr));
kfree(copied);
- return page;
+ return kaddr;
}
*ofs = vi->inode_isize - gotten;
- memcpy((u8 *)copied + gotten, page_address(page), *ofs);
+ memcpy((u8 *)copied + gotten, kaddr, *ofs);
die = copied;
}
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
@@ -115,8 +113,8 @@ static struct page *erofs_read_inode(struct inode *inode,
set_nlink(inode, le32_to_cpu(die->i_nlink));
/* extended inode has its own timestamp */
- inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime);
- inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec);
+ inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime);
+ inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec);
inode->i_size = le64_to_cpu(die->i_size);
@@ -200,7 +198,7 @@ static struct page *erofs_read_inode(struct inode *inode,
inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
else
inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK;
- return page;
+ return kaddr;
bogusimode:
erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu",
@@ -209,12 +207,11 @@ bogusimode:
err_out:
DBG_BUGON(1);
kfree(copied);
- unlock_page(page);
- put_page(page);
+ erofs_put_metabuf(buf);
return ERR_PTR(err);
}
-static int erofs_fill_symlink(struct inode *inode, void *data,
+static int erofs_fill_symlink(struct inode *inode, void *kaddr,
unsigned int m_pofs)
{
struct erofs_inode *vi = EROFS_I(inode);
@@ -222,7 +219,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
/* if it cannot be handled with fast symlink scheme */
if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
- inode->i_size >= PAGE_SIZE) {
+ inode->i_size >= EROFS_BLKSIZ) {
inode->i_op = &erofs_symlink_iops;
return 0;
}
@@ -232,8 +229,8 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
return -ENOMEM;
m_pofs += vi->xattr_isize;
- /* inline symlink data shouldn't cross page boundary as well */
- if (m_pofs + inode->i_size > PAGE_SIZE) {
+ /* inline symlink data shouldn't cross block boundary */
+ if (m_pofs + inode->i_size > EROFS_BLKSIZ) {
kfree(lnk);
erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu",
@@ -241,8 +238,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
DBG_BUGON(1);
return -EFSCORRUPTED;
}
-
- memcpy(lnk, data + m_pofs, inode->i_size);
+ memcpy(lnk, kaddr + m_pofs, inode->i_size);
lnk[inode->i_size] = '\0';
inode->i_link = lnk;
@@ -253,16 +249,17 @@ static int erofs_fill_symlink(struct inode *inode, void *data,
static int erofs_fill_inode(struct inode *inode, int isdir)
{
struct erofs_inode *vi = EROFS_I(inode);
- struct page *page;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+ void *kaddr;
unsigned int ofs;
int err = 0;
trace_erofs_fill_inode(inode, isdir);
/* read inode base data from disk */
- page = erofs_read_inode(inode, &ofs);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ kaddr = erofs_read_inode(&buf, inode, &ofs);
+ if (IS_ERR(kaddr))
+ return PTR_ERR(kaddr);
/* setup the new inode */
switch (inode->i_mode & S_IFMT) {
@@ -278,7 +275,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir)
inode->i_fop = &erofs_dir_fops;
break;
case S_IFLNK:
- err = erofs_fill_symlink(inode, page_address(page), ofs);
+ err = erofs_fill_symlink(inode, kaddr, ofs);
if (err)
goto out_unlock;
inode_nohighmem(inode);
@@ -302,8 +299,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir)
inode->i_mapping->a_ops = &erofs_raw_access_aops;
out_unlock:
- unlock_page(page);
- put_page(page);
+ erofs_put_metabuf(&buf);
return err;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 3265688af7f9..5298c4ee277d 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -51,17 +51,24 @@ struct erofs_device_info {
char *path;
struct block_device *bdev;
struct dax_device *dax_dev;
+ u64 dax_part_off;
u32 blocks;
u32 mapped_blkaddr;
};
+enum {
+ EROFS_SYNC_DECOMPRESS_AUTO,
+ EROFS_SYNC_DECOMPRESS_FORCE_ON,
+ EROFS_SYNC_DECOMPRESS_FORCE_OFF
+};
+
struct erofs_mount_opts {
#ifdef CONFIG_EROFS_FS_ZIP
/* current strategy of how to use managed cache */
unsigned char cache_strategy;
- /* strategy of sync decompression (false - auto, true - force on) */
- bool readahead_sync_decompress;
+ /* strategy of sync decompression (0 - auto, 1 - force on, 2 - force off) */
+ unsigned int sync_decompress;
/* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages;
@@ -109,6 +116,7 @@ struct erofs_sb_info {
#endif /* CONFIG_EROFS_FS_ZIP */
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
+ u64 dax_part_off;
u64 total_blocks;
u32 primarydevice_blocks;
@@ -134,6 +142,10 @@ struct erofs_sb_info {
u8 volume_name[16]; /* volume name */
u32 feature_compat;
u32 feature_incompat;
+
+ /* sysfs support */
+ struct kobject s_kobj; /* /sys/fs/erofs/<devname> */
+ struct completion s_kobj_unregister;
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
@@ -241,6 +253,19 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
#error erofs cannot be used in this platform
#endif
+enum erofs_kmap_type {
+ EROFS_NO_KMAP, /* don't map the buffer */
+ EROFS_KMAP, /* use kmap() to map the buffer */
+ EROFS_KMAP_ATOMIC, /* use kmap_atomic() to map the buffer */
+};
+
+struct erofs_buf {
+ struct page *page;
+ void *base;
+ enum erofs_kmap_type kmap_type;
+};
+#define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL })
+
#define ROOT_NID(sb) ((sb)->root_nid)
#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ)
@@ -258,10 +283,13 @@ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
return sbi->feature_##compat & EROFS_FEATURE_##feature; \
}
-EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
+EROFS_FEATURE_FUNCS(zero_padding, incompat, INCOMPAT_ZERO_PADDING)
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
+EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
+EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
+EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
/* atomic flag definitions */
@@ -296,6 +324,9 @@ struct erofs_inode {
unsigned short z_advise;
unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits;
+ unsigned long z_tailextent_headlcn;
+ erofs_off_t z_idataoff;
+ unsigned short z_idata_size;
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
@@ -390,14 +421,14 @@ enum {
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
struct erofs_map_blocks {
+ struct erofs_buf buf;
+
erofs_off_t m_pa, m_la;
u64 m_plen, m_llen;
unsigned short m_deviceid;
char m_algorithmformat;
unsigned int m_flags;
-
- struct page *mpage;
};
/* Flags used by erofs_map_blocks_flatmode() */
@@ -409,6 +440,8 @@ struct erofs_map_blocks {
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
/* Used to map the whole extent if non-negligible data is requested for LZMA */
#define EROFS_GET_BLOCKS_READMORE 0x0004
+/* Used to map tail extent for tailpacking inline pcluster */
+#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
@@ -436,6 +469,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_dev {
struct block_device *m_bdev;
struct dax_device *m_daxdev;
+ u64 m_dax_part_off;
erofs_off_t m_pa;
unsigned int m_deviceid;
@@ -443,7 +477,12 @@ struct erofs_map_dev {
/* data.c */
extern const struct file_operations erofs_file_fops;
-struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr);
+void erofs_unmap_metabuf(struct erofs_buf *buf);
+void erofs_put_metabuf(struct erofs_buf *buf);
+void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
+ erofs_blk_t blkaddr, enum erofs_kmap_type type);
+void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
+ erofs_blk_t blkaddr, enum erofs_kmap_type type);
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
@@ -498,6 +537,12 @@ int erofs_pcpubuf_growsize(unsigned int nrpages);
void erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
+/* sysfs.c */
+int erofs_register_sysfs(struct super_block *sb);
+void erofs_unregister_sysfs(struct super_block *sb);
+int __init erofs_init_sysfs(void);
+void erofs_exit_sysfs(void);
+
/* utils.c / zdata.c */
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp);
static inline void erofs_pagepool_add(struct page **pagepool,
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index 8629e616028c..554efa363317 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2022, Alibaba Cloud
*/
#include "xattr.h"
@@ -86,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
return ERR_PTR(-ENOENT);
}
-static struct page *find_target_block_classic(struct inode *dir,
- struct erofs_qstr *name,
- int *_ndirents)
+static void *find_target_block_classic(struct erofs_buf *target,
+ struct inode *dir,
+ struct erofs_qstr *name,
+ int *_ndirents)
{
unsigned int startprfx, endprfx;
int head, back;
- struct address_space *const mapping = dir->i_mapping;
- struct page *candidate = ERR_PTR(-ENOENT);
+ void *candidate = ERR_PTR(-ENOENT);
startprfx = endprfx = 0;
head = 0;
@@ -101,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir,
while (head <= back) {
const int mid = head + (back - head) / 2;
- struct page *page = read_mapping_page(mapping, mid, NULL);
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+ struct erofs_dirent *de;
- if (!IS_ERR(page)) {
- struct erofs_dirent *de = kmap_atomic(page);
+ de = erofs_bread(&buf, dir, mid, EROFS_KMAP);
+ if (!IS_ERR(de)) {
const int nameoff = nameoff_from_disk(de->nameoff,
EROFS_BLKSIZ);
const int ndirents = nameoff / sizeof(*de);
@@ -113,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir,
struct erofs_qstr dname;
if (!ndirents) {
- kunmap_atomic(de);
- put_page(page);
+ erofs_put_metabuf(&buf);
erofs_err(dir->i_sb,
"corrupted dir block %d @ nid %llu",
mid, EROFS_I(dir)->nid);
DBG_BUGON(1);
- page = ERR_PTR(-EFSCORRUPTED);
+ de = ERR_PTR(-EFSCORRUPTED);
goto out;
}
@@ -135,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir,
/* string comparison without already matched prefix */
diff = erofs_dirnamecmp(name, &dname, &matched);
- kunmap_atomic(de);
if (!diff) {
*_ndirents = 0;
@@ -145,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir,
startprfx = matched;
if (!IS_ERR(candidate))
- put_page(candidate);
- candidate = page;
+ erofs_put_metabuf(target);
+ *target = buf;
+ candidate = de;
*_ndirents = ndirents;
} else {
- put_page(page);
+ erofs_put_metabuf(&buf);
back = mid - 1;
endprfx = matched;
@@ -158,8 +159,8 @@ static struct page *find_target_block_classic(struct inode *dir,
}
out: /* free if the candidate is valid */
if (!IS_ERR(candidate))
- put_page(candidate);
- return page;
+ erofs_put_metabuf(target);
+ return de;
}
return candidate;
}
@@ -169,8 +170,7 @@ int erofs_namei(struct inode *dir,
erofs_nid_t *nid, unsigned int *d_type)
{
int ndirents;
- struct page *page;
- void *data;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_dirent *de;
struct erofs_qstr qn;
@@ -181,26 +181,20 @@ int erofs_namei(struct inode *dir,
qn.end = name->name + name->len;
ndirents = 0;
- page = find_target_block_classic(dir, &qn, &ndirents);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ de = find_target_block_classic(&buf, dir, &qn, &ndirents);
+ if (IS_ERR(de))
+ return PTR_ERR(de);
- data = kmap_atomic(page);
/* the target page has been mapped */
if (ndirents)
- de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
- else
- de = (struct erofs_dirent *)data;
+ de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents);
if (!IS_ERR(de)) {
*nid = le64_to_cpu(de->nid);
*d_type = de->file_type;
}
-
- kunmap_atomic(data);
- put_page(page);
-
+ erofs_put_metabuf(&buf);
return PTR_ERR_OR_ZERO(de);
}
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 6a969b1e0ee6..0c4b41130c2f 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -2,6 +2,7 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2021, Alibaba Cloud
*/
#include <linux/module.h>
#include <linux/buffer_head.h>
@@ -83,7 +84,7 @@ static void erofs_inode_init_once(void *ptr)
static struct inode *erofs_alloc_inode(struct super_block *sb)
{
struct erofs_inode *vi =
- kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
+ alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);
if (!vi)
return NULL;
@@ -124,80 +125,50 @@ static bool check_layout_compatibility(struct super_block *sb,
#ifdef CONFIG_EROFS_FS_ZIP
/* read variable-sized metadata, offset will be aligned by 4-byte */
-static void *erofs_read_metadata(struct super_block *sb, struct page **pagep,
+static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
erofs_off_t *offset, int *lengthp)
{
- struct page *page = *pagep;
u8 *buffer, *ptr;
int len, i, cnt;
- erofs_blk_t blk;
*offset = round_up(*offset, 4);
- blk = erofs_blknr(*offset);
+ ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP);
+ if (IS_ERR(ptr))
+ return ptr;
- if (!page || page->index != blk) {
- if (page) {
- unlock_page(page);
- put_page(page);
- }
- page = erofs_get_meta_page(sb, blk);
- if (IS_ERR(page))
- goto err_nullpage;
- }
-
- ptr = kmap(page);
len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]);
if (!len)
len = U16_MAX + 1;
buffer = kmalloc(len, GFP_KERNEL);
- if (!buffer) {
- buffer = ERR_PTR(-ENOMEM);
- goto out;
- }
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
*offset += sizeof(__le16);
*lengthp = len;
for (i = 0; i < len; i += cnt) {
cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i);
- blk = erofs_blknr(*offset);
-
- if (!page || page->index != blk) {
- if (page) {
- kunmap(page);
- unlock_page(page);
- put_page(page);
- }
- page = erofs_get_meta_page(sb, blk);
- if (IS_ERR(page)) {
- kfree(buffer);
- goto err_nullpage;
- }
- ptr = kmap(page);
+ ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset),
+ EROFS_KMAP);
+ if (IS_ERR(ptr)) {
+ kfree(buffer);
+ return ptr;
}
memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt);
*offset += cnt;
}
-out:
- kunmap(page);
- *pagep = page;
return buffer;
-err_nullpage:
- *pagep = NULL;
- return page;
}
static int erofs_load_compr_cfgs(struct super_block *sb,
struct erofs_super_block *dsb)
{
- struct erofs_sb_info *sbi;
- struct page *page;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
unsigned int algs, alg;
erofs_off_t offset;
- int size, ret;
+ int size, ret = 0;
- sbi = EROFS_SB(sb);
sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
-
if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
erofs_err(sb, "try to load compressed fs with unsupported algorithms %x",
sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
@@ -205,20 +176,17 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
}
offset = EROFS_SUPER_OFFSET + sbi->sb_size;
- page = NULL;
alg = 0;
- ret = 0;
-
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
void *data;
if (!(algs & 1))
continue;
- data = erofs_read_metadata(sb, &page, &offset, &size);
+ data = erofs_read_metadata(sb, &buf, &offset, &size);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
- goto err;
+ break;
}
switch (alg) {
@@ -234,13 +202,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
}
kfree(data);
if (ret)
- goto err;
- }
-err:
- if (page) {
- unlock_page(page);
- put_page(page);
+ break;
}
+ erofs_put_metabuf(&buf);
return ret;
}
#else
@@ -261,7 +225,7 @@ static int erofs_init_devices(struct super_block *sb,
struct erofs_sb_info *sbi = EROFS_SB(sb);
unsigned int ondisk_extradevs;
erofs_off_t pos;
- struct page *page = NULL;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_device_info *dif;
struct erofs_deviceslot *dis;
void *ptr;
@@ -285,22 +249,13 @@ static int erofs_init_devices(struct super_block *sb,
pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
down_read(&sbi->devs->rwsem);
idr_for_each_entry(&sbi->devs->tree, dif, id) {
- erofs_blk_t blk = erofs_blknr(pos);
struct block_device *bdev;
- if (!page || page->index != blk) {
- if (page) {
- kunmap(page);
- unlock_page(page);
- put_page(page);
- }
-
- page = erofs_get_meta_page(sb, blk);
- if (IS_ERR(page)) {
- up_read(&sbi->devs->rwsem);
- return PTR_ERR(page);
- }
- ptr = kmap(page);
+ ptr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
+ EROFS_KMAP);
+ if (IS_ERR(ptr)) {
+ err = PTR_ERR(ptr);
+ break;
}
dis = ptr + erofs_blkoff(pos);
@@ -309,43 +264,36 @@ static int erofs_init_devices(struct super_block *sb,
sb->s_type);
if (IS_ERR(bdev)) {
err = PTR_ERR(bdev);
- goto err_out;
+ break;
}
dif->bdev = bdev;
- dif->dax_dev = fs_dax_get_by_bdev(bdev);
+ dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off);
dif->blocks = le32_to_cpu(dis->blocks);
dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
sbi->total_blocks += dif->blocks;
pos += EROFS_DEVT_SLOT_SIZE;
}
-err_out:
up_read(&sbi->devs->rwsem);
- if (page) {
- kunmap(page);
- unlock_page(page);
- put_page(page);
- }
+ erofs_put_metabuf(&buf);
return err;
}
static int erofs_read_superblock(struct super_block *sb)
{
struct erofs_sb_info *sbi;
- struct page *page;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_super_block *dsb;
unsigned int blkszbits;
void *data;
int ret;
- page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL);
- if (IS_ERR(page)) {
+ data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
+ if (IS_ERR(data)) {
erofs_err(sb, "cannot read erofs superblock");
- return PTR_ERR(page);
+ return PTR_ERR(data);
}
sbi = EROFS_SB(sb);
-
- data = kmap(page);
dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
ret = -EINVAL;
@@ -411,9 +359,11 @@ static int erofs_read_superblock(struct super_block *sb)
/* handle multiple devices */
ret = erofs_init_devices(sb, dsb);
+
+ if (erofs_sb_has_ztailpacking(sbi))
+ erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
out:
- kunmap(page);
- put_page(page);
+ erofs_put_metabuf(&buf);
return ret;
}
@@ -423,7 +373,7 @@ static void erofs_default_options(struct erofs_fs_context *ctx)
#ifdef CONFIG_EROFS_FS_ZIP
ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->opt.max_sync_decompress_pages = 3;
- ctx->opt.readahead_sync_decompress = false;
+ ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
set_opt(&ctx->opt, XATTR_USER);
@@ -582,25 +532,29 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
return ret;
}
-static void erofs_managed_cache_invalidatepage(struct page *page,
- unsigned int offset,
- unsigned int length)
+/*
+ * It will be called only on inode eviction. In case that there are still some
+ * decompression requests in progress, wait with rescheduling for a bit here.
+ * We could introduce an extra locking instead but it seems unnecessary.
+ */
+static void erofs_managed_cache_invalidate_folio(struct folio *folio,
+ size_t offset, size_t length)
{
- const unsigned int stop = length + offset;
+ const size_t stop = length + offset;
- DBG_BUGON(!PageLocked(page));
+ DBG_BUGON(!folio_test_locked(folio));
/* Check for potential overflow in debug mode */
- DBG_BUGON(stop > PAGE_SIZE || stop < length);
+ DBG_BUGON(stop > folio_size(folio) || stop < length);
- if (offset == 0 && stop == PAGE_SIZE)
- while (!erofs_managed_cache_releasepage(page, GFP_NOFS))
+ if (offset == 0 && stop == folio_size(folio))
+ while (!erofs_managed_cache_releasepage(&folio->page, GFP_NOFS))
cond_resched();
}
static const struct address_space_operations managed_cache_aops = {
.releasepage = erofs_managed_cache_releasepage,
- .invalidatepage = erofs_managed_cache_invalidatepage,
+ .invalidate_folio = erofs_managed_cache_invalidate_folio,
};
static int erofs_init_managed_cache(struct super_block *sb)
@@ -615,8 +569,7 @@ static int erofs_init_managed_cache(struct super_block *sb)
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &managed_cache_aops;
- mapping_set_gfp_mask(inode->i_mapping,
- GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->managed_cache = inode;
return 0;
}
@@ -644,7 +597,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
- sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
+ sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off);
sbi->devs = ctx->devs;
ctx->devs = NULL;
@@ -652,10 +605,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
- if (test_opt(&sbi->opt, DAX_ALWAYS) &&
- !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
- errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
- clear_opt(&sbi->opt, DAX_ALWAYS);
+ if (test_opt(&sbi->opt, DAX_ALWAYS)) {
+ BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
+
+ if (!sbi->dax_dev) {
+ errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+ clear_opt(&sbi->opt, DAX_ALWAYS);
+ }
}
sb->s_flags |= SB_RDONLY | SB_NOATIME;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -695,6 +651,10 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
+ err = erofs_register_sysfs(sb);
+ if (err)
+ return err;
+
erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi));
return 0;
}
@@ -808,6 +768,7 @@ static void erofs_put_super(struct super_block *sb)
DBG_BUGON(!sbi);
+ erofs_unregister_sysfs(sb);
erofs_shrinker_unregister(sb);
#ifdef CONFIG_EROFS_FS_ZIP
iput(sbi->managed_cache);
@@ -852,6 +813,10 @@ static int __init erofs_module_init(void)
if (err)
goto zip_err;
+ err = erofs_init_sysfs();
+ if (err)
+ goto sysfs_err;
+
err = register_filesystem(&erofs_fs_type);
if (err)
goto fs_err;
@@ -859,6 +824,8 @@ static int __init erofs_module_init(void)
return 0;
fs_err:
+ erofs_exit_sysfs();
+sysfs_err:
z_erofs_exit_zip_subsystem();
zip_err:
z_erofs_lzma_exit();
@@ -877,6 +844,7 @@ static void __exit erofs_module_exit(void)
/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
rcu_barrier();
+ erofs_exit_sysfs();
z_erofs_exit_zip_subsystem();
z_erofs_lzma_exit();
erofs_exit_shrinker();
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
new file mode 100644
index 000000000000..f3babf1e6608
--- /dev/null
+++ b/fs/erofs/sysfs.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd.
+ * https://www.oppo.com/
+ */
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+
+#include "internal.h"
+
+enum {
+ attr_feature,
+ attr_pointer_ui,
+ attr_pointer_bool,
+};
+
+enum {
+ struct_erofs_sb_info,
+ struct_erofs_mount_opts,
+};
+
+struct erofs_attr {
+ struct attribute attr;
+ short attr_id;
+ int struct_type, offset;
+};
+
+#define EROFS_ATTR(_name, _mode, _id) \
+static struct erofs_attr erofs_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .attr_id = attr_##_id, \
+}
+#define EROFS_ATTR_FUNC(_name, _mode) EROFS_ATTR(_name, _mode, _name)
+#define EROFS_ATTR_FEATURE(_name) EROFS_ATTR(_name, 0444, feature)
+
+#define EROFS_ATTR_OFFSET(_name, _mode, _id, _struct) \
+static struct erofs_attr erofs_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .attr_id = attr_##_id, \
+ .struct_type = struct_##_struct, \
+ .offset = offsetof(struct _struct, _name),\
+}
+
+#define EROFS_ATTR_RW(_name, _id, _struct) \
+ EROFS_ATTR_OFFSET(_name, 0644, _id, _struct)
+
+#define EROFS_RO_ATTR(_name, _id, _struct) \
+ EROFS_ATTR_OFFSET(_name, 0444, _id, _struct)
+
+#define EROFS_ATTR_RW_UI(_name, _struct) \
+ EROFS_ATTR_RW(_name, pointer_ui, _struct)
+
+#define EROFS_ATTR_RW_BOOL(_name, _struct) \
+ EROFS_ATTR_RW(_name, pointer_bool, _struct)
+
+#define ATTR_LIST(name) (&erofs_attr_##name.attr)
+
+#ifdef CONFIG_EROFS_FS_ZIP
+EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts);
+#endif
+
+static struct attribute *erofs_attrs[] = {
+#ifdef CONFIG_EROFS_FS_ZIP
+ ATTR_LIST(sync_decompress),
+#endif
+ NULL,
+};
+ATTRIBUTE_GROUPS(erofs);
+
+/* Features this copy of erofs supports */
+EROFS_ATTR_FEATURE(zero_padding);
+EROFS_ATTR_FEATURE(compr_cfgs);
+EROFS_ATTR_FEATURE(big_pcluster);
+EROFS_ATTR_FEATURE(chunked_file);
+EROFS_ATTR_FEATURE(device_table);
+EROFS_ATTR_FEATURE(compr_head2);
+EROFS_ATTR_FEATURE(sb_chksum);
+EROFS_ATTR_FEATURE(ztailpacking);
+
+static struct attribute *erofs_feat_attrs[] = {
+ ATTR_LIST(zero_padding),
+ ATTR_LIST(compr_cfgs),
+ ATTR_LIST(big_pcluster),
+ ATTR_LIST(chunked_file),
+ ATTR_LIST(device_table),
+ ATTR_LIST(compr_head2),
+ ATTR_LIST(sb_chksum),
+ ATTR_LIST(ztailpacking),
+ NULL,
+};
+ATTRIBUTE_GROUPS(erofs_feat);
+
+static unsigned char *__struct_ptr(struct erofs_sb_info *sbi,
+ int struct_type, int offset)
+{
+ if (struct_type == struct_erofs_sb_info)
+ return (unsigned char *)sbi + offset;
+ if (struct_type == struct_erofs_mount_opts)
+ return (unsigned char *)&sbi->opt + offset;
+ return NULL;
+}
+
+static ssize_t erofs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info,
+ s_kobj);
+ struct erofs_attr *a = container_of(attr, struct erofs_attr, attr);
+ unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset);
+
+ switch (a->attr_id) {
+ case attr_feature:
+ return sysfs_emit(buf, "supported\n");
+ case attr_pointer_ui:
+ if (!ptr)
+ return 0;
+ return sysfs_emit(buf, "%u\n", *(unsigned int *)ptr);
+ case attr_pointer_bool:
+ if (!ptr)
+ return 0;
+ return sysfs_emit(buf, "%d\n", *(bool *)ptr);
+ }
+ return 0;
+}
+
+static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info,
+ s_kobj);
+ struct erofs_attr *a = container_of(attr, struct erofs_attr, attr);
+ unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset);
+ unsigned long t;
+ int ret;
+
+ switch (a->attr_id) {
+ case attr_pointer_ui:
+ if (!ptr)
+ return 0;
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+ if (t != (unsigned int)t)
+ return -ERANGE;
+#ifdef CONFIG_EROFS_FS_ZIP
+ if (!strcmp(a->attr.name, "sync_decompress") &&
+ (t > EROFS_SYNC_DECOMPRESS_FORCE_OFF))
+ return -EINVAL;
+#endif
+ *(unsigned int *)ptr = t;
+ return len;
+ case attr_pointer_bool:
+ if (!ptr)
+ return 0;
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+ if (t != 0 && t != 1)
+ return -EINVAL;
+ *(bool *)ptr = !!t;
+ return len;
+ }
+ return 0;
+}
+
+static void erofs_sb_release(struct kobject *kobj)
+{
+ struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info,
+ s_kobj);
+ complete(&sbi->s_kobj_unregister);
+}
+
+static const struct sysfs_ops erofs_attr_ops = {
+ .show = erofs_attr_show,
+ .store = erofs_attr_store,
+};
+
+static struct kobj_type erofs_sb_ktype = {
+ .default_groups = erofs_groups,
+ .sysfs_ops = &erofs_attr_ops,
+ .release = erofs_sb_release,
+};
+
+static struct kobj_type erofs_ktype = {
+ .sysfs_ops = &erofs_attr_ops,
+};
+
+static struct kset erofs_root = {
+ .kobj = {.ktype = &erofs_ktype},
+};
+
+static struct kobj_type erofs_feat_ktype = {
+ .default_groups = erofs_feat_groups,
+ .sysfs_ops = &erofs_attr_ops,
+};
+
+static struct kobject erofs_feat = {
+ .kset = &erofs_root,
+};
+
+int erofs_register_sysfs(struct super_block *sb)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ int err;
+
+ sbi->s_kobj.kset = &erofs_root;
+ init_completion(&sbi->s_kobj_unregister);
+ err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto put_sb_kobj;
+ return 0;
+
+put_sb_kobj:
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+ return err;
+}
+
+void erofs_unregister_sysfs(struct super_block *sb)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+ if (sbi->s_kobj.state_in_sysfs) {
+ kobject_del(&sbi->s_kobj);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+ }
+}
+
+int __init erofs_init_sysfs(void)
+{
+ int ret;
+
+ kobject_set_name(&erofs_root.kobj, "erofs");
+ erofs_root.kobj.parent = fs_kobj;
+ ret = kset_register(&erofs_root);
+ if (ret)
+ goto root_err;
+
+ ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype,
+ NULL, "features");
+ if (ret)
+ goto feat_err;
+ return ret;
+
+feat_err:
+ kobject_put(&erofs_feat);
+ kset_unregister(&erofs_root);
+root_err:
+ return ret;
+}
+
+void erofs_exit_sysfs(void)
+{
+ kobject_put(&erofs_feat);
+ kset_unregister(&erofs_root);
+}
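
For reference, the layout this file creates (paths follow from the kset/kobject names above; <dev> stands for sb->s_id):

	/sys/fs/erofs/features/<feature>	read-only, each reports "supported"
						(zero_padding, compr_cfgs, big_pcluster, chunked_file,
						 device_table, compr_head2, sb_chksum, ztailpacking)
	/sys/fs/erofs/<dev>/sync_decompress	0 = auto, 1 = force on, 2 = force off;
						values above 2 are rejected with -EINVAL
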
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 01c581e93c5f..8106bcb5a38d 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -2,39 +2,20 @@
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
+ * Copyright (C) 2021-2022, Alibaba Cloud
*/
#include <linux/security.h>
#include "xattr.h"
struct xattr_iter {
struct super_block *sb;
- struct page *page;
+ struct erofs_buf buf;
void *kaddr;
erofs_blk_t blkaddr;
unsigned int ofs;
};
-static inline void xattr_iter_end(struct xattr_iter *it, bool atomic)
-{
- /* the only user of kunmap() is 'init_inode_xattrs' */
- if (!atomic)
- kunmap(it->page);
- else
- kunmap_atomic(it->kaddr);
-
- unlock_page(it->page);
- put_page(it->page);
-}
-
-static inline void xattr_iter_end_final(struct xattr_iter *it)
-{
- if (!it->page)
- return;
-
- xattr_iter_end(it, true);
-}
-
static int init_inode_xattrs(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
@@ -43,7 +24,6 @@ static int init_inode_xattrs(struct inode *inode)
struct erofs_xattr_ibody_header *ih;
struct super_block *sb;
struct erofs_sb_info *sbi;
- bool atomic_map;
int ret = 0;
/* the most case is that xattrs of this inode are initialized. */
@@ -91,26 +71,23 @@ static int init_inode_xattrs(struct inode *inode)
sb = inode->i_sb;
sbi = EROFS_SB(sb);
+ it.buf = __EROFS_BUF_INITIALIZER;
it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize);
it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
- it.page = erofs_get_meta_page(sb, it.blkaddr);
- if (IS_ERR(it.page)) {
- ret = PTR_ERR(it.page);
+ /* read in shared xattr array (non-atomic, see kmalloc below) */
+ it.kaddr = erofs_read_metabuf(&it.buf, sb, it.blkaddr, EROFS_KMAP);
+ if (IS_ERR(it.kaddr)) {
+ ret = PTR_ERR(it.kaddr);
goto out_unlock;
}
- /* read in shared xattr array (non-atomic, see kmalloc below) */
- it.kaddr = kmap(it.page);
- atomic_map = false;
-
ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
-
vi->xattr_shared_count = ih->h_shared_count;
vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
sizeof(uint), GFP_KERNEL);
if (!vi->xattr_shared_xattrs) {
- xattr_iter_end(&it, atomic_map);
+ erofs_put_metabuf(&it.buf);
ret = -ENOMEM;
goto out_unlock;
}
@@ -122,25 +99,22 @@ static int init_inode_xattrs(struct inode *inode)
if (it.ofs >= EROFS_BLKSIZ) {
/* cannot be unaligned */
DBG_BUGON(it.ofs != EROFS_BLKSIZ);
- xattr_iter_end(&it, atomic_map);
- it.page = erofs_get_meta_page(sb, ++it.blkaddr);
- if (IS_ERR(it.page)) {
+ it.kaddr = erofs_read_metabuf(&it.buf, sb, ++it.blkaddr,
+ EROFS_KMAP);
+ if (IS_ERR(it.kaddr)) {
kfree(vi->xattr_shared_xattrs);
vi->xattr_shared_xattrs = NULL;
- ret = PTR_ERR(it.page);
+ ret = PTR_ERR(it.kaddr);
goto out_unlock;
}
-
- it.kaddr = kmap_atomic(it.page);
- atomic_map = true;
it.ofs = 0;
}
vi->xattr_shared_xattrs[i] =
le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
it.ofs += sizeof(__le32);
}
- xattr_iter_end(&it, atomic_map);
+ erofs_put_metabuf(&it.buf);
/* paired with smp_mb() at the beginning of the function. */
smp_mb();
@@ -172,19 +146,11 @@ static inline int xattr_iter_fixup(struct xattr_iter *it)
if (it->ofs < EROFS_BLKSIZ)
return 0;
- xattr_iter_end(it, true);
-
it->blkaddr += erofs_blknr(it->ofs);
-
- it->page = erofs_get_meta_page(it->sb, it->blkaddr);
- if (IS_ERR(it->page)) {
- int err = PTR_ERR(it->page);
-
- it->page = NULL;
- return err;
- }
-
- it->kaddr = kmap_atomic(it->page);
+ it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr,
+ EROFS_KMAP_ATOMIC);
+ if (IS_ERR(it->kaddr))
+ return PTR_ERR(it->kaddr);
it->ofs = erofs_blkoff(it->ofs);
return 0;
}
@@ -207,11 +173,10 @@ static int inline_xattr_iter_begin(struct xattr_iter *it,
it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs);
it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
- it->page = erofs_get_meta_page(inode->i_sb, it->blkaddr);
- if (IS_ERR(it->page))
- return PTR_ERR(it->page);
-
- it->kaddr = kmap_atomic(it->page);
+ it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr,
+ EROFS_KMAP_ATOMIC);
+ if (IS_ERR(it->kaddr))
+ return PTR_ERR(it->kaddr);
return vi->xattr_isize - xattr_header_sz;
}
@@ -272,7 +237,7 @@ static int xattr_foreach(struct xattr_iter *it,
it->ofs = 0;
}
- slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
+ slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs,
entry.e_name_len - processed);
/* handle name */
@@ -307,7 +272,7 @@ static int xattr_foreach(struct xattr_iter *it,
it->ofs = 0;
}
- slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
+ slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs,
value_sz - processed);
op->value(it, processed, it->kaddr + it->ofs, slice);
it->ofs += slice;
@@ -386,8 +351,6 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
if (ret != -ENOATTR)
break;
}
- xattr_iter_end_final(&it->it);
-
return ret ? ret : it->buffer_size;
}
@@ -404,26 +367,16 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
-
- if (!i || blkaddr != it->it.blkaddr) {
- if (i)
- xattr_iter_end(&it->it, true);
-
- it->it.page = erofs_get_meta_page(sb, blkaddr);
- if (IS_ERR(it->it.page))
- return PTR_ERR(it->it.page);
-
- it->it.kaddr = kmap_atomic(it->it.page);
- it->it.blkaddr = blkaddr;
- }
+ it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
+ EROFS_KMAP_ATOMIC);
+ if (IS_ERR(it->it.kaddr))
+ return PTR_ERR(it->it.kaddr);
+ it->it.blkaddr = blkaddr;
ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
if (ret != -ENOATTR)
break;
}
- if (vi->xattr_shared_count)
- xattr_iter_end_final(&it->it);
-
return ret ? ret : it->buffer_size;
}
@@ -452,10 +405,11 @@ int erofs_getxattr(struct inode *inode, int index,
return ret;
it.index = index;
-
it.name.len = strlen(name);
if (it.name.len > EROFS_NAME_LEN)
return -ERANGE;
+
+ it.it.buf = __EROFS_BUF_INITIALIZER;
it.name.name = name;
it.buffer = buffer;
@@ -465,6 +419,7 @@ int erofs_getxattr(struct inode *inode, int index,
ret = inline_getxattr(inode, &it);
if (ret == -ENOATTR)
ret = shared_getxattr(inode, &it);
+ erofs_put_metabuf(&it.it.buf);
return ret;
}
@@ -607,7 +562,6 @@ static int inline_listxattr(struct listxattr_iter *it)
if (ret)
break;
}
- xattr_iter_end_final(&it->it);
return ret ? ret : it->buffer_ofs;
}
@@ -625,25 +579,16 @@ static int shared_listxattr(struct listxattr_iter *it)
xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
- if (!i || blkaddr != it->it.blkaddr) {
- if (i)
- xattr_iter_end(&it->it, true);
-
- it->it.page = erofs_get_meta_page(sb, blkaddr);
- if (IS_ERR(it->it.page))
- return PTR_ERR(it->it.page);
-
- it->it.kaddr = kmap_atomic(it->it.page);
- it->it.blkaddr = blkaddr;
- }
+ it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr,
+ EROFS_KMAP_ATOMIC);
+ if (IS_ERR(it->it.kaddr))
+ return PTR_ERR(it->it.kaddr);
+ it->it.blkaddr = blkaddr;
ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
if (ret)
break;
}
- if (vi->xattr_shared_count)
- xattr_iter_end_final(&it->it);
-
return ret ? ret : it->buffer_ofs;
}
@@ -659,6 +604,7 @@ ssize_t erofs_listxattr(struct dentry *dentry,
if (ret)
return ret;
+ it.it.buf = __EROFS_BUF_INITIALIZER;
it.dentry = dentry;
it.buffer = buffer;
it.buffer_size = buffer_size;
@@ -667,9 +613,10 @@ ssize_t erofs_listxattr(struct dentry *dentry,
it.it.sb = dentry->d_sb;
ret = inline_listxattr(&it);
- if (ret < 0 && ret != -ENOATTR)
- return ret;
- return shared_listxattr(&it);
+ if (ret >= 0 || ret == -ENOATTR)
+ ret = shared_listxattr(&it);
+ erofs_put_metabuf(&it.it.buf);
+ return ret;
}
#ifdef CONFIG_EROFS_FS_POSIX_ACL
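
[Editor's note] The xattr changes above replace the page/kmap iterator with the erofs_buf helpers: a buffer is initialized once with __EROFS_BUF_INITIALIZER, erofs_read_metabuf() reuses the cached block when the address matches and re-reads otherwise, and a single erofs_put_metabuf() releases it on every exit path. A rough userspace analogue of that lifecycle follows (assumed behaviour only; the demo_* names and the plain-file backing are invented, not kernel code).

#include <stdio.h>

#define DEMO_BLKSIZ 4096

struct demo_buf {
	long blkaddr;			/* which block is cached, -1 = none */
	unsigned char data[DEMO_BLKSIZ];
};

#define DEMO_BUF_INITIALIZER ((struct demo_buf){ .blkaddr = -1 })

/* analogous to erofs_read_metabuf(): reuse the cached block if possible */
static void *demo_read_metabuf(struct demo_buf *buf, FILE *dev, long blkaddr)
{
	if (buf->blkaddr != blkaddr) {
		if (fseek(dev, blkaddr * DEMO_BLKSIZ, SEEK_SET) ||
		    fread(buf->data, 1, DEMO_BLKSIZ, dev) != DEMO_BLKSIZ)
			return NULL;
		buf->blkaddr = blkaddr;
	}
	return buf->data;
}

/* analogous to erofs_put_metabuf(): drop whatever is cached */
static void demo_put_metabuf(struct demo_buf *buf)
{
	buf->blkaddr = -1;
}

int main(int argc, char **argv)
{
	struct demo_buf buf = DEMO_BUF_INITIALIZER;
	FILE *dev = fopen(argc > 1 ? argv[1] : "image.bin", "rb");
	void *kaddr;

	if (!dev)
		return 1;
	kaddr = demo_read_metabuf(&buf, dev, 0);	/* reads block 0 */
	if (kaddr)
		printf("first byte: %02x\n", *(unsigned char *)kaddr);
	kaddr = demo_read_metabuf(&buf, dev, 0);	/* hits the cache */
	demo_put_metabuf(&buf);
	fclose(dev);
	return 0;
}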
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
index 94090c74b3f7..332462c59f11 100644
--- a/fs/erofs/xattr.h
+++ b/fs/erofs/xattr.h
@@ -86,4 +86,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu);
#endif
#endif
-
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 9a249bfc2770..0ed880f42525 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
{
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
int i;
for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
- if (pcl->pclusterpages > pcs->maxpages)
+ if (pclusterpages > pcs->maxpages)
continue;
kmem_cache_free(pcs->slab, pcl);
@@ -191,7 +192,10 @@ enum z_erofs_collectmode {
COLLECT_PRIMARY_FOLLOWED,
};
-struct z_erofs_collector {
+struct z_erofs_decompress_frontend {
+ struct inode *const inode;
+ struct erofs_map_blocks map;
+
struct z_erofs_pagevec_ctor vector;
struct z_erofs_pcluster *pcl, *tailpcl;
@@ -201,13 +205,6 @@ struct z_erofs_collector {
z_erofs_next_pcluster_t owned_head;
enum z_erofs_collectmode mode;
-};
-
-struct z_erofs_decompress_frontend {
- struct inode *const inode;
-
- struct z_erofs_collector clt;
- struct erofs_map_blocks map;
bool readahead;
/* used for applying cache strategy on the fly */
@@ -215,30 +212,30 @@ struct z_erofs_decompress_frontend {
erofs_off_t headoffset;
};
-#define COLLECTOR_INIT() { \
- .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = COLLECT_PRIMARY_FOLLOWED }
-
#define DECOMPRESS_FRONTEND_INIT(__i) { \
- .inode = __i, .clt = COLLECTOR_INIT(), \
- .backmost = true, }
+ .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
+ .mode = COLLECT_PRIMARY_FOLLOWED }
static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
static DEFINE_MUTEX(z_pagemap_global_lock);
-static void preload_compressed_pages(struct z_erofs_collector *clt,
- struct address_space *mc,
- enum z_erofs_cache_alloctype type,
- struct page **pagepool)
+static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
+ enum z_erofs_cache_alloctype type,
+ struct page **pagepool)
{
- struct z_erofs_pcluster *pcl = clt->pcl;
+ struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
+ struct z_erofs_pcluster *pcl = fe->pcl;
bool standalone = true;
+ /*
+ * optimistic allocation without direct reclaim since inplace I/O
+ * can be used if low memory otherwise.
+ */
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
struct page **pages;
pgoff_t index;
- if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
+ if (fe->mode < COLLECT_PRIMARY_FOLLOWED)
return;
pages = pcl->compressed_pages;
@@ -287,7 +284,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
* managed cache since it can be moved to the bypass queue instead.
*/
if (standalone)
- clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
}
/* called by erofs_shrinker to get rid of all compressed_pages */
@@ -298,6 +295,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
container_of(grp, struct z_erofs_pcluster, obj);
int i;
+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
/*
	 * refcount of workgroup is now frozen as 1,
* therefore no need to worry about available decompression users.
@@ -331,6 +329,7 @@ int erofs_try_to_free_cached_page(struct page *page)
if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
unsigned int i;
+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
for (i = 0; i < pcl->pclusterpages; ++i) {
if (pcl->compressed_pages[i] == page) {
WRITE_ONCE(pcl->compressed_pages[i], NULL);
@@ -347,47 +346,47 @@ int erofs_try_to_free_cached_page(struct page *page)
}
/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
-static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
+static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
struct page *page)
{
- struct z_erofs_pcluster *const pcl = clt->pcl;
+ struct z_erofs_pcluster *const pcl = fe->pcl;
- while (clt->icpage_ptr > pcl->compressed_pages)
- if (!cmpxchg(--clt->icpage_ptr, NULL, page))
+ while (fe->icpage_ptr > pcl->compressed_pages)
+ if (!cmpxchg(--fe->icpage_ptr, NULL, page))
return true;
return false;
}
/* callers must be with collection lock held */
-static int z_erofs_attach_page(struct z_erofs_collector *clt,
+static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
struct page *page, enum z_erofs_page_type type,
bool pvec_safereuse)
{
int ret;
/* give priority for inplaceio */
- if (clt->mode >= COLLECT_PRIMARY &&
+ if (fe->mode >= COLLECT_PRIMARY &&
type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
- z_erofs_try_inplace_io(clt, page))
+ z_erofs_try_inplace_io(fe, page))
return 0;
- ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
+ ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
pvec_safereuse);
- clt->cl->vcnt += (unsigned int)ret;
+ fe->cl->vcnt += (unsigned int)ret;
return ret ? 0 : -EAGAIN;
}
-static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
+static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
{
- struct z_erofs_pcluster *pcl = clt->pcl;
- z_erofs_next_pcluster_t *owned_head = &clt->owned_head;
+ struct z_erofs_pcluster *pcl = f->pcl;
+ z_erofs_next_pcluster_t *owned_head = &f->owned_head;
/* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
*owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
/* so we can attach this pcluster to our submission chain. */
- clt->mode = COLLECT_PRIMARY_FOLLOWED;
+ f->mode = COLLECT_PRIMARY_FOLLOWED;
return;
}
@@ -398,24 +397,24 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
*owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
- clt->mode = COLLECT_PRIMARY_HOOKED;
- clt->tailpcl = NULL;
+ f->mode = COLLECT_PRIMARY_HOOKED;
+ f->tailpcl = NULL;
return;
}
/* type 3, it belongs to a chain, but it isn't the end of the chain */
- clt->mode = COLLECT_PRIMARY;
+ f->mode = COLLECT_PRIMARY;
}
-static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
+static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
{
- struct z_erofs_pcluster *pcl = clt->pcl;
+ struct z_erofs_pcluster *pcl = fe->pcl;
struct z_erofs_collection *cl;
unsigned int length;
/* to avoid unexpected loop formed by corrupted images */
- if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
+ if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
@@ -446,18 +445,19 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
}
mutex_lock(&cl->lock);
/* used to check tail merging loop due to corrupted images */
- if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
- clt->tailpcl = pcl;
+ if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ fe->tailpcl = pcl;
- z_erofs_try_to_claim_pcluster(clt);
- clt->cl = cl;
+ z_erofs_try_to_claim_pcluster(fe);
+ fe->cl = cl;
return 0;
}
-static int z_erofs_register_collection(struct z_erofs_collector *clt,
+static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
{
+ bool ztailpacking = map->m_flags & EROFS_MAP_META;
struct z_erofs_pcluster *pcl;
struct z_erofs_collection *cl;
struct erofs_workgroup *grp;
@@ -469,20 +469,20 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
}
/* no available pcluster, let's allocate one */
- pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
+ pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
+ map->m_plen >> PAGE_SHIFT);
if (IS_ERR(pcl))
return PTR_ERR(pcl);
atomic_set(&pcl->obj.refcount, 1);
- pcl->obj.index = map->m_pa >> PAGE_SHIFT;
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
(map->m_flags & EROFS_MAP_FULL_MAPPED ?
Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
/* new pclusters should be claimed as type 1, primary and followed */
- pcl->next = clt->owned_head;
- clt->mode = COLLECT_PRIMARY_FOLLOWED;
+ pcl->next = fe->owned_head;
+ fe->mode = COLLECT_PRIMARY_FOLLOWED;
cl = z_erofs_primarycollection(pcl);
cl->pageofs = map->m_la & ~PAGE_MASK;
@@ -494,23 +494,32 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
mutex_init(&cl->lock);
DBG_BUGON(!mutex_trylock(&cl->lock));
- grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
- if (IS_ERR(grp)) {
- err = PTR_ERR(grp);
- goto err_out;
- }
+ if (ztailpacking) {
+ pcl->obj.index = 0; /* which indicates ztailpacking */
+ pcl->pageofs_in = erofs_blkoff(map->m_pa);
+ pcl->tailpacking_size = map->m_plen;
+ } else {
+ pcl->obj.index = map->m_pa >> PAGE_SHIFT;
- if (grp != &pcl->obj) {
- clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
- err = -EEXIST;
- goto err_out;
+ grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+ if (IS_ERR(grp)) {
+ err = PTR_ERR(grp);
+ goto err_out;
+ }
+
+ if (grp != &pcl->obj) {
+ fe->pcl = container_of(grp,
+ struct z_erofs_pcluster, obj);
+ err = -EEXIST;
+ goto err_out;
+ }
}
/* used to check tail merging loop due to corrupted images */
- if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
- clt->tailpcl = pcl;
- clt->owned_head = &pcl->next;
- clt->pcl = pcl;
- clt->cl = cl;
+ if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ fe->tailpcl = pcl;
+ fe->owned_head = &pcl->next;
+ fe->pcl = pcl;
+ fe->cl = cl;
return 0;
err_out:
@@ -519,48 +528,51 @@ err_out:
return err;
}
-static int z_erofs_collector_begin(struct z_erofs_collector *clt,
+static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
{
struct erofs_workgroup *grp;
int ret;
- DBG_BUGON(clt->cl);
+ DBG_BUGON(fe->cl);
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
- DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
- DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+ DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
+ DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
- if (!PAGE_ALIGNED(map->m_pa)) {
- DBG_BUGON(1);
- return -EINVAL;
+ if (map->m_flags & EROFS_MAP_META) {
+ if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ goto tailpacking;
}
grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
if (grp) {
- clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+ fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
} else {
- ret = z_erofs_register_collection(clt, inode, map);
-
+tailpacking:
+ ret = z_erofs_register_collection(fe, inode, map);
if (!ret)
goto out;
if (ret != -EEXIST)
return ret;
}
- ret = z_erofs_lookup_collection(clt, inode, map);
+ ret = z_erofs_lookup_collection(fe, inode, map);
if (ret) {
- erofs_workgroup_put(&clt->pcl->obj);
+ erofs_workgroup_put(&fe->pcl->obj);
return ret;
}
out:
- z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
- clt->cl->pagevec, clt->cl->vcnt);
-
+ z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
+ fe->cl->pagevec, fe->cl->vcnt);
/* since file-backed online pages are traversed in reverse order */
- clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages;
+ fe->icpage_ptr = fe->pcl->compressed_pages +
+ z_erofs_pclusterpages(fe->pcl);
return 0;
}
@@ -594,24 +606,24 @@ static void z_erofs_collection_put(struct z_erofs_collection *cl)
erofs_workgroup_put(&pcl->obj);
}
-static bool z_erofs_collector_end(struct z_erofs_collector *clt)
+static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
{
- struct z_erofs_collection *cl = clt->cl;
+ struct z_erofs_collection *cl = fe->cl;
if (!cl)
return false;
- z_erofs_pagevec_ctor_exit(&clt->vector, false);
+ z_erofs_pagevec_ctor_exit(&fe->vector, false);
mutex_unlock(&cl->lock);
/*
* if all pending pages are added, don't hold its reference
* any longer if the pcluster isn't hosted by ourselves.
*/
- if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
+ if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
z_erofs_collection_put(cl);
- clt->cl = NULL;
+ fe->cl = NULL;
return true;
}
@@ -635,7 +647,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct inode *const inode = fe->inode;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct erofs_map_blocks *const map = &fe->map;
- struct z_erofs_collector *const clt = &fe->clt;
const loff_t offset = page_offset(page);
bool tight = true;
@@ -656,7 +667,7 @@ repeat:
if (offset + cur >= map->m_la &&
offset + cur < map->m_la + map->m_llen) {
/* didn't get a valid collection previously (very rare) */
- if (!clt->cl)
+ if (!fe->cl)
goto restart_now;
goto hitted;
}
@@ -664,7 +675,7 @@ repeat:
/* go ahead the next map_blocks */
erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur);
- if (z_erofs_collector_end(clt))
+ if (z_erofs_collector_end(fe))
fe->backmost = false;
map->m_la = offset + cur;
@@ -677,19 +688,34 @@ restart_now:
if (!(map->m_flags & EROFS_MAP_MAPPED))
goto hitted;
- err = z_erofs_collector_begin(clt, inode, map);
+ err = z_erofs_collector_begin(fe, inode, map);
if (err)
goto err_out;
- /* preload all compressed pages (maybe downgrade role if necessary) */
- if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
- cache_strategy = TRYALLOC;
- else
- cache_strategy = DONTALLOC;
+ if (z_erofs_is_inline_pcluster(fe->pcl)) {
+ void *mp;
- preload_compressed_pages(clt, MNGD_MAPPING(sbi),
- cache_strategy, pagepool);
+ mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb,
+ erofs_blknr(map->m_pa), EROFS_NO_KMAP);
+ if (IS_ERR(mp)) {
+ err = PTR_ERR(mp);
+ erofs_err(inode->i_sb,
+ "failed to get inline page, err %d", err);
+ goto err_out;
+ }
+ get_page(fe->map.buf.page);
+ WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
+ fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ } else {
+ /* bind cache first when cached decompression is preferred */
+ if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
+ map->m_la))
+ cache_strategy = TRYALLOC;
+ else
+ cache_strategy = DONTALLOC;
+ z_erofs_bind_cache(fe, cache_strategy, pagepool);
+ }
hitted:
/*
* Ensure the current partial page belongs to this submit chain rather
@@ -697,8 +723,8 @@ hitted:
* those chains are handled asynchronously thus the page cannot be used
* for inplace I/O or pagevec (should be processed in strict order.)
*/
- tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
- clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+ tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED &&
+ fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
cur = end - min_t(unsigned int, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
@@ -713,18 +739,18 @@ hitted:
Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
if (cur)
- tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+ tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED);
retry:
- err = z_erofs_attach_page(clt, page, page_type,
- clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+ err = z_erofs_attach_page(fe, page, page_type,
+ fe->mode >= COLLECT_PRIMARY_FOLLOWED);
/* should allocate an additional short-lived page for pagevec */
if (err == -EAGAIN) {
struct page *const newpage =
alloc_page(GFP_NOFS | __GFP_NOFAIL);
set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
- err = z_erofs_attach_page(clt, newpage,
+ err = z_erofs_attach_page(fe, newpage,
Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
if (!err)
goto retry;
@@ -740,7 +766,7 @@ retry:
/* bump up the number of spiltted parts of a page */
++spiltted;
/* also update nr_pages */
- clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
+ fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1);
next_part:
/* can be used for verification */
map->m_llen = offset + cur - map->m_la;
@@ -762,32 +788,19 @@ err_out:
goto out;
}
-static void z_erofs_decompressqueue_work(struct work_struct *work);
-static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
- bool sync, int bios)
+static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
+ unsigned int readahead_pages)
{
- struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
-
- /* wake up the caller thread for sync decompression */
- if (sync) {
- unsigned long flags;
+ /* auto: enable for readpage, disable for readahead */
+ if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) &&
+ !readahead_pages)
+ return true;
- spin_lock_irqsave(&io->u.wait.lock, flags);
- if (!atomic_add_return(bios, &io->pending_bios))
- wake_up_locked(&io->u.wait);
- spin_unlock_irqrestore(&io->u.wait.lock, flags);
- return;
- }
+ if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_FORCE_ON) &&
+ (readahead_pages <= sbi->opt.max_sync_decompress_pages))
+ return true;
- if (atomic_add_return(bios, &io->pending_bios))
- return;
- /* Use workqueue and sync decompression for atomic contexts only */
- if (in_atomic() || irqs_disabled()) {
- queue_work(z_erofs_workqueue, &io->u.work);
- sbi->opt.readahead_sync_decompress = true;
- return;
- }
- z_erofs_decompressqueue_work(&io->u.work);
+ return false;
}
static bool z_erofs_page_is_invalidated(struct page *page)
@@ -795,38 +808,12 @@ static bool z_erofs_page_is_invalidated(struct page *page)
return !page->mapping && !z_erofs_is_shortlived_page(page);
}
-static void z_erofs_decompressqueue_endio(struct bio *bio)
-{
- tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
- struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
- blk_status_t err = bio->bi_status;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
-
- DBG_BUGON(PageUptodate(page));
- DBG_BUGON(z_erofs_page_is_invalidated(page));
-
- if (err)
- SetPageError(page);
-
- if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
- if (!err)
- SetPageUptodate(page);
- unlock_page(page);
- }
- }
- z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
- bio_put(bio);
-}
-
static int z_erofs_decompress_pcluster(struct super_block *sb,
struct z_erofs_pcluster *pcl,
struct page **pagepool)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
struct z_erofs_pagevec_ctor ctor;
unsigned int i, inputsize, outputsize, llen, nr_pages;
struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
@@ -908,15 +895,20 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
overlapped = false;
compressed_pages = pcl->compressed_pages;
- for (i = 0; i < pcl->pclusterpages; ++i) {
+ for (i = 0; i < pclusterpages; ++i) {
unsigned int pagenr;
page = compressed_pages[i];
-
/* all compressed pages ought to be valid */
DBG_BUGON(!page);
- DBG_BUGON(z_erofs_page_is_invalidated(page));
+ if (z_erofs_is_inline_pcluster(pcl)) {
+ if (!PageUptodate(page))
+ err = -EIO;
+ continue;
+ }
+
+ DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
if (erofs_page_is_managed(sbi, page)) {
if (!PageUptodate(page))
@@ -961,11 +953,16 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
partial = true;
}
- inputsize = pcl->pclusterpages * PAGE_SIZE;
+ if (z_erofs_is_inline_pcluster(pcl))
+ inputsize = pcl->tailpacking_size;
+ else
+ inputsize = pclusterpages * PAGE_SIZE;
+
err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
.sb = sb,
.in = compressed_pages,
.out = pages,
+ .pageofs_in = pcl->pageofs_in,
.pageofs_out = cl->pageofs,
.inputsize = inputsize,
.outputsize = outputsize,
@@ -975,17 +972,22 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
}, pagepool);
out:
- /* must handle all compressed pages before ending pages */
- for (i = 0; i < pcl->pclusterpages; ++i) {
- page = compressed_pages[i];
-
- if (erofs_page_is_managed(sbi, page))
- continue;
+ /* must handle all compressed pages before actual file pages */
+ if (z_erofs_is_inline_pcluster(pcl)) {
+ page = compressed_pages[0];
+ WRITE_ONCE(compressed_pages[0], NULL);
+ put_page(page);
+ } else {
+ for (i = 0; i < pclusterpages; ++i) {
+ page = compressed_pages[i];
- /* recycle all individual short-lived pages */
- (void)z_erofs_put_shortlivedpage(pagepool, page);
+ if (erofs_page_is_managed(sbi, page))
+ continue;
- WRITE_ONCE(compressed_pages[i], NULL);
+ /* recycle all individual short-lived pages */
+ (void)z_erofs_put_shortlivedpage(pagepool, page);
+ WRITE_ONCE(compressed_pages[i], NULL);
+ }
}
for (i = 0; i < nr_pages; ++i) {
@@ -1057,13 +1059,42 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
kvfree(bgq);
}
+static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
+ bool sync, int bios)
+{
+ struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
+ /* wake up the caller thread for sync decompression */
+ if (sync) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&io->u.wait.lock, flags);
+ if (!atomic_add_return(bios, &io->pending_bios))
+ wake_up_locked(&io->u.wait);
+ spin_unlock_irqrestore(&io->u.wait.lock, flags);
+ return;
+ }
+
+ if (atomic_add_return(bios, &io->pending_bios))
+ return;
+ /* Use workqueue and sync decompression for atomic contexts only */
+ if (in_atomic() || irqs_disabled()) {
+ queue_work(z_erofs_workqueue, &io->u.work);
+ /* enable sync decompression for readahead */
+ if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
+ sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
+ return;
+ }
+ z_erofs_decompressqueue_work(&io->u.work);
+}
+
static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
unsigned int nr,
struct page **pagepool,
- struct address_space *mc,
- gfp_t gfp)
+ struct address_space *mc)
{
const pgoff_t index = pcl->obj.index;
+ gfp_t gfp = mapping_gfp_mask(mc);
bool tocache = false;
struct address_space *mapping;
@@ -1234,6 +1265,33 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
qtail[JQ_BYPASS] = &pcl->next;
}
+static void z_erofs_decompressqueue_endio(struct bio *bio)
+{
+ tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
+ struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
+ blk_status_t err = bio->bi_status;
+ struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bvec, bio, iter_all) {
+ struct page *page = bvec->bv_page;
+
+ DBG_BUGON(PageUptodate(page));
+ DBG_BUGON(z_erofs_page_is_invalidated(page));
+
+ if (err)
+ SetPageError(page);
+
+ if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
+ if (!err)
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ }
+ z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
+ bio_put(bio);
+}
+
static void z_erofs_submit_queue(struct super_block *sb,
struct z_erofs_decompress_frontend *f,
struct page **pagepool,
@@ -1244,7 +1302,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
void *bi_private;
- z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
+ z_erofs_next_pcluster_t owned_head = f->owned_head;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
pgoff_t last_index;
struct block_device *last_bdev;
@@ -1271,6 +1329,14 @@ static void z_erofs_submit_queue(struct super_block *sb,
pcl = container_of(owned_head, struct z_erofs_pcluster, next);
+ /* close the main owned chain at first */
+ owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+ Z_EROFS_PCLUSTER_TAIL_CLOSED);
+ if (z_erofs_is_inline_pcluster(pcl)) {
+ move_to_bypass_jobqueue(pcl, qtail, owned_head);
+ continue;
+ }
+
/* no device id here, thus it will always succeed */
mdev = (struct erofs_map_dev) {
.m_pa = blknr_to_addr(pcl->obj.index),
@@ -1280,16 +1346,11 @@ static void z_erofs_submit_queue(struct super_block *sb,
cur = erofs_blknr(mdev.m_pa);
end = cur + pcl->pclusterpages;
- /* close the main owned chain at first */
- owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
- Z_EROFS_PCLUSTER_TAIL_CLOSED);
-
do {
struct page *page;
page = pickup_page_for_submission(pcl, i++, pagepool,
- MNGD_MAPPING(sbi),
- GFP_NOFS);
+ MNGD_MAPPING(sbi));
if (!page)
continue;
@@ -1301,15 +1362,14 @@ submit_bio_retry:
}
if (!bio) {
- bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
+ bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
+ REQ_OP_READ, GFP_NOIO);
bio->bi_end_io = z_erofs_decompressqueue_endio;
- bio_set_dev(bio, mdev.m_bdev);
last_bdev = mdev.m_bdev;
bio->bi_iter.bi_sector = (sector_t)cur <<
LOG_SECTORS_PER_BLOCK;
bio->bi_private = bi_private;
- bio->bi_opf = REQ_OP_READ;
if (f->readahead)
bio->bi_opf |= REQ_RAHEAD;
++nr_bios;
@@ -1348,7 +1408,7 @@ static void z_erofs_runqueue(struct super_block *sb,
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
- if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL)
+ if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
return;
z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
@@ -1435,6 +1495,7 @@ skip:
static int z_erofs_readpage(struct file *file, struct page *page)
{
struct inode *const inode = page->mapping->host;
+ struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *pagepool = NULL;
int err;
@@ -1447,17 +1508,16 @@ static int z_erofs_readpage(struct file *file, struct page *page)
err = z_erofs_do_read_page(&f, page, &pagepool);
z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
- (void)z_erofs_collector_end(&f.clt);
+ (void)z_erofs_collector_end(&f);
/* if some compressed cluster ready, need submit them anyway */
- z_erofs_runqueue(inode->i_sb, &f, &pagepool, true);
+ z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+ z_erofs_get_sync_decompress_policy(sbi, 0));
if (err)
erofs_err(inode->i_sb, "failed to read, err [%d]", err);
- if (f.map.mpage)
- put_page(f.map.mpage);
-
+ erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&pagepool);
return err;
}
@@ -1498,13 +1558,11 @@ static void z_erofs_readahead(struct readahead_control *rac)
put_page(page);
}
z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
- (void)z_erofs_collector_end(&f.clt);
+ (void)z_erofs_collector_end(&f);
z_erofs_runqueue(inode->i_sb, &f, &pagepool,
- sbi->opt.readahead_sync_decompress &&
- nr_pages <= sbi->opt.max_sync_decompress_pages);
- if (f.map.mpage)
- put_page(f.map.mpage);
+ z_erofs_get_sync_decompress_policy(sbi, nr_pages));
+ erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&pagepool);
}
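
[Editor's note] Among the zdata.c changes above, the readahead_sync_decompress heuristic becomes an explicit policy, z_erofs_get_sync_decompress_policy(): AUTO decompresses synchronously for readpage but not for readahead, FORCE_ON additionally keeps small readahead batches synchronous, and everything else (including FORCE_OFF, exposed via the sysfs code earlier in this patch) defers to the workqueue. A standalone restatement of that decision logic (plain C, not part of the patch; the demo_* names are invented):

#include <stdbool.h>
#include <stdio.h>

enum { SYNC_AUTO, SYNC_FORCE_ON, SYNC_FORCE_OFF };

static bool demo_sync_policy(int mode, unsigned int max_sync_pages,
			     unsigned int readahead_pages)
{
	if (mode == SYNC_AUTO && !readahead_pages)
		return true;	/* plain readpage: decompress in-context */
	if (mode == SYNC_FORCE_ON && readahead_pages <= max_sync_pages)
		return true;	/* small readahead window: still sync */
	return false;		/* otherwise defer to the workqueue */
}

int main(void)
{
	printf("%d\n", demo_sync_policy(SYNC_AUTO, 3, 0));	/* 1 */
	printf("%d\n", demo_sync_policy(SYNC_AUTO, 3, 8));	/* 0 */
	printf("%d\n", demo_sync_policy(SYNC_FORCE_ON, 3, 2));	/* 1 */
	return 0;
}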
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 4a69515dea75..e043216b545f 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -62,8 +62,16 @@ struct z_erofs_pcluster {
/* A: lower limit of decompressed length and if full length or not */
unsigned int length;
- /* I: physical cluster size in pages */
- unsigned short pclusterpages;
+ /* I: page offset of inline compressed data */
+ unsigned short pageofs_in;
+
+ union {
+ /* I: physical cluster size in pages */
+ unsigned short pclusterpages;
+
+ /* I: tailpacking inline compressed size */
+ unsigned short tailpacking_size;
+ };
/* I: compression algorithm format */
unsigned char algorithmformat;
@@ -94,6 +102,18 @@ struct z_erofs_decompressqueue {
} u;
};
+static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
+{
+ return !pcl->obj.index;
+}
+
+static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
+{
+ if (z_erofs_is_inline_pcluster(pcl))
+ return 1;
+ return pcl->pclusterpages;
+}
+
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
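
[Editor's note] The header change above encodes tail-packing inline pclusters by reusing obj.index == 0 as the marker and overlaying pclusterpages with tailpacking_size, so z_erofs_pclusterpages() can simply report a single page for the inline case. A minimal userspace mock of that encoding (not the kernel structures; the demo_* names are invented):

#include <stdio.h>

struct demo_pcluster {
	unsigned long index;			/* 0 => inline (ztailpacking) */
	union {
		unsigned short pclusterpages;	 /* regular pcluster */
		unsigned short tailpacking_size; /* inline pcluster */
	};
};

static int demo_is_inline(const struct demo_pcluster *pcl)
{
	return !pcl->index;
}

static unsigned int demo_pclusterpages(const struct demo_pcluster *pcl)
{
	return demo_is_inline(pcl) ? 1 : pcl->pclusterpages;
}

int main(void)
{
	struct demo_pcluster regular = { .index = 1234, .pclusterpages = 4 };
	struct demo_pcluster inline_pcl = { .index = 0,
					    .tailpacking_size = 123 };

	printf("%u %u\n", demo_pclusterpages(&regular),
	       demo_pclusterpages(&inline_pcl));	/* prints "4 1" */
	return 0;
}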
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 660489a7fb64..572f0b8151ba 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -7,12 +7,17 @@
#include <asm/unaligned.h>
#include <trace/events/erofs.h>
+static int z_erofs_do_map_blocks(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags);
+
int z_erofs_fill_inode(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
if (!erofs_sb_has_big_pcluster(sbi) &&
+ !erofs_sb_has_ztailpacking(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0;
vi->z_algorithmtype[0] = 0;
@@ -30,7 +35,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
struct super_block *const sb = inode->i_sb;
int err, headnr;
erofs_off_t pos;
- struct page *page;
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
void *kaddr;
struct z_erofs_map_header *h;
@@ -51,18 +56,18 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto out_unlock;
DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
+ !erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8);
- page = erofs_get_meta_page(sb, erofs_blknr(pos));
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
+ kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
+ EROFS_KMAP_ATOMIC);
+ if (IS_ERR(kaddr)) {
+ err = PTR_ERR(kaddr);
goto out_unlock;
}
- kaddr = kmap_atomic(page);
-
h = kaddr + erofs_blkoff(pos);
vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
@@ -94,13 +99,33 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
err = -EFSCORRUPTED;
goto unmap_done;
}
+unmap_done:
+ erofs_put_metabuf(&buf);
+ if (err)
+ goto out_unlock;
+
+ if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
+ struct erofs_map_blocks map = {
+ .buf = __EROFS_BUF_INITIALIZER
+ };
+
+ vi->z_idata_size = le16_to_cpu(h->h_idata_size);
+ err = z_erofs_do_map_blocks(inode, &map,
+ EROFS_GET_BLOCKS_FINDTAIL);
+ erofs_put_metabuf(&map.buf);
+
+ if (!map.m_plen ||
+ erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
+ erofs_err(sb, "invalid tail-packing pclustersize %llu",
+ map.m_plen);
+ err = -EFSCORRUPTED;
+ }
+ if (err < 0)
+ goto out_unlock;
+ }
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
-unmap_done:
- kunmap_atomic(kaddr);
- unlock_page(page);
- put_page(page);
out_unlock:
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err;
@@ -117,37 +142,18 @@ struct z_erofs_maprecorder {
u16 clusterofs;
u16 delta[2];
erofs_blk_t pblk, compressedlcs;
+ erofs_off_t nextpackoff;
};
static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
erofs_blk_t eblk)
{
struct super_block *const sb = m->inode->i_sb;
- struct erofs_map_blocks *const map = m->map;
- struct page *mpage = map->mpage;
- if (mpage) {
- if (mpage->index == eblk) {
- if (!m->kaddr)
- m->kaddr = kmap_atomic(mpage);
- return 0;
- }
-
- if (m->kaddr) {
- kunmap_atomic(m->kaddr);
- m->kaddr = NULL;
- }
- put_page(mpage);
- }
-
- mpage = erofs_get_meta_page(sb, eblk);
- if (IS_ERR(mpage)) {
- map->mpage = NULL;
- return PTR_ERR(mpage);
- }
- m->kaddr = kmap_atomic(mpage);
- unlock_page(mpage);
- map->mpage = mpage;
+ m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk,
+ EROFS_KMAP_ATOMIC);
+ if (IS_ERR(m->kaddr))
+ return PTR_ERR(m->kaddr);
return 0;
}
@@ -169,6 +175,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
if (err)
return err;
+ m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
m->lcn = lcn;
di = m->kaddr + erofs_blkoff(pos);
@@ -243,12 +250,12 @@ static int get_compacted_la_distance(unsigned int lclusterbits,
static int unpack_compacted_index(struct z_erofs_maprecorder *m,
unsigned int amortizedshift,
- unsigned int eofs, bool lookahead)
+ erofs_off_t pos, bool lookahead)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
const unsigned int lomask = (1 << lclusterbits) - 1;
- unsigned int vcnt, base, lo, encodebits, nblk;
+ unsigned int vcnt, base, lo, encodebits, nblk, eofs;
int i;
u8 *in, type;
bool big_pcluster;
@@ -260,8 +267,12 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
else
return -EOPNOTSUPP;
+ /* it doesn't equal to round_up(..) */
+ m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
+ (vcnt << amortizedshift);
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
+ eofs = erofs_blkoff(pos);
base = round_down(eofs, vcnt << amortizedshift);
in = m->kaddr + base;
@@ -399,8 +410,7 @@ out:
err = z_erofs_reload_indexes(m, erofs_blknr(pos));
if (err)
return err;
- return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos),
- lookahead);
+ return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}
static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
@@ -421,48 +431,47 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned int lookback_distance)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
- struct erofs_map_blocks *const map = m->map;
const unsigned int lclusterbits = vi->z_logical_clusterbits;
- unsigned long lcn = m->lcn;
- int err;
- if (lcn < lookback_distance) {
- erofs_err(m->inode->i_sb,
- "bogus lookback distance @ nid %llu", vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
+ while (m->lcn >= lookback_distance) {
+ unsigned long lcn = m->lcn - lookback_distance;
+ int err;
- /* load extent head logical cluster if needed */
- lcn -= lookback_distance;
- err = z_erofs_load_cluster_from_disk(m, lcn, false);
- if (err)
- return err;
+ /* load extent head logical cluster if needed */
+ err = z_erofs_load_cluster_from_disk(m, lcn, false);
+ if (err)
+ return err;
- switch (m->type) {
- case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
- if (!m->delta[0]) {
+ switch (m->type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+ if (!m->delta[0]) {
+ erofs_err(m->inode->i_sb,
+ "invalid lookback distance 0 @ nid %llu",
+ vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ lookback_distance = m->delta[0];
+ continue;
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
+ m->headtype = m->type;
+ m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
+ return 0;
+ default:
erofs_err(m->inode->i_sb,
- "invalid lookback distance 0 @ nid %llu",
- vi->nid);
+ "unknown type %u @ lcn %lu of nid %llu",
+ m->type, lcn, vi->nid);
DBG_BUGON(1);
- return -EFSCORRUPTED;
+ return -EOPNOTSUPP;
}
- return z_erofs_extent_lookback(m, m->delta[0]);
- case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
- case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
- m->headtype = m->type;
- map->m_la = (lcn << lclusterbits) | m->clusterofs;
- break;
- default:
- erofs_err(m->inode->i_sb,
- "unknown type %u @ lcn %lu of nid %llu",
- m->type, lcn, vi->nid);
- DBG_BUGON(1);
- return -EOPNOTSUPP;
}
- return 0;
+
+ erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
+ vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
}
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
@@ -484,7 +493,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
- map->m_plen = 1 << lclusterbits;
+ map->m_plen = 1ULL << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
@@ -530,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
- map->m_plen = m->compressedlcs << lclusterbits;
+ map->m_plen = (u64)m->compressedlcs << lclusterbits;
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
@@ -583,11 +592,12 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
return 0;
}
-int z_erofs_map_blocks_iter(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags)
+static int z_erofs_do_map_blocks(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
{
struct erofs_inode *const vi = EROFS_I(inode);
+ bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
struct z_erofs_maprecorder m = {
.inode = inode,
.map = map,
@@ -597,22 +607,8 @@ int z_erofs_map_blocks_iter(struct inode *inode,
unsigned long initial_lcn;
unsigned long long ofs, end;
- trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
-
- /* when trying to read beyond EOF, leave it unmapped */
- if (map->m_la >= inode->i_size) {
- map->m_llen = map->m_la + 1 - inode->i_size;
- map->m_la = inode->i_size;
- map->m_flags = 0;
- goto out;
- }
-
- err = z_erofs_fill_inode_lazy(inode);
- if (err)
- goto out;
-
lclusterbits = vi->z_logical_clusterbits;
- ofs = map->m_la;
+ ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
initial_lcn = ofs >> lclusterbits;
endoff = ofs & ((1 << lclusterbits) - 1);
@@ -620,6 +616,9 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (err)
goto unmap_out;
+ if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
+ vi->z_idataoff = m.nextpackoff;
+
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
end = (m.lcn + 1ULL) << lclusterbits;
@@ -630,6 +629,13 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (endoff >= m.clusterofs) {
m.headtype = m.type;
map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+ /*
+ * For ztailpacking files, in order to inline data more
+ * effectively, special EOF lclusters are now supported
+ * which can have three parts at most.
+ */
+ if (ztailpacking && end > inode->i_size)
+ end = inode->i_size;
break;
}
/* m.lcn should be >= 1 if endoff < m.clusterofs */
@@ -659,11 +665,19 @@ int z_erofs_map_blocks_iter(struct inode *inode,
}
map->m_llen = end - map->m_la;
- map->m_pa = blknr_to_addr(m.pblk);
- err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
- if (err)
- goto out;
+ if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+ vi->z_tailextent_headlcn = m.lcn;
+ if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
+ map->m_flags |= EROFS_MAP_META;
+ map->m_pa = vi->z_idataoff;
+ map->m_plen = vi->z_idata_size;
+ } else {
+ map->m_pa = blknr_to_addr(m.pblk);
+ err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
+ if (err)
+ goto out;
+ }
if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
@@ -681,14 +695,38 @@ int z_erofs_map_blocks_iter(struct inode *inode,
map->m_flags |= EROFS_MAP_FULL_MAPPED;
}
unmap_out:
- if (m.kaddr)
- kunmap_atomic(m.kaddr);
+ erofs_unmap_metabuf(&m.map->buf);
out:
erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
__func__, map->m_la, map->m_pa,
map->m_llen, map->m_plen, map->m_flags);
+ return err;
+}
+
+int z_erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
+{
+ int err = 0;
+
+ trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
+
+ /* when trying to read beyond EOF, leave it unmapped */
+ if (map->m_la >= inode->i_size) {
+ map->m_llen = map->m_la + 1 - inode->i_size;
+ map->m_la = inode->i_size;
+ map->m_flags = 0;
+ goto out;
+ }
+
+ err = z_erofs_fill_inode_lazy(inode);
+ if (err)
+ goto out;
+
+ err = z_erofs_do_map_blocks(inode, map, flags);
+out:
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
@@ -704,8 +742,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
struct erofs_map_blocks map = { .m_la = offset };
ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
- if (map.mpage)
- put_page(map.mpage);
+ erofs_put_metabuf(&map.buf);
if (ret < 0)
return ret;