Diffstat (limited to 'fs/erofs')
-rw-r--r--  fs/erofs/Makefile             |   2
-rw-r--r--  fs/erofs/compress.h           |   4
-rw-r--r--  fs/erofs/data.c               | 157
-rw-r--r--  fs/erofs/decompressor.c       | 134
-rw-r--r--  fs/erofs/decompressor_lzma.c  |  19
-rw-r--r--  fs/erofs/dir.c                |  21
-rw-r--r--  fs/erofs/erofs_fs.h           |  23
-rw-r--r--  fs/erofs/inode.c              |  72
-rw-r--r--  fs/erofs/internal.h           |  57
-rw-r--r--  fs/erofs/namei.c              |  54
-rw-r--r--  fs/erofs/super.c              | 176
-rw-r--r--  fs/erofs/sysfs.c              | 258
-rw-r--r--  fs/erofs/xattr.c              | 135
-rw-r--r--  fs/erofs/xattr.h              |   1
-rw-r--r--  fs/erofs/zdata.c              | 436
-rw-r--r--  fs/erofs/zdata.h              |  24
-rw-r--r--  fs/erofs/zmap.c               | 237
17 files changed, 1101 insertions, 709 deletions
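The recurring pattern across these changes is the new metadata buffer interface added in fs/erofs/data.c (erofs_bread(), erofs_read_metabuf(), erofs_put_metabuf()), which replaces the old erofs_get_meta_page() / unlock_page() / put_page() sequence in dir.c, inode.c, namei.c, super.c, xattr.c and the iomap path. The sketch below is a hypothetical caller, not part of the patch; it only illustrates the lifetime rules: the on-stack erofs_buf caches the last metadata block, and a single erofs_put_metabuf() both unmaps the page and drops its reference.

/*
 * Hypothetical caller (illustration only) using the erofs_buf helpers
 * introduced in data.c to read one little-endian word of metadata.
 */
#include "internal.h"	/* struct erofs_buf, erofs_blknr(), erofs_blkoff() */

static int example_read_le32(struct super_block *sb, erofs_off_t pos,
			     __le32 *val)
{
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	void *kaddr;

	/* maps the block containing 'pos'; reuses buf->page if it matches */
	kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
	if (IS_ERR(kaddr))
		return PTR_ERR(kaddr);

	*val = *(__le32 *)(kaddr + erofs_blkoff(pos));

	/* unmap and release the cached page in one call */
	erofs_put_metabuf(&buf);
	return 0;
}

Because erofs_bread() only re-reads when the requested block differs from the cached one, loops such as erofs_map_blocks(), erofs_read_inode(), the directory readers and the xattr iterators can reuse one buffer across iterations instead of taking a page-cache lookup per access.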
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile index 756fe2d65272..8a3317e38e5a 100644 --- a/fs/erofs/Makefile +++ b/fs/erofs/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_EROFS_FS) += erofs.o -erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o +erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 579406504919..19e6c56a9f47 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -12,7 +12,7 @@ struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; - unsigned short pageofs_out; + unsigned short pageofs_in, pageofs_out; unsigned int inputsize, outputsize; /* indicate the algorithm will be used for decompression */ @@ -87,6 +87,8 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, return page->mapping == MNGD_MAPPING(sbi); } +int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, + unsigned int padbufsize); int z_erofs_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool); diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 0e35ef3f9f3d..780db1e5f4b7 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -9,37 +9,77 @@ #include <linux/dax.h> #include <trace/events/erofs.h> -struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) +void erofs_unmap_metabuf(struct erofs_buf *buf) { - struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; - struct page *page; - - page = read_cache_page_gfp(mapping, blkaddr, - mapping_gfp_constraint(mapping, ~__GFP_FS)); - /* should already be PageUptodate */ - if (!IS_ERR(page)) - lock_page(page); - return page; + if (buf->kmap_type == EROFS_KMAP) + kunmap(buf->page); + else if (buf->kmap_type == EROFS_KMAP_ATOMIC) + kunmap_atomic(buf->base); + buf->base = NULL; + buf->kmap_type = EROFS_NO_KMAP; +} + +void erofs_put_metabuf(struct erofs_buf *buf) +{ + if (!buf->page) + return; + erofs_unmap_metabuf(buf); + put_page(buf->page); + buf->page = NULL; +} + +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type) +{ + struct address_space *const mapping = inode->i_mapping; + erofs_off_t offset = blknr_to_addr(blkaddr); + pgoff_t index = offset >> PAGE_SHIFT; + struct page *page = buf->page; + + if (!page || page->index != index) { + erofs_put_metabuf(buf); + page = read_cache_page_gfp(mapping, index, + mapping_gfp_constraint(mapping, ~__GFP_FS)); + if (IS_ERR(page)) + return page; + /* should already be PageUptodate, no need to lock page */ + buf->page = page; + } + if (buf->kmap_type == EROFS_NO_KMAP) { + if (type == EROFS_KMAP) + buf->base = kmap(page); + else if (type == EROFS_KMAP_ATOMIC) + buf->base = kmap_atomic(page); + buf->kmap_type = type; + } else if (buf->kmap_type != type) { + DBG_BUGON(1); + return ERR_PTR(-EFAULT); + } + if (type == EROFS_NO_KMAP) + return NULL; + return buf->base + (offset & ~PAGE_MASK); +} + +void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, + erofs_blk_t blkaddr, enum erofs_kmap_type type) +{ + return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); } static int erofs_map_blocks_flatmode(struct inode *inode, struct erofs_map_blocks *map, int flags) { - int err = 0; erofs_blk_t nblocks, lastblk; u64 offset = map->m_la; struct 
erofs_inode *vi = EROFS_I(inode); bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); - trace_erofs_map_blocks_flatmode_enter(inode, map, flags); - - nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE); + nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); lastblk = nblocks - tailendpacking; /* there is no hole in flatmode */ map->m_flags = EROFS_MAP_MAPPED; - if (offset < blknr_to_addr(lastblk)) { map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; map->m_plen = blknr_to_addr(lastblk) - offset; @@ -51,30 +91,23 @@ static int erofs_map_blocks_flatmode(struct inode *inode, vi->xattr_isize + erofs_blkoff(map->m_la); map->m_plen = inode->i_size - offset; - /* inline data should be located in one meta block */ - if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) { + /* inline data should be located in the same meta block */ + if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) { erofs_err(inode->i_sb, "inline data cross block boundary @ nid %llu", vi->nid); DBG_BUGON(1); - err = -EFSCORRUPTED; - goto err_out; + return -EFSCORRUPTED; } - map->m_flags |= EROFS_MAP_META; } else { erofs_err(inode->i_sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx", vi->nid, inode->i_size, map->m_la); DBG_BUGON(1); - err = -EIO; - goto err_out; + return -EIO; } - - map->m_llen = map->m_plen; -err_out: - trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0); - return err; + return 0; } static int erofs_map_blocks(struct inode *inode, @@ -83,12 +116,14 @@ static int erofs_map_blocks(struct inode *inode, struct super_block *sb = inode->i_sb; struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode_chunk_index *idx; - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; u64 chunknr; unsigned int unit; erofs_off_t pos; + void *kaddr; int err = 0; + trace_erofs_map_blocks_enter(inode, map, flags); map->m_deviceid = 0; if (map->m_la >= inode->i_size) { /* leave out-of-bound access unmapped */ @@ -97,8 +132,10 @@ static int erofs_map_blocks(struct inode *inode, goto out; } - if (vi->datalayout != EROFS_INODE_CHUNK_BASED) - return erofs_map_blocks_flatmode(inode, map, flags); + if (vi->datalayout != EROFS_INODE_CHUNK_BASED) { + err = erofs_map_blocks_flatmode(inode, map, flags); + goto out; + } if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) unit = sizeof(*idx); /* chunk index */ @@ -109,17 +146,18 @@ static int erofs_map_blocks(struct inode *inode, pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, unit) + unit * chunknr; - page = erofs_get_meta_page(inode->i_sb, erofs_blknr(pos)); - if (IS_ERR(page)) - return PTR_ERR(page); - + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP); + if (IS_ERR(kaddr)) { + err = PTR_ERR(kaddr); + goto out; + } map->m_la = chunknr << vi->chunkbits; map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, roundup(inode->i_size - map->m_la, EROFS_BLKSIZ)); /* handle block map */ if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { - __le32 *blkaddr = page_address(page) + erofs_blkoff(pos); + __le32 *blkaddr = kaddr + erofs_blkoff(pos); if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { map->m_flags = 0; @@ -130,7 +168,7 @@ static int erofs_map_blocks(struct inode *inode, goto out_unlock; } /* parse chunk indexes */ - idx = page_address(page) + erofs_blkoff(pos); + idx = kaddr + erofs_blkoff(pos); switch (le32_to_cpu(idx->blkaddr)) { case EROFS_NULL_ADDR: map->m_flags = 0; @@ -143,10 +181,11 @@ static int erofs_map_blocks(struct inode *inode, break; } out_unlock: - unlock_page(page); - 
put_page(page); + erofs_put_metabuf(&buf); out: - map->m_llen = map->m_plen; + if (!err) + map->m_llen = map->m_plen; + trace_erofs_map_blocks_exit(inode, map, flags, 0); return err; } @@ -159,6 +198,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) /* primary device by default */ map->m_bdev = sb->s_bdev; map->m_daxdev = EROFS_SB(sb)->dax_dev; + map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; if (map->m_deviceid) { down_read(&devs->rwsem); @@ -169,6 +209,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) } map->m_bdev = dif->bdev; map->m_daxdev = dif->dax_dev; + map->m_dax_part_off = dif->dax_part_off; up_read(&devs->rwsem); } else if (devs->extra_devices) { down_read(&devs->rwsem); @@ -185,6 +226,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) map->m_pa -= startoff; map->m_bdev = dif->bdev; map->m_daxdev = dif->dax_dev; + map->m_dax_part_off = dif->dax_part_off; break; } } @@ -215,9 +257,11 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret) return ret; - iomap->bdev = mdev.m_bdev; - iomap->dax_dev = mdev.m_daxdev; iomap->offset = map.m_la; + if (flags & IOMAP_DAX) + iomap->dax_dev = mdev.m_daxdev; + else + iomap->bdev = mdev.m_bdev; iomap->length = map.m_llen; iomap->flags = 0; iomap->private = NULL; @@ -231,19 +275,21 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, } if (map.m_flags & EROFS_MAP_META) { - struct page *ipage; + void *ptr; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; iomap->type = IOMAP_INLINE; - ipage = erofs_get_meta_page(inode->i_sb, - erofs_blknr(mdev.m_pa)); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - iomap->inline_data = page_address(ipage) + - erofs_blkoff(mdev.m_pa); - iomap->private = ipage; + ptr = erofs_read_metabuf(&buf, inode->i_sb, + erofs_blknr(mdev.m_pa), EROFS_KMAP); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa); + iomap->private = buf.base; } else { iomap->type = IOMAP_MAPPED; iomap->addr = mdev.m_pa; + if (flags & IOMAP_DAX) + iomap->addr += mdev.m_dax_part_off; } return 0; } @@ -251,12 +297,17 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned int flags, struct iomap *iomap) { - struct page *ipage = iomap->private; + void *ptr = iomap->private; + + if (ptr) { + struct erofs_buf buf = { + .page = kmap_to_page(ptr), + .base = ptr, + .kmap_type = EROFS_KMAP, + }; - if (ipage) { DBG_BUGON(iomap->type != IOMAP_INLINE); - unlock_page(ipage); - put_page(ipage); + erofs_put_metabuf(&buf); } else { DBG_BUGON(iomap->type == IOMAP_INLINE); } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index bf37fc76b182..3efa686c7644 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -16,6 +16,14 @@ #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) #endif +struct z_erofs_lz4_decompress_ctx { + struct z_erofs_decompress_req *rq; + /* # of encoded, decoded pages */ + unsigned int inpages, outpages; + /* decoded block total length (used for in-place decompression) */ + unsigned int oend; +}; + int z_erofs_load_lz4_config(struct super_block *sb, struct erofs_super_block *dsb, struct z_erofs_lz4_cfgs *lz4, int size) @@ -56,11 +64,10 @@ int z_erofs_load_lz4_config(struct super_block *sb, * Fill all gaps with bounce pages if it's a sparse page list. 
Also check if * all physical pages are consecutive, which can be seen for moderate CR. */ -static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, +static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, struct page **pagepool) { - const unsigned int nr = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + struct z_erofs_decompress_req *rq = ctx->rq; struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, BITS_PER_LONG)] = { 0 }; @@ -70,7 +77,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, unsigned int i, j, top; top = 0; - for (i = j = 0; i < nr; ++i, ++j) { + for (i = j = 0; i < ctx->outpages; ++i, ++j) { struct page *const page = rq->out[i]; struct page *victim; @@ -112,41 +119,36 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, return kaddr ? 1 : 0; } -static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq, +static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, void *inpage, unsigned int *inputmargin, int *maptype, - bool support_0padding) + bool may_inplace) { - unsigned int nrpages_in, nrpages_out; - unsigned int ofull, oend, inputsize, total, i, j; + struct z_erofs_decompress_req *rq = ctx->rq; + unsigned int omargin, total, i, j; struct page **in; void *src, *tmp; - inputsize = rq->inputsize; - nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT; - oend = rq->pageofs_out + rq->outputsize; - ofull = PAGE_ALIGN(oend); - nrpages_out = ofull >> PAGE_SHIFT; - if (rq->inplace_io) { - if (rq->partial_decoding || !support_0padding || - ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize)) + omargin = PAGE_ALIGN(ctx->oend) - ctx->oend; + if (rq->partial_decoding || !may_inplace || + omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) goto docopy; - for (i = 0; i < nrpages_in; ++i) { + for (i = 0; i < ctx->inpages; ++i) { DBG_BUGON(rq->in[i] == NULL); - for (j = 0; j < nrpages_out - nrpages_in + i; ++j) + for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j) if (rq->out[j] == rq->in[i]) goto docopy; } } - if (nrpages_in <= 1) { + if (ctx->inpages <= 1) { *maptype = 0; return inpage; } kunmap_atomic(inpage); might_sleep(); - src = erofs_vm_map_ram(rq->in, nrpages_in); + src = erofs_vm_map_ram(rq->in, ctx->inpages); if (!src) return ERR_PTR(-ENOMEM); *maptype = 1; @@ -155,7 +157,7 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq, docopy: /* Or copy compressed data which can be overlapped to per-CPU buffer */ in = rq->in; - src = erofs_get_pcpubuf(nrpages_in); + src = erofs_get_pcpubuf(ctx->inpages); if (!src) { DBG_BUGON(1); kunmap_atomic(inpage); @@ -182,36 +184,53 @@ docopy: return src; } -static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, +/* + * Get the exact inputsize with zero_padding feature. + * - For LZ4, it should work if zero_padding feature is on (5.3+); + * - For MicroLZMA, it'd be enabled all the time. 
+ */ +int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, + unsigned int padbufsize) +{ + const char *padend; + + padend = memchr_inv(padbuf, 0, padbufsize); + if (!padend) + return -EFSCORRUPTED; + rq->inputsize -= padend - padbuf; + rq->pageofs_in += padend - padbuf; + return 0; +} + +static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, u8 *out) { + struct z_erofs_decompress_req *rq = ctx->rq; + bool support_0padding = false, may_inplace = false; unsigned int inputmargin; u8 *headpage, *src; - bool support_0padding; int ret, maptype; DBG_BUGON(*rq->in == NULL); headpage = kmap_atomic(*rq->in); - inputmargin = 0; - support_0padding = false; - /* decompression inplace is only safe when 0padding is enabled */ - if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) { + /* LZ4 decompression inplace is only safe if zero_padding is enabled */ + if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) { support_0padding = true; - - while (!headpage[inputmargin & ~PAGE_MASK]) - if (!(++inputmargin & ~PAGE_MASK)) - break; - - if (inputmargin >= rq->inputsize) { + ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in, + min_t(unsigned int, rq->inputsize, + EROFS_BLKSIZ - rq->pageofs_in)); + if (ret) { kunmap_atomic(headpage); - return -EIO; + return ret; } + may_inplace = !((rq->pageofs_in + rq->inputsize) & + (EROFS_BLKSIZ - 1)); } - rq->inputsize -= inputmargin; - src = z_erofs_lz4_handle_inplace_io(rq, headpage, &inputmargin, - &maptype, support_0padding); + inputmargin = rq->pageofs_in; + src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin, + &maptype, may_inplace); if (IS_ERR(src)) return PTR_ERR(src); @@ -240,9 +259,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, } if (maptype == 0) { - kunmap_atomic(src); + kunmap_atomic(headpage); } else if (maptype == 1) { - vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT); + vm_unmap_ram(src, ctx->inpages); } else if (maptype == 2) { erofs_put_pcpubuf(src); } else { @@ -255,14 +274,18 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool) { - const unsigned int nrpages_out = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + struct z_erofs_lz4_decompress_ctx ctx; unsigned int dst_maptype; void *dst; int ret; + ctx.rq = rq; + ctx.oend = rq->pageofs_out + rq->outputsize; + ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT; + ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; + /* one optimized fast path only for non bigpcluster cases yet */ - if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) { + if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); dst = kmap_atomic(*rq->out); dst_maptype = 0; @@ -270,27 +293,25 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, } /* general decoding path which can be used for all cases */ - ret = z_erofs_lz4_prepare_dstpages(rq, pagepool); - if (ret < 0) + ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool); + if (ret < 0) { return ret; - if (ret) { + } else if (ret > 0) { dst = page_address(*rq->out); dst_maptype = 1; - goto dstmap_out; + } else { + dst = erofs_vm_map_ram(rq->out, ctx.outpages); + if (!dst) + return -ENOMEM; + dst_maptype = 2; } - dst = erofs_vm_map_ram(rq->out, nrpages_out); - if (!dst) - return -ENOMEM; - dst_maptype = 2; - dstmap_out: - ret = z_erofs_lz4_decompress_mem(rq, dst + rq->pageofs_out); - + ret = 
z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out); if (!dst_maptype) kunmap_atomic(dst); else if (dst_maptype == 2) - vm_unmap_ram(dst, nrpages_out); + vm_unmap_ram(dst, ctx.outpages); return ret; } @@ -299,7 +320,8 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out; + const unsigned int righthalf = min_t(unsigned int, rq->outputsize, + PAGE_SIZE - rq->pageofs_out); unsigned char *src, *dst; if (nrpages_out > 2) { @@ -312,7 +334,7 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, return 0; } - src = kmap_atomic(*rq->in); + src = kmap_atomic(*rq->in) + rq->pageofs_in; if (rq->out[0]) { dst = kmap_atomic(rq->out[0]); memcpy(dst + rq->pageofs_out, src, righthalf); diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 50045510a1f4..05a3063cf2bc 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -156,7 +156,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; const unsigned int nrpages_in = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; - unsigned int inputmargin, inlen, outlen, pageofs; + unsigned int inlen, outlen, pageofs; struct z_erofs_lzma *strm; u8 *kin; bool bounced = false; @@ -164,16 +164,13 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, /* 1. get the exact LZMA compressed size */ kin = kmap(*rq->in); - inputmargin = 0; - while (!kin[inputmargin & ~PAGE_MASK]) - if (!(++inputmargin & ~PAGE_MASK)) - break; - - if (inputmargin >= PAGE_SIZE) { + err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in, + min_t(unsigned int, rq->inputsize, + EROFS_BLKSIZ - rq->pageofs_in)); + if (err) { kunmap(*rq->in); - return -EFSCORRUPTED; + return err; } - rq->inputsize -= inputmargin; /* 2. get an available lzma context */ again: @@ -193,9 +190,9 @@ again: xz_dec_microlzma_reset(strm->state, inlen, outlen, !rq->partial_decoding); pageofs = rq->pageofs_out; - strm->buf.in = kin + inputmargin; + strm->buf.in = kin + rq->pageofs_in; strm->buf.in_pos = 0; - strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin); + strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - rq->pageofs_in); inlen -= strm->buf.in_size; strm->buf.out = NULL; strm->buf.out_pos = 0; diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index eee9b0b31b63..18e59821c597 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. 
* https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "internal.h" @@ -67,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, static int erofs_readdir(struct file *f, struct dir_context *ctx) { struct inode *dir = file_inode(f); - struct address_space *mapping = dir->i_mapping; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; const size_t dirsize = i_size_read(dir); unsigned int i = ctx->pos / EROFS_BLKSIZ; unsigned int ofs = ctx->pos % EROFS_BLKSIZ; @@ -75,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) bool initial = true; while (ctx->pos < dirsize) { - struct page *dentry_page; struct erofs_dirent *de; unsigned int nameoff, maxsize; - dentry_page = read_mapping_page(mapping, i, NULL); - if (dentry_page == ERR_PTR(-ENOMEM)) { - err = -ENOMEM; - break; - } else if (IS_ERR(dentry_page)) { + de = erofs_bread(&buf, dir, i, EROFS_KMAP); + if (IS_ERR(de)) { erofs_err(dir->i_sb, "fail to readdir of logical block %u of nid %llu", i, EROFS_I(dir)->nid); - err = -EFSCORRUPTED; + err = PTR_ERR(de); break; } - de = (struct erofs_dirent *)kmap(dentry_page); - nameoff = le16_to_cpu(de->nameoff); - if (nameoff < sizeof(struct erofs_dirent) || nameoff >= PAGE_SIZE) { erofs_err(dir->i_sb, @@ -119,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) err = erofs_fill_dentries(dir, ctx, de, &ofs, nameoff, maxsize); skip_this: - kunmap(dentry_page); - - put_page(dentry_page); - ctx->pos = blknr_to_addr(i) + ofs; if (err) @@ -130,6 +120,7 @@ skip_this: ++i; ofs = 0; } + erofs_put_metabuf(&buf); return err < 0 ? err : 0; } diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 083997a034e5..1238ca104f09 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -12,24 +12,27 @@ #define EROFS_SUPER_OFFSET 1024 #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 +#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should * be incompatible with this kernel version. 
*/ -#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 +#define EROFS_FEATURE_INCOMPAT_ZERO_PADDING 0x00000001 #define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002 #define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002 #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004 #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 +#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010 #define EROFS_ALL_FEATURE_INCOMPAT \ - (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \ + (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ - EROFS_FEATURE_INCOMPAT_COMPR_HEAD2) + EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ + EROFS_FEATURE_INCOMPAT_ZTAILPACKING) #define EROFS_SB_EXTSLOT_SIZE 16 @@ -184,8 +187,8 @@ struct erofs_inode_extended { __le32 i_uid; __le32 i_gid; - __le64 i_ctime; - __le32 i_ctime_nsec; + __le64 i_mtime; + __le32 i_mtime_nsec; __le32 i_nlink; __u8 i_reserved2[16]; }; @@ -209,7 +212,7 @@ struct erofs_xattr_ibody_header { __le32 h_reserved; __u8 h_shared_count; __u8 h_reserved2[7]; - __le32 h_shared_xattrs[0]; /* shared xattr id array */ + __le32 h_shared_xattrs[]; /* shared xattr id array */ }; /* Name indexes */ @@ -226,7 +229,7 @@ struct erofs_xattr_entry { __u8 e_name_index; /* attribute name index */ __le16 e_value_size; /* size of attribute value */ /* followed by e_name and e_value */ - char e_name[0]; /* attribute name */ + char e_name[]; /* attribute name */ }; static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) @@ -292,13 +295,17 @@ struct z_erofs_lzma_cfgs { * (4B) + 2B + (4B) if compacted 2B is on. * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) + * bit 3 : tailpacking inline pcluster (0 - off; 1 - on) */ #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 +#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008 struct z_erofs_map_header { - __le32 h_reserved1; + __le16 h_reserved1; + /* indicates the encoded size of tailpacking data */ + __le16 h_idata_size; __le16 h_advise; /* * bit 0-3 : algorithm type of head 1 (logical cluster type 01); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 2345f1de438e..e8b37ba5e9ad 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -13,8 +13,8 @@ * the inode payload page if it's an extended inode) in order to fill * inline data if possible. 
*/ -static struct page *erofs_read_inode(struct inode *inode, - unsigned int *ofs) +static void *erofs_read_inode(struct erofs_buf *buf, + struct inode *inode, unsigned int *ofs) { struct super_block *sb = inode->i_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); @@ -22,7 +22,7 @@ static struct page *erofs_read_inode(struct inode *inode, const erofs_off_t inode_loc = iloc(sbi, vi->nid); erofs_blk_t blkaddr, nblks = 0; - struct page *page; + void *kaddr; struct erofs_inode_compact *dic; struct erofs_inode_extended *die, *copied = NULL; unsigned int ifmt; @@ -34,14 +34,14 @@ static struct page *erofs_read_inode(struct inode *inode, erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u", __func__, vi->nid, *ofs, blkaddr); - page = erofs_get_meta_page(sb, blkaddr); - if (IS_ERR(page)) { + kaddr = erofs_read_metabuf(buf, sb, blkaddr, EROFS_KMAP); + if (IS_ERR(kaddr)) { erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", - vi->nid, PTR_ERR(page)); - return page; + vi->nid, PTR_ERR(kaddr)); + return kaddr; } - dic = page_address(page) + *ofs; + dic = kaddr + *ofs; ifmt = le16_to_cpu(dic->i_format); if (ifmt & ~EROFS_I_ALL) { @@ -62,12 +62,12 @@ static struct page *erofs_read_inode(struct inode *inode, switch (erofs_inode_version(ifmt)) { case EROFS_INODE_LAYOUT_EXTENDED: vi->inode_isize = sizeof(struct erofs_inode_extended); - /* check if the inode acrosses page boundary */ - if (*ofs + vi->inode_isize <= PAGE_SIZE) { + /* check if the extended inode acrosses block boundary */ + if (*ofs + vi->inode_isize <= EROFS_BLKSIZ) { *ofs += vi->inode_isize; die = (struct erofs_inode_extended *)dic; } else { - const unsigned int gotten = PAGE_SIZE - *ofs; + const unsigned int gotten = EROFS_BLKSIZ - *ofs; copied = kmalloc(vi->inode_isize, GFP_NOFS); if (!copied) { @@ -75,18 +75,16 @@ static struct page *erofs_read_inode(struct inode *inode, goto err_out; } memcpy(copied, dic, gotten); - unlock_page(page); - put_page(page); - - page = erofs_get_meta_page(sb, blkaddr + 1); - if (IS_ERR(page)) { - erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld", - vi->nid, PTR_ERR(page)); + kaddr = erofs_read_metabuf(buf, sb, blkaddr + 1, + EROFS_KMAP); + if (IS_ERR(kaddr)) { + erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld", + vi->nid, PTR_ERR(kaddr)); kfree(copied); - return page; + return kaddr; } *ofs = vi->inode_isize - gotten; - memcpy((u8 *)copied + gotten, page_address(page), *ofs); + memcpy((u8 *)copied + gotten, kaddr, *ofs); die = copied; } vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); @@ -115,8 +113,8 @@ static struct page *erofs_read_inode(struct inode *inode, set_nlink(inode, le32_to_cpu(die->i_nlink)); /* extended inode has its own timestamp */ - inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime); - inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec); + inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime); + inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec); inode->i_size = le64_to_cpu(die->i_size); @@ -200,7 +198,7 @@ static struct page *erofs_read_inode(struct inode *inode, inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; else inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; - return page; + return kaddr; bogusimode: erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", @@ -209,12 +207,11 @@ bogusimode: err_out: DBG_BUGON(1); kfree(copied); - unlock_page(page); - put_page(page); + erofs_put_metabuf(buf); return ERR_PTR(err); } -static int erofs_fill_symlink(struct inode *inode, void *data, +static int 
erofs_fill_symlink(struct inode *inode, void *kaddr, unsigned int m_pofs) { struct erofs_inode *vi = EROFS_I(inode); @@ -222,7 +219,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, /* if it cannot be handled with fast symlink scheme */ if (vi->datalayout != EROFS_INODE_FLAT_INLINE || - inode->i_size >= PAGE_SIZE) { + inode->i_size >= EROFS_BLKSIZ) { inode->i_op = &erofs_symlink_iops; return 0; } @@ -232,8 +229,8 @@ static int erofs_fill_symlink(struct inode *inode, void *data, return -ENOMEM; m_pofs += vi->xattr_isize; - /* inline symlink data shouldn't cross page boundary as well */ - if (m_pofs + inode->i_size > PAGE_SIZE) { + /* inline symlink data shouldn't cross block boundary */ + if (m_pofs + inode->i_size > EROFS_BLKSIZ) { kfree(lnk); erofs_err(inode->i_sb, "inline data cross block boundary @ nid %llu", @@ -241,8 +238,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, DBG_BUGON(1); return -EFSCORRUPTED; } - - memcpy(lnk, data + m_pofs, inode->i_size); + memcpy(lnk, kaddr + m_pofs, inode->i_size); lnk[inode->i_size] = '\0'; inode->i_link = lnk; @@ -253,16 +249,17 @@ static int erofs_fill_symlink(struct inode *inode, void *data, static int erofs_fill_inode(struct inode *inode, int isdir) { struct erofs_inode *vi = EROFS_I(inode); - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + void *kaddr; unsigned int ofs; int err = 0; trace_erofs_fill_inode(inode, isdir); /* read inode base data from disk */ - page = erofs_read_inode(inode, &ofs); - if (IS_ERR(page)) - return PTR_ERR(page); + kaddr = erofs_read_inode(&buf, inode, &ofs); + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); /* setup the new inode */ switch (inode->i_mode & S_IFMT) { @@ -278,7 +275,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir) inode->i_fop = &erofs_dir_fops; break; case S_IFLNK: - err = erofs_fill_symlink(inode, page_address(page), ofs); + err = erofs_fill_symlink(inode, kaddr, ofs); if (err) goto out_unlock; inode_nohighmem(inode); @@ -302,8 +299,7 @@ static int erofs_fill_inode(struct inode *inode, int isdir) inode->i_mapping->a_ops = &erofs_raw_access_aops; out_unlock: - unlock_page(page); - put_page(page); + erofs_put_metabuf(&buf); return err; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 3265688af7f9..5298c4ee277d 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -51,17 +51,24 @@ struct erofs_device_info { char *path; struct block_device *bdev; struct dax_device *dax_dev; + u64 dax_part_off; u32 blocks; u32 mapped_blkaddr; }; +enum { + EROFS_SYNC_DECOMPRESS_AUTO, + EROFS_SYNC_DECOMPRESS_FORCE_ON, + EROFS_SYNC_DECOMPRESS_FORCE_OFF +}; + struct erofs_mount_opts { #ifdef CONFIG_EROFS_FS_ZIP /* current strategy of how to use managed cache */ unsigned char cache_strategy; - /* strategy of sync decompression (false - auto, true - force on) */ - bool readahead_sync_decompress; + /* strategy of sync decompression (0 - auto, 1 - force on, 2 - force off) */ + unsigned int sync_decompress; /* threshold for decompression synchronously */ unsigned int max_sync_decompress_pages; @@ -109,6 +116,7 @@ struct erofs_sb_info { #endif /* CONFIG_EROFS_FS_ZIP */ struct erofs_dev_context *devs; struct dax_device *dax_dev; + u64 dax_part_off; u64 total_blocks; u32 primarydevice_blocks; @@ -134,6 +142,10 @@ struct erofs_sb_info { u8 volume_name[16]; /* volume name */ u32 feature_compat; u32 feature_incompat; + + /* sysfs support */ + struct kobject s_kobj; /* /sys/fs/erofs/<devname> */ + struct completion s_kobj_unregister; }; #define 
EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) @@ -241,6 +253,19 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) #error erofs cannot be used in this platform #endif +enum erofs_kmap_type { + EROFS_NO_KMAP, /* don't map the buffer */ + EROFS_KMAP, /* use kmap() to map the buffer */ + EROFS_KMAP_ATOMIC, /* use kmap_atomic() to map the buffer */ +}; + +struct erofs_buf { + struct page *page; + void *base; + enum erofs_kmap_type kmap_type; +}; +#define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) + #define ROOT_NID(sb) ((sb)->root_nid) #define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) @@ -258,10 +283,13 @@ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \ return sbi->feature_##compat & EROFS_FEATURE_##feature; \ } -EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING) +EROFS_FEATURE_FUNCS(zero_padding, incompat, INCOMPAT_ZERO_PADDING) EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS) EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER) +EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE) EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) +EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2) +EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) /* atomic flag definitions */ @@ -296,6 +324,9 @@ struct erofs_inode { unsigned short z_advise; unsigned char z_algorithmtype[2]; unsigned char z_logical_clusterbits; + unsigned long z_tailextent_headlcn; + erofs_off_t z_idataoff; + unsigned short z_idata_size; }; #endif /* CONFIG_EROFS_FS_ZIP */ }; @@ -390,14 +421,14 @@ enum { #define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) struct erofs_map_blocks { + struct erofs_buf buf; + erofs_off_t m_pa, m_la; u64 m_plen, m_llen; unsigned short m_deviceid; char m_algorithmformat; unsigned int m_flags; - - struct page *mpage; }; /* Flags used by erofs_map_blocks_flatmode() */ @@ -409,6 +440,8 @@ struct erofs_map_blocks { #define EROFS_GET_BLOCKS_FIEMAP 0x0002 /* Used to map the whole extent if non-negligible data is requested for LZMA */ #define EROFS_GET_BLOCKS_READMORE 0x0004 +/* Used to map tail extent for tailpacking inline pcluster */ +#define EROFS_GET_BLOCKS_FINDTAIL 0x0008 enum { Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, @@ -436,6 +469,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_dev { struct block_device *m_bdev; struct dax_device *m_daxdev; + u64 m_dax_part_off; erofs_off_t m_pa; unsigned int m_deviceid; @@ -443,7 +477,12 @@ struct erofs_map_dev { /* data.c */ extern const struct file_operations erofs_file_fops; -struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr); +void erofs_unmap_metabuf(struct erofs_buf *buf); +void erofs_put_metabuf(struct erofs_buf *buf); +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type); +void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, + erofs_blk_t blkaddr, enum erofs_kmap_type type); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); @@ -498,6 +537,12 @@ int erofs_pcpubuf_growsize(unsigned int nrpages); void erofs_pcpubuf_init(void); void erofs_pcpubuf_exit(void); +/* sysfs.c */ +int erofs_register_sysfs(struct super_block *sb); +void erofs_unregister_sysfs(struct super_block 
*sb); +int __init erofs_init_sysfs(void); +void erofs_exit_sysfs(void); + /* utils.c / zdata.c */ struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp); static inline void erofs_pagepool_add(struct page **pagepool, diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 8629e616028c..554efa363317 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "xattr.h" @@ -86,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic(struct inode *dir, - struct erofs_qstr *name, - int *_ndirents) +static void *find_target_block_classic(struct erofs_buf *target, + struct inode *dir, + struct erofs_qstr *name, + int *_ndirents) { unsigned int startprfx, endprfx; int head, back; - struct address_space *const mapping = dir->i_mapping; - struct page *candidate = ERR_PTR(-ENOENT); + void *candidate = ERR_PTR(-ENOENT); startprfx = endprfx = 0; head = 0; @@ -101,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir, while (head <= back) { const int mid = head + (back - head) / 2; - struct page *page = read_mapping_page(mapping, mid, NULL); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct erofs_dirent *de; - if (!IS_ERR(page)) { - struct erofs_dirent *de = kmap_atomic(page); + de = erofs_bread(&buf, dir, mid, EROFS_KMAP); + if (!IS_ERR(de)) { const int nameoff = nameoff_from_disk(de->nameoff, EROFS_BLKSIZ); const int ndirents = nameoff / sizeof(*de); @@ -113,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir, struct erofs_qstr dname; if (!ndirents) { - kunmap_atomic(de); - put_page(page); + erofs_put_metabuf(&buf); erofs_err(dir->i_sb, "corrupted dir block %d @ nid %llu", mid, EROFS_I(dir)->nid); DBG_BUGON(1); - page = ERR_PTR(-EFSCORRUPTED); + de = ERR_PTR(-EFSCORRUPTED); goto out; } @@ -135,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - kunmap_atomic(de); if (!diff) { *_ndirents = 0; @@ -145,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir, startprfx = matched; if (!IS_ERR(candidate)) - put_page(candidate); - candidate = page; + erofs_put_metabuf(target); + *target = buf; + candidate = de; *_ndirents = ndirents; } else { - put_page(page); + erofs_put_metabuf(&buf); back = mid - 1; endprfx = matched; @@ -158,8 +159,8 @@ static struct page *find_target_block_classic(struct inode *dir, } out: /* free if the candidate is valid */ if (!IS_ERR(candidate)) - put_page(candidate); - return page; + erofs_put_metabuf(target); + return de; } return candidate; } @@ -169,8 +170,7 @@ int erofs_namei(struct inode *dir, erofs_nid_t *nid, unsigned int *d_type) { int ndirents; - struct page *page; - void *data; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_dirent *de; struct erofs_qstr qn; @@ -181,26 +181,20 @@ int erofs_namei(struct inode *dir, qn.end = name->name + name->len; ndirents = 0; - page = find_target_block_classic(dir, &qn, &ndirents); - if (IS_ERR(page)) - return PTR_ERR(page); + de = find_target_block_classic(&buf, dir, &qn, &ndirents); + if (IS_ERR(de)) + return PTR_ERR(de); - data = kmap_atomic(page); /* the target page has been mapped */ if (ndirents) - de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); - else - de = 
(struct erofs_dirent *)data; + de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); if (!IS_ERR(de)) { *nid = le64_to_cpu(de->nid); *d_type = de->file_type; } - - kunmap_atomic(data); - put_page(page); - + erofs_put_metabuf(&buf); return PTR_ERR_OR_ZERO(de); } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 6a969b1e0ee6..0c4b41130c2f 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ + * Copyright (C) 2021, Alibaba Cloud */ #include <linux/module.h> #include <linux/buffer_head.h> @@ -83,7 +84,7 @@ static void erofs_inode_init_once(void *ptr) static struct inode *erofs_alloc_inode(struct super_block *sb) { struct erofs_inode *vi = - kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL); + alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL); if (!vi) return NULL; @@ -124,80 +125,50 @@ static bool check_layout_compatibility(struct super_block *sb, #ifdef CONFIG_EROFS_FS_ZIP /* read variable-sized metadata, offset will be aligned by 4-byte */ -static void *erofs_read_metadata(struct super_block *sb, struct page **pagep, +static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp) { - struct page *page = *pagep; u8 *buffer, *ptr; int len, i, cnt; - erofs_blk_t blk; *offset = round_up(*offset, 4); - blk = erofs_blknr(*offset); + ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP); + if (IS_ERR(ptr)) + return ptr; - if (!page || page->index != blk) { - if (page) { - unlock_page(page); - put_page(page); - } - page = erofs_get_meta_page(sb, blk); - if (IS_ERR(page)) - goto err_nullpage; - } - - ptr = kmap(page); len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]); if (!len) len = U16_MAX + 1; buffer = kmalloc(len, GFP_KERNEL); - if (!buffer) { - buffer = ERR_PTR(-ENOMEM); - goto out; - } + if (!buffer) + return ERR_PTR(-ENOMEM); *offset += sizeof(__le16); *lengthp = len; for (i = 0; i < len; i += cnt) { cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i); - blk = erofs_blknr(*offset); - - if (!page || page->index != blk) { - if (page) { - kunmap(page); - unlock_page(page); - put_page(page); - } - page = erofs_get_meta_page(sb, blk); - if (IS_ERR(page)) { - kfree(buffer); - goto err_nullpage; - } - ptr = kmap(page); + ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), + EROFS_KMAP); + if (IS_ERR(ptr)) { + kfree(buffer); + return ptr; } memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt); *offset += cnt; } -out: - kunmap(page); - *pagep = page; return buffer; -err_nullpage: - *pagep = NULL; - return page; } static int erofs_load_compr_cfgs(struct super_block *sb, struct erofs_super_block *dsb) { - struct erofs_sb_info *sbi; - struct page *page; + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; unsigned int algs, alg; erofs_off_t offset; - int size, ret; + int size, ret = 0; - sbi = EROFS_SB(sb); sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); - if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); @@ -205,20 +176,17 @@ static int erofs_load_compr_cfgs(struct super_block *sb, } offset = EROFS_SUPER_OFFSET + sbi->sb_size; - page = NULL; alg = 0; - ret = 0; - for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { void *data; if (!(algs & 1)) continue; - data = erofs_read_metadata(sb, &page, &offset, &size); + 
data = erofs_read_metadata(sb, &buf, &offset, &size); if (IS_ERR(data)) { ret = PTR_ERR(data); - goto err; + break; } switch (alg) { @@ -234,13 +202,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb, } kfree(data); if (ret) - goto err; - } -err: - if (page) { - unlock_page(page); - put_page(page); + break; } + erofs_put_metabuf(&buf); return ret; } #else @@ -261,7 +225,7 @@ static int erofs_init_devices(struct super_block *sb, struct erofs_sb_info *sbi = EROFS_SB(sb); unsigned int ondisk_extradevs; erofs_off_t pos; - struct page *page = NULL; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_device_info *dif; struct erofs_deviceslot *dis; void *ptr; @@ -285,22 +249,13 @@ static int erofs_init_devices(struct super_block *sb, pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; down_read(&sbi->devs->rwsem); idr_for_each_entry(&sbi->devs->tree, dif, id) { - erofs_blk_t blk = erofs_blknr(pos); struct block_device *bdev; - if (!page || page->index != blk) { - if (page) { - kunmap(page); - unlock_page(page); - put_page(page); - } - - page = erofs_get_meta_page(sb, blk); - if (IS_ERR(page)) { - up_read(&sbi->devs->rwsem); - return PTR_ERR(page); - } - ptr = kmap(page); + ptr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), + EROFS_KMAP); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + break; } dis = ptr + erofs_blkoff(pos); @@ -309,43 +264,36 @@ static int erofs_init_devices(struct super_block *sb, sb->s_type); if (IS_ERR(bdev)) { err = PTR_ERR(bdev); - goto err_out; + break; } dif->bdev = bdev; - dif->dax_dev = fs_dax_get_by_bdev(bdev); + dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off); dif->blocks = le32_to_cpu(dis->blocks); dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); sbi->total_blocks += dif->blocks; pos += EROFS_DEVT_SLOT_SIZE; } -err_out: up_read(&sbi->devs->rwsem); - if (page) { - kunmap(page); - unlock_page(page); - put_page(page); - } + erofs_put_metabuf(&buf); return err; } static int erofs_read_superblock(struct super_block *sb) { struct erofs_sb_info *sbi; - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; unsigned int blkszbits; void *data; int ret; - page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); - if (IS_ERR(page)) { + data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); + if (IS_ERR(data)) { erofs_err(sb, "cannot read erofs superblock"); - return PTR_ERR(page); + return PTR_ERR(data); } sbi = EROFS_SB(sb); - - data = kmap(page); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); ret = -EINVAL; @@ -411,9 +359,11 @@ static int erofs_read_superblock(struct super_block *sb) /* handle multiple devices */ ret = erofs_init_devices(sb, dsb); + + if (erofs_sb_has_ztailpacking(sbi)) + erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. 
Use at your own risk!"); out: - kunmap(page); - put_page(page); + erofs_put_metabuf(&buf); return ret; } @@ -423,7 +373,7 @@ static void erofs_default_options(struct erofs_fs_context *ctx) #ifdef CONFIG_EROFS_FS_ZIP ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; ctx->opt.max_sync_decompress_pages = 3; - ctx->opt.readahead_sync_decompress = false; + ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; #endif #ifdef CONFIG_EROFS_FS_XATTR set_opt(&ctx->opt, XATTR_USER); @@ -582,25 +532,29 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask) return ret; } -static void erofs_managed_cache_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +/* + * It will be called only on inode eviction. In case that there are still some + * decompression requests in progress, wait with rescheduling for a bit here. + * We could introduce an extra locking instead but it seems unnecessary. + */ +static void erofs_managed_cache_invalidate_folio(struct folio *folio, + size_t offset, size_t length) { - const unsigned int stop = length + offset; + const size_t stop = length + offset; - DBG_BUGON(!PageLocked(page)); + DBG_BUGON(!folio_test_locked(folio)); /* Check for potential overflow in debug mode */ - DBG_BUGON(stop > PAGE_SIZE || stop < length); + DBG_BUGON(stop > folio_size(folio) || stop < length); - if (offset == 0 && stop == PAGE_SIZE) - while (!erofs_managed_cache_releasepage(page, GFP_NOFS)) + if (offset == 0 && stop == folio_size(folio)) + while (!erofs_managed_cache_releasepage(&folio->page, GFP_NOFS)) cond_resched(); } static const struct address_space_operations managed_cache_aops = { .releasepage = erofs_managed_cache_releasepage, - .invalidatepage = erofs_managed_cache_invalidatepage, + .invalidate_folio = erofs_managed_cache_invalidate_folio, }; static int erofs_init_managed_cache(struct super_block *sb) @@ -615,8 +569,7 @@ static int erofs_init_managed_cache(struct super_block *sb) inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &managed_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, - GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->managed_cache = inode; return 0; } @@ -644,7 +597,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_fs_info = sbi; sbi->opt = ctx->opt; - sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev); + sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off); sbi->devs = ctx->devs; ctx->devs = NULL; @@ -652,10 +605,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - if (test_opt(&sbi->opt, DAX_ALWAYS) && - !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) { - errorfc(fc, "DAX unsupported by block device. Turning off DAX."); - clear_opt(&sbi->opt, DAX_ALWAYS); + if (test_opt(&sbi->opt, DAX_ALWAYS)) { + BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE); + + if (!sbi->dax_dev) { + errorfc(fc, "DAX unsupported by block device. 
Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } } sb->s_flags |= SB_RDONLY | SB_NOATIME; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -695,6 +651,10 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + err = erofs_register_sysfs(sb); + if (err) + return err; + erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); return 0; } @@ -808,6 +768,7 @@ static void erofs_put_super(struct super_block *sb) DBG_BUGON(!sbi); + erofs_unregister_sysfs(sb); erofs_shrinker_unregister(sb); #ifdef CONFIG_EROFS_FS_ZIP iput(sbi->managed_cache); @@ -852,6 +813,10 @@ static int __init erofs_module_init(void) if (err) goto zip_err; + err = erofs_init_sysfs(); + if (err) + goto sysfs_err; + err = register_filesystem(&erofs_fs_type); if (err) goto fs_err; @@ -859,6 +824,8 @@ static int __init erofs_module_init(void) return 0; fs_err: + erofs_exit_sysfs(); +sysfs_err: z_erofs_exit_zip_subsystem(); zip_err: z_erofs_lzma_exit(); @@ -877,6 +844,7 @@ static void __exit erofs_module_exit(void) /* Ensure all RCU free inodes / pclusters are safe to be destroyed. */ rcu_barrier(); + erofs_exit_sysfs(); z_erofs_exit_zip_subsystem(); z_erofs_lzma_exit(); erofs_exit_shrinker(); diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c new file mode 100644 index 000000000000..f3babf1e6608 --- /dev/null +++ b/fs/erofs/sysfs.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd. + * https://www.oppo.com/ + */ +#include <linux/sysfs.h> +#include <linux/kobject.h> + +#include "internal.h" + +enum { + attr_feature, + attr_pointer_ui, + attr_pointer_bool, +}; + +enum { + struct_erofs_sb_info, + struct_erofs_mount_opts, +}; + +struct erofs_attr { + struct attribute attr; + short attr_id; + int struct_type, offset; +}; + +#define EROFS_ATTR(_name, _mode, _id) \ +static struct erofs_attr erofs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ +} +#define EROFS_ATTR_FUNC(_name, _mode) EROFS_ATTR(_name, _mode, _name) +#define EROFS_ATTR_FEATURE(_name) EROFS_ATTR(_name, 0444, feature) + +#define EROFS_ATTR_OFFSET(_name, _mode, _id, _struct) \ +static struct erofs_attr erofs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ + .struct_type = struct_##_struct, \ + .offset = offsetof(struct _struct, _name),\ +} + +#define EROFS_ATTR_RW(_name, _id, _struct) \ + EROFS_ATTR_OFFSET(_name, 0644, _id, _struct) + +#define EROFS_RO_ATTR(_name, _id, _struct) \ + EROFS_ATTR_OFFSET(_name, 0444, _id, _struct) + +#define EROFS_ATTR_RW_UI(_name, _struct) \ + EROFS_ATTR_RW(_name, pointer_ui, _struct) + +#define EROFS_ATTR_RW_BOOL(_name, _struct) \ + EROFS_ATTR_RW(_name, pointer_bool, _struct) + +#define ATTR_LIST(name) (&erofs_attr_##name.attr) + +#ifdef CONFIG_EROFS_FS_ZIP +EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts); +#endif + +static struct attribute *erofs_attrs[] = { +#ifdef CONFIG_EROFS_FS_ZIP + ATTR_LIST(sync_decompress), +#endif + NULL, +}; +ATTRIBUTE_GROUPS(erofs); + +/* Features this copy of erofs supports */ +EROFS_ATTR_FEATURE(zero_padding); +EROFS_ATTR_FEATURE(compr_cfgs); +EROFS_ATTR_FEATURE(big_pcluster); +EROFS_ATTR_FEATURE(chunked_file); +EROFS_ATTR_FEATURE(device_table); +EROFS_ATTR_FEATURE(compr_head2); +EROFS_ATTR_FEATURE(sb_chksum); +EROFS_ATTR_FEATURE(ztailpacking); + +static struct attribute *erofs_feat_attrs[] = { + ATTR_LIST(zero_padding), + ATTR_LIST(compr_cfgs), + 
ATTR_LIST(big_pcluster), + ATTR_LIST(chunked_file), + ATTR_LIST(device_table), + ATTR_LIST(compr_head2), + ATTR_LIST(sb_chksum), + ATTR_LIST(ztailpacking), + NULL, +}; +ATTRIBUTE_GROUPS(erofs_feat); + +static unsigned char *__struct_ptr(struct erofs_sb_info *sbi, + int struct_type, int offset) +{ + if (struct_type == struct_erofs_sb_info) + return (unsigned char *)sbi + offset; + if (struct_type == struct_erofs_mount_opts) + return (unsigned char *)&sbi->opt + offset; + return NULL; +} + +static ssize_t erofs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, + s_kobj); + struct erofs_attr *a = container_of(attr, struct erofs_attr, attr); + unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset); + + switch (a->attr_id) { + case attr_feature: + return sysfs_emit(buf, "supported\n"); + case attr_pointer_ui: + if (!ptr) + return 0; + return sysfs_emit(buf, "%u\n", *(unsigned int *)ptr); + case attr_pointer_bool: + if (!ptr) + return 0; + return sysfs_emit(buf, "%d\n", *(bool *)ptr); + } + return 0; +} + +static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, + s_kobj); + struct erofs_attr *a = container_of(attr, struct erofs_attr, attr); + unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset); + unsigned long t; + int ret; + + switch (a->attr_id) { + case attr_pointer_ui: + if (!ptr) + return 0; + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + if (t != (unsigned int)t) + return -ERANGE; +#ifdef CONFIG_EROFS_FS_ZIP + if (!strcmp(a->attr.name, "sync_decompress") && + (t > EROFS_SYNC_DECOMPRESS_FORCE_OFF)) + return -EINVAL; +#endif + *(unsigned int *)ptr = t; + return len; + case attr_pointer_bool: + if (!ptr) + return 0; + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + if (t != 0 && t != 1) + return -EINVAL; + *(bool *)ptr = !!t; + return len; + } + return 0; +} + +static void erofs_sb_release(struct kobject *kobj) +{ + struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +static const struct sysfs_ops erofs_attr_ops = { + .show = erofs_attr_show, + .store = erofs_attr_store, +}; + +static struct kobj_type erofs_sb_ktype = { + .default_groups = erofs_groups, + .sysfs_ops = &erofs_attr_ops, + .release = erofs_sb_release, +}; + +static struct kobj_type erofs_ktype = { + .sysfs_ops = &erofs_attr_ops, +}; + +static struct kset erofs_root = { + .kobj = {.ktype = &erofs_ktype}, +}; + +static struct kobj_type erofs_feat_ktype = { + .default_groups = erofs_feat_groups, + .sysfs_ops = &erofs_attr_ops, +}; + +static struct kobject erofs_feat = { + .kset = &erofs_root, +}; + +int erofs_register_sysfs(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + int err; + + sbi->s_kobj.kset = &erofs_root; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, + "%s", sb->s_id); + if (err) + goto put_sb_kobj; + return 0; + +put_sb_kobj: + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + return err; +} + +void erofs_unregister_sysfs(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + if (sbi->s_kobj.state_in_sysfs) { + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + } +} + +int __init 
erofs_init_sysfs(void) +{ + int ret; + + kobject_set_name(&erofs_root.kobj, "erofs"); + erofs_root.kobj.parent = fs_kobj; + ret = kset_register(&erofs_root); + if (ret) + goto root_err; + + ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype, + NULL, "features"); + if (ret) + goto feat_err; + return ret; + +feat_err: + kobject_put(&erofs_feat); + kset_unregister(&erofs_root); +root_err: + return ret; +} + +void erofs_exit_sysfs(void) +{ + kobject_put(&erofs_feat); + kset_unregister(&erofs_root); +} diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 01c581e93c5f..8106bcb5a38d 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -2,39 +2,20 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ + * Copyright (C) 2021-2022, Alibaba Cloud */ #include <linux/security.h> #include "xattr.h" struct xattr_iter { struct super_block *sb; - struct page *page; + struct erofs_buf buf; void *kaddr; erofs_blk_t blkaddr; unsigned int ofs; }; -static inline void xattr_iter_end(struct xattr_iter *it, bool atomic) -{ - /* the only user of kunmap() is 'init_inode_xattrs' */ - if (!atomic) - kunmap(it->page); - else - kunmap_atomic(it->kaddr); - - unlock_page(it->page); - put_page(it->page); -} - -static inline void xattr_iter_end_final(struct xattr_iter *it) -{ - if (!it->page) - return; - - xattr_iter_end(it, true); -} - static int init_inode_xattrs(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); @@ -43,7 +24,6 @@ static int init_inode_xattrs(struct inode *inode) struct erofs_xattr_ibody_header *ih; struct super_block *sb; struct erofs_sb_info *sbi; - bool atomic_map; int ret = 0; /* the most case is that xattrs of this inode are initialized. */ @@ -91,26 +71,23 @@ static int init_inode_xattrs(struct inode *inode) sb = inode->i_sb; sbi = EROFS_SB(sb); + it.buf = __EROFS_BUF_INITIALIZER; it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); - it.page = erofs_get_meta_page(sb, it.blkaddr); - if (IS_ERR(it.page)) { - ret = PTR_ERR(it.page); + /* read in shared xattr array (non-atomic, see kmalloc below) */ + it.kaddr = erofs_read_metabuf(&it.buf, sb, it.blkaddr, EROFS_KMAP); + if (IS_ERR(it.kaddr)) { + ret = PTR_ERR(it.kaddr); goto out_unlock; } - /* read in shared xattr array (non-atomic, see kmalloc below) */ - it.kaddr = kmap(it.page); - atomic_map = false; - ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs); - vi->xattr_shared_count = ih->h_shared_count; vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count, sizeof(uint), GFP_KERNEL); if (!vi->xattr_shared_xattrs) { - xattr_iter_end(&it, atomic_map); + erofs_put_metabuf(&it.buf); ret = -ENOMEM; goto out_unlock; } @@ -122,25 +99,22 @@ static int init_inode_xattrs(struct inode *inode) if (it.ofs >= EROFS_BLKSIZ) { /* cannot be unaligned */ DBG_BUGON(it.ofs != EROFS_BLKSIZ); - xattr_iter_end(&it, atomic_map); - it.page = erofs_get_meta_page(sb, ++it.blkaddr); - if (IS_ERR(it.page)) { + it.kaddr = erofs_read_metabuf(&it.buf, sb, ++it.blkaddr, + EROFS_KMAP); + if (IS_ERR(it.kaddr)) { kfree(vi->xattr_shared_xattrs); vi->xattr_shared_xattrs = NULL; - ret = PTR_ERR(it.page); + ret = PTR_ERR(it.kaddr); goto out_unlock; } - - it.kaddr = kmap_atomic(it.page); - atomic_map = true; it.ofs = 0; } vi->xattr_shared_xattrs[i] = le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs)); it.ofs += sizeof(__le32); } - xattr_iter_end(&it, atomic_map); + erofs_put_metabuf(&it.buf); /* paired with smp_mb() at the beginning of the function. 
*/ smp_mb(); @@ -172,19 +146,11 @@ static inline int xattr_iter_fixup(struct xattr_iter *it) if (it->ofs < EROFS_BLKSIZ) return 0; - xattr_iter_end(it, true); - it->blkaddr += erofs_blknr(it->ofs); - - it->page = erofs_get_meta_page(it->sb, it->blkaddr); - if (IS_ERR(it->page)) { - int err = PTR_ERR(it->page); - - it->page = NULL; - return err; - } - - it->kaddr = kmap_atomic(it->page); + it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->kaddr)) + return PTR_ERR(it->kaddr); it->ofs = erofs_blkoff(it->ofs); return 0; } @@ -207,11 +173,10 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs); it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); - it->page = erofs_get_meta_page(inode->i_sb, it->blkaddr); - if (IS_ERR(it->page)) - return PTR_ERR(it->page); - - it->kaddr = kmap_atomic(it->page); + it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->kaddr)) + return PTR_ERR(it->kaddr); return vi->xattr_isize - xattr_header_sz; } @@ -272,7 +237,7 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs, entry.e_name_len - processed); /* handle name */ @@ -307,7 +272,7 @@ static int xattr_foreach(struct xattr_iter *it, it->ofs = 0; } - slice = min_t(unsigned int, PAGE_SIZE - it->ofs, + slice = min_t(unsigned int, EROFS_BLKSIZ - it->ofs, value_sz - processed); op->value(it, processed, it->kaddr + it->ofs, slice); it->ofs += slice; @@ -386,8 +351,6 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) if (ret != -ENOATTR) break; } - xattr_iter_end_final(&it->it); - return ret ? ret : it->buffer_size; } @@ -404,26 +367,16 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); - - if (!i || blkaddr != it->it.blkaddr) { - if (i) - xattr_iter_end(&it->it, true); - - it->it.page = erofs_get_meta_page(sb, blkaddr); - if (IS_ERR(it->it.page)) - return PTR_ERR(it->it.page); - - it->it.kaddr = kmap_atomic(it->it.page); - it->it.blkaddr = blkaddr; - } + it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->it.kaddr)) + return PTR_ERR(it->it.kaddr); + it->it.blkaddr = blkaddr; ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); if (ret != -ENOATTR) break; } - if (vi->xattr_shared_count) - xattr_iter_end_final(&it->it); - return ret ? ret : it->buffer_size; } @@ -452,10 +405,11 @@ int erofs_getxattr(struct inode *inode, int index, return ret; it.index = index; - it.name.len = strlen(name); if (it.name.len > EROFS_NAME_LEN) return -ERANGE; + + it.it.buf = __EROFS_BUF_INITIALIZER; it.name.name = name; it.buffer = buffer; @@ -465,6 +419,7 @@ int erofs_getxattr(struct inode *inode, int index, ret = inline_getxattr(inode, &it); if (ret == -ENOATTR) ret = shared_getxattr(inode, &it); + erofs_put_metabuf(&it.it.buf); return ret; } @@ -607,7 +562,6 @@ static int inline_listxattr(struct listxattr_iter *it) if (ret) break; } - xattr_iter_end_final(&it->it); return ret ? 
ret : it->buffer_ofs; } @@ -625,25 +579,16 @@ static int shared_listxattr(struct listxattr_iter *it) xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); - if (!i || blkaddr != it->it.blkaddr) { - if (i) - xattr_iter_end(&it->it, true); - - it->it.page = erofs_get_meta_page(sb, blkaddr); - if (IS_ERR(it->it.page)) - return PTR_ERR(it->it.page); - - it->it.kaddr = kmap_atomic(it->it.page); - it->it.blkaddr = blkaddr; - } + it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb, blkaddr, + EROFS_KMAP_ATOMIC); + if (IS_ERR(it->it.kaddr)) + return PTR_ERR(it->it.kaddr); + it->it.blkaddr = blkaddr; ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); if (ret) break; } - if (vi->xattr_shared_count) - xattr_iter_end_final(&it->it); - return ret ? ret : it->buffer_ofs; } @@ -659,6 +604,7 @@ ssize_t erofs_listxattr(struct dentry *dentry, if (ret) return ret; + it.it.buf = __EROFS_BUF_INITIALIZER; it.dentry = dentry; it.buffer = buffer; it.buffer_size = buffer_size; @@ -667,9 +613,10 @@ ssize_t erofs_listxattr(struct dentry *dentry, it.it.sb = dentry->d_sb; ret = inline_listxattr(&it); - if (ret < 0 && ret != -ENOATTR) - return ret; - return shared_listxattr(&it); + if (ret >= 0 || ret == -ENOATTR) + ret = shared_listxattr(&it); + erofs_put_metabuf(&it.it.buf); + return ret; } #ifdef CONFIG_EROFS_FS_POSIX_ACL diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index 94090c74b3f7..332462c59f11 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -86,4 +86,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu); #endif #endif - diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 9a249bfc2770..0ed880f42525 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages) static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl) { + unsigned int pclusterpages = z_erofs_pclusterpages(pcl); int i; for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; - if (pcl->pclusterpages > pcs->maxpages) + if (pclusterpages > pcs->maxpages) continue; kmem_cache_free(pcs->slab, pcl); @@ -191,7 +192,10 @@ enum z_erofs_collectmode { COLLECT_PRIMARY_FOLLOWED, }; -struct z_erofs_collector { +struct z_erofs_decompress_frontend { + struct inode *const inode; + struct erofs_map_blocks map; + struct z_erofs_pagevec_ctor vector; struct z_erofs_pcluster *pcl, *tailpcl; @@ -201,13 +205,6 @@ struct z_erofs_collector { z_erofs_next_pcluster_t owned_head; enum z_erofs_collectmode mode; -}; - -struct z_erofs_decompress_frontend { - struct inode *const inode; - - struct z_erofs_collector clt; - struct erofs_map_blocks map; bool readahead; /* used for applying cache strategy on the fly */ @@ -215,30 +212,30 @@ struct z_erofs_decompress_frontend { erofs_off_t headoffset; }; -#define COLLECTOR_INIT() { \ - .owned_head = Z_EROFS_PCLUSTER_TAIL, \ - .mode = COLLECT_PRIMARY_FOLLOWED } - #define DECOMPRESS_FRONTEND_INIT(__i) { \ - .inode = __i, .clt = COLLECTOR_INIT(), \ - .backmost = true, } + .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ + .mode = COLLECT_PRIMARY_FOLLOWED } static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES]; static DEFINE_MUTEX(z_pagemap_global_lock); -static void preload_compressed_pages(struct z_erofs_collector *clt, - struct address_space *mc, - enum z_erofs_cache_alloctype type, - struct page **pagepool) +static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, + 
enum z_erofs_cache_alloctype type, + struct page **pagepool) { - struct z_erofs_pcluster *pcl = clt->pcl; + struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); + struct z_erofs_pcluster *pcl = fe->pcl; bool standalone = true; + /* + * optimistic allocation without direct reclaim since inplace I/O + * can be used if low memory otherwise. + */ gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; struct page **pages; pgoff_t index; - if (clt->mode < COLLECT_PRIMARY_FOLLOWED) + if (fe->mode < COLLECT_PRIMARY_FOLLOWED) return; pages = pcl->compressed_pages; @@ -287,7 +284,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt, * managed cache since it can be moved to the bypass queue instead. */ if (standalone) - clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } /* called by erofs_shrinker to get rid of all compressed_pages */ @@ -298,6 +295,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, container_of(grp, struct z_erofs_pcluster, obj); int i; + DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); /* * refcount of workgroup is now freezed as 1, * therefore no need to worry about available decompression users. @@ -331,6 +329,7 @@ int erofs_try_to_free_cached_page(struct page *page) if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) { unsigned int i; + DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); for (i = 0; i < pcl->pclusterpages; ++i) { if (pcl->compressed_pages[i] == page) { WRITE_ONCE(pcl->compressed_pages[i], NULL); @@ -347,47 +346,47 @@ int erofs_try_to_free_cached_page(struct page *page) } /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ -static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, +static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, struct page *page) { - struct z_erofs_pcluster *const pcl = clt->pcl; + struct z_erofs_pcluster *const pcl = fe->pcl; - while (clt->icpage_ptr > pcl->compressed_pages) - if (!cmpxchg(--clt->icpage_ptr, NULL, page)) + while (fe->icpage_ptr > pcl->compressed_pages) + if (!cmpxchg(--fe->icpage_ptr, NULL, page)) return true; return false; } /* callers must be with collection lock held */ -static int z_erofs_attach_page(struct z_erofs_collector *clt, +static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, struct page *page, enum z_erofs_page_type type, bool pvec_safereuse) { int ret; /* give priority for inplaceio */ - if (clt->mode >= COLLECT_PRIMARY && + if (fe->mode >= COLLECT_PRIMARY && type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && - z_erofs_try_inplace_io(clt, page)) + z_erofs_try_inplace_io(fe, page)) return 0; - ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, + ret = z_erofs_pagevec_enqueue(&fe->vector, page, type, pvec_safereuse); - clt->cl->vcnt += (unsigned int)ret; + fe->cl->vcnt += (unsigned int)ret; return ret ? 0 : -EAGAIN; } -static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) +static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f) { - struct z_erofs_pcluster *pcl = clt->pcl; - z_erofs_next_pcluster_t *owned_head = &clt->owned_head; + struct z_erofs_pcluster *pcl = f->pcl; + z_erofs_next_pcluster_t *owned_head = &f->owned_head; /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, *owned_head) == Z_EROFS_PCLUSTER_NIL) { *owned_head = &pcl->next; /* so we can attach this pcluster to our submission chain. 
*/ - clt->mode = COLLECT_PRIMARY_FOLLOWED; + f->mode = COLLECT_PRIMARY_FOLLOWED; return; } @@ -398,24 +397,24 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, *owned_head) == Z_EROFS_PCLUSTER_TAIL) { *owned_head = Z_EROFS_PCLUSTER_TAIL; - clt->mode = COLLECT_PRIMARY_HOOKED; - clt->tailpcl = NULL; + f->mode = COLLECT_PRIMARY_HOOKED; + f->tailpcl = NULL; return; } /* type 3, it belongs to a chain, but it isn't the end of the chain */ - clt->mode = COLLECT_PRIMARY; + f->mode = COLLECT_PRIMARY; } -static int z_erofs_lookup_collection(struct z_erofs_collector *clt, +static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { - struct z_erofs_pcluster *pcl = clt->pcl; + struct z_erofs_pcluster *pcl = fe->pcl; struct z_erofs_collection *cl; unsigned int length; /* to avoid unexpected loop formed by corrupted images */ - if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) { + if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) { DBG_BUGON(1); return -EFSCORRUPTED; } @@ -446,18 +445,19 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt, } mutex_lock(&cl->lock); /* used to check tail merging loop due to corrupted images */ - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) - clt->tailpcl = pcl; + if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL) + fe->tailpcl = pcl; - z_erofs_try_to_claim_pcluster(clt); - clt->cl = cl; + z_erofs_try_to_claim_pcluster(fe); + fe->cl = cl; return 0; } -static int z_erofs_register_collection(struct z_erofs_collector *clt, +static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { + bool ztailpacking = map->m_flags & EROFS_MAP_META; struct z_erofs_pcluster *pcl; struct z_erofs_collection *cl; struct erofs_workgroup *grp; @@ -469,20 +469,20 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, } /* no available pcluster, let's allocate one */ - pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT); + pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 : + map->m_plen >> PAGE_SHIFT); if (IS_ERR(pcl)) return PTR_ERR(pcl); atomic_set(&pcl->obj.refcount, 1); - pcl->obj.index = map->m_pa >> PAGE_SHIFT; pcl->algorithmformat = map->m_algorithmformat; pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) | (map->m_flags & EROFS_MAP_FULL_MAPPED ? 
Z_EROFS_PCLUSTER_FULL_LENGTH : 0); /* new pclusters should be claimed as type 1, primary and followed */ - pcl->next = clt->owned_head; - clt->mode = COLLECT_PRIMARY_FOLLOWED; + pcl->next = fe->owned_head; + fe->mode = COLLECT_PRIMARY_FOLLOWED; cl = z_erofs_primarycollection(pcl); cl->pageofs = map->m_la & ~PAGE_MASK; @@ -494,23 +494,32 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, mutex_init(&cl->lock); DBG_BUGON(!mutex_trylock(&cl->lock)); - grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj); - if (IS_ERR(grp)) { - err = PTR_ERR(grp); - goto err_out; - } + if (ztailpacking) { + pcl->obj.index = 0; /* which indicates ztailpacking */ + pcl->pageofs_in = erofs_blkoff(map->m_pa); + pcl->tailpacking_size = map->m_plen; + } else { + pcl->obj.index = map->m_pa >> PAGE_SHIFT; - if (grp != &pcl->obj) { - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj); - err = -EEXIST; - goto err_out; + grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj); + if (IS_ERR(grp)) { + err = PTR_ERR(grp); + goto err_out; + } + + if (grp != &pcl->obj) { + fe->pcl = container_of(grp, + struct z_erofs_pcluster, obj); + err = -EEXIST; + goto err_out; + } } /* used to check tail merging loop due to corrupted images */ - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) - clt->tailpcl = pcl; - clt->owned_head = &pcl->next; - clt->pcl = pcl; - clt->cl = cl; + if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL) + fe->tailpcl = pcl; + fe->owned_head = &pcl->next; + fe->pcl = pcl; + fe->cl = cl; return 0; err_out: @@ -519,48 +528,51 @@ err_out: return err; } -static int z_erofs_collector_begin(struct z_erofs_collector *clt, +static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { struct erofs_workgroup *grp; int ret; - DBG_BUGON(clt->cl); + DBG_BUGON(fe->cl); /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */ - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL); - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); + DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); - if (!PAGE_ALIGNED(map->m_pa)) { - DBG_BUGON(1); - return -EINVAL; + if (map->m_flags & EROFS_MAP_META) { + if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + goto tailpacking; } grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT); if (grp) { - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj); + fe->pcl = container_of(grp, struct z_erofs_pcluster, obj); } else { - ret = z_erofs_register_collection(clt, inode, map); - +tailpacking: + ret = z_erofs_register_collection(fe, inode, map); if (!ret) goto out; if (ret != -EEXIST) return ret; } - ret = z_erofs_lookup_collection(clt, inode, map); + ret = z_erofs_lookup_collection(fe, inode, map); if (ret) { - erofs_workgroup_put(&clt->pcl->obj); + erofs_workgroup_put(&fe->pcl->obj); return ret; } out: - z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, - clt->cl->pagevec, clt->cl->vcnt); - + z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS, + fe->cl->pagevec, fe->cl->vcnt); /* since file-backed online pages are traversed in reverse order */ - clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages; + fe->icpage_ptr = fe->pcl->compressed_pages + + z_erofs_pclusterpages(fe->pcl); return 0; } @@ -594,24 +606,24 @@ static void z_erofs_collection_put(struct z_erofs_collection *cl) erofs_workgroup_put(&pcl->obj); } -static 
bool z_erofs_collector_end(struct z_erofs_collector *clt) +static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) { - struct z_erofs_collection *cl = clt->cl; + struct z_erofs_collection *cl = fe->cl; if (!cl) return false; - z_erofs_pagevec_ctor_exit(&clt->vector, false); + z_erofs_pagevec_ctor_exit(&fe->vector, false); mutex_unlock(&cl->lock); /* * if all pending pages are added, don't hold its reference * any longer if the pcluster isn't hosted by ourselves. */ - if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) + if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) z_erofs_collection_put(cl); - clt->cl = NULL; + fe->cl = NULL; return true; } @@ -635,7 +647,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct inode *const inode = fe->inode; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct erofs_map_blocks *const map = &fe->map; - struct z_erofs_collector *const clt = &fe->clt; const loff_t offset = page_offset(page); bool tight = true; @@ -656,7 +667,7 @@ repeat: if (offset + cur >= map->m_la && offset + cur < map->m_la + map->m_llen) { /* didn't get a valid collection previously (very rare) */ - if (!clt->cl) + if (!fe->cl) goto restart_now; goto hitted; } @@ -664,7 +675,7 @@ repeat: /* go ahead the next map_blocks */ erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur); - if (z_erofs_collector_end(clt)) + if (z_erofs_collector_end(fe)) fe->backmost = false; map->m_la = offset + cur; @@ -677,19 +688,34 @@ restart_now: if (!(map->m_flags & EROFS_MAP_MAPPED)) goto hitted; - err = z_erofs_collector_begin(clt, inode, map); + err = z_erofs_collector_begin(fe, inode, map); if (err) goto err_out; - /* preload all compressed pages (maybe downgrade role if necessary) */ - if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la)) - cache_strategy = TRYALLOC; - else - cache_strategy = DONTALLOC; + if (z_erofs_is_inline_pcluster(fe->pcl)) { + void *mp; - preload_compressed_pages(clt, MNGD_MAPPING(sbi), - cache_strategy, pagepool); + mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb, + erofs_blknr(map->m_pa), EROFS_NO_KMAP); + if (IS_ERR(mp)) { + err = PTR_ERR(mp); + erofs_err(inode->i_sb, + "failed to get inline page, err %d", err); + goto err_out; + } + get_page(fe->map.buf.page); + WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page); + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; + } else { + /* bind cache first when cached decompression is preferred */ + if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, + map->m_la)) + cache_strategy = TRYALLOC; + else + cache_strategy = DONTALLOC; + z_erofs_bind_cache(fe, cache_strategy, pagepool); + } hitted: /* * Ensure the current partial page belongs to this submit chain rather @@ -697,8 +723,8 @@ hitted: * those chains are handled asynchronously thus the page cannot be used * for inplace I/O or pagevec (should be processed in strict order.) 
*/ - tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && - clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED && + fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); cur = end - min_t(unsigned int, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { @@ -713,18 +739,18 @@ hitted: Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); if (cur) - tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); + tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED); retry: - err = z_erofs_attach_page(clt, page, page_type, - clt->mode >= COLLECT_PRIMARY_FOLLOWED); + err = z_erofs_attach_page(fe, page, page_type, + fe->mode >= COLLECT_PRIMARY_FOLLOWED); /* should allocate an additional short-lived page for pagevec */ if (err == -EAGAIN) { struct page *const newpage = alloc_page(GFP_NOFS | __GFP_NOFAIL); set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE); - err = z_erofs_attach_page(clt, newpage, + err = z_erofs_attach_page(fe, newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); if (!err) goto retry; @@ -740,7 +766,7 @@ retry: /* bump up the number of spiltted parts of a page */ ++spiltted; /* also update nr_pages */ - clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1); + fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1); next_part: /* can be used for verification */ map->m_llen = offset + cur - map->m_la; @@ -762,32 +788,19 @@ err_out: goto out; } -static void z_erofs_decompressqueue_work(struct work_struct *work); -static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, - bool sync, int bios) +static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi, + unsigned int readahead_pages) { - struct erofs_sb_info *const sbi = EROFS_SB(io->sb); - - /* wake up the caller thread for sync decompression */ - if (sync) { - unsigned long flags; + /* auto: enable for readpage, disable for readahead */ + if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) && + !readahead_pages) + return true; - spin_lock_irqsave(&io->u.wait.lock, flags); - if (!atomic_add_return(bios, &io->pending_bios)) - wake_up_locked(&io->u.wait); - spin_unlock_irqrestore(&io->u.wait.lock, flags); - return; - } + if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_FORCE_ON) && + (readahead_pages <= sbi->opt.max_sync_decompress_pages)) + return true; - if (atomic_add_return(bios, &io->pending_bios)) - return; - /* Use workqueue and sync decompression for atomic contexts only */ - if (in_atomic() || irqs_disabled()) { - queue_work(z_erofs_workqueue, &io->u.work); - sbi->opt.readahead_sync_decompress = true; - return; - } - z_erofs_decompressqueue_work(&io->u.work); + return false; } static bool z_erofs_page_is_invalidated(struct page *page) @@ -795,38 +808,12 @@ static bool z_erofs_page_is_invalidated(struct page *page) return !page->mapping && !z_erofs_is_shortlived_page(page); } -static void z_erofs_decompressqueue_endio(struct bio *bio) -{ - tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private); - struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t); - blk_status_t err = bio->bi_status; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(z_erofs_page_is_invalidated(page)); - - if (err) - SetPageError(page); - - if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { - if (!err) - SetPageUptodate(page); - unlock_page(page); - } - } - z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1); - bio_put(bio); -} - 
static int z_erofs_decompress_pcluster(struct super_block *sb, struct z_erofs_pcluster *pcl, struct page **pagepool) { struct erofs_sb_info *const sbi = EROFS_SB(sb); + unsigned int pclusterpages = z_erofs_pclusterpages(pcl); struct z_erofs_pagevec_ctor ctor; unsigned int i, inputsize, outputsize, llen, nr_pages; struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES]; @@ -908,15 +895,20 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, overlapped = false; compressed_pages = pcl->compressed_pages; - for (i = 0; i < pcl->pclusterpages; ++i) { + for (i = 0; i < pclusterpages; ++i) { unsigned int pagenr; page = compressed_pages[i]; - /* all compressed pages ought to be valid */ DBG_BUGON(!page); - DBG_BUGON(z_erofs_page_is_invalidated(page)); + if (z_erofs_is_inline_pcluster(pcl)) { + if (!PageUptodate(page)) + err = -EIO; + continue; + } + + DBG_BUGON(z_erofs_page_is_invalidated(page)); if (!z_erofs_is_shortlived_page(page)) { if (erofs_page_is_managed(sbi, page)) { if (!PageUptodate(page)) @@ -961,11 +953,16 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, partial = true; } - inputsize = pcl->pclusterpages * PAGE_SIZE; + if (z_erofs_is_inline_pcluster(pcl)) + inputsize = pcl->tailpacking_size; + else + inputsize = pclusterpages * PAGE_SIZE; + err = z_erofs_decompress(&(struct z_erofs_decompress_req) { .sb = sb, .in = compressed_pages, .out = pages, + .pageofs_in = pcl->pageofs_in, .pageofs_out = cl->pageofs, .inputsize = inputsize, .outputsize = outputsize, @@ -975,17 +972,22 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, }, pagepool); out: - /* must handle all compressed pages before ending pages */ - for (i = 0; i < pcl->pclusterpages; ++i) { - page = compressed_pages[i]; - - if (erofs_page_is_managed(sbi, page)) - continue; + /* must handle all compressed pages before actual file pages */ + if (z_erofs_is_inline_pcluster(pcl)) { + page = compressed_pages[0]; + WRITE_ONCE(compressed_pages[0], NULL); + put_page(page); + } else { + for (i = 0; i < pclusterpages; ++i) { + page = compressed_pages[i]; - /* recycle all individual short-lived pages */ - (void)z_erofs_put_shortlivedpage(pagepool, page); + if (erofs_page_is_managed(sbi, page)) + continue; - WRITE_ONCE(compressed_pages[i], NULL); + /* recycle all individual short-lived pages */ + (void)z_erofs_put_shortlivedpage(pagepool, page); + WRITE_ONCE(compressed_pages[i], NULL); + } } for (i = 0; i < nr_pages; ++i) { @@ -1057,13 +1059,42 @@ static void z_erofs_decompressqueue_work(struct work_struct *work) kvfree(bgq); } +static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, + bool sync, int bios) +{ + struct erofs_sb_info *const sbi = EROFS_SB(io->sb); + + /* wake up the caller thread for sync decompression */ + if (sync) { + unsigned long flags; + + spin_lock_irqsave(&io->u.wait.lock, flags); + if (!atomic_add_return(bios, &io->pending_bios)) + wake_up_locked(&io->u.wait); + spin_unlock_irqrestore(&io->u.wait.lock, flags); + return; + } + + if (atomic_add_return(bios, &io->pending_bios)) + return; + /* Use workqueue and sync decompression for atomic contexts only */ + if (in_atomic() || irqs_disabled()) { + queue_work(z_erofs_workqueue, &io->u.work); + /* enable sync decompression for readahead */ + if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) + sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; + return; + } + z_erofs_decompressqueue_work(&io->u.work); +} + static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, unsigned int 
nr, struct page **pagepool, - struct address_space *mc, - gfp_t gfp) + struct address_space *mc) { const pgoff_t index = pcl->obj.index; + gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; struct address_space *mapping; @@ -1234,6 +1265,33 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, qtail[JQ_BYPASS] = &pcl->next; } +static void z_erofs_decompressqueue_endio(struct bio *bio) +{ + tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private); + struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t); + blk_status_t err = bio->bi_status; + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + bio_for_each_segment_all(bvec, bio, iter_all) { + struct page *page = bvec->bv_page; + + DBG_BUGON(PageUptodate(page)); + DBG_BUGON(z_erofs_page_is_invalidated(page)); + + if (err) + SetPageError(page); + + if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { + if (!err) + SetPageUptodate(page); + unlock_page(page); + } + } + z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1); + bio_put(bio); +} + static void z_erofs_submit_queue(struct super_block *sb, struct z_erofs_decompress_frontend *f, struct page **pagepool, @@ -1244,7 +1302,7 @@ static void z_erofs_submit_queue(struct super_block *sb, z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; void *bi_private; - z_erofs_next_pcluster_t owned_head = f->clt.owned_head; + z_erofs_next_pcluster_t owned_head = f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ pgoff_t last_index; struct block_device *last_bdev; @@ -1271,6 +1329,14 @@ static void z_erofs_submit_queue(struct super_block *sb, pcl = container_of(owned_head, struct z_erofs_pcluster, next); + /* close the main owned chain at first */ + owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, + Z_EROFS_PCLUSTER_TAIL_CLOSED); + if (z_erofs_is_inline_pcluster(pcl)) { + move_to_bypass_jobqueue(pcl, qtail, owned_head); + continue; + } + /* no device id here, thus it will always succeed */ mdev = (struct erofs_map_dev) { .m_pa = blknr_to_addr(pcl->obj.index), @@ -1280,16 +1346,11 @@ static void z_erofs_submit_queue(struct super_block *sb, cur = erofs_blknr(mdev.m_pa); end = cur + pcl->pclusterpages; - /* close the main owned chain at first */ - owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, - Z_EROFS_PCLUSTER_TAIL_CLOSED); - do { struct page *page; page = pickup_page_for_submission(pcl, i++, pagepool, - MNGD_MAPPING(sbi), - GFP_NOFS); + MNGD_MAPPING(sbi)); if (!page) continue; @@ -1301,15 +1362,14 @@ submit_bio_retry: } if (!bio) { - bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); + bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, + REQ_OP_READ, GFP_NOIO); bio->bi_end_io = z_erofs_decompressqueue_endio; - bio_set_dev(bio, mdev.m_bdev); last_bdev = mdev.m_bdev; bio->bi_iter.bi_sector = (sector_t)cur << LOG_SECTORS_PER_BLOCK; bio->bi_private = bi_private; - bio->bi_opf = REQ_OP_READ; if (f->readahead) bio->bi_opf |= REQ_RAHEAD; ++nr_bios; @@ -1348,7 +1408,7 @@ static void z_erofs_runqueue(struct super_block *sb, { struct z_erofs_decompressqueue io[NR_JOBQUEUES]; - if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL) + if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) return; z_erofs_submit_queue(sb, f, pagepool, io, &force_fg); @@ -1435,6 +1495,7 @@ skip: static int z_erofs_readpage(struct file *file, struct page *page) { struct inode *const inode = page->mapping->host; + struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); struct page 
*pagepool = NULL; int err; @@ -1447,17 +1508,16 @@ static int z_erofs_readpage(struct file *file, struct page *page) err = z_erofs_do_read_page(&f, page, &pagepool); z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false); - (void)z_erofs_collector_end(&f.clt); + (void)z_erofs_collector_end(&f); /* if some compressed cluster ready, need submit them anyway */ - z_erofs_runqueue(inode->i_sb, &f, &pagepool, true); + z_erofs_runqueue(inode->i_sb, &f, &pagepool, + z_erofs_get_sync_decompress_policy(sbi, 0)); if (err) erofs_err(inode->i_sb, "failed to read, err [%d]", err); - if (f.map.mpage) - put_page(f.map.mpage); - + erofs_put_metabuf(&f.map.buf); erofs_release_pages(&pagepool); return err; } @@ -1498,13 +1558,11 @@ static void z_erofs_readahead(struct readahead_control *rac) put_page(page); } z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false); - (void)z_erofs_collector_end(&f.clt); + (void)z_erofs_collector_end(&f); z_erofs_runqueue(inode->i_sb, &f, &pagepool, - sbi->opt.readahead_sync_decompress && - nr_pages <= sbi->opt.max_sync_decompress_pages); - if (f.map.mpage) - put_page(f.map.mpage); + z_erofs_get_sync_decompress_policy(sbi, nr_pages)); + erofs_put_metabuf(&f.map.buf); erofs_release_pages(&pagepool); } diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index 4a69515dea75..e043216b545f 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -62,8 +62,16 @@ struct z_erofs_pcluster { /* A: lower limit of decompressed length and if full length or not */ unsigned int length; - /* I: physical cluster size in pages */ - unsigned short pclusterpages; + /* I: page offset of inline compressed data */ + unsigned short pageofs_in; + + union { + /* I: physical cluster size in pages */ + unsigned short pclusterpages; + + /* I: tailpacking inline compressed size */ + unsigned short tailpacking_size; + }; /* I: compression algorithm format */ unsigned char algorithmformat; @@ -94,6 +102,18 @@ struct z_erofs_decompressqueue { } u; }; +static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl) +{ + return !pcl->obj.index; +} + +static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) +{ + if (z_erofs_is_inline_pcluster(pcl)) + return 1; + return pcl->pclusterpages; +} + #define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 #define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 660489a7fb64..572f0b8151ba 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -7,12 +7,17 @@ #include <asm/unaligned.h> #include <trace/events/erofs.h> +static int z_erofs_do_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, + int flags); + int z_erofs_fill_inode(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); if (!erofs_sb_has_big_pcluster(sbi) && + !erofs_sb_has_ztailpacking(sbi) && vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { vi->z_advise = 0; vi->z_algorithmtype[0] = 0; @@ -30,7 +35,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) struct super_block *const sb = inode->i_sb; int err, headnr; erofs_off_t pos; - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; void *kaddr; struct z_erofs_map_header *h; @@ -51,18 +56,18 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) goto out_unlock; DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && + !erofs_sb_has_ztailpacking(EROFS_SB(sb)) && vi->datalayout == 
EROFS_INODE_FLAT_COMPRESSION_LEGACY); pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, 8); - page = erofs_get_meta_page(sb, erofs_blknr(pos)); - if (IS_ERR(page)) { - err = PTR_ERR(page); + kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), + EROFS_KMAP_ATOMIC); + if (IS_ERR(kaddr)) { + err = PTR_ERR(kaddr); goto out_unlock; } - kaddr = kmap_atomic(page); - h = kaddr + erofs_blkoff(pos); vi->z_advise = le16_to_cpu(h->h_advise); vi->z_algorithmtype[0] = h->h_algorithmtype & 15; @@ -94,13 +99,33 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) err = -EFSCORRUPTED; goto unmap_done; } +unmap_done: + erofs_put_metabuf(&buf); + if (err) + goto out_unlock; + + if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) { + struct erofs_map_blocks map = { + .buf = __EROFS_BUF_INITIALIZER + }; + + vi->z_idata_size = le16_to_cpu(h->h_idata_size); + err = z_erofs_do_map_blocks(inode, &map, + EROFS_GET_BLOCKS_FINDTAIL); + erofs_put_metabuf(&map.buf); + + if (!map.m_plen || + erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) { + erofs_err(sb, "invalid tail-packing pclustersize %llu", + map.m_plen); + err = -EFSCORRUPTED; + } + if (err < 0) + goto out_unlock; + } /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); -unmap_done: - kunmap_atomic(kaddr); - unlock_page(page); - put_page(page); out_unlock: clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags); return err; @@ -117,37 +142,18 @@ struct z_erofs_maprecorder { u16 clusterofs; u16 delta[2]; erofs_blk_t pblk, compressedlcs; + erofs_off_t nextpackoff; }; static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, erofs_blk_t eblk) { struct super_block *const sb = m->inode->i_sb; - struct erofs_map_blocks *const map = m->map; - struct page *mpage = map->mpage; - if (mpage) { - if (mpage->index == eblk) { - if (!m->kaddr) - m->kaddr = kmap_atomic(mpage); - return 0; - } - - if (m->kaddr) { - kunmap_atomic(m->kaddr); - m->kaddr = NULL; - } - put_page(mpage); - } - - mpage = erofs_get_meta_page(sb, eblk); - if (IS_ERR(mpage)) { - map->mpage = NULL; - return PTR_ERR(mpage); - } - m->kaddr = kmap_atomic(mpage); - unlock_page(mpage); - map->mpage = mpage; + m->kaddr = erofs_read_metabuf(&m->map->buf, sb, eblk, + EROFS_KMAP_ATOMIC); + if (IS_ERR(m->kaddr)) + return PTR_ERR(m->kaddr); return 0; } @@ -169,6 +175,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, if (err) return err; + m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index); m->lcn = lcn; di = m->kaddr + erofs_blkoff(pos); @@ -243,12 +250,12 @@ static int get_compacted_la_distance(unsigned int lclusterbits, static int unpack_compacted_index(struct z_erofs_maprecorder *m, unsigned int amortizedshift, - unsigned int eofs, bool lookahead) + erofs_off_t pos, bool lookahead) { struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_logical_clusterbits; const unsigned int lomask = (1 << lclusterbits) - 1; - unsigned int vcnt, base, lo, encodebits, nblk; + unsigned int vcnt, base, lo, encodebits, nblk, eofs; int i; u8 *in, type; bool big_pcluster; @@ -260,8 +267,12 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, else return -EOPNOTSUPP; + /* it doesn't equal to round_up(..) 
*/ + m->nextpackoff = round_down(pos, vcnt << amortizedshift) + + (vcnt << amortizedshift); big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; + eofs = erofs_blkoff(pos); base = round_down(eofs, vcnt << amortizedshift); in = m->kaddr + base; @@ -399,8 +410,7 @@ out: err = z_erofs_reload_indexes(m, erofs_blknr(pos)); if (err) return err; - return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos), - lookahead); + return unpack_compacted_index(m, amortizedshift, pos, lookahead); } static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, @@ -421,48 +431,47 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned int lookback_distance) { struct erofs_inode *const vi = EROFS_I(m->inode); - struct erofs_map_blocks *const map = m->map; const unsigned int lclusterbits = vi->z_logical_clusterbits; - unsigned long lcn = m->lcn; - int err; - if (lcn < lookback_distance) { - erofs_err(m->inode->i_sb, - "bogus lookback distance @ nid %llu", vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } + while (m->lcn >= lookback_distance) { + unsigned long lcn = m->lcn - lookback_distance; + int err; - /* load extent head logical cluster if needed */ - lcn -= lookback_distance; - err = z_erofs_load_cluster_from_disk(m, lcn, false); - if (err) - return err; + /* load extent head logical cluster if needed */ + err = z_erofs_load_cluster_from_disk(m, lcn, false); + if (err) + return err; - switch (m->type) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - if (!m->delta[0]) { + switch (m->type) { + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + if (!m->delta[0]) { + erofs_err(m->inode->i_sb, + "invalid lookback distance 0 @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + lookback_distance = m->delta[0]; + continue; + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: + m->headtype = m->type; + m->map->m_la = (lcn << lclusterbits) | m->clusterofs; + return 0; + default: erofs_err(m->inode->i_sb, - "invalid lookback distance 0 @ nid %llu", - vi->nid); + "unknown type %u @ lcn %lu of nid %llu", + m->type, lcn, vi->nid); DBG_BUGON(1); - return -EFSCORRUPTED; + return -EOPNOTSUPP; } - return z_erofs_extent_lookback(m, m->delta[0]); - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: - m->headtype = m->type; - map->m_la = (lcn << lclusterbits) | m->clusterofs; - break; - default: - erofs_err(m->inode->i_sb, - "unknown type %u @ lcn %lu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; } - return 0; + + erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; } static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, @@ -484,7 +493,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { - map->m_plen = 1 << lclusterbits; + map->m_plen = 1ULL << lclusterbits; return 0; } lcn = m->lcn + 1; @@ -530,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, return -EFSCORRUPTED; } out: - map->m_plen = m->compressedlcs << lclusterbits; + map->m_plen = (u64)m->compressedlcs << lclusterbits; return 0; err_bonus_cblkcnt: erofs_err(m->inode->i_sb, @@ -583,11 
+592,12 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) return 0; } -int z_erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, - int flags) +static int z_erofs_do_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, + int flags) { struct erofs_inode *const vi = EROFS_I(inode); + bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; struct z_erofs_maprecorder m = { .inode = inode, .map = map, @@ -597,22 +607,8 @@ int z_erofs_map_blocks_iter(struct inode *inode, unsigned long initial_lcn; unsigned long long ofs, end; - trace_z_erofs_map_blocks_iter_enter(inode, map, flags); - - /* when trying to read beyond EOF, leave it unmapped */ - if (map->m_la >= inode->i_size) { - map->m_llen = map->m_la + 1 - inode->i_size; - map->m_la = inode->i_size; - map->m_flags = 0; - goto out; - } - - err = z_erofs_fill_inode_lazy(inode); - if (err) - goto out; - lclusterbits = vi->z_logical_clusterbits; - ofs = map->m_la; + ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la; initial_lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); @@ -620,6 +616,9 @@ int z_erofs_map_blocks_iter(struct inode *inode, if (err) goto unmap_out; + if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL)) + vi->z_idataoff = m.nextpackoff; + map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; @@ -630,6 +629,13 @@ int z_erofs_map_blocks_iter(struct inode *inode, if (endoff >= m.clusterofs) { m.headtype = m.type; map->m_la = (m.lcn << lclusterbits) | m.clusterofs; + /* + * For ztailpacking files, in order to inline data more + * effectively, special EOF lclusters are now supported + * which can have three parts at most. + */ + if (ztailpacking && end > inode->i_size) + end = inode->i_size; break; } /* m.lcn should be >= 1 if endoff < m.clusterofs */ @@ -659,11 +665,19 @@ int z_erofs_map_blocks_iter(struct inode *inode, } map->m_llen = end - map->m_la; - map->m_pa = blknr_to_addr(m.pblk); - err = z_erofs_get_extent_compressedlen(&m, initial_lcn); - if (err) - goto out; + if (flags & EROFS_GET_BLOCKS_FINDTAIL) + vi->z_tailextent_headlcn = m.lcn; + if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { + map->m_flags |= EROFS_MAP_META; + map->m_pa = vi->z_idataoff; + map->m_plen = vi->z_idata_size; + } else { + map->m_pa = blknr_to_addr(m.pblk); + err = z_erofs_get_extent_compressedlen(&m, initial_lcn); + if (err) + goto out; + } if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; @@ -681,14 +695,38 @@ int z_erofs_map_blocks_iter(struct inode *inode, map->m_flags |= EROFS_MAP_FULL_MAPPED; } unmap_out: - if (m.kaddr) - kunmap_atomic(m.kaddr); + erofs_unmap_metabuf(&m.map->buf); out: erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", __func__, map->m_la, map->m_pa, map->m_llen, map->m_plen, map->m_flags); + return err; +} + +int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags) +{ + int err = 0; + + trace_z_erofs_map_blocks_iter_enter(inode, map, flags); + + /* when trying to read beyond EOF, leave it unmapped */ + if (map->m_la >= inode->i_size) { + map->m_llen = map->m_la + 1 - inode->i_size; + map->m_la = inode->i_size; + map->m_flags = 0; + goto out; + } + + err = z_erofs_fill_inode_lazy(inode); + if (err) + goto out; + + err = z_erofs_do_map_blocks(inode, map, flags); +out: trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); /* aggressively BUG_ON iff 
CONFIG_EROFS_FS_DEBUG is on */ @@ -704,8 +742,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, struct erofs_map_blocks map = { .m_la = offset }; ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP); - if (map.mpage) - put_page(map.mpage); + erofs_put_metabuf(&map.buf); if (ret < 0) return ret;
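
The hunks above convert the xattr, zdata and zmap metadata readers from the old erofs_get_meta_page()/kmap() pattern to the erofs_buf helpers (erofs_read_metabuf()/erofs_put_metabuf()): one struct erofs_buf is kept per iterator, blocks are re-read through it, and it is released once at the end instead of mapping/unlocking/putting every page. A minimal usage sketch under the same assumptions, not part of the patch (the helper name and the field being read are illustrative; it relies on the fs/erofs internal headers):

	/* illustrative sketch only; assumes fs/erofs internal.h */
	static int erofs_read_meta_le32(struct super_block *sb, erofs_off_t pos,
					__le32 *out)
	{
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		void *kaddr;

		/* map the metadata block that contains @pos */
		kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
		if (IS_ERR(kaddr))
			return PTR_ERR(kaddr);
		*out = *(__le32 *)(kaddr + erofs_blkoff(pos));
		/* unmap and drop the cached page reference in one call */
		erofs_put_metabuf(&buf);
		return 0;
	}

Repeated reads through the same struct erofs_buf reuse the cached page when the block address does not change, which is what lets shared_getxattr()/shared_listxattr() and z_erofs_reload_indexes() above drop their manual "same block?" bookkeeping.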