diff options
Diffstat (limited to 'fs/erofs')
-rw-r--r-- | fs/erofs/Kconfig | 10 | ||||
-rw-r--r-- | fs/erofs/Makefile | 1 | ||||
-rw-r--r-- | fs/erofs/data.c | 34 | ||||
-rw-r--r-- | fs/erofs/decompressor.c | 7 | ||||
-rw-r--r-- | fs/erofs/erofs_fs.h | 50 | ||||
-rw-r--r-- | fs/erofs/fscache.c | 519 | ||||
-rw-r--r-- | fs/erofs/inode.c | 11 | ||||
-rw-r--r-- | fs/erofs/internal.h | 76 | ||||
-rw-r--r-- | fs/erofs/namei.c | 5 | ||||
-rw-r--r-- | fs/erofs/super.c | 237 | ||||
-rw-r--r-- | fs/erofs/sysfs.c | 4 | ||||
-rw-r--r-- | fs/erofs/zdata.c | 7 |
12 files changed, 833 insertions, 128 deletions
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index f57255ab88ed..85490370e0ca 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -98,3 +98,13 @@ config EROFS_FS_ZIP_LZMA systems will be readable without selecting this option. If unsure, say N. + +config EROFS_FS_ONDEMAND + bool "EROFS fscache-based on-demand read support" + depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y) + default n + help + This permits EROFS to use fscache-backed data blobs with on-demand + read support. + + If unsure, say N. diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile index 8a3317e38e5a..99bbc597a3e9 100644 --- a/fs/erofs/Makefile +++ b/fs/erofs/Makefile @@ -5,3 +5,4 @@ erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o +erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 780db1e5f4b7..fbb037ba326e 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -6,6 +6,7 @@ */ #include "internal.h" #include <linux/prefetch.h> +#include <linux/sched/mm.h> #include <linux/dax.h> #include <trace/events/erofs.h> @@ -35,14 +36,20 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode, erofs_off_t offset = blknr_to_addr(blkaddr); pgoff_t index = offset >> PAGE_SHIFT; struct page *page = buf->page; + struct folio *folio; + unsigned int nofs_flag; if (!page || page->index != index) { erofs_put_metabuf(buf); - page = read_cache_page_gfp(mapping, index, - mapping_gfp_constraint(mapping, ~__GFP_FS)); - if (IS_ERR(page)) - return page; + + nofs_flag = memalloc_nofs_save(); + folio = read_cache_folio(mapping, index, NULL, NULL); + memalloc_nofs_restore(nofs_flag); + if (IS_ERR(folio)) + return folio; + /* should already be PageUptodate, no need to lock page */ + page = folio_file_page(folio, index); buf->page = page; } if (buf->kmap_type == EROFS_NO_KMAP) { @@ -63,6 +70,10 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode, void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, erofs_blk_t blkaddr, enum erofs_kmap_type type) { + if (erofs_is_fscache_mode(sb)) + return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode, + blkaddr, type); + return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); } @@ -110,8 +121,8 @@ static int erofs_map_blocks_flatmode(struct inode *inode, return 0; } -static int erofs_map_blocks(struct inode *inode, - struct erofs_map_blocks *map, int flags) +int erofs_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, int flags) { struct super_block *sb = inode->i_sb; struct erofs_inode *vi = EROFS_I(inode); @@ -199,6 +210,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) map->m_bdev = sb->s_bdev; map->m_daxdev = EROFS_SB(sb)->dax_dev; map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; + map->m_fscache = EROFS_SB(sb)->s_fscache; if (map->m_deviceid) { down_read(&devs->rwsem); @@ -210,6 +222,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) map->m_bdev = dif->bdev; map->m_daxdev = dif->dax_dev; map->m_dax_part_off = dif->dax_part_off; + map->m_fscache = dif->fscache; up_read(&devs->rwsem); } else if (devs->extra_devices) { down_read(&devs->rwsem); @@ -227,6 +240,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) map->m_bdev = dif->bdev; map->m_daxdev = dif->dax_dev; map->m_dax_part_off = dif->dax_part_off; + map->m_fscache = dif->fscache; break; } } @@ -337,9 +351,9 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * since we dont have write or truncate flows, so no inode * locking needs to be held at the moment. */ -static int erofs_readpage(struct file *file, struct page *page) +static int erofs_read_folio(struct file *file, struct folio *folio) { - return iomap_readpage(page, &erofs_iomap_ops); + return iomap_read_folio(folio, &erofs_iomap_ops); } static void erofs_readahead(struct readahead_control *rac) @@ -385,7 +399,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!err) return iomap_dio_rw(iocb, to, &erofs_iomap_ops, - NULL, 0, 0); + NULL, 0, NULL, 0); if (err < 0) return err; } @@ -394,7 +408,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) /* for uncompressed (aligned) files and raw access for other files */ const struct address_space_operations erofs_raw_access_aops = { - .readpage = erofs_readpage, + .read_folio = erofs_read_folio, .readahead = erofs_readahead, .bmap = erofs_bmap, .direct_IO = noop_direct_IO, diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 3efa686c7644..6dca1900c733 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -46,8 +46,6 @@ int z_erofs_load_lz4_config(struct super_block *sb, erofs_err(sb, "too large lz4 pclusterblks %u", sbi->lz4.max_pclusterblks); return -EINVAL; - } else if (sbi->lz4.max_pclusterblks >= 2) { - erofs_info(sb, "EXPERIMENTAL big pcluster feature in use. Use at your own risk!"); } } else { distance = le16_to_cpu(dsb->u1.lz4_max_distance); @@ -322,6 +320,7 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; const unsigned int righthalf = min_t(unsigned int, rq->outputsize, PAGE_SIZE - rq->pageofs_out); + const unsigned int lefthalf = rq->outputsize - righthalf; unsigned char *src, *dst; if (nrpages_out > 2) { @@ -344,10 +343,10 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, if (nrpages_out == 2) { DBG_BUGON(!rq->out[1]); if (rq->out[1] == *rq->in) { - memmove(src, src + righthalf, rq->pageofs_out); + memmove(src, src + righthalf, lefthalf); } else { dst = kmap_atomic(rq->out[1]); - memcpy(dst, src + righthalf, rq->pageofs_out); + memcpy(dst, src + righthalf, lefthalf); kunmap_atomic(dst); } } diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 1238ca104f09..2b48373f690b 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -37,12 +37,9 @@ #define EROFS_SB_EXTSLOT_SIZE 16 struct erofs_deviceslot { - union { - u8 uuid[16]; /* used for device manager later */ - u8 userdata[64]; /* digest(sha256), etc. */ - } u; - __le32 blocks; /* total fs blocks of this device */ - __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ + u8 tag[64]; /* digest(sha256), etc. */ + __le32 blocks; /* total fs blocks of this device */ + __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ u8 reserved[56]; }; #define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) @@ -58,8 +55,8 @@ struct erofs_super_block { __le16 root_nid; /* nid of root directory */ __le64 inos; /* total valid ino # (== f_files - f_favail) */ - __le64 build_time; /* inode v1 time derivation */ - __le32 build_time_nsec; /* inode v1 time derivation in nano scale */ + __le64 build_time; /* compact inode time derivation */ + __le32 build_time_nsec; /* compact inode time derivation in ns scale */ __le32 blocks; /* used for statfs */ __le32 meta_blkaddr; /* start block address of metadata area */ __le32 xattr_blkaddr; /* start block address of shared xattr area */ @@ -79,15 +76,15 @@ struct erofs_super_block { /* * erofs inode datalayout (i_format in on-disk inode): - * 0 - inode plain without inline data A: + * 0 - uncompressed flat inode without tail-packing inline data: * inode, [xattrs], ... | ... | no-holed data - * 1 - inode VLE compression B (legacy): - * inode, [xattrs], extents ... | ... - * 2 - inode plain with inline data C: - * inode, [xattrs], last_inline_data, ... | ... | no-holed data - * 3 - inode compression D: + * 1 - compressed inode with non-compact indexes: + * inode, [xattrs], [map_header], extents ... | ... + * 2 - uncompressed flat inode with tail-packing inline data: + * inode, [xattrs], tailpacking data, ... | ... | no-holed data + * 3 - compressed inode with compact indexes: * inode, [xattrs], map_header, extents ... | ... - * 4 - inode chunk-based E: + * 4 - chunk-based inode with (optional) multi-device support: * inode, [xattrs], chunk indexes ... | ... * 5~7 - reserved */ @@ -106,7 +103,7 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode) datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY; } -/* bit definitions of inode i_advise */ +/* bit definitions of inode i_format */ #define EROFS_I_VERSION_BITS 1 #define EROFS_I_DATALAYOUT_BITS 3 @@ -140,8 +137,9 @@ struct erofs_inode_compact { __le32 i_size; __le32 i_reserved; union { - /* file total compressed blocks for data mapping 1 */ + /* total compressed blocks for compressed inodes */ __le32 compressed_blocks; + /* block address for uncompressed flat inodes */ __le32 raw_blkaddr; /* for device files, used to indicate old/new device # */ @@ -156,9 +154,9 @@ struct erofs_inode_compact { __le32 i_reserved2; }; -/* 32 bytes on-disk inode */ +/* 32-byte on-disk inode */ #define EROFS_INODE_LAYOUT_COMPACT 0 -/* 64 bytes on-disk inode */ +/* 64-byte on-disk inode */ #define EROFS_INODE_LAYOUT_EXTENDED 1 /* 64-byte complete form of an ondisk inode */ @@ -171,8 +169,9 @@ struct erofs_inode_extended { __le16 i_reserved; __le64 i_size; union { - /* file total compressed blocks for data mapping 1 */ + /* total compressed blocks for compressed inodes */ __le32 compressed_blocks; + /* block address for uncompressed flat inodes */ __le32 raw_blkaddr; /* for device files, used to indicate old/new device # */ @@ -365,17 +364,16 @@ enum { struct z_erofs_vle_decompressed_index { __le16 di_advise; - /* where to decompress in the head cluster */ + /* where to decompress in the head lcluster */ __le16 di_clusterofs; union { - /* for the head cluster */ + /* for the HEAD lclusters */ __le32 blkaddr; /* - * for the rest clusters - * eg. for 4k page-sized cluster, maximum 4K*64k = 256M) - * [0] - pointing to the head cluster - * [1] - pointing to the tail cluster + * for the NONHEAD lclusters + * [0] - distance to its HEAD lcluster + * [1] - distance to the next HEAD lcluster */ __le16 delta[2]; } di_u; diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c new file mode 100644 index 000000000000..a5cc4ed2cd0d --- /dev/null +++ b/fs/erofs/fscache.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2022, Alibaba Cloud + */ +#include <linux/fscache.h> +#include "internal.h" + +static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping, + loff_t start, size_t len) +{ + struct netfs_io_request *rreq; + + rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL); + if (!rreq) + return ERR_PTR(-ENOMEM); + + rreq->start = start; + rreq->len = len; + rreq->mapping = mapping; + INIT_LIST_HEAD(&rreq->subrequests); + refcount_set(&rreq->ref, 1); + return rreq; +} + +static void erofs_fscache_put_request(struct netfs_io_request *rreq) +{ + if (!refcount_dec_and_test(&rreq->ref)) + return; + if (rreq->cache_resources.ops) + rreq->cache_resources.ops->end_operation(&rreq->cache_resources); + kfree(rreq); +} + +static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq) +{ + if (!refcount_dec_and_test(&subreq->ref)) + return; + erofs_fscache_put_request(subreq->rreq); + kfree(subreq); +} + +static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq) +{ + struct netfs_io_subrequest *subreq; + + while (!list_empty(&rreq->subrequests)) { + subreq = list_first_entry(&rreq->subrequests, + struct netfs_io_subrequest, rreq_link); + list_del(&subreq->rreq_link); + erofs_fscache_put_subrequest(subreq); + } +} + +static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + unsigned int iopos = 0; + pgoff_t start_page = rreq->start / PAGE_SIZE; + pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + bool subreq_failed = false; + + XA_STATE(xas, &rreq->mapping->i_pages, start_page); + + subreq = list_first_entry(&rreq->subrequests, + struct netfs_io_subrequest, rreq_link); + subreq_failed = (subreq->error < 0); + + rcu_read_lock(); + xas_for_each(&xas, folio, last_page) { + unsigned int pgpos = + (folio_index(folio) - start_page) * PAGE_SIZE; + unsigned int pgend = pgpos + folio_size(folio); + bool pg_failed = false; + + for (;;) { + if (!subreq) { + pg_failed = true; + break; + } + + pg_failed |= subreq_failed; + if (pgend < iopos + subreq->len) + break; + + iopos += subreq->len; + if (!list_is_last(&subreq->rreq_link, + &rreq->subrequests)) { + subreq = list_next_entry(subreq, rreq_link); + subreq_failed = (subreq->error < 0); + } else { + subreq = NULL; + subreq_failed = false; + } + if (pgend == iopos) + break; + } + + if (!pg_failed) + folio_mark_uptodate(folio); + + folio_unlock(folio); + } + rcu_read_unlock(); +} + +static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq) +{ + erofs_fscache_rreq_unlock_folios(rreq); + erofs_fscache_clear_subrequests(rreq); + erofs_fscache_put_request(rreq); +} + +static void erofc_fscache_subreq_complete(void *priv, + ssize_t transferred_or_error, bool was_async) +{ + struct netfs_io_subrequest *subreq = priv; + struct netfs_io_request *rreq = subreq->rreq; + + if (IS_ERR_VALUE(transferred_or_error)) + subreq->error = transferred_or_error; + + if (atomic_dec_and_test(&rreq->nr_outstanding)) + erofs_fscache_rreq_complete(rreq); + + erofs_fscache_put_subrequest(subreq); +} + +/* + * Read data from fscache and fill the read data into page cache described by + * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes + * the start physical address in the cache file. + */ +static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie, + struct netfs_io_request *rreq, loff_t pstart) +{ + enum netfs_io_source source; + struct super_block *sb = rreq->mapping->host->i_sb; + struct netfs_io_subrequest *subreq; + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct iov_iter iter; + loff_t start = rreq->start; + size_t len = rreq->len; + size_t done = 0; + int ret; + + atomic_set(&rreq->nr_outstanding, 1); + + ret = fscache_begin_read_operation(cres, cookie); + if (ret) + goto out; + + while (done < len) { + subreq = kzalloc(sizeof(struct netfs_io_subrequest), + GFP_KERNEL); + if (subreq) { + INIT_LIST_HEAD(&subreq->rreq_link); + refcount_set(&subreq->ref, 2); + subreq->rreq = rreq; + refcount_inc(&rreq->ref); + } else { + ret = -ENOMEM; + goto out; + } + + subreq->start = pstart + done; + subreq->len = len - done; + subreq->flags = 1 << NETFS_SREQ_ONDEMAND; + + list_add_tail(&subreq->rreq_link, &rreq->subrequests); + + source = cres->ops->prepare_read(subreq, LLONG_MAX); + if (WARN_ON(subreq->len == 0)) + source = NETFS_INVALID_READ; + if (source != NETFS_READ_FROM_CACHE) { + erofs_err(sb, "failed to fscache prepare_read (source %d)", + source); + ret = -EIO; + subreq->error = ret; + erofs_fscache_put_subrequest(subreq); + goto out; + } + + atomic_inc(&rreq->nr_outstanding); + + iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, + start + done, subreq->len); + + ret = fscache_read(cres, subreq->start, &iter, + NETFS_READ_HOLE_FAIL, + erofc_fscache_subreq_complete, subreq); + if (ret == -EIOCBQUEUED) + ret = 0; + if (ret) { + erofs_err(sb, "failed to fscache_read (ret %d)", ret); + goto out; + } + + done += subreq->len; + } +out: + if (atomic_dec_and_test(&rreq->nr_outstanding)) + erofs_fscache_rreq_complete(rreq); + + return ret; +} + +static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) +{ + int ret; + struct super_block *sb = folio_mapping(folio)->host->i_sb; + struct netfs_io_request *rreq; + struct erofs_map_dev mdev = { + .m_deviceid = 0, + .m_pa = folio_pos(folio), + }; + + ret = erofs_map_dev(sb, &mdev); + if (ret) + goto out; + + rreq = erofs_fscache_alloc_request(folio_mapping(folio), + folio_pos(folio), folio_size(folio)); + if (IS_ERR(rreq)) + goto out; + + return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, + rreq, mdev.m_pa); +out: + folio_unlock(folio); + return ret; +} + +static int erofs_fscache_read_folio_inline(struct folio *folio, + struct erofs_map_blocks *map) +{ + struct super_block *sb = folio_mapping(folio)->host->i_sb; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + erofs_blk_t blknr; + size_t offset, len; + void *src, *dst; + + /* For tail packing layout, the offset may be non-zero. */ + offset = erofs_blkoff(map->m_pa); + blknr = erofs_blknr(map->m_pa); + len = map->m_llen; + + src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = kmap_local_folio(folio, 0); + memcpy(dst, src + offset, len); + memset(dst + len, 0, PAGE_SIZE - len); + kunmap_local(dst); + + erofs_put_metabuf(&buf); + return 0; +} + +static int erofs_fscache_read_folio(struct file *file, struct folio *folio) +{ + struct inode *inode = folio_mapping(folio)->host; + struct super_block *sb = inode->i_sb; + struct erofs_map_blocks map; + struct erofs_map_dev mdev; + struct netfs_io_request *rreq; + erofs_off_t pos; + loff_t pstart; + int ret; + + DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ); + + pos = folio_pos(folio); + map.m_la = pos; + + ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (ret) + goto out_unlock; + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + folio_zero_range(folio, 0, folio_size(folio)); + goto out_uptodate; + } + + if (map.m_flags & EROFS_MAP_META) { + ret = erofs_fscache_read_folio_inline(folio, &map); + goto out_uptodate; + } + + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + + ret = erofs_map_dev(sb, &mdev); + if (ret) + goto out_unlock; + + + rreq = erofs_fscache_alloc_request(folio_mapping(folio), + folio_pos(folio), folio_size(folio)); + if (IS_ERR(rreq)) + goto out_unlock; + + pstart = mdev.m_pa + (pos - map.m_la); + return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, + rreq, pstart); + +out_uptodate: + if (!ret) + folio_mark_uptodate(folio); +out_unlock: + folio_unlock(folio); + return ret; +} + +static void erofs_fscache_advance_folios(struct readahead_control *rac, + size_t len, bool unlock) +{ + while (len) { + struct folio *folio = readahead_folio(rac); + len -= folio_size(folio); + if (unlock) { + folio_mark_uptodate(folio); + folio_unlock(folio); + } + } +} + +static void erofs_fscache_readahead(struct readahead_control *rac) +{ + struct inode *inode = rac->mapping->host; + struct super_block *sb = inode->i_sb; + size_t len, count, done = 0; + erofs_off_t pos; + loff_t start, offset; + int ret; + + if (!readahead_count(rac)) + return; + + start = readahead_pos(rac); + len = readahead_length(rac); + + do { + struct erofs_map_blocks map; + struct erofs_map_dev mdev; + struct netfs_io_request *rreq; + + pos = start + done; + map.m_la = pos; + + ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (ret) + return; + + offset = start + done; + count = min_t(size_t, map.m_llen - (pos - map.m_la), + len - done); + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + struct iov_iter iter; + + iov_iter_xarray(&iter, READ, &rac->mapping->i_pages, + offset, count); + iov_iter_zero(count, &iter); + + erofs_fscache_advance_folios(rac, count, true); + ret = count; + continue; + } + + if (map.m_flags & EROFS_MAP_META) { + struct folio *folio = readahead_folio(rac); + + ret = erofs_fscache_read_folio_inline(folio, &map); + if (!ret) { + folio_mark_uptodate(folio); + ret = folio_size(folio); + } + + folio_unlock(folio); + continue; + } + + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + ret = erofs_map_dev(sb, &mdev); + if (ret) + return; + + rreq = erofs_fscache_alloc_request(rac->mapping, offset, count); + if (IS_ERR(rreq)) + return; + /* + * Drop the ref of folios here. Unlock them in + * rreq_unlock_folios() when rreq complete. + */ + erofs_fscache_advance_folios(rac, count, false); + ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, + rreq, mdev.m_pa + (pos - map.m_la)); + if (!ret) + ret = count; + } while (ret > 0 && ((done += ret) < len)); +} + +static const struct address_space_operations erofs_fscache_meta_aops = { + .read_folio = erofs_fscache_meta_read_folio, +}; + +const struct address_space_operations erofs_fscache_access_aops = { + .read_folio = erofs_fscache_read_folio, + .readahead = erofs_fscache_readahead, +}; + +int erofs_fscache_register_cookie(struct super_block *sb, + struct erofs_fscache **fscache, + char *name, bool need_inode) +{ + struct fscache_volume *volume = EROFS_SB(sb)->volume; + struct erofs_fscache *ctx; + struct fscache_cookie *cookie; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE, + name, strlen(name), NULL, 0, 0); + if (!cookie) { + erofs_err(sb, "failed to get cookie for %s", name); + ret = -EINVAL; + goto err; + } + + fscache_use_cookie(cookie, false); + ctx->cookie = cookie; + + if (need_inode) { + struct inode *const inode = new_inode(sb); + + if (!inode) { + erofs_err(sb, "failed to get anon inode for %s", name); + ret = -ENOMEM; + goto err_cookie; + } + + set_nlink(inode, 1); + inode->i_size = OFFSET_MAX; + inode->i_mapping->a_ops = &erofs_fscache_meta_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + ctx->inode = inode; + } + + *fscache = ctx; + return 0; + +err_cookie: + fscache_unuse_cookie(ctx->cookie, NULL, NULL); + fscache_relinquish_cookie(ctx->cookie, false); + ctx->cookie = NULL; +err: + kfree(ctx); + return ret; +} + +void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) +{ + struct erofs_fscache *ctx = *fscache; + + if (!ctx) + return; + + fscache_unuse_cookie(ctx->cookie, NULL, NULL); + fscache_relinquish_cookie(ctx->cookie, false); + ctx->cookie = NULL; + + iput(ctx->inode); + ctx->inode = NULL; + + kfree(ctx); + *fscache = NULL; +} + +int erofs_fscache_register_fs(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct fscache_volume *volume; + char *name; + int ret = 0; + + name = kasprintf(GFP_KERNEL, "erofs,%s", sbi->opt.fsid); + if (!name) + return -ENOMEM; + + volume = fscache_acquire_volume(name, NULL, NULL, 0); + if (IS_ERR_OR_NULL(volume)) { + erofs_err(sb, "failed to register volume for %s", name); + ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP; + volume = NULL; + } + + sbi->volume = volume; + kfree(name); + return ret; +} + +void erofs_fscache_unregister_fs(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + fscache_relinquish_volume(sbi->volume, NULL, false); + sbi->volume = NULL; +} diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index e8b37ba5e9ad..bcc8335b46b3 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -8,11 +8,6 @@ #include <trace/events/erofs.h> -/* - * if inode is successfully read, return its inode page (or sometimes - * the inode payload page if it's an extended inode) in order to fill - * inline data if possible. - */ static void *erofs_read_inode(struct erofs_buf *buf, struct inode *inode, unsigned int *ofs) { @@ -297,6 +292,10 @@ static int erofs_fill_inode(struct inode *inode, int isdir) goto out_unlock; } inode->i_mapping->a_ops = &erofs_raw_access_aops; +#ifdef CONFIG_EROFS_FS_ONDEMAND + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; +#endif out_unlock: erofs_put_metabuf(&buf); @@ -370,7 +369,7 @@ int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(mnt_userns, inode, stat); return 0; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 5298c4ee277d..cfee49d33b95 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -49,6 +49,7 @@ typedef u32 erofs_blk_t; struct erofs_device_info { char *path; + struct erofs_fscache *fscache; struct block_device *bdev; struct dax_device *dax_dev; u64 dax_part_off; @@ -74,6 +75,7 @@ struct erofs_mount_opts { unsigned int max_sync_decompress_pages; #endif unsigned int mount_opt; + char *fsid; }; struct erofs_dev_context { @@ -96,6 +98,11 @@ struct erofs_sb_lz4_info { u16 max_pclusterblks; }; +struct erofs_fscache { + struct fscache_cookie *cookie; + struct inode *inode; +}; + struct erofs_sb_info { struct erofs_mount_opts opt; /* options */ #ifdef CONFIG_EROFS_FS_ZIP @@ -146,6 +153,10 @@ struct erofs_sb_info { /* sysfs support */ struct kobject s_kobj; /* /sys/fs/erofs/<devname> */ struct completion s_kobj_unregister; + + /* fscache support */ + struct fscache_volume *volume; + struct erofs_fscache *s_fscache; }; #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) @@ -161,6 +172,11 @@ struct erofs_sb_info { #define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option) #define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option) +static inline bool erofs_is_fscache_mode(struct super_block *sb) +{ + return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !sb->s_bdev; +} + enum { EROFS_ZIP_CACHE_DISABLED, EROFS_ZIP_CACHE_READAHEAD, @@ -381,31 +397,6 @@ extern const struct super_operations erofs_sops; extern const struct address_space_operations erofs_raw_access_aops; extern const struct address_space_operations z_erofs_aops; -/* - * Logical to physical block mapping - * - * Different with other file systems, it is used for 2 access modes: - * - * 1) RAW access mode: - * - * Users pass a valid (m_lblk, m_lofs -- usually 0) pair, - * and get the valid m_pblk, m_pofs and the longest m_len(in bytes). - * - * Note that m_lblk in the RAW access mode refers to the number of - * the compressed ondisk block rather than the uncompressed - * in-memory block for the compressed file. - * - * m_pofs equals to m_lofs except for the inline data page. - * - * 2) Normal access mode: - * - * If the inode is not compressed, it has no difference with - * the RAW access mode. However, if the inode is compressed, - * users should pass a valid (m_lblk, m_lofs) pair, and get - * the needed m_pblk, m_pofs, m_len to get the compressed data - * and the updated m_lblk, m_lofs which indicates the start - * of the corresponding uncompressed data in the file. - */ enum { BH_Encoded = BH_PrivateStart, BH_FullMapped, @@ -467,6 +458,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode, #endif /* !CONFIG_EROFS_FS_ZIP */ struct erofs_map_dev { + struct erofs_fscache *m_fscache; struct block_device *m_bdev; struct dax_device *m_daxdev; u64 m_dax_part_off; @@ -486,6 +478,8 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); +int erofs_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, int flags); /* inode.c */ static inline unsigned long erofs_inode_hash(erofs_nid_t nid) @@ -509,7 +503,7 @@ int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path, /* namei.c */ extern const struct inode_operations erofs_dir_iops; -int erofs_namei(struct inode *dir, struct qstr *name, +int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid, unsigned int *d_type); /* dir.c */ @@ -611,6 +605,36 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb, } #endif /* !CONFIG_EROFS_FS_ZIP */ +/* fscache.c */ +#ifdef CONFIG_EROFS_FS_ONDEMAND +int erofs_fscache_register_fs(struct super_block *sb); +void erofs_fscache_unregister_fs(struct super_block *sb); + +int erofs_fscache_register_cookie(struct super_block *sb, + struct erofs_fscache **fscache, + char *name, bool need_inode); +void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache); + +extern const struct address_space_operations erofs_fscache_access_aops; +#else +static inline int erofs_fscache_register_fs(struct super_block *sb) +{ + return 0; +} +static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} + +static inline int erofs_fscache_register_cookie(struct super_block *sb, + struct erofs_fscache **fscache, + char *name, bool need_inode) +{ + return -EOPNOTSUPP; +} + +static inline void erofs_fscache_unregister_cookie(struct erofs_fscache **fscache) +{ +} +#endif + #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #endif /* __EROFS_INTERNAL_H */ diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 554efa363317..fd75506799c4 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -165,9 +165,8 @@ out: /* free if the candidate is valid */ return candidate; } -int erofs_namei(struct inode *dir, - struct qstr *name, - erofs_nid_t *nid, unsigned int *d_type) +int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid, + unsigned int *d_type) { int ndirents; struct erofs_buf buf = __EROFS_BUF_INITIALIZER; diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 0c4b41130c2f..95addc5c9d34 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -13,6 +13,7 @@ #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/dax.h> +#include <linux/exportfs.h> #include "xattr.h" #define CREATE_TRACE_POINTS @@ -219,7 +220,52 @@ static int erofs_load_compr_cfgs(struct super_block *sb, } #endif -static int erofs_init_devices(struct super_block *sb, +static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, + struct erofs_device_info *dif, erofs_off_t *pos) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + struct erofs_deviceslot *dis; + struct block_device *bdev; + void *ptr; + int ret; + + ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + dis = ptr + erofs_blkoff(*pos); + + if (!dif->path) { + if (!dis->tag[0]) { + erofs_err(sb, "empty device tag @ pos %llu", *pos); + return -EINVAL; + } + dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL); + if (!dif->path) + return -ENOMEM; + } + + if (erofs_is_fscache_mode(sb)) { + ret = erofs_fscache_register_cookie(sb, &dif->fscache, + dif->path, false); + if (ret) + return ret; + } else { + bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, + sb->s_type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + dif->bdev = bdev; + dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off); + } + + dif->blocks = le32_to_cpu(dis->blocks); + dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); + sbi->total_blocks += dif->blocks; + *pos += EROFS_DEVT_SLOT_SIZE; + return 0; +} + +static int erofs_scan_devices(struct super_block *sb, struct erofs_super_block *dsb) { struct erofs_sb_info *sbi = EROFS_SB(sb); @@ -227,8 +273,6 @@ static int erofs_init_devices(struct super_block *sb, erofs_off_t pos; struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_device_info *dif; - struct erofs_deviceslot *dis; - void *ptr; int id, err = 0; sbi->total_blocks = sbi->primarydevice_blocks; @@ -237,7 +281,8 @@ static int erofs_init_devices(struct super_block *sb, else ondisk_extradevs = le16_to_cpu(dsb->extra_devices); - if (ondisk_extradevs != sbi->devs->extra_devices) { + if (sbi->devs->extra_devices && + ondisk_extradevs != sbi->devs->extra_devices) { erofs_err(sb, "extra devices don't match (ondisk %u, given %u)", ondisk_extradevs, sbi->devs->extra_devices); return -EINVAL; @@ -248,30 +293,31 @@ static int erofs_init_devices(struct super_block *sb, sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; down_read(&sbi->devs->rwsem); - idr_for_each_entry(&sbi->devs->tree, dif, id) { - struct block_device *bdev; - - ptr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), - EROFS_KMAP); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - break; + if (sbi->devs->extra_devices) { + idr_for_each_entry(&sbi->devs->tree, dif, id) { + err = erofs_init_device(&buf, sb, dif, &pos); + if (err) + break; } - dis = ptr + erofs_blkoff(pos); - - bdev = blkdev_get_by_path(dif->path, - FMODE_READ | FMODE_EXCL, - sb->s_type); - if (IS_ERR(bdev)) { - err = PTR_ERR(bdev); - break; + } else { + for (id = 0; id < ondisk_extradevs; id++) { + dif = kzalloc(sizeof(*dif), GFP_KERNEL); + if (!dif) { + err = -ENOMEM; + break; + } + + err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL); + if (err < 0) { + kfree(dif); + break; + } + ++sbi->devs->extra_devices; + + err = erofs_init_device(&buf, sb, dif, &pos); + if (err) + break; } - dif->bdev = bdev; - dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off); - dif->blocks = le32_to_cpu(dis->blocks); - dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); - sbi->total_blocks += dif->blocks; - pos += EROFS_DEVT_SLOT_SIZE; } up_read(&sbi->devs->rwsem); erofs_put_metabuf(&buf); @@ -358,10 +404,12 @@ static int erofs_read_superblock(struct super_block *sb) goto out; /* handle multiple devices */ - ret = erofs_init_devices(sb, dsb); + ret = erofs_scan_devices(sb, dsb); if (erofs_sb_has_ztailpacking(sbi)) erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); + if (erofs_is_fscache_mode(sb)) + erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!"); out: erofs_put_metabuf(&buf); return ret; @@ -390,6 +438,7 @@ enum { Opt_dax, Opt_dax_enum, Opt_device, + Opt_fsid, Opt_err }; @@ -414,6 +463,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { fsparam_flag("dax", Opt_dax), fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), fsparam_string("device", Opt_device), + fsparam_string("fsid", Opt_fsid), {} }; @@ -509,6 +559,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, } ++ctx->devs->extra_devices; break; + case Opt_fsid: +#ifdef CONFIG_EROFS_FS_ONDEMAND + kfree(ctx->opt.fsid); + ctx->opt.fsid = kstrdup(param->string, GFP_KERNEL); + if (!ctx->opt.fsid) + return -ENOMEM; +#else + errorfc(fc, "fsid option not supported"); +#endif + break; default: return -ENOPARAM; } @@ -518,16 +578,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, #ifdef CONFIG_EROFS_FS_ZIP static const struct address_space_operations managed_cache_aops; -static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask) +static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp) { - int ret = 1; /* 0 - busy */ - struct address_space *const mapping = page->mapping; + bool ret = true; + struct address_space *const mapping = folio->mapping; - DBG_BUGON(!PageLocked(page)); + DBG_BUGON(!folio_test_locked(folio)); DBG_BUGON(mapping->a_ops != &managed_cache_aops); - if (PagePrivate(page)) - ret = erofs_try_to_free_cached_page(page); + if (folio_test_private(folio)) + ret = erofs_try_to_free_cached_page(&folio->page); return ret; } @@ -548,12 +608,12 @@ static void erofs_managed_cache_invalidate_folio(struct folio *folio, DBG_BUGON(stop > folio_size(folio) || stop < length); if (offset == 0 && stop == folio_size(folio)) - while (!erofs_managed_cache_releasepage(&folio->page, GFP_NOFS)) + while (!erofs_managed_cache_release_folio(folio, GFP_NOFS)) cond_resched(); } static const struct address_space_operations managed_cache_aops = { - .releasepage = erofs_managed_cache_releasepage, + .release_folio = erofs_managed_cache_release_folio, .invalidate_folio = erofs_managed_cache_invalidate_folio, }; @@ -577,6 +637,44 @@ static int erofs_init_managed_cache(struct super_block *sb) static int erofs_init_managed_cache(struct super_block *sb) { return 0; } #endif +static struct inode *erofs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + return erofs_iget(sb, ino, false); +} + +static struct dentry *erofs_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + erofs_nfs_get_inode); +} + +static struct dentry *erofs_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + erofs_nfs_get_inode); +} + +static struct dentry *erofs_get_parent(struct dentry *child) +{ + erofs_nid_t nid; + unsigned int d_type; + int err; + + err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type); + if (err) + return ERR_PTR(err); + return d_obtain_alias(erofs_iget(child->d_sb, nid, d_type == FT_DIR)); +} + +static const struct export_operations erofs_export_ops = { + .fh_to_dentry = erofs_fh_to_dentry, + .fh_to_parent = erofs_fh_to_parent, + .get_parent = erofs_get_parent, +}; + static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; @@ -585,11 +683,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) int err; sb->s_magic = EROFS_SUPER_MAGIC; - - if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { - erofs_err(sb, "failed to set erofs blksize"); - return -EINVAL; - } + sb->s_flags |= SB_RDONLY | SB_NOATIME; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_op = &erofs_sops; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -597,10 +693,36 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_fs_info = sbi; sbi->opt = ctx->opt; - sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off); + ctx->opt.fsid = NULL; sbi->devs = ctx->devs; ctx->devs = NULL; + if (erofs_is_fscache_mode(sb)) { + sb->s_blocksize = EROFS_BLKSIZ; + sb->s_blocksize_bits = LOG_BLOCK_SIZE; + + err = erofs_fscache_register_fs(sb); + if (err) + return err; + + err = erofs_fscache_register_cookie(sb, &sbi->s_fscache, + sbi->opt.fsid, true); + if (err) + return err; + + err = super_setup_bdi(sb); + if (err) + return err; + } else { + if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { + erofs_err(sb, "failed to set erofs blksize"); + return -EINVAL; + } + + sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, + &sbi->dax_part_off); + } + err = erofs_read_superblock(sb); if (err) return err; @@ -613,12 +735,10 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) clear_opt(&sbi->opt, DAX_ALWAYS); } } - sb->s_flags |= SB_RDONLY | SB_NOATIME; - sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_time_gran = 1; - sb->s_op = &erofs_sops; + sb->s_time_gran = 1; sb->s_xattr = erofs_xattr_handlers; + sb->s_export_op = &erofs_export_ops; if (test_opt(&sbi->opt, POSIX_ACL)) sb->s_flags |= SB_POSIXACL; @@ -661,6 +781,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) static int erofs_fc_get_tree(struct fs_context *fc) { + struct erofs_fs_context *ctx = fc->fs_private; + + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->opt.fsid) + return get_tree_nodev(fc, erofs_fc_fill_super); + return get_tree_bdev(fc, erofs_fc_fill_super); } @@ -690,6 +815,7 @@ static int erofs_release_device_info(int id, void *ptr, void *data) fs_put_dax(dif->dax_dev); if (dif->bdev) blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); + erofs_fscache_unregister_cookie(&dif->fscache); kfree(dif->path); kfree(dif); return 0; @@ -709,6 +835,7 @@ static void erofs_fc_free(struct fs_context *fc) struct erofs_fs_context *ctx = fc->fs_private; erofs_free_dev_context(ctx->devs); + kfree(ctx->opt.fsid); kfree(ctx); } @@ -749,7 +876,10 @@ static void erofs_kill_sb(struct super_block *sb) WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); - kill_block_super(sb); + if (erofs_is_fscache_mode(sb)) + generic_shutdown_super(sb); + else + kill_block_super(sb); sbi = EROFS_SB(sb); if (!sbi) @@ -757,6 +887,9 @@ static void erofs_kill_sb(struct super_block *sb) erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev); + erofs_fscache_unregister_cookie(&sbi->s_fscache); + erofs_fscache_unregister_fs(sb); + kfree(sbi->opt.fsid); kfree(sbi); sb->s_fs_info = NULL; } @@ -774,6 +907,7 @@ static void erofs_put_super(struct super_block *sb) iput(sbi->managed_cache); sbi->managed_cache = NULL; #endif + erofs_fscache_unregister_cookie(&sbi->s_fscache); } static struct file_system_type erofs_fs_type = { @@ -781,7 +915,7 @@ static struct file_system_type erofs_fs_type = { .name = "erofs", .init_fs_context = erofs_init_fs_context, .kill_sb = erofs_kill_sb, - .fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("erofs"); @@ -857,7 +991,10 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + u64 id = 0; + + if (!erofs_is_fscache_mode(sb)) + id = huge_encode_dev(sb->s_bdev->bd_dev); buf->f_type = sb->s_magic; buf->f_bsize = EROFS_BLKSIZ; @@ -902,6 +1039,10 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",dax=always"); if (test_opt(opt, DAX_NEVER)) seq_puts(seq, ",dax=never"); +#ifdef CONFIG_EROFS_FS_ONDEMAND + if (opt->fsid) + seq_printf(seq, ",fsid=%s", opt->fsid); +#endif return 0; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index f3babf1e6608..c1383e508bbe 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -205,8 +205,8 @@ int erofs_register_sysfs(struct super_block *sb) sbi->s_kobj.kset = &erofs_root; init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, - "%s", sb->s_id); + err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, "%s", + erofs_is_fscache_mode(sb) ? sbi->opt.fsid : sb->s_id); if (err) goto put_sb_kobj; return 0; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index e6dea6dfca16..95efc127b2ba 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -791,7 +791,7 @@ err_out: static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi, unsigned int readahead_pages) { - /* auto: enable for readpage, disable for readahead */ + /* auto: enable for read_folio, disable for readahead */ if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) && !readahead_pages) return true; @@ -1488,8 +1488,9 @@ skip: } } -static int z_erofs_readpage(struct file *file, struct page *page) +static int z_erofs_read_folio(struct file *file, struct folio *folio) { + struct page *page = &folio->page; struct inode *const inode = page->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); @@ -1563,6 +1564,6 @@ static void z_erofs_readahead(struct readahead_control *rac) } const struct address_space_operations z_erofs_aops = { - .readpage = z_erofs_readpage, + .read_folio = z_erofs_read_folio, .readahead = z_erofs_readahead, }; |