diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:17:11 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:17:11 +0200 |
commit | 69a3a0a45a2f72412c2ba31761cc9193bb746fef (patch) | |
tree | da838e90e4c29bd8e5f458437918e5055a13a804 /fs/erofs/data.c | |
parent | Merge tag 'for-6.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kda... (diff) | |
parent | erofs: reject inodes with negative i_size (diff) | |
download | linux-69a3a0a45a2f72412c2ba31761cc9193bb746fef.tar.xz linux-69a3a0a45a2f72412c2ba31761cc9193bb746fef.zip |
Merge tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"In this cycle, we add file-backed mount support, which has has been a
strong requirement for years. It is especially useful when there are
thousands of images running on the same host for containers and other
sandbox use cases, unlike OS image use cases.
Without file-backed mounts, it's hard for container runtimes to manage
and isolate so many unnecessary virtual block devices safely and
efficiently, therefore file-backed mounts are highly preferred. For
EROFS users, ComposeFS [1], containerd, and Android APEXes [2] will
directly benefit from it, and I've seen no risk in implementing it as
a completely immutable filesystem.
The previous experimental feature "EROFS over fscache" is now marked
as deprecated because:
- Fscache is no longer an independent subsystem and has been merged
into netfs, which was somewhat unexpected when it was proposed.
- New HSM "fanotify pre-content hooks" [3] will be landed upstream.
These hooks will replace "EROFS over fscache" in a simpler way, as
EROFS won't be bother with kernel caching anymore. Userspace
programs can also manage their own caching hierarchy more flexibly.
Once the HSM "fanotify pre-content hooks" is landed, I will remove the
fscache backend entirely as an internal dependency cleanup. More
backgrounds are listed in the original patchset [4].
In addition to that, there are bugfixes and cleanups as usual.
Summary:
- Support file-backed mounts for containers and sandboxes
- Mark the experimental fscache backend as deprecated
- Handle overlapped pclusters caused by crafted images properly
- Fix a failure path which could cause infinite loops in
z_erofs_init_decompressor()
- Get rid of unnecessary NOFAILs
- Harmless on-disk hardening & minor cleanups"
* tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: reject inodes with negative i_size
erofs: restrict pcluster size limitations
erofs: allocate more short-lived pages from reserved pool first
erofs: sunset unneeded NOFAILs
erofs: simplify erofs_map_blocks_flatmode()
erofs: refactor read_inode calling convention
erofs: use kmemdup_nul in erofs_fill_symlink
erofs: mark experimental fscache backend deprecated
erofs: support compressed inodes for fileio
erofs: support unencoded inodes for fileio
erofs: add file-backed mount support
erofs: handle overlapped pclusters out of crafted images properly
erofs: fix error handling in z_erofs_init_decompressor
erofs: clean up erofs_register_sysfs()
erofs: fix incorrect symlink detection in fast symlink
Diffstat (limited to 'fs/erofs/data.c')
-rw-r--r-- | fs/erofs/data.c | 109 |
1 files changed, 74 insertions, 35 deletions
diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 1b7eba38ba1e..61debd799cf9 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -59,8 +59,12 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) { - if (erofs_is_fscache_mode(sb)) - buf->mapping = EROFS_SB(sb)->s_fscache->inode->i_mapping; + struct erofs_sb_info *sbi = EROFS_SB(sb); + + if (erofs_is_fileio_mode(sbi)) + buf->mapping = file_inode(sbi->fdev)->i_mapping; + else if (erofs_is_fscache_mode(sb)) + buf->mapping = sbi->s_fscache->inode->i_mapping; else buf->mapping = sb->s_bdev->bd_mapping; } @@ -75,38 +79,28 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, static int erofs_map_blocks_flatmode(struct inode *inode, struct erofs_map_blocks *map) { - erofs_blk_t nblocks, lastblk; - u64 offset = map->m_la; struct erofs_inode *vi = EROFS_I(inode); struct super_block *sb = inode->i_sb; bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); + erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking; - nblocks = erofs_iblks(inode); - lastblk = nblocks - tailendpacking; - - /* there is no hole in flatmode */ - map->m_flags = EROFS_MAP_MAPPED; - if (offset < erofs_pos(sb, lastblk)) { + map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */ + if (map->m_la < erofs_pos(sb, lastblk)) { map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la; - map->m_plen = erofs_pos(sb, lastblk) - offset; - } else if (tailendpacking) { + map->m_plen = erofs_pos(sb, lastblk) - map->m_la; + } else { + DBG_BUGON(!tailendpacking); map->m_pa = erofs_iloc(inode) + vi->inode_isize + - vi->xattr_isize + erofs_blkoff(sb, offset); - map->m_plen = inode->i_size - offset; + vi->xattr_isize + erofs_blkoff(sb, map->m_la); + map->m_plen = inode->i_size - map->m_la; /* inline data should be located in the same meta block */ if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { - erofs_err(sb, "inline data cross block boundary @ nid %llu", - vi->nid); + erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid); DBG_BUGON(1); return -EFSCORRUPTED; } map->m_flags |= EROFS_MAP_META; - } else { - erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx", - vi->nid, inode->i_size, map->m_la); - DBG_BUGON(1); - return -EIO; } return 0; } @@ -128,7 +122,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) if (map->m_la >= inode->i_size) { /* leave out-of-bound access unmapped */ map->m_flags = 0; - map->m_plen = 0; + map->m_plen = map->m_llen; goto out; } @@ -189,16 +183,34 @@ out: return err; } +static void erofs_fill_from_devinfo(struct erofs_map_dev *map, + struct erofs_device_info *dif) +{ + map->m_bdev = NULL; + map->m_fp = NULL; + if (dif->file) { + if (S_ISBLK(file_inode(dif->file)->i_mode)) + map->m_bdev = file_bdev(dif->file); + else + map->m_fp = dif->file; + } + map->m_daxdev = dif->dax_dev; + map->m_dax_part_off = dif->dax_part_off; + map->m_fscache = dif->fscache; +} + int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) { struct erofs_dev_context *devs = EROFS_SB(sb)->devs; struct erofs_device_info *dif; + erofs_off_t startoff, length; int id; map->m_bdev = sb->s_bdev; map->m_daxdev = EROFS_SB(sb)->dax_dev; map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; map->m_fscache = EROFS_SB(sb)->s_fscache; + map->m_fp = EROFS_SB(sb)->fdev; if (map->m_deviceid) { down_read(&devs->rwsem); @@ -212,29 +224,20 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) up_read(&devs->rwsem); return 0; } - map->m_bdev = dif->bdev_file ? file_bdev(dif->bdev_file) : NULL; - map->m_daxdev = dif->dax_dev; - map->m_dax_part_off = dif->dax_part_off; - map->m_fscache = dif->fscache; + erofs_fill_from_devinfo(map, dif); up_read(&devs->rwsem); } else if (devs->extra_devices && !devs->flatdev) { down_read(&devs->rwsem); idr_for_each_entry(&devs->tree, dif, id) { - erofs_off_t startoff, length; - if (!dif->mapped_blkaddr) continue; + startoff = erofs_pos(sb, dif->mapped_blkaddr); length = erofs_pos(sb, dif->blocks); - if (map->m_pa >= startoff && map->m_pa < startoff + length) { map->m_pa -= startoff; - map->m_bdev = dif->bdev_file ? - file_bdev(dif->bdev_file) : NULL; - map->m_daxdev = dif->dax_dev; - map->m_dax_part_off = dif->dax_part_off; - map->m_fscache = dif->fscache; + erofs_fill_from_devinfo(map, dif); break; } } @@ -243,6 +246,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) return 0; } +/* + * bit 30: I/O error occurred on this folio + * bit 0 - 29: remaining parts to complete this folio + */ +#define EROFS_ONLINEFOLIO_EIO (1 << 30) + +void erofs_onlinefolio_init(struct folio *folio) +{ + union { + atomic_t o; + void *v; + } u = { .o = ATOMIC_INIT(1) }; + + folio->private = u.v; /* valid only if file-backed folio is locked */ +} + +void erofs_onlinefolio_split(struct folio *folio) +{ + atomic_inc((atomic_t *)&folio->private); +} + +void erofs_onlinefolio_end(struct folio *folio, int err) +{ + int orig, v; + + do { + orig = atomic_read((atomic_t *)&folio->private); + v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0); + } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); + + if (v & ~EROFS_ONLINEFOLIO_EIO) + return; + folio->private = 0; + folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO)); +} + static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { @@ -392,7 +431,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) } /* for uncompressed (aligned) files and raw access for other files */ -const struct address_space_operations erofs_raw_access_aops = { +const struct address_space_operations erofs_aops = { .read_folio = erofs_read_folio, .readahead = erofs_readahead, .bmap = erofs_bmap, |