summaryrefslogtreecommitdiffstats
path: root/fs/erofs/data.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 13:17:11 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 13:17:11 +0200
commit69a3a0a45a2f72412c2ba31761cc9193bb746fef (patch)
treeda838e90e4c29bd8e5f458437918e5055a13a804 /fs/erofs/data.c
parentMerge tag 'for-6.12-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kda... (diff)
parenterofs: reject inodes with negative i_size (diff)
downloadlinux-69a3a0a45a2f72412c2ba31761cc9193bb746fef.tar.xz
linux-69a3a0a45a2f72412c2ba31761cc9193bb746fef.zip
Merge tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang: "In this cycle, we add file-backed mount support, which has has been a strong requirement for years. It is especially useful when there are thousands of images running on the same host for containers and other sandbox use cases, unlike OS image use cases. Without file-backed mounts, it's hard for container runtimes to manage and isolate so many unnecessary virtual block devices safely and efficiently, therefore file-backed mounts are highly preferred. For EROFS users, ComposeFS [1], containerd, and Android APEXes [2] will directly benefit from it, and I've seen no risk in implementing it as a completely immutable filesystem. The previous experimental feature "EROFS over fscache" is now marked as deprecated because: - Fscache is no longer an independent subsystem and has been merged into netfs, which was somewhat unexpected when it was proposed. - New HSM "fanotify pre-content hooks" [3] will be landed upstream. These hooks will replace "EROFS over fscache" in a simpler way, as EROFS won't be bother with kernel caching anymore. Userspace programs can also manage their own caching hierarchy more flexibly. Once the HSM "fanotify pre-content hooks" is landed, I will remove the fscache backend entirely as an internal dependency cleanup. More backgrounds are listed in the original patchset [4]. In addition to that, there are bugfixes and cleanups as usual. Summary: - Support file-backed mounts for containers and sandboxes - Mark the experimental fscache backend as deprecated - Handle overlapped pclusters caused by crafted images properly - Fix a failure path which could cause infinite loops in z_erofs_init_decompressor() - Get rid of unnecessary NOFAILs - Harmless on-disk hardening & minor cleanups" * tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs: erofs: reject inodes with negative i_size erofs: restrict pcluster size limitations erofs: allocate more short-lived pages from reserved pool first erofs: sunset unneeded NOFAILs erofs: simplify erofs_map_blocks_flatmode() erofs: refactor read_inode calling convention erofs: use kmemdup_nul in erofs_fill_symlink erofs: mark experimental fscache backend deprecated erofs: support compressed inodes for fileio erofs: support unencoded inodes for fileio erofs: add file-backed mount support erofs: handle overlapped pclusters out of crafted images properly erofs: fix error handling in z_erofs_init_decompressor erofs: clean up erofs_register_sysfs() erofs: fix incorrect symlink detection in fast symlink
Diffstat (limited to 'fs/erofs/data.c')
-rw-r--r--fs/erofs/data.c109
1 files changed, 74 insertions, 35 deletions
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 1b7eba38ba1e..61debd799cf9 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -59,8 +59,12 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
{
- if (erofs_is_fscache_mode(sb))
- buf->mapping = EROFS_SB(sb)->s_fscache->inode->i_mapping;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+ if (erofs_is_fileio_mode(sbi))
+ buf->mapping = file_inode(sbi->fdev)->i_mapping;
+ else if (erofs_is_fscache_mode(sb))
+ buf->mapping = sbi->s_fscache->inode->i_mapping;
else
buf->mapping = sb->s_bdev->bd_mapping;
}
@@ -75,38 +79,28 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
static int erofs_map_blocks_flatmode(struct inode *inode,
struct erofs_map_blocks *map)
{
- erofs_blk_t nblocks, lastblk;
- u64 offset = map->m_la;
struct erofs_inode *vi = EROFS_I(inode);
struct super_block *sb = inode->i_sb;
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
+ erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking;
- nblocks = erofs_iblks(inode);
- lastblk = nblocks - tailendpacking;
-
- /* there is no hole in flatmode */
- map->m_flags = EROFS_MAP_MAPPED;
- if (offset < erofs_pos(sb, lastblk)) {
+ map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */
+ if (map->m_la < erofs_pos(sb, lastblk)) {
map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
- map->m_plen = erofs_pos(sb, lastblk) - offset;
- } else if (tailendpacking) {
+ map->m_plen = erofs_pos(sb, lastblk) - map->m_la;
+ } else {
+ DBG_BUGON(!tailendpacking);
map->m_pa = erofs_iloc(inode) + vi->inode_isize +
- vi->xattr_isize + erofs_blkoff(sb, offset);
- map->m_plen = inode->i_size - offset;
+ vi->xattr_isize + erofs_blkoff(sb, map->m_la);
+ map->m_plen = inode->i_size - map->m_la;
/* inline data should be located in the same meta block */
if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
- erofs_err(sb, "inline data cross block boundary @ nid %llu",
- vi->nid);
+ erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
map->m_flags |= EROFS_MAP_META;
- } else {
- erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx",
- vi->nid, inode->i_size, map->m_la);
- DBG_BUGON(1);
- return -EIO;
}
return 0;
}
@@ -128,7 +122,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */
map->m_flags = 0;
- map->m_plen = 0;
+ map->m_plen = map->m_llen;
goto out;
}
@@ -189,16 +183,34 @@ out:
return err;
}
+static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
+ struct erofs_device_info *dif)
+{
+ map->m_bdev = NULL;
+ map->m_fp = NULL;
+ if (dif->file) {
+ if (S_ISBLK(file_inode(dif->file)->i_mode))
+ map->m_bdev = file_bdev(dif->file);
+ else
+ map->m_fp = dif->file;
+ }
+ map->m_daxdev = dif->dax_dev;
+ map->m_dax_part_off = dif->dax_part_off;
+ map->m_fscache = dif->fscache;
+}
+
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
struct erofs_device_info *dif;
+ erofs_off_t startoff, length;
int id;
map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev;
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
map->m_fscache = EROFS_SB(sb)->s_fscache;
+ map->m_fp = EROFS_SB(sb)->fdev;
if (map->m_deviceid) {
down_read(&devs->rwsem);
@@ -212,29 +224,20 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
up_read(&devs->rwsem);
return 0;
}
- map->m_bdev = dif->bdev_file ? file_bdev(dif->bdev_file) : NULL;
- map->m_daxdev = dif->dax_dev;
- map->m_dax_part_off = dif->dax_part_off;
- map->m_fscache = dif->fscache;
+ erofs_fill_from_devinfo(map, dif);
up_read(&devs->rwsem);
} else if (devs->extra_devices && !devs->flatdev) {
down_read(&devs->rwsem);
idr_for_each_entry(&devs->tree, dif, id) {
- erofs_off_t startoff, length;
-
if (!dif->mapped_blkaddr)
continue;
+
startoff = erofs_pos(sb, dif->mapped_blkaddr);
length = erofs_pos(sb, dif->blocks);
-
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
map->m_pa -= startoff;
- map->m_bdev = dif->bdev_file ?
- file_bdev(dif->bdev_file) : NULL;
- map->m_daxdev = dif->dax_dev;
- map->m_dax_part_off = dif->dax_part_off;
- map->m_fscache = dif->fscache;
+ erofs_fill_from_devinfo(map, dif);
break;
}
}
@@ -243,6 +246,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
return 0;
}
+/*
+ * bit 30: I/O error occurred on this folio
+ * bit 0 - 29: remaining parts to complete this folio
+ */
+#define EROFS_ONLINEFOLIO_EIO (1 << 30)
+
+void erofs_onlinefolio_init(struct folio *folio)
+{
+ union {
+ atomic_t o;
+ void *v;
+ } u = { .o = ATOMIC_INIT(1) };
+
+ folio->private = u.v; /* valid only if file-backed folio is locked */
+}
+
+void erofs_onlinefolio_split(struct folio *folio)
+{
+ atomic_inc((atomic_t *)&folio->private);
+}
+
+void erofs_onlinefolio_end(struct folio *folio, int err)
+{
+ int orig, v;
+
+ do {
+ orig = atomic_read((atomic_t *)&folio->private);
+ v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
+ } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
+
+ if (v & ~EROFS_ONLINEFOLIO_EIO)
+ return;
+ folio->private = 0;
+ folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
+}
+
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
@@ -392,7 +431,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
/* for uncompressed (aligned) files and raw access for other files */
-const struct address_space_operations erofs_raw_access_aops = {
+const struct address_space_operations erofs_aops = {
.read_folio = erofs_read_folio,
.readahead = erofs_readahead,
.bmap = erofs_bmap,