From ed6e0401e68bdfe08de9b44968fb235ff10ccee1 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Wed, 9 Feb 2022 14:00:52 +0800 Subject: erofs: use meta buffers for erofs_read_superblock() The only change is that, meta buffers read cache page without __GFP_FS flag, which shall not matter. Link: https://lore.kernel.org/r/20220209060108.43051-7-jefflexu@linux.alibaba.com Signed-off-by: Jeffle Xu Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/super.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 915eefe0d7e2..12755217631f 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -281,21 +281,19 @@ static int erofs_init_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { struct erofs_sb_info *sbi; - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; unsigned int blkszbits; void *data; int ret; - page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); - if (IS_ERR(page)) { + data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); + if (IS_ERR(data)) { erofs_err(sb, "cannot read erofs superblock"); - return PTR_ERR(page); + return PTR_ERR(data); } sbi = EROFS_SB(sb); - - data = kmap(page); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); ret = -EINVAL; @@ -365,8 +363,7 @@ static int erofs_read_superblock(struct super_block *sb) if (erofs_sb_has_ztailpacking(sbi)) erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); out: - kunmap(page); - put_page(page); + erofs_put_metabuf(&buf); return ret; } -- cgit v1.2.3 From 5c6dcc57e2e50553405f2cf8b949f99b8820a685 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 2 Mar 2022 03:49:50 +0800 Subject: erofs: get rid of `struct z_erofs_collector' Avoid `struct z_erofs_collector' since there is another context structure called "struct z_erofs_decompress_frontend". No logic changes. Link: https://lore.kernel.org/r/20220301194951.106227-1-hsiangkao@linux.alibaba.com Reviewed-by: Yue Hu Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 163 ++++++++++++++++++++++++++----------------------------- 1 file changed, 77 insertions(+), 86 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 423bc1a61da5..2673fc105861 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -192,7 +192,10 @@ enum z_erofs_collectmode { COLLECT_PRIMARY_FOLLOWED, }; -struct z_erofs_collector { +struct z_erofs_decompress_frontend { + struct inode *const inode; + struct erofs_map_blocks map; + struct z_erofs_pagevec_ctor vector; struct z_erofs_pcluster *pcl, *tailpcl; @@ -202,13 +205,6 @@ struct z_erofs_collector { z_erofs_next_pcluster_t owned_head; enum z_erofs_collectmode mode; -}; - -struct z_erofs_decompress_frontend { - struct inode *const inode; - - struct z_erofs_collector clt; - struct erofs_map_blocks map; bool readahead; /* used for applying cache strategy on the fly */ @@ -216,30 +212,26 @@ struct z_erofs_decompress_frontend { erofs_off_t headoffset; }; -#define COLLECTOR_INIT() { \ - .owned_head = Z_EROFS_PCLUSTER_TAIL, \ - .mode = COLLECT_PRIMARY_FOLLOWED } - #define DECOMPRESS_FRONTEND_INIT(__i) { \ - .inode = __i, .clt = COLLECTOR_INIT(), \ - .backmost = true, } + .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ + .mode = COLLECT_PRIMARY_FOLLOWED } static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES]; static DEFINE_MUTEX(z_pagemap_global_lock); -static void preload_compressed_pages(struct z_erofs_collector *clt, +static void preload_compressed_pages(struct z_erofs_decompress_frontend *fe, struct address_space *mc, enum z_erofs_cache_alloctype type, struct page **pagepool) { - struct z_erofs_pcluster *pcl = clt->pcl; + struct z_erofs_pcluster *pcl = fe->pcl; bool standalone = true; gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; struct page **pages; pgoff_t index; - if (clt->mode < COLLECT_PRIMARY_FOLLOWED) + if (fe->mode < COLLECT_PRIMARY_FOLLOWED) return; pages = pcl->compressed_pages; @@ -288,7 +280,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt, * managed cache since it can be moved to the bypass queue instead. */ if (standalone) - clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } /* called by erofs_shrinker to get rid of all compressed_pages */ @@ -350,47 +342,47 @@ int erofs_try_to_free_cached_page(struct page *page) } /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ -static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, +static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, struct page *page) { - struct z_erofs_pcluster *const pcl = clt->pcl; + struct z_erofs_pcluster *const pcl = fe->pcl; - while (clt->icpage_ptr > pcl->compressed_pages) - if (!cmpxchg(--clt->icpage_ptr, NULL, page)) + while (fe->icpage_ptr > pcl->compressed_pages) + if (!cmpxchg(--fe->icpage_ptr, NULL, page)) return true; return false; } /* callers must be with collection lock held */ -static int z_erofs_attach_page(struct z_erofs_collector *clt, +static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, struct page *page, enum z_erofs_page_type type, bool pvec_safereuse) { int ret; /* give priority for inplaceio */ - if (clt->mode >= COLLECT_PRIMARY && + if (fe->mode >= COLLECT_PRIMARY && type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && - z_erofs_try_inplace_io(clt, page)) + z_erofs_try_inplace_io(fe, page)) return 0; - ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, + ret = z_erofs_pagevec_enqueue(&fe->vector, page, type, pvec_safereuse); - clt->cl->vcnt += (unsigned int)ret; + fe->cl->vcnt += (unsigned int)ret; return ret ? 0 : -EAGAIN; } -static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) +static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f) { - struct z_erofs_pcluster *pcl = clt->pcl; - z_erofs_next_pcluster_t *owned_head = &clt->owned_head; + struct z_erofs_pcluster *pcl = f->pcl; + z_erofs_next_pcluster_t *owned_head = &f->owned_head; /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, *owned_head) == Z_EROFS_PCLUSTER_NIL) { *owned_head = &pcl->next; /* so we can attach this pcluster to our submission chain. */ - clt->mode = COLLECT_PRIMARY_FOLLOWED; + f->mode = COLLECT_PRIMARY_FOLLOWED; return; } @@ -401,24 +393,24 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, *owned_head) == Z_EROFS_PCLUSTER_TAIL) { *owned_head = Z_EROFS_PCLUSTER_TAIL; - clt->mode = COLLECT_PRIMARY_HOOKED; - clt->tailpcl = NULL; + f->mode = COLLECT_PRIMARY_HOOKED; + f->tailpcl = NULL; return; } /* type 3, it belongs to a chain, but it isn't the end of the chain */ - clt->mode = COLLECT_PRIMARY; + f->mode = COLLECT_PRIMARY; } -static int z_erofs_lookup_collection(struct z_erofs_collector *clt, +static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { - struct z_erofs_pcluster *pcl = clt->pcl; + struct z_erofs_pcluster *pcl = fe->pcl; struct z_erofs_collection *cl; unsigned int length; /* to avoid unexpected loop formed by corrupted images */ - if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) { + if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) { DBG_BUGON(1); return -EFSCORRUPTED; } @@ -449,15 +441,15 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt, } mutex_lock(&cl->lock); /* used to check tail merging loop due to corrupted images */ - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) - clt->tailpcl = pcl; + if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL) + fe->tailpcl = pcl; - z_erofs_try_to_claim_pcluster(clt); - clt->cl = cl; + z_erofs_try_to_claim_pcluster(fe); + fe->cl = cl; return 0; } -static int z_erofs_register_collection(struct z_erofs_collector *clt, +static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { @@ -485,8 +477,8 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, Z_EROFS_PCLUSTER_FULL_LENGTH : 0); /* new pclusters should be claimed as type 1, primary and followed */ - pcl->next = clt->owned_head; - clt->mode = COLLECT_PRIMARY_FOLLOWED; + pcl->next = fe->owned_head; + fe->mode = COLLECT_PRIMARY_FOLLOWED; cl = z_erofs_primarycollection(pcl); cl->pageofs = map->m_la & ~PAGE_MASK; @@ -512,18 +504,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, } if (grp != &pcl->obj) { - clt->pcl = container_of(grp, + fe->pcl = container_of(grp, struct z_erofs_pcluster, obj); err = -EEXIST; goto err_out; } } /* used to check tail merging loop due to corrupted images */ - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) - clt->tailpcl = pcl; - clt->owned_head = &pcl->next; - clt->pcl = pcl; - clt->cl = cl; + if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL) + fe->tailpcl = pcl; + fe->owned_head = &pcl->next; + fe->pcl = pcl; + fe->cl = cl; return 0; err_out: @@ -532,18 +524,18 @@ err_out: return err; } -static int z_erofs_collector_begin(struct z_erofs_collector *clt, +static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { struct erofs_workgroup *grp; int ret; - DBG_BUGON(clt->cl); + DBG_BUGON(fe->cl); /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */ - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL); - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); + DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); if (map->m_flags & EROFS_MAP_META) { if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { @@ -555,28 +547,28 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt, grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT); if (grp) { - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj); + fe->pcl = container_of(grp, struct z_erofs_pcluster, obj); } else { tailpacking: - ret = z_erofs_register_collection(clt, inode, map); + ret = z_erofs_register_collection(fe, inode, map); if (!ret) goto out; if (ret != -EEXIST) return ret; } - ret = z_erofs_lookup_collection(clt, inode, map); + ret = z_erofs_lookup_collection(fe, inode, map); if (ret) { - erofs_workgroup_put(&clt->pcl->obj); + erofs_workgroup_put(&fe->pcl->obj); return ret; } out: - z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, - clt->cl->pagevec, clt->cl->vcnt); + z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS, + fe->cl->pagevec, fe->cl->vcnt); /* since file-backed online pages are traversed in reverse order */ - clt->icpage_ptr = clt->pcl->compressed_pages + - z_erofs_pclusterpages(clt->pcl); + fe->icpage_ptr = fe->pcl->compressed_pages + + z_erofs_pclusterpages(fe->pcl); return 0; } @@ -610,24 +602,24 @@ static void z_erofs_collection_put(struct z_erofs_collection *cl) erofs_workgroup_put(&pcl->obj); } -static bool z_erofs_collector_end(struct z_erofs_collector *clt) +static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) { - struct z_erofs_collection *cl = clt->cl; + struct z_erofs_collection *cl = fe->cl; if (!cl) return false; - z_erofs_pagevec_ctor_exit(&clt->vector, false); + z_erofs_pagevec_ctor_exit(&fe->vector, false); mutex_unlock(&cl->lock); /* * if all pending pages are added, don't hold its reference * any longer if the pcluster isn't hosted by ourselves. */ - if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) + if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) z_erofs_collection_put(cl); - clt->cl = NULL; + fe->cl = NULL; return true; } @@ -651,7 +643,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct inode *const inode = fe->inode; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct erofs_map_blocks *const map = &fe->map; - struct z_erofs_collector *const clt = &fe->clt; const loff_t offset = page_offset(page); bool tight = true; @@ -672,7 +663,7 @@ repeat: if (offset + cur >= map->m_la && offset + cur < map->m_la + map->m_llen) { /* didn't get a valid collection previously (very rare) */ - if (!clt->cl) + if (!fe->cl) goto restart_now; goto hitted; } @@ -680,7 +671,7 @@ repeat: /* go ahead the next map_blocks */ erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur); - if (z_erofs_collector_end(clt)) + if (z_erofs_collector_end(fe)) fe->backmost = false; map->m_la = offset + cur; @@ -693,11 +684,11 @@ restart_now: if (!(map->m_flags & EROFS_MAP_MAPPED)) goto hitted; - err = z_erofs_collector_begin(clt, inode, map); + err = z_erofs_collector_begin(fe, inode, map); if (err) goto err_out; - if (z_erofs_is_inline_pcluster(clt->pcl)) { + if (z_erofs_is_inline_pcluster(fe->pcl)) { void *mp; mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb, @@ -709,8 +700,8 @@ restart_now: goto err_out; } get_page(fe->map.buf.page); - WRITE_ONCE(clt->pcl->compressed_pages[0], fe->map.buf.page); - clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; + WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page); + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } else { /* preload all compressed pages (can change mode if needed) */ if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, @@ -719,7 +710,7 @@ restart_now: else cache_strategy = DONTALLOC; - preload_compressed_pages(clt, MNGD_MAPPING(sbi), + preload_compressed_pages(fe, MNGD_MAPPING(sbi), cache_strategy, pagepool); } @@ -730,8 +721,8 @@ hitted: * those chains are handled asynchronously thus the page cannot be used * for inplace I/O or pagevec (should be processed in strict order.) */ - tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && - clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED && + fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); cur = end - min_t(unsigned int, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { @@ -746,18 +737,18 @@ hitted: Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); if (cur) - tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); + tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED); retry: - err = z_erofs_attach_page(clt, page, page_type, - clt->mode >= COLLECT_PRIMARY_FOLLOWED); + err = z_erofs_attach_page(fe, page, page_type, + fe->mode >= COLLECT_PRIMARY_FOLLOWED); /* should allocate an additional short-lived page for pagevec */ if (err == -EAGAIN) { struct page *const newpage = alloc_page(GFP_NOFS | __GFP_NOFAIL); set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE); - err = z_erofs_attach_page(clt, newpage, + err = z_erofs_attach_page(fe, newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); if (!err) goto retry; @@ -773,7 +764,7 @@ retry: /* bump up the number of spiltted parts of a page */ ++spiltted; /* also update nr_pages */ - clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1); + fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1); next_part: /* can be used for verification */ map->m_llen = offset + cur - map->m_la; @@ -1309,7 +1300,7 @@ static void z_erofs_submit_queue(struct super_block *sb, z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; void *bi_private; - z_erofs_next_pcluster_t owned_head = f->clt.owned_head; + z_erofs_next_pcluster_t owned_head = f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ pgoff_t last_index; struct block_device *last_bdev; @@ -1417,7 +1408,7 @@ static void z_erofs_runqueue(struct super_block *sb, { struct z_erofs_decompressqueue io[NR_JOBQUEUES]; - if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL) + if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) return; z_erofs_submit_queue(sb, f, pagepool, io, &force_fg); @@ -1517,7 +1508,7 @@ static int z_erofs_readpage(struct file *file, struct page *page) err = z_erofs_do_read_page(&f, page, &pagepool); z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false); - (void)z_erofs_collector_end(&f.clt); + (void)z_erofs_collector_end(&f); /* if some compressed cluster ready, need submit them anyway */ z_erofs_runqueue(inode->i_sb, &f, &pagepool, @@ -1567,7 +1558,7 @@ static void z_erofs_readahead(struct readahead_control *rac) put_page(page); } z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false); - (void)z_erofs_collector_end(&f.clt); + (void)z_erofs_collector_end(&f); z_erofs_runqueue(inode->i_sb, &f, &pagepool, z_erofs_get_sync_decompress_policy(sbi, nr_pages)); -- cgit v1.2.3 From 6f39d1e1ca46782bf11b8de016e904793d46aed0 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 2 Mar 2022 03:49:51 +0800 Subject: erofs: clean up preload_compressed_pages() Rename preload_compressed_pages() as z_erofs_bind_cache() since we're trying to prepare for adapting folios. Also, add a comment for the gfp setting. No logic changes. Link: https://lore.kernel.org/r/20220301194951.106227-2-hsiangkao@linux.alibaba.com Reviewed-by: Yue Hu Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 2673fc105861..59aecf42e45c 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -219,13 +219,17 @@ struct z_erofs_decompress_frontend { static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES]; static DEFINE_MUTEX(z_pagemap_global_lock); -static void preload_compressed_pages(struct z_erofs_decompress_frontend *fe, - struct address_space *mc, - enum z_erofs_cache_alloctype type, - struct page **pagepool) +static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, + enum z_erofs_cache_alloctype type, + struct page **pagepool) { + struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); struct z_erofs_pcluster *pcl = fe->pcl; bool standalone = true; + /* + * optimistic allocation without direct reclaim since inplace I/O + * can be used if low memory otherwise. + */ gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; struct page **pages; @@ -703,17 +707,15 @@ restart_now: WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page); fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } else { - /* preload all compressed pages (can change mode if needed) */ + /* bind cache first when cached decompression is preferred */ if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la)) cache_strategy = TRYALLOC; else cache_strategy = DONTALLOC; - preload_compressed_pages(fe, MNGD_MAPPING(sbi), - cache_strategy, pagepool); + z_erofs_bind_cache(fe, cache_strategy, pagepool); } - hitted: /* * Ensure the current partial page belongs to this submit chain rather -- cgit v1.2.3 From d467e980d0239fd95cc93b0995199973624a4825 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 11 Mar 2022 01:34:48 +0800 Subject: erofs: silence warnings related to impossible m_plen Dan reported two smatch warnings [1], .. warn: should '1 << lclusterbits' be a 64 bit type? .. warn: should 'm->compressedlcs << lclusterbits' be a 64 bit type? In practice, m_plen cannot be more than 1MiB due to on-disk constraint for the compression mode, so we're always safe here. In order to make static analyzers happy and not report again, let's silence them instead. [1] https://lore.kernel.org/r/202203091002.lJVzsX6e-lkp@intel.com Reported-by: kernel test robot Reported-by: Dan Carpenter Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20220310173448.19962-1-hsiangkao@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 361b1d6e4bf9..b4059b9c3bac 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -494,7 +494,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { - map->m_plen = 1 << lclusterbits; + map->m_plen = 1ULL << lclusterbits; return 0; } lcn = m->lcn + 1; @@ -540,7 +540,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, return -EFSCORRUPTED; } out: - map->m_plen = m->compressedlcs << lclusterbits; + map->m_plen = (u64)m->compressedlcs << lclusterbits; return 0; err_bonus_cblkcnt: erofs_err(m->inode->i_sb, -- cgit v1.2.3 From ab474fccd04509db89fde8d3b28c39aa9a47db64 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 11 Mar 2022 02:27:42 +0800 Subject: erofs: clean up z_erofs_extent_lookback Avoid the unnecessary tail recursion since it can be converted into a loop directly in order to prevent potential stack overflow. It's a pretty straightforward conversion. Link: https://lore.kernel.org/r/20220310182743.102365-1-hsiangkao@linux.alibaba.com Reviewed-by: Yue Hu Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 67 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index b4059b9c3bac..572f0b8151ba 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -431,48 +431,47 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned int lookback_distance) { struct erofs_inode *const vi = EROFS_I(m->inode); - struct erofs_map_blocks *const map = m->map; const unsigned int lclusterbits = vi->z_logical_clusterbits; - unsigned long lcn = m->lcn; - int err; - if (lcn < lookback_distance) { - erofs_err(m->inode->i_sb, - "bogus lookback distance @ nid %llu", vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } + while (m->lcn >= lookback_distance) { + unsigned long lcn = m->lcn - lookback_distance; + int err; - /* load extent head logical cluster if needed */ - lcn -= lookback_distance; - err = z_erofs_load_cluster_from_disk(m, lcn, false); - if (err) - return err; + /* load extent head logical cluster if needed */ + err = z_erofs_load_cluster_from_disk(m, lcn, false); + if (err) + return err; - switch (m->type) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - if (!m->delta[0]) { + switch (m->type) { + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + if (!m->delta[0]) { + erofs_err(m->inode->i_sb, + "invalid lookback distance 0 @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + lookback_distance = m->delta[0]; + continue; + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: + m->headtype = m->type; + m->map->m_la = (lcn << lclusterbits) | m->clusterofs; + return 0; + default: erofs_err(m->inode->i_sb, - "invalid lookback distance 0 @ nid %llu", - vi->nid); + "unknown type %u @ lcn %lu of nid %llu", + m->type, lcn, vi->nid); DBG_BUGON(1); - return -EFSCORRUPTED; + return -EOPNOTSUPP; } - return z_erofs_extent_lookback(m, m->delta[0]); - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: - m->headtype = m->type; - map->m_la = (lcn << lclusterbits) | m->clusterofs; - break; - default: - erofs_err(m->inode->i_sb, - "unknown type %u @ lcn %lu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; } - return 0; + + erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; } static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, -- cgit v1.2.3 From 9f2731d6338a072b60ab1d9340847bde3306614b Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 11 Mar 2022 02:27:43 +0800 Subject: erofs: refine managed inode stuffs Set up the correct gfp mask and use it instead of hard coding. Also add comments about .invalidatepage() to show more details. Link: https://lore.kernel.org/r/20220310182743.102365-2-hsiangkao@linux.alibaba.com Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/super.c | 8 ++++++-- fs/erofs/zdata.c | 7 +++---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 12755217631f..e178100c162a 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -532,6 +532,11 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask) return ret; } +/* + * It will be called only on inode eviction. In case that there are still some + * decompression requests in progress, wait with rescheduling for a bit here. + * We could introduce an extra locking instead but it seems unnecessary. + */ static void erofs_managed_cache_invalidatepage(struct page *page, unsigned int offset, unsigned int length) @@ -565,8 +570,7 @@ static int erofs_init_managed_cache(struct super_block *sb) inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &managed_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, - GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->managed_cache = inode; return 0; } diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 59aecf42e45c..f5e17c03b2fb 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1091,10 +1091,10 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, unsigned int nr, struct page **pagepool, - struct address_space *mc, - gfp_t gfp) + struct address_space *mc) { const pgoff_t index = pcl->obj.index; + gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; struct address_space *mapping; @@ -1350,8 +1350,7 @@ static void z_erofs_submit_queue(struct super_block *sb, struct page *page; page = pickup_page_for_submission(pcl, i++, pagepool, - MNGD_MAPPING(sbi), - GFP_NOFS); + MNGD_MAPPING(sbi)); if (!page) continue; -- cgit v1.2.3 From a942da24abc5839c11a8fc2a4b7cb268ea94ba54 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 15 Mar 2022 21:28:14 +0800 Subject: fs: erofs: add sanity check for kobject in erofs_unregister_sysfs Syzkaller hit 'WARNING: kobject bug in erofs_unregister_sysfs'. This bug is triggered by injecting fault in kobject_init_and_add of erofs_unregister_sysfs. Fix this by adding sanity check for kobject in erofs_unregister_sysfs Note that I've tested the patch and the crash does not occur any more. Link: https://lore.kernel.org/r/20220315132814.12332-1-dzm91@hust.edu.cn Signed-off-by: Dongliang Mu Fixes: 168e9a76200c ("erofs: add sysfs interface") Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/sysfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index dac252bc9228..f3babf1e6608 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -221,9 +221,11 @@ void erofs_unregister_sysfs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); + if (sbi->s_kobj.state_in_sysfs) { + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + } } int __init erofs_init_sysfs(void) -- cgit v1.2.3 From fe5de5859d44eafd5828ec4910cd52cdad8ada1f Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 16 Mar 2022 09:22:45 +0800 Subject: erofs: use meta buffers for reading directories Previously, directory inodes are directly handled with page cache interfaces. In order to support sub-page directory blocks and folios, let's convert them into the latest metabuf infrastructure as well and this patch addresses the readdir case first. Link: https://lore.kernel.org/r/20220316012246.95131-1-hsiangkao@linux.alibaba.com Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/data.c | 12 +++++++++--- fs/erofs/dir.c | 21 ++++++--------------- fs/erofs/internal.h | 2 ++ 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 226a57c57ee6..780db1e5f4b7 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -28,10 +28,10 @@ void erofs_put_metabuf(struct erofs_buf *buf) buf->page = NULL; } -void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_blk_t blkaddr, enum erofs_kmap_type type) +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type) { - struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; + struct address_space *const mapping = inode->i_mapping; erofs_off_t offset = blknr_to_addr(blkaddr); pgoff_t index = offset >> PAGE_SHIFT; struct page *page = buf->page; @@ -60,6 +60,12 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, return buf->base + (offset & ~PAGE_MASK); } +void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, + erofs_blk_t blkaddr, enum erofs_kmap_type type) +{ + return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); +} + static int erofs_map_blocks_flatmode(struct inode *inode, struct erofs_map_blocks *map, int flags) diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index eee9b0b31b63..18e59821c597 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "internal.h" @@ -67,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, static int erofs_readdir(struct file *f, struct dir_context *ctx) { struct inode *dir = file_inode(f); - struct address_space *mapping = dir->i_mapping; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; const size_t dirsize = i_size_read(dir); unsigned int i = ctx->pos / EROFS_BLKSIZ; unsigned int ofs = ctx->pos % EROFS_BLKSIZ; @@ -75,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) bool initial = true; while (ctx->pos < dirsize) { - struct page *dentry_page; struct erofs_dirent *de; unsigned int nameoff, maxsize; - dentry_page = read_mapping_page(mapping, i, NULL); - if (dentry_page == ERR_PTR(-ENOMEM)) { - err = -ENOMEM; - break; - } else if (IS_ERR(dentry_page)) { + de = erofs_bread(&buf, dir, i, EROFS_KMAP); + if (IS_ERR(de)) { erofs_err(dir->i_sb, "fail to readdir of logical block %u of nid %llu", i, EROFS_I(dir)->nid); - err = -EFSCORRUPTED; + err = PTR_ERR(de); break; } - de = (struct erofs_dirent *)kmap(dentry_page); - nameoff = le16_to_cpu(de->nameoff); - if (nameoff < sizeof(struct erofs_dirent) || nameoff >= PAGE_SIZE) { erofs_err(dir->i_sb, @@ -119,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) err = erofs_fill_dentries(dir, ctx, de, &ofs, nameoff, maxsize); skip_this: - kunmap(dentry_page); - - put_page(dentry_page); - ctx->pos = blknr_to_addr(i) + ofs; if (err) @@ -130,6 +120,7 @@ skip_this: ++i; ofs = 0; } + erofs_put_metabuf(&buf); return err < 0 ? err : 0; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 5aa2cf2c2f80..5298c4ee277d 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -479,6 +479,8 @@ struct erofs_map_dev { extern const struct file_operations erofs_file_fops; void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type); void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, erofs_blk_t blkaddr, enum erofs_kmap_type type); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); -- cgit v1.2.3 From 500edd0956487281a6fa0e1e34e931817e85d8b6 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 16 Mar 2022 09:22:46 +0800 Subject: erofs: use meta buffers for inode lookup This converts the remaining inode lookup part by using metabuf in a straight-forward way. Except that it doesn't use kmap_atomic() anymore since we now have to maintain two metabufs together. After this patch, all uncompressed paths are handled with metabuf instead of page structure. Link: https://lore.kernel.org/r/20220316012246.95131-2-hsiangkao@linux.alibaba.com Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- fs/erofs/namei.c | 54 ++++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) (limited to 'fs/erofs') diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 8629e616028c..554efa363317 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "xattr.h" @@ -86,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic(struct inode *dir, - struct erofs_qstr *name, - int *_ndirents) +static void *find_target_block_classic(struct erofs_buf *target, + struct inode *dir, + struct erofs_qstr *name, + int *_ndirents) { unsigned int startprfx, endprfx; int head, back; - struct address_space *const mapping = dir->i_mapping; - struct page *candidate = ERR_PTR(-ENOENT); + void *candidate = ERR_PTR(-ENOENT); startprfx = endprfx = 0; head = 0; @@ -101,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir, while (head <= back) { const int mid = head + (back - head) / 2; - struct page *page = read_mapping_page(mapping, mid, NULL); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct erofs_dirent *de; - if (!IS_ERR(page)) { - struct erofs_dirent *de = kmap_atomic(page); + de = erofs_bread(&buf, dir, mid, EROFS_KMAP); + if (!IS_ERR(de)) { const int nameoff = nameoff_from_disk(de->nameoff, EROFS_BLKSIZ); const int ndirents = nameoff / sizeof(*de); @@ -113,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir, struct erofs_qstr dname; if (!ndirents) { - kunmap_atomic(de); - put_page(page); + erofs_put_metabuf(&buf); erofs_err(dir->i_sb, "corrupted dir block %d @ nid %llu", mid, EROFS_I(dir)->nid); DBG_BUGON(1); - page = ERR_PTR(-EFSCORRUPTED); + de = ERR_PTR(-EFSCORRUPTED); goto out; } @@ -135,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - kunmap_atomic(de); if (!diff) { *_ndirents = 0; @@ -145,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir, startprfx = matched; if (!IS_ERR(candidate)) - put_page(candidate); - candidate = page; + erofs_put_metabuf(target); + *target = buf; + candidate = de; *_ndirents = ndirents; } else { - put_page(page); + erofs_put_metabuf(&buf); back = mid - 1; endprfx = matched; @@ -158,8 +159,8 @@ static struct page *find_target_block_classic(struct inode *dir, } out: /* free if the candidate is valid */ if (!IS_ERR(candidate)) - put_page(candidate); - return page; + erofs_put_metabuf(target); + return de; } return candidate; } @@ -169,8 +170,7 @@ int erofs_namei(struct inode *dir, erofs_nid_t *nid, unsigned int *d_type) { int ndirents; - struct page *page; - void *data; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_dirent *de; struct erofs_qstr qn; @@ -181,26 +181,20 @@ int erofs_namei(struct inode *dir, qn.end = name->name + name->len; ndirents = 0; - page = find_target_block_classic(dir, &qn, &ndirents); - if (IS_ERR(page)) - return PTR_ERR(page); + de = find_target_block_classic(&buf, dir, &qn, &ndirents); + if (IS_ERR(de)) + return PTR_ERR(de); - data = kmap_atomic(page); /* the target page has been mapped */ if (ndirents) - de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); - else - de = (struct erofs_dirent *)data; + de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); if (!IS_ERR(de)) { *nid = le64_to_cpu(de->nid); *d_type = de->file_type; } - - kunmap_atomic(data); - put_page(page); - + erofs_put_metabuf(&buf); return PTR_ERR_OR_ZERO(de); } -- cgit v1.2.3 From a1108dcd9373a98f7018aa4310076260b8ecfc0b Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 17 Mar 2022 19:49:59 +0800 Subject: erofs: rename ctime to mtime EROFS images should inherit modification time rather than change time, since users and host tooling have no easy way to control change time. To reflect the new timestamp meaning, i_ctime and i_ctime_nsec are renamed to i_mtime and i_mtime_nsec. Link: https://lore.kernel.org/r/20220311041829.3109511-1-dvander@google.com # v1 Signed-off-by: David Anderson [ Gao Xiang: update document as well. ] Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20220317114959.106787-1-hsiangkao@linux.alibaba.com # v2 Signed-off-by: Gao Xiang --- Documentation/filesystems/erofs.rst | 2 +- fs/erofs/erofs_fs.h | 5 +++-- fs/erofs/inode.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/erofs') diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index 7119aa213be7..bef6d3040ce4 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -40,7 +40,7 @@ Here is the main features of EROFS: Inode metadata size 32 bytes 64 bytes Max file size 4 GB 16 EB (also limited by max. vol size) Max uids/gids 65536 4294967296 - File change time no yes (64 + 32-bit timestamp) + Per-inode timestamp no yes (64 + 32-bit timestamp) Max hardlinks 65536 4294967296 Metadata reserved 4 bytes 14 bytes ===================== ============ ===================================== diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 3ea62c6fb00a..1238ca104f09 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -12,6 +12,7 @@ #define EROFS_SUPER_OFFSET 1024 #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 +#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should @@ -186,8 +187,8 @@ struct erofs_inode_extended { __le32 i_uid; __le32 i_gid; - __le64 i_ctime; - __le32 i_ctime_nsec; + __le64 i_mtime; + __le32 i_mtime_nsec; __le32 i_nlink; __u8 i_reserved2[16]; }; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index ff62f84f47d3..e8b37ba5e9ad 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -113,8 +113,8 @@ static void *erofs_read_inode(struct erofs_buf *buf, set_nlink(inode, le32_to_cpu(die->i_nlink)); /* extended inode has its own timestamp */ - inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime); - inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec); + inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime); + inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec); inode->i_size = le64_to_cpu(die->i_size); -- cgit v1.2.3