diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-21 17:20:50 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-21 17:20:50 +0200 |
commit | 7856a565416e0cf091f825b0e25c7a1b7abb650e (patch) | |
tree | 0a04a0594167fc997b3b1299610b5ef95ab89f19 /fs/nilfs2 | |
parent | Merge tag 'mm-stable-2024-09-20-02-31' of git://git.kernel.org/pub/scm/linux/... (diff) | |
parent | list: test: increase coverage of list_test_list_replace*() (diff) | |
download | linux-7856a565416e0cf091f825b0e25c7a1b7abb650e.tar.xz linux-7856a565416e0cf091f825b0e25c7a1b7abb650e.zip |
Merge tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:
"Many singleton patches - please see the various changelogs for
details.
Quite a lot of nilfs2 work this time around.
Notable patch series in this pull request are:
- "mul_u64_u64_div_u64: new implementation" by Nicolas Pitre, with
assistance from Uwe Kleine-König. Reimplement mul_u64_u64_div_u64()
to provide (much) more accurate results. The current implementation
was causing Uwe some issues in the PWM drivers.
- "xz: Updates to license, filters, and compression options" from
Lasse Collin. Miscellaneous maintenance and minor feature work to
the xz decompressor.
- "Fix some GDB command error and add some GDB commands" from
Kuan-Ying Lee. Fixes and enhancements to the gdb scripts.
- "treewide: add missing MODULE_DESCRIPTION() macros" from Jeff
Johnson. Adds lots of MODULE_DESCRIPTIONs, thus fixing lots of
warnings about this.
- "nilfs2: add support for some common ioctls" from Ryusuke Konishi.
Adds various commonly-available ioctls to nilfs2.
- "This series fixes a number of formatting issues in kernel doc
comments" from Ryusuke Konishi does that.
- "nilfs2: prevent unexpected ENOENT propagation" from Ryusuke
Konishi. Fix issues where -ENOENT was being unintentionally and
inappropriately returned to userspace.
- "nilfs2: assorted cleanups" from Huang Xiaojia.
- "nilfs2: fix potential issues with empty b-tree nodes" from Ryusuke
Konishi fixes some issues which can occur on corrupted nilfs2
filesystems.
- "scripts/decode_stacktrace.sh: improve error reporting and
usability" from Luca Ceresoli does those things"
* tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (103 commits)
list: test: increase coverage of list_test_list_replace*()
list: test: fix tests for list_cut_position()
proc: use __auto_type more
treewide: correct the typo 'retun'
ocfs2: cleanup return value and mlog in ocfs2_global_read_info()
nilfs2: remove duplicate 'unlikely()' usage
nilfs2: fix potential oob read in nilfs_btree_check_delete()
nilfs2: determine empty node blocks as corrupted
nilfs2: fix potential null-ptr-deref in nilfs_btree_insert()
user_namespace: use kmemdup_array() instead of kmemdup() for multiple allocation
tools/mm: rm thp_swap_allocator_test when make clean
squashfs: fix percpu address space issues in decompressor_multi_percpu.c
lib: glob.c: added null check for character class
nilfs2: refactor nilfs_segctor_thread()
nilfs2: use kthread_create and kthread_stop for the log writer thread
nilfs2: remove sc_timer_task
nilfs2: do not repair reserved inode bitmap in nilfs_new_inode()
nilfs2: eliminate the shared counter and spinlock for i_generation
nilfs2: separate inode type information from i_state field
nilfs2: use the BITS_PER_LONG macro
...
Diffstat (limited to 'fs/nilfs2')
-rw-r--r-- | fs/nilfs2/alloc.h | 2 | ||||
-rw-r--r-- | fs/nilfs2/bmap.c | 2 | ||||
-rw-r--r-- | fs/nilfs2/bmap.h | 20 | ||||
-rw-r--r-- | fs/nilfs2/btnode.c | 63 | ||||
-rw-r--r-- | fs/nilfs2/btree.c | 12 | ||||
-rw-r--r-- | fs/nilfs2/btree.h | 1 | ||||
-rw-r--r-- | fs/nilfs2/cpfile.c | 54 | ||||
-rw-r--r-- | fs/nilfs2/dat.c | 17 | ||||
-rw-r--r-- | fs/nilfs2/dir.c | 44 | ||||
-rw-r--r-- | fs/nilfs2/inode.c | 79 | ||||
-rw-r--r-- | fs/nilfs2/ioctl.c | 109 | ||||
-rw-r--r-- | fs/nilfs2/mdt.c | 6 | ||||
-rw-r--r-- | fs/nilfs2/nilfs.h | 27 | ||||
-rw-r--r-- | fs/nilfs2/page.c | 21 | ||||
-rw-r--r-- | fs/nilfs2/page.h | 4 | ||||
-rw-r--r-- | fs/nilfs2/recovery.c | 11 | ||||
-rw-r--r-- | fs/nilfs2/segment.c | 234 | ||||
-rw-r--r-- | fs/nilfs2/segment.h | 10 | ||||
-rw-r--r-- | fs/nilfs2/sufile.c | 52 | ||||
-rw-r--r-- | fs/nilfs2/super.c | 9 | ||||
-rw-r--r-- | fs/nilfs2/the_nilfs.c | 5 | ||||
-rw-r--r-- | fs/nilfs2/the_nilfs.h | 6 |
22 files changed, 463 insertions, 325 deletions
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index d825a9faca6d..e19d7eb10084 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -37,7 +37,7 @@ void *nilfs_palloc_block_get_entry(const struct inode *, __u64, int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); /** - * nilfs_palloc_req - persistent allocator request and reply + * struct nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index cd14ea25968c..c9e8d9a7d820 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -349,7 +349,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) } /** - * nilfs_bmap_lookup_dirty_buffers - + * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers * @bmap: bmap * @listp: pointer to buffer head list */ diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 608168a5cb88..4656df392722 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -44,6 +44,19 @@ struct nilfs_bmap_stats { /** * struct nilfs_bmap_operations - bmap operation table + * @bop_lookup: single block search operation + * @bop_lookup_contig: consecutive block search operation + * @bop_insert: block insertion operation + * @bop_delete: block delete operation + * @bop_clear: block mapping resource release operation + * @bop_propagate: operation to propagate dirty state towards the + * mapping root + * @bop_lookup_dirty_buffers: operation to collect dirty block buffers + * @bop_assign: disk block address assignment operation + * @bop_mark: operation to mark in-use blocks as dirty for + * relocation by GC + * @bop_seek_key: find valid block key operation + * @bop_last_key: find last valid block key operation */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, 
__u64 *); @@ -66,7 +79,7 @@ struct nilfs_bmap_operations { int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *); int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); - /* The following functions are internal use only. */ + /* private: internal use only */ int (*bop_check_insert)(const struct nilfs_bmap *, __u64); int (*bop_check_delete)(struct nilfs_bmap *, __u64); int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); @@ -74,9 +87,8 @@ struct nilfs_bmap_operations { #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) -#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) -#define NILFS_BMAP_NEW_PTR_INIT \ - (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) +#define NILFS_BMAP_KEY_BIT BITS_PER_LONG +#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1)) static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) { diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index c034080c334b..57b4af5ad646 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -179,11 +179,32 @@ void nilfs_btnode_delete(struct buffer_head *bh) } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. 
Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. + * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -245,8 +266,21 @@ retry: } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. 
+ * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -285,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. 
*/ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 862bdf23120e..ef5061bb56da 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -350,7 +350,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || - nchildren < 0 || + nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", @@ -381,7 +381,8 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || - nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || + (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); @@ -1658,13 +1659,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) int nchildren, ret; root = nilfs_btree_get_root(btree); + nchildren = nilfs_btree_node_get_nchildren(root); + if (unlikely(nchildren == 0)) + return 0; + switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, @@ -1673,12 +1677,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } - nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? 
nilfs_btree_node_get_key(node, nchildren - 2) : 0; diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 92868e1a48ca..2a220f716c91 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -24,6 +24,7 @@ * @bp_index: index of child node * @bp_oldreq: ptr end request for old ptr * @bp_newreq: ptr alloc request for new ptr + * @bp_ctxt: context information for changing the key of a b-tree node block * @bp_op: rebalance operation */ struct nilfs_btree_path { diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 69a5cced1e84..f0ce37552446 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -125,10 +125,17 @@ static void nilfs_cpfile_block_init(struct inode *cpfile, } } -static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, - struct buffer_head **bhp) +static int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(cpfile->i_sb, + "missing header block in checkpoint metadata"); + err = -EIO; + } + return err; } static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, @@ -283,14 +290,9 @@ int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (unlikely(ret < 0)) { - if (ret == -ENOENT) { - nilfs_error(cpfile->i_sb, - "checkpoint creation failed due to metadata corruption."); - ret = -EIO; - } + if (unlikely(ret < 0)) goto out_sem; - } + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh); if (unlikely(ret < 0)) goto out_header; @@ -704,9 +706,15 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, } /** - * nilfs_cpfile_delete_checkpoint - - * @cpfile: - * @cno: + * nilfs_cpfile_delete_checkpoint - delete a checkpoint + * @cpfile: checkpoint file inode + * @cno: checkpoint number to 
delete + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EBUSY - Checkpoint in use (snapshot specified). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid checkpoint found. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { @@ -968,21 +976,15 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) } /** - * nilfs_cpfile_is_snapshot - + * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * - * Description: - * - * Return Value: On success, 1 is returned if the checkpoint specified by - * @cno is a snapshot, or 0 if not. On error, one of the following negative - * error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * @cno: checkpoint number * - * %-ENOENT - No such checkpoint. + * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or + * the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) { diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index fc1caf63a42a..0bef662176a4 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -271,18 +271,15 @@ void nilfs_dat_abort_update(struct inode *dat, } /** - * nilfs_dat_mark_dirty - - * @dat: DAT file inode + * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified + * virtual block address entry as dirty + * @dat: DAT file inode * @vblocknr: virtual block number * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. 
+ * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid DAT entry (internal code). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) { diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 4b3e19d74925..fe5b1a30c509 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -231,37 +231,6 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) nilfs_rec_len_from_disk(p->rec_len)); } -static unsigned char -nilfs_filetype_table[NILFS_FT_MAX] = { - [NILFS_FT_UNKNOWN] = DT_UNKNOWN, - [NILFS_FT_REG_FILE] = DT_REG, - [NILFS_FT_DIR] = DT_DIR, - [NILFS_FT_CHRDEV] = DT_CHR, - [NILFS_FT_BLKDEV] = DT_BLK, - [NILFS_FT_FIFO] = DT_FIFO, - [NILFS_FT_SOCK] = DT_SOCK, - [NILFS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char -nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { - [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, -}; - -static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) -{ - umode_t mode = inode->i_mode; - - de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - static int nilfs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; @@ -297,10 +266,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) if (de->inode) { unsigned char t; - if (de->file_type < NILFS_FT_MAX) - t = nilfs_filetype_table[de->file_type]; - else - t = DT_UNKNOWN; + t = fs_ftype_to_dtype(de->file_type); if (!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), t)) { @@ -444,7 +410,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, err = 
nilfs_prepare_chunk(folio, from, to); BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } @@ -531,7 +497,7 @@ got_it: de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, folio->mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); nilfs_mark_inode_dirty(dir); @@ -612,14 +578,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); kunmap_local(kaddr); nilfs_commit_chunk(folio, mapping, 0, chunk_size); fail: diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 8661f452dba6..be6acf6e2bfc 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -15,6 +15,7 @@ #include <linux/writeback.h> #include <linux/uio.h> #include <linux/fiemap.h> +#include <linux/random.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -28,17 +29,13 @@ * @ino: inode number * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag - * @for_btnc: inode for B-tree node cache flag - * @for_shadow: inode for shadowed page cache flag + * @type: inode type */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - bool for_gc; 
- bool for_btnc; - bool for_shadow; + unsigned int type; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -162,7 +159,7 @@ static int nilfs_writepages(struct address_space *mapping, int err = 0; if (sb_rdonly(inode->i_sb)) { - nilfs_clear_dirty_pages(mapping, false); + nilfs_clear_dirty_pages(mapping); return -EROFS; } @@ -186,7 +183,7 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) * have dirty pages that try to be flushed in background. * So, here we simply discard this dirty page. */ - nilfs_clear_folio_dirty(folio, false); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); return -EROFS; } @@ -315,8 +312,7 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -325,7 +321,6 @@ static int nilfs_insert_inode_locked(struct inode *inode, struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; @@ -343,25 +338,13 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = BIT(NILFS_I_NEW); + ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - - if (unlikely(ino < NILFS_USER_INO)) { - nilfs_warn(sb, - "inode bitmap is inconsistent for reserved inodes"); - do { - brelse(bh); - err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); - if (unlikely(err)) - goto failed_ifile_create_inode; - } while (ino < NILFS_USER_INO); - - 
nilfs_info(sb, "repaired inode bitmap for reserved inodes"); - } ii->i_bh = bh; atomic64_inc(&root->inodes_count); @@ -385,9 +368,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&nilfs->ns_next_gen_lock); - inode->i_generation = nilfs->ns_next_generation++; - spin_unlock(&nilfs->ns_next_gen_lock); + inode->i_generation = get_random_u32(); if (nilfs_insert_inode_locked(inode, root, ino) < 0) { err = -EIO; goto failed_after_creation; @@ -546,23 +527,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); - if (test_bit(NILFS_I_BTNC, &ii->i_state)) { - if (!args->for_btnc) - return 0; - } else if (args->for_btnc) { + if (ii->i_type != args->type) return 0; - } - if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { - if (!args->for_shadow) - return 0; - } else if (args->for_shadow) { - return 0; - } - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; - - return args->for_gc && args->cno == ii->i_cno; + return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) @@ -572,15 +540,9 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) inode->i_ino = args->ino; NILFS_I(inode)->i_cno = args->cno; NILFS_I(inode)->i_root = args->root; + NILFS_I(inode)->i_type = args->type; if (args->root && args->ino == NILFS_ROOT_INO) nilfs_get_root(args->root); - - if (args->for_gc) - NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - if (args->for_btnc) - NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); - if (args->for_shadow) - NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -588,8 +550,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root 
= root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -599,8 +560,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -631,8 +591,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; @@ -677,9 +636,7 @@ int nilfs_attach_btree_node_cache(struct inode *inode) args.ino = inode->i_ino; args.root = ii->i_root; args.cno = ii->i_cno; - args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; - args.for_btnc = true; - args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + args.type = ii->i_type | NILFS_I_TYPE_BTNC; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -733,8 +690,8 @@ void nilfs_detach_btree_node_cache(struct inode *inode) struct inode *nilfs_iget_for_shadow(struct inode *inode) { struct nilfs_iget_args args = { - .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = true + .ino = inode->i_ino, .root = NULL, .cno = 0, + .type = NILFS_I_TYPE_SHADOW }; struct inode *s_inode; int err; @@ -900,7 +857,7 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + if (!(ii->i_type & NILFS_I_TYPE_BTNC)) nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) diff --git a/fs/nilfs2/ioctl.c 
b/fs/nilfs2/ioctl.c index 8be471ce4f19..fa77f78df681 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -17,6 +17,7 @@ #include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */ #include <linux/buffer_head.h> #include <linux/fileattr.h> +#include <linux/string.h> #include "nilfs.h" #include "segment.h" #include "bmap.h" @@ -114,7 +115,11 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, } /** - * nilfs_fileattr_get - ioctl to support lsattr + * nilfs_fileattr_get - retrieve miscellaneous file attributes + * @dentry: the object to retrieve from + * @fa: fileattr pointer + * + * Return: always 0 as success. */ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { @@ -126,7 +131,12 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } /** - * nilfs_fileattr_set - ioctl to support chattr + * nilfs_fileattr_set - change miscellaneous file attributes + * @idmap: idmap of the mount + * @dentry: the object to change + * @fa: fileattr pointer + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) @@ -159,6 +169,10 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap, /** * nilfs_ioctl_getversion - get info about a file's version (generation number) + * @inode: inode object + * @argp: userspace memory where the generation number of @inode is stored + * + * Return: 0 on success, or %-EFAULT on error. */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { @@ -1266,6 +1280,91 @@ out: return ret; } +/** + * nilfs_ioctl_get_fslabel - get the volume name of the file system + * @sb: super block instance + * @argp: pointer to userspace memory where the volume name should be stored + * + * Return: 0 on success, %-EFAULT if copying to userspace memory fails. 
+ */ +static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + char label[NILFS_MAX_VOLUME_NAME + 1]; + + BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX); + + down_read(&nilfs->ns_sem); + memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name); + up_read(&nilfs->ns_sem); + + if (copy_to_user(argp, label, sizeof(label))) + return -EFAULT; + return 0; +} + +/** + * nilfs_ioctl_set_fslabel - set the volume name of the file system + * @sb: super block instance + * @filp: file object + * @argp: pointer to userspace memory that contains the volume name + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Label length exceeds record size in superblock. + * * %-EIO - I/O error. + * * %-EPERM - Operation not permitted (insufficient permissions). + * * %-EROFS - Read only file system. + */ +static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp, + void __user *argp) +{ + char label[NILFS_MAX_VOLUME_NAME + 1]; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + size_t len; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) { + ret = -EFAULT; + goto out_drop_write; + } + + len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1); + if (len > NILFS_MAX_VOLUME_NAME) { + nilfs_err(sb, "unable to set label with more than %zu bytes", + NILFS_MAX_VOLUME_NAME); + ret = -EINVAL; + goto out_drop_write; + } + + down_write(&nilfs->ns_sem); + sbp = nilfs_prepare_super(sb, false); + if (unlikely(!sbp)) { + ret = -EIO; + goto out_unlock; + } + + strtomem_pad(sbp[0]->s_volume_name, label, 0); + if (sbp[1]) + strtomem_pad(sbp[1]->s_volume_name, label, 0); + + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + +out_unlock: + up_write(&nilfs->ns_sem); +out_drop_write: 
+ mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1308,6 +1407,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_set_alloc_range(inode, argp); case FITRIM: return nilfs_ioctl_trim_fs(inode, argp); + case FS_IOC_GETFSLABEL: + return nilfs_ioctl_get_fslabel(inode->i_sb, argp); + case FS_IOC_SETFSLABEL: + return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp); default: return -ENOTTY; } @@ -1334,6 +1437,8 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 4f792a0ad0f0..ceb7dc0b5bad 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -411,7 +411,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) * have dirty folios that try to be flushed in background. * So, here we simply discard this dirty folio. 
*/ - nilfs_clear_folio_dirty(folio, false); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); return -EROFS; } @@ -638,10 +638,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_clear_dirty_pages(inode->i_mapping); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 4017f7856440..fb1c4c5bae7c 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -22,6 +22,7 @@ /** * struct nilfs_inode_info - nilfs inode data in memory * @i_flags: inode flags + * @i_type: inode type (combination of flags that indicate usage) * @i_state: dynamic state flags * @i_bmap: pointer on i_bmap_data * @i_bmap_data: raw block mapping @@ -37,6 +38,7 @@ */ struct nilfs_inode_info { __u32 i_flags; + unsigned int i_type; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; struct nilfs_bmap i_bmap_data; @@ -90,9 +92,16 @@ enum { NILFS_I_UPDATED, /* The file has been written back */ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_BTNC, /* inode for btree node cache */ - NILFS_I_SHADOW, /* inode for shadowed page cache */ +}; + +/* + * Flags to identify the usage of on-memory inodes (i_type) + */ +enum { + NILFS_I_TYPE_NORMAL = 0, + NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */ + NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */ + NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */ }; /* @@ -103,6 +112,18 @@ enum { NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; +/** + * define NILFS_MAX_VOLUME_NAME 
- maximum number of characters (bytes) in a + * file system volume name + * + * Defined by the size of the volume name field in the on-disk superblocks. + * This volume name does not include the terminating NULL byte if the string + * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the + * size of the buffer that requires a NULL byte termination. + */ +#define NILFS_MAX_VOLUME_NAME \ + sizeof_field(struct nilfs_super_block, s_volume_name) + /* * Macros to check inode numbers */ diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 14e470fb8870..9c0b7cddeaae 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -262,7 +262,7 @@ repeat: NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); dfolio = filemap_grab_folio(dmap, folio->index); - if (unlikely(IS_ERR(dfolio))) { + if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ folio_unlock(folio); err = PTR_ERR(dfolio); @@ -357,9 +357,8 @@ repeat: /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) +void nilfs_clear_dirty_pages(struct address_space *mapping) { struct folio_batch fbatch; unsigned int i; @@ -380,7 +379,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) * was acquired. Skip processing in that case. 
*/ if (likely(folio->mapping == mapping)) - nilfs_clear_folio_dirty(folio, silent); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); } @@ -392,20 +391,13 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) /** * nilfs_clear_folio_dirty - discard dirty folio * @folio: dirty folio that will be discarded - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_folio_dirty(struct folio *folio, bool silent) +void nilfs_clear_folio_dirty(struct folio *folio) { - struct inode *inode = folio->mapping->host; - struct super_block *sb = inode->i_sb; struct buffer_head *bh, *head; BUG_ON(!folio_test_locked(folio)); - if (!silent) - nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", - folio_pos(folio), inode->i_ino); - folio_clear_uptodate(folio); folio_clear_mappedtodisk(folio); @@ -419,11 +411,6 @@ void nilfs_clear_folio_dirty(struct folio *folio, bool silent) bh = head; do { lock_buffer(bh); - if (!silent) - nilfs_warn(sb, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); - set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 7e1a2c455a10..64521a03a19e 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -41,8 +41,8 @@ void nilfs_folio_bug(struct folio *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_folio_dirty(struct folio *, bool); -void nilfs_clear_dirty_pages(struct address_space *, bool); +void nilfs_clear_folio_dirty(struct folio *folio); +void nilfs_clear_dirty_pages(struct address_space *mapping); unsigned int nilfs_page_count_clean_buffers(struct page *, unsigned int, unsigned int); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ec61ce9f29a2..21d81097a89f 
100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -433,8 +433,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * The next segment is invalidated by this recovery. */ err = nilfs_sufile_free(sufile, segnum[1]); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated", + (unsigned long long)nilfs->ns_last_pseg, + (unsigned long long)nilfs->ns_segnum, + (unsigned long long)segnum[1]); + err = -EINVAL; + } goto failed; + } for (i = 1; i < 4; i++) { err = nilfs_segment_list_add(head, segnum[i]); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 871ec35ea8e8..587251830897 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -519,7 +519,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, ii = NILFS_I(inode); - if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + if (ii->i_type & NILFS_I_TYPE_GC) cno = ii->i_cno; else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) cno = 0; @@ -1102,12 +1102,64 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, return err; } +/** + * nilfs_free_segments - free the segments given by an array of segment numbers + * @nilfs: nilfs object + * @segnumv: array of segment numbers to be freed + * @nsegs: number of segments to be freed in @segnumv + * + * nilfs_free_segments() wraps nilfs_sufile_freev() and + * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file + * (sufile) to free all segments given by @segnumv and @nsegs at once. If + * it fails midway, it cancels the changes so that none of the segments are + * freed. If @nsegs is 0, this function does nothing. + * + * The freeing of segments is not finalized until the writing of a log with + * a super root block containing this sufile change is complete, and it can + * be canceled with nilfs_sufile_cancel_freev() until then. 
+ * + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid segment number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, + size_t nsegs) +{ + size_t ndone; + int ret; + + if (!nsegs) + return 0; + + ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); + if (unlikely(ret)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, + NULL); + /* + * If a segment usage of the segments to be freed is in a + * hole block, nilfs_sufile_freev() will return -ENOENT. + * In this case, -EINVAL should be returned to the caller + * since there is something wrong with the given segment + * number array. This error can only occur during GC, so + * there is no need to worry about it propagating to other + * callers (such as fsync). + */ + if (ret == -ENOENT) { + nilfs_err(nilfs->ns_sb, + "The segment usage entry %llu to be freed is invalid (in a hole)", + (unsigned long long)segnumv[ndone]); + ret = -EINVAL; + } + } + return ret; +} + static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; - size_t ndone; int err = 0; switch (nilfs_sc_cstage_get(sci)) { @@ -1201,14 +1253,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) nilfs_sc_cstage_inc(sci); fallthrough; case NILFS_ST_SUFILE: - err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, - sci->sc_nfreesegs, &ndone); - if (unlikely(err)) { - nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, ndone, - NULL); + err = nilfs_free_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (unlikely(err)) break; - } sci->sc_stage.flags |= NILFS_CF_SUFREED; err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, @@ -2456,7 +2504,7 @@ static void 
nilfs_construction_timeout(struct timer_list *t) { struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); - wake_up_process(sci->sc_timer_task); + wake_up_process(sci->sc_task); } static void @@ -2582,123 +2630,85 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) } /** - * nilfs_segctor_thread - main loop of the segment constructor thread. + * nilfs_log_write_required - determine whether log writing is required + * @sci: nilfs_sc_info struct + * @modep: location for storing log writing mode + * + * Return: true if log writing is required, false otherwise. If log writing + * is required, the mode is stored in the location pointed to by @modep. + */ +static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) +{ + bool timedout, ret = true; + + spin_lock(&sci->sc_state_lock); + timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer.expires)); + if (timedout || sci->sc_seq_request != sci->sc_seq_done) + *modep = SC_LSEG_SR; + else if (sci->sc_flush_request) + *modep = nilfs_segctor_flush_mode(sci); + else + ret = false; + + spin_unlock(&sci->sc_state_lock); + return ret; +} + +/** + * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. * - * nilfs_segctor_thread() initializes a timer and serves as a daemon - * to execute segment constructions. + * nilfs_segctor_thread() is the main loop function of the log writer kernel + * thread, which determines whether log writing is necessary, and if so, + * performs the log write in the background, or waits if not. It is also + * used to decide the background writeback of the superblock. + * + * Return: Always 0. */ static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; - - sci->sc_timer_task = current; - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - /* start sync. 
*/ - sci->sc_task = current; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ nilfs_info(sci->sc_super, "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); set_freezable(); - spin_lock(&sci->sc_state_lock); - loop: - for (;;) { - int mode; - - if (sci->sc_state & NILFS_SEGCTOR_QUIT) - goto end_thread; - - if (timeout || sci->sc_seq_request != sci->sc_seq_done) - mode = SC_LSEG_SR; - else if (sci->sc_flush_request) - mode = nilfs_segctor_flush_mode(sci); - else - break; - - spin_unlock(&sci->sc_state_lock); - nilfs_segctor_thread_construct(sci, mode); - spin_lock(&sci->sc_state_lock); - timeout = 0; - } - - if (freezing(current)) { - spin_unlock(&sci->sc_state_lock); - try_to_freeze(); - spin_lock(&sci->sc_state_lock); - } else { + while (!kthread_should_stop()) { DEFINE_WAIT(wait); - int should_sleep = 1; + bool should_write; + int mode; + + if (freezing(current)) { + try_to_freeze(); + continue; + } prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE); - - if (sci->sc_seq_request != sci->sc_seq_done) - should_sleep = 0; - else if (sci->sc_flush_request) - should_sleep = 0; - else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) - should_sleep = time_before(jiffies, - sci->sc_timer.expires); - - if (should_sleep) { - spin_unlock(&sci->sc_state_lock); + should_write = nilfs_log_write_required(sci, &mode); + if (!should_write) schedule(); - spin_lock(&sci->sc_state_lock); - } finish_wait(&sci->sc_wait_daemon, &wait); - timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer.expires)); if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); + + if (should_write) + nilfs_segctor_thread_construct(sci, mode); } - goto loop; - end_thread: /* end sync. 
*/ + spin_lock(&sci->sc_state_lock); sci->sc_task = NULL; timer_shutdown_sync(&sci->sc_timer); - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ spin_unlock(&sci->sc_state_lock); return 0; } -static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) -{ - struct task_struct *t; - - t = kthread_run(nilfs_segctor_thread, sci, "segctord"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); - - nilfs_err(sci->sc_super, "error %d creating segctord thread", - err); - return err; - } - wait_event(sci->sc_wait_task, sci->sc_task != NULL); - return 0; -} - -static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) - __acquires(&sci->sc_state_lock) - __releases(&sci->sc_state_lock) -{ - sci->sc_state |= NILFS_SEGCTOR_QUIT; - - while (sci->sc_task) { - wake_up(&sci->sc_wait_daemon); - spin_unlock(&sci->sc_state_lock); - wait_event(sci->sc_wait_task, sci->sc_task == NULL); - spin_lock(&sci->sc_state_lock); - } -} - /* * Setup & clean-up functions */ @@ -2719,7 +2729,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); - init_waitqueue_head(&sci->sc_wait_task); spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); @@ -2774,8 +2783,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) up_write(&nilfs->ns_segctor_sem); + if (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + kthread_stop(sci->sc_task); + } + spin_lock(&sci->sc_state_lock); - nilfs_segctor_kill_thread(sci); flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); @@ -2823,14 +2836,15 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * This allocates a log writer object, initializes it, and starts the * log writer. * - * Return Value: On success, 0 is returned. 
On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINTR - Log writer thread creation failed due to interruption. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci; + struct task_struct *t; int err; if (nilfs->ns_writer) { @@ -2843,15 +2857,23 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) return 0; } - nilfs->ns_writer = nilfs_segctor_new(sb, root); - if (!nilfs->ns_writer) + sci = nilfs_segctor_new(sb, root); + if (unlikely(!sci)) return -ENOMEM; - err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (unlikely(err)) + nilfs->ns_writer = sci; + t = kthread_create(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + err = PTR_ERR(t); + nilfs_err(sb, "error %d creating segctord thread", err); nilfs_detach_log_writer(sb); + return err; + } + sci->sc_task = t; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - return err; + wake_up_process(sci->sc_task); + return 0; } /** diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 1060f72ebf5a..f723f47ddc4e 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -22,10 +22,10 @@ struct nilfs_root; * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block number of the last super root - * @ri_ri_cno: Number of the last checkpoint + * @ri_cno: Number of the last checkpoint * @ri_lsegs_start: Region for roll-forwarding (start block number) * @ri_lsegs_end: Region for roll-forwarding (end block number) - * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start * @ri_used_segments: List of segments to be mark active * 
@ri_pseg_start: Block number of the last partial segment * @ri_seq: Sequence number on the last partial segment @@ -105,9 +105,8 @@ struct nilfs_segsum_pointer { * @sc_flush_request: inode bitmap of metadata files to be flushed * @sc_wait_request: Client request queue * @sc_wait_daemon: Daemon wait queue - * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter - * @sc_seq_accept: Accepted request count + * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -158,7 +157,6 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_request; wait_queue_head_t sc_wait_daemon; - wait_queue_head_t sc_wait_task; __u32 sc_seq_request; __u32 sc_seq_accepted; @@ -171,7 +169,6 @@ struct nilfs_sc_info { unsigned long sc_watermark; struct timer_list sc_timer; - struct task_struct *sc_timer_task; struct task_struct *sc_task; }; @@ -192,7 +189,6 @@ enum { }; /* sc_state */ -#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ /* diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 6748218be7c5..eea5a6a12f7b 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -79,10 +79,17 @@ nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, NILFS_MDT(sufile)->mi_entry_size; } -static inline int nilfs_sufile_get_header_block(struct inode *sufile, - struct buffer_head **bhp) +static int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(sufile->i_sb, + "missing header block in segment usage metadata"); + err = -EIO; + } + return err; } static inline int @@ -506,8 +513,15 @@ int nilfs_sufile_mark_dirty(struct inode 
*sufile, __u64 segnum) down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (ret) + if (unlikely(ret)) { + if (ret == -ENOENT) { + nilfs_error(sufile->i_sb, + "segment usage for segment %llu is unreadable due to a hole block", + (unsigned long long)segnum); + ret = -EIO; + } goto out_sem; + } kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); @@ -840,21 +854,17 @@ out: } /** - * nilfs_sufile_get_suinfo - + * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @buf: array of suinfo - * @sisz: byte size of suinfo - * @nsi: size of suinfo array - * - * Description: + * @buf: array of suinfo + * @sisz: byte size of suinfo + * @nsi: size of suinfo array * - * Return Value: On success, 0 is returned and .... On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: Count of segment usage info items stored in the output buffer on + * success, or the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) @@ -1241,9 +1251,15 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, if (err) goto failed; - err = nilfs_sufile_get_header_block(sufile, &header_bh); - if (err) + err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "missing header block in segment usage metadata"); + err = -EINVAL; + } goto failed; + } sui = NILFS_SUI(sufile); kaddr = kmap_local_page(header_bh->b_page); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e835e1f5a712..eca79cca3803 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -105,6 +105,10 @@ static void nilfs_set_error(struct super_block *sb) /** * __nilfs_error() - report failure condition on a filesystem + * @sb: super block instance + * @function: name of calling function + * @fmt: format string for message to be output + * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. 
This function should be called when @@ -156,6 +160,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; @@ -1063,6 +1068,10 @@ nilfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) goto failed_nilfs; + super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, + sizeof(nilfs->ns_sbp[0]->s_uuid)); + super_set_sysfs_name_bdev(sb); + cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index e44dde57ab65..ac03fd3c330c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -12,7 +12,6 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> -#include <linux/random.h> #include <linux/log2.h> #include <linux/crc32.h> #include "nilfs.h" @@ -69,7 +68,6 @@ struct the_nilfs *alloc_nilfs(struct super_block *sb) INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); - spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); @@ -754,9 +752,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) nilfs->ns_blocksize_bits = sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; - get_random_bytes(&nilfs->ns_next_generation, - sizeof(nilfs->ns_next_generation)); - err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 1e829ed7b0ef..4776a70f01ae 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -71,8 +71,6 @@ enum { * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_next_generation: next generation number for inodes - * @ns_next_gen_lock: 
lock protecting @ns_next_generation * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks @@ -161,10 +159,6 @@ struct the_nilfs { /* GC inode list */ struct list_head ns_gc_inodes; - /* Inode allocator */ - u32 ns_next_generation; - spinlock_t ns_next_gen_lock; - /* Mount options */ unsigned long ns_mount_opt; |