author      Linus Torvalds <torvalds@linux-foundation.org>  2012-03-28 19:02:55 +0200
committer   Linus Torvalds <torvalds@linux-foundation.org>  2012-03-28 19:02:55 +0200
commit      69e1aaddd63104f37021d0b0f6abfd9623c9134c (patch)
tree        14ad49741b428d270b681694bb2df349465455b9 /fs/ext4
parent      Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sag... (diff)
parent      vfs: remove unused superblock helpers (diff)
download    linux-69e1aaddd63104f37021d0b0f6abfd9623c9134c.tar.xz
            linux-69e1aaddd63104f37021d0b0f6abfd9623c9134c.zip
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates for 3.4 from Ted Ts'o:
"Ext4 commits for 3.3 merge window; mostly cleanups and bug fixes
The changes to export dirty_writeback_interval are from Artem's s_dirt
cleanup patch series. The same is true of the change to remove the
s_dirt helper functions which never got used by anyone in-tree. I've
run these changes by Al Viro, and am carrying them so that Artem can
more easily fix up the rest of the file systems during the next merge
window. (Originally we had hoped to remove the use of s_dirt from
ext4 during this merge window, but his patches had some bugs, so I
ultimately ended up dropping them from the ext4 tree.)"
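For context, dirty_writeback_interval is the flusher threads' wakeup period in centisecs; exporting it lets a filesystem that drops s_dirt re-arm its own periodic superblock writeout from the same knob. A minimal sketch of that pattern, with the foo_* names as placeholders rather than anything in the tree:

	#include <linux/workqueue.h>
	#include <linux/writeback.h>	/* dirty_writeback_interval (centisecs) */
	#include <linux/jiffies.h>

	/* assumes a foo_sb_info whose sync_work was set up elsewhere with
	 * INIT_DELAYED_WORK(&sbi->sync_work, foo_sync_sb) */
	static void foo_sync_sb(struct work_struct *work)
	{
		struct foo_sb_info *sbi =
			container_of(work, struct foo_sb_info, sync_work.work);

		/* ... write the on-disk superblock here ... */

		/* re-arm using the same interval the flusher threads honour */
		schedule_delayed_work(&sbi->sync_work,
				msecs_to_jiffies(dirty_writeback_interval * 10));
	}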
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (66 commits)
vfs: remove unused superblock helpers
mm: export dirty_writeback_interval
ext4: remove useless s_dirt assignment
ext4: write superblock only once on unmount
ext4: do not mark superblock as dirty unnecessarily
ext4: correct ext4_punch_hole return codes
ext4: remove restrictive checks for EOFBLOCKS_FL
ext4: always set the trimmed blocks count into len
ext4: fix trimmed block count accounting
ext4: fix start and len arguments handling in ext4_trim_fs()
ext4: update s_free_{inodes,blocks}_count during online resize
ext4: change some printk() calls to use ext4_msg() instead
ext4: avoid output message interleaving in ext4_error_<foo>()
ext4: remove trailing newlines from ext4_msg() and ext4_error() messages
ext4: add no_printk argument validation, fix fallout
ext4: remove redundant "EXT4-fs: " from uses of ext4_msg
ext4: give more helpful error message in ext4_ext_rm_leaf()
ext4: remove unused code from ext4_ext_map_blocks()
ext4: rewrite punch hole to use ext4_ext_remove_space()
jbd2: cleanup journal tail after transaction commit
...
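A representative printk() to ext4_msg() conversion from the series above (this one is the call_filldir() change in the dir.c hunk below); ext4_msg() supplies the "EXT4-fs (<device>):" prefix and a newline itself, which is why the redundant tag and trailing \n disappear:

	/* before */
	printk(KERN_ERR "EXT4-fs: call_filldir: called with "
	       "null fname?!?\n");

	/* after */
	ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
		 "called with null fname?!?", __func__, __LINE__,
		 inode->i_ino, current->comm);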
Diffstat (limited to 'fs/ext4')
-rw-r--r--   fs/ext4/balloc.c         |   63
-rw-r--r--   fs/ext4/dir.c            |   13
-rw-r--r--   fs/ext4/ext4.h           |   34
-rw-r--r--   fs/ext4/ext4_extents.h   |    4
-rw-r--r--   fs/ext4/ext4_jbd2.h      |  128
-rw-r--r--   fs/ext4/extents.c        |  330
-rw-r--r--   fs/ext4/fsync.c          |    2
-rw-r--r--   fs/ext4/ialloc.c         |  260
-rw-r--r--   fs/ext4/inode.c          |   95
-rw-r--r--   fs/ext4/mballoc.c        |  342
-rw-r--r--   fs/ext4/mballoc.h        |   20
-rw-r--r--   fs/ext4/migrate.c        |    2
-rw-r--r--   fs/ext4/mmp.c            |    4
-rw-r--r--   fs/ext4/namei.c          |    2
-rw-r--r--   fs/ext4/page-io.c        |   18
-rw-r--r--   fs/ext4/resize.c         |   37
-rw-r--r--   fs/ext4/super.c          | 1075
-rw-r--r--   fs/ext4/xattr.c          |   25
18 files changed, 1136 insertions, 1318 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f9e2cd8cf711..4bbd07a6fa18 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -336,10 +336,10 @@ err_out: * Return buffer_head on success or NULL in case of failure. */ struct buffer_head * -ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) +ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; - struct buffer_head *bh = NULL; + struct buffer_head *bh; ext4_fsblk_t bitmap_blk; desc = ext4_get_group_desc(sb, block_group, NULL); @@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) bitmap_blk = ext4_block_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, "Cannot read block bitmap - " - "block_group = %u, block_bitmap = %llu", - block_group, bitmap_blk); + ext4_error(sb, "Cannot get buffer for block bitmap - " + "block_group = %u, block_bitmap = %llu", + block_group, bitmap_blk); return NULL; } @@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) return bh; } /* - * submit the buffer_head for read. We can - * safely mark the bitmap as uptodate now. - * We do it here so the bitmap uptodate bit - * get set with buffer lock held. + * submit the buffer_head for reading */ + set_buffer_new(bh); trace_ext4_read_block_bitmap_load(sb, block_group); - set_bitmap_uptodate(bh); - if (bh_submit_read(bh) < 0) { - put_bh(bh); + bh->b_end_io = ext4_end_bitmap_read; + get_bh(bh); + submit_bh(READ, bh); + return bh; +} + +/* Returns 0 on success, 1 on error */ +int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, + struct buffer_head *bh) +{ + struct ext4_group_desc *desc; + + if (!buffer_new(bh)) + return 0; + desc = ext4_get_group_desc(sb, block_group, NULL); + if (!desc) + return 1; + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { ext4_error(sb, "Cannot read block bitmap - " - "block_group = %u, block_bitmap = %llu", - block_group, bitmap_blk); - return NULL; + "block_group = %u, block_bitmap = %llu", + block_group, (unsigned long long) bh->b_blocknr); + return 1; } + clear_buffer_new(bh); + /* Panic or remount fs read-only if block bitmap is invalid */ ext4_valid_block_bitmap(sb, desc, block_group, bh); - /* - * file system mounted not to panic on error, - * continue with corrupt bitmap - */ + return 0; +} + +struct buffer_head * +ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) +{ + struct buffer_head *bh; + + bh = ext4_read_block_bitmap_nowait(sb, block_group); + if (ext4_wait_block_bitmap(sb, block_group, bh)) { + put_bh(bh); + return NULL; + } return bh; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 164c56092e58..ad56866d729a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -91,17 +91,17 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, return 0; if (filp) - ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0, + ext4_error_file(filp, function, line, bh->b_blocknr, "bad entry in directory: %s - offset=%u(%u), " "inode=%u, rec_len=%d, name_len=%d", - error_msg, (unsigned) (offset%bh->b_size), + error_msg, (unsigned) (offset % bh->b_size), offset, le32_to_cpu(de->inode), rlen, de->name_len); else - ext4_error_inode(dir, function, line, bh ? 
bh->b_blocknr : 0, + ext4_error_inode(dir, function, line, bh->b_blocknr, "bad entry in directory: %s - offset=%u(%u), " "inode=%u, rec_len=%d, name_len=%d", - error_msg, (unsigned) (offset%bh->b_size), + error_msg, (unsigned) (offset % bh->b_size), offset, le32_to_cpu(de->inode), rlen, de->name_len); @@ -425,8 +425,9 @@ static int call_filldir(struct file *filp, void *dirent, sb = inode->i_sb; if (!fname) { - printk(KERN_ERR "EXT4-fs: call_filldir: called with " - "null fname?!?\n"); + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " + "called with null fname?!?", __func__, __LINE__, + inode->i_ino, current->comm); return 0; } curr_pos = hash2pos(fname->hash, fname->minor_hash); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 513004fc3d84..ded731ac8a32 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -53,7 +53,7 @@ printk(KERN_DEBUG f, ## a); \ } while (0) #else -#define ext4_debug(f, a...) do {} while (0) +#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #define EXT4_ERROR_INODE(inode, fmt, a...) \ @@ -184,6 +184,8 @@ struct mpage_da_data { #define EXT4_IO_END_UNWRITTEN 0x0001 #define EXT4_IO_END_ERROR 0x0002 #define EXT4_IO_END_QUEUED 0x0004 +#define EXT4_IO_END_DIRECT 0x0008 +#define EXT4_IO_END_IN_FSYNC 0x0010 struct ext4_io_page { struct page *p_page; @@ -192,18 +194,25 @@ struct ext4_io_page { #define MAX_IO_PAGES 128 +/* + * For converting uninitialized extents on a work queue. + * + * 'page' is only used from the writepage() path; 'pages' is only used for + * buffered writes; they are used to keep page references until conversion + * takes place. For AIO/DIO, neither field is filled in. + */ typedef struct ext4_io_end { struct list_head list; /* per-file finished IO list */ struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ - struct page *page; /* page struct for buffer write */ + struct page *page; /* for writepage() path */ loff_t offset; /* offset in the file */ ssize_t size; /* size of the extent */ struct work_struct work; /* data work queue */ struct kiocb *iocb; /* iocb struct for AIO */ int result; /* error value for AIO */ - int num_io_pages; - struct ext4_io_page *pages[MAX_IO_PAGES]; + int num_io_pages; /* for writepages() */ + struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */ } ext4_io_end_t; struct ext4_io_submit { @@ -923,6 +932,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT4_MOUNT_ERRORS_MASK 0x00070 #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ @@ -941,7 +951,6 @@ struct ext4_inode_info { #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ -#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ @@ -1142,6 +1151,7 @@ struct ext4_sb_info { unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; + unsigned int s_def_mount_opt; 
ext4_fsblk_t s_sb_block; uid_t s_resuid; gid_t s_resgid; @@ -1420,8 +1430,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ -#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ +#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ +#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */ #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ @@ -1794,8 +1805,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, struct buffer_head ** bh); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); -struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, - ext4_group_t block_group); + +extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, + ext4_group_t block_group); +extern int ext4_wait_block_bitmap(struct super_block *sb, + ext4_group_t block_group, + struct buffer_head *bh); +extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, + ext4_group_t block_group); extern void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t group, @@ -1841,6 +1858,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *); extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int barrier); +extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); /* mballoc.c */ extern long ext4_mb_stats; diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index a52db3a69a30..0f58b86e3a02 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -47,9 +47,9 @@ */ #define EXT_DEBUG__ #ifdef EXT_DEBUG -#define ext_debug(a...) printk(a) +#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) #else -#define ext_debug(a...) +#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5802fa1dab18..83b20fcf9400 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -104,6 +104,78 @@ #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) +/** + * struct ext4_journal_cb_entry - Base structure for callback information. + * + * This struct is a 'seed' structure for a using with your own callback + * structs. If you are using callbacks you must allocate one of these + * or another struct of your own definition which has this struct + * as it's first element and pass it to ext4_journal_callback_add(). 
+ */ +struct ext4_journal_cb_entry { + /* list information for other callbacks attached to the same handle */ + struct list_head jce_list; + + /* Function to call with this callback structure */ + void (*jce_func)(struct super_block *sb, + struct ext4_journal_cb_entry *jce, int error); + + /* user data goes here */ +}; + +/** + * ext4_journal_callback_add: add a function to call after transaction commit + * @handle: active journal transaction handle to register callback on + * @func: callback function to call after the transaction has committed: + * @sb: superblock of current filesystem for transaction + * @jce: returned journal callback data + * @rc: journal state at commit (0 = transaction committed properly) + * @jce: journal callback data (internal and function private data struct) + * + * The registered function will be called in the context of the journal thread + * after the transaction for which the handle was created has completed. + * + * No locks are held when the callback function is called, so it is safe to + * call blocking functions from within the callback, but the callback should + * not block or run for too long, or the filesystem will be blocked waiting for + * the next transaction to commit. No journaling functions can be used, or + * there is a risk of deadlock. + * + * There is no guaranteed calling order of multiple registered callbacks on + * the same transaction. + */ +static inline void ext4_journal_callback_add(handle_t *handle, + void (*func)(struct super_block *sb, + struct ext4_journal_cb_entry *jce, + int rc), + struct ext4_journal_cb_entry *jce) +{ + struct ext4_sb_info *sbi = + EXT4_SB(handle->h_transaction->t_journal->j_private); + + /* Add the jce to transaction's private list */ + jce->jce_func = func; + spin_lock(&sbi->s_md_lock); + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); + spin_unlock(&sbi->s_md_lock); +} + +/** + * ext4_journal_callback_del: delete a registered callback + * @handle: active journal transaction handle on which callback was registered + * @jce: registered journal callback entry to unregister + */ +static inline void ext4_journal_callback_del(handle_t *handle, + struct ext4_journal_cb_entry *jce) +{ + struct ext4_sb_info *sbi = + EXT4_SB(handle->h_transaction->t_journal->j_private); + + spin_lock(&sbi->s_md_lock); + list_del_init(&jce->jce_list); + spin_unlock(&sbi->s_md_lock); +} + int ext4_mark_iloc_dirty(handle_t *handle, struct inode *inode, @@ -261,43 +333,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, /* super.c */ int ext4_force_commit(struct super_block *sb); -static inline int ext4_should_journal_data(struct inode *inode) +/* + * Ext4 inode journal modes + */ +#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */ +#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */ +#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */ + +static inline int ext4_inode_journal_mode(struct inode *inode) { if (EXT4_JOURNAL(inode) == NULL) - return 0; - if (!S_ISREG(inode->i_mode)) - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return 1; - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) - return 1; - return 0; + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ + /* We do not support data journalling with delayed allocation */ + if (!S_ISREG(inode->i_mode) || + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + if 
(ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && + !test_opt(inode->i_sb, DELALLOC)) + return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ + else + BUG(); +} + +static inline int ext4_should_journal_data(struct inode *inode) +{ + return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE; } static inline int ext4_should_order_data(struct inode *inode) { - if (EXT4_JOURNAL(inode) == NULL) - return 0; - if (!S_ISREG(inode->i_mode)) - return 0; - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - return 1; - return 0; + return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE; } static inline int ext4_should_writeback_data(struct inode *inode) { - if (EXT4_JOURNAL(inode) == NULL) - return 1; - if (!S_ISREG(inode->i_mode)) - return 0; - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - return 1; - return 0; + return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE; } /* diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74f23c292e1b..1421938e6792 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -44,6 +44,14 @@ #include <trace/events/ext4.h> +/* + * used by extent splitting. + */ +#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ + due to ENOSPC */ +#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ +#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ + static int ext4_split_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -51,6 +59,13 @@ static int ext4_split_extent(handle_t *handle, int split_flag, int flags); +static int ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, + int flags); + static int ext4_ext_truncate_extend_restart(handle_t *handle, struct inode *inode, int needed) @@ -300,6 +315,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); + if (len == 0) + return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } @@ -2308,7 +2325,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, struct ext4_extent *ex; /* the header must be checked already in ext4_ext_remove_space() */ - ext_debug("truncate since %u in leaf\n", start); + ext_debug("truncate since %u in leaf to %u\n", start, end); if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; @@ -2343,14 +2360,17 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ext_debug(" border %u:%u\n", a, b); /* If this extent is beyond the end of the hole, skip it */ - if (end <= ex_ee_block) { + if (end < ex_ee_block) { ex--; ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = ext4_ext_get_actual_len(ex); continue; } else if (b != ex_ee_block + ex_ee_len - 1) { - EXT4_ERROR_INODE(inode," bad truncate %u:%u\n", - start, end); + EXT4_ERROR_INODE(inode, + "can not handle truncate %u:%u " + "on extent %u:%u", + start, end, ex_ee_block, + ex_ee_block + ex_ee_len - 1); err = -EIO; goto out; } else if (a != ex_ee_block) { 
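An aside on the commit-callback API added in the ext4_jbd2.h hunk above: per its kernel-doc, a caller embeds struct ext4_journal_cb_entry as the first member of its own struct and registers it on an active handle. A hedged sketch (my_cookie and my_done are illustrative names, not from the tree):

	struct my_cookie {
		struct ext4_journal_cb_entry jce;  /* must be the first member */
		unsigned long payload;             /* private per-commit data */
	};

	static void my_done(struct super_block *sb,
			    struct ext4_journal_cb_entry *jce, int rc)
	{
		/* runs in the journal thread after the transaction commits */
		struct my_cookie *c = container_of(jce, struct my_cookie, jce);

		pr_debug("payload %lu committed, rc=%d\n", c->payload, rc);
		kfree(c);  /* blocking is fine here; journaling calls are not */
	}

	/* ... with an active handle_t *handle: */
	struct my_cookie *c = kmalloc(sizeof(*c), GFP_NOFS);
	if (c) {
		c->payload = 42;
		ext4_journal_callback_add(handle, my_done, &c->jce);
	}

The ext4_free_data rework further down in mballoc.c has exactly this shape: the cast in ext4_free_data_callback() relies on the callback entry sitting at the start of struct ext4_free_data.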
@@ -2482,7 +2502,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, + ext4_lblk_t end) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -2491,7 +2512,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) handle_t *handle; int i, err; - ext_debug("truncate since %u\n", start); + ext_debug("truncate since %u to %u\n", start, end); /* probably first extent we're gonna free will be last in block */ handle = ext4_journal_start(inode, depth + 1); @@ -2504,6 +2525,61 @@ again: trace_ext4_ext_remove_space(inode, start, depth); /* + * Check if we are removing extents inside the extent tree. If that + * is the case, we are going to punch a hole inside the extent tree + * so we have to check whether we need to split the extent covering + * the last block to remove so we can easily remove the part of it + * in ext4_ext_rm_leaf(). + */ + if (end < EXT_MAX_BLOCKS - 1) { + struct ext4_extent *ex; + ext4_lblk_t ee_block; + + /* find extent for this block */ + path = ext4_ext_find_extent(inode, end, NULL); + if (IS_ERR(path)) { + ext4_journal_stop(handle); + return PTR_ERR(path); + } + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) + goto cont; + + ee_block = le32_to_cpu(ex->ee_block); + + /* + * See if the last block is inside the extent, if so split + * the extent at 'end' block so we can easily remove the + * tail of the first part of the split extent in + * ext4_ext_rm_leaf(). + */ + if (end >= ee_block && + end < ee_block + ext4_ext_get_actual_len(ex) - 1) { + int split_flag = 0; + + if (ext4_ext_is_uninitialized(ex)) + split_flag = EXT4_EXT_MARK_UNINIT1 | + EXT4_EXT_MARK_UNINIT2; + + /* + * Split the extent in two so that 'end' is the last + * block in the first new extent + */ + err = ext4_split_extent_at(handle, inode, path, + end + 1, split_flag, + EXT4_GET_BLOCKS_PRE_IO | + EXT4_GET_BLOCKS_PUNCH_OUT_EXT); + + if (err < 0) + goto out; + } + ext4_ext_drop_refs(path); + kfree(path); + } +cont: + + /* * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise. */ @@ -2515,6 +2591,7 @@ again: } path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); + if (ext4_ext_check(inode, path[0].p_hdr, depth)) { err = -EIO; goto out; @@ -2526,7 +2603,7 @@ again: /* this is leaf block */ err = ext4_ext_rm_leaf(handle, inode, path, &partial_cluster, start, - EXT_MAX_BLOCKS - 1); + end); /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; @@ -2651,17 +2728,17 @@ void ext4_ext_init(struct super_block *sb) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) - printk(KERN_INFO "EXT4-fs: file extents enabled"); + printk(KERN_INFO "EXT4-fs: file extents enabled" #ifdef AGGRESSIVE_TEST - printk(", aggressive tests"); + ", aggressive tests" #endif #ifdef CHECK_BINSEARCH - printk(", check binsearch"); + ", check binsearch" #endif #ifdef EXTENTS_STATS - printk(", stats"); + ", stats" #endif - printk("\n"); + "\n"); #endif #ifdef EXTENTS_STATS spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); @@ -2709,14 +2786,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) } /* - * used by extent splitting. 
- */ -#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ - due to ENOSPC */ -#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ -#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ - -/* * ext4_split_extent_at() splits an extent at given block. * * @handle: the journal handle @@ -3224,11 +3293,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, depth = ext_depth(inode); eh = path[depth].p_hdr; - if (unlikely(!eh->eh_entries)) { - EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " - "EOFBLOCKS_FL set"); - return -EIO; - } + /* + * We're going to remove EOFBLOCKS_FL entirely in future so we + * do not care for this case anymore. Simply remove the flag + * if there are no extents. + */ + if (unlikely(!eh->eh_entries)) + goto out; last_ex = EXT_LAST_EXTENT(eh); /* * We should clear the EOFBLOCKS_FL flag if we are writing the @@ -3252,6 +3323,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, for (i = depth-1; i >= 0; i--) if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) return 0; +out: ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); return ext4_mark_inode_dirty(handle, inode); } @@ -3710,8 +3782,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, int free_on_err = 0, err = 0, depth, ret; unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; - unsigned int punched_out = 0; - unsigned int result = 0; struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; ext4_lblk_t cluster_offset; @@ -3721,8 +3791,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* check in cache */ - if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && - ext4_ext_in_cache(inode, map->m_lblk, &newex)) { + if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { if (!newex.ee_start_lo && !newex.ee_start_hi) { if ((sbi->s_cluster_ratio > 1) && ext4_find_delalloc_cluster(inode, map->m_lblk, 0)) @@ -3790,113 +3859,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* if found extent covers block, simply return it */ if (in_range(map->m_lblk, ee_block, ee_len)) { - struct ext4_map_blocks punch_map; - ext4_fsblk_t partial_cluster = 0; - newblock = map->m_lblk - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (map->m_lblk - ee_block); ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, ee_block, ee_len, newblock); - if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { - /* - * Do not put uninitialized extent - * in the cache - */ - if (!ext4_ext_is_uninitialized(ex)) { - ext4_ext_put_in_cache(inode, ee_block, - ee_len, ee_start); - goto out; - } - ret = ext4_ext_handle_uninitialized_extents( - handle, inode, map, path, flags, - allocated, newblock); - return ret; - } - - /* - * Punch out the map length, but only to the - * end of the extent - */ - punched_out = allocated < map->m_len ? 
- allocated : map->m_len; - /* - * Sense extents need to be converted to - * uninitialized, they must fit in an - * uninitialized extent + * Do not put uninitialized extent + * in the cache */ - if (punched_out > EXT_UNINIT_MAX_LEN) - punched_out = EXT_UNINIT_MAX_LEN; - - punch_map.m_lblk = map->m_lblk; - punch_map.m_pblk = newblock; - punch_map.m_len = punched_out; - punch_map.m_flags = 0; - - /* Check to see if the extent needs to be split */ - if (punch_map.m_len != ee_len || - punch_map.m_lblk != ee_block) { - - ret = ext4_split_extent(handle, inode, - path, &punch_map, 0, - EXT4_GET_BLOCKS_PUNCH_OUT_EXT | - EXT4_GET_BLOCKS_PRE_IO); - - if (ret < 0) { - err = ret; - goto out2; - } - /* - * find extent for the block at - * the start of the hole - */ - ext4_ext_drop_refs(path); - kfree(path); - - path = ext4_ext_find_extent(inode, - map->m_lblk, NULL); - if (IS_ERR(path)) { - err = PTR_ERR(path); - path = NULL; - goto out2; - } - - depth = ext_depth(inode); - ex = path[depth].p_ext; - ee_len = ext4_ext_get_actual_len(ex); - ee_block = le32_to_cpu(ex->ee_block); - ee_start = ext4_ext_pblock(ex); - - } - - ext4_ext_mark_uninitialized(ex); - - ext4_ext_invalidate_cache(inode); - - err = ext4_ext_rm_leaf(handle, inode, path, - &partial_cluster, map->m_lblk, - map->m_lblk + punched_out); - - if (!err && path->p_hdr->eh_entries == 0) { - /* - * Punch hole freed all of this sub tree, - * so we need to correct eh_depth - */ - err = ext4_ext_get_access(handle, inode, path); - if (err == 0) { - ext_inode_hdr(inode)->eh_depth = 0; - ext_inode_hdr(inode)->eh_max = - cpu_to_le16(ext4_ext_space_root( - inode, 0)); - - err = ext4_ext_dirty( - handle, inode, path); - } + if (!ext4_ext_is_uninitialized(ex)) { + ext4_ext_put_in_cache(inode, ee_block, + ee_len, ee_start); + goto out; } - - goto out2; + ret = ext4_ext_handle_uninitialized_extents( + handle, inode, map, path, flags, + allocated, newblock); + return ret; } } @@ -4165,13 +4146,11 @@ out2: ext4_ext_drop_refs(path); kfree(path); } - result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? - punched_out : allocated; trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, - newblock, map->m_len, err ? err : result); + newblock, map->m_len, err ? err : allocated); - return err ? err : result; + return err ? err : allocated; } void ext4_ext_truncate(struct inode *inode) @@ -4228,7 +4207,7 @@ void ext4_ext_truncate(struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block); + err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); /* In a multi-transaction truncate, we only make the final * transaction synchronous. 
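For the rewritten punch-hole path below, the block-range arithmetic deserves a worked example (illustrative values, 4 KiB blocks assumed): only blocks lying wholly inside [offset, offset + length) are removed, while the partial edge pages are zeroed through the page cache instead.

	loff_t offset = 5000, length = 20000;  /* punch bytes 5000..24999 */

	/* first whole block inside the hole: (5000 + 4095) >> 12 = 2 */
	ext4_lblk_t first_block = (offset + 4096 - 1) >> 12;

	/* first block at or past the end of the hole: 25000 >> 12 = 6 */
	ext4_lblk_t stop_block = (offset + length) >> 12;

	/* removes logical blocks 2..5; blocks 1 and 6 keep their zeroed tails */
	err = ext4_ext_remove_space(inode, first_block, stop_block - 1);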
@@ -4436,10 +4415,11 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) { WARN_ON(ret <= 0); - printk(KERN_ERR "%s: ext4_ext_map_blocks " - "returned error inode#%lu, block=%u, " - "max_blocks=%u", __func__, - inode->i_ino, map.m_lblk, map.m_len); + ext4_msg(inode->i_sb, KERN_ERR, + "%s:%d: inode #%lu: block %u: len %u: " + "ext4_ext_map_blocks returned %d", + __func__, __LINE__, inode->i_ino, map.m_lblk, + map.m_len, ret); } ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); @@ -4705,14 +4685,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) { struct inode *inode = file->f_path.dentry->d_inode; struct super_block *sb = inode->i_sb; - struct ext4_ext_cache cache_ex; - ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; + ext4_lblk_t first_block, stop_block; struct address_space *mapping = inode->i_mapping; - struct ext4_map_blocks map; handle_t *handle; loff_t first_page, last_page, page_len; loff_t first_page_offset, last_page_offset; - int ret, credits, blocks_released, err = 0; + int credits, err = 0; /* No need to punch hole beyond i_size */ if (offset >= inode->i_size) @@ -4728,10 +4706,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) offset; } - first_block = (offset + sb->s_blocksize - 1) >> - EXT4_BLOCK_SIZE_BITS(sb); - last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); - first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; last_page = (offset + length) >> PAGE_CACHE_SHIFT; @@ -4810,7 +4784,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) } } - /* * If i_size is contained in the last page, we need to * unmap and zero the partial page after i_size @@ -4830,73 +4803,22 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) } } + first_block = (offset + sb->s_blocksize - 1) >> + EXT4_BLOCK_SIZE_BITS(sb); + stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); + /* If there are no blocks to remove, return now */ - if (first_block >= last_block) + if (first_block >= stop_block) goto out; down_write(&EXT4_I(inode)->i_data_sem); ext4_ext_invalidate_cache(inode); ext4_discard_preallocations(inode); - /* - * Loop over all the blocks and identify blocks - * that need to be punched out - */ - iblock = first_block; - blocks_released = 0; - while (iblock < last_block) { - max_blocks = last_block - iblock; - num_blocks = 1; - memset(&map, 0, sizeof(map)); - map.m_lblk = iblock; - map.m_len = max_blocks; - ret = ext4_ext_map_blocks(handle, inode, &map, - EXT4_GET_BLOCKS_PUNCH_OUT_EXT); - - if (ret > 0) { - blocks_released += ret; - num_blocks = ret; - } else if (ret == 0) { - /* - * If map blocks could not find the block, - * then it is in a hole. If the hole was - * not already cached, then map blocks should - * put it in the cache. 
So we can get the hole - * out of the cache - */ - memset(&cache_ex, 0, sizeof(cache_ex)); - if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && - !cache_ex.ec_start) { - - /* The hole is cached */ - num_blocks = cache_ex.ec_block + - cache_ex.ec_len - iblock; - - } else { - /* The block could not be identified */ - err = -EIO; - break; - } - } else { - /* Map blocks error */ - err = ret; - break; - } - - if (num_blocks == 0) { - /* This condition should never happen */ - ext_debug("Block lookup failed"); - err = -EIO; - break; - } - - iblock += num_blocks; - } + err = ext4_ext_remove_space(inode, first_block, stop_block - 1); - if (blocks_released > 0) { - ext4_ext_invalidate_cache(inode); - ext4_discard_preallocations(inode); - } + ext4_ext_invalidate_cache(inode); + ext4_discard_preallocations(inode); if (IS_SYNC(inode)) ext4_handle_sync(handle); diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 00a2cb753efd..bb6c7d811313 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -89,6 +89,7 @@ int ext4_flush_completed_IO(struct inode *inode) io = list_entry(ei->i_completed_io_list.next, ext4_io_end_t, list); list_del_init(&io->list); + io->flag |= EXT4_IO_END_IN_FSYNC; /* * Calling ext4_end_io_nolock() to convert completed * IO to written. @@ -108,6 +109,7 @@ int ext4_flush_completed_IO(struct inode *inode) if (ret < 0) ret2 = ret; spin_lock_irqsave(&ei->i_completed_io_lock, flags); + io->flag &= ~EXT4_IO_END_IN_FSYNC; } spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); return (ret2 < 0) ? ret2 : 0; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 25d8c9781ad9..409c2ee7750a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, return EXT4_INODES_PER_GROUP(sb); } +void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) +{ + if (uptodate) { + set_buffer_uptodate(bh); + set_bitmap_uptodate(bh); + } + unlock_buffer(bh); + put_bh(bh); +} + /* * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. @@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) return bh; } /* - * submit the buffer_head for read. We can - * safely mark the bitmap as uptodate now. - * We do it here so the bitmap uptodate bit - * get set with buffer lock held. 
+ * submit the buffer_head for reading */ trace_ext4_load_inode_bitmap(sb, block_group); - set_bitmap_uptodate(bh); - if (bh_submit_read(bh) < 0) { + bh->b_end_io = ext4_end_bitmap_read; + get_bh(bh); + submit_bh(READ, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { put_bh(bh); ext4_error(sb, "Cannot read inode bitmap - " - "block_group = %u, inode_bitmap = %llu", - block_group, bitmap_blk); + "block_group = %u, inode_bitmap = %llu", + block_group, bitmap_blk); return NULL; } return bh; @@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) struct ext4_sb_info *sbi; int fatal = 0, err, count, cleared; - if (atomic_read(&inode->i_count) > 1) { - printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); + if (!sb) { + printk(KERN_ERR "EXT4-fs: %s:%d: inode on " + "nonexistent device\n", __func__, __LINE__); return; } - if (inode->i_nlink) { - printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", - inode->i_nlink); + if (atomic_read(&inode->i_count) > 1) { + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d", + __func__, __LINE__, inode->i_ino, + atomic_read(&inode->i_count)); return; } - if (!sb) { - printk(KERN_ERR "ext4_free_inode: inode on " - "nonexistent device\n"); + if (inode->i_nlink) { + ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n", + __func__, __LINE__, inode->i_ino, inode->i_nlink); return; } sbi = EXT4_SB(sb); @@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent, } /* - * claim the inode from the inode bitmap. If the group - * is uninit we need to take the groups's ext4_group_lock - * and clear the uninit flag. The inode bitmap update - * and group desc uninit flag clear should be done - * after holding ext4_group_lock so that ext4_read_inode_bitmap - * doesn't race with the ext4_claim_inode - */ -static int ext4_claim_inode(struct super_block *sb, - struct buffer_head *inode_bitmap_bh, - unsigned long ino, ext4_group_t group, umode_t mode) -{ - int free = 0, retval = 0, count; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_group_info *grp = ext4_get_group_info(sb, group); - struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); - - /* - * We have to be sure that new inode allocation does not race with - * inode table initialization, because otherwise we may end up - * allocating and writing new inode right before sb_issue_zeroout - * takes place and overwriting our new inode with zeroes. So we - * take alloc_sem to prevent it. - */ - down_read(&grp->alloc_sem); - ext4_lock_group(sb, group); - if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { - /* not a free inode */ - retval = 1; - goto err_ret; - } - ino++; - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || - ino > EXT4_INODES_PER_GROUP(sb)) { - ext4_unlock_group(sb, group); - up_read(&grp->alloc_sem); - ext4_error(sb, "reserved inode or inode > inodes count - " - "block_group = %u, inode=%lu", group, - ino + group * EXT4_INODES_PER_GROUP(sb)); - return 1; - } - /* If we didn't allocate from within the initialized part of the inode - * table then we need to initialize up to this inode. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { - - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); - /* When marking the block group with - * ~EXT4_BG_INODE_UNINIT we don't want to depend - * on the value of bg_itable_unused even though - * mke2fs could have initialized the same for us. 
- * Instead we calculated the value below - */ - - free = 0; - } else { - free = EXT4_INODES_PER_GROUP(sb) - - ext4_itable_unused_count(sb, gdp); - } - - /* - * Check the relative inode number against the last used - * relative inode number in this group. if it is greater - * we need to update the bg_itable_unused count - * - */ - if (ino > free) - ext4_itable_unused_set(sb, gdp, - (EXT4_INODES_PER_GROUP(sb) - ino)); - } - count = ext4_free_inodes_count(sb, gdp) - 1; - ext4_free_inodes_set(sb, gdp, count); - if (S_ISDIR(mode)) { - count = ext4_used_dirs_count(sb, gdp) + 1; - ext4_used_dirs_set(sb, gdp, count); - if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, group); - - atomic_inc(&sbi->s_flex_groups[f].used_dirs); - } - } - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); -err_ret: - ext4_unlock_group(sb, group); - up_read(&grp->alloc_sem); - return retval; -} - -/* * There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both * free space and a low directory-to-inode ratio; if that fails, then of @@ -741,6 +664,11 @@ got_group: if (ret2 == -1) goto out; + /* + * Normally we will only go through one pass of this loop, + * unless we get unlucky and it turns out the group we selected + * had its last inode grabbed by someone else. + */ for (i = 0; i < ngroups; i++, ino = 0) { err = -EIO; @@ -757,51 +685,24 @@ repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) inode_bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); - - if (ino < EXT4_INODES_PER_GROUP(sb)) { - - BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, - inode_bitmap_bh); - if (err) - goto fail; - - BUFFER_TRACE(group_desc_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, - group_desc_bh); - if (err) - goto fail; - if (!ext4_claim_inode(sb, inode_bitmap_bh, - ino, group, mode)) { - /* we won it */ - BUFFER_TRACE(inode_bitmap_bh, - "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, - NULL, - inode_bitmap_bh); - if (err) - goto fail; - /* zero bit is inode number 1*/ - ino++; - goto got; - } - /* we lost it */ - ext4_handle_release_buffer(handle, inode_bitmap_bh); - ext4_handle_release_buffer(handle, group_desc_bh); - - if (++ino < EXT4_INODES_PER_GROUP(sb)) - goto repeat_in_this_group; + if (ino >= EXT4_INODES_PER_GROUP(sb)) { + if (++group == ngroups) + group = 0; + continue; } - - /* - * This case is possible in concurrent environment. It is very - * rare. We cannot repeat the find_group_xxx() call because - * that will simply return the same blockgroup, because the - * group descriptor metadata has not yet been updated. - * So we just go onto the next blockgroup. - */ - if (++group == ngroups) - group = 0; + if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { + ext4_error(sb, "reserved inode found cleared - " + "inode=%lu", ino + 1); + continue; + } + ext4_lock_group(sb, group); + ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); + ext4_unlock_group(sb, group); + ino++; /* the inode bitmap is zero-based */ + if (!ret2) + goto got; /* we grabbed the inode! 
*/ + if (ino < EXT4_INODES_PER_GROUP(sb)) + goto repeat_in_this_group; } err = -ENOSPC; goto out; @@ -838,6 +739,59 @@ got: if (err) goto fail; } + + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode_bitmap_bh); + if (err) + goto fail; + + BUFFER_TRACE(group_desc_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, group_desc_bh); + if (err) + goto fail; + + /* Update the relevant bg descriptor fields */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + int free; + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + + down_read(&grp->alloc_sem); /* protect vs itable lazyinit */ + ext4_lock_group(sb, group); /* while we modify the bg desc */ + free = EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + free = 0; + } + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + */ + if (ino > free) + ext4_itable_unused_set(sb, gdp, + (EXT4_INODES_PER_GROUP(sb) - ino)); + up_read(&grp->alloc_sem); + } + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); + if (S_ISDIR(mode)) { + ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); + if (sbi->s_log_groups_per_flex) { + ext4_group_t f = ext4_flex_group(sbi, group); + + atomic_inc(&sbi->s_flex_groups[f].used_dirs); + } + } + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); + ext4_unlock_group(sb, group); + } + + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); + if (err) + goto fail; + BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); if (err) @@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) * where it is called from on active part of filesystem is ext4lazyinit * thread, so we do not need any special locks, however we have to prevent * inode allocation from the current group, so we take alloc_sem lock, to - * block ext4_claim_inode until we are finished. + * block ext4_new_inode() until we are finished. 
*/ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, int barrier) @@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, sbi->s_inodes_per_block); if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { - ext4_error(sb, "Something is wrong with group %u\n" - "Used itable blocks: %d" - "itable unused count: %u\n", + ext4_error(sb, "Something is wrong with group %u: " + "used itable blocks: %d; " + "itable unused count: %u", group, used_blks, ext4_itable_unused_count(sb, gdp)); ret = 1; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index feaa82fe629d..c77b0bd2c711 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -272,7 +272,7 @@ void ext4_da_update_reserve_space(struct inode *inode, trace_ext4_da_update_reserve_space(inode, used, quota_claim); if (unlikely(used > ei->i_reserved_data_blocks)) { ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " - "with only %d reserved data blocks\n", + "with only %d reserved data blocks", __func__, inode->i_ino, used, ei->i_reserved_data_blocks); WARN_ON(1); @@ -1165,7 +1165,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) */ ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " "ino %lu, to_free %d with only %d reserved " - "data blocks\n", inode->i_ino, to_free, + "data blocks", inode->i_ino, to_free, ei->i_reserved_data_blocks); WARN_ON(1); to_free = ei->i_reserved_data_blocks; @@ -1428,20 +1428,22 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) static void ext4_print_free_blocks(struct inode *inode) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - printk(KERN_CRIT "Total free blocks count %lld\n", + struct super_block *sb = inode->i_sb; + + ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", EXT4_C2B(EXT4_SB(inode->i_sb), ext4_count_free_clusters(inode->i_sb))); - printk(KERN_CRIT "Free/Dirty block details\n"); - printk(KERN_CRIT "free_blocks=%lld\n", + ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); + ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", (long long) EXT4_C2B(EXT4_SB(inode->i_sb), percpu_counter_sum(&sbi->s_freeclusters_counter))); - printk(KERN_CRIT "dirty_blocks=%lld\n", + ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", (long long) EXT4_C2B(EXT4_SB(inode->i_sb), percpu_counter_sum(&sbi->s_dirtyclusters_counter))); - printk(KERN_CRIT "Block reservation details\n"); - printk(KERN_CRIT "i_reserved_data_blocks=%u\n", - EXT4_I(inode)->i_reserved_data_blocks); - printk(KERN_CRIT "i_reserved_meta_blocks=%u\n", + ext4_msg(sb, KERN_CRIT, "Block reservation details"); + ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", + EXT4_I(inode)->i_reserved_data_blocks); + ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u", EXT4_I(inode)->i_reserved_meta_blocks); return; } @@ -2482,13 +2484,14 @@ static int ext4_da_write_end(struct file *file, int write_mode = (int)(unsigned long)fsdata; if (write_mode == FALL_BACK_TO_NONDELALLOC) { - if (ext4_should_order_data(inode)) { + switch (ext4_inode_journal_mode(inode)) { + case EXT4_INODE_ORDERED_DATA_MODE: return ext4_ordered_write_end(file, mapping, pos, len, copied, page, fsdata); - } else if (ext4_should_writeback_data(inode)) { + case EXT4_INODE_WRITEBACK_DATA_MODE: return ext4_writeback_write_end(file, mapping, pos, len, copied, page, fsdata); - } else { + default: BUG(); } } @@ -2763,7 +2766,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, goto out; ext_debug("ext4_end_io_dio(): io_end 0x%p " - "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", + "for 
inode %lu, iocb 0x%p, offset %llu, size %zd\n", iocb->private, io_end->inode->i_ino, iocb, offset, size); @@ -2795,9 +2798,6 @@ out: /* queue the work to convert unwritten extents to written */ queue_work(wq, &io_end->work); - - /* XXX: probably should move into the real I/O completion handler */ - inode_dio_done(inode); } static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) @@ -2811,8 +2811,9 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) goto out; if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { - printk("sb umounted, discard end_io request for inode %lu\n", - io_end->inode->i_ino); + ext4_msg(io_end->inode->i_sb, KERN_INFO, + "sb umounted, discard end_io request for inode %lu", + io_end->inode->i_ino); ext4_free_io_end(io_end); goto out; } @@ -2921,9 +2922,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, iocb->private = NULL; EXT4_I(inode)->cur_aio_dio = NULL; if (!is_sync_kiocb(iocb)) { - iocb->private = ext4_init_io_end(inode, GFP_NOFS); - if (!iocb->private) + ext4_io_end_t *io_end = + ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) return -ENOMEM; + io_end->flag |= EXT4_IO_END_DIRECT; + iocb->private = io_end; /* * we save the io structure for current async * direct IO, so that later ext4_map_blocks() @@ -2940,7 +2944,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ext4_get_block_write, ext4_end_io_dio, NULL, - DIO_LOCKING | DIO_SKIP_HOLES); + DIO_LOCKING); if (iocb->private) EXT4_I(inode)->cur_aio_dio = NULL; /* @@ -3086,18 +3090,25 @@ static const struct address_space_operations ext4_da_aops = { void ext4_set_aops(struct inode *inode) { - if (ext4_should_order_data(inode) && - test_opt(inode->i_sb, DELALLOC)) - inode->i_mapping->a_ops = &ext4_da_aops; - else if (ext4_should_order_data(inode)) - inode->i_mapping->a_ops = &ext4_ordered_aops; - else if (ext4_should_writeback_data(inode) && - test_opt(inode->i_sb, DELALLOC)) - inode->i_mapping->a_ops = &ext4_da_aops; - else if (ext4_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext4_writeback_aops; - else + switch (ext4_inode_journal_mode(inode)) { + case EXT4_INODE_ORDERED_DATA_MODE: + if (test_opt(inode->i_sb, DELALLOC)) + inode->i_mapping->a_ops = &ext4_da_aops; + else + inode->i_mapping->a_ops = &ext4_ordered_aops; + break; + case EXT4_INODE_WRITEBACK_DATA_MODE: + if (test_opt(inode->i_sb, DELALLOC)) + inode->i_mapping->a_ops = &ext4_da_aops; + else + inode->i_mapping->a_ops = &ext4_writeback_aops; + break; + case EXT4_INODE_JOURNAL_DATA_MODE: inode->i_mapping->a_ops = &ext4_journalled_aops; + break; + default: + BUG(); + } } @@ -3329,16 +3340,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) { struct inode *inode = file->f_path.dentry->d_inode; if (!S_ISREG(inode->i_mode)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { /* TODO: Add support for non extent hole punching */ - return -ENOTSUPP; + return -EOPNOTSUPP; } if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { /* TODO: Add support for bigalloc file systems */ - return -ENOTSUPP; + return -EOPNOTSUPP; } return ext4_ext_punch_hole(file, offset, length); @@ -3924,10 +3935,8 @@ static int ext4_do_update_inode(handle_t *handle, ext4_update_dynamic_rev(sb); EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); - sb->s_dirt = 1; ext4_handle_sync(handle); - err = ext4_handle_dirty_metadata(handle, NULL, - EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_super(handle, sb); } } 
raw_inode->i_generation = cpu_to_le32(inode->i_generation); @@ -4152,11 +4161,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != i_size_read(inode)) { + if (attr->ia_size != i_size_read(inode)) truncate_setsize(inode, attr->ia_size); - ext4_truncate(inode); - } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) - ext4_truncate(inode); + ext4_truncate(inode); } if (!rc) { @@ -4314,7 +4321,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; - if (test_opt(inode->i_sb, I_VERSION)) + if (IS_I_VERSION(inode)) inode_inc_iversion(inode); /* the do_update_inode consumes one bh->b_count */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cb990b21c698..99ab428bcfa0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -21,6 +21,7 @@ * mballoc.c contains the multiblocks allocation routines */ +#include "ext4_jbd2.h" #include "mballoc.h" #include <linux/debugfs.h> #include <linux/slab.h> @@ -339,7 +340,7 @@ */ static struct kmem_cache *ext4_pspace_cachep; static struct kmem_cache *ext4_ac_cachep; -static struct kmem_cache *ext4_free_ext_cachep; +static struct kmem_cache *ext4_free_data_cachep; /* We create slab caches for groupinfo data structures based on the * superblock block size. There will be one per mounted filesystem for @@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group); -static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); +static void ext4_free_data_callback(struct super_block *sb, + struct ext4_journal_cb_entry *jce, int rc); static inline void *mb_correct_addr_and_bit(int *bit, void *addr) { @@ -425,7 +427,7 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) { char *bb; - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); + BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); BUG_ON(max == NULL); if (order > e4b->bd_blkbits + 1) { @@ -436,10 +438,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) /* at order 0 we see each particular block */ if (order == 0) { *max = 1 << (e4b->bd_blkbits + 3); - return EXT4_MB_BITMAP(e4b); + return e4b->bd_bitmap; } - bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; + bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; return bb; @@ -588,7 +590,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, for (j = 0; j < (1 << order); j++) { k = (i * (1 << order)) + j; MB_CHECK_ASSERT( - !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); + !mb_test_bit(k, e4b->bd_bitmap)); } count++; } @@ -782,7 +784,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) int groups_per_page; int err = 0; int i; - ext4_group_t first_group; + ext4_group_t first_group, group; int first_block; struct super_block *sb; struct buffer_head *bhs; @@ -806,24 +808,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore) /* allocate buffer_heads to read bitmaps */ if (groups_per_page > 1) { - err = -ENOMEM; i = sizeof(struct buffer_head *) * groups_per_page; bh = kzalloc(i, GFP_NOFS); - if (bh == NULL) + if (bh == NULL) { + err = -ENOMEM; goto out; + } } else bh = &bhs; first_group = page->index * blocks_per_page / 2; /* read all groups the page covers into the cache */ - for (i = 0; i < groups_per_page; i++) { - struct ext4_group_desc *desc; - - if 
(first_group + i >= ngroups) + for (i = 0, group = first_group; i < groups_per_page; i++, group++) { + if (group >= ngroups) break; - grinfo = ext4_get_group_info(sb, first_group + i); + grinfo = ext4_get_group_info(sb, group); /* * If page is uptodate then we came here after online resize * which added some new uninitialized group info structs, so @@ -834,69 +835,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore) bh[i] = NULL; continue; } - - err = -EIO; - desc = ext4_get_group_desc(sb, first_group + i, NULL); - if (desc == NULL) - goto out; - - err = -ENOMEM; - bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc)); - if (bh[i] == NULL) + if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { + err = -ENOMEM; goto out; - - if (bitmap_uptodate(bh[i])) - continue; - - lock_buffer(bh[i]); - if (bitmap_uptodate(bh[i])) { - unlock_buffer(bh[i]); - continue; - } - ext4_lock_group(sb, first_group + i); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - ext4_init_block_bitmap(sb, bh[i], - first_group + i, desc); - set_bitmap_uptodate(bh[i]); - set_buffer_uptodate(bh[i]); - ext4_unlock_group(sb, first_group + i); - unlock_buffer(bh[i]); - continue; } - ext4_unlock_group(sb, first_group + i); - if (buffer_uptodate(bh[i])) { - /* - * if not uninit if bh is uptodate, - * bitmap is also uptodate - */ - set_bitmap_uptodate(bh[i]); - unlock_buffer(bh[i]); - continue; - } - get_bh(bh[i]); - /* - * submit the buffer_head for read. We can - * safely mark the bitmap as uptodate now. - * We do it here so the bitmap uptodate bit - * get set with buffer lock held. - */ - set_bitmap_uptodate(bh[i]); - bh[i]->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh[i]); - mb_debug(1, "read bitmap for group %u\n", first_group + i); + mb_debug(1, "read bitmap for group %u\n", group); } /* wait for I/O completion */ - for (i = 0; i < groups_per_page; i++) - if (bh[i]) - wait_on_buffer(bh[i]); - - err = -EIO; - for (i = 0; i < groups_per_page; i++) - if (bh[i] && !buffer_uptodate(bh[i])) + for (i = 0, group = first_group; i < groups_per_page; i++, group++) { + if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) { + err = -EIO; goto out; + } + } - err = 0; first_block = page->index * blocks_per_page; for (i = 0; i < blocks_per_page; i++) { int group; @@ -1250,10 +1203,10 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) int order = 1; void *bb; - BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); + BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); - bb = EXT4_MB_BUDDY(e4b); + bb = e4b->bd_buddy; while (order <= e4b->bd_blkbits + 1) { block = block >> 1; if (!mb_test_bit(block, bb)) { @@ -1323,9 +1276,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, /* let's maintain fragments counter */ if (first != 0) - block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); + block = !mb_test_bit(first - 1, e4b->bd_bitmap); if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) - max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); + max = !mb_test_bit(first + count, e4b->bd_bitmap); if (block && max) e4b->bd_info->bb_fragments--; else if (!block && !max) @@ -1336,7 +1289,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, block = first++; order = 0; - if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { + if (!mb_test_bit(block, e4b->bd_bitmap)) { ext4_fsblk_t blocknr; blocknr = ext4_group_first_block_no(sb, e4b->bd_group); @@ -1347,7 +1300,7 @@ static void mb_free_blocks(struct inode *inode, struct 
ext4_buddy *e4b, "freeing already freed block " "(bit %u)", block); } - mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); + mb_clear_bit(block, e4b->bd_bitmap); e4b->bd_info->bb_counters[order]++; /* start of the buddy */ @@ -1429,7 +1382,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, break; next = (block + 1) * (1 << order); - if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) + if (mb_test_bit(next, e4b->bd_bitmap)) break; order = mb_find_order_for_block(e4b, next); @@ -1466,9 +1419,9 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) /* let's maintain fragments counter */ if (start != 0) - mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); + mlen = !mb_test_bit(start - 1, e4b->bd_bitmap); if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) - max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); + max = !mb_test_bit(start + len, e4b->bd_bitmap); if (mlen && max) e4b->bd_info->bb_fragments++; else if (!mlen && !max) @@ -1511,7 +1464,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) } mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); - ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); + ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -1810,7 +1763,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, struct ext4_buddy *e4b) { struct super_block *sb = ac->ac_sb; - void *bitmap = EXT4_MB_BITMAP(e4b); + void *bitmap = e4b->bd_bitmap; struct ext4_free_extent ex; int i; int free; @@ -1870,7 +1823,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, { struct super_block *sb = ac->ac_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); - void *bitmap = EXT4_MB_BITMAP(e4b); + void *bitmap = e4b->bd_bitmap; struct ext4_free_extent ex; ext4_fsblk_t first_group_block; ext4_fsblk_t a; @@ -2224,7 +2177,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, EXT4_DESC_PER_BLOCK_BITS(sb); meta_group_info = kmalloc(metalen, GFP_KERNEL); if (meta_group_info == NULL) { - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " + ext4_msg(sb, KERN_ERR, "can't allocate mem " "for a buddy group"); goto exit_meta_group_info; } @@ -2238,7 +2191,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); if (meta_group_info[i] == NULL) { - ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); + ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); goto exit_group_info; } memset(meta_group_info[i], 0, kmem_cache_size(cachep)); @@ -2522,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); - if (sbi->s_journal) - sbi->s_journal->j_commit_callback = release_blocks_on_commit; - return 0; out_free_locality_groups: @@ -2637,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb, * This function is called by the jbd2 layer once the commit has finished, * so we know we can free the blocks that were released with that commit. 
*/ -static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) +static void ext4_free_data_callback(struct super_block *sb, + struct ext4_journal_cb_entry *jce, + int rc) { - struct super_block *sb = journal->j_private; + struct ext4_free_data *entry = (struct ext4_free_data *)jce; struct ext4_buddy e4b; struct ext4_group_info *db; int err, count = 0, count2 = 0; - struct ext4_free_data *entry; - struct list_head *l, *ltmp; - list_for_each_safe(l, ltmp, &txn->t_private_list) { - entry = list_entry(l, struct ext4_free_data, list); + mb_debug(1, "gonna free %u blocks in group %u (0x%p):", + entry->efd_count, entry->efd_group, entry); - mb_debug(1, "gonna free %u blocks in group %u (0x%p):", - entry->count, entry->group, entry); + if (test_opt(sb, DISCARD)) + ext4_issue_discard(sb, entry->efd_group, + entry->efd_start_cluster, entry->efd_count); - if (test_opt(sb, DISCARD)) - ext4_issue_discard(sb, entry->group, - entry->start_cluster, entry->count); + err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); + /* we expect to find existing buddy because it's pinned */ + BUG_ON(err != 0); - err = ext4_mb_load_buddy(sb, entry->group, &e4b); - /* we expect to find existing buddy because it's pinned */ - BUG_ON(err != 0); - db = e4b.bd_info; - /* there are blocks to put in buddy to make them really free */ - count += entry->count; - count2++; - ext4_lock_group(sb, entry->group); - /* Take it out of per group rb tree */ - rb_erase(&entry->node, &(db->bb_free_root)); - mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); + db = e4b.bd_info; + /* there are blocks to put in buddy to make them really free */ + count += entry->efd_count; + count2++; + ext4_lock_group(sb, entry->efd_group); + /* Take it out of per group rb tree */ + rb_erase(&entry->efd_node, &(db->bb_free_root)); + mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count); - /* - * Clear the trimmed flag for the group so that the next - * ext4_trim_fs can trim it. - * If the volume is mounted with -o discard, online discard - * is supported and the free blocks will be trimmed online. - */ - if (!test_opt(sb, DISCARD)) - EXT4_MB_GRP_CLEAR_TRIMMED(db); + /* + * Clear the trimmed flag for the group so that the next + * ext4_trim_fs can trim it. + * If the volume is mounted with -o discard, online discard + * is supported and the free blocks will be trimmed online. 
+ */ + if (!test_opt(sb, DISCARD)) + EXT4_MB_GRP_CLEAR_TRIMMED(db); - if (!db->bb_free_root.rb_node) { - /* No more items in the per group rb tree - * balance refcounts from ext4_mb_free_metadata() - */ - page_cache_release(e4b.bd_buddy_page); - page_cache_release(e4b.bd_bitmap_page); - } - ext4_unlock_group(sb, entry->group); - kmem_cache_free(ext4_free_ext_cachep, entry); - ext4_mb_unload_buddy(&e4b); + if (!db->bb_free_root.rb_node) { + /* No more items in the per group rb tree + * balance refcounts from ext4_mb_free_metadata() + */ + page_cache_release(e4b.bd_buddy_page); + page_cache_release(e4b.bd_bitmap_page); } + ext4_unlock_group(sb, entry->efd_group); + kmem_cache_free(ext4_free_data_cachep, entry); + ext4_mb_unload_buddy(&e4b); mb_debug(1, "freed %u blocks in %u structures\n", count, count2); } @@ -2741,9 +2688,9 @@ int __init ext4_init_mballoc(void) return -ENOMEM; } - ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, - SLAB_RECLAIM_ACCOUNT); - if (ext4_free_ext_cachep == NULL) { + ext4_free_data_cachep = KMEM_CACHE(ext4_free_data, + SLAB_RECLAIM_ACCOUNT); + if (ext4_free_data_cachep == NULL) { kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); return -ENOMEM; @@ -2761,7 +2708,7 @@ void ext4_exit_mballoc(void) rcu_barrier(); kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); - kmem_cache_destroy(ext4_free_ext_cachep); + kmem_cache_destroy(ext4_free_data_cachep); ext4_groupinfo_destroy_slabs(); ext4_remove_debugfs_entry(); } @@ -2815,7 +2762,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); if (!ext4_data_block_valid(sbi, block, len)) { ext4_error(sb, "Allocating blocks %llu-%llu which overlap " - "fs metadata\n", block, block+len); + "fs metadata", block, block+len); /* File system mounted not to panic on error * Fix the bitmap and repeat the block allocation * We leak some of the blocks here. 
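The mballoc rework above replaces the old hand-rolled transaction-private list (release_blocks_on_commit() walking txn->t_private_list) with the generic jbd2 commit-callback interface: struct ext4_free_data now embeds an ext4_journal_cb_entry as its first member, ext4_journal_callback_add() queues it on the running transaction, and once the commit is durable jbd2 hands the entry back to ext4_free_data_callback(), which downcasts to recover the efd_* fields. A minimal user-space sketch of that first-member embedding pattern (names here are illustrative stand-ins, not the kernel API):

#include <stdlib.h>

struct cb_entry {                        /* stand-in for ext4_journal_cb_entry */
        struct cb_entry *next;
        void (*func)(struct cb_entry *entry, int rc);
};

struct free_data {                       /* stand-in for ext4_free_data */
        struct cb_entry jce;             /* MUST be first: makes the cast legal */
        unsigned int group, start_cluster, count;
};

static struct cb_entry *pending;         /* stand-in for txn->t_private_list */

static void callback_add(struct cb_entry *entry,
                         void (*fn)(struct cb_entry *, int))
{
        entry->func = fn;
        entry->next = pending;
        pending = entry;
}

static void free_data_cb(struct cb_entry *jce, int rc)
{
        /* a pointer to the first member is also a pointer to the container */
        struct free_data *fd = (struct free_data *)jce;

        (void)rc;                        /* a real callback would check this */
        /* ...here ext4 returns fd->count clusters in fd->group to the buddy... */
        free(fd);
}

int main(void)
{
        struct free_data *fd = calloc(1, sizeof(*fd));

        if (!fd)
                return 1;
        fd->group = 7;
        fd->count = 16;
        callback_add(&fd->jce, free_data_cb);

        /* "commit finished": drain the list, each callback frees its entry */
        while (pending) {
                struct cb_entry *entry = pending;

                pending = entry->next;
                entry->func(entry, 0);
        }
        return 0;
}

The kernel dispatcher (added to super.c further down in this diff) does the same drain under s_md_lock, dropping the lock around each jce_func() call because the callback may sleep while loading buddy bitmaps and frees its own entry; the first-member layout is what lets jbd2 stay ignorant of the efd_* fields.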
@@ -2911,7 +2858,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); int bsbits, max; ext4_lblk_t end; - loff_t size, orig_size, start_off; + loff_t size, start_off; + loff_t orig_size __maybe_unused; ext4_lblk_t start; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_prealloc_space *pa; @@ -3321,8 +3269,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, n = rb_first(&(grp->bb_free_root)); while (n) { - entry = rb_entry(n, struct ext4_free_data, node); - ext4_set_bits(bitmap, entry->start_cluster, entry->count); + entry = rb_entry(n, struct ext4_free_data, efd_node); + ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); n = rb_next(n); } return; @@ -3916,11 +3864,11 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) return; - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" + ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:" " Allocation context details:"); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", + ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d", ac->ac_status, ac->ac_flags); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " + ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, " "goal %lu/%lu/%lu@%lu, " "best %lu/%lu/%lu@%lu cr %d", (unsigned long)ac->ac_o_ex.fe_group, @@ -3936,9 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (unsigned long)ac->ac_b_ex.fe_len, (unsigned long)ac->ac_b_ex.fe_logical, (int)ac->ac_criteria); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", + ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", ac->ac_ex_scanned, ac->ac_found); - ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); + ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); ngroups = ext4_get_groups_count(sb); for (i = 0; i < ngroups; i++) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); @@ -4428,9 +4376,9 @@ out: static int can_merge(struct ext4_free_data *entry1, struct ext4_free_data *entry2) { - if ((entry1->t_tid == entry2->t_tid) && - (entry1->group == entry2->group) && - ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) + if ((entry1->efd_tid == entry2->efd_tid) && + (entry1->efd_group == entry2->efd_group) && + ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) return 1; return 0; } @@ -4452,8 +4400,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, BUG_ON(e4b->bd_bitmap_page == NULL); BUG_ON(e4b->bd_buddy_page == NULL); - new_node = &new_entry->node; - cluster = new_entry->start_cluster; + new_node = &new_entry->efd_node; + cluster = new_entry->efd_start_cluster; if (!*n) { /* first free block extent. 
We need to @@ -4466,10 +4414,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, } while (*n) { parent = *n; - entry = rb_entry(parent, struct ext4_free_data, node); - if (cluster < entry->start_cluster) + entry = rb_entry(parent, struct ext4_free_data, efd_node); + if (cluster < entry->efd_start_cluster) n = &(*n)->rb_left; - else if (cluster >= (entry->start_cluster + entry->count)) + else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) n = &(*n)->rb_right; else { ext4_grp_locked_error(sb, group, 0, @@ -4486,34 +4434,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, /* Now try to see the extent can be merged to left and right */ node = rb_prev(new_node); if (node) { - entry = rb_entry(node, struct ext4_free_data, node); + entry = rb_entry(node, struct ext4_free_data, efd_node); if (can_merge(entry, new_entry)) { - new_entry->start_cluster = entry->start_cluster; - new_entry->count += entry->count; + new_entry->efd_start_cluster = entry->efd_start_cluster; + new_entry->efd_count += entry->efd_count; rb_erase(node, &(db->bb_free_root)); - spin_lock(&sbi->s_md_lock); - list_del(&entry->list); - spin_unlock(&sbi->s_md_lock); - kmem_cache_free(ext4_free_ext_cachep, entry); + ext4_journal_callback_del(handle, &entry->efd_jce); + kmem_cache_free(ext4_free_data_cachep, entry); } } node = rb_next(new_node); if (node) { - entry = rb_entry(node, struct ext4_free_data, node); + entry = rb_entry(node, struct ext4_free_data, efd_node); if (can_merge(new_entry, entry)) { - new_entry->count += entry->count; + new_entry->efd_count += entry->efd_count; rb_erase(node, &(db->bb_free_root)); - spin_lock(&sbi->s_md_lock); - list_del(&entry->list); - spin_unlock(&sbi->s_md_lock); - kmem_cache_free(ext4_free_ext_cachep, entry); + ext4_journal_callback_del(handle, &entry->efd_jce); + kmem_cache_free(ext4_free_data_cachep, entry); } } /* Add the extent to transaction's private list */ - spin_lock(&sbi->s_md_lock); - list_add(&new_entry->list, &handle->h_transaction->t_private_list); - spin_unlock(&sbi->s_md_lock); + ext4_journal_callback_add(handle, ext4_free_data_callback, + &new_entry->efd_jce); return 0; } @@ -4691,15 +4634,15 @@ do_more: * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed */ - new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); + new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { err = -ENOMEM; goto error_return; } - new_entry->start_cluster = bit; - new_entry->group = block_group; - new_entry->count = count_clusters; - new_entry->t_tid = handle->h_transaction->t_tid; + new_entry->efd_start_cluster = bit; + new_entry->efd_group = block_group; + new_entry->efd_count = count_clusters; + new_entry->efd_tid = handle->h_transaction->t_tid; ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); @@ -4971,11 +4914,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, start = (e4b.bd_info->bb_first_free > start) ? 
e4b.bd_info->bb_first_free : start; - while (start < max) { - start = mb_find_next_zero_bit(bitmap, max, start); - if (start >= max) + while (start <= max) { + start = mb_find_next_zero_bit(bitmap, max + 1, start); + if (start > max) break; - next = mb_find_next_bit(bitmap, max, start); + next = mb_find_next_bit(bitmap, max + 1, start); if ((next - start) >= minblocks) { ext4_trim_extent(sb, start, @@ -5027,37 +4970,36 @@ out: int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { struct ext4_group_info *grp; - ext4_group_t first_group, last_group; - ext4_group_t group, ngroups = ext4_get_groups_count(sb); + ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; - uint64_t start, len, minlen, trimmed = 0; + uint64_t start, end, minlen, trimmed = 0; ext4_fsblk_t first_data_blk = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); + ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); int ret = 0; start = range->start >> sb->s_blocksize_bits; - len = range->len >> sb->s_blocksize_bits; + end = start + (range->len >> sb->s_blocksize_bits) - 1; minlen = range->minlen >> sb->s_blocksize_bits; - if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) + if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || + unlikely(start >= max_blks)) return -EINVAL; - if (start + len <= first_data_blk) + if (end >= max_blks) + end = max_blks - 1; + if (end <= first_data_blk) goto out; - if (start < first_data_blk) { - len -= first_data_blk - start; + if (start < first_data_blk) start = first_data_blk; - } - /* Determine first and last group to examine based on start and len */ + /* Determine first and last group to examine based on start and end */ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, &first_group, &first_cluster); - ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), + ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end, &last_group, &last_cluster); - last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; - last_cluster = EXT4_CLUSTERS_PER_GROUP(sb); - if (first_group > last_group) - return -EINVAL; + /* end now represents the last cluster to discard in this group */ + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; for (group = first_group; group <= last_group; group++) { grp = ext4_get_group_info(sb, group); @@ -5069,31 +5011,35 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } /* - * For all the groups except the last one, last block will - * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to - * change it for the last group in which case start + - * len < EXT4_BLOCKS_PER_GROUP(sb). + * For all the groups except the last one, last cluster will + * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to + * change it for the last group, note that last_cluster is + * already computed earlier by ext4_get_group_no_and_offset() */ - if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) - last_cluster = first_cluster + len; - len -= last_cluster - first_cluster; + if (group == last_group) + end = last_cluster; if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - last_cluster, minlen); + end, minlen); if (cnt < 0) { ret = cnt; break; } + trimmed += cnt; } - trimmed += cnt; + + /* + * For every group except the first one, we are sure + * that the first cluster to discard will be cluster #0. 
+ */ first_cluster = 0; } - range->len = trimmed * sb->s_blocksize; if (!ret) atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); out: + range->len = trimmed * sb->s_blocksize; return ret; } diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 47705f3285e3..c070618c21ce 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -96,21 +96,23 @@ extern u8 mb_enable_debug; struct ext4_free_data { - /* this links the free block information from group_info */ - struct rb_node node; + /* MUST be the first member */ + struct ext4_journal_cb_entry efd_jce; + + /* ext4_free_data private data starts from here */ - /* this links the free block information from ext4_sb_info */ - struct list_head list; + /* this links the free block information from group_info */ + struct rb_node efd_node; /* group which free block extent belongs */ - ext4_group_t group; + ext4_group_t efd_group; /* free block extent */ - ext4_grpblk_t start_cluster; - ext4_grpblk_t count; + ext4_grpblk_t efd_start_cluster; + ext4_grpblk_t efd_count; /* transaction which freed this extent */ - tid_t t_tid; + tid_t efd_tid; }; struct ext4_prealloc_space { @@ -210,8 +212,6 @@ struct ext4_buddy { __u16 bd_blkbits; ext4_group_t bd_group; }; -#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) -#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, struct ext4_free_extent *fex) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index e7d6bb0acfa6..f39f80f8f2c5 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -471,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, S_IFREG, NULL, goal, owner); if (IS_ERR(tmp_inode)) { - retval = PTR_ERR(inode); + retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); return retval; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 7ea4ba4eff2a..ed6548d89165 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -257,8 +257,8 @@ int ext4_multi_mount_protect(struct super_block *sb, * If check_interval in MMP block is larger, use that instead of * update_interval from the superblock. 
*/ - if (mmp->mmp_check_interval > mmp_check_interval) - mmp_check_interval = mmp->mmp_check_interval; + if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) + mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); seq = le32_to_cpu(mmp->mmp_seq); if (seq == EXT4_MMP_SEQ_CLEAN) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2043f482375d..349d7b3671c8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -468,7 +468,7 @@ fail2: fail: if (*err == ERR_BAD_DX_DIR) ext4_warning(dir->i_sb, - "Corrupt dir inode %ld, running e2fsck is " + "Corrupt dir inode %lu, running e2fsck is " "recommended.", dir->i_ino); return NULL; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 475851896518..74cd1f7f1f88 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -60,7 +60,6 @@ void ext4_ioend_wait(struct inode *inode) static void put_io_page(struct ext4_io_page *io_page) { if (atomic_dec_and_test(&io_page->p_count)) { - end_page_writeback(io_page->p_page); put_page(io_page->p_page); kmem_cache_free(io_page_cachep, io_page); } @@ -110,6 +109,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io) if (io->iocb) aio_complete(io->iocb, io->result, 0); + if (io->flag & EXT4_IO_END_DIRECT) + inode_dio_done(inode); /* Wake up anyone waiting on unwritten extent conversion */ if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) wake_up_all(ext4_ioend_wq(io->inode)); @@ -127,12 +128,18 @@ static void ext4_end_io_work(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&ei->i_completed_io_lock, flags); + if (io->flag & EXT4_IO_END_IN_FSYNC) + goto requeue; if (list_empty(&io->list)) { spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); goto free; } if (!mutex_trylock(&inode->i_mutex)) { + bool was_queued; +requeue: + was_queued = !!(io->flag & EXT4_IO_END_QUEUED); + io->flag |= EXT4_IO_END_QUEUED; spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); /* * Requeue the work instead of waiting so that the work @@ -145,9 +152,8 @@ static void ext4_end_io_work(struct work_struct *work) * yield the cpu if it sees an end_io request that has already * been requeued. */ - if (io->flag & EXT4_IO_END_QUEUED) + if (was_queued) yield(); - io->flag |= EXT4_IO_END_QUEUED; return; } list_del_init(&io->list); @@ -227,9 +233,9 @@ static void ext4_end_bio(struct bio *bio, int error) } while (bh != head); } - put_io_page(io_end->pages[i]); + if (atomic_read(&io_end->pages[i]->p_count) == 1) + end_page_writeback(io_end->pages[i]->p_page); } - io_end->num_io_pages = 0; inode = io_end->inode; if (error) { @@ -421,6 +427,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, * PageWriteback bit from the page to prevent the system from * wedging later on. 
*/ + if (atomic_read(&io_page->p_count) == 1) + end_page_writeback(page); put_io_page(io_page); return ret; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f9d948f0eb86..59fa0be27251 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1163,8 +1163,11 @@ static void ext4_update_super(struct super_block *sb, do_div(reserved_blocks, 100); ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); + ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); + le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * + flex_gd->count); /* * We need to protect s_groups_count against other CPUs seeing @@ -1465,6 +1468,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, } ext4_blocks_count_set(es, o_blocks_count + add); + ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ @@ -1512,16 +1516,17 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", - o_blocks_count, n_blocks_count); + ext4_msg(sb, KERN_DEBUG, + "extending last group from %llu to %llu blocks", + o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) return 0; if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to resize to %llu blocks safely\n", - sb->s_id, n_blocks_count); + ext4_msg(sb, KERN_ERR, + "filesystem too large to resize to %llu blocks safely", + n_blocks_count); if (sizeof(sector_t) < 8) ext4_warning(sb, "CONFIG_LBDAF not enabled"); return -EINVAL; @@ -1582,7 +1587,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_fsblk_t o_blocks_count; ext4_group_t o_group; ext4_group_t n_group; - ext4_grpblk_t offset; + ext4_grpblk_t offset, add; unsigned long n_desc_blocks; unsigned long o_desc_blocks; unsigned long desc_blocks; @@ -1591,8 +1596,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " - "upto %llu blocks\n", o_blocks_count, n_blocks_count); + ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " + "to %llu blocks", o_blocks_count, n_blocks_count); if (n_blocks_count < o_blocks_count) { /* On-line shrinking not supported */ @@ -1605,7 +1610,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) return 0; ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); - ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); + ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / EXT4_DESC_PER_BLOCK(sb); @@ -1634,10 +1639,12 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) } brelse(bh); - if (offset != 0) { - /* extend the last group */ - ext4_grpblk_t add; - add = EXT4_BLOCKS_PER_GROUP(sb) - offset; + /* extend the last group */ + if (n_group == o_group) + add = n_blocks_count - o_blocks_count; + else + add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); + if (add > 0) { err = ext4_group_extend_no_check(sb, 
o_blocks_count, add); if (err) goto out; @@ -1674,7 +1681,7 @@ out: iput(resize_inode); if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " - "upto %llu blocks\n", o_blocks_count, n_blocks_count); + ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu " + "upto %llu blocks", o_blocks_count, n_blocks_count); return err; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 933900909ed0..ceebaf853beb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -62,6 +62,7 @@ static struct ext4_features *ext4_feat; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); +static int ext4_show_options(struct seq_file *seq, struct dentry *root); static int ext4_commit_super(struct super_block *sb, int sync); static void ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); @@ -375,7 +376,7 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line, if (is_handle_aborted(handle)) return; - printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", + printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n", caller, line, errstr, err_fn); jbd2_journal_abort_handle(handle); @@ -431,6 +432,22 @@ static int block_device_ejected(struct super_block *sb) return bdi->dev == NULL; } +static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) +{ + struct super_block *sb = journal->j_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int error = is_journal_aborted(journal); + struct ext4_journal_cb_entry *jce, *tmp; + + spin_lock(&sbi->s_md_lock); + list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { + list_del_init(&jce->jce_list); + spin_unlock(&sbi->s_md_lock); + jce->jce_func(sb, jce, error); + spin_lock(&sbi->s_md_lock); + } + spin_unlock(&sbi->s_md_lock); +} /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. 
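The super.c changes below collapse the old parse_options() switch, several hundred lines of near-identical case arms, into the ext4_mount_opts table: each row carries a token, the mount_opt bit(s) it drives, and MOPT_* flags, and one generic loop in handle_mount_opt() applies MOPT_SET/MOPT_CLEAR; the same table later drives _ext4_show_options(), which prints a row only when its bits differ from the per-sb defaults snapshotted in s_def_mount_opt. A stripped-down sketch of the table-driven idea (the table contents and names here are illustrative, not the real ext4 option set):

#include <stdio.h>
#include <string.h>

#define MOPT_SET   0x01
#define MOPT_CLEAR 0x02

struct mount_opt {
        const char *name;
        unsigned int bit;                /* mount flag this token drives */
        int flags;
};

static const struct mount_opt opts[] = {
        { "discard",   0x1, MOPT_SET   },
        { "nodiscard", 0x1, MOPT_CLEAR }, /* same bit, opposite polarity */
        { "barrier",   0x2, MOPT_SET   },
        { "nobarrier", 0x2, MOPT_CLEAR },
        { NULL, 0, 0 }
};

static int handle_opt(const char *token, unsigned int *mount_opt)
{
        const struct mount_opt *m;

        for (m = opts; m->name; m++) {
                if (strcmp(token, m->name))
                        continue;
                if (m->flags & MOPT_SET)
                        *mount_opt |= m->bit;
                else
                        *mount_opt &= ~m->bit;
                return 1;
        }
        fprintf(stderr, "Unrecognized mount option \"%s\"\n", token);
        return -1;
}

int main(void)
{
        unsigned int mount_opt = 0, def_mount_opt = 0x2; /* barrier on by default */
        const struct mount_opt *m;

        handle_opt("discard", &mount_opt);
        handle_opt("nobarrier", &mount_opt);

        /* show_options side: print only what differs from the defaults,
         * picking the row whose polarity matches the current state */
        for (m = opts; m->name; m++)
                if (((mount_opt ^ def_mount_opt) & m->bit) &&
                    !!(mount_opt & m->bit) == !!(m->flags & MOPT_SET))
                        printf(",%s", m->name);
        printf("\n");                    /* prints: ,discard,nobarrier */
        return 0;
}

Adding a boolean option becomes a one-row table edit, and the parser, the remount checks, and the new /proc/fs/ext4/<dev>/options file all read the same table, so they cannot drift out of sync.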
@@ -498,11 +515,16 @@ void ext4_error_inode(struct inode *inode, const char *function, va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", - inode->i_sb->s_id, function, line, inode->i_ino); if (block) - printk(KERN_CONT "block %llu: ", block); - printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: block %llu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, &vaf); + else + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, &vaf); va_end(args); ext4_handle_error(inode->i_sb); @@ -524,15 +546,21 @@ void ext4_error_file(struct file *file, const char *function, path = d_path(&(file->f_path), pathname, sizeof(pathname)); if (IS_ERR(path)) path = "(unknown)"; - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: ", - inode->i_sb->s_id, function, line, inode->i_ino); - if (block) - printk(KERN_CONT "block %llu: ", block); va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); + if (block) + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "block %llu: comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, path, &vaf); + else + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, path, &vaf); va_end(args); ext4_handle_error(inode->i_sb); @@ -808,9 +836,6 @@ static void ext4_put_super(struct super_block *sb) destroy_workqueue(sbi->dio_unwritten_wq); lock_super(sb); - if (sb->s_dirt) - ext4_commit_super(sb, 1); - if (sbi->s_journal) { err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -827,9 +852,12 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); - ext4_commit_super(sb, 1); } + if (sb->s_dirt || !(sb->s_flags & MS_RDONLY)) + ext4_commit_super(sb, 1); + if (sbi->s_proc) { + remove_proc_entry("options", sbi->s_proc); remove_proc_entry(sb->s_id, ext4_proc_root); } kobject_del(&sbi->s_kobj); @@ -990,180 +1018,6 @@ void ext4_clear_inode(struct inode *inode) } } -static inline void ext4_show_quota_options(struct seq_file *seq, - struct super_block *sb) -{ -#if defined(CONFIG_QUOTA) - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (sbi->s_jquota_fmt) { - char *fmtname = ""; - - switch (sbi->s_jquota_fmt) { - case QFMT_VFS_OLD: - fmtname = "vfsold"; - break; - case QFMT_VFS_V0: - fmtname = "vfsv0"; - break; - case QFMT_VFS_V1: - fmtname = "vfsv1"; - break; - } - seq_printf(seq, ",jqfmt=%s", fmtname); - } - - if (sbi->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); - - if (sbi->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - - if (test_opt(sb, USRQUOTA)) - seq_puts(seq, ",usrquota"); - - if (test_opt(sb, GRPQUOTA)) - seq_puts(seq, ",grpquota"); -#endif -} - -/* - * Show an option if - * - it's set to a non-default value OR - * - if the per-sb default is different from the global default - */ -static int ext4_show_options(struct seq_file *seq, struct dentry *root) -{ - int def_errors; - unsigned long def_mount_opts; - 
struct super_block *sb = root->d_sb; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - def_errors = le16_to_cpu(es->s_errors); - - if (sbi->s_sb_block != 1) - seq_printf(seq, ",sb=%llu", sbi->s_sb_block); - if (test_opt(sb, MINIX_DF)) - seq_puts(seq, ",minixdf"); - if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) - seq_puts(seq, ",grpid"); - if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) - seq_puts(seq, ",nogrpid"); - if (sbi->s_resuid != EXT4_DEF_RESUID || - le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { - seq_printf(seq, ",resuid=%u", sbi->s_resuid); - } - if (sbi->s_resgid != EXT4_DEF_RESGID || - le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { - seq_printf(seq, ",resgid=%u", sbi->s_resgid); - } - if (test_opt(sb, ERRORS_RO)) { - if (def_errors == EXT4_ERRORS_PANIC || - def_errors == EXT4_ERRORS_CONTINUE) { - seq_puts(seq, ",errors=remount-ro"); - } - } - if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) - seq_puts(seq, ",errors=continue"); - if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) - seq_puts(seq, ",errors=panic"); - if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) - seq_puts(seq, ",nouid32"); - if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) - seq_puts(seq, ",debug"); -#ifdef CONFIG_EXT4_FS_XATTR - if (test_opt(sb, XATTR_USER)) - seq_puts(seq, ",user_xattr"); - if (!test_opt(sb, XATTR_USER)) - seq_puts(seq, ",nouser_xattr"); -#endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL - if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) - seq_puts(seq, ",acl"); - if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) - seq_puts(seq, ",noacl"); -#endif - if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { - seq_printf(seq, ",commit=%u", - (unsigned) (sbi->s_commit_interval / HZ)); - } - if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { - seq_printf(seq, ",min_batch_time=%u", - (unsigned) sbi->s_min_batch_time); - } - if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { - seq_printf(seq, ",max_batch_time=%u", - (unsigned) sbi->s_max_batch_time); - } - - /* - * We're changing the default of barrier mount option, so - * let's always display its mount state so it's clear what its - * status is. - */ - seq_puts(seq, ",barrier="); - seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) - seq_puts(seq, ",journal_async_commit"); - else if (test_opt(sb, JOURNAL_CHECKSUM)) - seq_puts(seq, ",journal_checksum"); - if (test_opt(sb, I_VERSION)) - seq_puts(seq, ",i_version"); - if (!test_opt(sb, DELALLOC) && - !(def_mount_opts & EXT4_DEFM_NODELALLOC)) - seq_puts(seq, ",nodelalloc"); - - if (!test_opt(sb, MBLK_IO_SUBMIT)) - seq_puts(seq, ",nomblk_io_submit"); - if (sbi->s_stripe) - seq_printf(seq, ",stripe=%lu", sbi->s_stripe); - /* - * journal mode get enabled in different ways - * So just print the value even if we didn't specify it - */ - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - seq_puts(seq, ",data=journal"); - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - seq_puts(seq, ",data=ordered"); - else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - seq_puts(seq, ",data=writeback"); - - if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) - seq_printf(seq, ",inode_readahead_blks=%u", - sbi->s_inode_readahead_blks); - - if (test_opt(sb, DATA_ERR_ABORT)) - seq_puts(seq, ",data_err=abort"); - - if (test_opt(sb, NO_AUTO_DA_ALLOC)) - seq_puts(seq, ",noauto_da_alloc"); - - if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) - seq_puts(seq, ",discard"); - - if (test_opt(sb, NOLOAD)) - seq_puts(seq, ",norecovery"); - - if (test_opt(sb, DIOREAD_NOLOCK)) - seq_puts(seq, ",dioread_nolock"); - - if (test_opt(sb, BLOCK_VALIDITY) && - !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) - seq_puts(seq, ",block_validity"); - - if (!test_opt(sb, INIT_INODE_TABLE)) - seq_puts(seq, ",noinit_itable"); - else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) - seq_printf(seq, ",init_itable=%u", - (unsigned) sbi->s_li_wait_mult); - - ext4_show_quota_options(seq, sb); - - return 0; -} - static struct inode *ext4_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { @@ -1316,18 +1170,17 @@ static const struct export_operations ext4_export_ops = { enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, + Opt_nouid32, Opt_debug, Opt_removed, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, + Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_commit, Opt_min_batch_time, Opt_max_batch_time, - Opt_journal_update, Opt_journal_dev, - Opt_journal_checksum, Opt_journal_async_commit, + Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, - Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, - Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, + Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, + Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, @@ -1350,20 +1203,19 @@ static const match_table_t tokens = { {Opt_err_ro, "errors=remount-ro"}, {Opt_nouid32, "nouid32"}, {Opt_debug, "debug"}, - {Opt_oldalloc, "oldalloc"}, - {Opt_orlov, "orlov"}, + {Opt_removed, "oldalloc"}, + {Opt_removed, "orlov"}, {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, 
{Opt_acl, "acl"}, {Opt_noacl, "noacl"}, - {Opt_noload, "noload"}, {Opt_noload, "norecovery"}, - {Opt_nobh, "nobh"}, - {Opt_bh, "bh"}, + {Opt_noload, "noload"}, + {Opt_removed, "nobh"}, + {Opt_removed, "bh"}, {Opt_commit, "commit=%u"}, {Opt_min_batch_time, "min_batch_time=%u"}, {Opt_max_batch_time, "max_batch_time=%u"}, - {Opt_journal_update, "journal=update"}, {Opt_journal_dev, "journal_dev=%u"}, {Opt_journal_checksum, "journal_checksum"}, {Opt_journal_async_commit, "journal_async_commit"}, @@ -1389,7 +1241,6 @@ static const match_table_t tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_i_version, "i_version"}, {Opt_stripe, "stripe=%u"}, - {Opt_resize, "resize"}, {Opt_delalloc, "delalloc"}, {Opt_nodelalloc, "nodelalloc"}, {Opt_mblk_io_submit, "mblk_io_submit"}, @@ -1408,6 +1259,11 @@ static const match_table_t tokens = { {Opt_init_itable, "init_itable=%u"}, {Opt_init_itable, "init_itable"}, {Opt_noinit_itable, "noinit_itable"}, + {Opt_removed, "check=none"}, /* mount option from ext2/3 */ + {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ + {Opt_removed, "reservation"}, /* mount option from ext2/3 */ + {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ + {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ {Opt_err, NULL}, }; @@ -1496,420 +1352,273 @@ static int clear_qf_name(struct super_block *sb, int qtype) } #endif -static int parse_options(char *options, struct super_block *sb, - unsigned long *journal_devnum, - unsigned int *journal_ioprio, - ext4_fsblk_t *n_blocks_count, int is_remount) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - char *p; - substring_t args[MAX_OPT_ARGS]; - int data_opt = 0; - int option; +#define MOPT_SET 0x0001 +#define MOPT_CLEAR 0x0002 +#define MOPT_NOSUPPORT 0x0004 +#define MOPT_EXPLICIT 0x0008 +#define MOPT_CLEAR_ERR 0x0010 +#define MOPT_GTE0 0x0020 #ifdef CONFIG_QUOTA - int qfmt; +#define MOPT_Q 0 +#define MOPT_QFMT 0x0040 +#else +#define MOPT_Q MOPT_NOSUPPORT +#define MOPT_QFMT MOPT_NOSUPPORT #endif - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. 
- */ - args[0].to = args[0].from = NULL; - token = match_token(p, tokens, args); - switch (token) { - case Opt_bsd_df: - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); - clear_opt(sb, MINIX_DF); - break; - case Opt_minix_df: - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); - set_opt(sb, MINIX_DF); - - break; - case Opt_grpid: - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); - set_opt(sb, GRPID); - - break; - case Opt_nogrpid: - ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); - clear_opt(sb, GRPID); - - break; - case Opt_resuid: - if (match_int(&args[0], &option)) - return 0; - sbi->s_resuid = option; - break; - case Opt_resgid: - if (match_int(&args[0], &option)) - return 0; - sbi->s_resgid = option; - break; - case Opt_sb: - /* handled by get_sb_block() instead of here */ - /* *sb_block = match_int(&args[0]); */ - break; - case Opt_err_panic: - clear_opt(sb, ERRORS_CONT); - clear_opt(sb, ERRORS_RO); - set_opt(sb, ERRORS_PANIC); - break; - case Opt_err_ro: - clear_opt(sb, ERRORS_CONT); - clear_opt(sb, ERRORS_PANIC); - set_opt(sb, ERRORS_RO); - break; - case Opt_err_cont: - clear_opt(sb, ERRORS_RO); - clear_opt(sb, ERRORS_PANIC); - set_opt(sb, ERRORS_CONT); - break; - case Opt_nouid32: - set_opt(sb, NO_UID32); - break; - case Opt_debug: - set_opt(sb, DEBUG); - break; - case Opt_oldalloc: - ext4_msg(sb, KERN_WARNING, - "Ignoring deprecated oldalloc option"); - break; - case Opt_orlov: - ext4_msg(sb, KERN_WARNING, - "Ignoring deprecated orlov option"); - break; +#define MOPT_DATAJ 0x0080 + +static const struct mount_opts { + int token; + int mount_opt; + int flags; +} ext4_mount_opts[] = { + {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, + {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, + {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, + {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, + {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET}, + {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR}, + {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, + {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, + {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, + {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, + {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, + {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, + {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, + {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, + {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, + {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | + EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, + {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, + {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, + {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, + {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, + {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, + {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, + {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, + {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, + {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, + {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, + {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, + {Opt_commit, 0, MOPT_GTE0}, + {Opt_max_batch_time, 0, MOPT_GTE0}, + {Opt_min_batch_time, 0, MOPT_GTE0}, + {Opt_inode_readahead_blks, 0, MOPT_GTE0}, + {Opt_init_itable, 0, MOPT_GTE0}, + {Opt_stripe, 0, MOPT_GTE0}, + {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, 
MOPT_DATAJ}, + {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, + {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, #ifdef CONFIG_EXT4_FS_XATTR - case Opt_user_xattr: - set_opt(sb, XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt(sb, XATTR_USER); - break; + {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, + {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, #else - case Opt_user_xattr: - case Opt_nouser_xattr: - ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); - break; + {Opt_user_xattr, 0, MOPT_NOSUPPORT}, + {Opt_nouser_xattr, 0, MOPT_NOSUPPORT}, #endif #ifdef CONFIG_EXT4_FS_POSIX_ACL - case Opt_acl: - set_opt(sb, POSIX_ACL); - break; - case Opt_noacl: - clear_opt(sb, POSIX_ACL); - break; + {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, + {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, #else - case Opt_acl: - case Opt_noacl: - ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); - break; + {Opt_acl, 0, MOPT_NOSUPPORT}, + {Opt_noacl, 0, MOPT_NOSUPPORT}, #endif - case Opt_journal_update: - /* @@@ FIXME */ - /* Eventually we will want to be able to create - a journal file here. For now, only allow the - user to specify an existing inode to be the - journal file. */ - if (is_remount) { - ext4_msg(sb, KERN_ERR, - "Cannot specify journal on remount"); - return 0; - } - set_opt(sb, UPDATE_JOURNAL); - break; - case Opt_journal_dev: - if (is_remount) { + {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, + {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, + {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, + {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, + MOPT_SET | MOPT_Q}, + {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, + MOPT_SET | MOPT_Q}, + {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | + EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, + {Opt_usrjquota, 0, MOPT_Q}, + {Opt_grpjquota, 0, MOPT_Q}, + {Opt_offusrjquota, 0, MOPT_Q}, + {Opt_offgrpjquota, 0, MOPT_Q}, + {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, + {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, + {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, + {Opt_err, 0, 0} +}; + +static int handle_mount_opt(struct super_block *sb, char *opt, int token, + substring_t *args, unsigned long *journal_devnum, + unsigned int *journal_ioprio, int is_remount) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + const struct mount_opts *m; + int arg = 0; + + if (args->from && match_int(args, &arg)) + return -1; + switch (token) { + case Opt_noacl: + case Opt_nouser_xattr: + ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); + break; + case Opt_sb: + return 1; /* handled by get_sb_block() */ + case Opt_removed: + ext4_msg(sb, KERN_WARNING, + "Ignoring removed %s option", opt); + return 1; + case Opt_resuid: + sbi->s_resuid = arg; + return 1; + case Opt_resgid: + sbi->s_resgid = arg; + return 1; + case Opt_abort: + sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; + return 1; + case Opt_i_version: + sb->s_flags |= MS_I_VERSION; + return 1; + case Opt_journal_dev: + if (is_remount) { + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); + return -1; + } + *journal_devnum = arg; + return 1; + case Opt_journal_ioprio: + if (arg < 0 || arg > 7) + return -1; + *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); + return 1; + } + + for (m = ext4_mount_opts; m->token != Opt_err; m++) { + if (token != m->token) + continue; + if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) + return -1; + if (m->flags & MOPT_EXPLICIT) + set_opt2(sb, EXPLICIT_DELALLOC); + if (m->flags & 
MOPT_CLEAR_ERR) + clear_opt(sb, ERRORS_MASK); + if (token == Opt_noquota && sb_any_quota_loaded(sb)) { + ext4_msg(sb, KERN_ERR, "Cannot change quota " + "options when quota turned on"); + return -1; + } + + if (m->flags & MOPT_NOSUPPORT) { + ext4_msg(sb, KERN_ERR, "%s option not supported", opt); + } else if (token == Opt_commit) { + if (arg == 0) + arg = JBD2_DEFAULT_MAX_COMMIT_AGE; + sbi->s_commit_interval = HZ * arg; + } else if (token == Opt_max_batch_time) { + if (arg == 0) + arg = EXT4_DEF_MAX_BATCH_TIME; + sbi->s_max_batch_time = arg; + } else if (token == Opt_min_batch_time) { + sbi->s_min_batch_time = arg; + } else if (token == Opt_inode_readahead_blks) { + if (arg > (1 << 30)) + return -1; + if (arg && !is_power_of_2(arg)) { ext4_msg(sb, KERN_ERR, - "Cannot specify journal on remount"); - return 0; + "EXT4-fs: inode_readahead_blks" + " must be a power of 2"); + return -1; } - if (match_int(&args[0], &option)) - return 0; - *journal_devnum = option; - break; - case Opt_journal_checksum: - set_opt(sb, JOURNAL_CHECKSUM); - break; - case Opt_journal_async_commit: - set_opt(sb, JOURNAL_ASYNC_COMMIT); - set_opt(sb, JOURNAL_CHECKSUM); - break; - case Opt_noload: - set_opt(sb, NOLOAD); - break; - case Opt_commit: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - if (option == 0) - option = JBD2_DEFAULT_MAX_COMMIT_AGE; - sbi->s_commit_interval = HZ * option; - break; - case Opt_max_batch_time: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - if (option == 0) - option = EXT4_DEF_MAX_BATCH_TIME; - sbi->s_max_batch_time = option; - break; - case Opt_min_batch_time: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - sbi->s_min_batch_time = option; - break; - case Opt_data_journal: - data_opt = EXT4_MOUNT_JOURNAL_DATA; - goto datacheck; - case Opt_data_ordered: - data_opt = EXT4_MOUNT_ORDERED_DATA; - goto datacheck; - case Opt_data_writeback: - data_opt = EXT4_MOUNT_WRITEBACK_DATA; - datacheck: + sbi->s_inode_readahead_blks = arg; + } else if (token == Opt_init_itable) { + set_opt(sb, INIT_INODE_TABLE); + if (!args->from) + arg = EXT4_DEF_LI_WAIT_MULT; + sbi->s_li_wait_mult = arg; + } else if (token == Opt_stripe) { + sbi->s_stripe = arg; + } else if (m->flags & MOPT_DATAJ) { if (is_remount) { if (!sbi->s_journal) ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); - else if (test_opt(sb, DATA_FLAGS) != data_opt) { + else if (test_opt(sb, DATA_FLAGS) != + m->mount_opt) { ext4_msg(sb, KERN_ERR, - "Cannot change data mode on remount"); - return 0; + "Cannot change data mode on remount"); + return -1; } } else { clear_opt(sb, DATA_FLAGS); - sbi->s_mount_opt |= data_opt; + sbi->s_mount_opt |= m->mount_opt; } - break; - case Opt_data_err_abort: - set_opt(sb, DATA_ERR_ABORT); - break; - case Opt_data_err_ignore: - clear_opt(sb, DATA_ERR_ABORT); - break; #ifdef CONFIG_QUOTA - case Opt_usrjquota: + } else if (token == Opt_usrjquota) { if (!set_qf_name(sb, USRQUOTA, &args[0])) - return 0; - break; - case Opt_grpjquota: + return -1; + } else if (token == Opt_grpjquota) { if (!set_qf_name(sb, GRPQUOTA, &args[0])) - return 0; - break; - case Opt_offusrjquota: + return -1; + } else if (token == Opt_offusrjquota) { if (!clear_qf_name(sb, USRQUOTA)) - return 0; - break; - case Opt_offgrpjquota: + return -1; + } else if (token == Opt_offgrpjquota) { if (!clear_qf_name(sb, GRPQUOTA)) - return 0; - break; - - case Opt_jqfmt_vfsold: - qfmt = QFMT_VFS_OLD; - goto 
set_qf_format; - case Opt_jqfmt_vfsv0: - qfmt = QFMT_VFS_V0; - goto set_qf_format; - case Opt_jqfmt_vfsv1: - qfmt = QFMT_VFS_V1; -set_qf_format: + return -1; + } else if (m->flags & MOPT_QFMT) { if (sb_any_quota_loaded(sb) && - sbi->s_jquota_fmt != qfmt) { - ext4_msg(sb, KERN_ERR, "Cannot change " - "journaled quota options when " - "quota turned on"); - return 0; - } - sbi->s_jquota_fmt = qfmt; - break; - case Opt_quota: - case Opt_usrquota: - set_opt(sb, QUOTA); - set_opt(sb, USRQUOTA); - break; - case Opt_grpquota: - set_opt(sb, QUOTA); - set_opt(sb, GRPQUOTA); - break; - case Opt_noquota: - if (sb_any_quota_loaded(sb)) { - ext4_msg(sb, KERN_ERR, "Cannot change quota " - "options when quota turned on"); - return 0; + sbi->s_jquota_fmt != m->mount_opt) { + ext4_msg(sb, KERN_ERR, "Cannot " + "change journaled quota options " + "when quota turned on"); + return -1; } - clear_opt(sb, QUOTA); - clear_opt(sb, USRQUOTA); - clear_opt(sb, GRPQUOTA); - break; -#else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - ext4_msg(sb, KERN_ERR, - "quota options not supported"); - break; - case Opt_usrjquota: - case Opt_grpjquota: - case Opt_offusrjquota: - case Opt_offgrpjquota: - case Opt_jqfmt_vfsold: - case Opt_jqfmt_vfsv0: - case Opt_jqfmt_vfsv1: - ext4_msg(sb, KERN_ERR, - "journaled quota options not supported"); - break; - case Opt_noquota: - break; + sbi->s_jquota_fmt = m->mount_opt; #endif - case Opt_abort: - sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; - break; - case Opt_nobarrier: - clear_opt(sb, BARRIER); - break; - case Opt_barrier: - if (args[0].from) { - if (match_int(&args[0], &option)) - return 0; - } else - option = 1; /* No argument, default to 1 */ - if (option) - set_opt(sb, BARRIER); - else - clear_opt(sb, BARRIER); - break; - case Opt_ignore: - break; - case Opt_resize: - if (!is_remount) { - ext4_msg(sb, KERN_ERR, - "resize option only available " - "for remount"); - return 0; - } - if (match_int(&args[0], &option) != 0) - return 0; - *n_blocks_count = option; - break; - case Opt_nobh: - ext4_msg(sb, KERN_WARNING, - "Ignoring deprecated nobh option"); - break; - case Opt_bh: - ext4_msg(sb, KERN_WARNING, - "Ignoring deprecated bh option"); - break; - case Opt_i_version: - set_opt(sb, I_VERSION); - sb->s_flags |= MS_I_VERSION; - break; - case Opt_nodelalloc: - clear_opt(sb, DELALLOC); - clear_opt2(sb, EXPLICIT_DELALLOC); - break; - case Opt_mblk_io_submit: - set_opt(sb, MBLK_IO_SUBMIT); - break; - case Opt_nomblk_io_submit: - clear_opt(sb, MBLK_IO_SUBMIT); - break; - case Opt_stripe: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - sbi->s_stripe = option; - break; - case Opt_delalloc: - set_opt(sb, DELALLOC); - set_opt2(sb, EXPLICIT_DELALLOC); - break; - case Opt_block_validity: - set_opt(sb, BLOCK_VALIDITY); - break; - case Opt_noblock_validity: - clear_opt(sb, BLOCK_VALIDITY); - break; - case Opt_inode_readahead_blks: - if (match_int(&args[0], &option)) - return 0; - if (option < 0 || option > (1 << 30)) - return 0; - if (option && !is_power_of_2(option)) { - ext4_msg(sb, KERN_ERR, - "EXT4-fs: inode_readahead_blks" - " must be a power of 2"); - return 0; + } else { + if (!args->from) + arg = 1; + if (m->flags & MOPT_CLEAR) + arg = !arg; + else if (unlikely(!(m->flags & MOPT_SET))) { + ext4_msg(sb, KERN_WARNING, + "buggy handling of option %s", opt); + WARN_ON(1); + return -1; } - sbi->s_inode_readahead_blks = option; - break; - case Opt_journal_ioprio: - if (match_int(&args[0], &option)) - return 0; - if (option < 0 || option > 7) - break; 
- *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, - option); - break; - case Opt_noauto_da_alloc: - set_opt(sb, NO_AUTO_DA_ALLOC); - break; - case Opt_auto_da_alloc: - if (args[0].from) { - if (match_int(&args[0], &option)) - return 0; - } else - option = 1; /* No argument, default to 1 */ - if (option) - clear_opt(sb, NO_AUTO_DA_ALLOC); + if (arg != 0) + sbi->s_mount_opt |= m->mount_opt; else - set_opt(sb,NO_AUTO_DA_ALLOC); - break; - case Opt_discard: - set_opt(sb, DISCARD); - break; - case Opt_nodiscard: - clear_opt(sb, DISCARD); - break; - case Opt_dioread_nolock: - set_opt(sb, DIOREAD_NOLOCK); - break; - case Opt_dioread_lock: - clear_opt(sb, DIOREAD_NOLOCK); - break; - case Opt_init_itable: - set_opt(sb, INIT_INODE_TABLE); - if (args[0].from) { - if (match_int(&args[0], &option)) - return 0; - } else - option = EXT4_DEF_LI_WAIT_MULT; - if (option < 0) - return 0; - sbi->s_li_wait_mult = option; - break; - case Opt_noinit_itable: - clear_opt(sb, INIT_INODE_TABLE); - break; - default: - ext4_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" " - "or missing value", p); - return 0; + sbi->s_mount_opt &= ~m->mount_opt; } + return 1; + } + ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " + "or missing value", opt); + return -1; +} + +static int parse_options(char *options, struct super_block *sb, + unsigned long *journal_devnum, + unsigned int *journal_ioprio, + int is_remount) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *p; + substring_t args[MAX_OPT_ARGS]; + int token; + + if (!options) + return 1; + + while ((p = strsep(&options, ",")) != NULL) { + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].to = args[0].from = 0; + token = match_token(p, tokens, args); + if (handle_mount_opt(sb, p, token, args, journal_devnum, + journal_ioprio, is_remount) < 0) + return 0; } #ifdef CONFIG_QUOTA if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { @@ -1942,6 +1651,160 @@ set_qf_format: return 1; } +static inline void ext4_show_quota_options(struct seq_file *seq, + struct super_block *sb) +{ +#if defined(CONFIG_QUOTA) + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (sbi->s_jquota_fmt) { + char *fmtname = ""; + + switch (sbi->s_jquota_fmt) { + case QFMT_VFS_OLD: + fmtname = "vfsold"; + break; + case QFMT_VFS_V0: + fmtname = "vfsv0"; + break; + case QFMT_VFS_V1: + fmtname = "vfsv1"; + break; + } + seq_printf(seq, ",jqfmt=%s", fmtname); + } + + if (sbi->s_qf_names[USRQUOTA]) + seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + + if (sbi->s_qf_names[GRPQUOTA]) + seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + + if (test_opt(sb, USRQUOTA)) + seq_puts(seq, ",usrquota"); + + if (test_opt(sb, GRPQUOTA)) + seq_puts(seq, ",grpquota"); +#endif +} + +static const char *token2str(int token) +{ + static const struct match_token *t; + + for (t = tokens; t->token != Opt_err; t++) + if (t->token == token && !strchr(t->pattern, '=')) + break; + return t->pattern; +} + +/* + * Show an option if + * - it's set to a non-default value OR + * - if the per-sb default is different from the global default + */ +static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, + int nodefs) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; + const struct mount_opts *m; + char sep = nodefs ? 
'\n' : ','; + +#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) +#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) + + if (sbi->s_sb_block != 1) + SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); + + for (m = ext4_mount_opts; m->token != Opt_err; m++) { + int want_set = m->flags & MOPT_SET; + if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || + (m->flags & MOPT_CLEAR_ERR)) + continue; + if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) + continue; /* skip if same as the default */ + if ((want_set && + (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || + (!want_set && (sbi->s_mount_opt & m->mount_opt))) + continue; /* select Opt_noFoo vs Opt_Foo */ + SEQ_OPTS_PRINT("%s", token2str(m->token)); + } + + if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID || + le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) + SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid); + if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID || + le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) + SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid); + def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); + if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) + SEQ_OPTS_PUTS("errors=remount-ro"); + if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) + SEQ_OPTS_PUTS("errors=continue"); + if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) + SEQ_OPTS_PUTS("errors=panic"); + if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) + SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); + if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) + SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); + if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) + SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); + if (sb->s_flags & MS_I_VERSION) + SEQ_OPTS_PUTS("i_version"); + if (nodefs || sbi->s_stripe) + SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); + if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + SEQ_OPTS_PUTS("data=journal"); + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + SEQ_OPTS_PUTS("data=ordered"); + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) + SEQ_OPTS_PUTS("data=writeback"); + } + if (nodefs || + sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) + SEQ_OPTS_PRINT("inode_readahead_blks=%u", + sbi->s_inode_readahead_blks); + + if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && + (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) + SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); + + ext4_show_quota_options(seq, sb); + return 0; +} + +static int ext4_show_options(struct seq_file *seq, struct dentry *root) +{ + return _ext4_show_options(seq, root->d_sb, 0); +} + +static int options_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + int rc; + + seq_puts(seq, (sb->s_flags & MS_RDONLY) ? 
"ro" : "rw"); + rc = _ext4_show_options(seq, sb, 1); + seq_puts(seq, "\n"); + return rc; +} + +static int options_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, options_seq_show, PDE(inode)->data); +} + +static const struct file_operations ext4_seq_options_fops = { + .owner = THIS_MODULE, + .open = options_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int read_only) { @@ -2945,7 +2808,7 @@ static int ext4_run_lazyinit_thread(void) ext4_clear_request_list(); kfree(ext4_li_info); ext4_li_info = NULL; - printk(KERN_CRIT "EXT4: error %d creating inode table " + printk(KERN_CRIT "EXT4-fs: error %d creating inode table " "initialization thread\n", err); return err; @@ -3183,11 +3046,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sb, INIT_INODE_TABLE); if (def_mount_opts & EXT4_DEFM_DEBUG) set_opt(sb, DEBUG); - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { - ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", - "2.6.38"); + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) set_opt(sb, GRPID); - } if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sb, NO_UID32); /* xattr user namespace & acls are now defaulted on */ @@ -3240,13 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, - &journal_devnum, &journal_ioprio, NULL, 0)) { + &journal_devnum, &journal_ioprio, 0)) { ext4_msg(sb, KERN_WARNING, "failed to parse options in superblock: %s", sbi->s_es->s_mount_opts); } + sbi->s_def_mount_opt = sbi->s_mount_opt; if (!parse_options((char *) data, sb, &journal_devnum, - &journal_ioprio, NULL, 0)) + &journal_ioprio, 0)) goto failed_mount; if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { @@ -3416,7 +3277,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #else es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); #endif - sb->s_dirt = 1; } /* Handle clustersize */ @@ -3540,6 +3400,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); + if (sbi->s_proc) + proc_create_data("options", S_IRUGO, sbi->s_proc, + &ext4_seq_options_fops, sb); + bgl_lock_init(sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -3694,6 +3558,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); + sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; + /* * The journal may have updated the bg summary counts, so we * need to update the global counters. 
@@ -3861,6 +3727,7 @@ failed_mount2:
     ext4_kvfree(sbi->s_group_desc);
 failed_mount:
     if (sbi->s_proc) {
+        remove_proc_entry("options", sbi->s_proc);
         remove_proc_entry(sb->s_id, ext4_proc_root);
     }
 #ifdef CONFIG_QUOTA
@@ -4090,15 +3957,6 @@ static int ext4_load_journal(struct super_block *sb,
     if (!(journal->j_flags & JBD2_BARRIER))
         ext4_msg(sb, KERN_INFO, "barriers disabled");
 
-    if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
-        err = jbd2_journal_update_format(journal);
-        if (err)  {
-            ext4_msg(sb, KERN_ERR, "error updating journal");
-            jbd2_journal_destroy(journal);
-            return err;
-        }
-    }
-
     if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
         err = jbd2_journal_wipe(journal, !really_read_only);
     if (!err) {
@@ -4385,7 +4243,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 {
     struct ext4_super_block *es;
     struct ext4_sb_info *sbi = EXT4_SB(sb);
-    ext4_fsblk_t n_blocks_count = 0;
     unsigned long old_sb_flags;
     struct ext4_mount_options old_opts;
     int enable_quota = 0;
@@ -4418,8 +4275,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
     /*
      * Allow the "check" option to be passed as a remount option.
      */
-    if (!parse_options(data, sb, NULL, &journal_ioprio,
-               &n_blocks_count, 1)) {
+    if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
         err = -EINVAL;
         goto restore_opts;
     }
@@ -4437,8 +4293,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
     }
 
-    if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
-        n_blocks_count > ext4_blocks_count(es)) {
+    if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
         if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
             err = -EROFS;
             goto restore_opts;
@@ -4513,8 +4368,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
         if (sbi->s_journal)
             ext4_clear_journal_err(sb, es);
         sbi->s_mount_state = le16_to_cpu(es->s_state);
-        if ((err = ext4_group_extend(sb, es, n_blocks_count)))
-            goto restore_opts;
         if (!ext4_setup_super(sb, es, 0))
             sb->s_flags &= ~MS_RDONLY;
         if (EXT4_HAS_INCOMPAT_FEATURE(sb,
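ext4_remount() above snapshots the current options into old_opts and jumps to restore_opts whenever the new option string fails to apply, so a bad remount leaves the mount untouched. A compact userspace sketch of that save/parse/rollback shape, with hypothetical names:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for the real mount state. */
    struct mount_options {
        unsigned int flags;     /* bit 0x1 means read-only here */
    };

    static int parse(const char *data, struct mount_options *o)
    {
        if (strcmp(data, "ro") == 0)
            o->flags |= 0x1;
        else if (strcmp(data, "rw") == 0)
            o->flags &= ~0x1;
        else
            return -1;          /* unrecognized: caller rolls back */
        return 0;
    }

    static int remount(struct mount_options *cur, const char *data)
    {
        struct mount_options old_opts = *cur;  /* snapshot, as ext4 does */

        if (parse(data, cur) < 0)
            goto restore_opts;
        /* ... further remount steps would also jump below on error ... */
        return 0;

    restore_opts:
        *cur = old_opts;  /* failed remount leaves options untouched */
        return -1;
    }

    int main(void)
    {
        struct mount_options m = { 0 };

        remount(&m, "ro");      /* succeeds: flags now 0x1 */
        remount(&m, "bogus");   /* fails: flags stay 0x1 */
        printf("flags = %#x\n", m.flags);
        return 0;
    }

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93a00d89a220..e88748e55c0f 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -82,8 +82,8 @@
         printk("\n"); \
     } while (0)
 #else
-# define ea_idebug(f...)
-# define ea_bdebug(f...)
+# define ea_idebug(inode, fmt, ...)    no_printk(fmt, ##__VA_ARGS__)
+# define ea_bdebug(bh, fmt, ...)       no_printk(fmt, ##__VA_ARGS__)
 #endif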
 
 static void ext4_xattr_cache_insert(struct buffer_head *);
@@ -158,13 +158,10 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
 static inline int
 ext4_xattr_check_block(struct buffer_head *bh)
 {
-    int error;
-
     if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
         BHDR(bh)->h_blocks != cpu_to_le32(1))
         return -EIO;
-    error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
-    return error;
+    return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
 }
 
 static inline int
@@ -220,7 +217,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
     error = -ENODATA;
     if (!EXT4_I(inode)->i_file_acl)
         goto cleanup;
-    ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+    ea_idebug(inode, "reading block %llu",
+          (unsigned long long)EXT4_I(inode)->i_file_acl);
     bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
     if (!bh)
         goto cleanup;
@@ -363,7 +361,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
     error = 0;
     if (!EXT4_I(inode)->i_file_acl)
         goto cleanup;
-    ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
+    ea_idebug(inode, "reading block %llu",
+          (unsigned long long)EXT4_I(inode)->i_file_acl);
     bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
     error = -EIO;
     if (!bh)
@@ -487,18 +486,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
         ext4_free_blocks(handle, inode, bh, 0, 1,
                  EXT4_FREE_BLOCKS_METADATA |
                  EXT4_FREE_BLOCKS_FORGET);
+        unlock_buffer(bh);
     } else {
         le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+        if (ce)
+            mb_cache_entry_release(ce);
+        unlock_buffer(bh);
         error = ext4_handle_dirty_metadata(handle, inode, bh);
         if (IS_SYNC(inode))
             ext4_handle_sync(handle);
         dquot_free_block(inode, 1);
         ea_bdebug(bh, "refcount now=%d; releasing",
               le32_to_cpu(BHDR(bh)->h_refcount));
-        if (ce)
-            mb_cache_entry_release(ce);
     }
-    unlock_buffer(bh);
 out:
     ext4_std_error(inode->i_sb, error);
     return;
@@ -834,7 +834,8 @@ inserted:
             if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                 BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
 
-            ea_idebug(inode, "creating block %d", block);
+            ea_idebug(inode, "creating block %llu",
+                  (unsigned long long)block);
 
             new_bh = sb_getblk(sb, block);
             if (!new_bh) {
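The ea_idebug/ea_bdebug hunk at the top of this file's diff swaps empty macro stubs for no_printk(), so the compiler keeps type-checking format arguments even in non-debug builds; that is what exposed the %u-vs-%llu mismatches fixed in the later hunks (the "add no_printk argument validation, fix fallout" change in this series). A userspace analogue of the idea:

    #include <stdio.h>

    /*
     * Userspace analogue of the kernel's no_printk(): the format string
     * is still type-checked by the compiler, but no code is emitted.
     */
    #define no_printf(fmt, ...)             \
    ({                                      \
        if (0)                              \
            printf(fmt, ##__VA_ARGS__);     \
        0;                                  \
    })

    #ifdef EA_DEBUG
    # define ea_debug(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
    #else
    # define ea_debug(fmt, ...) no_printf(fmt, ##__VA_ARGS__)
    #endif

    int main(void)
    {
        unsigned long long block = 42;

        /* With EA_DEBUG unset this compiles away entirely, but a bad
         * format (e.g. %d for an unsigned long long) still warns. */
        ea_debug("creating block %llu", block);
        return 0;
    }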