From 12fc760fd632a96f49e96f519c4aed4eb279bb61 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 25 Feb 2013 17:38:02 +0900 Subject: f2fs: fix overflow when calculating utilization on 32-bit Use div_u64 to fix overflow when calculating utilization. *long int* is 4-bytes on 32-bit so (user blocks * 100) might be overflow if disk size is over e.g. 512GB. Signed-off-by: Changman Lee Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 552dadbb2327..e399bd4d3af8 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -464,8 +464,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) static inline int utilization(struct f2fs_sb_info *sbi) { - return (long int)valid_user_blocks(sbi) * 100 / - (long int)sbi->user_block_count; + return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); } /* -- cgit v1.2.3 From 12faafe45477244bc32e2c58f74f7305cc7e84fa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Mar 2013 17:45:15 +0900 Subject: f2fs: fix to unlock node page when it was truncated If the node page was truncated, its block address became zero. This means that we don't need to write the node page, but have to unlock NODE_WRITE, decrease the number of dirty node pages, and then unlock_page before returning the f2fs_write_node_page with zero. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e275218904ed..3dc63f4cf2b1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1106,15 +1106,15 @@ static int f2fs_write_node_page(struct page *page, /* This page is already truncated */ if (ni.blk_addr == NULL_ADDR) - return 0; + goto out; set_page_writeback(page); /* insert node offset */ write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr); +out: dec_page_count(sbi, F2FS_DIRTY_NODES); - mutex_unlock_op(sbi, NODE_WRITE); unlock_page(page); return 0; -- cgit v1.2.3 From 66d36a2944df461448159be5af13049dd2689e77 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 26 Feb 2013 12:43:46 +0900 Subject: f2fs: read with READ_SYNC when getting dnode page The get_node_page_ra tries to: 1. grab or read a target node page for the given nid, 2. then, call ra_node_page to read other adjacent node pages in advance. So, when we try to read a target node page by #1, we should submit bio with READ_SYNC instead of READA. And, in #2, READA should be used. Signed-off-by: Jaegeuk Kim Reviewed-by: Namjae Jeon --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3dc63f4cf2b1..efcada7becd5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -930,7 +930,7 @@ repeat: if (!page) return ERR_PTR(-ENOMEM); - err = read_node_page(page, READA); + err = read_node_page(page, READ_SYNC); if (err) { f2fs_put_page(page, 1); return ERR_PTR(err); -- cgit v1.2.3 From 266e97a81cf73d1a0dac5f68391da382630a80b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 26 Feb 2013 13:10:46 +0900 Subject: f2fs: introduce readahead mode of node pages Previously, f2fs reads several node pages ahead when get_dnode_of_data is called with RDONLY_NODE flag. And, this flag is set by the following functions. - get_data_block_ro - get_lock_data_page - do_write_data_page - truncate_blocks - truncate_hole However, this readahead mechanism is initially introduced for the use of get_data_block_ro to enhance the sequential read performance. So, let's clarify all the cases with the additional modes as follows. enum { ALLOC_NODE, /* allocate a new node page if needed */ LOOKUP_NODE, /* look up a node without readahead */ LOOKUP_NODE_RA, /* * look up a node with readahead called * by get_datablock_ro. */ } Signed-off-by: Jaegeuk Kim Reviewed-by: Namjae Jeon --- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/f2fs.h | 12 ++++++++---- fs/f2fs/file.c | 8 ++++---- fs/f2fs/node.c | 6 +++--- fs/f2fs/recovery.c | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7bd22a201125..277966a8547a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -183,7 +183,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) f2fs_put_page(page, 0); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, RDONLY_NODE); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) return ERR_PTR(err); f2fs_put_dnode(&dn); @@ -222,7 +222,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, RDONLY_NODE); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) return ERR_PTR(err); f2fs_put_dnode(&dn); @@ -262,7 +262,7 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, int err; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) return ERR_PTR(err); @@ -392,7 +392,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); if (err) return (err == -ENOENT) ? 0 : err; @@ -443,7 +443,7 @@ int do_write_data_page(struct page *page) int err = 0; set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, RDONLY_NODE); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) return err; @@ -607,7 +607,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, mutex_lock_op(sbi, DATA_NEW); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, 0); + err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) { mutex_unlock_op(sbi, DATA_NEW); f2fs_put_page(page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cc2213afdcc7..be7ae70b0b1d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -125,11 +125,15 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) * file keeping -1 as its node offset to * distinguish from index node blocks. */ -#define RDONLY_NODE 1 /* - * specify a read-only mode when getting - * a node block. 0 is read-write mode. - * used by get_dnode_of_data(). +enum { + ALLOC_NODE, /* allocate a new node page if needed */ + LOOKUP_NODE, /* look up a node without readahead */ + LOOKUP_NODE_RA, /* + * look up a node with readahead called + * by get_datablock_ro. */ +}; + #define F2FS_LINK_MAX 32000 /* maximum link count per file */ /* for in-memory extent cache entry */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 958a46da19ae..269645e23519 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -43,7 +43,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, /* block allocation */ set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, 0); + err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); if (err) { mutex_unlock_op(sbi, DATA_NEW); goto out; @@ -258,7 +258,7 @@ static int truncate_blocks(struct inode *inode, u64 from) mutex_lock_op(sbi, DATA_TRUNC); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, free_from, RDONLY_NODE); + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; @@ -420,7 +420,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) mutex_lock_op(sbi, DATA_TRUNC); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, RDONLY_NODE); + err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { mutex_unlock_op(sbi, DATA_TRUNC); if (err == -ENOENT) @@ -504,7 +504,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, mutex_lock_op(sbi, DATA_NEW); set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, index, 0); + ret = get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { mutex_unlock_op(sbi, DATA_NEW); break; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index efcada7becd5..65ec2eabb392 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -384,7 +384,7 @@ got: /* * Caller should call f2fs_put_dnode(dn). */ -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct page *npage[4]; @@ -411,7 +411,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) for (i = 1; i <= level; i++) { bool done = false; - if (!nids[i] && !ro) { + if (!nids[i] && mode == ALLOC_NODE) { mutex_lock_op(sbi, NODE_NEW); /* alloc new node */ @@ -434,7 +434,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) alloc_nid_done(sbi, nids[i]); mutex_unlock_op(sbi, NODE_NEW); done = true; - } else if (ro && i == level && level > 1) { + } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); if (IS_ERR(npage[i])) { err = PTR_ERR(npage[i]); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b235215ac138..6b82e2034cfd 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -247,7 +247,7 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, end = start + ADDRS_PER_BLOCK; set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, start, 0)) + if (get_dnode_of_data(&dn, start, ALLOC_NODE)) return; wait_on_page_writeback(dn.node_page); -- cgit v1.2.3 From 52c2db3f95ff8e8d9650885d6d66b8258ded1e38 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Wed, 20 Feb 2013 07:47:06 +0900 Subject: f2fs: check the level before calling get_nid function The caller of get_nid should be careful not to put lower value than NODE_DIR1_BLOCK in case of level is zero. Signed-off-by: Changman Lee Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 65ec2eabb392..d408e69294c8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -403,7 +403,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) return PTR_ERR(npage[0]); parent = npage[0]; - nids[1] = get_nid(parent, offset[0], true); + if (level != 0) + nids[1] = get_nid(parent, offset[0], true); dn->inode_page = npage[0]; dn->inode_page_locked = true; -- cgit v1.2.3 From e0f56cb44b05abacb6aa8fa8695c28431e84b7a0 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 2 Feb 2013 23:51:51 +0900 Subject: f2fs: optimize get node page readahead part We can remove the call to find_get_page to get a page from the cache and check for up-to-date, instead we can make use of grab_cache_page part itself to fetch the page from the cache. So, removing the call and moving the PageUptodate at proper place, also taken care of moving the lock_page condition in the page_hit part. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d408e69294c8..58f7216993c7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -921,19 +921,17 @@ struct page *get_node_page_ra(struct page *parent, int start) if (!nid) return ERR_PTR(-ENOENT); - page = find_get_page(mapping, nid); - if (page && PageUptodate(page)) - goto page_hit; - f2fs_put_page(page, 0); - repeat: page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); + else if (PageUptodate(page)) + goto page_hit; err = read_node_page(page, READ_SYNC); + unlock_page(page); if (err) { - f2fs_put_page(page, 1); + f2fs_put_page(page, 0); return ERR_PTR(err); } @@ -947,8 +945,9 @@ repeat: ra_node_page(sbi, nid); } -page_hit: lock_page(page); + +page_hit: if (PageError(page)) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); -- cgit v1.2.3 From 3aa770a9c9d077283b1aa07e8549a4fdc41fc5ed Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 2 Mar 2013 12:40:50 +0900 Subject: f2fs: optimize and change return path in lookup_free_nid_list Optimize and change return path in lookup_free_nid_list Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 58f7216993c7..6006e8e8a5f3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1195,14 +1195,13 @@ const struct address_space_operations f2fs_node_aops = { static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) { struct list_head *this; - struct free_nid *i = NULL; + struct free_nid *i; list_for_each(this, head) { i = list_entry(this, struct free_nid, list); if (i->nid == n) - break; - i = NULL; + return i; } - return i; + return NULL; } static void __del_from_free_nid_list(struct free_nid *i) -- cgit v1.2.3 From 5a20d339c785d98d8b050b9afc098e4184a6098c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 3 Mar 2013 13:58:05 +0900 Subject: f2fs: align f2fs maximum name length to linux based filesystem The maximum filename length supported in linux is 255 characters. So let's follow that. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 +++ fs/f2fs/namei.c | 2 +- fs/f2fs/super.c | 2 +- include/linux/f2fs_fs.h | 17 +++++++++-------- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a1f38443ecee..2851ae6948a1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -189,6 +189,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + if (namelen > F2FS_NAME_LEN) + return NULL; + if (npages == 0) return NULL; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1a49b881bac0..d4a171b1a68b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -197,7 +197,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_dir_entry *de; struct page *page; - if (dentry->d_name.len > F2FS_MAX_NAME_LEN) + if (dentry->d_name.len > F2FS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); de = f2fs_find_entry(dir, &dentry->d_name, &page); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c117649a035..1c7f595ca47c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -180,7 +180,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = sbi->total_node_count; buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); - buf->f_namelen = F2FS_MAX_NAME_LEN; + buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f9a12f6243a5..df6fab82f87e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -139,7 +139,7 @@ struct f2fs_extent { __le32 len; /* lengh of the extent */ } __packed; -#define F2FS_MAX_NAME_LEN 256 +#define F2FS_NAME_LEN 255 #define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ @@ -165,7 +165,8 @@ struct f2fs_inode { __le32 i_flags; /* file attributes */ __le32 i_pino; /* parent inode number */ __le32 i_namelen; /* file name length */ - __u8 i_name[F2FS_MAX_NAME_LEN]; /* file name for SPOR */ + __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ + __u8 i_reserved2; /* for backward compatibility */ struct f2fs_extent i_ext; /* caching a largest extent */ @@ -362,10 +363,10 @@ struct f2fs_summary_block { typedef __le32 f2fs_hash_t; /* One directory entry slot covers 8bytes-long file name */ -#define F2FS_NAME_LEN 8 -#define F2FS_NAME_LEN_BITS 3 +#define F2FS_SLOT_LEN 8 +#define F2FS_SLOT_LEN_BITS 3 -#define GET_DENTRY_SLOTS(x) ((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS) +#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) /* the number of dentry in a block */ #define NR_DENTRY_IN_BLOCK 214 @@ -377,10 +378,10 @@ typedef __le32 f2fs_hash_t; #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) #define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ - F2FS_NAME_LEN) * \ + F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) -/* One directory entry slot representing F2FS_NAME_LEN-sized file name */ +/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */ struct f2fs_dir_entry { __le32 hash_code; /* hash code of file name */ __le32 ino; /* inode number */ @@ -394,7 +395,7 @@ struct f2fs_dentry_block { __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; __u8 reserved[SIZE_OF_RESERVED]; struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK]; - __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN]; + __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; /* file types used in inode_info->flags */ -- cgit v1.2.3 From 25c0a6e529b56ca010e1f46239edd07c1b484b63 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 2 Mar 2013 12:41:31 +0900 Subject: f2fs: avoid extra ++ while returning from get_node_path In all the breaking conditions in get_node_path, 'n' is used to track index in offset[] array, but while breaking out also, in all paths n++ is done. So, remove the ++ from breaking paths. Also, avoid reset of 'level=0' in first case. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6006e8e8a5f3..a3cb1ff34f8e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -320,15 +320,14 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[0] = 0; if (block < direct_index) { - offset[n++] = block; - level = 0; + offset[n] = block; goto got; } block -= direct_index; if (block < direct_blks) { offset[n++] = NODE_DIR1_BLOCK; noffset[n] = 1; - offset[n++] = block; + offset[n] = block; level = 1; goto got; } @@ -336,7 +335,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) if (block < direct_blks) { offset[n++] = NODE_DIR2_BLOCK; noffset[n] = 2; - offset[n++] = block; + offset[n] = block; level = 1; goto got; } @@ -346,7 +345,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[n] = 3; offset[n++] = block / direct_blks; noffset[n] = 4 + offset[n - 1]; - offset[n++] = block % direct_blks; + offset[n] = block % direct_blks; level = 2; goto got; } @@ -356,7 +355,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[n] = 4 + dptrs_per_blk; offset[n++] = block / direct_blks; noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; - offset[n++] = block % direct_blks; + offset[n] = block % direct_blks; level = 2; goto got; } @@ -371,7 +370,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[n] = 7 + (dptrs_per_blk * 2) + offset[n - 2] * (dptrs_per_blk + 1) + offset[n - 1]; - offset[n++] = block % direct_blks; + offset[n] = block % direct_blks; level = 3; goto got; } else { -- cgit v1.2.3 From 393ff91f57c87d48ffed30878be6e3e486d3a00a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Mar 2013 21:29:23 +0900 Subject: f2fs: reduce unncessary locking pages during read This patch reduces redundant locking and unlocking pages during read operations. In f2fs_readpage, let's use wait_on_page_locked() instead of lock_page. And then, when we need to modify any data finally, let's lock the page so that we can avoid lock contention. [readpage rule] - The f2fs_readpage returns unlocked page, or released page too in error cases. - Its caller should handle read error, -EIO, after locking the page, which indicates read completion. - Its caller should check PageUptodate after grab_cache_page. Signed-off-by: Changman Lee Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 ++++++----- fs/f2fs/data.c | 58 +++++++++++++++++++++++++++------------------------- fs/f2fs/node.c | 58 +++++++++++++++++++++++++++++++--------------------- fs/f2fs/recovery.c | 31 +++++++++++++++++----------- 4 files changed, 91 insertions(+), 68 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2b6fc131e2ce..d947e66ee8a8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -57,13 +57,15 @@ repeat: cond_resched(); goto repeat; } - if (f2fs_readpage(sbi, page, index, READ_SYNC)) { - f2fs_put_page(page, 1); + if (PageUptodate(page)) + goto out; + + if (f2fs_readpage(sbi, page, index, READ_SYNC)) goto repeat; - } - mark_page_accessed(page); - /* We do not allow returning an errorneous page */ + lock_page(page); +out: + mark_page_accessed(page); return page; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 277966a8547a..c8e20b618913 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -199,12 +199,17 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) if (!page) return ERR_PTR(-ENOMEM); + if (PageUptodate(page)) { + unlock_page(page); + return page; + } + err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); - return ERR_PTR(err); + wait_on_page_locked(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); } - unlock_page(page); return page; } @@ -241,9 +246,13 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) BUG_ON(dn.data_blkaddr == NULL_ADDR); err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return ERR_PTR(err); + + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); } return page; } @@ -283,14 +292,17 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); } else { err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return ERR_PTR(err); + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); } } - SetPageUptodate(page); if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { @@ -325,22 +337,14 @@ static void read_end_io(struct bio *bio, int err) /* * Fill the locked page with data located in the block address. - * Read operation is synchronous, and caller must unlock the page. + * Return unlocked page. */ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, block_t blk_addr, int type) { struct block_device *bdev = sbi->sb->s_bdev; - bool sync = (type == READ_SYNC); struct bio *bio; - /* This page can be already read by other threads */ - if (PageUptodate(page)) { - if (!sync) - unlock_page(page); - return 0; - } - down_read(&sbi->bio_sem); /* Allocate a new bio */ @@ -354,18 +358,12 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, kfree(bio->bi_private); bio_put(bio); up_read(&sbi->bio_sem); + f2fs_put_page(page, 1); return -EFAULT; } submit_bio(type, bio); up_read(&sbi->bio_sem); - - /* wait for read completion if sync */ - if (sync) { - lock_page(page); - if (PageError(page)) - return -EIO; - } return 0; } @@ -636,18 +634,22 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, /* Reading beyond i_size is simple: memset to zero */ zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); - return 0; + goto out; } if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return err; + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return -EIO; } } +out: SetPageUptodate(page); clear_cold_data(page); return 0; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a3cb1ff34f8e..9e6ed6708fa8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -100,10 +100,13 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) page = grab_cache_page(mapping, index); if (!page) continue; - if (f2fs_readpage(sbi, page, index, READ)) { + if (PageUptodate(page)) { f2fs_put_page(page, 1); continue; } + if (f2fs_readpage(sbi, page, index, READ)) + continue; + f2fs_put_page(page, 0); } } @@ -851,8 +854,16 @@ static int read_node_page(struct page *page, int type) get_node_info(sbi, page->index, &ni); - if (ni.blk_addr == NULL_ADDR) + if (ni.blk_addr == NULL_ADDR) { + f2fs_put_page(page, 1); return -ENOENT; + } + + if (PageUptodate(page)) { + unlock_page(page); + return 0; + } + return f2fs_readpage(sbi, page, ni.blk_addr, type); } @@ -865,19 +876,18 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) struct page *apage; apage = find_get_page(mapping, nid); - if (apage && PageUptodate(apage)) - goto release_out; + if (apage && PageUptodate(apage)) { + f2fs_put_page(apage, 0); + return; + } f2fs_put_page(apage, 0); apage = grab_cache_page(mapping, nid); if (!apage) return; - if (read_node_page(apage, READA)) - unlock_page(apage); - -release_out: - f2fs_put_page(apage, 0); + if (read_node_page(apage, READA) == 0) + f2fs_put_page(apage, 0); return; } @@ -892,11 +902,14 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) return ERR_PTR(-ENOMEM); err = read_node_page(page, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err) return ERR_PTR(err); - } + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } BUG_ON(nid != nid_of_node(page)); mark_page_accessed(page); return page; @@ -928,11 +941,8 @@ repeat: goto page_hit; err = read_node_page(page, READ_SYNC); - unlock_page(page); - if (err) { - f2fs_put_page(page, 0); + if (err) return ERR_PTR(err); - } /* Then, try readahead for siblings of the desired node */ end = start + MAX_RA_NODE; @@ -957,6 +967,7 @@ page_hit: f2fs_put_page(page, 1); goto repeat; } + mark_page_accessed(page); return page; } @@ -1473,23 +1484,24 @@ int restore_node_summary(struct f2fs_sb_info *sbi, sum_entry = &sum->entries[0]; for (i = 0; i < last_offset; i++, sum_entry++) { + /* + * In order to read next node page, + * we must clear PageUptodate flag. + */ + ClearPageUptodate(page); + if (f2fs_readpage(sbi, page, addr, READ_SYNC)) goto out; + lock_page(page); rn = (struct f2fs_node *)page_address(page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; addr++; - - /* - * In order to read next node page, - * we must clear PageUptodate flag. - */ - ClearPageUptodate(page); } -out: unlock_page(page); +out: __free_pages(page, 0); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6b82e2034cfd..2d86eb26c493 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -112,11 +112,16 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) while (1) { struct fsync_inode_entry *entry; - if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) goto out; - if (cp_ver != cpver_of_node(page)) - goto out; + lock_page(page); + + if (cp_ver != cpver_of_node(page)) { + err = -EINVAL; + goto unlock_out; + } if (!is_fsync_dnode(page)) goto next; @@ -131,7 +136,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_INODE(page) && is_dent_dnode(page)) { if (recover_inode_page(sbi, page)) { err = -ENOMEM; - goto out; + goto unlock_out; } } @@ -139,14 +144,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); if (!entry) { err = -ENOMEM; - goto out; + goto unlock_out; } entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - goto out; + goto unlock_out; } list_add_tail(&entry->list, head); @@ -155,15 +160,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_INODE(page)) { err = recover_inode(entry->inode, page); if (err) - goto out; + goto unlock_out; } next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); - ClearPageUptodate(page); } -out: +unlock_out: unlock_page(page); +out: __free_pages(page, 0); return err; } @@ -319,8 +324,10 @@ static void recover_data(struct f2fs_sb_info *sbi, if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) goto out; + lock_page(page); + if (cp_ver != cpver_of_node(page)) - goto out; + goto unlock_out; entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) @@ -336,10 +343,10 @@ static void recover_data(struct f2fs_sb_info *sbi, next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); - ClearPageUptodate(page); } -out: +unlock_out: unlock_page(page); +out: __free_pages(page, 0); allocate_new_segments(sbi); -- cgit v1.2.3 From 08d8058be6d11bd81f2ed75fc0ecdf55b7685655 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Mar 2013 17:49:22 +0900 Subject: f2fs: should check the node page was truncated first Currently, f2fs doesn't reclaim any node pages. However, if we found that a node page was truncated by checking its block address with zero during f2fs_write_node_page, we should not skip that node page and return zero to reclaim it. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9e6ed6708fa8..90221cc7247c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1097,13 +1097,6 @@ static int f2fs_write_node_page(struct page *page, block_t new_addr; struct node_info ni; - if (wbc->for_reclaim) { - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; - } - wait_on_page_writeback(page); mutex_lock_op(sbi, NODE_WRITE); @@ -1118,6 +1111,14 @@ static int f2fs_write_node_page(struct page *page, if (ni.blk_addr == NULL_ADDR) goto out; + if (wbc->for_reclaim) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + mutex_unlock_op(sbi, NODE_WRITE); + return AOP_WRITEPAGE_ACTIVATE; + } + set_page_writeback(page); /* insert node offset */ -- cgit v1.2.3 From 48cb76c7be7056810cdcdcdcd8d90d3fdc4e250f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 14 Mar 2013 08:49:58 +0900 Subject: f2fs: scan next nat page to reuse free nids in there When we build new free nids, let's scan the just next NAT page instead of skipping a couple of previously scanned pages in order to reuse free nids in there. Otherwise, we can use too much wide range of nids even though several nids were deallocated, and also their node pages can be cached in the node_inode's address space. This means that we can retain lots of clean pages in the main memory, which induces mm's reclaiming overhead. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 90221cc7247c..94951d9aff4c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1317,7 +1317,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi) break; } - nm_i->next_scan_nid = nid; + /* go to the next nat page in order to reuse free nids first */ + nm_i->next_scan_nid = nm_i->init_scan_nid + NAT_ENTRY_PER_BLOCK; /* find free nids from current sum_pages */ mutex_lock(&curseg->curseg_mutex); -- cgit v1.2.3 From c3850aa1cb25872fddacd7abd8dfb021411e92ee Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 14 Mar 2013 09:24:32 +0900 Subject: f2fs: fix return value of releasepage for node and data If the return value of releasepage is equal to zero, the page cannot be reclaimed. Instead, we should return 1 in order to reclaim clean pages. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c8e20b618913..ea8be6fc38f1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -683,7 +683,7 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) static int f2fs_release_data_page(struct page *page, gfp_t wait) { ClearPagePrivate(page); - return 0; + return 1; } static int f2fs_set_data_page_dirty(struct page *page) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94951d9aff4c..bf9172bbbb00 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -933,7 +933,6 @@ struct page *get_node_page_ra(struct page *parent, int start) if (!nid) return ERR_PTR(-ENOENT); -repeat: page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); @@ -961,12 +960,6 @@ page_hit: f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - - /* Has the page been truncated? */ - if (page->mapping != mapping) { - f2fs_put_page(page, 1); - goto repeat; - } mark_page_accessed(page); return page; } @@ -1189,7 +1182,7 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) static int f2fs_release_node_page(struct page *page, gfp_t wait) { ClearPagePrivate(page); - return 0; + return 1; } /* @@ -1630,8 +1623,6 @@ flush_now: write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); - - /* We can reuse this freed nid at this point */ add_free_nid(NM_I(sbi), nid); } else { write_lock(&nm_i->nat_tree_lock); -- cgit v1.2.3 From 04431c44e55613a91ced16c523f749c08dff91bf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 16 Mar 2013 08:34:37 +0900 Subject: f2fs: fix not to allocate max_nid The build_free_nid should not add free nids over nm_i->max_nid. But, there was a hole that invalid free nid was added by the following scenario. Let's suppose nm_i->max_nid = 150 and the last NAT page has 100 ~ 200 nids. build_free_nids - get_current_nat_page loads the last NAT page - scan_nat_page can add 100 ~ 200 nids -> Bug here! So, when scanning an NAT page, we should check each candidate whether it is over max_nid or not. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bf9172bbbb00..f7b03ba9c0d7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1268,6 +1268,8 @@ static int scan_nat_page(struct f2fs_nm_info *nm_i, i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + if (start_nid >= nm_i->max_nid) + break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); BUG_ON(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) -- cgit v1.2.3 From ae51fb31b8c3eb0cedc223782832be393e53623b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 16 Mar 2013 11:13:04 +0900 Subject: f2fs: fix to call WRITE_FLUSH at the end of fsync The fsync call should be ended after flushing the in-device caches. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 269645e23519..ff018a42e435 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -178,6 +179,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); } out: mutex_unlock(&inode->i_mutex); -- cgit v1.2.3 From d3ee456dfbed1992bcaa0096d9bc76a691b0e700 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 17 Mar 2013 17:26:14 +0900 Subject: f2fs: notify when discard is not supported Change f2fs so that a warning is emitted when an attempt is made to mount a filesystem with the unsupported discard option. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1c7f595ca47c..022b32a14f34 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "f2fs.h" @@ -650,6 +651,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto fail; + if (test_opt(sbi, DISCARD)) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + if (!blk_queue_discard(q)) + f2fs_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but " + "the device does not support discard"); + } + return 0; fail: stop_gc_thread(sbi); -- cgit v1.2.3 From 7c909772f1222dd82098659da4d0c41d8a051790 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 17 Mar 2013 17:26:39 +0900 Subject: f2fs: reorganize f2fs_setxattr make use of F2FS_NAME_LEN for name length checking, change return conditions at few places, by assigning storing the errorvalue in 'error' and making a common exit path. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8038c0496504..3bfea80610ff 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -310,12 +310,13 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, if (name == NULL) return -EINVAL; - name_len = strlen(name); if (value == NULL) value_len = 0; - if (name_len > 255 || value_len > MAX_VALUE_LEN) + name_len = strlen(name); + + if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN) return -ERANGE; f2fs_balance_fs(sbi); @@ -326,8 +327,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, struct dnode_of_data dn; if (!alloc_nid(sbi, &fi->i_xattr_nid)) { - mutex_unlock_op(sbi, NODE_NEW); - return -ENOSPC; + error = -ENOSPC; + goto exit; } set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); mark_inode_dirty(inode); @@ -336,8 +337,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, if (IS_ERR(page)) { alloc_nid_failed(sbi, fi->i_xattr_nid); fi->i_xattr_nid = 0; - mutex_unlock_op(sbi, NODE_NEW); - return PTR_ERR(page); + error = PTR_ERR(page); + goto exit; } alloc_nid_done(sbi, fi->i_xattr_nid); @@ -349,8 +350,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, /* The inode already has an extended attribute block. */ page = get_node_page(sbi, fi->i_xattr_nid); if (IS_ERR(page)) { - mutex_unlock_op(sbi, NODE_NEW); - return PTR_ERR(page); + error = PTR_ERR(page); + goto exit; } base_addr = page_address(page); @@ -438,6 +439,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, return 0; cleanup: f2fs_put_page(page, 1); +exit: mutex_unlock_op(sbi, NODE_NEW); return error; } -- cgit v1.2.3 From c0d39e65ba324390eb0ffb60661ab12104e5fcc7 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 17 Mar 2013 17:26:53 +0900 Subject: f2fs: fix return values from validate superblock validate super block is not returning with proper values. When failure from sb_bread it should reflect there is an EIO otherwise it should return of EINVAL. Returning, '1' is not conveying proper message as the return type. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 022b32a14f34..1db5ebe6692e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -474,7 +474,7 @@ static int validate_superblock(struct super_block *sb, if (!*raw_super_buf) { f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", super); - return 1; + return -EIO; } *raw_super = (struct f2fs_super_block *) @@ -486,7 +486,7 @@ static int validate_superblock(struct super_block *sb, f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " "in %s superblock", super); - return 1; + return -EINVAL; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) @@ -509,9 +509,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - if (validate_superblock(sb, &raw_super, &raw_super_buf, 0)) { + err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); + if (err) { brelse(raw_super_buf); - if (validate_superblock(sb, &raw_super, &raw_super_buf, 1)) + /* check secondary superblock when primary failed */ + err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); + if (err) goto free_sb_buf; } /* init some FS parameters */ -- cgit v1.2.3 From 064e0823285a41f5ccb92f26a661df5f44cac3eb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 17 Mar 2013 17:27:20 +0900 Subject: f2fs: avoid BUG_ON from check_nid_range and update return path in do_read_inode In function check_nid_range, there is no need to trigger BUG_ON and make kernel stop. Instead it could just check and indicate the inode number to be EINVAL. Update the return path in do_read_inode to use the return from check_nid_range. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat [Jaegeuk: replace BUG_ON with WARN_ON] Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 +++++-- fs/f2fs/inode.c | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index be7ae70b0b1d..06ff6a51c700 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -515,9 +515,12 @@ static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t) /* * Check whether the given nid is within node id range. */ -static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { - BUG_ON((nid >= NM_I(sbi)->max_nid)); + WARN_ON((nid >= NM_I(sbi)->max_nid)); + if (nid >= NM_I(sbi)->max_nid) + return -EINVAL; + return 0; } #define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ddae412d30c8..e0e8308594a5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -44,7 +44,11 @@ static int do_read_inode(struct inode *inode) struct f2fs_inode *ri; /* Check if ino is within scope */ - check_nid_range(sbi, inode->i_ino); + if (check_nid_range(sbi, inode->i_ino)) { + f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", + (unsigned long) inode->i_ino); + return -EINVAL; + } node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) -- cgit v1.2.3 From 111d2495a8a8fbd8e3bb0f1c1c60f977b1386249 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 19 Mar 2013 08:03:35 +0900 Subject: f2fs: fix typo in comments Correct spelling typo in comments Signed-off-by: Masanari Iida Acked-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 06ff6a51c700..5bb87e0216f5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -141,7 +141,7 @@ struct extent_info { rwlock_t ext_lock; /* rwlock for consistency */ unsigned int fofs; /* start offset in a file */ u32 blk_addr; /* start block address of the extent */ - unsigned int len; /* lenth of the extent */ + unsigned int len; /* length of the extent */ }; /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 94b8a0c48453..2e3eb2d4fc30 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -222,7 +222,7 @@ static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, } /* - * This function is called from two pathes. + * This function is called from two paths. * One is garbage collection and the other is SSR segment selection. * When it is called during GC, it just gets a victim segment * and it does not remove it from dirty seglist. diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1db5ebe6692e..c9ef88da0723 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -83,7 +83,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); - /* Initilize f2fs-specific inode info */ + /* Initialize f2fs-specific inode info */ fi->vfs_inode.i_version = 1; atomic_set(&fi->dirty_dents, 0); fi->i_current_depth = 1; -- cgit v1.2.3 From 6ead114232f786e3ef7a034c8617f2a4df8e5226 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 20 Mar 2013 19:01:06 +0900 Subject: f2fs: fix the recovery flow to handle errors correctly We should handle errors during the recovery flow correctly. For example, if we get -ENOMEM, we should report a mount failure instead of conducting the remained mount procedure. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/recovery.c | 46 ++++++++++++++++++++++++++++------------------ fs/f2fs/super.c | 9 +++++++-- 3 files changed, 36 insertions(+), 21 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5bb87e0216f5..109e12d21a36 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1027,7 +1027,7 @@ void destroy_gc_caches(void); /* * recovery.c */ -void recover_fsync_data(struct f2fs_sb_info *); +int recover_fsync_data(struct f2fs_sb_info *); bool space_for_roll_forward(struct f2fs_sb_info *); /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2d86eb26c493..61bdaa755906 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -118,10 +118,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) lock_page(page); - if (cp_ver != cpver_of_node(page)) { - err = -EINVAL; + if (cp_ver != cpver_of_node(page)) goto unlock_out; - } if (!is_fsync_dnode(page)) goto next; @@ -134,10 +132,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) FI_INC_LINK); } else { if (IS_INODE(page) && is_dent_dnode(page)) { - if (recover_inode_page(sbi, page)) { - err = -ENOMEM; + err = recover_inode_page(sbi, page); + if (err) goto unlock_out; - } } /* add this fsync inode to the list */ @@ -237,13 +234,14 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, iput(inode); } -static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, +static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, block_t blkaddr) { unsigned int start, end; struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; + int err = 0; start = start_bidx_of_node(ofs_of_node(page)); if (IS_INODE(page)) @@ -252,8 +250,9 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, end = start + ADDRS_PER_BLOCK; set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, start, ALLOC_NODE)) - return; + err = get_dnode_of_data(&dn, start, ALLOC_NODE); + if (err) + return err; wait_on_page_writeback(dn.node_page); @@ -298,14 +297,16 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); f2fs_put_dnode(&dn); + return 0; } -static void recover_data(struct f2fs_sb_info *sbi, +static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head, int type) { unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); struct curseg_info *curseg; struct page *page; + int err = 0; block_t blkaddr; /* get node pages in the current segment */ @@ -315,13 +316,15 @@ static void recover_data(struct f2fs_sb_info *sbi, /* read node page */ page = alloc_page(GFP_NOFS | __GFP_ZERO); if (IS_ERR(page)) - return; + return -ENOMEM; + lock_page(page); while (1) { struct fsync_inode_entry *entry; - if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) + err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + if (err) goto out; lock_page(page); @@ -333,7 +336,9 @@ static void recover_data(struct f2fs_sb_info *sbi, if (!entry) goto next; - do_recover_data(sbi, entry->inode, page, blkaddr); + err = do_recover_data(sbi, entry->inode, page, blkaddr); + if (err) + goto out; if (entry->blkaddr == blkaddr) { iput(entry->inode); @@ -349,22 +354,26 @@ unlock_out: out: __free_pages(page, 0); - allocate_new_segments(sbi); + if (!err) + allocate_new_segments(sbi); + return err; } -void recover_fsync_data(struct f2fs_sb_info *sbi) +int recover_fsync_data(struct f2fs_sb_info *sbi) { struct list_head inode_list; + int err; fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry), NULL); if (unlikely(!fsync_entry_slab)) - return; + return -ENOMEM; INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ - if (find_fsync_dnodes(sbi, &inode_list)) + err = find_fsync_dnodes(sbi, &inode_list); + if (err) goto out; if (list_empty(&inode_list)) @@ -372,11 +381,12 @@ void recover_fsync_data(struct f2fs_sb_info *sbi) /* step #2: recover data */ sbi->por_doing = 1; - recover_data(sbi, &inode_list, CURSEG_WARM_NODE); + err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); sbi->por_doing = 0; BUG_ON(!list_empty(&inode_list)); out: destroy_fsync_dnodes(sbi, &inode_list); kmem_cache_destroy(fsync_entry_slab); write_checkpoint(sbi, false); + return err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c9ef88da0723..252890ef8dbc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -642,8 +642,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) - recover_fsync_data(sbi); + if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + err = recover_fsync_data(sbi); + if (err) { + f2fs_msg(sb, KERN_ERR, "Failed to recover fsync data"); + goto free_root_inode; + } + } /* After POR, we can run background GC thread */ err = start_gc_thread(sbi); -- cgit v1.2.3 From 0ff153a2f1fa7ef31d6d9bc9ce6c3815dede55e6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 20 Mar 2013 14:58:38 +0900 Subject: f2fs: do not skip writing file meta during fsync This patch removes data_version check flow during the fsync call. The original purpose for the use of data_version was to avoid writng inode pages redundantly by the fsync calls repeatedly. However, when user can modify file meta and then call fsync, we should not skip fsync procedure. So, let's remove this condition check and hope that user triggers in right manner. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 --- fs/f2fs/f2fs.h | 1 - fs/f2fs/file.c | 10 ---------- fs/f2fs/inode.c | 1 - 4 files changed, 15 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ea8be6fc38f1..47a2d7c87ea9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -435,7 +435,6 @@ static int f2fs_read_data_pages(struct file *file, int do_write_data_page(struct page *page) { struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); block_t old_blk_addr, new_blk_addr; struct dnode_of_data dn; int err = 0; @@ -465,8 +464,6 @@ int do_write_data_page(struct page *page) write_data_page(inode, page, &dn, old_blk_addr, &new_blk_addr); update_extent_cache(new_blk_addr, &dn); - F2FS_I(inode)->data_version = - le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); } out_writepage: f2fs_put_dnode(&dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 109e12d21a36..380e2b3cdac7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -159,7 +159,6 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags; /* use to pass per-file flags */ - unsigned long long data_version;/* latest version of data for fsync */ atomic_t dirty_dents; /* # of dirty dentry pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ff018a42e435..d65fcad578c5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -124,7 +124,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - unsigned long long cur_version; int ret = 0; bool need_cp = false; struct writeback_control wbc = { @@ -148,15 +147,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) goto out; - mutex_lock(&sbi->cp_mutex); - cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); - mutex_unlock(&sbi->cp_mutex); - - if (F2FS_I(inode)->data_version != cur_version && - !(inode->i_state & I_DIRTY)) - goto out; - F2FS_I(inode)->data_version--; - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e0e8308594a5..f798ddf2c8a8 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -80,7 +80,6 @@ static int do_read_inode(struct inode *inode) fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); fi->flags = 0; - fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); get_extent_info(&fi->ext, ri->i_ext); -- cgit v1.2.3 From fa37241743ac26ba0ac6f54579158c2fae310a5c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Mar 2013 12:53:19 +0900 Subject: f2fs: remain nat cache entries for further free nid allocation In the checkpoint flow, the f2fs investigates the total nat cache entries. Previously, if an entry has NULL_ADDR, f2fs drops the entry and adds the obsolete nid to the free nid list. However, this free nid will be reused sooner, resulting in its nat entry miss. In order to avoid this, we don't need to drop the nat cache entry at this moment. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f7b03ba9c0d7..0177f9434c25 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1621,11 +1621,11 @@ flush_now: nid_in_journal(sum, offset) = cpu_to_le32(nid); } - if (nat_get_blkaddr(ne) == NULL_ADDR) { + if (nat_get_blkaddr(ne) == NULL_ADDR && + !add_free_nid(NM_I(sbi), nid)) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); - add_free_nid(NM_I(sbi), nid); } else { write_lock(&nm_i->nat_tree_lock); __clear_nat_cache_dirty(nm_i, ne); -- cgit v1.2.3 From 953a3e27e10fc6acb480801ea47197d0270d735e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Mar 2013 15:21:57 +0900 Subject: f2fs: fix to give correct parent inode number for roll forward When we recover fsync'ed data after power-off-recovery, we should guarantee that any parent inode number should be correct for each direct inode blocks. So, let's make the following rules. - The fsync should do checkpoint to all the inodes that were experienced hard links. - So, the only normal files can be recovered by roll-forward. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 22 ++-------------------- fs/f2fs/namei.c | 14 ++++++++++---- fs/f2fs/node.h | 15 +++++++++++++++ 4 files changed, 28 insertions(+), 25 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 380e2b3cdac7..77e2eb061bfa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -148,6 +148,7 @@ struct extent_info { * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. */ #define FADVISE_COLD_BIT 0x01 +#define FADVISE_CP_BIT 0x02 struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ @@ -825,7 +826,6 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ - FI_NEED_CP, /* need to do checkpoint during fsync */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d65fcad578c5..e031f570df79 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -103,23 +103,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { .remap_pages = generic_file_remap_pages, }; -static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode) -{ - struct dentry *dentry; - nid_t pino; - - inode = igrab(inode); - dentry = d_find_any_alias(inode); - if (!dentry) { - iput(inode); - return 0; - } - pino = dentry->d_parent->d_inode->i_ino; - dput(dentry); - iput(inode); - return !is_checkpointed_node(sbi, pino); -} - int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -149,17 +132,16 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; - else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) + else if (is_cp_file(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) need_cp = true; - else if (need_to_sync_dir(sbi, inode)) + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) need_cp = true; if (need_cp) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - clear_inode_flag(F2FS_I(inode), FI_NEED_CP); } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d4a171b1a68b..7c6e219a479c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -15,6 +15,7 @@ #include #include "f2fs.h" +#include "node.h" #include "xattr.h" #include "acl.h" @@ -99,7 +100,7 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) /* * Set multimedia files as cold files for hot/cold data separation */ -static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode, +static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { int i; @@ -108,7 +109,7 @@ static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode, int count = le32_to_cpu(sbi->raw_super->extension_count); for (i = 0; i < count; i++) { if (!is_multimedia_file(name, extlist[i])) { - F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; + set_cold_file(inode); break; } } @@ -130,7 +131,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_file(sbi, inode, dentry->d_name.name); + set_cold_files(sbi, inode, dentry->d_name.name); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -173,6 +174,12 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (err) goto out; + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on. + */ + set_cp_file(inode); + d_instantiate(dentry, inode); return 0; out: @@ -425,7 +432,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } old_inode->i_ctime = CURRENT_TIME; - set_inode_flag(F2FS_I(old_inode), FI_NEED_CP); mark_inode_dirty(old_inode); f2fs_delete_entry(old_entry, old_page, NULL); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index afdb130f782e..d009cdfd2679 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -277,6 +277,21 @@ static inline int is_cold_file(struct inode *inode) return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; } +static inline void set_cold_file(struct inode *inode) +{ + F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; +} + +static inline int is_cp_file(struct inode *inode) +{ + return F2FS_I(inode)->i_advise & FADVISE_CP_BIT; +} + +static inline void set_cp_file(struct inode *inode) +{ + F2FS_I(inode)->i_advise |= FADVISE_CP_BIT; +} + static inline int is_cold_data(struct page *page) { return PageChecked(page); -- cgit v1.2.3 From 79b5793be44d97c0a0e905c221858af08e5ebd85 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghiu Date: Thu, 28 Mar 2013 02:24:53 +0200 Subject: f2fs: use kmemdup Use kmemdup instead of kzalloc and memcpy. Signed-off-by: Alexandru Gheorghiu Acked-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 12 +++++------- fs/f2fs/segment.c | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0177f9434c25..10cbee9dc3d1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1666,19 +1666,17 @@ static int init_node_manager(struct f2fs_sb_info *sbi) spin_lock_init(&nm_i->free_nid_list_lock); rwlock_init(&nm_i->nat_tree_lock); - nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); - - nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL); - if (!nm_i->nat_bitmap) - return -ENOMEM; + nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); if (!version_bitmap) return -EFAULT; - /* copy version bitmap */ - memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size); + nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap) + return -ENOMEM; return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 777f17e496e6..17581495bafb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1403,10 +1403,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi) bitmap_size = __bitmap_size(sbi, SIT_BITMAP); src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); - dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); if (!dst_bitmap) return -ENOMEM; - memcpy(dst_bitmap, src_bitmap, bitmap_size); /* init SIT information */ sit_i->s_ops = &default_salloc_ops; -- cgit v1.2.3 From 5c773ba33a29c0dcddac7cfaa39fc63a7137130d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:30:04 +0900 Subject: f2fs: do not use duplicate names in a macro A macro should not use duplicate parameter names. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e399bd4d3af8..c0d774076ab9 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -23,13 +23,13 @@ ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ (t == CURSEG_WARM_NODE)) -#define IS_CURSEG(sbi, segno) \ - ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) +#define IS_CURSEG(sbi, seg) \ + ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) #define IS_CURSEC(sbi, secno) \ ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ -- cgit v1.2.3 From 53cf95222fad7a962cc03fb61a33e37bcf4f5c9d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:39:49 +0900 Subject: f2fs: introduce TOTAL_SECS macro Let's use a macro to get the total number of sections. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++---- fs/f2fs/segment.c | 19 +++++++++---------- fs/f2fs/segment.h | 1 + 3 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 025b9e2f935d..20b8794ec8f6 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -106,7 +106,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) } } mutex_unlock(&sit_i->sentry_lock); - dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100; + dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; si->bimodal = bimodal / dist; if (si->dirty_count) si->avg_vblocks = total_vblocks / ndirty; @@ -138,14 +138,13 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); if (sbi->segs_per_sec > 1) - si->base_mem += sbi->total_sections * - sizeof(struct sec_entry); + si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); /* build free segmap */ si->base_mem += sizeof(struct free_segmap_info); si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += f2fs_bitmap_size(sbi->total_sections); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); /* build curseg */ si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 17581495bafb..179a13e86f69 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -348,9 +348,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, unsigned int *newseg, bool new_sec, int dir) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int total_secs = sbi->total_sections; unsigned int segno, secno, zoneno; - unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone; + unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; unsigned int hint = *newseg / sbi->segs_per_sec; unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); unsigned int left_start = hint; @@ -367,12 +366,12 @@ static void get_new_segment(struct f2fs_sb_info *sbi, goto got_it; } find_other_zone: - secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint); - if (secno >= total_secs) { + secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); + if (secno >= TOTAL_SECS(sbi)) { if (dir == ALLOC_RIGHT) { secno = find_next_zero_bit(free_i->free_secmap, - total_secs, 0); - BUG_ON(secno >= total_secs); + TOTAL_SECS(sbi), 0); + BUG_ON(secno >= TOTAL_SECS(sbi)); } else { go_left = 1; left_start = hint - 1; @@ -387,8 +386,8 @@ find_other_zone: continue; } left_start = find_next_zero_bit(free_i->free_secmap, - total_secs, 0); - BUG_ON(left_start >= total_secs); + TOTAL_SECS(sbi), 0); + BUG_ON(left_start >= TOTAL_SECS(sbi)); break; } secno = left_start; @@ -1390,7 +1389,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) } if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = vzalloc(sbi->total_sections * + sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * sizeof(struct sec_entry)); if (!sit_i->sec_entries) return -ENOMEM; @@ -1441,7 +1440,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) if (!free_i->free_segmap) return -ENOMEM; - sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections); + sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index c0d774076ab9..fea9245d4774 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -81,6 +81,7 @@ #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) +#define TOTAL_SECS(sbi) (sbi->total_sections) #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) -- cgit v1.2.3 From 56ae674cc27230ea86ab25db7fcf1f32dfe17ec1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:47:20 +0900 Subject: f2fs: remove redundant lock_page calls In get_node_page, we do not need to call lock_page all the time. If the node page is cached as uptodate, 1. grab_cache_page locks the page, 2. read_node_page unlocks the page, and 3. lock_page is called for further process. Let's avoid this. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 40 +++++++++++++++++++++++++--------------- fs/f2fs/node.h | 3 +++ 2 files changed, 28 insertions(+), 15 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 10cbee9dc3d1..8510c5ed402e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -847,6 +847,12 @@ fail: return ERR_PTR(err); } +/* + * Caller should do after getting the following values. + * 0: f2fs_put_page(page, 0) + * LOCKED_PAGE: f2fs_put_page(page, 1) + * error: nothing + */ static int read_node_page(struct page *page, int type) { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); @@ -859,10 +865,8 @@ static int read_node_page(struct page *page, int type) return -ENOENT; } - if (PageUptodate(page)) { - unlock_page(page); - return 0; - } + if (PageUptodate(page)) + return LOCKED_PAGE; return f2fs_readpage(sbi, page, ni.blk_addr, type); } @@ -874,6 +878,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { struct address_space *mapping = sbi->node_inode->i_mapping; struct page *apage; + int err; apage = find_get_page(mapping, nid); if (apage && PageUptodate(apage)) { @@ -886,30 +891,36 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (!apage) return; - if (read_node_page(apage, READA) == 0) + err = read_node_page(apage, READA); + if (err == 0) f2fs_put_page(apage, 0); + else if (err == LOCKED_PAGE) + f2fs_put_page(apage, 1); return; } struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { - int err; - struct page *page; struct address_space *mapping = sbi->node_inode->i_mapping; + struct page *page; + int err; page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); err = read_node_page(page, READ_SYNC); - if (err) + if (err < 0) return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto got_it; lock_page(page); if (!PageUptodate(page)) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } +got_it: BUG_ON(nid != nid_of_node(page)); mark_page_accessed(page); return page; @@ -923,10 +934,9 @@ struct page *get_node_page_ra(struct page *parent, int start) { struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); struct address_space *mapping = sbi->node_inode->i_mapping; - int i, end; - int err = 0; - nid_t nid; struct page *page; + int err, i, end; + nid_t nid; /* First, try getting the desired direct node. */ nid = get_nid(parent, start, false); @@ -936,12 +946,12 @@ struct page *get_node_page_ra(struct page *parent, int start) page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); - else if (PageUptodate(page)) - goto page_hit; err = read_node_page(page, READ_SYNC); - if (err) + if (err < 0) return ERR_PTR(err); + else if (err == LOCKED_PAGE) + goto page_hit; /* Then, try readahead for siblings of the desired node */ end = start + MAX_RA_NODE; @@ -956,7 +966,7 @@ struct page *get_node_page_ra(struct page *parent, int start) lock_page(page); page_hit: - if (PageError(page)) { + if (!PageUptodate(page)) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index d009cdfd2679..271a61c25601 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -29,6 +29,9 @@ /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 +/* return value for read_node_page */ +#define LOCKED_PAGE 1 + /* * For node information */ -- cgit v1.2.3 From 33afa7fde0defbb362328233e600e052d0a22cd5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 12:59:53 +0900 Subject: f2fs: allocate new segment aligned with sections When allocating a new segment under the LFS mode, we should keep the section boundary. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 179a13e86f69..b3486f34af78 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -362,7 +362,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, TOTAL_SEGS(sbi), *newseg + 1); - if (segno < TOTAL_SEGS(sbi)) + if (segno - *newseg < sbi->segs_per_sec - + (*newseg % sbi->segs_per_sec)) goto got_it; } find_other_zone: -- cgit v1.2.3 From 5ec4e49f9bd753e2a6857a96e01f8ae5ff00b459 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 13:26:03 +0900 Subject: f2fs: change GC bitmaps to apply the section granularity This patch removes a bitmap for victim segments selected by foreground GC, and modifies the other bitmap for victim segments selected by background GC. 1) foreground GC bitmap : We don't need to manage this, since we just only one previous victim section number instead of the whole victim history. The f2fs uses the victim section number in order not to allocate currently GC'ed section to current active logs. 2) background GC bitmap : This bitmap is used to avoid selecting victims repeatedly by background GCs. In addition, the victims are able to be selected by foreground GCs, since there is no need to read victim blocks during foreground GCs. By the fact that the foreground GC reclaims segments in a section unit, it'd be better to manage this bitmap based on the section granularity. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 -- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 43 +++++++++++++++++++--------------- fs/f2fs/segment.c | 66 ++++++++++++++++++++++++---------------------------- fs/f2fs/segment.h | 10 +++++++- fs/f2fs/super.c | 2 ++ 7 files changed, 68 insertions(+), 59 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d947e66ee8a8..93fd57d491ac 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -748,8 +748,6 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) flush_nat_entries(sbi); flush_sit_entries(sbi); - reset_victim_segmap(sbi); - /* unlock all the fs_lock[] in do_checkpoint() */ do_checkpoint(sbi, is_umount); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 20b8794ec8f6..c3bf343b0b82 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -153,7 +153,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build dirty segmap */ si->base_mem += sizeof(struct dirty_seglist_info); si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); /* buld nm */ si->base_mem += sizeof(struct f2fs_nm_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 77e2eb061bfa..71eacd373916 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -410,6 +410,7 @@ struct f2fs_sb_info { /* for cleaning operations */ struct mutex gc_mutex; /* mutex for GC */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ + unsigned int cur_victim_sec; /* current victim section num */ /* * for stat information. @@ -979,7 +980,6 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *, int, unsigned int, int); void flush_sit_entries(struct f2fs_sb_info *); int build_segment_manager(struct f2fs_sb_info *); -void reset_victim_segmap(struct f2fs_sb_info *); void destroy_segment_manager(struct f2fs_sb_info *); /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2e3eb2d4fc30..09b8a907400b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -160,18 +160,21 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno; + unsigned int hint = 0; + unsigned int secno; /* * If the gc_type is FG_GC, we can select victim segments * selected by background GC before. * Those segments guarantee they have small valid blocks. */ - segno = find_next_bit(dirty_i->victim_segmap[BG_GC], - TOTAL_SEGS(sbi), 0); - if (segno < TOTAL_SEGS(sbi)) { - clear_bit(segno, dirty_i->victim_segmap[BG_GC]); - return segno; +next: + secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); + if (secno < TOTAL_SECS(sbi)) { + if (sec_usage_check(sbi, secno)) + goto next; + clear_bit(secno, dirty_i->victim_secmap); + return secno * sbi->segs_per_sec; } return NULL_SEGNO; } @@ -234,7 +237,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct victim_sel_policy p; - unsigned int segno; + unsigned int secno; int nsearched = 0; p.alloc_mode = alloc_mode; @@ -253,6 +256,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, while (1) { unsigned long cost; + unsigned int segno; segno = find_next_bit(p.dirty_segmap, TOTAL_SEGS(sbi), p.offset); @@ -265,13 +269,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, break; } p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; + secno = GET_SECNO(sbi, segno); - if (test_bit(segno, dirty_i->victim_segmap[FG_GC])) + if (sec_usage_check(sbi, secno)) continue; - if (gc_type == BG_GC && - test_bit(segno, dirty_i->victim_segmap[BG_GC])) - continue; - if (IS_CURSEC(sbi, GET_SECNO(sbi, segno))) + if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) continue; cost = get_gc_cost(sbi, segno, &p); @@ -291,13 +293,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } got_it: if (p.min_segno != NULL_SEGNO) { - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; if (p.alloc_mode == LFS) { - int i; - for (i = 0; i < p.ofs_unit; i++) - set_bit(*result + i, - dirty_i->victim_segmap[gc_type]); + secno = GET_SECNO(sbi, p.min_segno); + if (gc_type == FG_GC) + sbi->cur_victim_sec = secno; + else + set_bit(secno, dirty_i->victim_secmap); } + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; } mutex_unlock(&dirty_i->seglist_lock); @@ -662,9 +665,11 @@ gc_more: for (i = 0; i < sbi->segs_per_sec; i++) do_garbage_collect(sbi, segno + i, &ilist, gc_type); - if (gc_type == FG_GC && - get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + if (gc_type == FG_GC) { + sbi->cur_victim_sec = NULL_SEGNO; nfree++; + WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); + } if (has_not_enough_free_secs(sbi, nfree)) goto gc_more; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b3486f34af78..d5244f6765a9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -69,8 +69,9 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) dirty_i->nr_dirty[dirty_type]--; - clear_bit(segno, dirty_i->victim_segmap[FG_GC]); - clear_bit(segno, dirty_i->victim_segmap[BG_GC]); + if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + clear_bit(GET_SECNO(sbi, segno), + dirty_i->victim_secmap); } } @@ -296,13 +297,12 @@ static void write_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } -static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, - int ofs_unit, int type) +static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; - unsigned int segno, next_segno, i; - int ofs = 0; + unsigned int segno; + unsigned int ofs = 0; /* * If there is not enough reserved sections, @@ -318,23 +318,30 @@ static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, if (IS_NODESEG(type)) return NULL_SEGNO; next: - segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++); - ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit; + segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); + ofs += sbi->segs_per_sec; + if (segno < TOTAL_SEGS(sbi)) { + int i; + /* skip intermediate segments in a section */ - if (segno % ofs_unit) + if (segno % sbi->segs_per_sec) goto next; - /* skip if whole section is not prefree */ - next_segno = find_next_zero_bit(prefree_segmap, - TOTAL_SEGS(sbi), segno + 1); - if (next_segno - segno < ofs_unit) + /* skip if the section is currently used */ + if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) goto next; + /* skip if whole section is not prefree */ + for (i = 1; i < sbi->segs_per_sec; i++) + if (!test_bit(segno + i, prefree_segmap)) + goto next; + /* skip if whole section was not free at the last checkpoint */ - for (i = 0; i < ofs_unit; i++) - if (get_seg_entry(sbi, segno)->ckpt_valid_blocks) + for (i = 0; i < sbi->segs_per_sec; i++) + if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) goto next; + return segno; } return NULL_SEGNO; @@ -561,15 +568,13 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, int type, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int ofs_unit; if (force) { new_curseg(sbi, type, true); goto out; } - ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec; - curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type); + curseg->next_segno = check_prefree_segments(sbi, type); if (curseg->next_segno != NULL_SEGNO) change_curseg(sbi, type, false); @@ -1558,14 +1563,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) } } -static int init_victim_segmap(struct f2fs_sb_info *sbi) +static int init_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); - dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL); - dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL); - if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC]) + dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dirty_i->victim_secmap) return -ENOMEM; return 0; } @@ -1592,7 +1596,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) } init_dirty_segmap(sbi); - return init_victim_segmap(sbi); + return init_victim_secmap(sbi); } /* @@ -1679,18 +1683,10 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi, mutex_unlock(&dirty_i->seglist_lock); } -void reset_victim_segmap(struct f2fs_sb_info *sbi) -{ - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); - memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size); -} - -static void destroy_victim_segmap(struct f2fs_sb_info *sbi) +static void destroy_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - - kfree(dirty_i->victim_segmap[FG_GC]); - kfree(dirty_i->victim_segmap[BG_GC]); + kfree(dirty_i->victim_secmap); } static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) @@ -1705,7 +1701,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) for (i = 0; i < NR_DIRTY_TYPE; i++) discard_dirty_segmap(sbi, i); - destroy_victim_segmap(sbi); + destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; kfree(dirty_i); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index fea9245d4774..994bb7bd7b70 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -10,6 +10,7 @@ */ /* constant macro */ #define NULL_SEGNO ((unsigned int)(~0)) +#define NULL_SECNO ((unsigned int)(~0)) /* V: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) @@ -214,7 +215,7 @@ struct dirty_seglist_info { unsigned long *dirty_segmap[NR_DIRTY_TYPE]; struct mutex seglist_lock; /* lock for segment bitmaps */ int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ - unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */ + unsigned long *victim_secmap; /* background GC victims */ }; /* victim selection function for cleaning and SSR */ @@ -616,3 +617,10 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) - (base + 1) + type; } + +static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) +{ + if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) + return true; + return false; +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 252890ef8dbc..728c20a8e456 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -26,6 +26,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include "xattr.h" static struct kmem_cache *f2fs_inode_cachep; @@ -458,6 +459,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + sbi->cur_victim_sec = NULL_SECNO; for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); -- cgit v1.2.3 From 4ebefc4443898f5429185ef96d85cfce0fbcc16a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 13:49:18 +0900 Subject: f2fs: check completion of foreground GC The foreground GCs are triggered under not enough free sections. So, we should not skip moving valid blocks in the victim segments. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 09b8a907400b..136c0f7a670b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -131,7 +131,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - if (p->alloc_mode) { + if (p->alloc_mode == SSR) { p->gc_mode = GC_GREEDY; p->dirty_segmap = dirty_i->dirty_segmap[type]; p->ofs_unit = 1; @@ -404,8 +404,14 @@ next_step: continue; /* set page dirty and write it */ - if (!PageWriteback(node_page)) + if (gc_type == FG_GC) { + f2fs_submit_bio(sbi, NODE, true); + wait_on_page_writeback(node_page); set_page_dirty(node_page); + } else { + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } f2fs_put_page(node_page, 1); stat_inc_node_blk_count(sbi, 1); } @@ -421,6 +427,13 @@ next_step: .for_reclaim = 0, }; sync_node_pages(sbi, 0, &wbc); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. + */ + if (get_valid_blocks(sbi, segno, 1) != 0) + goto next_step; } } @@ -484,20 +497,19 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static void move_data_page(struct inode *inode, struct page *page, int gc_type) { - if (page->mapping != inode->i_mapping) - goto out; - - if (inode != page->mapping->host) - goto out; - - if (PageWriteback(page)) - goto out; - if (gc_type == BG_GC) { + if (PageWriteback(page)) + goto out; set_page_dirty(page); set_cold_data(page); } else { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + if (PageWriteback(page)) { + f2fs_submit_bio(sbi, DATA, true); + wait_on_page_writeback(page); + } + mutex_lock_op(sbi, DATA_WRITE); if (clear_page_dirty_for_io(page) && S_ISDIR(inode->i_mode)) { @@ -594,8 +606,18 @@ next_iput: if (++phase < 4) goto next_step; - if (gc_type == FG_GC) + if (gc_type == FG_GC) { f2fs_submit_bio(sbi, DATA, true); + + /* + * In the case of FG_GC, it'd be better to reclaim this victim + * completely. + */ + if (get_valid_blocks(sbi, segno, 1) != 0) { + phase = 2; + goto next_step; + } + } } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, -- cgit v1.2.3 From 60374688a1a1cc8ef173d3dab42574719b851ac4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Mar 2013 13:58:51 +0900 Subject: f2fs: allocate remained free segments in the LFS mode This patch adds a new condition that allocates free segments in the current active section even if SSR is needed. Otherwise, f2fs cannot allocate remained free segments in the section since SSR finds dirty segments only. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d5244f6765a9..fe520d3448e0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -347,6 +347,17 @@ next: return NULL_SEGNO; } +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) + return !test_bit(segno + 1, free_i->free_segmap); + return 0; +} + /* * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG @@ -580,6 +591,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, change_curseg(sbi, type, false); else if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type, true); else -- cgit v1.2.3 From b74737541c5190ab2ad3ee0d7b323e860b988df1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Apr 2013 08:32:21 +0900 Subject: f2fs: avoid race for summary information In order to do GC more reliably, I'd like to lock the vicitm summary page until its GC is completed, and also prevent any checkpoint process. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 +------- fs/f2fs/node.c | 2 +- fs/f2fs/super.c | 7 +++++-- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 136c0f7a670b..e97f30157aa6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -642,12 +642,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, if (IS_ERR(sum_page)) return; - /* - * CP needs to lock sum_page. In this time, we don't need - * to lock this page, because this summary page is not gone anywhere. - * Also, this page is not gonna be updated before GC is done. - */ - unlock_page(sum_page); sum = page_address(sum_page); switch (GET_SUM_TYPE((&sum->footer))) { @@ -661,7 +655,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); stat_inc_call_count(sbi->stat_info); - f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 1); } int f2fs_gc(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8510c5ed402e..95298ef68262 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1149,7 +1149,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, /* First check balancing cached NAT entries */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { - write_checkpoint(sbi, false); + f2fs_sync_fs(sbi->sb, true); return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 728c20a8e456..ca5413346653 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -137,10 +137,13 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) return 0; - if (sync) + if (sync) { + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, false); - else + mutex_unlock(&sbi->gc_mutex); + } else { f2fs_balance_fs(sbi); + } return 0; } -- cgit v1.2.3 From b2f2c390c5612df97f0403e1ef1e4e41c24b7d4f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Apr 2013 13:52:09 +0900 Subject: f2fs: fix the bitmap consistency of dirty segments Like below, there are 8 segment bitmaps for SSR victim candidates. enum dirty_type { DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ DIRTY, /* to count # of dirty segments */ PRE, /* to count # of entirely obsolete segments */ NR_DIRTY_TYPE }; The upper 6 bitmaps indicates segments dirtied by active log areas respectively. And, the DIRTY bitmap integrates all the 6 bitmaps. For example, o DIRTY_HOT_DATA : 1010000 o DIRTY_WARM_DATA: 0100000 o DIRTY_COLD_DATA: 0001000 o DIRTY_HOT_NODE : 0000010 o DIRTY_WARM_NODE: 0000001 o DIRTY_COLD_NODE: 0000000 In this case, o DIRTY : 1111011, which means that we should guarantee the consistency between DIRTY and other bitmaps concreately. However, the SSR mode selects victims freely from any log types, which can set multiple bits across the various bitmap types. So, this patch eliminates this inconsistency. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fe520d3448e0..7c67ec2b63c0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -49,9 +49,20 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (dirty_type == DIRTY) { struct seg_entry *sentry = get_seg_entry(sbi, segno); + enum dirty_type t = DIRTY_HOT_DATA; + dirty_type = sentry->type; + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) dirty_i->nr_dirty[dirty_type]++; + + /* Only one bitmap should be set */ + for (; t <= DIRTY_COLD_NODE; t++) { + if (t == dirty_type) + continue; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + } } } @@ -64,11 +75,13 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, dirty_i->nr_dirty[dirty_type]--; if (dirty_type == DIRTY) { - struct seg_entry *sentry = get_seg_entry(sbi, segno); - dirty_type = sentry->type; - if (test_and_clear_bit(segno, - dirty_i->dirty_segmap[dirty_type])) - dirty_i->nr_dirty[dirty_type]--; + enum dirty_type t = DIRTY_HOT_DATA; + + /* clear all the bitmaps */ + for (; t <= DIRTY_COLD_NODE; t++) + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) clear_bit(GET_SECNO(sbi, segno), dirty_i->victim_secmap); -- cgit v1.2.3 From cfb185a1488810fbae9256c7d52f66c558c6ea04 Mon Sep 17 00:00:00 2001 From: P J P Date: Wed, 3 Apr 2013 11:38:00 +0900 Subject: f2fs: add NULL pointer check Commit - fa9150a84c - replaces a call to generic_writepages() in f2fs_write_data_pages() with write_cache_pages(), with a function pointer argument pointing to routine: __f2fs_writepage. -> https://git.kernel.org/linus/fa9150a84ca333f68127097c4fa1eda4b3913a22 This patch adds a NULL pointer check in f2fs_write_data_pages() to avoid a possible NULL pointer dereference, in case if - mapping->a_ops->writepage - is NULL. Signed-off-by: P J P Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 47a2d7c87ea9..cf9ff5f76134 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -559,6 +559,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, int ret; long excess_nrtw = 0, desired_nrtw; + /* deal with chardevs and other special file */ + if (!mapping->a_ops->writepage) + return 0; + if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { desired_nrtw = MAX_DESIRED_PAGES_WP; excess_nrtw = desired_nrtw - wbc->nr_to_write; -- cgit v1.2.3 From 49952fa182a2e9b3f40b974278c5b1144f0c918b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Apr 2013 22:19:03 +0900 Subject: f2fs: reduce redundant spin_lock operations This patch reduces redundant spin_lock operations in alloc_nid_failed(). The alloc_nid_failed() does not need to delete entry and add one again by triggering spin_lock and spin_unlock redundantly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 95298ef68262..ad3adbee842a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1407,10 +1407,8 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - if (i) { - BUG_ON(i->state != NID_ALLOC); - __del_from_free_nid_list(i); - } + BUG_ON(!i || i->state != NID_ALLOC); + __del_from_free_nid_list(i); spin_unlock(&nm_i->free_nid_list_lock); } @@ -1419,8 +1417,15 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) */ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) { - alloc_nid_done(sbi, nid); - add_free_nid(NM_I(sbi), nid); + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + i->state = NID_NEW; + nm_i->fcnt++; + spin_unlock(&nm_i->free_nid_list_lock); } void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, -- cgit v1.2.3 From 1127a3d448bcf4de338e60a7cc695d54c5767433 Mon Sep 17 00:00:00 2001 From: Jason Hrycay Date: Mon, 8 Apr 2013 20:16:44 -0500 Subject: f2fs: move f2fs_balance_fs from truncate to punch_hole Move the f2fs_balance_fs out of the truncate_hole function and only perform that in punch_hole use case. The commit: ed60b1644e7f7e5dd67d21caf7e4425dff05dad0 intended to do this but moved it into truncate_hole to cover more cases. However, a deadlock scenario is possible when deleting an inode entry under specific conditions: f2fs_delete_entry() mutex_lock_op(sbi, DENTRY_OPS); truncate_hole() f2fs_balance_fs() mutex_lock(&sbi->gc_mutex); f2fs_gc() write_checkpoint() block_operations() mutex_lock_op(sbi, DENTRY_OPS); Lets move it into the punch_hole case to cover the original intent of avoiding it during fallocate's expand_inode_data case. Change-Id: I29f8ea1056b0b88b70ba8652d901b6e8431bb27e Signed-off-by: Jason Hrycay Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e031f570df79..155b362dad63 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -390,8 +390,6 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - f2fs_balance_fs(sbi); - mutex_lock_op(sbi, DATA_TRUNC); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); @@ -435,6 +433,9 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) if (pg_start < pg_end) { struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + f2fs_balance_fs(sbi); blk_start = pg_start << PAGE_CACHE_SHIFT; blk_end = pg_end << PAGE_CACHE_SHIFT; -- cgit v1.2.3 From 399368372ed9f3c396eadb5c2bbc98be8c774a39 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Nov 2012 16:21:29 +0900 Subject: f2fs: introduce a new global lock scheme In the previous version, f2fs uses global locks according to the usage types, such as directory operations, block allocation, block write, and so on. Reference the following lock types in f2fs.h. enum lock_type { RENAME, /* for renaming operations */ DENTRY_OPS, /* for directory operations */ DATA_WRITE, /* for data write */ DATA_NEW, /* for data allocation */ DATA_TRUNC, /* for data truncate */ NODE_NEW, /* for node allocation */ NODE_TRUNC, /* for node truncate */ NODE_WRITE, /* for node write */ NR_LOCK_TYPE, }; In that case, we lose the performance under the multi-threading environment, since every types of operations must be conducted one at a time. In order to address the problem, let's share the locks globally with a mutex array regardless of any types. So, let users grab a mutex and perform their jobs in parallel as much as possbile. For this, I propose a new global lock scheme as follows. 0. Data structure - f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS] - f2fs_sb_info -> node_write 1. mutex_lock_op(sbi) - try to get an avaiable lock from the array. - returns the index of the gottern lock variable. 2. mutex_unlock_op(sbi, index of the lock) - unlock the given index of the lock. 3. mutex_lock_all(sbi) - grab all the locks in the array before the checkpoint. 4. mutex_unlock_all(sbi) - release all the locks in the array after checkpoint. 5. block_operations() - call mutex_lock_all() - sync_dirty_dir_inodes() - grab node_write - sync_node_pages() Note that, the pairs of mutex_lock_op()/mutex_unlock_op() and mutex_lock_all()/mutex_unlock_all() should be used together. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 39 ++++++------------- fs/f2fs/data.c | 77 +++++++++++++++++++------------------ fs/f2fs/dir.c | 105 +++++++++++++++++++++++---------------------------- fs/f2fs/f2fs.h | 63 ++++++++++++++++++++----------- fs/f2fs/file.c | 46 +++++++++++----------- fs/f2fs/gc.c | 2 - fs/f2fs/inode.c | 48 ++++++++++++----------- fs/f2fs/namei.c | 44 +++++++++++++-------- fs/f2fs/node.c | 44 +++++++++------------ fs/f2fs/recovery.c | 8 +++- fs/f2fs/super.c | 4 +- fs/f2fs/xattr.c | 10 +++-- 12 files changed, 248 insertions(+), 242 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 93fd57d491ac..be6aa2eef894 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -543,54 +543,39 @@ retry: */ static void block_operations(struct f2fs_sb_info *sbi) { - int t; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; +retry_flush_dents: + mutex_lock_all(sbi); - /* Stop renaming operation */ - mutex_lock_op(sbi, RENAME); - mutex_lock_op(sbi, DENTRY_OPS); - -retry_dents: /* write all the dirty dentry pages */ - sync_dirty_dir_inodes(sbi); - - mutex_lock_op(sbi, DATA_WRITE); if (get_pages(sbi, F2FS_DIRTY_DENTS)) { - mutex_unlock_op(sbi, DATA_WRITE); - goto retry_dents; + mutex_unlock_all(sbi); + sync_dirty_dir_inodes(sbi); + goto retry_flush_dents; } - /* block all the operations */ - for (t = DATA_NEW; t <= NODE_TRUNC; t++) - mutex_lock_op(sbi, t); - - mutex_lock(&sbi->write_inode); - /* * POR: we should ensure that there is no dirty node pages * until finishing nat/sit flush. */ -retry: - sync_node_pages(sbi, 0, &wbc); - - mutex_lock_op(sbi, NODE_WRITE); +retry_flush_nodes: + mutex_lock(&sbi->node_write); if (get_pages(sbi, F2FS_DIRTY_NODES)) { - mutex_unlock_op(sbi, NODE_WRITE); - goto retry; + mutex_unlock(&sbi->node_write); + sync_node_pages(sbi, 0, &wbc); + goto retry_flush_nodes; } - mutex_unlock(&sbi->write_inode); } static void unblock_operations(struct f2fs_sb_info *sbi) { - int t; - for (t = NODE_WRITE; t >= RENAME; t--) - mutex_unlock_op(sbi, t); + mutex_unlock(&sbi->node_write); + mutex_unlock_all(sbi); } static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cf9ff5f76134..72a1b30f720a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -260,6 +260,9 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) /* * Caller ensures that this data page is never allocated. * A new zero-filled data page is allocated in the page cache. + * + * Also, caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). */ struct page *get_new_data_page(struct inode *inode, pgoff_t index, bool new_i_size) @@ -479,10 +482,11 @@ static int f2fs_write_data_page(struct page *page, const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_CACHE_SHIFT; unsigned offset; + bool need_balance_fs = false; int err = 0; if (page->index < end_index) - goto out; + goto write; /* * If the offset is out-of-range of file size, @@ -494,50 +498,46 @@ static int f2fs_write_data_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_DENTS); inode_dec_dirty_dents(inode); } - goto unlock_out; + goto out; } zero_user_segment(page, offset, PAGE_CACHE_SIZE); -out: - if (sbi->por_doing) - goto redirty_out; - - if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page)) +write: + if (sbi->por_doing) { + err = AOP_WRITEPAGE_ACTIVATE; goto redirty_out; + } - mutex_lock_op(sbi, DATA_WRITE); + /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { dec_page_count(sbi, F2FS_DIRTY_DENTS); inode_dec_dirty_dents(inode); + err = do_write_data_page(page); + } else { + int ilock = mutex_lock_op(sbi); + err = do_write_data_page(page); + mutex_unlock_op(sbi, ilock); + need_balance_fs = true; } - err = do_write_data_page(page); - if (err && err != -ENOENT) { - wbc->pages_skipped++; - set_page_dirty(page); - } - mutex_unlock_op(sbi, DATA_WRITE); + if (err == -ENOENT) + goto out; + else if (err) + goto redirty_out; if (wbc->for_reclaim) f2fs_submit_bio(sbi, DATA, true); - if (err == -ENOENT) - goto unlock_out; - clear_cold_data(page); +out: unlock_page(page); - - if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode)) + if (need_balance_fs) f2fs_balance_fs(sbi); return 0; -unlock_out: - unlock_page(page); - return (err == -ENOENT) ? 0 : err; - redirty_out: wbc->pages_skipped++; set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; + return err; } #define MAX_DESIRED_PAGES_WP 4096 @@ -592,6 +592,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; + int ilock; /* for nobh_write_end */ *fsdata = NULL; @@ -603,28 +604,21 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, return -ENOMEM; *pagep = page; - mutex_lock_op(sbi, DATA_NEW); + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, ALLOC_NODE); - if (err) { - mutex_unlock_op(sbi, DATA_NEW); - f2fs_put_page(page, 1); - return err; - } + if (err) + goto err; - if (dn.data_blkaddr == NULL_ADDR) { + if (dn.data_blkaddr == NULL_ADDR) err = reserve_new_block(&dn); - if (err) { - f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, DATA_NEW); - f2fs_put_page(page, 1); - return err; - } - } + f2fs_put_dnode(&dn); + if (err) + goto err; - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; @@ -654,6 +648,11 @@ out: SetPageUptodate(page); clear_cold_data(page); return 0; + +err: + mutex_unlock_op(sbi, ilock); + f2fs_put_page(page, 1); + return err; } static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2851ae6948a1..cd3342d4a3a7 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -249,9 +249,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - - mutex_lock_op(sbi, DENTRY_OPS); lock_page(page); wait_on_page_writeback(page); de->ino = cpu_to_le32(inode->i_ino); @@ -265,7 +262,6 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, F2FS_I(inode)->i_pino = dir->i_ino; f2fs_put_page(page, 1); - mutex_unlock_op(sbi, DENTRY_OPS); } void init_dent_inode(const struct qstr *name, struct page *ipage) @@ -284,6 +280,43 @@ void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } +static int make_empty_dir(struct inode *inode, struct inode *parent) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + void *kaddr; + + dentry_page = get_new_data_page(inode, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + kaddr = kmap_atomic(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)kaddr; + + de = &dentry_blk->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(dentry_blk->filename[0], ".", 1); + set_de_type(de, inode); + + de = &dentry_blk->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(dentry_blk->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + kunmap_atomic(kaddr); + + set_page_dirty(dentry_page); + f2fs_put_page(dentry_page, 1); + return 0; +} + static int init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { @@ -294,7 +327,7 @@ static int init_inode_metadata(struct inode *inode, return err; if (S_ISDIR(inode->i_mode)) { - err = f2fs_make_empty(inode, dir); + err = make_empty_dir(inode, dir); if (err) { remove_inode_page(inode); return err; @@ -317,7 +350,7 @@ static int init_inode_metadata(struct inode *inode, } if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { inc_nlink(inode); - f2fs_write_inode(inode, NULL); + update_inode_page(inode); } return 0; } @@ -341,7 +374,7 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, } if (need_dir_update) - f2fs_write_inode(dir, NULL); + update_inode_page(dir); else mark_inode_dirty(dir); @@ -373,6 +406,10 @@ next: goto next; } +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + */ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) { unsigned int bit_pos; @@ -382,7 +419,6 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in f2fs_hash_t dentry_hash; struct f2fs_dir_entry *de; unsigned int nbucket, nblock; - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); size_t namelen = name->len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; @@ -412,12 +448,9 @@ start: bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - mutex_lock_op(sbi, DENTRY_OPS); dentry_page = get_new_data_page(dir, block, true); - if (IS_ERR(dentry_page)) { - mutex_unlock_op(sbi, DENTRY_OPS); + if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - } dentry_blk = kmap(dentry_page); bit_pos = room_for_filename(dentry_blk, slots); @@ -426,7 +459,6 @@ start: kunmap(dentry_page); f2fs_put_page(dentry_page, 1); - mutex_unlock_op(sbi, DENTRY_OPS); } /* Move to next level to find the empty slot for new dentry */ @@ -456,7 +488,6 @@ add_dentry: fail: kunmap(dentry_page); f2fs_put_page(dentry_page, 1); - mutex_unlock_op(sbi, DENTRY_OPS); return err; } @@ -476,8 +507,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, void *kaddr = page_address(page); int i; - mutex_lock_op(sbi, DENTRY_OPS); - lock_page(page); wait_on_page_writeback(page); @@ -497,7 +526,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (inode && S_ISDIR(inode->i_mode)) { drop_nlink(dir); - f2fs_write_inode(dir, NULL); + update_inode_page(dir); } else { mark_inode_dirty(dir); } @@ -509,7 +538,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, drop_nlink(inode); i_size_write(inode, 0); } - f2fs_write_inode(inode, NULL); + update_inode_page(inode); + if (inode->i_nlink == 0) add_orphan_inode(sbi, inode->i_ino); } @@ -522,45 +552,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, inode_dec_dirty_dents(dir); } f2fs_put_page(page, 1); - - mutex_unlock_op(sbi, DENTRY_OPS); -} - -int f2fs_make_empty(struct inode *inode, struct inode *parent) -{ - struct page *dentry_page; - struct f2fs_dentry_block *dentry_blk; - struct f2fs_dir_entry *de; - void *kaddr; - - dentry_page = get_new_data_page(inode, 0, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); - - kaddr = kmap_atomic(dentry_page); - dentry_blk = (struct f2fs_dentry_block *)kaddr; - - de = &dentry_blk->dentry[0]; - de->name_len = cpu_to_le16(1); - de->hash_code = f2fs_dentry_hash(".", 1); - de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); - set_de_type(de, inode); - - de = &dentry_blk->dentry[1]; - de->hash_code = f2fs_dentry_hash("..", 2); - de->name_len = cpu_to_le16(2); - de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); - set_de_type(de, inode); - - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); - kunmap_atomic(kaddr); - - set_page_dirty(dentry_page); - f2fs_put_page(dentry_page, 1); - return 0; } bool f2fs_empty_dir(struct inode *dir) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 71eacd373916..06cc75c66c88 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -309,23 +309,12 @@ enum count_type { }; /* - * FS_LOCK nesting subclasses for the lock validator: - * - * The locking order between these classes is - * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW - * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC + * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS]. + * The checkpoint procedure blocks all the locks in this fs_lock array. + * Some FS operations grab free locks, and if there is no free lock, + * then wait to grab a lock in a round-robin manner. */ -enum lock_type { - RENAME, /* for renaming operations */ - DENTRY_OPS, /* for directory operations */ - DATA_WRITE, /* for data write */ - DATA_NEW, /* for data allocation */ - DATA_TRUNC, /* for data truncate */ - NODE_NEW, /* for node allocation */ - NODE_TRUNC, /* for node truncate */ - NODE_WRITE, /* for node write */ - NR_LOCK_TYPE, -}; +#define NR_GLOBAL_LOCKS 8 /* * The below are the page types of bios used in submti_bio(). @@ -365,10 +354,11 @@ struct f2fs_sb_info { /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ - struct mutex cp_mutex; /* for checkpoint procedure */ - struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */ - struct mutex write_inode; /* mutex for write inode */ + struct mutex cp_mutex; /* checkpoint procedure lock */ + struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ + struct mutex node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ + unsigned char next_lock_num; /* round-robin global locks */ int por_doing; /* recovery is doing or not */ /* for orphan inode management */ @@ -503,14 +493,40 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) cp->ckpt_flags = cpu_to_le32(ckpt_flags); } -static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t) +static inline void mutex_lock_all(struct f2fs_sb_info *sbi) +{ + int i = 0; + for (; i < NR_GLOBAL_LOCKS; i++) + mutex_lock(&sbi->fs_lock[i]); +} + +static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) { - mutex_lock_nested(&sbi->fs_lock[t], t); + int i = 0; + for (; i < NR_GLOBAL_LOCKS; i++) + mutex_unlock(&sbi->fs_lock[i]); } -static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t) +static inline int mutex_lock_op(struct f2fs_sb_info *sbi) { - mutex_unlock(&sbi->fs_lock[t]); + unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; + int i = 0; + + for (; i < NR_GLOBAL_LOCKS; i++) + if (mutex_trylock(&sbi->fs_lock[i])) + return i; + + mutex_lock(&sbi->fs_lock[next_lock]); + sbi->next_lock_num++; + return next_lock; +} + +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) +{ + if (ilock < 0) + return; + BUG_ON(ilock >= NR_GLOBAL_LOCKS); + mutex_unlock(&sbi->fs_lock[ilock]); } /* @@ -879,6 +895,7 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); void f2fs_set_inode_flags(struct inode *); struct inode *f2fs_iget(struct super_block *, unsigned long); void update_inode(struct inode *, struct page *); +int update_inode_page(struct inode *); int f2fs_write_inode(struct inode *, struct writeback_control *); void f2fs_evict_inode(struct inode *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 155b362dad63..07be88ddb9f8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -34,19 +34,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); block_t old_blk_addr; struct dnode_of_data dn; - int err; + int err, ilock; f2fs_balance_fs(sbi); sb_start_pagefault(inode->i_sb); - mutex_lock_op(sbi, DATA_NEW); - /* block allocation */ + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); if (err) { - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); goto out; } @@ -56,13 +55,12 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, err = reserve_new_block(&dn); if (err) { f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); goto out; } } f2fs_put_dnode(&dn); - - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); lock_page(page); if (page->mapping != inode->i_mapping || @@ -223,20 +221,19 @@ static int truncate_blocks(struct inode *inode, u64 from) unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; - int count = 0; + int count = 0, ilock = -1; int err; free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); - mutex_lock_op(sbi, DATA_TRUNC); - + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - mutex_unlock_op(sbi, DATA_TRUNC); + mutex_unlock_op(sbi, ilock); return err; } @@ -247,6 +244,7 @@ static int truncate_blocks(struct inode *inode, u64 from) count -= dn.ofs_in_node; BUG_ON(count < 0); + if (dn.ofs_in_node || IS_INODE(dn.node_page)) { truncate_data_blocks_range(&dn, count); free_from += count; @@ -255,7 +253,7 @@ static int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); - mutex_unlock_op(sbi, DATA_TRUNC); + mutex_unlock_op(sbi, ilock); /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); @@ -363,15 +361,16 @@ static void fill_zero(struct inode *inode, pgoff_t index, { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *page; + int ilock; if (!len) return; f2fs_balance_fs(sbi); - mutex_lock_op(sbi, DATA_NEW); + ilock = mutex_lock_op(sbi); page = get_new_data_page(inode, index, false); - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); if (!IS_ERR(page)) { wait_on_page_writeback(page); @@ -388,13 +387,10 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) for (index = pg_start; index < pg_end; index++) { struct dnode_of_data dn; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - mutex_lock_op(sbi, DATA_TRUNC); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { - mutex_unlock_op(sbi, DATA_TRUNC); if (err == -ENOENT) continue; return err; @@ -403,7 +399,6 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) if (dn.data_blkaddr != NULL_ADDR) truncate_data_blocks_range(&dn, 1); f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, DATA_TRUNC); } return 0; } @@ -434,6 +429,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; f2fs_balance_fs(sbi); @@ -441,7 +437,10 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) blk_end = pg_end << PAGE_CACHE_SHIFT; truncate_inode_pages_range(mapping, blk_start, blk_end - 1); + + ilock = mutex_lock_op(sbi); ret = truncate_hole(inode, pg_start, pg_end); + mutex_unlock_op(sbi, ilock); } } @@ -475,13 +474,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset, for (index = pg_start; index <= pg_end; index++) { struct dnode_of_data dn; + int ilock; - mutex_lock_op(sbi, DATA_NEW); - + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); ret = get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); break; } @@ -489,13 +488,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, ret = reserve_new_block(&dn); if (ret) { f2fs_put_dnode(&dn); - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); break; } } f2fs_put_dnode(&dn); - - mutex_unlock_op(sbi, DATA_NEW); + mutex_unlock_op(sbi, ilock); if (pg_start == pg_end) new_size = offset + len; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e97f30157aa6..83cec8f868c6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -510,7 +510,6 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) wait_on_page_writeback(page); } - mutex_lock_op(sbi, DATA_WRITE); if (clear_page_dirty_for_io(page) && S_ISDIR(inode->i_mode)) { dec_page_count(sbi, F2FS_DIRTY_DENTS); @@ -518,7 +517,6 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) } set_cold_data(page); do_write_data_page(page); - mutex_unlock_op(sbi, DATA_WRITE); clear_cold_data(page); } out: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f798ddf2c8a8..60105b710958 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -195,46 +195,49 @@ void update_inode(struct inode *inode, struct page *node_page) set_page_dirty(node_page); } -int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) +int update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *node_page; - bool need_lock = false; - - if (inode->i_ino == F2FS_NODE_INO(sbi) || - inode->i_ino == F2FS_META_INO(sbi)) - return 0; - - if (wbc) - f2fs_balance_fs(sbi); node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) return PTR_ERR(node_page); - if (!PageDirty(node_page)) { - need_lock = true; - f2fs_put_page(node_page, 1); - mutex_lock(&sbi->write_inode); - node_page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(node_page)) { - mutex_unlock(&sbi->write_inode); - return PTR_ERR(node_page); - } - } update_inode(inode, node_page); f2fs_put_page(node_page, 1); - if (need_lock) - mutex_unlock(&sbi->write_inode); return 0; } +int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ret, ilock; + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return 0; + + if (wbc) + f2fs_balance_fs(sbi); + + /* + * We need to lock here to prevent from producing dirty node pages + * during the urgent cleaning time when runing out of free sections. + */ + ilock = mutex_lock_op(sbi); + ret = update_inode_page(inode); + mutex_unlock_op(sbi, ilock); + return ret; +} + /* * Called at the last iput() if i_nlink is zero */ void f2fs_evict_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; truncate_inode_pages(&inode->i_data, 0); @@ -255,7 +258,10 @@ void f2fs_evict_inode(struct inode *inode) if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode); + ilock = mutex_lock_op(sbi); remove_inode_page(inode); + mutex_unlock_op(sbi, ilock); + sb_end_intwrite(inode->i_sb); no_delete: clear_inode(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7c6e219a479c..841f6b486bd6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -26,19 +26,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_t ino; struct inode *inode; bool nid_free = false; - int err; + int err, ilock; inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); - mutex_lock_op(sbi, NODE_NEW); + ilock = mutex_lock_op(sbi); if (!alloc_nid(sbi, &ino)) { - mutex_unlock_op(sbi, NODE_NEW); + mutex_unlock_op(sbi, ilock); err = -ENOSPC; goto fail; } - mutex_unlock_op(sbi, NODE_NEW); + mutex_unlock_op(sbi, ilock); inode->i_uid = current_fsuid(); @@ -122,7 +122,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; nid_t ino = 0; - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -138,7 +138,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -162,7 +164,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct inode *inode = old_dentry->d_inode; struct super_block *sb = dir->i_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -170,7 +172,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, atomic_inc(&inode->i_count); set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -229,6 +233,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct f2fs_dir_entry *de; struct page *page; int err = -ENOENT; + int ilock; f2fs_balance_fs(sbi); @@ -243,7 +248,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } + ilock = mutex_lock_op(sbi); f2fs_delete_entry(de, page, inode); + mutex_unlock_op(sbi, ilock); /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); @@ -258,7 +265,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; size_t symlen = strlen(symname) + 1; - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -269,7 +276,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -291,7 +300,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode; - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -305,7 +314,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); set_inode_flag(F2FS_I(inode), FI_INC_LINK); + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); if (err) goto out_fail; @@ -340,6 +351,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; int err = 0; + int ilock; if (!new_valid_dev(rdev)) return -EINVAL; @@ -353,7 +365,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -381,7 +395,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; - int err = -ENOENT; + int err = -ENOENT, ilock = -1; f2fs_balance_fs(sbi); @@ -396,7 +410,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_old; } - mutex_lock_op(sbi, RENAME); + ilock = mutex_lock_op(sbi); if (new_inode) { struct page *new_page; @@ -419,7 +433,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(new_inode); if (!new_inode->i_nlink) add_orphan_inode(sbi, new_inode->i_ino); - f2fs_write_inode(new_inode, NULL); + update_inode_page(new_inode); } else { err = f2fs_add_link(new_dentry, old_inode); if (err) @@ -427,7 +441,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir_entry) { inc_nlink(new_dir); - f2fs_write_inode(new_dir, NULL); + update_inode_page(new_dir); } } @@ -445,10 +459,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); - f2fs_write_inode(old_dir, NULL); + update_inode_page(old_dir); } - mutex_unlock_op(sbi, RENAME); + mutex_unlock_op(sbi, ilock); return 0; out_dir: @@ -456,7 +470,7 @@ out_dir: kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } - mutex_unlock_op(sbi, RENAME); + mutex_unlock_op(sbi, ilock); out_old: kunmap(old_page); f2fs_put_page(old_page, 0); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ad3adbee842a..5a7edf90ca45 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -385,6 +385,9 @@ got: /* * Caller should call f2fs_put_dnode(dn). + * Also, it should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op() only if ro is not set RDONLY_NODE. + * In the case of RDONLY_NODE, we don't need to care about mutex. */ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { @@ -415,11 +418,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) bool done = false; if (!nids[i] && mode == ALLOC_NODE) { - mutex_lock_op(sbi, NODE_NEW); - /* alloc new node */ if (!alloc_nid(sbi, &(nids[i]))) { - mutex_unlock_op(sbi, NODE_NEW); err = -ENOSPC; goto release_pages; } @@ -428,14 +428,12 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) npage[i] = new_node_page(dn, noffset[i]); if (IS_ERR(npage[i])) { alloc_nid_failed(sbi, nids[i]); - mutex_unlock_op(sbi, NODE_NEW); err = PTR_ERR(npage[i]); goto release_pages; } set_nid(parent, offset[i - 1], nids[i], i == 1); alloc_nid_done(sbi, nids[i]); - mutex_unlock_op(sbi, NODE_NEW); done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); @@ -745,6 +743,10 @@ fail: return err > 0 ? 0 : err; } +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + */ int remove_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -752,21 +754,16 @@ int remove_inode_page(struct inode *inode) nid_t ino = inode->i_ino; struct dnode_of_data dn; - mutex_lock_op(sbi, NODE_TRUNC); page = get_node_page(sbi, ino); - if (IS_ERR(page)) { - mutex_unlock_op(sbi, NODE_TRUNC); + if (IS_ERR(page)) return PTR_ERR(page); - } if (F2FS_I(inode)->i_xattr_nid) { nid_t nid = F2FS_I(inode)->i_xattr_nid; struct page *npage = get_node_page(sbi, nid); - if (IS_ERR(npage)) { - mutex_unlock_op(sbi, NODE_TRUNC); + if (IS_ERR(npage)) return PTR_ERR(npage); - } F2FS_I(inode)->i_xattr_nid = 0; set_new_dnode(&dn, inode, page, npage, nid); @@ -778,23 +775,18 @@ int remove_inode_page(struct inode *inode) BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); set_new_dnode(&dn, inode, page, page, ino); truncate_node(&dn); - - mutex_unlock_op(sbi, NODE_TRUNC); return 0; } int new_inode_page(struct inode *inode, const struct qstr *name) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *page; struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - mutex_lock_op(sbi, NODE_NEW); page = new_node_page(&dn, 0); init_dent_inode(name, page); - mutex_unlock_op(sbi, NODE_NEW); if (IS_ERR(page)) return PTR_ERR(page); f2fs_put_page(page, 1); @@ -985,7 +977,7 @@ void sync_inode_page(struct dnode_of_data *dn) if (!dn->inode_page_locked) unlock_page(dn->inode_page); } else { - f2fs_write_inode(dn->inode, NULL); + update_inode_page(dn->inode); } } @@ -1102,8 +1094,6 @@ static int f2fs_write_node_page(struct page *page, wait_on_page_writeback(page); - mutex_lock_op(sbi, NODE_WRITE); - /* get old block addr of this node page */ nid = nid_of_node(page); BUG_ON(page->index != nid); @@ -1111,25 +1101,25 @@ static int f2fs_write_node_page(struct page *page, get_node_info(sbi, nid, &ni); /* This page is already truncated */ - if (ni.blk_addr == NULL_ADDR) - goto out; + if (ni.blk_addr == NULL_ADDR) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } if (wbc->for_reclaim) { dec_page_count(sbi, F2FS_DIRTY_NODES); wbc->pages_skipped++; set_page_dirty(page); - mutex_unlock_op(sbi, NODE_WRITE); return AOP_WRITEPAGE_ACTIVATE; } + mutex_lock(&sbi->node_write); set_page_writeback(page); - - /* insert node offset */ write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr); -out: dec_page_count(sbi, F2FS_DIRTY_NODES); - mutex_unlock_op(sbi, NODE_WRITE); + mutex_unlock(&sbi->node_write); unlock_page(page); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 61bdaa755906..f16d12df8e99 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -242,6 +242,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct f2fs_summary sum; struct node_info ni; int err = 0; + int ilock; start = start_bidx_of_node(ofs_of_node(page)); if (IS_INODE(page)) @@ -249,10 +250,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, else end = start + ADDRS_PER_BLOCK; + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, start, ALLOC_NODE); - if (err) + if (err) { + mutex_unlock_op(sbi, ilock); return err; + } wait_on_page_writeback(dn.node_page); @@ -297,6 +302,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); f2fs_put_dnode(&dn); + mutex_unlock_op(sbi, ilock); return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ca5413346653..2643c49c38a5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -556,11 +556,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); - mutex_init(&sbi->write_inode); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); - for (i = 0; i < NR_LOCK_TYPE; i++) + for (i = 0; i < NR_GLOBAL_LOCKS; i++) mutex_init(&sbi->fs_lock[i]); + mutex_init(&sbi->node_write); sbi->por_doing = 0; spin_lock_init(&sbi->stat_lock); init_rwsem(&sbi->bio_sem); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 3bfea80610ff..0b02dce31356 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -307,6 +307,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, int error, found, free, newsize; size_t name_len; char *pval; + int ilock; if (name == NULL) return -EINVAL; @@ -321,7 +322,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, f2fs_balance_fs(sbi); - mutex_lock_op(sbi, NODE_NEW); + ilock = mutex_lock_op(sbi); + if (!fi->i_xattr_nid) { /* Allocate new attribute block */ struct dnode_of_data dn; @@ -433,13 +435,13 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } - f2fs_write_inode(inode, NULL); - mutex_unlock_op(sbi, NODE_NEW); + update_inode_page(inode); + mutex_unlock_op(sbi, ilock); return 0; cleanup: f2fs_put_page(page, 1); exit: - mutex_unlock_op(sbi, NODE_NEW); + mutex_unlock_op(sbi, ilock); return error; } -- cgit v1.2.3 From 3315101f7070013d01bb5396c6bde07b3a8bcbd8 Mon Sep 17 00:00:00 2001 From: Zhihui Zhang Date: Sun, 7 Apr 2013 12:57:04 -0400 Subject: f2fs: fix the logic of IS_DNODE() If (ofs % (NIDS_PER_BLOCK + 1) == 0), the node is an indirect node block. Signed-off-by: Zhihui Zhang Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 271a61c25601..0a2d72f0024d 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -242,7 +242,7 @@ static inline bool IS_DNODE(struct page *node_page) return false; if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { ofs -= 6 + 2 * NIDS_PER_BLOCK; - if ((long int)ofs % (NIDS_PER_BLOCK + 1)) + if (!((long int)ofs % (NIDS_PER_BLOCK + 1))) return false; } return true; -- cgit v1.2.3 From d64f80473bb58f5c8bfcb63220c31a573a07752f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 8 Apr 2013 16:01:00 +0900 Subject: f2fs: write checkpoint before starting FG_GC In order to be aware of prefree and free sections during FG_GC, let's start with write_checkpoint(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 83cec8f868c6..37b05e1c574c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -669,8 +669,10 @@ gc_more: if (!(sbi->sb->s_flags & MS_ACTIVE)) goto stop; - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { gc_type = FG_GC; + write_checkpoint(sbi, false); + } if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) goto stop; -- cgit v1.2.3 From 6224da875ee0cb702c6ffe3456307701106dffb5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 6 Apr 2013 14:44:32 +0900 Subject: f2fs: fix typo mistakes Fix typo mistakes. 1. I think that it should be 'L' instead of 'V'. 2. and try to fix 'Front' instead of 'Frone' Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/segment.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 72a1b30f720a..3c31ec7d633d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -133,7 +133,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) goto end_update; } - /* Frone merge */ + /* Front merge */ if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { fi->ext.fofs--; fi->ext.blk_addr--; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 994bb7bd7b70..26fc0540f144 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -12,7 +12,7 @@ #define NULL_SEGNO ((unsigned int)(~0)) #define NULL_SECNO ((unsigned int)(~0)) -/* V: Logical segment # in volume, R: Relative segment # in main area */ +/* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) -- cgit v1.2.3 From 66348b723bcbf57c2d2630a0c83b77c136e61029 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 12 Apr 2013 10:23:18 +0800 Subject: f2fs: fix error return code in f2fs_fill_super() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Introduce by commit c0d39e(f2fs: fix return values from validate superblock) Signed-off-by: Wei Yongjun Acked-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2643c49c38a5..541f21ffd8f2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -534,7 +534,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi, POSIX_ACL); #endif /* parse mount options */ - if (parse_options(sb, sbi, (char *)data)) + err = parse_options(sb, sbi, (char *)data); + if (err) goto free_sb_buf; sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); -- cgit v1.2.3 From e66509f03e36ef4750bfab8f3a5cf632b313a39b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 20 Apr 2013 01:27:21 +0900 Subject: f2fs: make is_multimedia_file code align with its name The code conditions put inside the function is_multimedia_file are reverse to the name i.e, we need to negate the return to actually check if the file is a multimedia file. So, change the code and usage path to align both the name and comparision conditions. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 841f6b486bd6..1dbf11d2bc87 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -83,7 +83,7 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) int ret; if (sublen > slen) - return 1; + return 0; ret = memcmp(s + slen - sublen, sub, sublen); if (ret) { /* compare upper case */ @@ -91,10 +91,10 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) char upper_sub[8]; for (i = 0; i < sublen && i < sizeof(upper_sub); i++) upper_sub[i] = toupper(sub[i]); - return memcmp(s + slen - sublen, upper_sub, sublen); + return !memcmp(s + slen - sublen, upper_sub, sublen); } - return ret; + return !ret; } /* @@ -108,7 +108,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, int count = le32_to_cpu(sbi->raw_super->extension_count); for (i = 0; i < count; i++) { - if (!is_multimedia_file(name, extlist[i])) { + if (is_multimedia_file(name, extlist[i])) { set_cold_file(inode); break; } -- cgit v1.2.3 From a2a4a7e4abb27c833d4e09ac1d434ab48a64062c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 20 Apr 2013 01:28:40 +0900 Subject: f2fs: add tracepoints for sync & inode operations Add tracepoints in f2fs for tracing the syncing operations like filesystem sync, file sync enter/exit. It will helf to trace the code under debugging scenarios. Also add tracepoints for tracing the various inode operations like building inode, eviction of inode, link/unlike of inodes. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Acked-by: Steven Rostedt [Jaegeuk: combine and modify the tracepoint structures] Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 7 +- fs/f2fs/inode.c | 13 ++- fs/f2fs/namei.c | 3 + fs/f2fs/super.c | 5 ++ include/trace/events/f2fs.h | 195 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 219 insertions(+), 4 deletions(-) create mode 100644 include/trace/events/f2fs.h (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 07be88ddb9f8..0b0ba26ba76e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -25,6 +25,7 @@ #include "segment.h" #include "xattr.h" #include "acl.h" +#include static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -116,9 +117,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (inode->i_sb->s_flags & MS_RDONLY) return 0; + trace_f2fs_sync_file_enter(inode); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) + if (ret) { + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; + } /* guarantee free sections for fsync */ f2fs_balance_fs(sbi); @@ -153,6 +157,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } out: mutex_unlock(&inode->i_mutex); + trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 60105b710958..91ac7f9d88ee 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -16,6 +16,8 @@ #include "f2fs.h" #include "node.h" +#include + void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; @@ -91,13 +93,16 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; - int ret; + int ret = 0; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + + if (!(inode->i_state & I_NEW)) { + trace_f2fs_iget(inode); return inode; + } if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) goto make_now; @@ -139,11 +144,12 @@ make_now: goto bad_inode; } unlock_new_inode(inode); - + trace_f2fs_iget(inode); return inode; bad_inode: iget_failed(inode); + trace_f2fs_iget_exit(inode, ret); return ERR_PTR(ret); } @@ -239,6 +245,7 @@ void f2fs_evict_inode(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int ilock; + trace_f2fs_evict_inode(inode); truncate_inode_pages(&inode->i_data, 0); if (inode->i_ino == F2FS_NODE_INO(sbi) || diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1dbf11d2bc87..c57fd18b769d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -18,6 +18,7 @@ #include "node.h" #include "xattr.h" #include "acl.h" +#include static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) { @@ -235,6 +236,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) int err = -ENOENT; int ilock; + trace_f2fs_unlink_enter(dir, dentry); f2fs_balance_fs(sbi); de = f2fs_find_entry(dir, &dentry->d_name, &page); @@ -255,6 +257,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); fail: + trace_f2fs_unlink_exit(inode, err); return err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 541f21ffd8f2..b015b6cd7bd4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -29,6 +29,9 @@ #include "segment.h" #include "xattr.h" +#define CREATE_TRACE_POINTS +#include + static struct kmem_cache *f2fs_inode_cachep; enum { @@ -134,6 +137,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + trace_f2fs_sync_fs(sb, sync); + if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) return 0; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h new file mode 100644 index 000000000000..03bb0dbf468d --- /dev/null +++ b/include/trace/events/f2fs.h @@ -0,0 +1,195 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM f2fs + +#if !defined(_TRACE_F2FS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_F2FS_H + +#include + +#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) +#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino + +DECLARE_EVENT_CLASS(f2fs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(ino_t, pino) + __field(umode_t, mode) + __field(loff_t, size) + __field(unsigned int, nlink) + __field(blkcnt_t, blocks) + __field(__u8, advise) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pino = F2FS_I(inode)->i_pino; + __entry->mode = inode->i_mode; + __entry->nlink = inode->i_nlink; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->advise = F2FS_I(inode)->i_advise; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pino = %lu, i_mode = 0x%hx, " + "i_size = %lld, i_nlink = %u, i_blocks = %llu, i_advise = 0x%x", + show_dev_ino(__entry), + (unsigned long)__entry->pino, + __entry->mode, + __entry->size, + (unsigned int)__entry->nlink, + (unsigned long long)__entry->blocks, + (unsigned char)__entry->advise) +); + +DECLARE_EVENT_CLASS(f2fs__inode_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, ret = %d", + show_dev_ino(__entry), + __entry->ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_sync_file_exit, + + TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), + + TP_ARGS(inode, need_cp, datasync, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(bool, need_cp) + __field(int, datasync) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->need_cp = need_cp; + __entry->datasync = datasync; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " + "datasync = %d, ret = %d", + show_dev_ino(__entry), + __entry->need_cp ? "needed" : "not needed", + __entry->datasync, + __entry->ret) +); + +TRACE_EVENT(f2fs_sync_fs, + + TP_PROTO(struct super_block *sb, int wait), + + TP_ARGS(sb, wait), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, dirty) + __field(int, wait) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->dirty = F2FS_SB(sb)->s_dirty; + __entry->wait = wait; + ), + + TP_printk("dev = (%d,%d), superblock is %s, wait = %d", + show_dev(__entry), + __entry->dirty ? "dirty" : "not dirty", + __entry->wait) +); + +DEFINE_EVENT(f2fs__inode, f2fs_iget, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_iget_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__inode, f2fs_evict_inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_unlink_enter, + + TP_PROTO(struct inode *dir, struct dentry *dentry), + + TP_ARGS(dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(const char *, name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->size = dir->i_size; + __entry->blocks = dir->i_blocks; + __entry->name = dentry->d_name.name; + ), + + TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, " + "i_blocks = %llu, name = %s", + show_dev_ino(__entry), + __entry->size, + (unsigned long long)__entry->blocks, + __entry->name) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); +#endif /* _TRACE_F2FS_H */ + + /* This part must be outside protection */ +#include -- cgit v1.2.3 From 51dd62493477923723c797c6da60121ed39900ed Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 20 Apr 2013 01:28:52 +0900 Subject: f2fs: add tracepoints for truncate operation add tracepoints for tracing the truncate operations like truncate node/data blocks, f2fs_truncate etc. Tracepoints are added at entry and exit of operation to trace the success & failure of operation. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Acked-by: Steven Rostedt [Jaegeuk: combine and modify the tracepoint structures] Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +++ fs/f2fs/node.c | 20 ++++- include/trace/events/f2fs.h | 176 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0b0ba26ba76e..71efa373cc45 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -193,6 +193,9 @@ static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) sync_inode_page(dn); } dn->ofs_in_node = ofs; + + trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, + dn->ofs_in_node, nr_free); return nr_free; } @@ -229,6 +232,8 @@ static int truncate_blocks(struct inode *inode, u64 from) int count = 0, ilock = -1; int err; + trace_f2fs_truncate_blocks_enter(inode, from); + free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); @@ -239,6 +244,7 @@ static int truncate_blocks(struct inode *inode, u64 from) if (err == -ENOENT) goto free_next; mutex_unlock_op(sbi, ilock); + trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -263,6 +269,7 @@ free_next: /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); + trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -272,6 +279,8 @@ void f2fs_truncate(struct inode *inode) S_ISLNK(inode->i_mode))) return; + trace_f2fs_truncate(inode); + if (!truncate_blocks(inode, i_size_read(inode))) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5a7edf90ca45..5a825502b0b0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -19,6 +19,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; @@ -508,6 +509,7 @@ invalidate: f2fs_put_page(dn->node_page, 1); dn->node_page = NULL; + trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); } static int truncate_dnode(struct dnode_of_data *dn) @@ -548,9 +550,13 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (dn->nid == 0) return NIDS_PER_BLOCK + 1; + trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); + page = get_node_page(sbi, dn->nid); - if (IS_ERR(page)) + if (IS_ERR(page)) { + trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); return PTR_ERR(page); + } rn = (struct f2fs_node *)page_address(page); if (depth < 3) { @@ -592,10 +598,12 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, } else { f2fs_put_page(page, 1); } + trace_f2fs_truncate_nodes_exit(dn->inode, freed); return freed; out_err: f2fs_put_page(page, 1); + trace_f2fs_truncate_nodes_exit(dn->inode, ret); return ret; } @@ -650,6 +658,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, fail: for (i = depth - 3; i >= 0; i--) f2fs_put_page(pages[i], 1); + + trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); + return err; } @@ -666,11 +677,15 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) struct dnode_of_data dn; struct page *page; + trace_f2fs_truncate_inode_blocks_enter(inode, from); + level = get_node_path(from, offset, noffset); page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) + if (IS_ERR(page)) { + trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); return PTR_ERR(page); + } set_new_dnode(&dn, inode, page, NULL, 0); unlock_page(page); @@ -740,6 +755,7 @@ skip_partial: } fail: f2fs_put_page(page, 0); + trace_f2fs_truncate_inode_blocks_exit(inode, err); return err > 0 ? 0 : err; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 03bb0dbf468d..4bbd19f79d27 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -189,6 +189,182 @@ DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, TP_ARGS(inode, ret) ); + +DEFINE_EVENT(f2fs__inode, f2fs_truncate, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +TRACE_EVENT(f2fs_truncate_data_blocks_range, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs, int free), + + TP_ARGS(inode, nid, ofs, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(unsigned int, ofs) + __field(int, free) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->ofs = ofs; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, offset = %u, freed = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + __entry->ofs, + __entry->free) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_op, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(u64, from) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->from = from; + ), + + TP_printk("dev = (%d,%d), ino = %lu, i_size = %lld, i_blocks = %llu, " + "start file offset = %llu", + show_dev_ino(__entry), + __entry->size, + (unsigned long long)__entry->blocks, + (unsigned long long)__entry->from) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_inode_blocks_enter, + + TP_PROTO(struct inode *inode, u64 from), + + TP_ARGS(inode, from) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_inode_blocks_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DECLARE_EVENT_CLASS(f2fs__truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid) + __field(block_t, blk_addr) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid = nid; + __entry->blk_addr = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, nid = %u, block_address = 0x%llx", + show_dev_ino(__entry), + (unsigned int)__entry->nid, + (unsigned long long)__entry->blk_addr) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_nodes_enter, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_nodes_exit, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + +DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, + + TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr), + + TP_ARGS(inode, nid, blk_addr) +); + +TRACE_EVENT(f2fs_truncate_partial_nodes, + + TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), + + TP_ARGS(inode, nid, depth, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(nid_t, nid[3]) + __field(int, depth) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->nid[0] = nid[0]; + __entry->nid[1] = nid[1]; + __entry->nid[2] = nid[2]; + __entry->depth = depth; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), ino = %lu, " + "nid[0] = %u, nid[1] = %u, nid[2] = %u, depth = %d, err = %d", + show_dev_ino(__entry), + (unsigned int)__entry->nid[0], + (unsigned int)__entry->nid[1], + (unsigned int)__entry->nid[2], + __entry->depth, + __entry->err) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 848753aa3b19a6513315ca54f22ba1e2732ea94a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Apr 2013 16:38:02 +0900 Subject: f2fs: add tracepoint for tracing the page i/o Add tracepoints for page i/o operations and block allocation tracing during page read operation. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Acked-by: Steven Rostedt [Jaegeuk: combine and modify the tracepoint structures] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++-- include/trace/events/f2fs.h | 74 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3c31ec7d633d..8e8b14d714fc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -21,6 +21,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include /* * Lock ordering for the change of data block address: @@ -348,6 +349,8 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, struct block_device *bdev = sbi->sb->s_bdev; struct bio *bio; + trace_f2fs_readpage(page, blk_addr, type); + down_read(&sbi->bio_sem); /* Allocate a new bio */ @@ -388,14 +391,18 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, /* Get the page offset from the block offset(iblock) */ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); - if (check_extent_cache(inode, pgofs, bh_result)) + if (check_extent_cache(inode, pgofs, bh_result)) { + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); return 0; + } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); - if (err) + if (err) { + trace_f2fs_get_data_block(inode, iblock, bh_result, err); return (err == -ENOENT) ? 0 : err; + } /* It does not support data allocation */ BUG_ON(create); @@ -420,6 +427,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, bh_result->b_size = (i << blkbits); } f2fs_put_dnode(&dn); + trace_f2fs_get_data_block(inode, iblock, bh_result, 0); return 0; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4bbd19f79d27..924e69aa984f 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -8,6 +8,15 @@ #define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) #define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino +#define show_bio_type(type) \ + __print_symbolic(type, \ + { READ, "READ" }, \ + { READA, "READAHEAD" }, \ + { READ_SYNC, "READ_SYNC" }, \ + { WRITE, "WRITE" }, \ + { WRITE_SYNC, "WRITE_SYNC" }, \ + { WRITE_FLUSH, "WRITE_FLUSH" }, \ + { WRITE_FUA, "WRITE_FUA" }) DECLARE_EVENT_CLASS(f2fs__inode, @@ -365,6 +374,71 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); +TRACE_EVENT_CONDITION(f2fs_readpage, + + TP_PROTO(struct page *page, sector_t blkaddr, int type), + + TP_ARGS(page, blkaddr, type), + + TP_CONDITION(page->mapping), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(sector_t, blkaddr) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->blkaddr = blkaddr; + __entry->type = type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, bio_type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->type)) +); + +TRACE_EVENT(f2fs_get_data_block, + TP_PROTO(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int ret), + + TP_ARGS(inode, iblock, bh, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(sector_t, iblock) + __field(sector_t, bh_start) + __field(size_t, bh_size) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->iblock = iblock; + __entry->bh_start = bh->b_blocknr; + __entry->bh_size = bh->b_size; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " + "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d", + show_dev_ino(__entry), + (unsigned long long)__entry->iblock, + (unsigned long long)__entry->bh_start, + (unsigned long long)__entry->bh_size, + __entry->ret) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 8e46b3ed11b750a740fec0a313ad9118059fc37b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Apr 2013 16:42:53 +0900 Subject: f2fs: add tracepoints for GC threads Add tracepoints for tracing the garbage collector threads in f2fs with status of collection & type. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Acked-by: Steven Rostedt [Jaegeuk: modify slightly to show information] Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 +++ include/trace/events/f2fs.h | 75 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 37b05e1c574c..1ca332455ee2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -23,6 +23,7 @@ #include "node.h" #include "segment.h" #include "gc.h" +#include static struct kmem_cache *winode_slab; @@ -301,6 +302,10 @@ got_it: set_bit(secno, dirty_i->victim_secmap); } *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; + + trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, + sbi->cur_victim_sec, + prefree_segments(sbi), free_segments(sbi)); } mutex_unlock(&dirty_i->seglist_lock); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 924e69aa984f..6f7cf7af8899 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -18,6 +18,33 @@ { WRITE_FLUSH, "WRITE_FLUSH" }, \ { WRITE_FUA, "WRITE_FUA" }) +#define show_data_type(type) \ + __print_symbolic(type, \ + { CURSEG_HOT_DATA, "Hot DATA" }, \ + { CURSEG_WARM_DATA, "Warm DATA" }, \ + { CURSEG_COLD_DATA, "Cold DATA" }, \ + { CURSEG_HOT_NODE, "Hot NODE" }, \ + { CURSEG_WARM_NODE, "Warm NODE" }, \ + { CURSEG_COLD_NODE, "Cold NODE" }, \ + { NO_CHECK_TYPE, "No TYPE" }) + +#define show_gc_type(type) \ + __print_symbolic(type, \ + { FG_GC, "Foreground GC" }, \ + { BG_GC, "Background GC" }) + +#define show_alloc_mode(type) \ + __print_symbolic(type, \ + { LFS, "LFS-mode" }, \ + { SSR, "SSR-mode" }) + +#define show_victim_policy(type) \ + __print_symbolic(type, \ + { GC_GREEDY, "Greedy" }, \ + { GC_CB, "Cost-Benefit" }) + +struct victim_sel_policy; + DECLARE_EVENT_CLASS(f2fs__inode, TP_PROTO(struct inode *inode), @@ -439,6 +466,54 @@ TRACE_EVENT(f2fs_get_data_block, __entry->ret) ); +TRACE_EVENT(f2fs_get_victim, + + TP_PROTO(struct super_block *sb, int type, int gc_type, + struct victim_sel_policy *p, unsigned int pre_victim, + unsigned int prefree, unsigned int free), + + TP_ARGS(sb, type, gc_type, p, pre_victim, prefree, free), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, type) + __field(int, gc_type) + __field(int, alloc_mode) + __field(int, gc_mode) + __field(unsigned int, victim) + __field(unsigned int, ofs_unit) + __field(unsigned int, pre_victim) + __field(unsigned int, prefree) + __field(unsigned int, free) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->type = type; + __entry->gc_type = gc_type; + __entry->alloc_mode = p->alloc_mode; + __entry->gc_mode = p->gc_mode; + __entry->victim = p->min_segno; + __entry->ofs_unit = p->ofs_unit; + __entry->pre_victim = pre_victim; + __entry->prefree = prefree; + __entry->free = free; + ), + + TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " + "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", + show_dev(__entry), + show_data_type(__entry->type), + show_gc_type(__entry->gc_type), + show_alloc_mode(__entry->alloc_mode), + show_victim_policy(__entry->gc_mode), + __entry->victim, + __entry->ofs_unit, + (int)__entry->pre_victim, + __entry->prefree, + __entry->free) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From c01e285324793a86c2c90c8451ed6feb04b3d310 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Apr 2013 17:00:52 +0900 Subject: f2fs: add tracepoints to debug the block allocation Add tracepoints to debug the block allocation & fallocate. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Acked-by: Steven Rostedt [Jaegeuk: enhance information] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/file.c | 1 + include/trace/events/f2fs.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e8b14d714fc..1220b5c2ea21 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -55,6 +55,8 @@ int reserve_new_block(struct dnode_of_data *dn) if (!inc_valid_block_count(sbi, dn->inode, 1)) return -ENOSPC; + trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); + __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; sync_inode_page(dn); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 71efa373cc45..0e56db2d3cc9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -546,6 +546,7 @@ static long f2fs_fallocate(struct file *file, int mode, inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); } + trace_f2fs_fallocate(inode, mode, offset, len, ret); return ret; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 6f7cf7af8899..56b6240e6c01 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -514,6 +514,70 @@ TRACE_EVENT(f2fs_get_victim, __entry->free) ); +TRACE_EVENT(f2fs_fallocate, + + TP_PROTO(struct inode *inode, int mode, + loff_t offset, loff_t len, int ret), + + TP_ARGS(inode, mode, offset, len, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, mode) + __field(loff_t, offset) + __field(loff_t, len) + __field(loff_t, size) + __field(blkcnt_t, blocks) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = mode; + __entry->offset = offset; + __entry->len = len; + __entry->size = inode->i_size; + __entry->blocks = inode->i_blocks; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, mode = %x, offset = %lld, " + "len = %lld, i_size = %lld, i_blocks = %llu, ret = %d", + show_dev_ino(__entry), + __entry->mode, + (unsigned long long)__entry->offset, + (unsigned long long)__entry->len, + (unsigned long long)__entry->size, + (unsigned long long)__entry->blocks, + __entry->ret) +); + +TRACE_EVENT(f2fs_reserve_new_block, + + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), + + TP_ARGS(inode, nid, ofs_in_node), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(nid_t, nid) + __field(unsigned int, ofs_in_node) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->nid = nid; + __entry->ofs_in_node = ofs_in_node; + ), + + TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", + show_dev(__entry), + (unsigned int)__entry->nid, + __entry->ofs_in_node) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 6ec178dac6768204a6edf70f4a53d40b691c12b4 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Apr 2013 17:51:43 +0900 Subject: f2fs: add tracepoints for write page operations Add tracepoints to debug the various page write operation like data pages, meta pages. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Acked-by: Steven Rostedt [Jaegeuk: remove unnecessary tracepoints] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++ include/trace/events/f2fs.h | 67 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7c67ec2b63c0..0a652848a9f8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -18,6 +18,7 @@ #include "f2fs.h" #include "segment.h" #include "node.h" +#include /* * This function balances dirty node and dentry pages. @@ -691,6 +692,9 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, struct bio_private *p = sbi->bio[btype]->bi_private; p->sbi = sbi; sbi->bio[btype]->bi_end_io = f2fs_end_io_write; + + trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); + if (type == META_FLUSH) { DECLARE_COMPLETION_ONSTACK(wait); p->is_sync = true; @@ -745,6 +749,7 @@ alloc_new: sbi->last_block_in_bio[type] = blk_addr; up_write(&sbi->bio_sem); + trace_f2fs_submit_write_page(page, blk_addr, type); } static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 56b6240e6c01..ae2da92e0768 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -8,6 +8,14 @@ #define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) #define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino + +#define show_block_type(type) \ + __print_symbolic(type, \ + { NODE, "NODE" }, \ + { DATA, "DATA" }, \ + { META, "META" }, \ + { META_FLUSH, "META_FLUSH" }) + #define show_bio_type(type) \ __print_symbolic(type, \ { READ, "READ" }, \ @@ -578,6 +586,65 @@ TRACE_EVENT(f2fs_reserve_new_block, __entry->ofs_in_node) ); +TRACE_EVENT(f2fs_do_submit_bio, + + TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), + + TP_ARGS(sb, btype, sync, bio), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, btype) + __field(bool, sync) + __field(sector_t, sector) + __field(unsigned int, size) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->btype = btype; + __entry->sync = sync; + __entry->sector = bio->bi_sector; + __entry->size = bio->bi_size; + ), + + TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", + show_dev(__entry), + show_block_type(__entry->btype), + __entry->sync ? "sync" : "no sync", + (unsigned long long)__entry->sector, + __entry->size) +); + +TRACE_EVENT(f2fs_submit_write_page, + + TP_PROTO(struct page *page, block_t blk_addr, int type), + + TP_ARGS(page, blk_addr, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, type) + __field(pgoff_t, index) + __field(block_t, block) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->type = type; + __entry->index = page->index; + __entry->block = blk_addr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", + show_dev_ino(__entry), + show_block_type(__entry->type), + (unsigned long)__entry->index, + (unsigned long long)__entry->block) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 2af4bd6ca556d690ee166200abd16fdbe749782e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 23 Apr 2013 18:26:54 +0900 Subject: f2fs: add tracepoints to debug checkpoint request Add tracepoints to debug checkpoint request. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Acked-by: Steven Rostedt [Jaegeuk: change expressions] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++++++ include/trace/events/f2fs.h | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index be6aa2eef894..f54b83b4d90b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -20,6 +20,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include static struct kmem_cache *orphan_entry_slab; static struct kmem_cache *inode_entry_slab; @@ -714,9 +715,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); + mutex_lock(&sbi->cp_mutex); block_operations(sbi); + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); + f2fs_submit_bio(sbi, DATA, true); f2fs_submit_bio(sbi, NODE, true); f2fs_submit_bio(sbi, META, true); @@ -738,6 +743,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) unblock_operations(sbi); mutex_unlock(&sbi->cp_mutex); + + trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); } void init_orphan_info(struct f2fs_sb_info *sbi) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ae2da92e0768..b2b2f72a023d 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -645,6 +645,30 @@ TRACE_EVENT(f2fs_submit_write_page, (unsigned long long)__entry->block) ); +TRACE_EVENT(f2fs_write_checkpoint, + + TP_PROTO(struct super_block *sb, bool is_umount, char *msg), + + TP_ARGS(sb, is_umount, msg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(bool, is_umount) + __field(char *, msg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->is_umount = is_umount; + __entry->msg = msg; + ), + + TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", + show_dev(__entry), + __entry->is_umount ? "clean umount" : "consistency", + __entry->msg) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 6cb968d9b0358c7e807416a85699a526e820083c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 24 Apr 2013 13:00:14 +0900 Subject: f2fs: avoid frequent background GC If there is no victim segments selected by background GC, let's wait a little bit longer time to collect dirty segments. By default, let's give 5 minutes. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 --- fs/f2fs/gc.h | 12 +++++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1ca332455ee2..6ed3263eeee8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -82,9 +82,6 @@ static int gc_thread_func(void *data) /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) wait_ms = GC_THREAD_NOGC_SLEEP_TIME; - else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) - wait_ms = GC_THREAD_MAX_SLEEP_TIME; - } while (!kthread_should_stop()); return 0; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 30b2db003acd..2c6a6bd08322 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -13,9 +13,9 @@ * whether IO subsystem is idle * or not */ -#define GC_THREAD_MIN_SLEEP_TIME 10000 /* milliseconds */ -#define GC_THREAD_MAX_SLEEP_TIME 30000 -#define GC_THREAD_NOGC_SLEEP_TIME 10000 +#define GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ +#define GC_THREAD_MAX_SLEEP_TIME 60000 +#define GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ @@ -58,6 +58,9 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) static inline long increase_sleep_time(long wait) { + if (wait == GC_THREAD_NOGC_SLEEP_TIME) + return wait; + wait += GC_THREAD_MIN_SLEEP_TIME; if (wait > GC_THREAD_MAX_SLEEP_TIME) wait = GC_THREAD_MAX_SLEEP_TIME; @@ -66,6 +69,9 @@ static inline long increase_sleep_time(long wait) static inline long decrease_sleep_time(long wait) { + if (wait == GC_THREAD_NOGC_SLEEP_TIME) + wait = GC_THREAD_MAX_SLEEP_TIME; + wait -= GC_THREAD_MIN_SLEEP_TIME; if (wait <= GC_THREAD_MIN_SLEEP_TIME) wait = GC_THREAD_MIN_SLEEP_TIME; -- cgit v1.2.3 From c718379b6b0954a04a153d7e5dc8b3136a301ee6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 24 Apr 2013 13:19:56 +0900 Subject: f2fs: give a chance to merge IOs by IO scheduler Previously, background GC submits many 4KB read requests to load victim blocks and/or its (i)node blocks. ... f2fs_gc : f2fs_readpage: ino = 1, page_index = 0xb61, blkaddr = 0x3b964ed f2fs_gc : block_rq_complete: 8,16 R () 499854968 + 8 [0] f2fs_gc : f2fs_readpage: ino = 1, page_index = 0xb6f, blkaddr = 0x3b964ee f2fs_gc : block_rq_complete: 8,16 R () 499854976 + 8 [0] f2fs_gc : f2fs_readpage: ino = 1, page_index = 0xb79, blkaddr = 0x3b964ef f2fs_gc : block_rq_complete: 8,16 R () 499854984 + 8 [0] ... However, by the fact that many IOs are sequential, we can give a chance to merge the IOs by IO scheduler. In order to do that, let's use blk_plug. ... f2fs_gc : f2fs_iget: ino = 143 f2fs_gc : f2fs_readpage: ino = 143, page_index = 0x1c6, blkaddr = 0x2e6ee f2fs_gc : f2fs_iget: ino = 143 f2fs_gc : f2fs_readpage: ino = 143, page_index = 0x1c7, blkaddr = 0x2e6ef : block_rq_complete: 8,16 R () 1519616 + 8 [0] : block_rq_complete: 8,16 R () 1519848 + 8 [0] : block_rq_complete: 8,16 R () 1520432 + 96 [0] : block_rq_complete: 8,16 R () 1520536 + 104 [0] : block_rq_complete: 8,16 R () 1521008 + 112 [0] : block_rq_complete: 8,16 R () 1521440 + 152 [0] : block_rq_complete: 8,16 R () 1521688 + 144 [0] : block_rq_complete: 8,16 R () 1522128 + 192 [0] : block_rq_complete: 8,16 R () 1523256 + 328 [0] ... Note that this issue should be addressed in checkpoint, and some readahead flows too. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++++ fs/f2fs/data.c | 15 +++++++++------ fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 11 ++++++++++- fs/f2fs/node.c | 9 +++++++++ 7 files changed, 36 insertions(+), 10 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f54b83b4d90b..590ea50c80a7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -549,6 +549,10 @@ static void block_operations(struct f2fs_sb_info *sbi) .nr_to_write = LONG_MAX, .for_reclaim = 0, }; + struct blk_plug plug; + + blk_start_plug(&plug); + retry_flush_dents: mutex_lock_all(sbi); @@ -571,6 +575,7 @@ retry_flush_nodes: sync_node_pages(sbi, 0, &wbc); goto retry_flush_nodes; } + blk_finish_plug(&plug); } static void unblock_operations(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1220b5c2ea21..eba7e84d1ffd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -172,7 +172,7 @@ end_update: return; } -struct page *find_data_page(struct inode *inode, pgoff_t index) +struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; @@ -207,11 +207,14 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) return page; } - err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); - wait_on_page_locked(page); - if (!PageUptodate(page)) { - f2fs_put_page(page, 0); - return ERR_PTR(-EIO); + err = f2fs_readpage(sbi, page, dn.data_blkaddr, + sync ? READ_SYNC : READA); + if (sync) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 0); + return ERR_PTR(-EIO); + } } return page; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cd3342d4a3a7..3ddb1bc7d07a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -148,7 +148,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, for (; bidx < end_block; bidx++) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = find_data_page(dir, bidx); + dentry_page = find_data_page(dir, bidx, true); if (IS_ERR(dentry_page)) { room = true; continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 06cc75c66c88..6283c8d77c2e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1023,7 +1023,7 @@ void destroy_checkpoint_caches(void); */ int reserve_new_block(struct dnode_of_data *); void update_extent_cache(block_t, struct dnode_of_data *); -struct page *find_data_page(struct inode *, pgoff_t); +struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, pgoff_t, bool); int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0e56db2d3cc9..9dfcdab5ea7c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -212,7 +212,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) if (!offset) return; - page = find_data_page(inode, from >> PAGE_CACHE_SHIFT); + page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); if (IS_ERR(page)) return; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6ed3263eeee8..25a1f7e593e0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -386,6 +386,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, next_step: entry = sum; + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; @@ -417,6 +418,7 @@ next_step: f2fs_put_page(node_page, 1); stat_inc_node_blk_count(sbi, 1); } + if (initial) { initial = false; goto next_step; @@ -545,6 +547,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, next_step: entry = sum; + for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { struct page *data_page; struct inode *inode; @@ -582,7 +585,7 @@ next_step: continue; data_page = find_data_page(inode, - start_bidx + ofs_in_node); + start_bidx + ofs_in_node, false); if (IS_ERR(data_page)) goto next_iput; @@ -603,6 +606,7 @@ next_step: next_iput: iput(inode); } + if (++phase < 4) goto next_step; @@ -636,12 +640,15 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, { struct page *sum_page; struct f2fs_summary_block *sum; + struct blk_plug plug; /* read segment summary of victim */ sum_page = get_sum_page(sbi, segno); if (IS_ERR(sum_page)) return; + blk_start_plug(&plug); + sum = page_address(sum_page); switch (GET_SUM_TYPE((&sum->footer))) { @@ -652,6 +659,8 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); break; } + blk_finish_plug(&plug); + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); stat_inc_call_count(sbi->stat_info); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5a825502b0b0..a0aa0446a237 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -89,10 +89,13 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) { struct address_space *mapping = sbi->meta_inode->i_mapping; struct f2fs_nm_info *nm_i = NM_I(sbi); + struct blk_plug plug; struct page *page; pgoff_t index; int i; + blk_start_plug(&plug); + for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { if (nid >= nm_i->max_nid) nid = 0; @@ -110,6 +113,7 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) f2fs_put_page(page, 0); } + blk_finish_plug(&plug); } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) @@ -942,6 +946,7 @@ struct page *get_node_page_ra(struct page *parent, int start) { struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); struct address_space *mapping = sbi->node_inode->i_mapping; + struct blk_plug plug; struct page *page; int err, i, end; nid_t nid; @@ -961,6 +966,8 @@ struct page *get_node_page_ra(struct page *parent, int start) else if (err == LOCKED_PAGE) goto page_hit; + blk_start_plug(&plug); + /* Then, try readahead for siblings of the desired node */ end = start + MAX_RA_NODE; end = min(end, NIDS_PER_BLOCK); @@ -971,6 +978,8 @@ struct page *get_node_page_ra(struct page *parent, int start) ra_node_page(sbi, nid); } + blk_finish_plug(&plug); + lock_page(page); page_hit: -- cgit v1.2.3 From 8680441caa5465a2cd87b6be857be4378df46700 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 25 Apr 2013 11:45:21 +0900 Subject: f2fs: add REQ_META about metadata requests for submit Adding REQ_META for all the metadata requests can help in improving the FS performance, if the underlying device supports TAGGING. So, when considering the submit_bio path for all the f2fs requests. We can add REQ_META for all the META requests. As a precursor to this change we considered the commit 4265900e0be653f5b78baf2816857ef57cf1332f 'mmc: MMC-4.5 Data Tag Support' Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0a652848a9f8..392ccb3d10b9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -688,6 +688,9 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) rw = WRITE_FLUSH_FUA; + if (btype == META) + rw |= REQ_META; + if (sbi->bio[btype]) { struct bio_private *p = sbi->bio[btype]->bi_private; p->sbi = sbi; -- cgit v1.2.3 From 9198aceb53a493d1be0f3a5a1ce13c07a6fdcd26 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 25 Apr 2013 13:21:12 +0900 Subject: f2fs: check nid == 0 in add_free_nid It is more obvious that add_free_nid checks whether the free nid is zero or not. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a0aa0446a237..c8f48d436487 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1245,6 +1245,10 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) if (nm_i->fcnt > 2 * MAX_FREE_NIDS) return 0; + + /* 0 nid should not be used */ + if (nid == 0) + return 0; retry: i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); if (!i) { @@ -1286,10 +1290,6 @@ static int scan_nat_page(struct f2fs_nm_info *nm_i, int fcnt = 0; int i; - /* 0 nid should not be used */ - if (start_nid == 0) - ++start_nid; - i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { -- cgit v1.2.3 From d70b4f53b950676228297f0b204f2e1512c1ff6c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 25 Apr 2013 13:24:33 +0900 Subject: f2fs: add a tracepoint on f2fs_new_inode This can help when debugging the free nid allocation flows. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 3 ++- include/trace/events/f2fs.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c57fd18b769d..4aa26e53c935 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -63,7 +63,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; goto out; } - + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -71,6 +71,7 @@ out: clear_nlink(inode); unlock_new_inode(inode); fail: + trace_f2fs_new_inode(inode, err); iput(inode); if (nid_free) alloc_nid_failed(sbi, ino); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index b2b2f72a023d..52ae54828eda 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -197,6 +197,13 @@ DEFINE_EVENT(f2fs__inode, f2fs_evict_inode, TP_ARGS(inode) ); +DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + TRACE_EVENT(f2fs_unlink_enter, TP_PROTO(struct inode *dir, struct dentry *dentry), -- cgit v1.2.3 From 55008d845d233396ed374473da4613cee691aa03 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 25 Apr 2013 16:05:51 +0900 Subject: f2fs: enhance alloc_nid and build_free_nids flows In order to avoid build_free_nid lock contention, let's change the order of function calls as follows. At first, check whether there is enough free nids. - If available, just get a free nid with spin_lock without any overhead. - Otherwise, conduct build_free_nids. : scan nat pages, journal nat entries, and nat cache entries. We should consider carefullly not to serve free nids intermediately made by build_free_nids. We can get stable free nids only after build_free_nids is done. Reviewed-by: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 82 ++++++++++++++++++++++++++-------------------------------- 2 files changed, 37 insertions(+), 47 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6283c8d77c2e..20aab02f2a42 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -190,7 +190,6 @@ static inline void set_raw_extent(struct extent_info *ext, struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ - nid_t init_scan_nid; /* the first nid to be scanned */ nid_t next_scan_nid; /* the next nid to be scanned */ /* NAT cache management */ @@ -360,6 +359,7 @@ struct f2fs_sb_info { struct mutex writepages; /* mutex for writepages() */ unsigned char next_lock_num; /* round-robin global locks */ int por_doing; /* recovery is doing or not */ + int on_build_free_nids; /* build_free_nids is doing */ /* for orphan inode management */ struct list_head orphan_inode_list; /* orphan inode list */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c8f48d436487..aede91071f71 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1309,14 +1309,14 @@ static void build_free_nids(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - nid_t nid = 0; - bool is_cycled = false; - int fcnt = 0; - int i; + int fcnt = 0, i = 0; + nid_t nid = nm_i->next_scan_nid; - nid = nm_i->next_scan_nid; - nm_i->init_scan_nid = nid; + /* Enough entries */ + if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + return; + /* readahead nat pages to be scanned */ ra_nat_pages(sbi, nid); while (1) { @@ -1326,19 +1326,15 @@ static void build_free_nids(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); - - if (nid >= nm_i->max_nid) { + if (nid >= nm_i->max_nid) nid = 0; - is_cycled = true; - } - if (fcnt > MAX_FREE_NIDS) - break; - if (is_cycled && nm_i->init_scan_nid <= nid) + + if (i++ == FREE_NID_PAGES) break; } - /* go to the next nat page in order to reuse free nids first */ - nm_i->next_scan_nid = nm_i->init_scan_nid + NAT_ENTRY_PER_BLOCK; + /* go to the next free nat pages to find free nids abundantly */ + nm_i->next_scan_nid = nid; /* find free nids from current sum_pages */ mutex_lock(&curseg->curseg_mutex); @@ -1375,41 +1371,36 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; struct list_head *this; retry: - mutex_lock(&nm_i->build_lock); - if (!nm_i->fcnt) { - /* scan NAT in order to build free nid list */ - build_free_nids(sbi); - if (!nm_i->fcnt) { - mutex_unlock(&nm_i->build_lock); - return false; - } - } - mutex_unlock(&nm_i->build_lock); + if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) + return false; - /* - * We check fcnt again since previous check is racy as - * we didn't hold free_nid_list_lock. So other thread - * could consume all of free nids. - */ spin_lock(&nm_i->free_nid_list_lock); - if (!nm_i->fcnt) { - spin_unlock(&nm_i->free_nid_list_lock); - goto retry; - } - BUG_ON(list_empty(&nm_i->free_nid_list)); - list_for_each(this, &nm_i->free_nid_list) { - i = list_entry(this, struct free_nid, list); - if (i->state == NID_NEW) - break; - } + /* We should not use stale free nids created by build_free_nids */ + if (nm_i->fcnt && !sbi->on_build_free_nids) { + BUG_ON(list_empty(&nm_i->free_nid_list)); + list_for_each(this, &nm_i->free_nid_list) { + i = list_entry(this, struct free_nid, list); + if (i->state == NID_NEW) + break; + } - BUG_ON(i->state != NID_NEW); - *nid = i->nid; - i->state = NID_ALLOC; - nm_i->fcnt--; + BUG_ON(i->state != NID_NEW); + *nid = i->nid; + i->state = NID_ALLOC; + nm_i->fcnt--; + spin_unlock(&nm_i->free_nid_list_lock); + return true; + } spin_unlock(&nm_i->free_nid_list_lock); - return true; + + /* Let's scan nat pages and its caches to get free nids */ + mutex_lock(&nm_i->build_lock); + sbi->on_build_free_nids = 1; + build_free_nids(sbi); + sbi->on_build_free_nids = 0; + mutex_unlock(&nm_i->build_lock); + goto retry; } /* @@ -1696,7 +1687,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi) spin_lock_init(&nm_i->free_nid_list_lock); rwlock_init(&nm_i->nat_tree_lock); - nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); -- cgit v1.2.3 From afcb7ca01f47b0481e0b248d1542d0934fa70767 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 26 Apr 2013 11:55:17 +0900 Subject: f2fs: check truncation of mapping after lock_page We call lock_page when we need to update a page after readpage. Between grab and lock page, the page can be truncated by other thread. So, we should check the page after lock_page whether it was truncated or not. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++++ fs/f2fs/data.c | 18 +++++++++++++++--- fs/f2fs/file.c | 4 ++++ fs/f2fs/node.c | 20 ++++++++++++++++---- 4 files changed, 39 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 590ea50c80a7..b1de01da1a40 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -65,6 +65,10 @@ repeat: goto repeat; lock_page(page); + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } out: mark_page_accessed(page); return page; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eba7e84d1ffd..2db9380f5dda 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -240,7 +240,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) if (dn.data_blkaddr == NULL_ADDR) return ERR_PTR(-ENOENT); - +repeat: page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -260,6 +260,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) f2fs_put_page(page, 1); return ERR_PTR(-EIO); } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } return page; } @@ -291,7 +295,7 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, } } f2fs_put_dnode(&dn); - +repeat: page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -311,6 +315,10 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, f2fs_put_page(page, 1); return ERR_PTR(-EIO); } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } } if (new_i_size && @@ -611,7 +619,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, *fsdata = NULL; f2fs_balance_fs(sbi); - +repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; @@ -656,6 +664,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_put_page(page, 1); return -EIO; } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } } out: SetPageUptodate(page); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9dfcdab5ea7c..3e2c2cbc500c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -217,6 +217,10 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) return; lock_page(page); + if (page->mapping != inode->i_mapping) { + f2fs_put_page(page, 1); + return; + } wait_on_page_writeback(page); zero_user(page, offset, PAGE_CACHE_SIZE - offset); set_page_dirty(page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index aede91071f71..6ff017245522 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -674,6 +674,7 @@ fail: int truncate_inode_blocks(struct inode *inode, pgoff_t from) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *node_mapping = sbi->node_inode->i_mapping; int err = 0, cont = 1; int level, offset[4], noffset[4]; unsigned int nofs = 0; @@ -684,7 +685,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(from, offset, noffset); - +restart: page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); @@ -748,6 +749,10 @@ skip_partial: if (offset[1] == 0 && rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); + if (page->mapping != node_mapping) { + f2fs_put_page(page, 1); + goto restart; + } wait_on_page_writeback(page); rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); @@ -916,7 +921,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) struct address_space *mapping = sbi->node_inode->i_mapping; struct page *page; int err; - +repeat: page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); @@ -932,6 +937,10 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) f2fs_put_page(page, 1); return ERR_PTR(-EIO); } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } got_it: BUG_ON(nid != nid_of_node(page)); mark_page_accessed(page); @@ -955,7 +964,7 @@ struct page *get_node_page_ra(struct page *parent, int start) nid = get_nid(parent, start, false); if (!nid) return ERR_PTR(-ENOENT); - +repeat: page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); @@ -981,7 +990,10 @@ struct page *get_node_page_ra(struct page *parent, int start) blk_finish_plug(&plug); lock_page(page); - + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } page_hit: if (!PageUptodate(page)) { f2fs_put_page(page, 1); -- cgit v1.2.3 From 6cac3759ce118a87103ce4342e6de98215d01787 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Sun, 28 Apr 2013 19:16:06 +0800 Subject: f2fs: fix inconsistent using of NM_WOUT_THRESHOLD try_to_free_nats() is usually called with parameter nr_shrink as "nm_i->nat_cnt - NM_WOUT_THRESHOLD" by flush_nat_entries() during checkpointing process. However, this is inconsistent with the actual threshold check as "if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)" , which will ignore the free_nats requests when NM_WOUT_THRESHOLD < nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD So fix the threshold check condition. Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6ff017245522..f14eb7b8b2c4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -244,7 +244,7 @@ static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD) + if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) return 0; write_lock(&nm_i->nat_tree_lock); -- cgit v1.2.3 From b743ba78ae4c7c6a6e08e623af824b6208f58019 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Sun, 28 Apr 2013 19:16:07 +0800 Subject: f2fs: remove useless #include as we're now using sysfs as debug entry. Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 1 - fs/f2fs/gc.c | 1 - fs/f2fs/super.c | 1 - 3 files changed, 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index c3bf343b0b82..8d9943786c31 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 25a1f7e593e0..14961593e93c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b015b6cd7bd4..5835aaf2fe2e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From ac5d156c78a68b39955ee9b09498ba93831c77d7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 29 Apr 2013 16:58:39 +0900 Subject: f2fs: modify the number of issued pages to merge IOs When testing f2fs on an SSD, I found some 128 page IOs followed by 1 page IO were issued by f2fs_write_node_pages. This means that there were some mishandling flows which degrades performance. Previous f2fs_write_node_pages determines the number of pages to be written, nr_to_write, as follows. 1. The bio_get_nr_vecs returns 129 pages. 2. The bio_alloc makes a room for 128 pages. 3. The initial 128 pages go into one bio. 4. The existing bio is submitted, and a new bio is prepared for the last 1 page. 5. Finally, sync_node_pages submits the last 1 page bio. The problem is from the use of bio_get_nr_vecs, so this patch replace it with max_hw_blocks using queue_max_sectors. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 ++---- fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 11 +++++++++++ 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f14eb7b8b2c4..7209d637f942 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1171,7 +1171,6 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - struct block_device *bdev = sbi->sb->s_bdev; long nr_to_write = wbc->nr_to_write; /* First check balancing cached NAT entries */ @@ -1185,10 +1184,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, return 0; /* if mounting is failed, skip writing node pages */ - wbc->nr_to_write = bio_get_nr_vecs(bdev); + wbc->nr_to_write = max_hw_blocks(sbi); sync_node_pages(sbi, 0, wbc); - wbc->nr_to_write = nr_to_write - - (bio_get_nr_vecs(bdev) - wbc->nr_to_write); + wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 392ccb3d10b9..d8e84e49a5c3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -734,7 +734,7 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, do_submit_bio(sbi, type, false); alloc_new: if (sbi->bio[type] == NULL) { - sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev)); + sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); /* * The end_io will be assigned at the sumbission phase. diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 26fc0540f144..062424a0e4c3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -8,6 +8,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include + /* constant macro */ #define NULL_SEGNO ((unsigned int)(~0)) #define NULL_SECNO ((unsigned int)(~0)) @@ -86,6 +88,8 @@ #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) +#define SECTOR_TO_BLOCK(sbi, sectors) \ + (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) /* during checkpoint, bio_private is used to synchronize the last bio */ struct bio_private { @@ -624,3 +628,10 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) return true; return false; } + +static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct request_queue *q = bdev_get_queue(bdev); + return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); +} -- cgit v1.2.3 From 531ad7d58c6476c5856653448b4c7d26427502b4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Apr 2013 11:33:27 +0900 Subject: f2fs: avoid deadlock during evict after f2fs_gc o Deadlock case #1 Thread 1: - writeback_sb_inodes - do_writepages - f2fs_write_data_pages - write_cache_pages - f2fs_write_data_page - f2fs_balance_fs - wait mutex_lock(gc_mutex) Thread 2: - f2fs_balance_fs - mutex_lock(gc_mutex) - f2fs_gc - f2fs_iget - wait iget_locked(inode->i_lock) Thread 3: - do_unlinkat - iput - lock(inode->i_lock) - evict - inode_wait_for_writeback o Deadlock case #2 Thread 1: - __writeback_single_inode : set I_SYNC - do_writepages - f2fs_write_data_page - f2fs_balance_fs - f2fs_gc - iput - evict - inode_wait_for_writeback(I_SYNC) In order to avoid this, even though iput is called with the zero-reference count, we need to stop the eviction procedure if the inode is on writeback. So this patch links f2fs_drop_inode which checks the I_SYNC flag. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++++-- fs/f2fs/namei.c | 6 ++++++ fs/f2fs/super.c | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2db9380f5dda..61b44542417c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -577,6 +577,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + bool locked = false; int ret; long excess_nrtw = 0, desired_nrtw; @@ -590,10 +591,12 @@ static int f2fs_write_data_pages(struct address_space *mapping, wbc->nr_to_write = desired_nrtw; } - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode)) { mutex_lock(&sbi->writepages); + locked = true; + } ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); - if (!S_ISDIR(inode->i_mode)) + if (locked) mutex_unlock(&sbi->writepages); f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4aa26e53c935..47abc9722b17 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -72,6 +72,7 @@ out: unlock_new_inode(inode); fail: trace_f2fs_new_inode(inode, err); + make_bad_inode(inode); iput(inode); if (nid_free) alloc_nid_failed(sbi, ino); @@ -155,6 +156,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, out: clear_nlink(inode); unlock_new_inode(inode); + make_bad_inode(inode); iput(inode); alloc_nid_failed(sbi, ino); return err; @@ -190,6 +192,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + make_bad_inode(inode); iput(inode); return err; } @@ -295,6 +298,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, out: clear_nlink(inode); unlock_new_inode(inode); + make_bad_inode(inode); iput(inode); alloc_nid_failed(sbi, inode->i_ino); return err; @@ -335,6 +339,7 @@ out_fail: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); clear_nlink(inode); unlock_new_inode(inode); + make_bad_inode(inode); iput(inode); alloc_nid_failed(sbi, inode->i_ino); return err; @@ -382,6 +387,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, out: clear_nlink(inode); unlock_new_inode(inode); + make_bad_inode(inode); iput(inode); alloc_nid_failed(sbi, inode->i_ino); return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5835aaf2fe2e..cd0e89a1ff8f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -98,6 +98,20 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) return &fi->vfs_inode; } +static int f2fs_drop_inode(struct inode *inode) +{ + /* + * This is to avoid a deadlock condition like below. + * writeback_single_inode(inode) + * - f2fs_write_data_page + * - f2fs_gc -> iput -> evict + * - inode_wait_for_writeback(inode) + */ + if (!inode_unhashed(inode) && inode->i_state & I_SYNC) + return 0; + return generic_drop_inode(inode); +} + static void f2fs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); @@ -232,6 +246,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) static struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, + .drop_inode = f2fs_drop_inode, .destroy_inode = f2fs_destroy_inode, .write_inode = f2fs_write_inode, .show_options = f2fs_show_options, -- cgit v1.2.3 From bde582b225e98fe9e35cd67e4cb4406a6f85ae3e Mon Sep 17 00:00:00 2001 From: Chris Fries Date: Thu, 2 May 2013 16:07:34 -0500 Subject: f2fs: continue to mount after failing recovery When unable to roll forward the journal, we shouldn't bail out and not mount, we should continue to attempt the mount. Bad recovery data is likely unrecoverable at this point, and requiring the user to try to mount again doesn't solve any issues. Signed-off-by: Chris Fries Reviewed-by: Russell Knize Reviewed-by: Jason Hrycay Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cd0e89a1ff8f..392dba26414a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -669,10 +669,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { err = recover_fsync_data(sbi); - if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to recover fsync data"); - goto free_root_inode; - } + if (err) + f2fs_msg(sb, KERN_ERR, + "Cannot recover all fsync data errno=%ld", err); } /* After POR, we can run background GC thread */ -- cgit v1.2.3 From 047184b42b52376f4066f9ab357c0a61a12f116e Mon Sep 17 00:00:00 2001 From: Chris Fries Date: Thu, 2 May 2013 16:09:05 -0500 Subject: f2fs: recover when journal contains deleted files When recovering a journal file with fsync data for files that have been deleted, don't bail out on recovery. Signed-off-by: Chris Fries Reviewed-by: Russell Knize Reviewed-by: Jason Hrycay [Jaegeuk Kim: fit the coding style] Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f16d12df8e99..60c8a5097058 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -53,7 +53,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); if (IS_ERR(dir)) { - err = -EINVAL; + err = PTR_ERR(dir); goto out; } @@ -156,8 +156,12 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) } if (IS_INODE(page)) { err = recover_inode(entry->inode, page); - if (err) + if (err == -ENOENT) { + goto next; + } else if (err) { + err = -EINVAL; goto unlock_out; + } } next: /* check next segment */ -- cgit v1.2.3 From 95630cbadc3588abff24a4b1989b72c943b27512 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Mon, 6 May 2013 23:15:41 +0800 Subject: f2fs: bugfix for alloc_nid_failed() Directly drop the free_nid cache when nm_i->fcnt > 2 * MAX_FREE_NIDS Since there is NOT nmi->free_nid_list_lock spinlock protection between a sequential calling of alloc_nid() and alloc_nid_failed(), some other threads may already add new free_nid to the free_nid_list during this period. We need to make sure nmi->fcnt is never > 2 * MAX_FREE_NIDS. Signed-off-by: Haicheng Li [Jaegeuk Kim: fit the coding style] Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7209d637f942..d682f34ee0f8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1439,8 +1439,12 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); BUG_ON(!i || i->state != NID_ALLOC); - i->state = NID_NEW; - nm_i->fcnt++; + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { + __del_from_free_nid_list(i); + } else { + i->state = NID_NEW; + nm_i->fcnt++; + } spin_unlock(&nm_i->free_nid_list_lock); } -- cgit v1.2.3 From 8760952d92b2fd2310fac340ff5bcdf3ada500d7 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Mon, 6 May 2013 23:15:42 +0800 Subject: f2fs: code cleanup for scan_nat_page() and build_free_nids() This patch does two cleanups: 1. remove unused variable "fcnt" in build_free_nids(). 2. make scan_nat_page() as void type and remove useless variable "fcnt". Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d682f34ee0f8..122200e677a4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1292,12 +1292,11 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) spin_unlock(&nm_i->free_nid_list_lock); } -static int scan_nat_page(struct f2fs_nm_info *nm_i, +static void scan_nat_page(struct f2fs_nm_info *nm_i, struct page *nat_page, nid_t start_nid) { struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; - int fcnt = 0; int i; i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -1308,9 +1307,8 @@ static int scan_nat_page(struct f2fs_nm_info *nm_i, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); BUG_ON(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) - fcnt += add_free_nid(nm_i, start_nid); + add_free_nid(nm_i, start_nid); } - return fcnt; } static void build_free_nids(struct f2fs_sb_info *sbi) @@ -1319,7 +1317,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - int fcnt = 0, i = 0; + int i = 0; nid_t nid = nm_i->next_scan_nid; /* Enough entries */ @@ -1332,7 +1330,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) while (1) { struct page *page = get_current_nat_page(sbi, nid); - fcnt += scan_nat_page(nm_i, page, nid); + scan_nat_page(nm_i, page, nid); f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); -- cgit v1.2.3 From 23d38844276680abbf33624f56b6779d43f53633 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Mon, 6 May 2013 23:15:43 +0800 Subject: f2fs: optimize scan_nat_page() When nm_i->fcnt > 2 * MAX_FREE_NIDS, stop scanning other NAT entries. Signed-off-by: Haicheng Li [Jaegeuk Kim: fix handling the return value of add_free_nid()] Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 122200e677a4..e42934e689c5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1254,7 +1254,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) struct free_nid *i; if (nm_i->fcnt > 2 * MAX_FREE_NIDS) - return 0; + return -1; /* 0 nid should not be used */ if (nid == 0) @@ -1302,12 +1302,16 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + if (start_nid >= nm_i->max_nid) break; - blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); + + blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); BUG_ON(blk_addr == NEW_ADDR); - if (blk_addr == NULL_ADDR) - add_free_nid(nm_i, start_nid); + if (blk_addr == NULL_ADDR) { + if (add_free_nid(nm_i, start_nid) < 0) + break; + } } } @@ -1655,7 +1659,7 @@ flush_now: } if (nat_get_blkaddr(ne) == NULL_ADDR && - !add_free_nid(NM_I(sbi), nid)) { + add_free_nid(NM_I(sbi), nid) <= 0) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); -- cgit v1.2.3 From 59bbd474abb9dd6a0c1a74df758ec29c7a8b150f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 May 2013 20:47:40 +0900 Subject: f2fs: cover free_nid management with spin_lock After build_free_nids() searches free nid candidates from nat pages and current journal blocks, it checks all the candidates if they are allocated so that the nat cache has its nid with an allocated block address. In this procedure, previously we used list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list). But, this is not covered by free_nid_list_lock, resulting in null pointer bug. This patch moves this checking routine inside add_free_nid() in order not to use the spin_lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e42934e689c5..3df43b4efd89 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1249,9 +1249,11 @@ static void __del_from_free_nid_list(struct free_nid *i) kmem_cache_free(free_nid_slab, i); } -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) { struct free_nid *i; + struct nat_entry *ne; + bool allocated = false; if (nm_i->fcnt > 2 * MAX_FREE_NIDS) return -1; @@ -1259,6 +1261,18 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) /* 0 nid should not be used */ if (nid == 0) return 0; + + if (!build) + goto retry; + + /* do not add allocated nids */ + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + allocated = true; + read_unlock(&nm_i->nat_tree_lock); + if (allocated) + return 0; retry: i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); if (!i) { @@ -1309,7 +1323,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); BUG_ON(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { - if (add_free_nid(nm_i, start_nid) < 0) + if (add_free_nid(nm_i, start_nid, true) < 0) break; } } @@ -1317,7 +1331,6 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, static void build_free_nids(struct f2fs_sb_info *sbi) { - struct free_nid *fnid, *next_fnid; struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; @@ -1354,22 +1367,11 @@ static void build_free_nids(struct f2fs_sb_info *sbi) block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); nid = le32_to_cpu(nid_in_journal(sum, i)); if (addr == NULL_ADDR) - add_free_nid(nm_i, nid); + add_free_nid(nm_i, nid, true); else remove_free_nid(nm_i, nid); } mutex_unlock(&curseg->curseg_mutex); - - /* remove the free nids from current allocated nids */ - list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) { - struct nat_entry *ne; - - read_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, fnid->nid); - if (ne && nat_get_blkaddr(ne) != NULL_ADDR) - remove_free_nid(nm_i, fnid->nid); - read_unlock(&nm_i->nat_tree_lock); - } } /* @@ -1659,7 +1661,7 @@ flush_now: } if (nat_get_blkaddr(ne) == NULL_ADDR && - add_free_nid(NM_I(sbi), nid) <= 0) { + add_free_nid(NM_I(sbi), nid, false) <= 0) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); -- cgit v1.2.3