summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-12-05 16:44:19 +0100
committerDavid S. Miller <davem@davemloft.net>2017-12-05 16:44:19 +0100
commit7cda4cee1366fae236678a5a6b124bab86b14973 (patch)
tree180bcb2d09e3db27050658afc36d5bac88593ed5 /fs
parentrtnetlink: ipv6: convert remaining users to rtnl_register_module (diff)
parentMerge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost (diff)
downloadlinux-7cda4cee1366fae236678a5a6b124bab86b14973.tar.xz
linux-7cda4cee1366fae236678a5a6b124bab86b14973.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Small overlapping change conflict ('net' changed a line, 'net-next' added a line right afterwards) in flexcan.c Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/internal.h5
-rw-r--r--fs/afs/security.c18
-rw-r--r--fs/afs/super.c14
-rw-r--r--fs/autofs4/root.c17
-rw-r--r--fs/dax.c3
-rw-r--r--fs/exec.c7
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/namei.c15
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/quota/dquot.c22
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c6
-rw-r--r--fs/xfs/scrub/inode.c14
-rw-r--r--fs/xfs/scrub/quota.c4
-rw-r--r--fs/xfs/xfs_aops.c12
-rw-r--r--fs/xfs/xfs_bmap_item.c23
-rw-r--r--fs/xfs/xfs_bmap_item.h3
-rw-r--r--fs/xfs/xfs_buf.c15
-rw-r--r--fs/xfs/xfs_dquot.c14
-rw-r--r--fs/xfs/xfs_dquot_item.c40
-rw-r--r--fs/xfs/xfs_inode.c21
-rw-r--r--fs/xfs/xfs_log_recover.c75
-rw-r--r--fs/xfs/xfs_refcount_item.c21
-rw-r--r--fs/xfs/xfs_refcount_item.h3
26 files changed, 257 insertions, 109 deletions
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index e03910cebdd4..804d1f905622 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -441,7 +441,10 @@ enum afs_lock_state {
};
/*
- * AFS inode private data
+ * AFS inode private data.
+ *
+ * Note that afs_alloc_inode() *must* reset anything that could incorrectly
+ * leak from one inode to another.
*/
struct afs_vnode {
struct inode vfs_inode; /* the VFS's inode record */
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 2b00097101b3..b88b7d45fdaa 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -120,7 +120,7 @@ static void afs_hash_permits(struct afs_permits *permits)
void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
unsigned int cb_break)
{
- struct afs_permits *permits, *xpermits, *replacement, *new = NULL;
+ struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;
afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
size_t size = 0;
bool changed = false;
@@ -204,7 +204,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
new = kzalloc(sizeof(struct afs_permits) +
sizeof(struct afs_permit) * size, GFP_NOFS);
if (!new)
- return;
+ goto out_put;
refcount_set(&new->usage, 1);
new->nr_permits = size;
@@ -229,8 +229,6 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
afs_hash_permits(new);
- afs_put_permits(permits);
-
/* Now see if the permit list we want is actually already available */
spin_lock(&afs_permits_lock);
@@ -262,11 +260,15 @@ found:
kfree(new);
spin_lock(&vnode->lock);
- if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) ||
- permits != rcu_access_pointer(vnode->permit_cache))
- goto someone_else_changed_it_unlock;
- rcu_assign_pointer(vnode->permit_cache, replacement);
+ zap = rcu_access_pointer(vnode->permit_cache);
+ if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) &&
+ zap == permits)
+ rcu_assign_pointer(vnode->permit_cache, replacement);
+ else
+ zap = replacement;
spin_unlock(&vnode->lock);
+ afs_put_permits(zap);
+out_put:
afs_put_permits(permits);
return;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index d3f97da61bdf..1037dd41a622 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -536,7 +536,9 @@ static void afs_kill_super(struct super_block *sb)
}
/*
- * initialise an inode cache slab element prior to any use
+ * Initialise an inode cache slab element prior to any use. Note that
+ * afs_alloc_inode() *must* reset anything that could incorrectly leak from one
+ * inode to another.
*/
static void afs_i_init_once(void *_vnode)
{
@@ -568,11 +570,21 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
atomic_inc(&afs_count_active_inodes);
+ /* Reset anything that shouldn't leak from one inode to the next. */
memset(&vnode->fid, 0, sizeof(vnode->fid));
memset(&vnode->status, 0, sizeof(vnode->status));
vnode->volume = NULL;
+ vnode->lock_key = NULL;
+ vnode->permit_cache = NULL;
+ vnode->cb_interest = NULL;
+#ifdef CONFIG_AFS_FSCACHE
+ vnode->cache = NULL;
+#endif
+
vnode->flags = 1 << AFS_VNODE_UNSET;
+ vnode->cb_type = 0;
+ vnode->lock_state = AFS_VNODE_LOCK_NONE;
_leave(" = %p", &vnode->vfs_inode);
return &vnode->vfs_inode;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d79ced925861..82e8f6edfb48 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
- ino->last_used = jiffies;
}
+ ino->last_used = jiffies;
return status;
}
@@ -321,21 +321,16 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
*/
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
+ struct autofs_info *ino;
struct dentry *new;
new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
- if (new == dentry)
- dput(new);
- else {
- struct autofs_info *ino;
-
- ino = autofs4_dentry_ino(new);
- ino->last_used = jiffies;
- dput(path->dentry);
- path->dentry = new;
- }
+ ino = autofs4_dentry_ino(new);
+ ino->last_used = jiffies;
+ dput(path->dentry);
+ path->dentry = new;
}
return path->dentry;
}
diff --git a/fs/dax.c b/fs/dax.c
index 95981591977a..78b72c48374e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,7 +627,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
if (pfn != pmd_pfn(*pmdp))
goto unlock_pmd;
- if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
+ if (!pmd_dirty(*pmdp)
+ && !pmd_access_permitted(*pmdp, WRITE))
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
diff --git a/fs/exec.c b/fs/exec.c
index 1d6243d9f2b6..6be2aa0ab26f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1340,10 +1340,15 @@ void setup_new_exec(struct linux_binprm * bprm)
* avoid bad behavior from the prior rlimits. This has to
* happen before arch_pick_mmap_layout(), which examines
* RLIMIT_STACK, but after the point of no return to avoid
- * needing to clean up the change on failure.
+ * races from other threads changing the limits. This also
+ * must be protected from races with prlimit() calls.
*/
+ task_lock(current->group_leader);
if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+ if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
+ current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
+ task_unlock(current->group_leader);
}
arch_pick_mmap_layout(current->mm);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 016c46b5e44c..20a0a89eaca5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -779,7 +779,7 @@ static void __exit fat_destroy_inodecache(void)
static int fat_remount(struct super_block *sb, int *flags, char *data)
{
- int new_rdonly;
+ bool new_rdonly;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
*flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 1e76730aac0d..8a85f3f53446 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -639,11 +639,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
- * page_put due to reference from alloc_huge_page()
* unlock_page because locked by add_to_page_cache()
+ * page_put due to reference from alloc_huge_page()
*/
- put_page(page);
unlock_page(page);
+ put_page(page);
}
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
diff --git a/fs/mbcache.c b/fs/mbcache.c
index d818fd236787..b8b8b9ced9f8 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -269,6 +269,9 @@ static unsigned long mb_cache_count(struct shrinker *shrink,
struct mb_cache *cache = container_of(shrink, struct mb_cache,
c_shrink);
+ /* Unlikely, but not impossible */
+ if (unlikely(cache->c_entry_count < 0))
+ return 0;
return cache->c_entry_count;
}
diff --git a/fs/namei.c b/fs/namei.c
index f0c7a7b9b6ca..9cc91fb7f156 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1129,18 +1129,9 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* of the daemon to instantiate them before they can be used.
*/
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
- LOOKUP_OPEN | LOOKUP_CREATE |
- LOOKUP_AUTOMOUNT))) {
- /* Positive dentry that isn't meant to trigger an
- * automount, EISDIR will allow it to be used,
- * otherwise there's no mount here "now" so return
- * ENOENT.
- */
- if (path->dentry->d_inode)
- return -EISDIR;
- else
- return -ENOENT;
- }
+ LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
+ path->dentry->d_inode)
+ return -EISDIR;
if (path->dentry->d_sb->s_user_ns != &init_user_ns)
return -EACCES;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 54fd56d715a8..e4f4a09ed9f4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -71,8 +71,8 @@ const nfs4_stateid zero_stateid = {
};
const nfs4_stateid invalid_stateid = {
{
- .seqid = cpu_to_be32(0xffffffffU),
- .other = { 0 },
+ /* Funky initialiser keeps older gcc versions happy */
+ .data = { 0xff, 0xff, 0xff, 0xff, 0 },
},
.type = NFS4_INVALID_STATEID_TYPE,
};
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 39f1b0b0c76f..020c597ef9b6 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -941,12 +941,13 @@ static int dqinit_needed(struct inode *inode, int type)
}
/* This routine is guarded by s_umount semaphore */
-static void add_dquot_ref(struct super_block *sb, int type)
+static int add_dquot_ref(struct super_block *sb, int type)
{
struct inode *inode, *old_inode = NULL;
#ifdef CONFIG_QUOTA_DEBUG
int reserved = 0;
#endif
+ int err = 0;
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -966,7 +967,11 @@ static void add_dquot_ref(struct super_block *sb, int type)
reserved = 1;
#endif
iput(old_inode);
- __dquot_initialize(inode, type);
+ err = __dquot_initialize(inode, type);
+ if (err) {
+ iput(inode);
+ goto out;
+ }
/*
* We hold a reference to 'inode' so it couldn't have been
@@ -981,7 +986,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
}
spin_unlock(&sb->s_inode_list_lock);
iput(old_inode);
-
+out:
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
quota_error(sb, "Writes happened before quota was turned on "
@@ -989,6 +994,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
"Please run quotacheck(8)");
}
#endif
+ return err;
}
/*
@@ -2379,10 +2385,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
dqopt->flags |= dquot_state_flag(flags, type);
spin_unlock(&dq_state_lock);
- add_dquot_ref(sb, type);
-
- return 0;
+ error = add_dquot_ref(sb, type);
+ if (error)
+ dquot_disable(sb, type, flags);
+ return error;
out_file_init:
dqopt->files[type] = NULL;
iput(inode);
@@ -2985,7 +2992,8 @@ static int __init dquot_init(void)
pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld,"
" %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order));
- register_shrinker(&dqcache_shrinker);
+ if (register_shrinker(&dqcache_shrinker))
+ panic("Cannot register dquot shrinker");
return 0;
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 020c9cacbb2f..1fc934d24459 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2591,7 +2591,6 @@ out:
return err;
if (inode->i_size < off + len - towrite)
i_size_write(inode, off + len - towrite);
- inode->i_version++;
inode->i_mtime = inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
return len - towrite;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 08df809e2315..1210f684d3c2 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents(
*done = true;
goto del_cursor;
}
- XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+ XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
+ del_cursor);
new_startoff = got.br_startoff - offset_shift_fsb;
if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
@@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents(
goto del_cursor;
}
}
- XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+ XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
+ del_cursor);
if (stop_fsb >= got.br_startoff + got.br_blockcount) {
error = -EIO;
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 637b7a892313..f120fb20452f 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -318,8 +318,20 @@ xfs_scrub_dinode(
/* di_mode */
mode = be16_to_cpu(dip->di_mode);
- if (mode & ~(S_IALLUGO | S_IFMT))
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+ case S_IFREG:
+ case S_IFDIR:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ /* mode is recognized */
+ break;
+ default:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ break;
+ }
/* v1/v2 fields */
switch (dip->di_version) {
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 8e58ba842946..3d9037eceaf1 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -107,7 +107,7 @@ xfs_scrub_quota_item(
unsigned long long rcount;
xfs_ino_t fs_icount;
- offset = id * qi->qi_dqperchunk;
+ offset = id / qi->qi_dqperchunk;
/*
* We fed $id and DQNEXT into the xfs_qm_dqget call, which means
@@ -207,7 +207,7 @@ xfs_scrub_quota(
xfs_dqid_t id = 0;
uint dqtype;
int nimaps;
- int error;
+ int error = 0;
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return -ENOENT;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a3eeaba156c5..21e2d70884e1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -399,7 +399,7 @@ xfs_map_blocks(
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + count > mp->m_super->s_maxbytes)
+ if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -896,13 +896,13 @@ xfs_writepage_map(
struct writeback_control *wbc,
struct inode *inode,
struct page *page,
- loff_t offset,
- uint64_t end_offset)
+ uint64_t end_offset)
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
ssize_t len = i_blocksize(inode);
+ uint64_t offset;
int error = 0;
int count = 0;
int uptodate = 1;
@@ -1146,7 +1146,7 @@ xfs_do_writepage(
end_offset = offset;
}
- return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
+ return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
redirty:
redirty_page_for_writepage(wbc, page);
@@ -1265,7 +1265,7 @@ xfs_map_trim_size(
if (mapping_size > size)
mapping_size = size;
if (offset < i_size_read(inode) &&
- offset + mapping_size >= i_size_read(inode)) {
+ (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
i_blocksize(inode));
@@ -1312,7 +1312,7 @@ xfs_get_blocks(
lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + size > mp->m_super->s_maxbytes)
+ if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index dd136f7275e4..e5fb008d75e8 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -389,7 +389,8 @@ xfs_bud_init(
int
xfs_bui_recover(
struct xfs_mount *mp,
- struct xfs_bui_log_item *buip)
+ struct xfs_bui_log_item *buip,
+ struct xfs_defer_ops *dfops)
{
int error = 0;
unsigned int bui_type;
@@ -404,9 +405,7 @@ xfs_bui_recover(
xfs_exntst_t state;
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
- struct xfs_defer_ops dfops;
struct xfs_bmbt_irec irec;
- xfs_fsblock_t firstfsb;
ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
@@ -464,7 +463,6 @@ xfs_bui_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- xfs_defer_init(&dfops, &firstfsb);
/* Process deferred bmap item. */
state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
@@ -479,16 +477,16 @@ xfs_bui_recover(
break;
default:
error = -EFSCORRUPTED;
- goto err_dfops;
+ goto err_inode;
}
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
+ error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type,
ip, whichfork, bmap->me_startoff,
bmap->me_startblock, &count, state);
if (error)
- goto err_dfops;
+ goto err_inode;
if (count > 0) {
ASSERT(type == XFS_BMAP_UNMAP);
@@ -496,16 +494,11 @@ xfs_bui_recover(
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
irec.br_state = state;
- error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
+ error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec);
if (error)
- goto err_dfops;
+ goto err_inode;
}
- /* Finish transaction, free inodes. */
- error = xfs_defer_finish(&tp, &dfops);
- if (error)
- goto err_dfops;
-
set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -513,8 +506,6 @@ xfs_bui_recover(
return error;
-err_dfops:
- xfs_defer_cancel(&dfops);
err_inode:
xfs_trans_cancel(tp);
if (ip) {
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index c867daae4a3c..24b354a2c836 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
struct xfs_bui_log_item *);
void xfs_bui_item_free(struct xfs_bui_log_item *);
void xfs_bui_release(struct xfs_bui_log_item *);
-int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip);
+int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip,
+ struct xfs_defer_ops *dfops);
#endif /* __XFS_BMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4db6e8d780f6..4c6e86d861fd 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1815,22 +1815,27 @@ xfs_alloc_buftarg(
btp->bt_daxdev = dax_dev;
if (xfs_setsize_buftarg_early(btp, bdev))
- goto error;
+ goto error_free;
if (list_lru_init(&btp->bt_lru))
- goto error;
+ goto error_free;
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
- goto error;
+ goto error_lru;
btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
- register_shrinker(&btp->bt_shrinker);
+ if (register_shrinker(&btp->bt_shrinker))
+ goto error_pcpu;
return btp;
-error:
+error_pcpu:
+ percpu_counter_destroy(&btp->bt_io_count);
+error_lru:
+ list_lru_destroy(&btp->bt_lru);
+error_free:
kmem_free(btp);
return NULL;
}
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index d57c2db64e59..f248708c10ff 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -970,14 +970,22 @@ xfs_qm_dqflush_done(
* holding the lock before removing the dquot from the AIL.
*/
if ((lip->li_flags & XFS_LI_IN_AIL) &&
- lip->li_lsn == qip->qli_flush_lsn) {
+ ((lip->li_lsn == qip->qli_flush_lsn) ||
+ (lip->li_flags & XFS_LI_FAILED))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->xa_lock);
- if (lip->li_lsn == qip->qli_flush_lsn)
+ if (lip->li_lsn == qip->qli_flush_lsn) {
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
- else
+ } else {
+ /*
+ * Clear the failed state since we are about to drop the
+ * flush lock
+ */
+ if (lip->li_flags & XFS_LI_FAILED)
+ xfs_clear_li_failed(lip);
spin_unlock(&ailp->xa_lock);
+ }
}
/*
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 2c7a1629e064..664dea105e76 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait(
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
+/*
+ * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
+ * have been failed during writeback
+ *
+ * this informs the AIL that the dquot is already flush locked on the next push,
+ * and acquires a hold on the buffer to ensure that it isn't reclaimed before
+ * dirty data makes it to disk.
+ */
+STATIC void
+xfs_dquot_item_error(
+ struct xfs_log_item *lip,
+ struct xfs_buf *bp)
+{
+ struct xfs_dquot *dqp;
+
+ dqp = DQUOT_ITEM(lip)->qli_dquot;
+ ASSERT(!completion_done(&dqp->q_flush));
+ xfs_set_li_failed(lip, bp);
+}
+
STATIC uint
xfs_qm_dquot_logitem_push(
struct xfs_log_item *lip,
@@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push(
__acquires(&lip->li_ailp->xa_lock)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- struct xfs_buf *bp = NULL;
+ struct xfs_buf *bp = lip->li_buf;
uint rval = XFS_ITEM_SUCCESS;
int error;
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
+ /*
+ * The buffer containing this item failed to be written back
+ * previously. Resubmit the buffer for IO
+ */
+ if (lip->li_flags & XFS_LI_FAILED) {
+ if (!xfs_buf_trylock(bp))
+ return XFS_ITEM_LOCKED;
+
+ if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+ rval = XFS_ITEM_FLUSHING;
+
+ xfs_buf_unlock(bp);
+ return rval;
+ }
+
if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
@@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_unlock = xfs_qm_dquot_logitem_unlock,
.iop_committed = xfs_qm_dquot_logitem_committed,
.iop_push = xfs_qm_dquot_logitem_push,
- .iop_committing = xfs_qm_dquot_logitem_committing
+ .iop_committing = xfs_qm_dquot_logitem_committing,
+ .iop_error = xfs_dquot_item_error
};
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 61d1cb7dc10d..801274126648 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2401,6 +2401,24 @@ retry:
}
/*
+ * Free any local-format buffers sitting around before we reset to
+ * extents format.
+ */
+static inline void
+xfs_ifree_local_data(
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xfs_ifork *ifp;
+
+ if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+ return;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+}
+
+/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
* no pages associated with it. This routine also assumes that
@@ -2437,6 +2455,9 @@ xfs_ifree(
if (error)
return error;
+ xfs_ifree_local_data(ip, XFS_DATA_FORK);
+ xfs_ifree_local_data(ip, XFS_ATTR_FORK);
+
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 87b1c331f9eb..28d1abfe835e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -24,6 +24,7 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
+#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
@@ -4716,7 +4717,8 @@ STATIC int
xlog_recover_process_cui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip,
+ struct xfs_defer_ops *dfops)
{
struct xfs_cui_log_item *cuip;
int error;
@@ -4729,7 +4731,7 @@ xlog_recover_process_cui(
return 0;
spin_unlock(&ailp->xa_lock);
- error = xfs_cui_recover(mp, cuip);
+ error = xfs_cui_recover(mp, cuip, dfops);
spin_lock(&ailp->xa_lock);
return error;
@@ -4756,7 +4758,8 @@ STATIC int
xlog_recover_process_bui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip,
+ struct xfs_defer_ops *dfops)
{
struct xfs_bui_log_item *buip;
int error;
@@ -4769,7 +4772,7 @@ xlog_recover_process_bui(
return 0;
spin_unlock(&ailp->xa_lock);
- error = xfs_bui_recover(mp, buip);
+ error = xfs_bui_recover(mp, buip, dfops);
spin_lock(&ailp->xa_lock);
return error;
@@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
}
}
+/* Take all the collected deferred ops and finish them in order. */
+static int
+xlog_finish_defer_ops(
+ struct xfs_mount *mp,
+ struct xfs_defer_ops *dfops)
+{
+ struct xfs_trans *tp;
+ int64_t freeblks;
+ uint resblks;
+ int error;
+
+ /*
+ * We're finishing the defer_ops that accumulated as a result of
+ * recovering unfinished intent items during log recovery. We
+ * reserve an itruncate transaction because it is the largest
+ * permanent transaction type. Since we're the only user of the fs
+ * right now, take 93% (15/16) of the available free blocks. Use
+ * weird math to avoid a 64-bit division.
+ */
+ freeblks = percpu_counter_sum(&mp->m_fdblocks);
+ if (freeblks <= 0)
+ return -ENOSPC;
+ resblks = min_t(int64_t, UINT_MAX, freeblks);
+ resblks = (resblks * 15) >> 4;
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+ 0, XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+ error = xfs_defer_finish(&tp, dfops);
+ if (error)
+ goto out_cancel;
+
+ return xfs_trans_commit(tp);
+
+out_cancel:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
/*
* When this is called, all of the log intent items which did not have
* corresponding log done items should be in the AIL. What we do now
@@ -4825,10 +4868,12 @@ STATIC int
xlog_recover_process_intents(
struct xlog *log)
{
- struct xfs_log_item *lip;
- int error = 0;
+ struct xfs_defer_ops dfops;
struct xfs_ail_cursor cur;
+ struct xfs_log_item *lip;
struct xfs_ail *ailp;
+ xfs_fsblock_t firstfsb;
+ int error = 0;
#if defined(DEBUG) || defined(XFS_WARN)
xfs_lsn_t last_lsn;
#endif
@@ -4839,6 +4884,7 @@ xlog_recover_process_intents(
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
#endif
+ xfs_defer_init(&dfops, &firstfsb);
while (lip != NULL) {
/*
* We're done when we see something other than an intent.
@@ -4859,6 +4905,12 @@ xlog_recover_process_intents(
*/
ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
+ /*
+ * NOTE: If your intent processing routine can create more
+ * deferred ops, you /must/ attach them to the dfops in this
+ * routine or else those subsequent intents will get
+ * replayed in the wrong order!
+ */
switch (lip->li_type) {
case XFS_LI_EFI:
error = xlog_recover_process_efi(log->l_mp, ailp, lip);
@@ -4867,10 +4919,12 @@ xlog_recover_process_intents(
error = xlog_recover_process_rui(log->l_mp, ailp, lip);
break;
case XFS_LI_CUI:
- error = xlog_recover_process_cui(log->l_mp, ailp, lip);
+ error = xlog_recover_process_cui(log->l_mp, ailp, lip,
+ &dfops);
break;
case XFS_LI_BUI:
- error = xlog_recover_process_bui(log->l_mp, ailp, lip);
+ error = xlog_recover_process_bui(log->l_mp, ailp, lip,
+ &dfops);
break;
}
if (error)
@@ -4880,6 +4934,11 @@ xlog_recover_process_intents(
out:
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
+ if (error)
+ xfs_defer_cancel(&dfops);
+ else
+ error = xlog_finish_defer_ops(log->l_mp, &dfops);
+
return error;
}
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 8f2e2fac4255..3a55d6fc271b 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -393,7 +393,8 @@ xfs_cud_init(
int
xfs_cui_recover(
struct xfs_mount *mp,
- struct xfs_cui_log_item *cuip)
+ struct xfs_cui_log_item *cuip,
+ struct xfs_defer_ops *dfops)
{
int i;
int error = 0;
@@ -405,11 +406,9 @@ xfs_cui_recover(
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
enum xfs_refcount_intent_type type;
- xfs_fsblock_t firstfsb;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
struct xfs_bmbt_irec irec;
- struct xfs_defer_ops dfops;
bool requeue_only = false;
ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
@@ -465,7 +464,6 @@ xfs_cui_recover(
return error;
cudp = xfs_trans_get_cud(tp, cuip);
- xfs_defer_init(&dfops, &firstfsb);
for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
refc = &cuip->cui_format.cui_extents[i];
refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
@@ -485,7 +483,7 @@ xfs_cui_recover(
new_len = refc->pe_len;
} else
error = xfs_trans_log_finish_refcount_update(tp, cudp,
- &dfops, type, refc->pe_startblock, refc->pe_len,
+ dfops, type, refc->pe_startblock, refc->pe_len,
&new_fsb, &new_len, &rcur);
if (error)
goto abort_error;
@@ -497,21 +495,21 @@ xfs_cui_recover(
switch (type) {
case XFS_REFCOUNT_INCREASE:
error = xfs_refcount_increase_extent(
- tp->t_mountp, &dfops, &irec);
+ tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_DECREASE:
error = xfs_refcount_decrease_extent(
- tp->t_mountp, &dfops, &irec);
+ tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_ALLOC_COW:
error = xfs_refcount_alloc_cow_extent(
- tp->t_mountp, &dfops,
+ tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
case XFS_REFCOUNT_FREE_COW:
error = xfs_refcount_free_cow_extent(
- tp->t_mountp, &dfops,
+ tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
@@ -525,17 +523,12 @@ xfs_cui_recover(
}
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- error = xfs_defer_finish(&tp, &dfops);
- if (error)
- goto abort_defer;
set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
error = xfs_trans_commit(tp);
return error;
abort_error:
xfs_refcount_finish_one_cleanup(tp, rcur, error);
-abort_defer:
- xfs_defer_cancel(&dfops);
xfs_trans_cancel(tp);
return error;
}
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index 5b74dddfa64b..0e5327349a13 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
struct xfs_cui_log_item *);
void xfs_cui_item_free(struct xfs_cui_log_item *);
void xfs_cui_release(struct xfs_cui_log_item *);
-int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip);
+int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip,
+ struct xfs_defer_ops *dfops);
#endif /* __XFS_REFCOUNT_ITEM_H__ */