summary | refs | log | tree | commit | diff | stats
path: root/fs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- fs/inode.c 168
1 files changed, 101 insertions, 67 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 901bad1e5f12..4d8e3be55976 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
+#include <linux/rwsem.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly;
DEFINE_SPINLOCK(inode_lock);
/*
- * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
+ * iprune_sem provides exclusion between the kswapd or try_to_free_pages
* icache shrinking path, and the umount path. Without this exclusion,
* by the time prune_icache calls iput for the inode whose pages it has
* been invalidating, or by the time it calls clear_inode & destroy_inode
* from its final dispose_list, the struct super_block they refer to
* (for inode->i_sb->s_op) may already have been freed and reused.
+ *
+ * We make this an rwsem because the fastpath is icache shrinking. In
+ * some cases a filesystem may be doing a significant amount of work in
+ * its inode reclaim code, so this should improve parallelism.
*/
-static DEFINE_MUTEX(iprune_mutex);
+static DECLARE_RWSEM(iprune_sem);
/*
* Statistics gathering..
@@ -120,12 +125,11 @@ static void wake_up_inode(struct inode *inode)
* These are initializations that need to be done on every inode
* allocation as the fields are not initialised by slab allocation.
*/
-struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
+int inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct address_space_operations empty_aops;
- static struct inode_operations empty_iops;
+ static const struct inode_operations empty_iops;
static const struct file_operations empty_fops;
-
struct address_space *const mapping = &inode->i_data;
inode->i_sb = sb;
@@ -152,7 +156,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->dirtied_when = 0;
if (security_inode_alloc(inode))
- goto out_free_inode;
+ goto out;
/* allocate and initialize an i_integrity */
if (ima_inode_alloc(inode))
@@ -183,9 +187,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
if (sb->s_bdev) {
struct backing_dev_info *bdi;
- bdi = sb->s_bdev->bd_inode_backing_dev_info;
- if (!bdi)
- bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
+ bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
mapping->backing_dev_info = bdi;
}
inode->i_private = NULL;
@@ -198,16 +200,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif
- return inode;
+ return 0;
out_free_security:
security_inode_free(inode);
-out_free_inode:
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, (inode));
- return NULL;
+out:
+ return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);
@@ -220,12 +218,21 @@ static struct inode *alloc_inode(struct super_block *sb)
else
inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
- if (inode)
- return inode_init_always(sb, inode);
- return NULL;
+ if (!inode)
+ return NULL;
+
+ if (unlikely(inode_init_always(sb, inode))) {
+ if (inode->i_sb->s_op->destroy_inode)
+ inode->i_sb->s_op->destroy_inode(inode);
+ else
+ kmem_cache_free(inode_cachep, inode);
+ return NULL;
+ }
+
+ return inode;
}
-void destroy_inode(struct inode *inode)
+void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
ima_inode_free(inode);
@@ -237,13 +244,17 @@ void destroy_inode(struct inode *inode)
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
+}
+EXPORT_SYMBOL(__destroy_inode);
+
+void destroy_inode(struct inode *inode)
+{
+ __destroy_inode(inode);
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
kmem_cache_free(inode_cachep, (inode));
}
-EXPORT_SYMBOL(destroy_inode);
-
/*
* These are initializations that only need to be done
@@ -375,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
/*
* We can reschedule here without worrying about the list's
* consistency because the per-sb list of inodes must not
- * change during umount anymore, and because iprune_mutex keeps
+ * change during umount anymore, and because iprune_sem keeps
* shrink_icache_memory() away.
*/
cond_resched_lock(&inode_lock);
@@ -414,7 +425,7 @@ int invalidate_inodes(struct super_block *sb)
int busy;
LIST_HEAD(throw_away);
- mutex_lock(&iprune_mutex);
+ down_write(&iprune_sem);
spin_lock(&inode_lock);
inotify_unmount_inodes(&sb->s_inodes);
fsnotify_unmount_inodes(&sb->s_inodes);
@@ -422,7 +433,7 @@ int invalidate_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&throw_away);
- mutex_unlock(&iprune_mutex);
+ up_write(&iprune_sem);
return busy;
}
@@ -461,7 +472,7 @@ static void prune_icache(int nr_to_scan)
int nr_scanned;
unsigned long reap = 0;
- mutex_lock(&iprune_mutex);
+ down_read(&iprune_sem);
spin_lock(&inode_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
@@ -503,7 +514,7 @@ static void prune_icache(int nr_to_scan)
spin_unlock(&inode_lock);
dispose_list(&freeable);
- mutex_unlock(&iprune_mutex);
+ up_read(&iprune_sem);
}
/*
@@ -689,13 +700,15 @@ void unlock_new_inode(struct inode *inode)
}
#endif
/*
- * This is special! We do not need the spinlock
- * when clearing I_LOCK, because we're guaranteed
- * that nobody else tries to do anything about the
- * state of the inode when it is locked, as we
- * just created it (so there can be no old holders
- * that haven't tested I_LOCK).
+ * This is special! We do not need the spinlock when clearing I_LOCK,
+ * because we're guaranteed that nobody else tries to do anything about
+ * the state of the inode when it is locked, as we just created it (so
+ * there can be no old holders that haven't tested I_LOCK).
+ * However we must emit the memory barrier so that other CPUs reliably
+ * see the clearing of I_LOCK after the other inode initialisation has
+ * completed.
*/
+ smp_mb();
WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
inode->i_state &= ~(I_LOCK|I_NEW);
wake_up_inode(inode);
@@ -1228,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
}
EXPORT_SYMBOL(generic_delete_inode);
-static void generic_forget_inode(struct inode *inode)
+/**
+ * generic_detach_inode - remove inode from inode lists
+ * @inode: inode to remove
+ *
+ * Remove inode from inode lists, write it if it's dirty. This is just an
+ * internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ * Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -1238,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
- return;
+ return 0;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
@@ -1256,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+
+static void generic_forget_inode(struct inode *inode)
+{
+ if (!generic_detach_inode(inode))
+ return;
if (inode->i_data.nrpages)
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
@@ -1386,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct timespec now;
- if (mnt_want_write(mnt))
- return;
if (inode->i_flags & S_NOATIME)
- goto out;
+ return;
if (IS_NOATIME(inode))
- goto out;
+ return;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
if (mnt->mnt_flags & MNT_NOATIME)
- goto out;
+ return;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- goto out;
+ return;
now = current_fs_time(inode->i_sb);
if (!relatime_need_update(mnt, inode, now))
- goto out;
+ return;
if (timespec_equal(&inode->i_atime, &now))
- goto out;
+ return;
+
+ if (mnt_want_write(mnt))
+ return;
inode->i_atime = now;
mark_inode_dirty_sync(inode);
-out:
mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);
@@ -1431,34 +1461,37 @@ void file_update_time(struct file *file)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct timespec now;
- int sync_it = 0;
- int err;
+ enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
+ /* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return;
- err = mnt_want_write_file(file);
- if (err)
- return;
-
now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_mtime, &now)) {
- inode->i_mtime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_mtime, &now))
+ sync_it = S_MTIME;
- if (!timespec_equal(&inode->i_ctime, &now)) {
- inode->i_ctime = now;
- sync_it = 1;
- }
+ if (!timespec_equal(&inode->i_ctime, &now))
+ sync_it |= S_CTIME;
- if (IS_I_VERSION(inode)) {
- inode_inc_iversion(inode);
- sync_it = 1;
- }
+ if (IS_I_VERSION(inode))
+ sync_it |= S_VERSION;
+
+ if (!sync_it)
+ return;
+
+ /* Finally allowed to write? Takes lock. */
+ if (mnt_want_write_file(file))
+ return;
- if (sync_it)
- mark_inode_dirty_sync(inode);
+ /* Only change inode inside the lock region */
+ if (sync_it & S_VERSION)
+ inode_inc_iversion(inode);
+ if (sync_it & S_CTIME)
+ inode->i_ctime = now;
+ if (sync_it & S_MTIME)
+ inode->i_mtime = now;
+ mark_inode_dirty_sync(inode);
mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);
@@ -1586,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
else if (S_ISSOCK(mode))
inode->i_fop = &bad_sock_fops;
else
- printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
- mode);
+ printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
+ " inode %s:%lu\n", mode, inode->i_sb->s_id,
+ inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);