summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c42
-rw-r--r--fs/bio-integrity.c5
-rw-r--r--fs/bio.c6
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/extent-tree.c49
-rw-r--r--fs/btrfs/locking.c6
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/buffer.c23
-rw-r--r--fs/compat.c3
-rw-r--r--fs/devpts/inode.c5
-rw-r--r--fs/ecryptfs/crypto.c51
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h4
-rw-r--r--fs/ecryptfs/inode.c32
-rw-r--r--fs/ecryptfs/keystore.c3
-rw-r--r--fs/ecryptfs/main.c5
-rw-r--r--fs/exec.c13
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/ext4/ialloc.c12
-rw-r--r--fs/ext4/mballoc.c13
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fs-writeback.c9
-rw-r--r--fs/gfs2/Kconfig17
-rw-r--r--fs/gfs2/Makefile4
-rw-r--r--fs/gfs2/acl.c1
-rw-r--r--fs/gfs2/bmap.c1
-rw-r--r--fs/gfs2/dir.c1
-rw-r--r--fs/gfs2/eaops.c1
-rw-r--r--fs/gfs2/eattr.c1
-rw-r--r--fs/gfs2/glock.c268
-rw-r--r--fs/gfs2/glock.h127
-rw-r--r--fs/gfs2/glops.c160
-rw-r--r--fs/gfs2/glops.h1
-rw-r--r--fs/gfs2/incore.h71
-rw-r--r--fs/gfs2/inode.c13
-rw-r--r--fs/gfs2/inode.h22
-rw-r--r--fs/gfs2/lock_dlm.c241
-rw-r--r--fs/gfs2/locking.c232
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c708
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h166
-rw-r--r--fs/gfs2/locking/dlm/main.c48
-rw-r--r--fs/gfs2/locking/dlm/mount.c276
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c226
-rw-r--r--fs/gfs2/locking/dlm/thread.c68
-rw-r--r--fs/gfs2/log.c1
-rw-r--r--fs/gfs2/lops.c1
-rw-r--r--fs/gfs2/main.c13
-rw-r--r--fs/gfs2/meta_io.c22
-rw-r--r--fs/gfs2/meta_io.h1
-rw-r--r--fs/gfs2/mount.c128
-rw-r--r--fs/gfs2/mount.h17
-rw-r--r--fs/gfs2/ops_address.c5
-rw-r--r--fs/gfs2/ops_dentry.c1
-rw-r--r--fs/gfs2/ops_export.c1
-rw-r--r--fs/gfs2/ops_file.c76
-rw-r--r--fs/gfs2/ops_fstype.c156
-rw-r--r--fs/gfs2/ops_inode.c1
-rw-r--r--fs/gfs2/ops_super.c44
-rw-r--r--fs/gfs2/quota.c203
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/recovery.c28
-rw-r--r--fs/gfs2/rgrp.c189
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/super.h26
-rw-r--r--fs/gfs2/sys.c236
-rw-r--r--fs/gfs2/trans.c19
-rw-r--r--fs/gfs2/util.c11
-rw-r--r--fs/inode.c78
-rw-r--r--fs/lockd/clntlock.c51
-rw-r--r--fs/minix/inode.c2
-rw-r--r--fs/namei.c8
-rw-r--r--fs/namespace.c11
-rw-r--r--fs/nfs/client.c73
-rw-r--r--fs/nfs/dir.c8
-rw-r--r--fs/nfs/nfs3acl.c27
-rw-r--r--fs/nfs/nfs3xdr.c34
-rw-r--r--fs/nfs/nfs4namespace.c15
-rw-r--r--fs/nfsd/nfs4xdr.c1
-rw-r--r--fs/ocfs2/alloc.c3
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/namei.c3
-rw-r--r--fs/ocfs2/ocfs2_fs.h6
-rw-r--r--fs/ocfs2/xattr.c30
-rw-r--r--fs/partitions/check.c10
-rw-r--r--fs/pipe.c8
-rw-r--r--fs/proc/base.c16
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/squashfs/block.c18
-rw-r--r--fs/squashfs/cache.c4
-rw-r--r--fs/squashfs/inode.c6
-rw-r--r--fs/squashfs/squashfs.h2
-rw-r--r--fs/squashfs/super.c2
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysfs/bin.c253
-rw-r--r--fs/sysfs/dir.c33
-rw-r--r--fs/sysfs/file.c26
-rw-r--r--fs/sysfs/inode.c17
-rw-r--r--fs/sysfs/mount.c6
-rw-r--r--fs/sysfs/sysfs.h3
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c10
-rw-r--r--fs/xfs/xfs_iget.c15
-rw-r--r--fs/xfs/xfs_log_recover.c17
110 files changed, 2251 insertions, 2745 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 8fa77e233944..76da12537956 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -443,7 +443,7 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
req->private = NULL;
req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);
- req->ki_eventfd = ERR_PTR(-EINVAL);
+ req->ki_eventfd = NULL;
/* Check if the completion queue has enough free space to
* accept an event from this io.
@@ -485,8 +485,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
{
assert_spin_locked(&ctx->ctx_lock);
- if (!IS_ERR(req->ki_eventfd))
- fput(req->ki_eventfd);
if (req->ki_dtor)
req->ki_dtor(req);
if (req->ki_iovec != &req->ki_inline_vec)
@@ -508,8 +506,11 @@ static void aio_fput_routine(struct work_struct *data)
list_del(&req->ki_list);
spin_unlock_irq(&fput_lock);
- /* Complete the fput */
- __fput(req->ki_filp);
+ /* Complete the fput(s) */
+ if (req->ki_filp != NULL)
+ __fput(req->ki_filp);
+ if (req->ki_eventfd != NULL)
+ __fput(req->ki_eventfd);
/* Link the iocb into the context's free list */
spin_lock_irq(&ctx->ctx_lock);
@@ -527,12 +528,14 @@ static void aio_fput_routine(struct work_struct *data)
*/
static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
{
+ int schedule_putreq = 0;
+
dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
req, atomic_long_read(&req->ki_filp->f_count));
assert_spin_locked(&ctx->ctx_lock);
- req->ki_users --;
+ req->ki_users--;
BUG_ON(req->ki_users < 0);
if (likely(req->ki_users))
return 0;
@@ -540,10 +543,23 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
req->ki_cancel = NULL;
req->ki_retry = NULL;
- /* Must be done under the lock to serialise against cancellation.
- * Call this aio_fput as it duplicates fput via the fput_work.
+ /*
+ * Try to optimize the aio and eventfd file* puts, by avoiding to
+ * schedule work in case it is not __fput() time. In normal cases,
+ * we would not be holding the last reference to the file*, so
+ * this function will be executed w/out any aio kthread wakeup.
*/
- if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
+ schedule_putreq++;
+ else
+ req->ki_filp = NULL;
+ if (req->ki_eventfd != NULL) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
+ schedule_putreq++;
+ else
+ req->ki_eventfd = NULL;
+ }
+ if (unlikely(schedule_putreq)) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
@@ -571,7 +587,7 @@ int aio_put_req(struct kiocb *req)
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
struct mm_struct *mm = current->mm;
- struct kioctx *ctx = NULL;
+ struct kioctx *ctx, *ret = NULL;
struct hlist_node *n;
rcu_read_lock();
@@ -579,12 +595,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
if (ctx->user_id == ctx_id && !ctx->dead) {
get_ioctx(ctx);
+ ret = ctx;
break;
}
}
rcu_read_unlock();
- return ctx;
+ return ret;
}
/*
@@ -1009,7 +1026,7 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (!IS_ERR(iocb->ki_eventfd))
+ if (iocb->ki_eventfd != NULL)
eventfd_signal(iocb->ki_eventfd, 1);
put_rq:
@@ -1608,6 +1625,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
+ req->ki_eventfd = NULL;
goto out_put_req;
}
}
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 549b0144da11..fe2b1aa2464e 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -685,19 +685,20 @@ EXPORT_SYMBOL(bio_integrity_split);
* bio_integrity_clone - Callback for cloning bios with integrity metadata
* @bio: New bio
* @bio_src: Original bio
+ * @gfp_mask: Memory allocation mask
* @bs: bio_set to allocate bip from
*
* Description: Called to allocate a bip when cloning a bio
*/
int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
- struct bio_set *bs)
+ gfp_t gfp_mask, struct bio_set *bs)
{
struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
struct bio_integrity_payload *bip;
BUG_ON(bip_src == NULL);
- bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs);
+ bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
if (bip == NULL)
return -EIO;
diff --git a/fs/bio.c b/fs/bio.c
index 124b95c4d582..d4f06327c810 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -463,10 +463,12 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
if (bio_integrity(bio)) {
int ret;
- ret = bio_integrity_clone(b, bio, fs_bio_set);
+ ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
- if (ret < 0)
+ if (ret < 0) {
+ bio_put(b);
return NULL;
+ }
}
return b;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 42491d728e99..37f31b5529aa 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -277,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (*cow_ret == buf)
unlock_orig = 1;
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
if (parent)
parent_start = parent->start;
@@ -2365,7 +2365,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (slot >= btrfs_header_nritems(upper) - 1)
return 1;
- WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+ btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
btrfs_tree_lock(right);
@@ -2562,7 +2562,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (right_nritems == 0)
return 1;
- WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+ btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
btrfs_tree_lock(left);
@@ -4101,7 +4101,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
next = read_node_slot(root, c, slot);
if (!path->skip_locking) {
- WARN_ON(!btrfs_tree_locked(c));
+ btrfs_assert_tree_locked(c);
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
@@ -4126,7 +4126,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
reada_for_search(root, path, level, slot, 0);
next = read_node_slot(root, next, 0);
if (!path->skip_locking) {
- WARN_ON(!btrfs_tree_locked(path->nodes[level]));
+ btrfs_assert_tree_locked(path->nodes[level]);
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 82491ba8fa40..5e1d4e30e9d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -784,7 +784,14 @@ struct btrfs_fs_info {
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
+
+ /*
+ * the space_info list is almost entirely read only. It only changes
+ * when we add a new raid type to the FS, and that happens
+ * very rarely. RCU is used to protect it.
+ */
struct list_head space_info;
+
spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;
@@ -1797,6 +1804,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+
int btrfs_check_metadata_free_space(struct btrfs_root *root);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index adda739a0215..3e18175248e0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -857,7 +857,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
/* ugh, clear_extent_buffer_dirty can be expensive */
btrfs_set_lock_blocking(buf);
@@ -2361,7 +2361,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
btrfs_set_lock_blocking(buf);
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
"found %llu running %llu\n",
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6b5966aacf44..fefe83ad2059 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -20,6 +20,7 @@
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
+#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "crc32c.h"
@@ -330,13 +331,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
- list_for_each_entry(found, head, list) {
- if (found->flags == flags)
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags == flags) {
+ rcu_read_unlock();
return found;
+ }
}
+ rcu_read_unlock();
return NULL;
}
+/*
+ * after adding space to the filesystem, we need to clear the full flags
+ * on all the space infos.
+ */
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list)
+ found->full = 0;
+ rcu_read_unlock();
+}
+
static u64 div_factor(u64 num, int factor)
{
if (factor == 10)
@@ -1903,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (!found)
return -ENOMEM;
- list_add(&found->list, &info->space_info);
INIT_LIST_HEAD(&found->block_groups);
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
@@ -1917,6 +1937,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->full = 0;
found->force_alloc = 0;
*space_info = found;
+ list_add_rcu(&found->list, &info->space_info);
return 0;
}
@@ -4418,13 +4439,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
- BUG_ON(!btrfs_tree_locked(parent));
+ btrfs_assert_tree_locked(parent);
parent_level = btrfs_header_level(parent);
extent_buffer_get(parent);
path->nodes[parent_level] = parent;
path->slots[parent_level] = btrfs_header_nritems(parent);
- BUG_ON(!btrfs_tree_locked(node));
+ btrfs_assert_tree_locked(node);
level = btrfs_header_level(node);
extent_buffer_get(node);
path->nodes[level] = node;
@@ -6320,6 +6341,7 @@ out:
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
struct rb_node *n;
spin_lock(&info->block_group_cache_lock);
@@ -6341,6 +6363,23 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
spin_lock(&info->block_group_cache_lock);
}
spin_unlock(&info->block_group_cache_lock);
+
+ /* now that all the block groups are freed, go through and
+ * free all the space_info structs. This is only called during
+ * the final stages of unmount, and so we know nobody is
+ * using them. We call synchronize_rcu() once before we start,
+ * just to be on the safe side.
+ */
+ synchronize_rcu();
+
+ while(!list_empty(&info->space_info)) {
+ space_info = list_entry(info->space_info.next,
+ struct btrfs_space_info,
+ list);
+
+ list_del(&space_info->list);
+ kfree(space_info);
+ }
return 0;
}
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 85506c4a3af7..47b0a88c12a2 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -220,8 +220,8 @@ int btrfs_tree_unlock(struct extent_buffer *eb)
return 0;
}
-int btrfs_tree_locked(struct extent_buffer *eb)
+void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
- return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) ||
- spin_is_locked(&eb->lock);
+ if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+ assert_spin_locked(&eb->lock);
}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 6bb0afbff928..6c4ce457168c 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -21,11 +21,11 @@
int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_tree_unlock(struct extent_buffer *eb);
-int btrfs_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_lock(struct extent_buffer *eb);
int btrfs_try_spin_lock(struct extent_buffer *eb);
void btrfs_set_lock_blocking(struct extent_buffer *eb);
void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_assert_tree_locked(struct extent_buffer *eb);
#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1316139bf9e8..dd06e18e5aac 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1374,6 +1374,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_add_device(trans, root, device);
}
+ /*
+ * we've got more storage, clear any full flags on the space
+ * infos
+ */
+ btrfs_clear_space_info_full(root->fs_info);
+
unlock_chunks(root);
btrfs_commit_transaction(trans, root);
@@ -1459,6 +1465,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
device->fs_devices->total_rw_bytes += diff;
device->total_bytes = new_size;
+ btrfs_clear_space_info_full(device->dev_root->fs_info);
+
return btrfs_update_device(trans, device);
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f697419ed8e..891e1c78e4f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -760,15 +760,9 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
*/
-static int __set_page_dirty(struct page *page,
+static void __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
-
- if (TestSetPageDirty(page))
- return 0;
-
spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -785,8 +779,6 @@ static int __set_page_dirty(struct page *page,
}
spin_unlock_irq(&mapping->tree_lock);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
- return 1;
}
/*
@@ -816,6 +808,7 @@ static int __set_page_dirty(struct page *page,
*/
int __set_page_dirty_buffers(struct page *page)
{
+ int newly_dirty;
struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
@@ -831,9 +824,12 @@ int __set_page_dirty_buffers(struct page *page)
bh = bh->b_this_page;
} while (bh != head);
}
+ newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
- return __set_page_dirty(page, mapping, 1);
+ if (newly_dirty)
+ __set_page_dirty(page, mapping, 1);
+ return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -1262,8 +1258,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
return;
}
- if (!test_set_buffer_dirty(bh))
- __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+ if (!test_set_buffer_dirty(bh)) {
+ struct page *page = bh->b_page;
+ if (!TestSetPageDirty(page))
+ __set_page_dirty(page, page_mapping(page), 0);
+ }
}
/*
diff --git a/fs/compat.c b/fs/compat.c
index d0145ca27572..0949b43794a4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1402,6 +1402,7 @@ int compat_do_execve(char * filename,
retval = mutex_lock_interruptible(&current->cred_exec_mutex);
if (retval < 0)
goto out_free;
+ current->in_execve = 1;
retval = -ENOMEM;
bprm->cred = prepare_exec_creds();
@@ -1454,6 +1455,7 @@ int compat_do_execve(char * filename,
goto out;
/* execve succeeded */
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
acct_update_integrals(current);
free_bprm(bprm);
@@ -1470,6 +1472,7 @@ out_file:
}
out_unlock:
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
out_free:
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5f3231b9633f..bff4052b05e7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -198,9 +198,6 @@ static int mknod_ptmx(struct super_block *sb)
fsi->ptmx_dentry = dentry;
rc = 0;
-
- printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n",
- inode->i_ino);
out:
mutex_unlock(&root->d_inode->i_mutex);
return rc;
@@ -369,8 +366,6 @@ static int new_pts_mount(struct file_system_type *fs_type, int flags,
struct pts_fs_info *fsi;
struct pts_mount_opts *opts;
- printk(KERN_NOTICE "devpts: newinstance mount\n");
-
err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt);
if (err)
return err;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f6caeb1d1106..8b65f289ee00 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -946,6 +946,8 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
list_for_each_entry(global_auth_tok,
&mount_crypt_stat->global_auth_tok_list,
mount_crypt_stat_list) {
+ if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_FNEK)
+ continue;
rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
if (rc) {
printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc);
@@ -1322,14 +1324,13 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
}
static int
-ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
- struct dentry *ecryptfs_dentry,
- char *virt)
+ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
+ char *virt, size_t virt_len)
{
int rc;
rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
- 0, crypt_stat->num_header_bytes_at_front);
+ 0, virt_len);
if (rc)
printk(KERN_ERR "%s: Error attempting to write header "
"information to lower file; rc = [%d]\n", __func__,
@@ -1339,7 +1340,6 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
static int
ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
- struct ecryptfs_crypt_stat *crypt_stat,
char *page_virt, size_t size)
{
int rc;
@@ -1349,6 +1349,17 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
return rc;
}
+static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
+ unsigned int order)
+{
+ struct page *page;
+
+ page = alloc_pages(gfp_mask | __GFP_ZERO, order);
+ if (page)
+ return (unsigned long) page_address(page);
+ return 0;
+}
+
/**
* ecryptfs_write_metadata
* @ecryptfs_dentry: The eCryptfs dentry
@@ -1365,7 +1376,9 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
{
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+ unsigned int order;
char *virt;
+ size_t virt_len;
size_t size = 0;
int rc = 0;
@@ -1381,33 +1394,35 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
rc = -EINVAL;
goto out;
}
+ virt_len = crypt_stat->num_header_bytes_at_front;
+ order = get_order(virt_len);
/* Released in this function */
- virt = (char *)get_zeroed_page(GFP_KERNEL);
+ virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
if (!virt) {
printk(KERN_ERR "%s: Out of memory\n", __func__);
rc = -ENOMEM;
goto out;
}
- rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size,
- crypt_stat, ecryptfs_dentry);
+ rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat,
+ ecryptfs_dentry);
if (unlikely(rc)) {
printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
__func__, rc);
goto out_free;
}
if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
- rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry,
- crypt_stat, virt, size);
+ rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt,
+ size);
else
- rc = ecryptfs_write_metadata_to_contents(crypt_stat,
- ecryptfs_dentry, virt);
+ rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt,
+ virt_len);
if (rc) {
printk(KERN_ERR "%s: Error writing metadata out to lower file; "
"rc = [%d]\n", __func__, rc);
goto out_free;
}
out_free:
- free_page((unsigned long)virt);
+ free_pages((unsigned long)virt, order);
out:
return rc;
}
@@ -2206,17 +2221,19 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
struct dentry *ecryptfs_dir_dentry,
const char *name, size_t name_size)
{
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+ &ecryptfs_superblock_to_private(
+ ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
char *decoded_name;
size_t decoded_name_size;
size_t packet_size;
int rc = 0;
- if ((name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
+ if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
+ && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ && (name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
&& (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) {
- struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
- &ecryptfs_superblock_to_private(
- ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
const char *orig_name = name;
size_t orig_name_size = name_size;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c11fc95714ab..ac749d4d644f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -328,6 +328,7 @@ struct ecryptfs_dentry_info {
*/
struct ecryptfs_global_auth_tok {
#define ECRYPTFS_AUTH_TOK_INVALID 0x00000001
+#define ECRYPTFS_AUTH_TOK_FNEK 0x00000002
u32 flags;
struct list_head mount_crypt_stat_list;
struct key *global_auth_tok_key;
@@ -619,7 +620,6 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
u32 flags);
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct ecryptfs_crypt_stat *crypt_stat,
struct inode *ecryptfs_dir_inode,
struct nameidata *ecryptfs_nd);
int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
@@ -696,7 +696,7 @@ ecryptfs_write_header_metadata(char *virt,
int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig);
int
ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
- char *sig);
+ char *sig, u32 global_auth_tok_flags);
int ecryptfs_get_global_auth_tok_for_sig(
struct ecryptfs_global_auth_tok **global_auth_tok,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5697899a168d..55b3145b8072 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -246,7 +246,6 @@ out:
*/
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct ecryptfs_crypt_stat *crypt_stat,
struct inode *ecryptfs_dir_inode,
struct nameidata *ecryptfs_nd)
{
@@ -254,6 +253,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct vfsmount *lower_mnt;
struct inode *lower_inode;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+ struct ecryptfs_crypt_stat *crypt_stat;
char *page_virt = NULL;
u64 file_size;
int rc = 0;
@@ -314,6 +314,11 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
goto out_free_kmem;
}
}
+ crypt_stat = &ecryptfs_inode_to_private(
+ ecryptfs_dentry->d_inode)->crypt_stat;
+ /* TODO: lock for crypt_stat comparison */
+ if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
+ ecryptfs_set_default_sizes(crypt_stat);
rc = ecryptfs_read_and_validate_header_region(page_virt,
ecryptfs_dentry->d_inode);
if (rc) {
@@ -362,9 +367,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
{
char *encrypted_and_encoded_name = NULL;
size_t encrypted_and_encoded_name_size;
- struct ecryptfs_crypt_stat *crypt_stat = NULL;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
- struct ecryptfs_inode_info *inode_info;
struct dentry *lower_dir_dentry, *lower_dentry;
int rc = 0;
@@ -388,26 +391,15 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
}
if (lower_dentry->d_inode)
goto lookup_and_interpose;
- inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
- if (inode_info) {
- crypt_stat = &inode_info->crypt_stat;
- /* TODO: lock for crypt_stat comparison */
- if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
- ecryptfs_set_default_sizes(crypt_stat);
- }
- if (crypt_stat)
- mount_crypt_stat = crypt_stat->mount_crypt_stat;
- else
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if (!(crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
- && !(mount_crypt_stat && (mount_crypt_stat->flags
- & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ if (!(mount_crypt_stat
+ && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
goto lookup_and_interpose;
dput(lower_dentry);
rc = ecryptfs_encrypt_and_encode_filename(
&encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
- crypt_stat, mount_crypt_stat, ecryptfs_dentry->d_name.name,
+ NULL, mount_crypt_stat, ecryptfs_dentry->d_name.name,
ecryptfs_dentry->d_name.len);
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt and encode "
@@ -426,7 +418,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
}
lookup_and_interpose:
rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
- crypt_stat, ecryptfs_dir_inode,
+ ecryptfs_dir_inode,
ecryptfs_nd);
goto out;
out_d_drop:
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ff539420cc6f..e4a6223c3145 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -2375,7 +2375,7 @@ struct kmem_cache *ecryptfs_global_auth_tok_cache;
int
ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
- char *sig)
+ char *sig, u32 global_auth_tok_flags)
{
struct ecryptfs_global_auth_tok *new_auth_tok;
int rc = 0;
@@ -2389,6 +2389,7 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
goto out;
}
memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX);
+ new_auth_tok->flags = global_auth_tok_flags;
new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
list_add(&new_auth_tok->mount_crypt_stat_list,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 789cf2e1be1e..aed56c25539b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -319,7 +319,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
case ecryptfs_opt_ecryptfs_sig:
sig_src = args[0].from;
rc = ecryptfs_add_global_auth_tok(mount_crypt_stat,
- sig_src);
+ sig_src, 0);
if (rc) {
printk(KERN_ERR "Error attempting to register "
"global sig; rc = [%d]\n", rc);
@@ -370,7 +370,8 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
ECRYPTFS_SIG_SIZE_HEX] = '\0';
rc = ecryptfs_add_global_auth_tok(
mount_crypt_stat,
- mount_crypt_stat->global_default_fnek_sig);
+ mount_crypt_stat->global_default_fnek_sig,
+ ECRYPTFS_AUTH_TOK_FNEK);
if (rc) {
printk(KERN_ERR "Error attempting to register "
"global fnek sig [%s]; rc = [%d]\n",
diff --git a/fs/exec.c b/fs/exec.c
index 929b58004b7e..b9f1c144b7a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/mount.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/syscalls.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
@@ -127,6 +128,9 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
MAY_READ | MAY_EXEC | MAY_OPEN);
if (error)
goto exit;
+ error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN);
+ if (error)
+ goto exit;
file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
error = PTR_ERR(file);
@@ -674,6 +678,9 @@ struct file *open_exec(const char *name)
err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
if (err)
goto out_path_put;
+ err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN);
+ if (err)
+ goto out_path_put;
file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
if (IS_ERR(file))
@@ -1184,6 +1191,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
retval = security_bprm_check(bprm);
if (retval)
return retval;
+ retval = ima_bprm_check(bprm);
+ if (retval)
+ return retval;
/* kernel module loader fixup */
/* so we don't try to load run modprobe in kernel space. */
@@ -1284,6 +1294,7 @@ int do_execve(char * filename,
retval = mutex_lock_interruptible(&current->cred_exec_mutex);
if (retval < 0)
goto out_free;
+ current->in_execve = 1;
retval = -ENOMEM;
bprm->cred = prepare_exec_creds();
@@ -1337,6 +1348,7 @@ int do_execve(char * filename,
goto out;
/* execve succeeded */
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
acct_update_integrals(current);
free_bprm(bprm);
@@ -1355,6 +1367,7 @@ out_file:
}
out_unlock:
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
out_free:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e2eab196875f..e0aa4fe4f596 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1122,7 +1122,8 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
ext4_fsblk_t block;
- int depth, ee_len;
+ int depth; /* Note, NOT eh_depth; depth from top of tree */
+ int ee_len;
BUG_ON(path == NULL);
depth = path->p_depth;
@@ -1179,7 +1180,8 @@ got_index:
if (bh == NULL)
return -EIO;
eh = ext_block_hdr(bh);
- if (ext4_ext_check_header(inode, eh, depth)) {
+ /* subtract from p_depth to get proper eh_depth */
+ if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
put_bh(bh);
return -EIO;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18a919be70b..2d2b3585ee91 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_group_desc *gdp;
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
- int fatal = 0, err, count;
+ int fatal = 0, err, count, cleared;
ext4_group_t flex_group;
if (atomic_read(&inode->i_count) > 1) {
@@ -248,8 +248,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
goto error_return;
/* Ok, now we can actually update the inode bitmaps.. */
- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
- bit, bitmap_bh->b_data))
+ spin_lock(sb_bgl_lock(sbi, block_group));
+ cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+ spin_unlock(sb_bgl_lock(sbi, block_group));
+ if (!cleared)
ext4_error(sb, "ext4_free_inode",
"bit already cleared for inode %lu", ino);
else {
@@ -696,6 +698,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
struct inode *ret;
ext4_group_t i;
int free = 0;
+ static int once = 1;
ext4_group_t flex_group;
/* Cannot create files in a deleted directory */
@@ -717,7 +720,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
ret2 = find_group_other(sb, dir, &group);
- if (ret2 == 0 && printk_ratelimit())
+ if (ret2 == 0 && once)
+ once = 0;
printk(KERN_NOTICE "ext4: find_group_flex "
"failed, fallback succeeded dir %lu\n",
dir->i_ino);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4415beeb0b62..9f61e62f435f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1447,7 +1447,7 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
struct ext4_free_extent *gex = &ac->ac_g_ex;
BUG_ON(ex->fe_len <= 0);
- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
@@ -3292,7 +3292,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
@@ -3589,6 +3589,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
struct super_block *sb, struct ext4_prealloc_space *pa)
{
ext4_group_t grp;
+ ext4_fsblk_t grp_blk;
if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
return;
@@ -3603,8 +3604,12 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
pa->pa_deleted = 1;
spin_unlock(&pa->pa_lock);
- /* -1 is to protect from crossing allocation group */
- ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
+ grp_blk = pa->pa_pstart;
+ /* If linear, pa_pstart may be in the next group when pa is used up */
+ if (pa->pa_linear)
+ grp_blk--;
+
+ ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
/*
* possible race:
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6b74d09adbe5..de0004fe6e00 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -202,9 +202,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
- mutex_lock(&mapping->host->i_mutex);
+ down_read(&mapping->host->i_alloc_sem);
blocknr = generic_block_bmap(mapping, block, fat_get_block);
- mutex_unlock(&mapping->host->i_mutex);
+ up_read(&mapping->host->i_alloc_sem);
return blocknr;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index bbeeac6efa1a..da806aceae3f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/eventpoll.h>
#include <linux/rcupdate.h>
#include <linux/mount.h>
@@ -279,6 +280,7 @@ void __fput(struct file *file)
if (file->f_op && file->f_op->release)
file->f_op->release(inode, file);
security_file_free(file);
+ ima_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
cdev_put(inode->i_cdev);
fops_put(file->f_op);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e5eaa62fd17f..e3fe9918faaf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -274,6 +274,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
int ret;
BUG_ON(inode->i_state & I_SYNC);
+ WARN_ON(inode->i_state & I_NEW);
/* Set I_SYNC, reset I_DIRTY */
dirty = inode->i_state & I_DIRTY;
@@ -298,6 +299,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
}
spin_lock(&inode_lock);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
if (!(inode->i_state & I_DIRTY) &&
@@ -470,6 +472,11 @@ void generic_sync_sb_inodes(struct super_block *sb,
break;
}
+ if (inode->i_state & I_NEW) {
+ requeue_io(inode);
+ continue;
+ }
+
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
if (!sb_is_blkdev_sb(sb))
@@ -531,7 +538,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
struct address_space *mapping;
- if (inode->i_state & (I_FREEING|I_WILL_FREE))
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
continue;
mapping = inode->i_mapping;
if (mapping->nrpages == 0)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index e563a6449811..3a981b7f64ca 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,10 @@
config GFS2_FS
tristate "GFS2 file system support"
depends on EXPERIMENTAL && (64BIT || LBD)
+ select DLM if GFS2_FS_LOCKING_DLM
+ select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
+ select SYSFS if GFS2_FS_LOCKING_DLM
+ select IP_SCTP if DLM_SCTP
select FS_POSIX_ACL
select CRC32
help
@@ -18,17 +22,16 @@ config GFS2_FS
the locking module below. Documentation and utilities for GFS2 can
be found here: http://sources.redhat.com/cluster
- The "nolock" lock module is now built in to GFS2 by default.
+ The "nolock" lock module is now built in to GFS2 by default. If
+ you want to use the DLM, be sure to enable HOTPLUG and IPv4/6
+ networking.
config GFS2_FS_LOCKING_DLM
- tristate "GFS2 DLM locking module"
- depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
- select IP_SCTP if DLM_SCTP
- select CONFIGFS_FS
- select DLM
+ bool "GFS2 DLM locking"
+ depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
help
Multiple node locking module for GFS2
- Most users of GFS2 will require this module. It provides the locking
+ Most users of GFS2 will require this. It provides the locking
interface between GFS2 and the DLM, which is required to use GFS2
in a cluster environment.
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index c1b4ec6a9650..a851ea4bdf70 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
- glops.o inode.o log.o lops.o locking.o main.o meta_io.o \
+ glops.o inode.o log.o lops.o main.o meta_io.o \
mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
ops_fstype.o ops_inode.o ops_super.o quota.o \
recovery.o rgrp.o super.o sys.o trans.o util.o
-obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
+gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index e335dceb6a4f..43764f4fa763 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -15,7 +15,6 @@
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 11ffc56f1f81..3a5d3f883e10 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index b7c8e5c70791..aef4d0c06748 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -60,7 +60,6 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/vmalloc.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index f114ba2b3557..dee9b03e5b37 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -14,7 +14,6 @@
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 0d1c76d906ae..899763aed217 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/xattr.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6b983aef785d..3984e47d1d33 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -10,7 +10,6 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
@@ -18,7 +17,6 @@
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
-#include <linux/lm_interface.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/rwsem.h>
@@ -155,13 +153,10 @@ static void glock_free(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct inode *aspace = gl->gl_aspace;
- if (sdp->sd_lockstruct.ls_ops->lm_put_lock)
- sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
-
if (aspace)
gfs2_aspace_put(aspace);
- kmem_cache_free(gfs2_glock_cachep, gl);
+ sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
}
/**
@@ -172,6 +167,7 @@ static void glock_free(struct gfs2_glock *gl)
static void gfs2_glock_hold(struct gfs2_glock *gl)
{
+ GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
atomic_inc(&gl->gl_ref);
}
@@ -211,17 +207,15 @@ int gfs2_glock_put(struct gfs2_glock *gl)
atomic_dec(&lru_count);
}
spin_unlock(&lru_lock);
- GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
- GLOCK_BUG_ON(gl, !list_empty(&gl->gl_lru));
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
glock_free(gl);
rv = 1;
goto out;
}
- write_unlock(gl_lock_addr(gl->gl_hash));
/* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */
if (atomic_read(&gl->gl_ref) == 2)
gfs2_glock_schedule_for_reclaim(gl);
+ write_unlock(gl_lock_addr(gl->gl_hash));
out:
return rv;
}
@@ -256,27 +250,6 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
}
/**
- * gfs2_glock_find() - Find glock by lock number
- * @sdp: The GFS2 superblock
- * @name: The lock name
- *
- * Returns: NULL, or the struct gfs2_glock with the requested number
- */
-
-static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
- const struct lm_lockname *name)
-{
- unsigned int hash = gl_hash(sdp, name);
- struct gfs2_glock *gl;
-
- read_lock(gl_lock_addr(hash));
- gl = search_bucket(hash, sdp, name);
- read_unlock(gl_lock_addr(hash));
-
- return gl;
-}
-
-/**
* may_grant - check if its ok to grant a new lock
* @gl: The glock
* @gh: The lock request which we wish to grant
@@ -523,7 +496,7 @@ out_locked:
}
static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
- unsigned int cur_state, unsigned int req_state,
+ unsigned int req_state,
unsigned int flags)
{
int ret = LM_OUT_ERROR;
@@ -532,7 +505,7 @@ static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
return req_state == LM_ST_UNLOCKED ? 0 : req_state;
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
+ ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
req_state, flags);
return ret;
}
@@ -575,7 +548,7 @@ __acquires(&gl->gl_spin)
gl->gl_state == LM_ST_DEFERRED) &&
!(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
lck_flags |= LM_FLAG_TRY_1CB;
- ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags);
+ ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
if (!(ret & LM_OUT_ASYNC)) {
finish_xmote(gl, ret);
@@ -624,10 +597,11 @@ __acquires(&gl->gl_spin)
GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
+ down_read(&gfs2_umount_flush_sem);
if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
gl->gl_demote_state != gl->gl_state) {
if (find_first_holder(gl))
- goto out;
+ goto out_unlock;
if (nonblock)
goto out_sched;
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
@@ -638,23 +612,26 @@ __acquires(&gl->gl_spin)
gfs2_demote_wake(gl);
ret = do_promote(gl);
if (ret == 0)
- goto out;
+ goto out_unlock;
if (ret == 2)
- return;
+ goto out_sem;
gh = find_first_waiter(gl);
gl->gl_target = gh->gh_state;
if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
do_error(gl, 0); /* Fail queued try locks */
}
do_xmote(gl, gh, gl->gl_target);
+out_sem:
+ up_read(&gfs2_umount_flush_sem);
return;
out_sched:
gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
-out:
+out_unlock:
clear_bit(GLF_LOCK, &gl->gl_flags);
+ goto out_sem;
}
static void glock_work_func(struct work_struct *work)
@@ -681,18 +658,6 @@ static void glock_work_func(struct work_struct *work)
gfs2_glock_put(gl);
}
-static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
- void **lockp)
-{
- int error = -EIO;
- if (!sdp->sd_lockstruct.ls_ops->lm_get_lock)
- return 0;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
- sdp->sd_lockstruct.ls_lockspace, name, lockp);
- return error;
-}
-
/**
* gfs2_glock_get() - Get a glock, or create one if one doesn't exist
* @sdp: The GFS2 superblock
@@ -719,10 +684,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl = search_bucket(hash, sdp, &name);
read_unlock(gl_lock_addr(hash));
- if (gl || !create) {
- *glp = gl;
+ *glp = gl;
+ if (gl)
return 0;
- }
+ if (!create)
+ return -ENOENT;
gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
if (!gl)
@@ -736,7 +702,9 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_demote_state = LM_ST_EXCLUSIVE;
gl->gl_hash = hash;
gl->gl_ops = glops;
- gl->gl_stamp = jiffies;
+ snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number);
+ memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
+ gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_sbd = sdp;
@@ -753,10 +721,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
}
}
- error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
- if (error)
- goto fail_aspace;
-
write_lock(gl_lock_addr(hash));
tmp = search_bucket(hash, sdp, &name);
if (tmp) {
@@ -772,9 +736,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
return 0;
-fail_aspace:
- if (gl->gl_aspace)
- gfs2_aspace_put(gl->gl_aspace);
fail:
kmem_cache_free(gfs2_glock_cachep, gl);
return error;
@@ -966,7 +927,7 @@ do_cancel:
if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
spin_unlock(&gl->gl_spin);
if (sdp->sd_lockstruct.ls_ops->lm_cancel)
- sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
+ sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
spin_lock(&gl->gl_spin);
}
return;
@@ -1051,7 +1012,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
spin_lock(&gl->gl_spin);
clear_bit(GLF_LOCK, &gl->gl_flags);
}
- gl->gl_stamp = jiffies;
if (list_empty(&gl->gl_holders) &&
!test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1240,70 +1200,13 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
gfs2_glock_dq_uninit(&ghs[x]);
}
-static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
-{
- int error = -EIO;
- if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb)
- return 0;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
- return error;
-}
-
-/**
- * gfs2_lvb_hold - attach a LVB from a glock
- * @gl: The glock in question
- *
- */
-
-int gfs2_lvb_hold(struct gfs2_glock *gl)
-{
- int error;
-
- if (!atomic_read(&gl->gl_lvb_count)) {
- error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
- if (error)
- return error;
- gfs2_glock_hold(gl);
- }
- atomic_inc(&gl->gl_lvb_count);
-
- return 0;
-}
-
-/**
- * gfs2_lvb_unhold - detach a LVB from a glock
- * @gl: The glock in question
- *
- */
-
-void gfs2_lvb_unhold(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
-
- gfs2_glock_hold(gl);
- gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
- if (atomic_dec_and_test(&gl->gl_lvb_count)) {
- if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb)
- sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb);
- gl->gl_lvb = NULL;
- gfs2_glock_put(gl);
- }
- gfs2_glock_put(gl);
-}
-
-static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
- unsigned int state)
+void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
- struct gfs2_glock *gl;
unsigned long delay = 0;
unsigned long holdtime;
unsigned long now = jiffies;
- gl = gfs2_glock_find(sdp, name);
- if (!gl)
- return;
-
+ gfs2_glock_hold(gl);
holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
@@ -1317,74 +1220,33 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
gfs2_glock_put(gl);
}
-static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
-{
- struct gfs2_jdesc *jd;
-
- spin_lock(&sdp->sd_jindex_spin);
- list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
- if (jd->jd_jid != jid)
- continue;
- jd->jd_dirty = 1;
- break;
- }
- spin_unlock(&sdp->sd_jindex_spin);
-}
-
/**
- * gfs2_glock_cb - Callback used by locking module
- * @sdp: Pointer to the superblock
- * @type: Type of callback
- * @data: Type dependent data pointer
+ * gfs2_glock_complete - Callback used by locking
+ * @gl: Pointer to the glock
+ * @ret: The return value from the dlm
*
- * Called by the locking module when it wants to tell us something.
- * Either we need to drop a lock, one of our ASYNC requests completed, or
- * a journal from another client needs to be recovered.
*/
-void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
+void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
- struct gfs2_sbd *sdp = cb_data;
-
- switch (type) {
- case LM_CB_NEED_E:
- blocking_cb(sdp, data, LM_ST_UNLOCKED);
- return;
-
- case LM_CB_NEED_D:
- blocking_cb(sdp, data, LM_ST_DEFERRED);
- return;
-
- case LM_CB_NEED_S:
- blocking_cb(sdp, data, LM_ST_SHARED);
- return;
-
- case LM_CB_ASYNC: {
- struct lm_async_cb *async = data;
- struct gfs2_glock *gl;
-
- down_read(&gfs2_umount_flush_sem);
- gl = gfs2_glock_find(sdp, &async->lc_name);
- if (gfs2_assert_warn(sdp, gl))
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ gl->gl_reply = ret;
+ if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
+ struct gfs2_holder *gh;
+ spin_lock(&gl->gl_spin);
+ gh = find_first_waiter(gl);
+ if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
+ (gl->gl_target != LM_ST_UNLOCKED)) ||
+ ((ret & ~LM_OUT_ST_MASK) != 0))
+ set_bit(GLF_FROZEN, &gl->gl_flags);
+ spin_unlock(&gl->gl_spin);
+ if (test_bit(GLF_FROZEN, &gl->gl_flags))
return;
- gl->gl_reply = async->lc_ret;
- set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gfs2_glock_put(gl);
- up_read(&gfs2_umount_flush_sem);
- return;
- }
-
- case LM_CB_NEED_RECOVERY:
- gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
- if (sdp->sd_recoverd_process)
- wake_up_process(sdp->sd_recoverd_process);
- return;
-
- default:
- gfs2_assert_warn(sdp, 0);
- return;
}
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ gfs2_glock_hold(gl);
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ gfs2_glock_put(gl);
}
/**
@@ -1515,6 +1377,25 @@ out:
return has_entries;
}
+
+/**
+ * thaw_glock - thaw out a glock which has an unprocessed reply waiting
+ * @gl: The glock to thaw
+ *
+ * N.B. When we freeze a glock, we leave a ref to the glock outstanding,
+ * so this has to result in the ref count being dropped by one.
+ */
+
+static void thaw_glock(struct gfs2_glock *gl)
+{
+ if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+ return;
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ gfs2_glock_hold(gl);
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ gfs2_glock_put(gl);
+}
+
/**
* clear_glock - look at a glock and see if we can free it from glock cache
* @gl: the glock to look at
@@ -1540,6 +1421,20 @@ static void clear_glock(struct gfs2_glock *gl)
}
/**
+ * gfs2_glock_thaw - Thaw any frozen glocks
+ * @sdp: The super block
+ *
+ */
+
+void gfs2_glock_thaw(struct gfs2_sbd *sdp)
+{
+ unsigned x;
+
+ for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
+ examine_bucket(thaw_glock, sdp, x);
+}
+
+/**
* gfs2_gl_hash_clear - Empty out the glock hash table
* @sdp: the filesystem
* @wait: wait until it's all gone
@@ -1619,7 +1514,7 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
if (flags & LM_FLAG_NOEXP)
*p++ = 'e';
if (flags & LM_FLAG_ANY)
- *p++ = 'a';
+ *p++ = 'A';
if (flags & LM_FLAG_PRIORITY)
*p++ = 'p';
if (flags & GL_ASYNC)
@@ -1683,6 +1578,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
*p++ = 'i';
if (test_bit(GLF_REPLY_PENDING, gflags))
*p++ = 'r';
+ if (test_bit(GLF_INITIAL, gflags))
+ *p++ = 'I';
+ if (test_bit(GLF_FROZEN, gflags))
+ *p++ = 'F';
*p = 0;
return buf;
}
@@ -1717,14 +1616,13 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n",
+ gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu a:%d r:%d\n",
state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
gflags2str(gflags_buf, &gl->gl_flags),
state2str(gl->gl_target),
state2str(gl->gl_demote_state), dtime,
- atomic_read(&gl->gl_lvb_count),
atomic_read(&gl->gl_ail_count),
atomic_read(&gl->gl_ref));
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 543ec7ecfbda..a602a28f6f08 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -11,15 +11,130 @@
#define __GLOCK_DOT_H__
#include <linux/sched.h>
+#include <linux/parser.h>
#include "incore.h"
-/* Flags for lock requests; used in gfs2_holder gh_flag field.
- From lm_interface.h:
+/* Options for hostdata parser */
+
+enum {
+ Opt_jid,
+ Opt_id,
+ Opt_first,
+ Opt_nodir,
+ Opt_err,
+};
+
+/*
+ * lm_lockname types
+ */
+
+#define LM_TYPE_RESERVED 0x00
+#define LM_TYPE_NONDISK 0x01
+#define LM_TYPE_INODE 0x02
+#define LM_TYPE_RGRP 0x03
+#define LM_TYPE_META 0x04
+#define LM_TYPE_IOPEN 0x05
+#define LM_TYPE_FLOCK 0x06
+#define LM_TYPE_PLOCK 0x07
+#define LM_TYPE_QUOTA 0x08
+#define LM_TYPE_JOURNAL 0x09
+
+/*
+ * lm_lock() states
+ *
+ * SHARED is compatible with SHARED, not with DEFERRED or EX.
+ * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
+ */
+
+#define LM_ST_UNLOCKED 0
+#define LM_ST_EXCLUSIVE 1
+#define LM_ST_DEFERRED 2
+#define LM_ST_SHARED 3
+
+/*
+ * lm_lock() flags
+ *
+ * LM_FLAG_TRY
+ * Don't wait to acquire the lock if it can't be granted immediately.
+ *
+ * LM_FLAG_TRY_1CB
+ * Send one blocking callback if TRY is set and the lock is not granted.
+ *
+ * LM_FLAG_NOEXP
+ * GFS sets this flag on lock requests it makes while doing journal recovery.
+ * These special requests should not be blocked due to the recovery like
+ * ordinary locks would be.
+ *
+ * LM_FLAG_ANY
+ * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
+ * also be granted in SHARED. The preferred state is whichever is compatible
+ * with other granted locks, or the specified state if no other locks exist.
+ *
+ * LM_FLAG_PRIORITY
+ * Override fairness considerations. Suppose a lock is held in a shared state
+ * and there is a pending request for the deferred state. A shared lock
+ * request with the priority flag would be allowed to bypass the deferred
+ * request and directly join the other shared lock. A shared lock request
+ * without the priority flag might be forced to wait until the deferred
+ * requested had acquired and released the lock.
+ */
+
#define LM_FLAG_TRY 0x00000001
#define LM_FLAG_TRY_1CB 0x00000002
#define LM_FLAG_NOEXP 0x00000004
#define LM_FLAG_ANY 0x00000008
-#define LM_FLAG_PRIORITY 0x00000010 */
+#define LM_FLAG_PRIORITY 0x00000010
+#define GL_ASYNC 0x00000040
+#define GL_EXACT 0x00000080
+#define GL_SKIP 0x00000100
+#define GL_ATIME 0x00000200
+#define GL_NOCACHE 0x00000400
+
+/*
+ * lm_lock() and lm_async_cb return flags
+ *
+ * LM_OUT_ST_MASK
+ * Masks the lower two bits of lock state in the returned value.
+ *
+ * LM_OUT_CANCELED
+ * The lock request was canceled.
+ *
+ * LM_OUT_ASYNC
+ * The result of the request will be returned in an LM_CB_ASYNC callback.
+ *
+ */
+
+#define LM_OUT_ST_MASK 0x00000003
+#define LM_OUT_CANCELED 0x00000008
+#define LM_OUT_ASYNC 0x00000080
+#define LM_OUT_ERROR 0x00000100
+
+/*
+ * lm_recovery_done() messages
+ */
+
+#define LM_RD_GAVEUP 308
+#define LM_RD_SUCCESS 309
+
+#define GLR_TRYFAILED 13
+
+struct lm_lockops {
+ const char *lm_proto_name;
+ int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
+ void (*lm_unmount) (struct gfs2_sbd *sdp);
+ void (*lm_withdraw) (struct gfs2_sbd *sdp);
+ void (*lm_put_lock) (struct kmem_cache *cachep, void *gl);
+ unsigned int (*lm_lock) (struct gfs2_glock *gl,
+ unsigned int req_state, unsigned int flags);
+ void (*lm_cancel) (struct gfs2_glock *gl);
+ const match_table_t *lm_tokens;
+};
+
+#define LM_FLAG_TRY 0x00000001
+#define LM_FLAG_TRY_1CB 0x00000002
+#define LM_FLAG_NOEXP 0x00000004
+#define LM_FLAG_ANY 0x00000008
+#define LM_FLAG_PRIORITY 0x00000010
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
@@ -128,10 +243,12 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
int gfs2_lvb_hold(struct gfs2_glock *gl);
void gfs2_lvb_unhold(struct gfs2_glock *gl);
-void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
+void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
+void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
+void gfs2_glock_thaw(struct gfs2_sbd *sdp);
int __init gfs2_glock_init(void);
void gfs2_glock_exit(void);
@@ -141,4 +258,6 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
int gfs2_register_debugfs(void);
void gfs2_unregister_debugfs(void);
+extern const struct lm_lockops gfs2_dlm_ops;
+
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8522d3aa64fc..bf23a62aa925 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -12,7 +12,6 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/bio.h>
#include "gfs2.h"
@@ -38,20 +37,25 @@
static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
- unsigned int blocks;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
- int error;
+ struct gfs2_trans tr;
- blocks = atomic_read(&gl->gl_ail_count);
- if (!blocks)
- return;
+ memset(&tr, 0, sizeof(tr));
+ tr.tr_revokes = atomic_read(&gl->gl_ail_count);
- error = gfs2_trans_begin(sdp, 0, blocks);
- if (gfs2_assert_withdraw(sdp, !error))
+ if (!tr.tr_revokes)
return;
+ /* A shortened, inline version of gfs2_trans_begin() */
+ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
+ tr.tr_ip = (unsigned long)__builtin_return_address(0);
+ INIT_LIST_HEAD(&tr.tr_list_buf);
+ gfs2_log_reserve(sdp, tr.tr_reserved);
+ BUG_ON(current->journal_info);
+ current->journal_info = &tr;
+
gfs2_log_lock(sdp);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata,
@@ -72,29 +76,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
}
/**
- * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_pte_inval(struct gfs2_glock *gl)
-{
- struct gfs2_inode *ip;
- struct inode *inode;
-
- ip = gl->gl_object;
- inode = &ip->i_inode;
- if (!ip || !S_ISREG(inode->i_mode))
- return;
-
- unmap_shared_mapping_range(inode->i_mapping, 0, 0);
- if (test_bit(GIF_SW_PAGED, &ip->i_flags))
- set_bit(GLF_DIRTY, &gl->gl_flags);
-
-}
-
-/**
- * meta_go_sync - sync out the metadata for this glock
+ * rgrp_go_sync - sync out the metadata for this glock
* @gl: the glock
*
* Called when demoting or unlocking an EX glock. We must flush
@@ -102,36 +84,42 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
* not return to caller to demote/unlock the glock until I/O is complete.
*/
-static void meta_go_sync(struct gfs2_glock *gl)
+static void rgrp_go_sync(struct gfs2_glock *gl)
{
- if (gl->gl_state != LM_ST_EXCLUSIVE)
+ struct address_space *metamapping = gl->gl_aspace->i_mapping;
+ int error;
+
+ if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
return;
+ BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
- if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
- gfs2_log_flush(gl->gl_sbd, gl);
- gfs2_meta_sync(gl);
- gfs2_ail_empty_gl(gl);
- }
+ gfs2_log_flush(gl->gl_sbd, gl);
+ filemap_fdatawrite(metamapping);
+ error = filemap_fdatawait(metamapping);
+ mapping_set_error(metamapping, error);
+ gfs2_ail_empty_gl(gl);
}
/**
- * meta_go_inval - invalidate the metadata for this glock
+ * rgrp_go_inval - invalidate the metadata for this glock
* @gl: the glock
* @flags:
*
+ * We never used LM_ST_DEFERRED with resource groups, so that we
+ * should always see the metadata flag set here.
+ *
*/
-static void meta_go_inval(struct gfs2_glock *gl, int flags)
+static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
- if (!(flags & DIO_METADATA))
- return;
+ struct address_space *mapping = gl->gl_aspace->i_mapping;
- gfs2_meta_inval(gl);
- if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex))
- gl->gl_sbd->sd_rindex_uptodate = 0;
- else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) {
- struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
+ BUG_ON(!(flags & DIO_METADATA));
+ gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
+ truncate_inode_pages(mapping, 0);
+ if (gl->gl_object) {
+ struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
}
}
@@ -148,48 +136,54 @@ static void inode_go_sync(struct gfs2_glock *gl)
struct address_space *metamapping = gl->gl_aspace->i_mapping;
int error;
- if (gl->gl_state != LM_ST_UNLOCKED)
- gfs2_pte_inval(gl);
- if (gl->gl_state != LM_ST_EXCLUSIVE)
- return;
-
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
+ if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+ if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
+ return;
- if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
- gfs2_log_flush(gl->gl_sbd, gl);
- filemap_fdatawrite(metamapping);
- if (ip) {
- struct address_space *mapping = ip->i_inode.i_mapping;
- filemap_fdatawrite(mapping);
- error = filemap_fdatawait(mapping);
- mapping_set_error(mapping, error);
- }
- error = filemap_fdatawait(metamapping);
- mapping_set_error(metamapping, error);
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- gfs2_ail_empty_gl(gl);
+ BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
+
+ gfs2_log_flush(gl->gl_sbd, gl);
+ filemap_fdatawrite(metamapping);
+ if (ip) {
+ struct address_space *mapping = ip->i_inode.i_mapping;
+ filemap_fdatawrite(mapping);
+ error = filemap_fdatawait(mapping);
+ mapping_set_error(mapping, error);
}
+ error = filemap_fdatawait(metamapping);
+ mapping_set_error(metamapping, error);
+ gfs2_ail_empty_gl(gl);
}
/**
* inode_go_inval - prepare a inode glock to be released
* @gl: the glock
* @flags:
+ *
+ * Normally we invlidate everything, but if we are moving into
+ * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
+ * can keep hold of the metadata, since it won't have changed.
*
*/
static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_inode *ip = gl->gl_object;
- int meta = (flags & DIO_METADATA);
- if (meta) {
- gfs2_meta_inval(gl);
+ gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
+
+ if (flags & DIO_METADATA) {
+ struct address_space *mapping = gl->gl_aspace->i_mapping;
+ truncate_inode_pages(mapping, 0);
if (ip)
set_bit(GIF_INVALID, &ip->i_flags);
}
+ if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
+ gl->gl_sbd->sd_rindex_uptodate = 0;
if (ip && S_ISREG(ip->i_inode.i_mode))
truncate_inode_pages(ip->i_inode.i_mapping, 0);
}
@@ -390,20 +384,7 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
return 0;
}
-/**
- * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int quota_go_demote_ok(const struct gfs2_glock *gl)
-{
- return !atomic_read(&gl->gl_lvb_count);
-}
-
const struct gfs2_glock_operations gfs2_meta_glops = {
- .go_xmote_th = meta_go_sync,
.go_type = LM_TYPE_META,
};
@@ -418,8 +399,8 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
- .go_xmote_th = meta_go_sync,
- .go_inval = meta_go_inval,
+ .go_xmote_th = rgrp_go_sync,
+ .go_inval = rgrp_go_inval,
.go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
@@ -448,7 +429,6 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {
};
const struct gfs2_glock_operations gfs2_quota_glops = {
- .go_demote_ok = quota_go_demote_ok,
.go_type = LM_TYPE_QUOTA,
};
@@ -456,3 +436,15 @@ const struct gfs2_glock_operations gfs2_journal_glops = {
.go_type = LM_TYPE_JOURNAL,
};
+const struct gfs2_glock_operations *gfs2_glops_list[] = {
+ [LM_TYPE_META] = &gfs2_meta_glops,
+ [LM_TYPE_INODE] = &gfs2_inode_glops,
+ [LM_TYPE_RGRP] = &gfs2_rgrp_glops,
+ [LM_TYPE_NONDISK] = &gfs2_trans_glops,
+ [LM_TYPE_IOPEN] = &gfs2_iopen_glops,
+ [LM_TYPE_FLOCK] = &gfs2_flock_glops,
+ [LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
+ [LM_TYPE_QUOTA] = &gfs2_quota_glops,
+ [LM_TYPE_JOURNAL] = &gfs2_journal_glops,
+};
+
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index a1d9b5b024e6..b3aa2e3210fd 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -21,5 +21,6 @@ extern const struct gfs2_glock_operations gfs2_flock_glops;
extern const struct gfs2_glock_operations gfs2_nondisk_glops;
extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
+extern const struct gfs2_glock_operations *gfs2_glops_list[];
#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 608849d00021..399d1b978049 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,6 +12,8 @@
#include <linux/fs.h>
#include <linux/workqueue.h>
+#include <linux/dlm.h>
+#include <linux/buffer_head.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -26,6 +28,7 @@ struct gfs2_trans;
struct gfs2_ail;
struct gfs2_jdesc;
struct gfs2_sbd;
+struct lm_lockops;
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
@@ -121,6 +124,28 @@ struct gfs2_bufdata {
struct list_head bd_ail_gl_list;
};
+/*
+ * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
+ * prefix of lock_dlm_ gets awkward.
+ */
+
+#define GDLM_STRNAME_BYTES 25
+#define GDLM_LVB_SIZE 32
+
+enum {
+ DFL_BLOCK_LOCKS = 0,
+};
+
+struct lm_lockname {
+ u64 ln_number;
+ unsigned int ln_type;
+};
+
+#define lm_name_equal(name1, name2) \
+ (((name1)->ln_number == (name2)->ln_number) && \
+ ((name1)->ln_type == (name2)->ln_type))
+
+
struct gfs2_glock_operations {
void (*go_xmote_th) (struct gfs2_glock *gl);
int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
@@ -162,6 +187,8 @@ enum {
GLF_LFLUSH = 7,
GLF_INVALIDATE_IN_PROGRESS = 8,
GLF_REPLY_PENDING = 9,
+ GLF_INITIAL = 10,
+ GLF_FROZEN = 11,
};
struct gfs2_glock {
@@ -176,16 +203,15 @@ struct gfs2_glock {
unsigned int gl_target;
unsigned int gl_reply;
unsigned int gl_hash;
+ unsigned int gl_req;
unsigned int gl_demote_state; /* state requested by remote node */
unsigned long gl_demote_time; /* time of first demote request */
struct list_head gl_holders;
const struct gfs2_glock_operations *gl_ops;
- void *gl_lock;
- char *gl_lvb;
- atomic_t gl_lvb_count;
-
- unsigned long gl_stamp;
+ char gl_strname[GDLM_STRNAME_BYTES];
+ struct dlm_lksb gl_lksb;
+ char gl_lvb[32];
unsigned long gl_tchange;
void *gl_object;
@@ -283,7 +309,9 @@ enum {
struct gfs2_quota_data {
struct list_head qd_list;
- unsigned int qd_count;
+ struct list_head qd_reclaim;
+
+ atomic_t qd_count;
u32 qd_id;
unsigned long qd_flags; /* QDF_... */
@@ -303,7 +331,6 @@ struct gfs2_quota_data {
u64 qd_sync_gen;
unsigned long qd_last_warn;
- unsigned long qd_last_touched;
};
struct gfs2_trans {
@@ -390,7 +417,7 @@ struct gfs2_args {
unsigned int ar_suiddir:1; /* suiddir support */
unsigned int ar_data:2; /* ordered/writeback */
unsigned int ar_meta:1; /* mount metafs */
- unsigned int ar_num_glockd; /* Number of glockd threads */
+ unsigned int ar_discard:1; /* discard requests */
};
struct gfs2_tune {
@@ -406,7 +433,6 @@ struct gfs2_tune {
unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
unsigned int gt_quota_scale_num; /* Numerator */
unsigned int gt_quota_scale_den; /* Denominator */
- unsigned int gt_quota_cache_secs;
unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
unsigned int gt_new_files_jdata;
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
@@ -445,6 +471,31 @@ struct gfs2_sb_host {
char sb_lockproto[GFS2_LOCKNAME_LEN];
char sb_locktable[GFS2_LOCKNAME_LEN];
+ u8 sb_uuid[16];
+};
+
+/*
+ * lm_mount() return values
+ *
+ * ls_jid - the journal ID this node should use
+ * ls_first - this node is the first to mount the file system
+ * ls_lockspace - lock module's context for this file system
+ * ls_ops - lock module's functions
+ */
+
+struct lm_lockstruct {
+ u32 ls_id;
+ unsigned int ls_jid;
+ unsigned int ls_first;
+ unsigned int ls_first_done;
+ unsigned int ls_nodir;
+ const struct lm_lockops *ls_ops;
+ unsigned long ls_flags;
+ dlm_lockspace_t *ls_dlm;
+
+ int ls_recover_jid;
+ int ls_recover_jid_done;
+ int ls_recover_jid_status;
};
struct gfs2_sbd {
@@ -520,7 +571,6 @@ struct gfs2_sbd {
spinlock_t sd_jindex_spin;
struct mutex sd_jindex_mutex;
unsigned int sd_journals;
- unsigned long sd_jindex_refresh_time;
struct gfs2_jdesc *sd_jdesc;
struct gfs2_holder sd_journal_gh;
@@ -540,7 +590,6 @@ struct gfs2_sbd {
struct list_head sd_quota_list;
atomic_t sd_quota_count;
- spinlock_t sd_quota_spin;
struct mutex sd_quota_mutex;
wait_queue_head_t sd_quota_wait;
struct list_head sd_trunc_list;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3b87c188da41..7b277d449155 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -16,7 +16,6 @@
#include <linux/sort.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/security.h>
#include <linux/time.h>
@@ -137,16 +136,16 @@ void gfs2_set_iop(struct inode *inode)
if (S_ISREG(mode)) {
inode->i_op = &gfs2_file_iops;
- if (sdp->sd_args.ar_localflocks)
- inode->i_fop = &gfs2_file_fops_nolock;
+ if (gfs2_localflocks(sdp))
+ inode->i_fop = gfs2_file_fops_nolock;
else
- inode->i_fop = &gfs2_file_fops;
+ inode->i_fop = gfs2_file_fops;
} else if (S_ISDIR(mode)) {
inode->i_op = &gfs2_dir_iops;
- if (sdp->sd_args.ar_localflocks)
- inode->i_fop = &gfs2_dir_fops_nolock;
+ if (gfs2_localflocks(sdp))
+ inode->i_fop = gfs2_dir_fops_nolock;
else
- inode->i_fop = &gfs2_dir_fops;
+ inode->i_fop = gfs2_dir_fops;
} else if (S_ISLNK(mode)) {
inode->i_op = &gfs2_symlink_iops;
} else {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d5329364cdff..dca4fee3078b 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -101,12 +101,26 @@ void gfs2_dinode_print(const struct gfs2_inode *ip);
extern const struct inode_operations gfs2_file_iops;
extern const struct inode_operations gfs2_dir_iops;
extern const struct inode_operations gfs2_symlink_iops;
-extern const struct file_operations gfs2_file_fops;
-extern const struct file_operations gfs2_dir_fops;
-extern const struct file_operations gfs2_file_fops_nolock;
-extern const struct file_operations gfs2_dir_fops_nolock;
+extern const struct file_operations *gfs2_file_fops_nolock;
+extern const struct file_operations *gfs2_dir_fops_nolock;
extern void gfs2_set_inode_flags(struct inode *inode);
+
+#ifdef CONFIG_GFS2_FS_LOCKING_DLM
+extern const struct file_operations *gfs2_file_fops;
+extern const struct file_operations *gfs2_dir_fops;
+static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
+{
+ return sdp->sd_args.ar_localflocks;
+}
+#else /* Single node only */
+#define gfs2_file_fops NULL
+#define gfs2_dir_fops NULL
+static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
+{
+ return 1;
+}
+#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
#endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
new file mode 100644
index 000000000000..46df988323bc
--- /dev/null
+++ b/fs/gfs2/lock_dlm.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+ * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#include <linux/fs.h>
+#include <linux/dlm.h>
+#include <linux/types.h>
+#include <linux/gfs2_ondisk.h>
+
+#include "incore.h"
+#include "glock.h"
+#include "util.h"
+
+
+static void gdlm_ast(void *arg)
+{
+ struct gfs2_glock *gl = arg;
+ unsigned ret = gl->gl_state;
+
+ BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
+
+ if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
+ memset(gl->gl_lvb, 0, GDLM_LVB_SIZE);
+
+ switch (gl->gl_lksb.sb_status) {
+ case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
+ kmem_cache_free(gfs2_glock_cachep, gl);
+ return;
+ case -DLM_ECANCEL: /* Cancel while getting lock */
+ ret |= LM_OUT_CANCELED;
+ goto out;
+ case -EAGAIN: /* Try lock fails */
+ goto out;
+ case -EINVAL: /* Invalid */
+ case -ENOMEM: /* Out of memory */
+ ret |= LM_OUT_ERROR;
+ goto out;
+ case 0: /* Success */
+ break;
+ default: /* Something unexpected */
+ BUG();
+ }
+
+ ret = gl->gl_req;
+ if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
+ if (gl->gl_req == LM_ST_SHARED)
+ ret = LM_ST_DEFERRED;
+ else if (gl->gl_req == LM_ST_DEFERRED)
+ ret = LM_ST_SHARED;
+ else
+ BUG();
+ }
+
+ set_bit(GLF_INITIAL, &gl->gl_flags);
+ gfs2_glock_complete(gl, ret);
+ return;
+out:
+ if (!test_bit(GLF_INITIAL, &gl->gl_flags))
+ gl->gl_lksb.sb_lkid = 0;
+ gfs2_glock_complete(gl, ret);
+}
+
+static void gdlm_bast(void *arg, int mode)
+{
+ struct gfs2_glock *gl = arg;
+
+ switch (mode) {
+ case DLM_LOCK_EX:
+ gfs2_glock_cb(gl, LM_ST_UNLOCKED);
+ break;
+ case DLM_LOCK_CW:
+ gfs2_glock_cb(gl, LM_ST_DEFERRED);
+ break;
+ case DLM_LOCK_PR:
+ gfs2_glock_cb(gl, LM_ST_SHARED);
+ break;
+ default:
+ printk(KERN_ERR "unknown bast mode %d", mode);
+ BUG();
+ }
+}
+
+/* convert gfs lock-state to dlm lock-mode */
+
+static int make_mode(const unsigned int lmstate)
+{
+ switch (lmstate) {
+ case LM_ST_UNLOCKED:
+ return DLM_LOCK_NL;
+ case LM_ST_EXCLUSIVE:
+ return DLM_LOCK_EX;
+ case LM_ST_DEFERRED:
+ return DLM_LOCK_CW;
+ case LM_ST_SHARED:
+ return DLM_LOCK_PR;
+ }
+ printk(KERN_ERR "unknown LM state %d", lmstate);
+ BUG();
+ return -1;
+}
+
+static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
+ const int req)
+{
+ u32 lkf = 0;
+
+ if (gfs_flags & LM_FLAG_TRY)
+ lkf |= DLM_LKF_NOQUEUE;
+
+ if (gfs_flags & LM_FLAG_TRY_1CB) {
+ lkf |= DLM_LKF_NOQUEUE;
+ lkf |= DLM_LKF_NOQUEUEBAST;
+ }
+
+ if (gfs_flags & LM_FLAG_PRIORITY) {
+ lkf |= DLM_LKF_NOORDER;
+ lkf |= DLM_LKF_HEADQUE;
+ }
+
+ if (gfs_flags & LM_FLAG_ANY) {
+ if (req == DLM_LOCK_PR)
+ lkf |= DLM_LKF_ALTCW;
+ else if (req == DLM_LOCK_CW)
+ lkf |= DLM_LKF_ALTPR;
+ else
+ BUG();
+ }
+
+ if (lkid != 0)
+ lkf |= DLM_LKF_CONVERT;
+
+ lkf |= DLM_LKF_VALBLK;
+
+ return lkf;
+}
+
+static unsigned int gdlm_lock(struct gfs2_glock *gl,
+ unsigned int req_state, unsigned int flags)
+{
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ int error;
+ int req;
+ u32 lkf;
+
+ gl->gl_req = req_state;
+ req = make_mode(req_state);
+ lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
+
+ /*
+ * Submit the actual lock request.
+ */
+
+ error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
+ GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+ if (error == -EAGAIN)
+ return 0;
+ if (error)
+ return LM_OUT_ERROR;
+ return LM_OUT_ASYNC;
+}
+
+static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
+{
+ struct gfs2_glock *gl = ptr;
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ int error;
+
+ if (gl->gl_lksb.sb_lkid == 0) {
+ kmem_cache_free(cachep, gl);
+ return;
+ }
+
+ error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
+ NULL, gl);
+ if (error) {
+ printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n",
+ gl->gl_name.ln_type,
+ (unsigned long long)gl->gl_name.ln_number, error);
+ return;
+ }
+}
+
+static void gdlm_cancel(struct gfs2_glock *gl)
+{
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+}
+
+static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ int error;
+
+ if (fsname == NULL) {
+ fs_info(sdp, "no fsname found\n");
+ return -EINVAL;
+ }
+
+ error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm,
+ DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
+ (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
+ GDLM_LVB_SIZE);
+ if (error)
+ printk(KERN_ERR "dlm_new_lockspace error %d", error);
+
+ return error;
+}
+
+static void gdlm_unmount(struct gfs2_sbd *sdp)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+ if (ls->ls_dlm) {
+ dlm_release_lockspace(ls->ls_dlm, 2);
+ ls->ls_dlm = NULL;
+ }
+}
+
+static const match_table_t dlm_tokens = {
+ { Opt_jid, "jid=%d"},
+ { Opt_id, "id=%d"},
+ { Opt_first, "first=%d"},
+ { Opt_nodir, "nodir=%d"},
+ { Opt_err, NULL },
+};
+
+const struct lm_lockops gfs2_dlm_ops = {
+ .lm_proto_name = "lock_dlm",
+ .lm_mount = gdlm_mount,
+ .lm_unmount = gdlm_unmount,
+ .lm_put_lock = gdlm_put_lock,
+ .lm_lock = gdlm_lock,
+ .lm_cancel = gdlm_cancel,
+ .lm_tokens = &dlm_tokens,
+};
+
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
deleted file mode 100644
index 523243a13a21..000000000000
--- a/fs/gfs2/locking.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/kmod.h>
-#include <linux/fs.h>
-#include <linux/delay.h>
-#include <linux/lm_interface.h>
-
-struct lmh_wrapper {
- struct list_head lw_list;
- const struct lm_lockops *lw_ops;
-};
-
-static int nolock_mount(char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj);
-
-/* List of registered low-level locking protocols. A file system selects one
- of them by name at mount time, e.g. lock_nolock, lock_dlm. */
-
-static const struct lm_lockops nolock_ops = {
- .lm_proto_name = "lock_nolock",
- .lm_mount = nolock_mount,
-};
-
-static struct lmh_wrapper nolock_proto = {
- .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list),
- .lw_ops = &nolock_ops,
-};
-
-static LIST_HEAD(lmh_list);
-static DEFINE_MUTEX(lmh_lock);
-
-static int nolock_mount(char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj)
-{
- char *c;
- unsigned int jid;
-
- c = strstr(host_data, "jid=");
- if (!c)
- jid = 0;
- else {
- c += 4;
- sscanf(c, "%u", &jid);
- }
-
- lockstruct->ls_jid = jid;
- lockstruct->ls_first = 1;
- lockstruct->ls_lvb_size = min_lvb_size;
- lockstruct->ls_ops = &nolock_ops;
- lockstruct->ls_flags = LM_LSFLAG_LOCAL;
-
- return 0;
-}
-
-/**
- * gfs2_register_lockproto - Register a low-level locking protocol
- * @proto: the protocol definition
- *
- * Returns: 0 on success, -EXXX on failure
- */
-
-int gfs2_register_lockproto(const struct lm_lockops *proto)
-{
- struct lmh_wrapper *lw;
-
- mutex_lock(&lmh_lock);
-
- list_for_each_entry(lw, &lmh_list, lw_list) {
- if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
- mutex_unlock(&lmh_lock);
- printk(KERN_INFO "GFS2: protocol %s already exists\n",
- proto->lm_proto_name);
- return -EEXIST;
- }
- }
-
- lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
- if (!lw) {
- mutex_unlock(&lmh_lock);
- return -ENOMEM;
- }
-
- lw->lw_ops = proto;
- list_add(&lw->lw_list, &lmh_list);
-
- mutex_unlock(&lmh_lock);
-
- return 0;
-}
-
-/**
- * gfs2_unregister_lockproto - Unregister a low-level locking protocol
- * @proto: the protocol definition
- *
- */
-
-void gfs2_unregister_lockproto(const struct lm_lockops *proto)
-{
- struct lmh_wrapper *lw;
-
- mutex_lock(&lmh_lock);
-
- list_for_each_entry(lw, &lmh_list, lw_list) {
- if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
- list_del(&lw->lw_list);
- mutex_unlock(&lmh_lock);
- kfree(lw);
- return;
- }
- }
-
- mutex_unlock(&lmh_lock);
-
- printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
- proto->lm_proto_name);
-}
-
-/**
- * gfs2_mount_lockproto - Mount a lock protocol
- * @proto_name - the name of the protocol
- * @table_name - the name of the lock space
- * @host_data - data specific to this host
- * @cb - the callback to the code using the lock module
- * @sdp - The GFS2 superblock
- * @min_lvb_size - the mininum LVB size that the caller can deal with
- * @flags - LM_MFLAG_*
- * @lockstruct - a structure returned describing the mount
- *
- * Returns: 0 on success, -EXXX on failure
- */
-
-int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj)
-{
- struct lmh_wrapper *lw = NULL;
- int try = 0;
- int error, found;
-
-
-retry:
- mutex_lock(&lmh_lock);
-
- if (list_empty(&nolock_proto.lw_list))
- list_add(&nolock_proto.lw_list, &lmh_list);
-
- found = 0;
- list_for_each_entry(lw, &lmh_list, lw_list) {
- if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
- found = 1;
- break;
- }
- }
-
- if (!found) {
- if (!try && capable(CAP_SYS_MODULE)) {
- try = 1;
- mutex_unlock(&lmh_lock);
- request_module(proto_name);
- goto retry;
- }
- printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
- error = -ENOENT;
- goto out;
- }
-
- if (lw->lw_ops->lm_owner &&
- !try_module_get(lw->lw_ops->lm_owner)) {
- try = 0;
- mutex_unlock(&lmh_lock);
- msleep(1000);
- goto retry;
- }
-
- error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
- min_lvb_size, flags, lockstruct, fskobj);
- if (error)
- module_put(lw->lw_ops->lm_owner);
-out:
- mutex_unlock(&lmh_lock);
- return error;
-}
-
-void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
-{
- mutex_lock(&lmh_lock);
- if (lockstruct->ls_ops->lm_unmount)
- lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
- if (lockstruct->ls_ops->lm_owner)
- module_put(lockstruct->ls_ops->lm_owner);
- mutex_unlock(&lmh_lock);
-}
-
-/**
- * gfs2_withdraw_lockproto - abnormally unmount a lock module
- * @lockstruct: the lockstruct passed into mount
- *
- */
-
-void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
-{
- mutex_lock(&lmh_lock);
- lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
- if (lockstruct->ls_ops->lm_owner)
- module_put(lockstruct->ls_ops->lm_owner);
- mutex_unlock(&lmh_lock);
-}
-
-EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
-EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);
-
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
deleted file mode 100644
index 2609bb6cd013..000000000000
--- a/fs/gfs2/locking/dlm/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
-lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o
-
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
deleted file mode 100644
index 2482c9047505..000000000000
--- a/fs/gfs2/locking/dlm/lock.c
+++ /dev/null
@@ -1,708 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include "lock_dlm.h"
-
-static char junk_lvb[GDLM_LVB_SIZE];
-
-
-/* convert dlm lock-mode to gfs lock-state */
-
-static s16 gdlm_make_lmstate(s16 dlmmode)
-{
- switch (dlmmode) {
- case DLM_LOCK_IV:
- case DLM_LOCK_NL:
- return LM_ST_UNLOCKED;
- case DLM_LOCK_EX:
- return LM_ST_EXCLUSIVE;
- case DLM_LOCK_CW:
- return LM_ST_DEFERRED;
- case DLM_LOCK_PR:
- return LM_ST_SHARED;
- }
- gdlm_assert(0, "unknown DLM mode %d", dlmmode);
- return -1;
-}
-
-/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
- thread gets to it. */
-
-static void queue_submit(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
-
- spin_lock(&ls->async_lock);
- list_add_tail(&lp->delay_list, &ls->submit);
- spin_unlock(&ls->async_lock);
- wake_up(&ls->thread_wait);
-}
-
-static void wake_up_ast(struct gdlm_lock *lp)
-{
- clear_bit(LFL_AST_WAIT, &lp->flags);
- smp_mb__after_clear_bit();
- wake_up_bit(&lp->flags, LFL_AST_WAIT);
-}
-
-static void gdlm_delete_lp(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
-
- spin_lock(&ls->async_lock);
- if (!list_empty(&lp->delay_list))
- list_del_init(&lp->delay_list);
- ls->all_locks_count--;
- spin_unlock(&ls->async_lock);
-
- kfree(lp);
-}
-
-static void gdlm_queue_delayed(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
-
- spin_lock(&ls->async_lock);
- list_add_tail(&lp->delay_list, &ls->delayed);
- spin_unlock(&ls->async_lock);
-}
-
-static void process_complete(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
- struct lm_async_cb acb;
-
- memset(&acb, 0, sizeof(acb));
-
- if (lp->lksb.sb_status == -DLM_ECANCEL) {
- log_info("complete dlm cancel %x,%llx flags %lx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->flags);
-
- lp->req = lp->cur;
- acb.lc_ret |= LM_OUT_CANCELED;
- if (lp->cur == DLM_LOCK_IV)
- lp->lksb.sb_lkid = 0;
- goto out;
- }
-
- if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
- if (lp->lksb.sb_status != -DLM_EUNLOCK) {
- log_info("unlock sb_status %d %x,%llx flags %lx",
- lp->lksb.sb_status, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->flags);
- return;
- }
-
- lp->cur = DLM_LOCK_IV;
- lp->req = DLM_LOCK_IV;
- lp->lksb.sb_lkid = 0;
-
- if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
- gdlm_delete_lp(lp);
- return;
- }
- goto out;
- }
-
- if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
- memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
-
- if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
- if (lp->req == DLM_LOCK_PR)
- lp->req = DLM_LOCK_CW;
- else if (lp->req == DLM_LOCK_CW)
- lp->req = DLM_LOCK_PR;
- }
-
- /*
- * A canceled lock request. The lock was just taken off the delayed
- * list and was never even submitted to dlm.
- */
-
- if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
- log_info("complete internal cancel %x,%llx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- lp->req = lp->cur;
- acb.lc_ret |= LM_OUT_CANCELED;
- goto out;
- }
-
- /*
- * An error occured.
- */
-
- if (lp->lksb.sb_status) {
- /* a "normal" error */
- if ((lp->lksb.sb_status == -EAGAIN) &&
- (lp->lkf & DLM_LKF_NOQUEUE)) {
- lp->req = lp->cur;
- if (lp->cur == DLM_LOCK_IV)
- lp->lksb.sb_lkid = 0;
- goto out;
- }
-
- /* this could only happen with cancels I think */
- log_info("ast sb_status %d %x,%llx flags %lx",
- lp->lksb.sb_status, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->flags);
- return;
- }
-
- /*
- * This is an AST for an EX->EX conversion for sync_lvb from GFS.
- */
-
- if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
- wake_up_ast(lp);
- return;
- }
-
- /*
- * A lock has been demoted to NL because it initially completed during
- * BLOCK_LOCKS. Now it must be requested in the originally requested
- * mode.
- */
-
- if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
- gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
-
- lp->cur = DLM_LOCK_NL;
- lp->req = lp->prev_req;
- lp->prev_req = DLM_LOCK_IV;
- lp->lkf &= ~DLM_LKF_CONVDEADLK;
-
- set_bit(LFL_NOCACHE, &lp->flags);
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
- !test_bit(LFL_NOBLOCK, &lp->flags))
- gdlm_queue_delayed(lp);
- else
- queue_submit(lp);
- return;
- }
-
- /*
- * A request is granted during dlm recovery. It may be granted
- * because the locks of a failed node were cleared. In that case,
- * there may be inconsistent data beneath this lock and we must wait
- * for recovery to complete to use it. When gfs recovery is done this
- * granted lock will be converted to NL and then reacquired in this
- * granted state.
- */
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
- !test_bit(LFL_NOBLOCK, &lp->flags) &&
- lp->req != DLM_LOCK_NL) {
-
- lp->cur = lp->req;
- lp->prev_req = lp->req;
- lp->req = DLM_LOCK_NL;
- lp->lkf |= DLM_LKF_CONVERT;
- lp->lkf &= ~DLM_LKF_CONVDEADLK;
-
- log_debug("rereq %x,%llx id %x %d,%d",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->lksb.sb_lkid, lp->cur, lp->req);
-
- set_bit(LFL_REREQUEST, &lp->flags);
- queue_submit(lp);
- return;
- }
-
- /*
- * DLM demoted the lock to NL before it was granted so GFS must be
- * told it cannot cache data for this lock.
- */
-
- if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
- set_bit(LFL_NOCACHE, &lp->flags);
-
-out:
- /*
- * This is an internal lock_dlm lock
- */
-
- if (test_bit(LFL_INLOCK, &lp->flags)) {
- clear_bit(LFL_NOBLOCK, &lp->flags);
- lp->cur = lp->req;
- wake_up_ast(lp);
- return;
- }
-
- /*
- * Normal completion of a lock request. Tell GFS it now has the lock.
- */
-
- clear_bit(LFL_NOBLOCK, &lp->flags);
- lp->cur = lp->req;
-
- acb.lc_name = lp->lockname;
- acb.lc_ret |= gdlm_make_lmstate(lp->cur);
-
- ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
-}
-
-static void gdlm_ast(void *astarg)
-{
- struct gdlm_lock *lp = astarg;
- clear_bit(LFL_ACTIVE, &lp->flags);
- process_complete(lp);
-}
-
-static void process_blocking(struct gdlm_lock *lp, int bast_mode)
-{
- struct gdlm_ls *ls = lp->ls;
- unsigned int cb = 0;
-
- switch (gdlm_make_lmstate(bast_mode)) {
- case LM_ST_EXCLUSIVE:
- cb = LM_CB_NEED_E;
- break;
- case LM_ST_DEFERRED:
- cb = LM_CB_NEED_D;
- break;
- case LM_ST_SHARED:
- cb = LM_CB_NEED_S;
- break;
- default:
- gdlm_assert(0, "unknown bast mode %u", bast_mode);
- }
-
- ls->fscb(ls->sdp, cb, &lp->lockname);
-}
-
-
-static void gdlm_bast(void *astarg, int mode)
-{
- struct gdlm_lock *lp = astarg;
-
- if (!mode) {
- printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- return;
- }
-
- process_blocking(lp, mode);
-}
-
-/* convert gfs lock-state to dlm lock-mode */
-
-static s16 make_mode(s16 lmstate)
-{
- switch (lmstate) {
- case LM_ST_UNLOCKED:
- return DLM_LOCK_NL;
- case LM_ST_EXCLUSIVE:
- return DLM_LOCK_EX;
- case LM_ST_DEFERRED:
- return DLM_LOCK_CW;
- case LM_ST_SHARED:
- return DLM_LOCK_PR;
- }
- gdlm_assert(0, "unknown LM state %d", lmstate);
- return -1;
-}
-
-
-/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
- DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
-
-static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
-{
- s16 cur = make_mode(cur_state);
- if (lp->cur != DLM_LOCK_IV)
- gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
-}
-
-static inline unsigned int make_flags(struct gdlm_lock *lp,
- unsigned int gfs_flags,
- s16 cur, s16 req)
-{
- unsigned int lkf = 0;
-
- if (gfs_flags & LM_FLAG_TRY)
- lkf |= DLM_LKF_NOQUEUE;
-
- if (gfs_flags & LM_FLAG_TRY_1CB) {
- lkf |= DLM_LKF_NOQUEUE;
- lkf |= DLM_LKF_NOQUEUEBAST;
- }
-
- if (gfs_flags & LM_FLAG_PRIORITY) {
- lkf |= DLM_LKF_NOORDER;
- lkf |= DLM_LKF_HEADQUE;
- }
-
- if (gfs_flags & LM_FLAG_ANY) {
- if (req == DLM_LOCK_PR)
- lkf |= DLM_LKF_ALTCW;
- else if (req == DLM_LOCK_CW)
- lkf |= DLM_LKF_ALTPR;
- }
-
- if (lp->lksb.sb_lkid != 0) {
- lkf |= DLM_LKF_CONVERT;
- }
-
- if (lp->lvb)
- lkf |= DLM_LKF_VALBLK;
-
- return lkf;
-}
-
-/* make_strname - convert GFS lock numbers to a string */
-
-static inline void make_strname(const struct lm_lockname *lockname,
- struct gdlm_strname *str)
-{
- sprintf(str->name, "%8x%16llx", lockname->ln_type,
- (unsigned long long)lockname->ln_number);
- str->namelen = GDLM_STRNAME_BYTES;
-}
-
-static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
- struct gdlm_lock **lpp)
-{
- struct gdlm_lock *lp;
-
- lp = kzalloc(sizeof(struct gdlm_lock), GFP_NOFS);
- if (!lp)
- return -ENOMEM;
-
- lp->lockname = *name;
- make_strname(name, &lp->strname);
- lp->ls = ls;
- lp->cur = DLM_LOCK_IV;
- INIT_LIST_HEAD(&lp->delay_list);
-
- spin_lock(&ls->async_lock);
- ls->all_locks_count++;
- spin_unlock(&ls->async_lock);
-
- *lpp = lp;
- return 0;
-}
-
-int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
- void **lockp)
-{
- struct gdlm_lock *lp;
- int error;
-
- error = gdlm_create_lp(lockspace, name, &lp);
-
- *lockp = lp;
- return error;
-}
-
-void gdlm_put_lock(void *lock)
-{
- gdlm_delete_lp(lock);
-}
-
-unsigned int gdlm_do_lock(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
- int error, bast = 1;
-
- /*
- * When recovery is in progress, delay lock requests for submission
- * once recovery is done. Requests for recovery (NOEXP) and unlocks
- * can pass.
- */
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
- !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
- gdlm_queue_delayed(lp);
- return LM_OUT_ASYNC;
- }
-
- /*
- * Submit the actual lock request.
- */
-
- if (test_bit(LFL_NOBAST, &lp->flags))
- bast = 0;
-
- set_bit(LFL_ACTIVE, &lp->flags);
-
- log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
- lp->cur, lp->req, lp->lkf);
-
- error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
- lp->strname.name, lp->strname.namelen, 0, gdlm_ast,
- lp, bast ? gdlm_bast : NULL);
-
- if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
- lp->lksb.sb_status = -EAGAIN;
- gdlm_ast(lp);
- error = 0;
- }
-
- if (error) {
- log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
- "flags=%lx", ls->fsname, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, error,
- lp->cur, lp->req, lp->lkf, lp->flags);
- return LM_OUT_ERROR;
- }
- return LM_OUT_ASYNC;
-}
-
-static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
- unsigned int lkf = 0;
- int error;
-
- set_bit(LFL_DLM_UNLOCK, &lp->flags);
- set_bit(LFL_ACTIVE, &lp->flags);
-
- if (lp->lvb)
- lkf = DLM_LKF_VALBLK;
-
- log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->lksb.sb_lkid, lp->cur, lkf);
-
- error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
-
- if (error) {
- log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
- "flags=%lx", ls->fsname, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, error,
- lp->cur, lp->req, lp->lkf, lp->flags);
- return LM_OUT_ERROR;
- }
- return LM_OUT_ASYNC;
-}
-
-unsigned int gdlm_lock(void *lock, unsigned int cur_state,
- unsigned int req_state, unsigned int flags)
-{
- struct gdlm_lock *lp = lock;
-
- if (req_state == LM_ST_UNLOCKED)
- return gdlm_unlock(lock, cur_state);
-
- if (req_state == LM_ST_UNLOCKED)
- return gdlm_unlock(lock, cur_state);
-
- clear_bit(LFL_DLM_CANCEL, &lp->flags);
- if (flags & LM_FLAG_NOEXP)
- set_bit(LFL_NOBLOCK, &lp->flags);
-
- check_cur_state(lp, cur_state);
- lp->req = make_mode(req_state);
- lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
-
- return gdlm_do_lock(lp);
-}
-
-unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
-{
- struct gdlm_lock *lp = lock;
-
- clear_bit(LFL_DLM_CANCEL, &lp->flags);
- if (lp->cur == DLM_LOCK_IV)
- return 0;
- return gdlm_do_unlock(lp);
-}
-
-void gdlm_cancel(void *lock)
-{
- struct gdlm_lock *lp = lock;
- struct gdlm_ls *ls = lp->ls;
- int error, delay_list = 0;
-
- if (test_bit(LFL_DLM_CANCEL, &lp->flags))
- return;
-
- log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->flags);
-
- spin_lock(&ls->async_lock);
- if (!list_empty(&lp->delay_list)) {
- list_del_init(&lp->delay_list);
- delay_list = 1;
- }
- spin_unlock(&ls->async_lock);
-
- if (delay_list) {
- set_bit(LFL_CANCEL, &lp->flags);
- set_bit(LFL_ACTIVE, &lp->flags);
- gdlm_ast(lp);
- return;
- }
-
- if (!test_bit(LFL_ACTIVE, &lp->flags) ||
- test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
- log_info("gdlm_cancel skip %x,%llx flags %lx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->flags);
- return;
- }
-
- /* the lock is blocked in the dlm */
-
- set_bit(LFL_DLM_CANCEL, &lp->flags);
- set_bit(LFL_ACTIVE, &lp->flags);
-
- error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
- NULL, lp);
-
- log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->flags);
-
- if (error == -EBUSY)
- clear_bit(LFL_DLM_CANCEL, &lp->flags);
-}
-
-static int gdlm_add_lvb(struct gdlm_lock *lp)
-{
- char *lvb;
-
- lvb = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
- if (!lvb)
- return -ENOMEM;
-
- lp->lksb.sb_lvbptr = lvb;
- lp->lvb = lvb;
- return 0;
-}
-
-static void gdlm_del_lvb(struct gdlm_lock *lp)
-{
- kfree(lp->lvb);
- lp->lvb = NULL;
- lp->lksb.sb_lvbptr = NULL;
-}
-
-static int gdlm_ast_wait(void *word)
-{
- schedule();
- return 0;
-}
-
-/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
- the completion) because gfs won't call hold_lvb() during a callback (from
- the context of a lock_dlm thread). */
-
-static int hold_null_lock(struct gdlm_lock *lp)
-{
- struct gdlm_lock *lpn = NULL;
- int error;
-
- if (lp->hold_null) {
- printk(KERN_INFO "lock_dlm: lvb already held\n");
- return 0;
- }
-
- error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
- if (error)
- goto out;
-
- lpn->lksb.sb_lvbptr = junk_lvb;
- lpn->lvb = junk_lvb;
-
- lpn->req = DLM_LOCK_NL;
- lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
- set_bit(LFL_NOBAST, &lpn->flags);
- set_bit(LFL_INLOCK, &lpn->flags);
- set_bit(LFL_AST_WAIT, &lpn->flags);
-
- gdlm_do_lock(lpn);
- wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
- error = lpn->lksb.sb_status;
- if (error) {
- printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
- error);
- gdlm_delete_lp(lpn);
- lpn = NULL;
- }
-out:
- lp->hold_null = lpn;
- return error;
-}
-
-/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
- the completion) because gfs may call unhold_lvb() during a callback (from
- the context of a lock_dlm thread) which could cause a deadlock since the
- other lock_dlm thread could be engaged in recovery. */
-
-static void unhold_null_lock(struct gdlm_lock *lp)
-{
- struct gdlm_lock *lpn = lp->hold_null;
-
- gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- lpn->lksb.sb_lvbptr = NULL;
- lpn->lvb = NULL;
- set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
- gdlm_do_unlock(lpn);
- lp->hold_null = NULL;
-}
-
-/* Acquire a NL lock because gfs requires the value block to remain
- intact on the resource while the lvb is "held" even if it's holding no locks
- on the resource. */
-
-int gdlm_hold_lvb(void *lock, char **lvbp)
-{
- struct gdlm_lock *lp = lock;
- int error;
-
- error = gdlm_add_lvb(lp);
- if (error)
- return error;
-
- *lvbp = lp->lvb;
-
- error = hold_null_lock(lp);
- if (error)
- gdlm_del_lvb(lp);
-
- return error;
-}
-
-void gdlm_unhold_lvb(void *lock, char *lvb)
-{
- struct gdlm_lock *lp = lock;
-
- unhold_null_lock(lp);
- gdlm_del_lvb(lp);
-}
-
-void gdlm_submit_delayed(struct gdlm_ls *ls)
-{
- struct gdlm_lock *lp, *safe;
-
- spin_lock(&ls->async_lock);
- list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
- list_del_init(&lp->delay_list);
- list_add_tail(&lp->delay_list, &ls->submit);
- }
- spin_unlock(&ls->async_lock);
- wake_up(&ls->thread_wait);
-}
-
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
deleted file mode 100644
index 3c98e7c6f93b..000000000000
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef LOCK_DLM_DOT_H
-#define LOCK_DLM_DOT_H
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/list.h>
-#include <linux/socket.h>
-#include <linux/delay.h>
-#include <linux/kthread.h>
-#include <linux/kobject.h>
-#include <linux/fcntl.h>
-#include <linux/wait.h>
-#include <net/sock.h>
-
-#include <linux/dlm.h>
-#include <linux/dlm_plock.h>
-#include <linux/lm_interface.h>
-
-/*
- * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
- * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
- * as "lock_dlm".
- */
-
-#define GDLM_STRNAME_BYTES 24
-#define GDLM_LVB_SIZE 32
-#define GDLM_DROP_COUNT 0
-#define GDLM_DROP_PERIOD 60
-#define GDLM_NAME_LEN 128
-
-/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
- We sprintf these numbers into a 24 byte string of hex values to make them
- human-readable (to make debugging simpler.) */
-
-struct gdlm_strname {
- unsigned char name[GDLM_STRNAME_BYTES];
- unsigned short namelen;
-};
-
-enum {
- DFL_BLOCK_LOCKS = 0,
- DFL_SPECTATOR = 1,
- DFL_WITHDRAW = 2,
-};
-
-struct gdlm_ls {
- u32 id;
- int jid;
- int first;
- int first_done;
- unsigned long flags;
- struct kobject kobj;
- char clustername[GDLM_NAME_LEN];
- char fsname[GDLM_NAME_LEN];
- int fsflags;
- dlm_lockspace_t *dlm_lockspace;
- lm_callback_t fscb;
- struct gfs2_sbd *sdp;
- int recover_jid;
- int recover_jid_done;
- int recover_jid_status;
- spinlock_t async_lock;
- struct list_head delayed;
- struct list_head submit;
- u32 all_locks_count;
- wait_queue_head_t wait_control;
- struct task_struct *thread;
- wait_queue_head_t thread_wait;
-};
-
-enum {
- LFL_NOBLOCK = 0,
- LFL_NOCACHE = 1,
- LFL_DLM_UNLOCK = 2,
- LFL_DLM_CANCEL = 3,
- LFL_SYNC_LVB = 4,
- LFL_FORCE_PROMOTE = 5,
- LFL_REREQUEST = 6,
- LFL_ACTIVE = 7,
- LFL_INLOCK = 8,
- LFL_CANCEL = 9,
- LFL_NOBAST = 10,
- LFL_HEADQUE = 11,
- LFL_UNLOCK_DELETE = 12,
- LFL_AST_WAIT = 13,
-};
-
-struct gdlm_lock {
- struct gdlm_ls *ls;
- struct lm_lockname lockname;
- struct gdlm_strname strname;
- char *lvb;
- struct dlm_lksb lksb;
-
- s16 cur;
- s16 req;
- s16 prev_req;
- u32 lkf; /* dlm flags DLM_LKF_ */
- unsigned long flags; /* lock_dlm flags LFL_ */
-
- struct list_head delay_list; /* delayed */
- struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
-};
-
-#define gdlm_assert(assertion, fmt, args...) \
-do { \
- if (unlikely(!(assertion))) { \
- printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
- "lock_dlm: " fmt "\n", \
- #assertion, ##args); \
- BUG(); \
- } \
-} while (0)
-
-#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
-#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
-#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
-#ifdef LOCK_DLM_LOG_DEBUG
-#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
-#else
-#define log_debug(fmt, arg...)
-#endif
-
-/* sysfs.c */
-
-int gdlm_sysfs_init(void);
-void gdlm_sysfs_exit(void);
-int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
-void gdlm_kobject_release(struct gdlm_ls *);
-
-/* thread.c */
-
-int gdlm_init_threads(struct gdlm_ls *);
-void gdlm_release_threads(struct gdlm_ls *);
-
-/* lock.c */
-
-void gdlm_submit_delayed(struct gdlm_ls *);
-unsigned int gdlm_do_lock(struct gdlm_lock *);
-
-int gdlm_get_lock(void *, struct lm_lockname *, void **);
-void gdlm_put_lock(void *);
-unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int);
-unsigned int gdlm_unlock(void *, unsigned int);
-void gdlm_cancel(void *);
-int gdlm_hold_lvb(void *, char **);
-void gdlm_unhold_lvb(void *, char *);
-
-/* mount.c */
-
-extern const struct lm_lockops gdlm_ops;
-
-#endif
-
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
deleted file mode 100644
index b9a03a7ff801..000000000000
--- a/fs/gfs2/locking/dlm/main.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/init.h>
-
-#include "lock_dlm.h"
-
-static int __init init_lock_dlm(void)
-{
- int error;
-
- error = gfs2_register_lockproto(&gdlm_ops);
- if (error) {
- printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
- error);
- return error;
- }
-
- error = gdlm_sysfs_init();
- if (error) {
- gfs2_unregister_lockproto(&gdlm_ops);
- return error;
- }
-
- printk(KERN_INFO
- "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
- return 0;
-}
-
-static void __exit exit_lock_dlm(void)
-{
- gdlm_sysfs_exit();
- gfs2_unregister_lockproto(&gdlm_ops);
-}
-
-module_init(init_lock_dlm);
-module_exit(exit_lock_dlm);
-
-MODULE_DESCRIPTION("GFS DLM Locking Module");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
deleted file mode 100644
index 1aa7eb6a0226..000000000000
--- a/fs/gfs2/locking/dlm/mount.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include "lock_dlm.h"
-
-const struct lm_lockops gdlm_ops;
-
-
-static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
- int flags, char *table_name)
-{
- struct gdlm_ls *ls;
- char buf[256], *p;
-
- ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
- if (!ls)
- return NULL;
-
- ls->fscb = cb;
- ls->sdp = sdp;
- ls->fsflags = flags;
- spin_lock_init(&ls->async_lock);
- INIT_LIST_HEAD(&ls->delayed);
- INIT_LIST_HEAD(&ls->submit);
- init_waitqueue_head(&ls->thread_wait);
- init_waitqueue_head(&ls->wait_control);
- ls->jid = -1;
-
- strncpy(buf, table_name, 256);
- buf[255] = '\0';
-
- p = strchr(buf, ':');
- if (!p) {
- log_info("invalid table_name \"%s\"", table_name);
- kfree(ls);
- return NULL;
- }
- *p = '\0';
- p++;
-
- strncpy(ls->clustername, buf, GDLM_NAME_LEN);
- strncpy(ls->fsname, p, GDLM_NAME_LEN);
-
- return ls;
-}
-
-static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
-{
- char data[256];
- char *options, *x, *y;
- int error = 0;
-
- memset(data, 0, 256);
- strncpy(data, data_arg, 255);
-
- if (!strlen(data)) {
- log_error("no mount options, (u)mount helpers not installed");
- return -EINVAL;
- }
-
- for (options = data; (x = strsep(&options, ":")); ) {
- if (!*x)
- continue;
-
- y = strchr(x, '=');
- if (y)
- *y++ = 0;
-
- if (!strcmp(x, "jid")) {
- if (!y) {
- log_error("need argument to jid");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", &ls->jid);
-
- } else if (!strcmp(x, "first")) {
- if (!y) {
- log_error("need argument to first");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", &ls->first);
-
- } else if (!strcmp(x, "id")) {
- if (!y) {
- log_error("need argument to id");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", &ls->id);
-
- } else if (!strcmp(x, "nodir")) {
- if (!y) {
- log_error("need argument to nodir");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", nodir);
-
- } else {
- log_error("unkonwn option: %s", x);
- error = -EINVAL;
- break;
- }
- }
-
- return error;
-}
-
-static int gdlm_mount(char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj)
-{
- struct gdlm_ls *ls;
- int error = -ENOMEM, nodir = 0;
-
- if (min_lvb_size > GDLM_LVB_SIZE)
- goto out;
-
- ls = init_gdlm(cb, cb_data, flags, table_name);
- if (!ls)
- goto out;
-
- error = make_args(ls, host_data, &nodir);
- if (error)
- goto out;
-
- error = gdlm_init_threads(ls);
- if (error)
- goto out_free;
-
- error = gdlm_kobject_setup(ls, fskobj);
- if (error)
- goto out_thread;
-
- error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
- &ls->dlm_lockspace,
- DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
- (nodir ? DLM_LSFL_NODIR : 0),
- GDLM_LVB_SIZE);
- if (error) {
- log_error("dlm_new_lockspace error %d", error);
- goto out_kobj;
- }
-
- lockstruct->ls_jid = ls->jid;
- lockstruct->ls_first = ls->first;
- lockstruct->ls_lockspace = ls;
- lockstruct->ls_ops = &gdlm_ops;
- lockstruct->ls_flags = 0;
- lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
- return 0;
-
-out_kobj:
- gdlm_kobject_release(ls);
-out_thread:
- gdlm_release_threads(ls);
-out_free:
- kfree(ls);
-out:
- return error;
-}
-
-static void gdlm_unmount(void *lockspace)
-{
- struct gdlm_ls *ls = lockspace;
-
- log_debug("unmount flags %lx", ls->flags);
-
- /* FIXME: serialize unmount and withdraw in case they
- happen at once. Also, if unmount follows withdraw,
- wait for withdraw to finish. */
-
- if (test_bit(DFL_WITHDRAW, &ls->flags))
- goto out;
-
- gdlm_kobject_release(ls);
- dlm_release_lockspace(ls->dlm_lockspace, 2);
- gdlm_release_threads(ls);
- BUG_ON(ls->all_locks_count);
-out:
- kfree(ls);
-}
-
-static void gdlm_recovery_done(void *lockspace, unsigned int jid,
- unsigned int message)
-{
- char env_jid[20];
- char env_status[20];
- char *envp[] = { env_jid, env_status, NULL };
- struct gdlm_ls *ls = lockspace;
- ls->recover_jid_done = jid;
- ls->recover_jid_status = message;
- sprintf(env_jid, "JID=%d", jid);
- sprintf(env_status, "RECOVERY=%s",
- message == LM_RD_SUCCESS ? "Done" : "Failed");
- kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
-}
-
-static void gdlm_others_may_mount(void *lockspace)
-{
- char *message = "FIRSTMOUNT=Done";
- char *envp[] = { message, NULL };
- struct gdlm_ls *ls = lockspace;
- ls->first_done = 1;
- kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
-}
-
-/* Userspace gets the offline uevent, blocks new gfs locks on
- other mounters, and lets us know (sets WITHDRAW flag). Then,
- userspace leaves the mount group while we leave the lockspace. */
-
-static void gdlm_withdraw(void *lockspace)
-{
- struct gdlm_ls *ls = lockspace;
-
- kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
-
- wait_event_interruptible(ls->wait_control,
- test_bit(DFL_WITHDRAW, &ls->flags));
-
- dlm_release_lockspace(ls->dlm_lockspace, 2);
- gdlm_release_threads(ls);
- gdlm_kobject_release(ls);
-}
-
-static int gdlm_plock(void *lockspace, struct lm_lockname *name,
- struct file *file, int cmd, struct file_lock *fl)
-{
- struct gdlm_ls *ls = lockspace;
- return dlm_posix_lock(ls->dlm_lockspace, name->ln_number, file, cmd, fl);
-}
-
-static int gdlm_punlock(void *lockspace, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- struct gdlm_ls *ls = lockspace;
- return dlm_posix_unlock(ls->dlm_lockspace, name->ln_number, file, fl);
-}
-
-static int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- struct gdlm_ls *ls = lockspace;
- return dlm_posix_get(ls->dlm_lockspace, name->ln_number, file, fl);
-}
-
-const struct lm_lockops gdlm_ops = {
- .lm_proto_name = "lock_dlm",
- .lm_mount = gdlm_mount,
- .lm_others_may_mount = gdlm_others_may_mount,
- .lm_unmount = gdlm_unmount,
- .lm_withdraw = gdlm_withdraw,
- .lm_get_lock = gdlm_get_lock,
- .lm_put_lock = gdlm_put_lock,
- .lm_lock = gdlm_lock,
- .lm_unlock = gdlm_unlock,
- .lm_plock = gdlm_plock,
- .lm_punlock = gdlm_punlock,
- .lm_plock_get = gdlm_plock_get,
- .lm_cancel = gdlm_cancel,
- .lm_hold_lvb = gdlm_hold_lvb,
- .lm_unhold_lvb = gdlm_unhold_lvb,
- .lm_recovery_done = gdlm_recovery_done,
- .lm_owner = THIS_MODULE,
-};
-
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
deleted file mode 100644
index 9b7edcf7bd49..000000000000
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/ctype.h>
-#include <linux/stat.h>
-
-#include "lock_dlm.h"
-
-static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
-}
-
-static ssize_t block_show(struct gdlm_ls *ls, char *buf)
-{
- ssize_t ret;
- int val = 0;
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
- val = 1;
- ret = sprintf(buf, "%d\n", val);
- return ret;
-}
-
-static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
-{
- ssize_t ret = len;
- int val;
-
- val = simple_strtol(buf, NULL, 0);
-
- if (val == 1)
- set_bit(DFL_BLOCK_LOCKS, &ls->flags);
- else if (val == 0) {
- clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
- gdlm_submit_delayed(ls);
- } else {
- ret = -EINVAL;
- }
- return ret;
-}
-
-static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
-{
- ssize_t ret;
- int val = 0;
-
- if (test_bit(DFL_WITHDRAW, &ls->flags))
- val = 1;
- ret = sprintf(buf, "%d\n", val);
- return ret;
-}
-
-static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
-{
- ssize_t ret = len;
- int val;
-
- val = simple_strtol(buf, NULL, 0);
-
- if (val == 1)
- set_bit(DFL_WITHDRAW, &ls->flags);
- else
- ret = -EINVAL;
- wake_up(&ls->wait_control);
- return ret;
-}
-
-static ssize_t id_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%u\n", ls->id);
-}
-
-static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->jid);
-}
-
-static ssize_t first_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->first);
-}
-
-static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->first_done);
-}
-
-static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->recover_jid);
-}
-
-static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
-{
- ls->recover_jid = simple_strtol(buf, NULL, 0);
- ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid);
- return len;
-}
-
-static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->recover_jid_done);
-}
-
-static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->recover_jid_status);
-}
-
-struct gdlm_attr {
- struct attribute attr;
- ssize_t (*show)(struct gdlm_ls *, char *);
- ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
-};
-
-#define GDLM_ATTR(_name,_mode,_show,_store) \
-static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
-
-GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
-GDLM_ATTR(block, 0644, block_show, block_store);
-GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
-GDLM_ATTR(id, 0444, id_show, NULL);
-GDLM_ATTR(jid, 0444, jid_show, NULL);
-GDLM_ATTR(first, 0444, first_show, NULL);
-GDLM_ATTR(first_done, 0444, first_done_show, NULL);
-GDLM_ATTR(recover, 0644, recover_show, recover_store);
-GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
-GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
-
-static struct attribute *gdlm_attrs[] = {
- &gdlm_attr_proto_name.attr,
- &gdlm_attr_block.attr,
- &gdlm_attr_withdraw.attr,
- &gdlm_attr_id.attr,
- &gdlm_attr_jid.attr,
- &gdlm_attr_first.attr,
- &gdlm_attr_first_done.attr,
- &gdlm_attr_recover.attr,
- &gdlm_attr_recover_done.attr,
- &gdlm_attr_recover_status.attr,
- NULL,
-};
-
-static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
- struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
- return a->show ? a->show(ls, buf) : 0;
-}
-
-static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len)
-{
- struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
- struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
- return a->store ? a->store(ls, buf, len) : len;
-}
-
-static struct sysfs_ops gdlm_attr_ops = {
- .show = gdlm_attr_show,
- .store = gdlm_attr_store,
-};
-
-static struct kobj_type gdlm_ktype = {
- .default_attrs = gdlm_attrs,
- .sysfs_ops = &gdlm_attr_ops,
-};
-
-static struct kset *gdlm_kset;
-
-int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
-{
- int error;
-
- ls->kobj.kset = gdlm_kset;
- error = kobject_init_and_add(&ls->kobj, &gdlm_ktype, fskobj,
- "lock_module");
- if (error)
- log_error("can't register kobj %d", error);
- kobject_uevent(&ls->kobj, KOBJ_ADD);
-
- return error;
-}
-
-void gdlm_kobject_release(struct gdlm_ls *ls)
-{
- kobject_put(&ls->kobj);
-}
-
-static int gdlm_uevent(struct kset *kset, struct kobject *kobj,
- struct kobj_uevent_env *env)
-{
- struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
- add_uevent_var(env, "LOCKTABLE=%s:%s", ls->clustername, ls->fsname);
- add_uevent_var(env, "LOCKPROTO=lock_dlm");
- return 0;
-}
-
-static struct kset_uevent_ops gdlm_uevent_ops = {
- .uevent = gdlm_uevent,
-};
-
-
-int gdlm_sysfs_init(void)
-{
- gdlm_kset = kset_create_and_add("lock_dlm", &gdlm_uevent_ops, kernel_kobj);
- if (!gdlm_kset) {
- printk(KERN_WARNING "%s: can not create kset\n", __func__);
- return -ENOMEM;
- }
- return 0;
-}
-
-void gdlm_sysfs_exit(void)
-{
- kset_unregister(gdlm_kset);
-}
-
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
deleted file mode 100644
index 38823efd698c..000000000000
--- a/fs/gfs2/locking/dlm/thread.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include "lock_dlm.h"
-
-static inline int no_work(struct gdlm_ls *ls)
-{
- int ret;
-
- spin_lock(&ls->async_lock);
- ret = list_empty(&ls->submit);
- spin_unlock(&ls->async_lock);
-
- return ret;
-}
-
-static int gdlm_thread(void *data)
-{
- struct gdlm_ls *ls = (struct gdlm_ls *) data;
- struct gdlm_lock *lp = NULL;
-
- while (!kthread_should_stop()) {
- wait_event_interruptible(ls->thread_wait,
- !no_work(ls) || kthread_should_stop());
-
- spin_lock(&ls->async_lock);
-
- if (!list_empty(&ls->submit)) {
- lp = list_entry(ls->submit.next, struct gdlm_lock,
- delay_list);
- list_del_init(&lp->delay_list);
- spin_unlock(&ls->async_lock);
- gdlm_do_lock(lp);
- spin_lock(&ls->async_lock);
- }
- spin_unlock(&ls->async_lock);
- }
-
- return 0;
-}
-
-int gdlm_init_threads(struct gdlm_ls *ls)
-{
- struct task_struct *p;
- int error;
-
- p = kthread_run(gdlm_thread, ls, "lock_dlm");
- error = IS_ERR(p);
- if (error) {
- log_error("can't start lock_dlm thread %d", error);
- return error;
- }
- ls->thread = p;
-
- return 0;
-}
-
-void gdlm_release_threads(struct gdlm_ls *ls)
-{
- kthread_stop(ls->thread);
-}
-
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index ad305854bdc6..98918a756410 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -14,7 +14,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4390f6f4047d..80e4f5f898bb 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -13,7 +13,6 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7cacfde32194..a6892ed0840a 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/atomic.h>
#include "gfs2.h"
@@ -23,6 +22,12 @@
#include "sys.h"
#include "util.h"
#include "glock.h"
+#include "quota.h"
+
+static struct shrinker qd_shrinker = {
+ .shrink = gfs2_shrink_qd_memory,
+ .seeks = DEFAULT_SEEKS,
+};
static void gfs2_init_inode_once(void *foo)
{
@@ -41,8 +46,6 @@ static void gfs2_init_glock_once(void *foo)
INIT_HLIST_NODE(&gl->gl_list);
spin_lock_init(&gl->gl_spin);
INIT_LIST_HEAD(&gl->gl_holders);
- gl->gl_lvb = NULL;
- atomic_set(&gl->gl_lvb_count, 0);
INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
atomic_set(&gl->gl_ail_count, 0);
@@ -100,6 +103,8 @@ static int __init init_gfs2_fs(void)
if (!gfs2_quotad_cachep)
goto fail;
+ register_shrinker(&qd_shrinker);
+
error = register_filesystem(&gfs2_fs_type);
if (error)
goto fail;
@@ -117,6 +122,7 @@ static int __init init_gfs2_fs(void)
fail_unregister:
unregister_filesystem(&gfs2_fs_type);
fail:
+ unregister_shrinker(&qd_shrinker);
gfs2_glock_exit();
if (gfs2_quotad_cachep)
@@ -145,6 +151,7 @@ fail:
static void __exit exit_gfs2_fs(void)
{
+ unregister_shrinker(&qd_shrinker);
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 09853620c951..8d6f13256b26 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -19,7 +19,6 @@
#include <linux/delay.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
@@ -90,27 +89,6 @@ void gfs2_aspace_put(struct inode *aspace)
}
/**
- * gfs2_meta_inval - Invalidate all buffers associated with a glock
- * @gl: the glock
- *
- */
-
-void gfs2_meta_inval(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- struct inode *aspace = gl->gl_aspace;
- struct address_space *mapping = gl->gl_aspace->i_mapping;
-
- gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
-
- atomic_inc(&aspace->i_writecount);
- truncate_inode_pages(mapping, 0);
- atomic_dec(&aspace->i_writecount);
-
- gfs2_assert_withdraw(sdp, !mapping->nrpages);
-}
-
-/**
* gfs2_meta_sync - Sync all buffers associated with a glock
* @gl: The glock
*
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index b1a5f3674d43..de270c2f9b63 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -40,7 +40,6 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
void gfs2_aspace_put(struct inode *aspace);
-void gfs2_meta_inval(struct gfs2_glock *gl);
void gfs2_meta_sync(struct gfs2_glock *gl);
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 3cb0a44ba023..f7e8527a21e0 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -12,12 +12,11 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/parser.h>
#include "gfs2.h"
#include "incore.h"
-#include "mount.h"
+#include "super.h"
#include "sys.h"
#include "util.h"
@@ -37,11 +36,15 @@ enum {
Opt_quota_off,
Opt_quota_account,
Opt_quota_on,
+ Opt_quota,
+ Opt_noquota,
Opt_suiddir,
Opt_nosuiddir,
Opt_data_writeback,
Opt_data_ordered,
Opt_meta,
+ Opt_discard,
+ Opt_nodiscard,
Opt_err,
};
@@ -61,11 +64,15 @@ static const match_table_t tokens = {
{Opt_quota_off, "quota=off"},
{Opt_quota_account, "quota=account"},
{Opt_quota_on, "quota=on"},
+ {Opt_quota, "quota"},
+ {Opt_noquota, "noquota"},
{Opt_suiddir, "suiddir"},
{Opt_nosuiddir, "nosuiddir"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_ordered, "data=ordered"},
{Opt_meta, "meta"},
+ {Opt_discard, "discard"},
+ {Opt_nodiscard, "nodiscard"},
{Opt_err, NULL}
};
@@ -77,101 +84,46 @@ static const match_table_t tokens = {
* Return: errno
*/
-int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
+int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
{
- struct gfs2_args *args = &sdp->sd_args;
- char *data = data_arg;
- char *options, *o, *v;
- int error = 0;
-
- if (!remount) {
- /* Set some defaults */
- args->ar_quota = GFS2_QUOTA_DEFAULT;
- args->ar_data = GFS2_DATA_DEFAULT;
- }
+ char *o;
+ int token;
+ substring_t tmp[MAX_OPT_ARGS];
/* Split the options into tokens with the "," character and
process them */
- for (options = data; (o = strsep(&options, ",")); ) {
- int token;
- substring_t tmp[MAX_OPT_ARGS];
-
- if (!*o)
+ while (1) {
+ o = strsep(&options, ",");
+ if (o == NULL)
+ break;
+ if (*o == '\0')
continue;
token = match_token(o, tokens, tmp);
switch (token) {
case Opt_lockproto:
- v = match_strdup(&tmp[0]);
- if (!v) {
- fs_info(sdp, "no memory for lockproto\n");
- error = -ENOMEM;
- goto out_error;
- }
-
- if (remount && strcmp(v, args->ar_lockproto)) {
- kfree(v);
- goto cant_remount;
- }
-
- strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
- args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
- kfree(v);
+ match_strlcpy(args->ar_lockproto, &tmp[0],
+ GFS2_LOCKNAME_LEN);
break;
case Opt_locktable:
- v = match_strdup(&tmp[0]);
- if (!v) {
- fs_info(sdp, "no memory for locktable\n");
- error = -ENOMEM;
- goto out_error;
- }
-
- if (remount && strcmp(v, args->ar_locktable)) {
- kfree(v);
- goto cant_remount;
- }
-
- strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
- args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
- kfree(v);
+ match_strlcpy(args->ar_locktable, &tmp[0],
+ GFS2_LOCKNAME_LEN);
break;
case Opt_hostdata:
- v = match_strdup(&tmp[0]);
- if (!v) {
- fs_info(sdp, "no memory for hostdata\n");
- error = -ENOMEM;
- goto out_error;
- }
-
- if (remount && strcmp(v, args->ar_hostdata)) {
- kfree(v);
- goto cant_remount;
- }
-
- strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
- args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
- kfree(v);
+ match_strlcpy(args->ar_hostdata, &tmp[0],
+ GFS2_LOCKNAME_LEN);
break;
case Opt_spectator:
- if (remount && !args->ar_spectator)
- goto cant_remount;
args->ar_spectator = 1;
- sdp->sd_vfs->s_flags |= MS_RDONLY;
break;
case Opt_ignore_local_fs:
- if (remount && !args->ar_ignore_local_fs)
- goto cant_remount;
args->ar_ignore_local_fs = 1;
break;
case Opt_localflocks:
- if (remount && !args->ar_localflocks)
- goto cant_remount;
args->ar_localflocks = 1;
break;
case Opt_localcaching:
- if (remount && !args->ar_localcaching)
- goto cant_remount;
args->ar_localcaching = 1;
break;
case Opt_debug:
@@ -181,25 +133,23 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
args->ar_debug = 0;
break;
case Opt_upgrade:
- if (remount && !args->ar_upgrade)
- goto cant_remount;
args->ar_upgrade = 1;
break;
case Opt_acl:
args->ar_posix_acl = 1;
- sdp->sd_vfs->s_flags |= MS_POSIXACL;
break;
case Opt_noacl:
args->ar_posix_acl = 0;
- sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
break;
case Opt_quota_off:
+ case Opt_noquota:
args->ar_quota = GFS2_QUOTA_OFF;
break;
case Opt_quota_account:
args->ar_quota = GFS2_QUOTA_ACCOUNT;
break;
case Opt_quota_on:
+ case Opt_quota:
args->ar_quota = GFS2_QUOTA_ON;
break;
case Opt_suiddir:
@@ -215,29 +165,21 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
args->ar_data = GFS2_DATA_ORDERED;
break;
case Opt_meta:
- if (remount && args->ar_meta != 1)
- goto cant_remount;
args->ar_meta = 1;
break;
+ case Opt_discard:
+ args->ar_discard = 1;
+ break;
+ case Opt_nodiscard:
+ args->ar_discard = 0;
+ break;
case Opt_err:
default:
- fs_info(sdp, "unknown option: %s\n", o);
- error = -EINVAL;
- goto out_error;
+ fs_info(sdp, "invalid mount option: %s\n", o);
+ return -EINVAL;
}
}
-out_error:
- if (error)
- fs_info(sdp, "invalid mount option(s)\n");
-
- if (data != data_arg)
- kfree(data);
-
- return error;
-
-cant_remount:
- fs_info(sdp, "can't remount with option %s\n", o);
- return -EINVAL;
+ return 0;
}
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
deleted file mode 100644
index 401288acfdf3..000000000000
--- a/fs/gfs2/mount.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __MOUNT_DOT_H__
-#define __MOUNT_DOT_H__
-
-struct gfs2_sbd;
-
-int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
-
-#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 4ddab67867eb..a6dde1751e17 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -19,7 +19,6 @@
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/backing-dev.h>
#include "gfs2.h"
@@ -442,6 +441,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
*/
if (unlikely(page->index)) {
zero_user(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
return 0;
}
@@ -1096,6 +1096,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
.releasepage = gfs2_releasepage,
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations gfs2_ordered_aops = {
@@ -1111,6 +1112,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
.releasepage = gfs2_releasepage,
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations gfs2_jdata_aops = {
@@ -1125,6 +1127,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
.bmap = gfs2_bmap,
.invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
void gfs2_set_aops(struct inode *inode)
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index c2ad36330ca3..5eb57b044382 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 7fdeb14ddd1a..9200ef221716 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -14,7 +14,6 @@
#include <linux/exportfs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 93fe41b67f97..3b9e8de3500b 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -20,9 +20,10 @@
#include <linux/gfs2_ondisk.h>
#include <linux/ext2_fs.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/writeback.h>
#include <asm/uaccess.h>
+#include <linux/dlm.h>
+#include <linux/dlm_plock.h>
#include "gfs2.h"
#include "incore.h"
@@ -354,7 +355,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
if (ret)
goto out;
+ set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
+
ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
if (ret || !alloc_required)
goto out_unlock;
@@ -560,57 +563,24 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
return ret;
}
+#ifdef CONFIG_GFS2_FS_LOCKING_DLM
+
/**
* gfs2_setlease - acquire/release a file lease
* @file: the file pointer
* @arg: lease type
* @fl: file lock
*
+ * We don't currently have a way to enforce a lease across the whole
+ * cluster; until we do, disable leases (by just returning -EINVAL),
+ * unless the administrator has requested purely local locking.
+ *
* Returns: errno
*/
static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
{
- struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
-
- /*
- * We don't currently have a way to enforce a lease across the whole
- * cluster; until we do, disable leases (by just returning -EINVAL),
- * unless the administrator has requested purely local locking.
- */
- if (!sdp->sd_args.ar_localflocks)
- return -EINVAL;
- return generic_setlease(file, arg, fl);
-}
-
-static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- int error = -EIO;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
- sdp->sd_lockstruct.ls_lockspace, name, file, fl);
- return error;
-}
-
-static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
- struct file *file, int cmd, struct file_lock *fl)
-{
- int error = -EIO;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_plock(
- sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
- return error;
-}
-
-static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- int error = -EIO;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_punlock(
- sdp->sd_lockstruct.ls_lockspace, name, file, fl);
- return error;
+ return -EINVAL;
}
/**
@@ -626,9 +596,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
- struct lm_lockname name =
- { .ln_number = ip->i_no_addr,
- .ln_type = LM_TYPE_PLOCK };
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
@@ -640,12 +608,14 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
+ if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ return -EIO;
if (IS_GETLK(cmd))
- return gfs2_lm_plock_get(sdp, &name, file, fl);
+ return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
else if (fl->fl_type == F_UNLCK)
- return gfs2_lm_punlock(sdp, &name, file, fl);
+ return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
else
- return gfs2_lm_plock(sdp, &name, file, cmd, fl);
+ return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
}
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
@@ -732,7 +702,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
}
}
-const struct file_operations gfs2_file_fops = {
+const struct file_operations *gfs2_file_fops = &(const struct file_operations){
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -750,7 +720,7 @@ const struct file_operations gfs2_file_fops = {
.setlease = gfs2_setlease,
};
-const struct file_operations gfs2_dir_fops = {
+const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
@@ -760,7 +730,9 @@ const struct file_operations gfs2_dir_fops = {
.flock = gfs2_flock,
};
-const struct file_operations gfs2_file_fops_nolock = {
+#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
+
+const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -773,10 +745,10 @@ const struct file_operations gfs2_file_fops_nolock = {
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
- .setlease = gfs2_setlease,
+ .setlease = generic_setlease,
};
-const struct file_operations gfs2_dir_fops_nolock = {
+const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f91eebdde581..51883b3ad89c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,7 +17,6 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
@@ -25,7 +24,6 @@
#include "glock.h"
#include "glops.h"
#include "inode.h"
-#include "mount.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
@@ -64,7 +62,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_quota_warn_period = 10;
gt->gt_quota_scale_num = 1;
gt->gt_quota_scale_den = 1;
- gt->gt_quota_cache_secs = 300;
gt->gt_quota_quantum = 60;
gt->gt_new_files_jdata = 0;
gt->gt_max_readahead = 1 << 18;
@@ -100,7 +97,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
mutex_init(&sdp->sd_jindex_mutex);
INIT_LIST_HEAD(&sdp->sd_quota_list);
- spin_lock_init(&sdp->sd_quota_spin);
mutex_init(&sdp->sd_quota_mutex);
init_waitqueue_head(&sdp->sd_quota_wait);
INIT_LIST_HEAD(&sdp->sd_trunc_list);
@@ -238,6 +234,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+ memcpy(sb->sb_uuid, str->sb_uuid, 16);
}
/**
@@ -299,15 +296,15 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
__free_page(page);
return 0;
}
+
/**
* gfs2_read_sb - Read super block
* @sdp: The GFS2 superblock
- * @gl: the glock for the superblock (assumed to be held)
* @silent: Don't print message if mount fails
*
*/
-static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
+static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
{
u32 hash_blocks, ind_blocks, leaf_blocks;
u32 tmp_blocks;
@@ -527,7 +524,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
return ret;
}
- ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
+ ret = gfs2_read_sb(sdp, silent);
if (ret) {
fs_err(sdp, "can't read superblock: %d\n", ret);
goto out;
@@ -630,13 +627,13 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
return rc;
}
-static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
+static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
{
- if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount)
- return;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
- sdp->sd_lockstruct.ls_lockspace);
+ char *message = "FIRSTMOUNT=Done";
+ char *envp[] = { message, NULL };
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ls->ls_first_done = 1;
+ kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
}
/**
@@ -796,7 +793,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
}
}
- gfs2_lm_others_may_mount(sdp);
+ gfs2_others_may_mount(sdp);
} else if (!sdp->sd_args.ar_spectator) {
error = gfs2_recover_journal(sdp->sd_jdesc);
if (error) {
@@ -1005,7 +1002,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
goto fail_quotad;
sdp->sd_log_flush_time = jiffies;
- sdp->sd_jindex_refresh_time = jiffies;
p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
error = IS_ERR(p);
@@ -1033,6 +1029,17 @@ fail:
return error;
}
+static const match_table_t nolock_tokens = {
+ { Opt_jid, "jid=%d\n", },
+ { Opt_err, NULL },
+};
+
+static const struct lm_lockops nolock_ops = {
+ .lm_proto_name = "lock_nolock",
+ .lm_put_lock = kmem_cache_free,
+ .lm_tokens = &nolock_tokens,
+};
+
/**
* gfs2_lm_mount - mount a locking protocol
* @sdp: the filesystem
@@ -1044,31 +1051,73 @@ fail:
static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
{
- char *proto = sdp->sd_proto_name;
- char *table = sdp->sd_table_name;
- int flags = LM_MFLAG_CONV_NODROP;
- int error;
+ const struct lm_lockops *lm;
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ struct gfs2_args *args = &sdp->sd_args;
+ const char *proto = sdp->sd_proto_name;
+ const char *table = sdp->sd_table_name;
+ const char *fsname;
+ char *o, *options;
+ int ret;
- if (sdp->sd_args.ar_spectator)
- flags |= LM_MFLAG_SPECTATOR;
+ if (!strcmp("lock_nolock", proto)) {
+ lm = &nolock_ops;
+ sdp->sd_args.ar_localflocks = 1;
+ sdp->sd_args.ar_localcaching = 1;
+#ifdef CONFIG_GFS2_FS_LOCKING_DLM
+ } else if (!strcmp("lock_dlm", proto)) {
+ lm = &gfs2_dlm_ops;
+#endif
+ } else {
+ printk(KERN_INFO "GFS2: can't find protocol %s\n", proto);
+ return -ENOENT;
+ }
fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
- error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
- gfs2_glock_cb, sdp,
- GFS2_MIN_LVB_SIZE, flags,
- &sdp->sd_lockstruct, &sdp->sd_kobj);
- if (error) {
- fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
- proto, table, sdp->sd_args.ar_hostdata);
- goto out;
- }
+ ls->ls_ops = lm;
+ ls->ls_first = 1;
+ ls->ls_id = 0;
- if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
- gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
- GFS2_MIN_LVB_SIZE)) {
- gfs2_unmount_lockproto(&sdp->sd_lockstruct);
- goto out;
+ for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
+ substring_t tmp[MAX_OPT_ARGS];
+ int token, option;
+
+ if (!o || !*o)
+ continue;
+
+ token = match_token(o, *lm->lm_tokens, tmp);
+ switch (token) {
+ case Opt_jid:
+ ret = match_int(&tmp[0], &option);
+ if (ret || option < 0)
+ goto hostdata_error;
+ ls->ls_jid = option;
+ break;
+ case Opt_id:
+ ret = match_int(&tmp[0], &option);
+ if (ret)
+ goto hostdata_error;
+ ls->ls_id = option;
+ break;
+ case Opt_first:
+ ret = match_int(&tmp[0], &option);
+ if (ret || (option != 0 && option != 1))
+ goto hostdata_error;
+ ls->ls_first = option;
+ break;
+ case Opt_nodir:
+ ret = match_int(&tmp[0], &option);
+ if (ret || (option != 0 && option != 1))
+ goto hostdata_error;
+ ls->ls_nodir = option;
+ break;
+ case Opt_err:
+ default:
+hostdata_error:
+ fs_info(sdp, "unknown hostdata (%s)\n", o);
+ return -EINVAL;
+ }
}
if (sdp->sd_args.ar_spectator)
@@ -1077,22 +1126,25 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
sdp->sd_lockstruct.ls_jid);
- fs_info(sdp, "Joined cluster. Now mounting FS...\n");
-
- if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
- !sdp->sd_args.ar_ignore_local_fs) {
- sdp->sd_args.ar_localflocks = 1;
- sdp->sd_args.ar_localcaching = 1;
+ fsname = strchr(table, ':');
+ if (fsname)
+ fsname++;
+ if (lm->lm_mount == NULL) {
+ fs_info(sdp, "Now mounting FS...\n");
+ return 0;
}
-
-out:
- return error;
+ ret = lm->lm_mount(sdp, fsname);
+ if (ret == 0)
+ fs_info(sdp, "Joined cluster. Now mounting FS...\n");
+ return ret;
}
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- gfs2_unmount_lockproto(&sdp->sd_lockstruct);
+ const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
+ if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ lm->lm_unmount)
+ lm->lm_unmount(sdp);
}
/**
@@ -1116,12 +1168,20 @@ static int fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
}
- error = gfs2_mount_args(sdp, (char *)data, 0);
+ sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
+ sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
+
+ error = gfs2_mount_args(sdp, &sdp->sd_args, data);
if (error) {
printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
goto fail;
}
+ if (sdp->sd_args.ar_spectator)
+ sb->s_flags |= MS_RDONLY;
+ if (sdp->sd_args.ar_posix_acl)
+ sb->s_flags |= MS_POSIXACL;
+
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_export_op = &gfs2_export_ops;
@@ -1199,6 +1259,8 @@ fail_sb:
dput(sdp->sd_root_dir);
if (sdp->sd_master_dir)
dput(sdp->sd_master_dir);
+ if (sb->s_root)
+ dput(sb->s_root);
sb->s_root = NULL;
fail_locking:
init_locking(sdp, &mount_gh, UNDO);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 49877546beb9..abd5429ae285 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,7 +18,6 @@
#include <linux/posix_acl.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/fiemap.h>
#include <asm/uaccess.h>
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 320323d03479..458019569dcb 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -19,7 +19,6 @@
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/time.h>
#include "gfs2.h"
@@ -27,7 +26,6 @@
#include "glock.h"
#include "inode.h"
#include "log.h"
-#include "mount.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
@@ -40,6 +38,8 @@
#include "bmap.h"
#include "meta_io.h"
+#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
+
/**
* gfs2_write_inode - Make sure the inode is stable on the disk
* @inode: The inode
@@ -435,25 +435,45 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_args args = sdp->sd_args; /* Default to current settings */
int error;
- error = gfs2_mount_args(sdp, data, 1);
+ error = gfs2_mount_args(sdp, &args, data);
if (error)
return error;
+ /* Not allowed to change locking details */
+ if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
+ strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
+ strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
+ return -EINVAL;
+
+ /* Some flags must not be changed */
+ if (args_neq(&args, &sdp->sd_args, spectator) ||
+ args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
+ args_neq(&args, &sdp->sd_args, localflocks) ||
+ args_neq(&args, &sdp->sd_args, localcaching) ||
+ args_neq(&args, &sdp->sd_args, meta))
+ return -EINVAL;
+
if (sdp->sd_args.ar_spectator)
*flags |= MS_RDONLY;
- else {
- if (*flags & MS_RDONLY) {
- if (!(sb->s_flags & MS_RDONLY))
- error = gfs2_make_fs_ro(sdp);
- } else if (!(*flags & MS_RDONLY) &&
- (sb->s_flags & MS_RDONLY)) {
+
+ if ((sb->s_flags ^ *flags) & MS_RDONLY) {
+ if (*flags & MS_RDONLY)
+ error = gfs2_make_fs_ro(sdp);
+ else
error = gfs2_make_fs_rw(sdp);
- }
+ if (error)
+ return error;
}
- return error;
+ sdp->sd_args = args;
+ if (sdp->sd_args.ar_posix_acl)
+ sb->s_flags |= MS_POSIXACL;
+ else
+ sb->s_flags &= ~MS_POSIXACL;
+ return 0;
}
/**
@@ -588,6 +608,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
}
seq_printf(s, ",data=%s", state);
}
+ if (args->ar_discard)
+ seq_printf(s, ",discard");
return 0;
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b08d09696b3e..8d53f66b5bcc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -45,7 +45,6 @@
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -80,6 +79,51 @@ struct gfs2_quota_change_host {
u32 qc_id;
};
+static LIST_HEAD(qd_lru_list);
+static atomic_t qd_lru_count = ATOMIC_INIT(0);
+static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED;
+
+int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+{
+ struct gfs2_quota_data *qd;
+ struct gfs2_sbd *sdp;
+
+ if (nr == 0)
+ goto out;
+
+ if (!(gfp_mask & __GFP_FS))
+ return -1;
+
+ spin_lock(&qd_lru_lock);
+ while (nr && !list_empty(&qd_lru_list)) {
+ qd = list_entry(qd_lru_list.next,
+ struct gfs2_quota_data, qd_reclaim);
+ sdp = qd->qd_gl->gl_sbd;
+
+ /* Free from the filesystem-specific list */
+ list_del(&qd->qd_list);
+
+ gfs2_assert_warn(sdp, !qd->qd_change);
+ gfs2_assert_warn(sdp, !qd->qd_slot_count);
+ gfs2_assert_warn(sdp, !qd->qd_bh_count);
+
+ gfs2_glock_put(qd->qd_gl);
+ atomic_dec(&sdp->sd_quota_count);
+
+ /* Delete it from the common reclaim list */
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ spin_unlock(&qd_lru_lock);
+ kmem_cache_free(gfs2_quotad_cachep, qd);
+ spin_lock(&qd_lru_lock);
+ nr--;
+ }
+ spin_unlock(&qd_lru_lock);
+
+out:
+ return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
+}
+
static u64 qd2offset(struct gfs2_quota_data *qd)
{
u64 offset;
@@ -100,22 +144,18 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
if (!qd)
return -ENOMEM;
- qd->qd_count = 1;
+ atomic_set(&qd->qd_count, 1);
qd->qd_id = id;
if (user)
set_bit(QDF_USER, &qd->qd_flags);
qd->qd_slot = -1;
+ INIT_LIST_HEAD(&qd->qd_reclaim);
error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
&gfs2_quota_glops, CREATE, &qd->qd_gl);
if (error)
goto fail;
- error = gfs2_lvb_hold(qd->qd_gl);
- gfs2_glock_put(qd->qd_gl);
- if (error)
- goto fail;
-
*qdp = qd;
return 0;
@@ -135,11 +175,17 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
for (;;) {
found = 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
if (qd->qd_id == id &&
!test_bit(QDF_USER, &qd->qd_flags) == !user) {
- qd->qd_count++;
+ if (!atomic_read(&qd->qd_count) &&
+ !list_empty(&qd->qd_reclaim)) {
+ /* Remove it from reclaim list */
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ }
+ atomic_inc(&qd->qd_count);
found = 1;
break;
}
@@ -155,11 +201,11 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
new_qd = NULL;
}
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (qd || !create) {
if (new_qd) {
- gfs2_lvb_unhold(new_qd->qd_gl);
+ gfs2_glock_put(new_qd->qd_gl);
kmem_cache_free(gfs2_quotad_cachep, new_qd);
}
*qdp = qd;
@@ -175,21 +221,18 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
static void qd_hold(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
-
- spin_lock(&sdp->sd_quota_spin);
- gfs2_assert(sdp, qd->qd_count);
- qd->qd_count++;
- spin_unlock(&sdp->sd_quota_spin);
+ gfs2_assert(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
}
static void qd_put(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&sdp->sd_quota_spin);
- gfs2_assert(sdp, qd->qd_count);
- if (!--qd->qd_count)
- qd->qd_last_touched = jiffies;
- spin_unlock(&sdp->sd_quota_spin);
+ if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) {
+ /* Add to the reclaim list */
+ list_add_tail(&qd->qd_reclaim, &qd_lru_list);
+ atomic_inc(&qd_lru_count);
+ spin_unlock(&qd_lru_lock);
+ }
}
static int slot_get(struct gfs2_quota_data *qd)
@@ -198,10 +241,10 @@ static int slot_get(struct gfs2_quota_data *qd)
unsigned int c, o = 0, b;
unsigned char byte = 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
if (qd->qd_slot_count++) {
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return 0;
}
@@ -225,13 +268,13 @@ found:
sdp->sd_quota_bitmap[c][o] |= 1 << b;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return 0;
fail:
qd->qd_slot_count--;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return -ENOSPC;
}
@@ -239,23 +282,23 @@ static void slot_hold(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
gfs2_assert(sdp, qd->qd_slot_count);
qd->qd_slot_count++;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
}
static void slot_put(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
gfs2_assert(sdp, qd->qd_slot_count);
if (!--qd->qd_slot_count) {
gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
qd->qd_slot = -1;
}
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
}
static int bh_get(struct gfs2_quota_data *qd)
@@ -330,7 +373,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
if (sdp->sd_vfs->s_flags & MS_RDONLY)
return 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
@@ -341,8 +384,8 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
set_bit(QDF_LOCKED, &qd->qd_flags);
- gfs2_assert_warn(sdp, qd->qd_count);
- qd->qd_count++;
+ gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
qd->qd_change_sync = qd->qd_change;
gfs2_assert_warn(sdp, qd->qd_slot_count);
qd->qd_slot_count++;
@@ -354,7 +397,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
if (!found)
qd = NULL;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (qd) {
gfs2_assert_warn(sdp, qd->qd_change_sync);
@@ -379,24 +422,24 @@ static int qd_trylock(struct gfs2_quota_data *qd)
if (sdp->sd_vfs->s_flags & MS_RDONLY)
return 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
!test_bit(QDF_CHANGE, &qd->qd_flags)) {
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return 0;
}
list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
set_bit(QDF_LOCKED, &qd->qd_flags);
- gfs2_assert_warn(sdp, qd->qd_count);
- qd->qd_count++;
+ gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
qd->qd_change_sync = qd->qd_change;
gfs2_assert_warn(sdp, qd->qd_slot_count);
qd->qd_slot_count++;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
gfs2_assert_warn(sdp, qd->qd_change_sync);
if (bh_get(qd)) {
@@ -556,9 +599,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
x = be64_to_cpu(qc->qc_change) + change;
qc->qc_change = cpu_to_be64(x);
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
qd->qd_change = x;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (!x) {
gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
@@ -802,8 +845,8 @@ restart:
loff_t pos;
gfs2_glock_dq_uninit(q_gh);
error = gfs2_glock_nq_init(qd->qd_gl,
- LM_ST_EXCLUSIVE, GL_NOCACHE,
- q_gh);
+ LM_ST_EXCLUSIVE, GL_NOCACHE,
+ q_gh);
if (error)
return error;
@@ -820,7 +863,6 @@ restart:
gfs2_glock_dq_uninit(&i_gh);
-
gfs2_quota_in(&q, buf);
qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
@@ -890,9 +932,9 @@ static int need_sync(struct gfs2_quota_data *qd)
if (!qd->qd_qb.qb_limit)
return 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
value = qd->qd_change;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
spin_lock(&gt->gt_spin);
num = gt->gt_quota_scale_num;
@@ -985,9 +1027,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
continue;
value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
value += qd->qd_change;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
print_message(qd, "exceeded");
@@ -1171,13 +1213,12 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
qd->qd_change = qc.qc_change;
qd->qd_slot = slot;
qd->qd_slot_count = 1;
- qd->qd_last_touched = jiffies;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
list_add(&qd->qd_list, &sdp->sd_quota_list);
atomic_inc(&sdp->sd_quota_count);
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
found++;
}
@@ -1197,73 +1238,48 @@ fail:
return error;
}
-static void gfs2_quota_scan(struct gfs2_sbd *sdp)
-{
- struct gfs2_quota_data *qd, *safe;
- LIST_HEAD(dead);
-
- spin_lock(&sdp->sd_quota_spin);
- list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
- if (!qd->qd_count &&
- time_after_eq(jiffies, qd->qd_last_touched +
- gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
- list_move(&qd->qd_list, &dead);
- gfs2_assert_warn(sdp,
- atomic_read(&sdp->sd_quota_count) > 0);
- atomic_dec(&sdp->sd_quota_count);
- }
- }
- spin_unlock(&sdp->sd_quota_spin);
-
- while (!list_empty(&dead)) {
- qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
- list_del(&qd->qd_list);
-
- gfs2_assert_warn(sdp, !qd->qd_change);
- gfs2_assert_warn(sdp, !qd->qd_slot_count);
- gfs2_assert_warn(sdp, !qd->qd_bh_count);
-
- gfs2_lvb_unhold(qd->qd_gl);
- kmem_cache_free(gfs2_quotad_cachep, qd);
- }
-}
-
void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
{
struct list_head *head = &sdp->sd_quota_list;
struct gfs2_quota_data *qd;
unsigned int x;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
while (!list_empty(head)) {
qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
- if (qd->qd_count > 1 ||
- (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
+ if (atomic_read(&qd->qd_count) > 1 ||
+ (atomic_read(&qd->qd_count) &&
+ !test_bit(QDF_CHANGE, &qd->qd_flags))) {
list_move(&qd->qd_list, head);
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
schedule();
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
continue;
}
list_del(&qd->qd_list);
+ /* Also remove if this qd exists in the reclaim list */
+ if (!list_empty(&qd->qd_reclaim)) {
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ }
atomic_dec(&sdp->sd_quota_count);
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
- if (!qd->qd_count) {
+ if (!atomic_read(&qd->qd_count)) {
gfs2_assert_warn(sdp, !qd->qd_change);
gfs2_assert_warn(sdp, !qd->qd_slot_count);
} else
gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
gfs2_assert_warn(sdp, !qd->qd_bh_count);
- gfs2_lvb_unhold(qd->qd_gl);
+ gfs2_glock_put(qd->qd_gl);
kmem_cache_free(gfs2_quotad_cachep, qd);
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
}
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
@@ -1341,9 +1357,6 @@ int gfs2_quotad(void *data)
quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
&quotad_timeo, &tune->gt_quota_quantum);
- /* FIXME: This should be turned into a shrinker */
- gfs2_quota_scan(sdp);
-
/* Check for & recover partially truncated inodes */
quotad_check_trunc_list(sdp);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index cec9032be97d..0fa5fa63d0e8 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -49,4 +49,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
+extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index efd09c3d2b26..247e8f7d6b3d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -427,20 +426,23 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
}
-static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
- unsigned int message)
+static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
+ unsigned int message)
{
- if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done)
- return;
-
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- sdp->sd_lockstruct.ls_ops->lm_recovery_done(
- sdp->sd_lockstruct.ls_lockspace, jid, message);
+ char env_jid[20];
+ char env_status[20];
+ char *envp[] = { env_jid, env_status, NULL };
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ls->ls_recover_jid_done = jid;
+ ls->ls_recover_jid_status = message;
+ sprintf(env_jid, "JID=%d", jid);
+ sprintf(env_status, "RECOVERY=%s",
+ message == LM_RD_SUCCESS ? "Done" : "Failed");
+ kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
}
-
/**
- * gfs2_recover_journal - recovery a given journal
+ * gfs2_recover_journal - recover a given journal
* @jd: the struct gfs2_jdesc describing the journal
*
* Acquire the journal's lock, check to see if the journal is clean, and
@@ -561,7 +563,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
gfs2_glock_dq_uninit(&ji_gh);
- gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
+ gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
gfs2_glock_dq_uninit(&j_gh);
@@ -581,7 +583,7 @@ fail_gunlock_j:
fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
fail:
- gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
+ gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
return error;
}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b01c635d925..f03d024038ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -13,8 +13,8 @@
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/prefetch.h>
+#include <linux/blkdev.h>
#include "gfs2.h"
#include "incore.h"
@@ -132,81 +132,90 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
}
/**
+ * gfs2_bit_search
+ * @ptr: Pointer to bitmap data
+ * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
+ * @state: The state we are searching for
+ *
+ * We xor the bitmap data with a patter which is the bitwise opposite
+ * of what we are looking for, this gives rise to a pattern of ones
+ * wherever there is a match. Since we have two bits per entry, we
+ * take this pattern, shift it down by one place and then and it with
+ * the original. All the even bit positions (0,2,4, etc) then represent
+ * successful matches, so we mask with 0x55555..... to remove the unwanted
+ * odd bit positions.
+ *
+ * This allows searching of a whole u64 at once (32 blocks) with a
+ * single test (on 64 bit arches).
+ */
+
+static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
+{
+ u64 tmp;
+ static const u64 search[] = {
+ [0] = 0xffffffffffffffffULL,
+ [1] = 0xaaaaaaaaaaaaaaaaULL,
+ [2] = 0x5555555555555555ULL,
+ [3] = 0x0000000000000000ULL,
+ };
+ tmp = le64_to_cpu(*ptr) ^ search[state];
+ tmp &= (tmp >> 1);
+ tmp &= mask;
+ return tmp;
+}
+
+/**
* gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
* a block in a given allocation state.
* @buffer: the buffer that holds the bitmaps
- * @buflen: the length (in bytes) of the buffer
+ * @len: the length (in bytes) of the buffer
* @goal: start search at this block's bit-pair (within @buffer)
- * @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
+ * @state: GFS2_BLKST_XXX the state of the block we're looking for.
*
* Scope of @goal and returned block number is only within this bitmap buffer,
* not entire rgrp or filesystem. @buffer will be offset from the actual
- * beginning of a bitmap block buffer, skipping any header structures.
+ * beginning of a bitmap block buffer, skipping any header structures, but
+ * headers are always a multiple of 64 bits long so that the buffer is
+ * always aligned to a 64 bit boundary.
+ *
+ * The size of the buffer is in bytes, but is it assumed that it is
+ * always ok to to read a complete multiple of 64 bits at the end
+ * of the block in case the end is no aligned to a natural boundary.
*
* Return: the block number (bitmap buffer scope) that was found
*/
-static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
- u8 old_state)
+static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
+ u32 goal, u8 state)
{
- const u8 *byte, *start, *end;
- int bit, startbit;
- u32 g1, g2, misaligned;
- unsigned long *plong;
- unsigned long lskipval;
-
- lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
- g1 = (goal / GFS2_NBBY);
- start = buffer + g1;
- byte = start;
- end = buffer + buflen;
- g2 = ALIGN(g1, sizeof(unsigned long));
- plong = (unsigned long *)(buffer + g2);
- startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
- misaligned = g2 - g1;
- if (!misaligned)
- goto ulong_aligned;
-/* parse the bitmap a byte at a time */
-misaligned:
- while (byte < end) {
- if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
- return goal +
- (((byte - start) * GFS2_NBBY) +
- ((bit - startbit) >> 1));
- }
- bit += GFS2_BIT_SIZE;
- if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
- bit = 0;
- byte++;
- misaligned--;
- if (!misaligned) {
- plong = (unsigned long *)byte;
- goto ulong_aligned;
- }
- }
- }
- return BFITNOENT;
-
-/* parse the bitmap a unsigned long at a time */
-ulong_aligned:
- /* Stop at "end - 1" or else prefetch can go past the end and segfault.
- We could "if" it but we'd lose some of the performance gained.
- This way will only slow down searching the very last 4/8 bytes
- depending on architecture. I've experimented with several ways
- of writing this section such as using an else before the goto
- but this one seems to be the fastest. */
- while ((unsigned char *)plong < end - sizeof(unsigned long)) {
- prefetch(plong + 1);
- if (((*plong) & LBITMASK) != lskipval)
- break;
- plong++;
- }
- if ((unsigned char *)plong < end) {
- byte = (const u8 *)plong;
- misaligned += sizeof(unsigned long) - 1;
- goto misaligned;
+ u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
+ const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
+ const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
+ u64 tmp;
+ u64 mask = 0x5555555555555555ULL;
+ u32 bit;
+
+ BUG_ON(state > 3);
+
+ /* Mask off bits we don't care about at the start of the search */
+ mask <<= spoint;
+ tmp = gfs2_bit_search(ptr, mask, state);
+ ptr++;
+ while(tmp == 0 && ptr < end) {
+ tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
+ ptr++;
}
- return BFITNOENT;
+ /* Mask off any bits which are more than len bytes from the start */
+ if (ptr == end && (len & (sizeof(u64) - 1)))
+ tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
+ /* Didn't find anything, so return */
+ if (tmp == 0)
+ return BFITNOENT;
+ ptr--;
+ bit = fls64(tmp);
+ bit--; /* fls64 always adds one to the bit count */
+ bit /= 2; /* two bits per entry in the bitmap */
+ return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
/**
@@ -831,6 +840,58 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
spin_unlock(&sdp->sd_rindex_spin);
}
+static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
+ const struct gfs2_bitmap *bi)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ struct block_device *bdev = sb->s_bdev;
+ const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
+ bdev_hardsect_size(sb->s_bdev);
+ u64 blk;
+ sector_t start = 0;
+ sector_t nr_sects = 0;
+ int rv;
+ unsigned int x;
+
+ for (x = 0; x < bi->bi_len; x++) {
+ const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
+ const u8 *clone = bi->bi_clone + bi->bi_offset + x;
+ u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
+ diff &= 0x55;
+ if (diff == 0)
+ continue;
+ blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
+ blk *= sects_per_blk; /* convert to sectors */
+ while(diff) {
+ if (diff & 1) {
+ if (nr_sects == 0)
+ goto start_new_extent;
+ if ((start + nr_sects) != blk) {
+ rv = blkdev_issue_discard(bdev, start,
+ nr_sects, GFP_NOFS);
+ if (rv)
+ goto fail;
+ nr_sects = 0;
+start_new_extent:
+ start = blk;
+ }
+ nr_sects += sects_per_blk;
+ }
+ diff >>= 2;
+ blk += sects_per_blk;
+ }
+ }
+ if (nr_sects) {
+ rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS);
+ if (rv)
+ goto fail;
+ }
+ return;
+fail:
+ fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
+ sdp->sd_args.ar_discard = 0;
+}
+
void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
@@ -841,6 +902,8 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
struct gfs2_bitmap *bi = rgd->rd_bits + x;
if (!bi->bi_clone)
continue;
+ if (sdp->sd_args.ar_discard)
+ gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
memcpy(bi->bi_clone + bi->bi_offset,
bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 141b781f2fcc..601913e0a482 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -15,7 +15,6 @@
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
@@ -339,7 +338,6 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
struct gfs2_holder *t_gh)
{
struct gfs2_inode *ip;
- struct gfs2_holder ji_gh;
struct gfs2_jdesc *jd;
struct lfcc *lfcc;
LIST_HEAD(list);
@@ -387,7 +385,6 @@ out:
gfs2_glock_dq_uninit(&lfcc->gh);
kfree(lfcc);
}
- gfs2_glock_dq_uninit(&ji_gh);
return error;
}
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index f6b8b00ad881..91abdbedcc86 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -14,7 +14,7 @@
#include <linux/dcache.h>
#include "incore.h"
-void gfs2_lm_unmount(struct gfs2_sbd *sdp);
+extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
{
@@ -27,21 +27,23 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
void gfs2_jindex_free(struct gfs2_sbd *sdp);
-struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
-int gfs2_jdesc_check(struct gfs2_jdesc *jd);
+extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data);
-int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
- struct gfs2_inode **ipp);
+extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
+extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
-int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
+ struct gfs2_inode **ipp);
-int gfs2_statfs_init(struct gfs2_sbd *sdp);
-void gfs2_statfs_change(struct gfs2_sbd *sdp,
- s64 total, s64 free, s64 dinodes);
-int gfs2_statfs_sync(struct gfs2_sbd *sdp);
+extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-int gfs2_freeze_fs(struct gfs2_sbd *sdp);
-void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
+extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
+extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
+ s64 dinodes);
+extern int gfs2_statfs_sync(struct gfs2_sbd *sdp);
+
+extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
+extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 26c1fa777a95..7655f5025fec 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -14,9 +14,8 @@
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/kobject.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
+#include <linux/gfs2_ondisk.h>
#include "gfs2.h"
#include "incore.h"
@@ -25,6 +24,7 @@
#include "glock.h"
#include "quota.h"
#include "util.h"
+#include "glops.h"
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
@@ -37,6 +37,30 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
}
+static int gfs2_uuid_valid(const u8 *uuid)
+{
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ if (uuid[i])
+ return 1;
+ }
+ return 0;
+}
+
+static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
+{
+ const u8 *uuid = sdp->sd_sb.sb_uuid;
+ buf[0] = '\0';
+ if (!gfs2_uuid_valid(uuid))
+ return 0;
+ return snprintf(buf, PAGE_SIZE, "%02X%02X%02X%02X-%02X%02X-"
+ "%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
+ uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5],
+ uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11],
+ uuid[12], uuid[13], uuid[14], uuid[15]);
+}
+
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
{
unsigned int count;
@@ -148,6 +172,46 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
return len;
}
+static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ struct gfs2_glock *gl;
+ const struct gfs2_glock_operations *glops;
+ unsigned int glmode;
+ unsigned int gltype;
+ unsigned long long glnum;
+ char mode[16];
+ int rv;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
+ mode);
+ if (rv != 3)
+ return -EINVAL;
+
+ if (strcmp(mode, "EX") == 0)
+ glmode = LM_ST_UNLOCKED;
+ else if ((strcmp(mode, "CW") == 0) || (strcmp(mode, "DF") == 0))
+ glmode = LM_ST_DEFERRED;
+ else if ((strcmp(mode, "PR") == 0) || (strcmp(mode, "SH") == 0))
+ glmode = LM_ST_SHARED;
+ else
+ return -EINVAL;
+
+ if (gltype > LM_TYPE_JOURNAL)
+ return -EINVAL;
+ glops = gfs2_glops_list[gltype];
+ if (glops == NULL)
+ return -EINVAL;
+ rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
+ if (rv)
+ return rv;
+ gfs2_glock_cb(gl, glmode);
+ gfs2_glock_put(gl);
+ return len;
+}
+
struct gfs2_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
@@ -159,22 +223,26 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
GFS2_ATTR(id, 0444, id_show, NULL);
GFS2_ATTR(fsname, 0444, fsname_show, NULL);
+GFS2_ATTR(uuid, 0444, uuid_show, NULL);
GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
+GFS2_ATTR(demote_rq, 0200, NULL, demote_rq_store);
static struct attribute *gfs2_attrs[] = {
&gfs2_attr_id.attr,
&gfs2_attr_fsname.attr,
+ &gfs2_attr_uuid.attr,
&gfs2_attr_freeze.attr,
&gfs2_attr_withdraw.attr,
&gfs2_attr_statfs_sync.attr,
&gfs2_attr_quota_sync.attr,
&gfs2_attr_quota_refresh_user.attr,
&gfs2_attr_quota_refresh_group.attr,
+ &gfs2_attr_demote_rq.attr,
NULL,
};
@@ -224,14 +292,145 @@ static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
LOCKSTRUCT_ATTR(jid, "%u\n");
LOCKSTRUCT_ATTR(first, "%u\n");
-LOCKSTRUCT_ATTR(lvb_size, "%u\n");
-LOCKSTRUCT_ATTR(flags, "%d\n");
static struct attribute *lockstruct_attrs[] = {
&lockstruct_attr_jid.attr,
&lockstruct_attr_first.attr,
- &lockstruct_attr_lvb_size.attr,
- &lockstruct_attr_flags.attr,
+ NULL,
+};
+
+/*
+ * lock_module. Originally from lock_dlm
+ */
+
+static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf)
+{
+ const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops;
+ return sprintf(buf, "%s\n", ops->lm_proto_name);
+}
+
+static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ssize_t ret;
+ int val = 0;
+
+ if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
+ val = 1;
+ ret = sprintf(buf, "%d\n", val);
+ return ret;
+}
+
+static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ssize_t ret = len;
+ int val;
+
+ val = simple_strtol(buf, NULL, 0);
+
+ if (val == 1)
+ set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+ else if (val == 0) {
+ clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+ smp_mb__after_clear_bit();
+ gfs2_glock_thaw(sdp);
+ } else {
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
+static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%u\n", ls->ls_id);
+}
+
+static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_first);
+}
+
+static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_first_done);
+}
+
+static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_recover_jid);
+}
+
+static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
+{
+ struct gfs2_jdesc *jd;
+
+ spin_lock(&sdp->sd_jindex_spin);
+ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+ if (jd->jd_jid != jid)
+ continue;
+ jd->jd_dirty = 1;
+ break;
+ }
+ spin_unlock(&sdp->sd_jindex_spin);
+}
+
+static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ls->ls_recover_jid = simple_strtol(buf, NULL, 0);
+ gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid);
+ if (sdp->sd_recoverd_process)
+ wake_up_process(sdp->sd_recoverd_process);
+ return len;
+}
+
+static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_recover_jid_done);
+}
+
+static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
+}
+
+struct gdlm_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct gfs2_sbd *sdp, char *);
+ ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t);
+};
+
+#define GDLM_ATTR(_name,_mode,_show,_store) \
+static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
+
+GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
+GDLM_ATTR(block, 0644, block_show, block_store);
+GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
+GDLM_ATTR(id, 0444, lkid_show, NULL);
+GDLM_ATTR(first, 0444, lkfirst_show, NULL);
+GDLM_ATTR(first_done, 0444, first_done_show, NULL);
+GDLM_ATTR(recover, 0644, recover_show, recover_store);
+GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
+GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
+
+static struct attribute *lock_module_attrs[] = {
+ &gdlm_attr_proto_name.attr,
+ &gdlm_attr_block.attr,
+ &gdlm_attr_withdraw.attr,
+ &gdlm_attr_id.attr,
+ &lockstruct_attr_jid.attr,
+ &gdlm_attr_first.attr,
+ &gdlm_attr_first_done.attr,
+ &gdlm_attr_recover.attr,
+ &gdlm_attr_recover_done.attr,
+ &gdlm_attr_recover_status.attr,
NULL,
};
@@ -373,7 +572,6 @@ TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(quota_simul_sync, 1);
-TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -389,7 +587,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_complain_secs.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_quota_simul_sync.attr,
- &tune_attr_quota_cache_secs.attr,
&tune_attr_stall_secs.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_recoverd_secs.attr,
@@ -414,6 +611,11 @@ static struct attribute_group tune_group = {
.attrs = tune_attrs,
};
+static struct attribute_group lock_module_group = {
+ .name = "lock_module",
+ .attrs = lock_module_attrs,
+};
+
int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
{
int error;
@@ -436,9 +638,15 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
if (error)
goto fail_args;
+ error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
+ if (error)
+ goto fail_tune;
+
kobject_uevent(&sdp->sd_kobj, KOBJ_ADD);
return 0;
+fail_tune:
+ sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_args:
sysfs_remove_group(&sdp->sd_kobj, &args_group);
fail_lockstruct:
@@ -455,15 +663,27 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &args_group);
sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
+ sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
}
+
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+ const u8 *uuid = sdp->sd_sb.sb_uuid;
+
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
+ if (gfs2_uuid_valid(uuid)) {
+ add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-"
+ "%02X%02X-%02X%02X%02X%02X%02X%02X",
+ uuid[0], uuid[1], uuid[2], uuid[3], uuid[4],
+ uuid[5], uuid[6], uuid[7], uuid[8], uuid[9],
+ uuid[10], uuid[11], uuid[12], uuid[13],
+ uuid[14], uuid[15]);
+ }
return 0;
}
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f677b8a83f0c..053752d4b27f 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -12,9 +12,8 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
-#include <linux/gfs2_ondisk.h>
#include <linux/kallsyms.h>
-#include <linux/lm_interface.h>
+#include <linux/gfs2_ondisk.h>
#include "gfs2.h"
#include "incore.h"
@@ -88,9 +87,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
if (!tr->tr_touched) {
gfs2_log_release(sdp, tr->tr_reserved);
- gfs2_glock_dq(&tr->tr_t_gh);
- gfs2_holder_uninit(&tr->tr_t_gh);
- kfree(tr);
+ if (tr->tr_t_gh.gh_gl) {
+ gfs2_glock_dq(&tr->tr_t_gh);
+ gfs2_holder_uninit(&tr->tr_t_gh);
+ kfree(tr);
+ }
return;
}
@@ -106,9 +107,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
}
gfs2_log_commit(sdp, tr);
- gfs2_glock_dq(&tr->tr_t_gh);
- gfs2_holder_uninit(&tr->tr_t_gh);
- kfree(tr);
+ if (tr->tr_t_gh.gh_gl) {
+ gfs2_glock_dq(&tr->tr_t_gh);
+ gfs2_holder_uninit(&tr->tr_t_gh);
+ kfree(tr);
+ }
if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 374f50e95496..9d12b1118ba0 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -35,6 +34,8 @@ void gfs2_assert_i(struct gfs2_sbd *sdp)
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ const struct lm_lockops *lm = ls->ls_ops;
va_list args;
if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
@@ -47,8 +48,12 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
- fs_err(sdp, "telling LM to withdraw\n");
- gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
+ kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
+
+ if (lm->lm_unmount) {
+ fs_err(sdp, "telling LM to unmount\n");
+ lm->lm_unmount(sdp);
+ }
fs_err(sdp, "withdrawn\n");
dump_stack();
diff --git a/fs/inode.c b/fs/inode.c
index 913ab2d9a5d1..643ac43e5a5c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
@@ -147,13 +148,13 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_cdev = NULL;
inode->i_rdev = 0;
inode->dirtied_when = 0;
- if (security_inode_alloc(inode)) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, (inode));
- return NULL;
- }
+
+ if (security_inode_alloc(inode))
+ goto out_free_inode;
+
+ /* allocate and initialize an i_integrity */
+ if (ima_inode_alloc(inode))
+ goto out_free_security;
spin_lock_init(&inode->i_lock);
lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
@@ -189,6 +190,15 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_mapping = mapping;
return inode;
+
+out_free_security:
+ security_inode_free(inode);
+out_free_inode:
+ if (inode->i_sb->s_op->destroy_inode)
+ inode->i_sb->s_op->destroy_inode(inode);
+ else
+ kmem_cache_free(inode_cachep, (inode));
+ return NULL;
}
EXPORT_SYMBOL(inode_init_always);
@@ -359,6 +369,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
list_move(&inode->i_list, dispose);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
continue;
@@ -460,6 +471,7 @@ static void prune_icache(int nr_to_scan)
continue;
}
list_move(&inode->i_list, &freeable);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
nr_pruned++;
}
@@ -656,6 +668,7 @@ void unlock_new_inode(struct inode *inode)
* just created it (so there can be no old holders
* that haven't tested I_LOCK).
*/
+ WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
inode->i_state &= ~(I_LOCK|I_NEW);
wake_up_inode(inode);
}
@@ -1145,6 +1158,7 @@ void generic_delete_inode(struct inode *inode)
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
@@ -1186,16 +1200,19 @@ static void generic_forget_inode(struct inode *inode)
spin_unlock(&inode_lock);
return;
}
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode_lock);
write_inode_now(inode, 1);
spin_lock(&inode_lock);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
@@ -1283,6 +1300,40 @@ sector_t bmap(struct inode * inode, sector_t block)
}
EXPORT_SYMBOL(bmap);
+/*
+ * With relative atime, only update atime if the previous atime is
+ * earlier than either the ctime or mtime or if at least a day has
+ * passed since the last atime update.
+ */
+static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+ struct timespec now)
+{
+
+ if (!(mnt->mnt_flags & MNT_RELATIME))
+ return 1;
+ /*
+ * Is mtime younger than atime? If yes, update atime:
+ */
+ if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
+ return 1;
+ /*
+ * Is ctime younger than atime? If yes, update atime:
+ */
+ if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+ return 1;
+
+ /*
+ * Is the previous atime value older than a day? If yes,
+ * update atime:
+ */
+ if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
+ return 1;
+ /*
+ * Good, we can skip the atime update:
+ */
+ return 0;
+}
+
/**
* touch_atime - update the access time
* @mnt: mount the inode is accessed on
@@ -1310,17 +1361,12 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
goto out;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
goto out;
- if (mnt->mnt_flags & MNT_RELATIME) {
- /*
- * With relative atime, only update atime if the previous
- * atime is earlier than either the ctime or mtime.
- */
- if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
- timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
- goto out;
- }
now = current_fs_time(inode->i_sb);
+
+ if (!relatime_need_update(mnt, inode, now))
+ goto out;
+
if (timespec_equal(&inode->i_atime, &now))
goto out;
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 1f3b0fc0d351..aedc47a264c1 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -139,6 +139,55 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
return 0;
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap,
+ struct in6_addr *addr_mapped)
+{
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
+
+ switch (sap->sa_family) {
+ case AF_INET6:
+ return &((const struct sockaddr_in6 *)sap)->sin6_addr;
+ case AF_INET:
+ ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, addr_mapped);
+ return addr_mapped;
+ }
+
+ return NULL;
+}
+
+/*
+ * If lockd is using a PF_INET6 listener, all incoming requests appear
+ * to come from AF_INET6 remotes. The address of AF_INET remotes are
+ * mapped to AF_INET6 automatically by the network layer. In case the
+ * user passed an AF_INET server address at mount time, ensure both
+ * addresses are AF_INET6 before comparing them.
+ */
+static int nlmclnt_cmp_addr(const struct nlm_host *host,
+ const struct sockaddr *sap)
+{
+ const struct in6_addr *addr1;
+ const struct in6_addr *addr2;
+ struct in6_addr addr1_mapped;
+ struct in6_addr addr2_mapped;
+
+ addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped);
+ if (likely(addr1 != NULL)) {
+ addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped);
+ if (likely(addr2 != NULL))
+ return ipv6_addr_equal(addr1, addr2);
+ }
+
+ return 0;
+}
+#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+static int nlmclnt_cmp_addr(const struct nlm_host *host,
+ const struct sockaddr *sap)
+{
+ return nlm_cmp_addr(nlm_addr(host), sap);
+}
+#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+
/*
* The server lockd has called us back to tell us the lock was granted
*/
@@ -166,7 +215,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
*/
if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
continue;
- if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
+ if (!nlmclnt_cmp_addr(block->b_host, addr))
continue;
if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
continue;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d1d1eb84679d..618865b3128b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
- * Copyright (C) 1996 Gertjan van Wingerde (gertjan@cs.vu.nl)
+ * Copyright (C) 1996 Gertjan van Wingerde
* Minix V2 fs support.
*
* Modified for 680x0 by Andreas Schwab
diff --git a/fs/namei.c b/fs/namei.c
index bbc15c237558..199317642ad6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -24,6 +24,7 @@
#include <linux/fsnotify.h>
#include <linux/personality.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
@@ -850,6 +851,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
if (err == -EAGAIN)
err = inode_permission(nd->path.dentry->d_inode,
MAY_EXEC);
+ if (!err)
+ err = ima_path_check(&nd->path, MAY_EXEC);
if (err)
break;
@@ -1509,6 +1512,11 @@ int may_open(struct path *path, int acc_mode, int flag)
error = inode_permission(inode, acc_mode);
if (error)
return error;
+
+ error = ima_path_check(path,
+ acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
+ if (error)
+ return error;
/*
* An append-only file must be opened in append mode for writing.
*/
diff --git a/fs/namespace.c b/fs/namespace.c
index 06f8e63f6cb1..f0e753097353 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -780,6 +780,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{ MNT_NOATIME, ",noatime" },
{ MNT_NODIRATIME, ",nodiratime" },
{ MNT_RELATIME, ",relatime" },
+ { MNT_STRICTATIME, ",strictatime" },
{ 0, NULL }
};
const struct proc_fs_info *fs_infop;
@@ -1919,6 +1920,9 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
+ /* Default to relatime */
+ mnt_flags |= MNT_RELATIME;
+
/* Separate the per-mountpoint flags */
if (flags & MS_NOSUID)
mnt_flags |= MNT_NOSUID;
@@ -1930,13 +1934,14 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
mnt_flags |= MNT_NOATIME;
if (flags & MS_NODIRATIME)
mnt_flags |= MNT_NODIRATIME;
- if (flags & MS_RELATIME)
- mnt_flags |= MNT_RELATIME;
+ if (flags & MS_STRICTATIME)
+ mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
- MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
+ MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
+ MS_STRICTATIME);
/* ... and get the mountpoint */
retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9b728f3565a1..574158ae2398 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -255,6 +255,32 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
}
return 0;
}
+
+/*
+ * Test if two ip6 socket addresses refer to the same socket by
+ * comparing relevant fields. The padding bytes specifically, are not
+ * compared. sin6_flowinfo is not compared because it only affects QoS
+ * and sin6_scope_id is only compared if the address is "link local"
+ * because "link local" addresses need only be unique to a specific
+ * link. Conversely, ordinary unicast addresses might have different
+ * sin6_scope_id.
+ *
+ * The caller should ensure both socket addresses are AF_INET6.
+ */
+static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1;
+ const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2;
+
+ if (!ipv6_addr_equal(&saddr1->sin6_addr,
+ &saddr1->sin6_addr))
+ return 0;
+ if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
+ saddr1->sin6_scope_id != saddr2->sin6_scope_id)
+ return 0;
+ return saddr1->sin6_port == saddr2->sin6_port;
+}
#else
static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
const struct sockaddr_in *sa2)
@@ -270,9 +296,52 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
(const struct sockaddr_in *)sa2);
}
+
+static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
+ const struct sockaddr * sa2)
+{
+ return 0;
+}
#endif
/*
+ * Test if two ip4 socket addresses refer to the same socket, by
+ * comparing relevant fields. The padding bytes specifically, are
+ * not compared.
+ *
+ * The caller should ensure both socket addresses are AF_INET.
+ */
+static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1;
+ const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2;
+
+ if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr)
+ return 0;
+ return saddr1->sin_port == saddr2->sin_port;
+}
+
+/*
+ * Test if two socket addresses represent the same actual socket,
+ * by comparing (only) relevant fields.
+ */
+static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ if (sa1->sa_family != sa2->sa_family)
+ return 0;
+
+ switch (sa1->sa_family) {
+ case AF_INET:
+ return nfs_sockaddr_cmp_ip4(sa1, sa2);
+ case AF_INET6:
+ return nfs_sockaddr_cmp_ip6(sa1, sa2);
+ }
+ return 0;
+}
+
+/*
* Find a client by IP address and protocol version
* - returns NULL if no such client
*/
@@ -344,8 +413,10 @@ struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
{
struct nfs_client *clp;
+ const struct sockaddr *sap = data->addr;
list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
/* Don't match clients that failed to initialise properly */
if (clp->cl_cons_state < 0)
continue;
@@ -358,7 +429,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
continue;
/* Match the full socket address */
- if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0)
+ if (!nfs_sockaddr_cmp(sap, clap))
continue;
atomic_inc(&clp->cl_count);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e35c8199f82f..672368f865ca 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1892,8 +1892,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
cache.cred = cred;
cache.jiffies = jiffies;
status = NFS_PROTO(inode)->access(inode, &cache);
- if (status != 0)
+ if (status != 0) {
+ if (status == -ESTALE) {
+ nfs_zap_caches(inode);
+ if (!S_ISDIR(inode->i_mode))
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+ }
return status;
+ }
nfs_access_add_cache(inode, &cache);
out:
if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index cef62557c87d..6bbf0e6daad2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -292,7 +292,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_fattr fattr;
- struct page *pages[NFSACL_MAXPAGES] = { };
+ struct page *pages[NFSACL_MAXPAGES];
struct nfs3_setaclargs args = {
.inode = inode,
.mask = NFS_ACL,
@@ -303,7 +303,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.rpc_argp = &args,
.rpc_resp = &fattr,
};
- int status, count;
+ int status;
status = -EOPNOTSUPP;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -319,6 +319,20 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
if (S_ISDIR(inode->i_mode)) {
args.mask |= NFS_DFACL;
args.acl_default = dfacl;
+ args.len = nfsacl_size(acl, dfacl);
+ } else
+ args.len = nfsacl_size(acl, NULL);
+
+ if (args.len > NFS_ACL_INLINE_BUFSIZE) {
+ unsigned int npages = 1 + ((args.len - 1) >> PAGE_SHIFT);
+
+ status = -ENOMEM;
+ do {
+ args.pages[args.npages] = alloc_page(GFP_KERNEL);
+ if (args.pages[args.npages] == NULL)
+ goto out_freepages;
+ args.npages++;
+ } while (args.npages < npages);
}
dprintk("NFS call setacl\n");
@@ -329,10 +343,6 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
nfs_zap_acl_cache(inode);
dprintk("NFS reply setacl: %d\n", status);
- /* pages may have been allocated at the xdr layer. */
- for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
- __free_page(args.pages[count]);
-
switch (status) {
case 0:
status = nfs_refresh_inode(inode, &fattr);
@@ -346,6 +356,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
case -ENOTSUPP:
status = -EOPNOTSUPP;
}
+out_freepages:
+ while (args.npages != 0) {
+ args.npages--;
+ __free_page(args.pages[args.npages]);
+ }
out:
return status;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 11cdddec1432..6cdeacffde46 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -82,8 +82,10 @@
#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2)
#define ACL3_getaclargs_sz (NFS3_fh_sz+1)
-#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3))
-#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3))
+#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
+ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
+#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
+ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
/*
@@ -703,28 +705,18 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
struct nfs3_setaclargs *args)
{
struct xdr_buf *buf = &req->rq_snd_buf;
- unsigned int base, len_in_head, len = nfsacl_size(
- (args->mask & NFS_ACL) ? args->acl_access : NULL,
- (args->mask & NFS_DFACL) ? args->acl_default : NULL);
- int count, err;
+ unsigned int base;
+ int err;
p = xdr_encode_fhandle(p, NFS_FH(args->inode));
*p++ = htonl(args->mask);
- base = (char *)p - (char *)buf->head->iov_base;
- /* put as much of the acls into head as possible. */
- len_in_head = min_t(unsigned int, buf->head->iov_len - base, len);
- len -= len_in_head;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2));
-
- for (count = 0; (count << PAGE_SHIFT) < len; count++) {
- args->pages[count] = alloc_page(GFP_KERNEL);
- if (!args->pages[count]) {
- while (count)
- __free_page(args->pages[--count]);
- return -ENOMEM;
- }
- }
- xdr_encode_pages(buf, args->pages, 0, len);
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ base = req->rq_slen;
+
+ if (args->npages != 0)
+ xdr_encode_pages(buf, args->pages, 0, args->len);
+ else
+ req->rq_slen += args->len;
err = nfsacl_encode(buf, base, args->inode,
(args->mask & NFS_ACL) ?
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 30befc39b3c6..2a2a0a7143ad 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -21,7 +21,9 @@
#define NFSDBG_FACILITY NFSDBG_VFS
/*
- * Check if fs_root is valid
+ * Convert the NFSv4 pathname components into a standard posix path.
+ *
+ * Note that the resulting string will be placed at the end of the buffer
*/
static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
char *buffer, ssize_t buflen)
@@ -99,21 +101,20 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
{
struct vfsmount *mnt = ERR_PTR(-ENOENT);
char *mnt_path;
- int page2len;
+ unsigned int maxbuflen;
unsigned int s;
mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
if (IS_ERR(mnt_path))
return mnt;
mountdata->mnt_path = mnt_path;
- page2 += strlen(mnt_path) + 1;
- page2len = PAGE_SIZE - strlen(mnt_path) - 1;
+ maxbuflen = mnt_path - 1 - page2;
for (s = 0; s < location->nservers; s++) {
const struct nfs4_string *buf = &location->servers[s];
struct sockaddr_storage addr;
- if (buf->len <= 0 || buf->len >= PAGE_SIZE)
+ if (buf->len <= 0 || buf->len >= maxbuflen)
continue;
mountdata->addr = (struct sockaddr *)&addr;
@@ -126,8 +127,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
continue;
nfs_set_port(mountdata->addr, NFS_PORT);
- strncpy(page2, buf->data, page2len);
- page2[page2len] = '\0';
+ memcpy(page2, buf->data, buf->len);
+ page2[buf->len] = '\0';
mountdata->hostname = page2;
snprintf(page, PAGE_SIZE, "%s:%s",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f65953be39c0..9250067943d8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2596,6 +2596,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
[OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
+ [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
[OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
[OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 3a9e5deed74d..19e3a96aa02c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -176,7 +176,8 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
- (OCFS2_I(inode)->ip_clusters != rec->e_cpos),
+ (OCFS2_I(inode)->ip_clusters !=
+ le32_to_cpu(rec->e_cpos)),
"Device %s, asking for sparse allocation: inode %llu, "
"cpos %u, clusters %u\n",
osb->dev_str,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a067a6cffb01..8e1709a679b7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
size = i_size_read(inode);
if (size > PAGE_CACHE_SIZE ||
- size > ocfs2_max_inline_data(inode->i_sb)) {
+ size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %Lu",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1555,6 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
int ret, written = 0;
loff_t end = pos + len;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_dinode *di = NULL;
mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
(unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
@@ -1587,7 +1588,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
/*
* Check whether the write can fit.
*/
- if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
+ di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+ if (mmap_page ||
+ end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
return 0;
do_inline_write:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 084aba86c3b2..4b11762f249e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -532,7 +532,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
- fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
+ fe->id2.i_data.id_count = cpu_to_le16(
+ ocfs2_max_inline_data_with_xattr(osb->sb, fe));
} else {
fel = &fe->id2.i_list;
fel->l_tree_depth = 0;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c7ae45aaa36c..2332ef740f4f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1070,12 +1070,6 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
offsetof(struct ocfs2_dinode, id2.i_symlink);
}
-static inline int ocfs2_max_inline_data(struct super_block *sb)
-{
- return sb->s_blocksize -
- offsetof(struct ocfs2_dinode, id2.i_data.id_data);
-}
-
static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
struct ocfs2_dinode *di)
{
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4ddd788add67..2563df89fc2a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -547,8 +547,12 @@ int ocfs2_calc_xattr_init(struct inode *dir,
* when blocksize = 512, may reserve one more cluser for
* xattr bucket, otherwise reserve one metadata block
* for them is ok.
+ * If this is a new directory with inline data,
+ * we choose to reserve the entire inline area for
+ * directory contents and force an external xattr block.
*/
if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+ (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
(s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
if (ret) {
@@ -4791,19 +4795,33 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
char *val,
int value_len)
{
- int offset;
+ int ret, offset, block_off;
struct ocfs2_xattr_value_root *xv;
struct ocfs2_xattr_entry *xe = xs->here;
+ struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
+ void *base;
BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
- offset = le16_to_cpu(xe->xe_name_offset) +
- OCFS2_XATTR_SIZE(xe->xe_name_len);
+ ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
+ xe - xh->xh_entries,
+ &block_off,
+ &offset);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
- xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
+ base = bucket_block(xs->bucket, block_off);
+ xv = (struct ocfs2_xattr_value_root *)(base + offset +
+ OCFS2_XATTR_SIZE(xe->xe_name_len));
- return __ocfs2_xattr_set_value_outside(inode, handle,
- xv, val, value_len);
+ ret = __ocfs2_xattr_set_value_outside(inode, handle,
+ xv, val, value_len);
+ if (ret)
+ mlog_errno(ret);
+out:
+ return ret;
}
static int ocfs2_rm_xattr_cluster(struct inode *inode,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f4..38e337d51ced 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
pdev->devt = devt;
/* delay uevent until 'holders' subdir is created */
- pdev->uevent_suppress = 1;
+ dev_set_uevent_suppress(pdev, 1);
err = device_add(pdev);
if (err)
goto out_put;
@@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
if (!p->holder_dir)
goto out_del;
- pdev->uevent_suppress = 0;
+ dev_set_uevent_suppress(pdev, 0);
if (flags & ADDPART_FLAG_WHOLEDISK) {
err = device_create_file(pdev, &dev_attr_whole_disk);
if (err)
@@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
rcu_assign_pointer(ptbl->part[partno], p);
/* suppress uevent if the disk supresses it */
- if (!ddev->uevent_suppress)
+ if (!dev_get_uevent_suppress(pdev))
kobject_uevent(&pdev->kobj, KOBJ_ADD);
return p;
@@ -455,7 +455,7 @@ void register_disk(struct gendisk *disk)
dev_set_name(ddev, disk->disk_name);
/* delay uevents, until we scanned partition table */
- ddev->uevent_suppress = 1;
+ dev_set_uevent_suppress(ddev, 1);
if (device_add(ddev))
return;
@@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk)
exit:
/* announce disk after possible partitions are created */
- ddev->uevent_suppress = 0;
+ dev_set_uevent_suppress(ddev, 0);
kobject_uevent(&ddev->kobj, KOBJ_ADD);
/* announce possible partitions */
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a48ba5179d5..14f502b89cf5 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -699,12 +699,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
int retval;
mutex_lock(&inode->i_mutex);
-
retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
-
- if (retval >= 0)
+ if (retval >= 0) {
retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
-
+ if (retval < 0) /* this can happen only if on == T */
+ fasync_helper(-1, filp, 0, &pipe->fasync_readers);
+ }
mutex_unlock(&inode->i_mutex);
if (retval < 0)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0c9de19a1633..beaa0ce3b82e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3066,7 +3066,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
int retval = -ENOENT;
ino_t ino;
int tid;
- unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
struct pid_namespace *ns;
task = get_proc_task(inode);
@@ -3083,18 +3082,18 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
goto out_no_task;
retval = 0;
- switch (pos) {
+ switch ((unsigned long)filp->f_pos) {
case 0:
ino = inode->i_ino;
- if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
+ if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
goto out;
- pos++;
+ filp->f_pos++;
/* fall through */
case 1:
ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
+ if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
goto out;
- pos++;
+ filp->f_pos++;
/* fall through */
}
@@ -3104,9 +3103,9 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
ns = filp->f_dentry->d_sb->s_fs_info;
tid = (int)filp->f_version;
filp->f_version = 0;
- for (task = first_tid(leader, tid, pos - 2, ns);
+ for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
task;
- task = next_tid(task), pos++) {
+ task = next_tid(task), filp->f_pos++) {
tid = task_pid_nr_ns(task, ns);
if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
/* returning this tgid failed, save it as the first
@@ -3117,7 +3116,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
}
}
out:
- filp->f_pos = pos;
put_task_struct(leader);
out_no_task:
return retval;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2d1345112a42..e9983837d08d 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -80,7 +80,7 @@ static const struct file_operations proc_kpagecount_operations = {
#define KPF_RECLAIM 9
#define KPF_BUDDY 10
-#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos)
+#define kpf_copy_bit(flags, dstpos, srcpos) (((flags >> srcpos) & 1) << dstpos)
static ssize_t kpageflags_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index b9b567a28376..5d7c7ececa64 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -114,6 +114,9 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add_file(&lru_pvec);
+ /* prevent the page from being discarded on memory pressure */
+ SetPageDirty(page);
+
unlock_page(page);
}
@@ -126,6 +129,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
return -EFBIG;
add_error:
+ pagevec_lru_add_file(&lru_pvec);
page_cache_release(pages + loop);
for (loop++; loop < npages; loop++)
__free_page(pages + loop);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index c837dfc2b3c6..2a7960310349 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -80,7 +80,7 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with zlib).
*/
int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
- int length, u64 *next_index, int srclength)
+ int length, u64 *next_index, int srclength, int pages)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
@@ -184,7 +184,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
offset = 0;
}
- if (msblk->stream.avail_out == 0) {
+ if (msblk->stream.avail_out == 0 && page < pages) {
msblk->stream.next_out = buffer[page++];
msblk->stream.avail_out = PAGE_CACHE_SIZE;
}
@@ -201,25 +201,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
zlib_init = 1;
}
- zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH);
+ zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH);
if (msblk->stream.avail_in == 0 && k < b)
put_bh(bh[k++]);
} while (zlib_err == Z_OK);
if (zlib_err != Z_STREAM_END) {
- ERROR("zlib_inflate returned unexpected result"
- " 0x%x, srclength %d, avail_in %d,"
- " avail_out %d\n", zlib_err, srclength,
- msblk->stream.avail_in,
- msblk->stream.avail_out);
+ ERROR("zlib_inflate error, data probably corrupt\n");
goto release_mutex;
}
zlib_err = zlib_inflateEnd(&msblk->stream);
if (zlib_err != Z_OK) {
- ERROR("zlib_inflateEnd returned unexpected result 0x%x,"
- " srclength %d\n", zlib_err, srclength);
+ ERROR("zlib_inflate error, data probably corrupt\n");
goto release_mutex;
}
length = msblk->stream.total_out;
@@ -268,7 +263,8 @@ block_release:
put_bh(bh[k]);
read_failure:
- ERROR("sb_bread failed reading block 0x%llx\n", cur_index);
+ ERROR("squashfs_read_data failed to read block 0x%llx\n",
+ (unsigned long long) index);
kfree(bh);
return -EIO;
}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index f29eda16d25e..1c4739e33af6 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -119,7 +119,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
entry->length = squashfs_read_data(sb, entry->data,
block, length, &entry->next_index,
- cache->block_size);
+ cache->block_size, cache->pages);
spin_lock(&cache->lock);
@@ -406,7 +406,7 @@ int squashfs_read_table(struct super_block *sb, void *buffer, u64 block,
for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
data[i] = buffer;
res = squashfs_read_data(sb, data, block, length |
- SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length);
+ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
kfree(data);
return res;
}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 7a63398bb855..9101dbde39ec 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -133,7 +133,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
type = le16_to_cpu(sqshb_ino->inode_type);
switch (type) {
case SQUASHFS_REG_TYPE: {
- unsigned int frag_offset, frag_size, frag;
+ unsigned int frag_offset, frag;
+ int frag_size;
u64 frag_blk;
struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg;
@@ -175,7 +176,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
break;
}
case SQUASHFS_LREG_TYPE: {
- unsigned int frag_offset, frag_size, frag;
+ unsigned int frag_offset, frag;
+ int frag_size;
u64 frag_blk;
struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg;
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 6b2515d027d5..0e9feb6adf7e 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -34,7 +34,7 @@ static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
/* block.c */
extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
- int);
+ int, int);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 071df5b5b491..681ec0d83799 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -389,7 +389,7 @@ static int __init init_squashfs_fs(void)
return err;
}
- printk(KERN_INFO "squashfs: version 4.0 (2009/01/03) "
+ printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
"Phillip Lougher\n");
return 0;
diff --git a/fs/super.c b/fs/super.c
index 8349ed6b1412..6ce501447ada 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -371,8 +371,10 @@ retry:
continue;
if (!grab_super(old))
goto retry;
- if (s)
+ if (s) {
+ up_write(&s->s_umount);
destroy_super(s);
+ }
return old;
}
}
@@ -387,6 +389,7 @@ retry:
err = set(s, data);
if (err) {
spin_unlock(&sb_lock);
+ up_write(&s->s_umount);
destroy_super(s);
return ERR_PTR(err);
}
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index f2c478c3424e..07703d3ff4a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -21,15 +21,28 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include "sysfs.h"
+/*
+ * There's one bin_buffer for each open file.
+ *
+ * filp->private_data points to bin_buffer and
+ * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s
+ * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
+ */
+static DEFINE_MUTEX(sysfs_bin_lock);
+
struct bin_buffer {
- struct mutex mutex;
- void *buffer;
- int mmapped;
+ struct mutex mutex;
+ void *buffer;
+ int mmapped;
+ struct vm_operations_struct *vm_ops;
+ struct file *file;
+ struct hlist_node list;
};
static int
@@ -168,6 +181,175 @@ out_free:
return count;
}
+static void bin_vma_open(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+ if (!bb->vm_ops || !bb->vm_ops->open)
+ return;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return;
+
+ bb->vm_ops->open(vma);
+
+ sysfs_put_active_two(attr_sd);
+}
+
+static void bin_vma_close(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+ if (!bb->vm_ops || !bb->vm_ops->close)
+ return;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return;
+
+ bb->vm_ops->close(vma);
+
+ sysfs_put_active_two(attr_sd);
+}
+
+static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->fault)
+ return VM_FAULT_SIGBUS;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return VM_FAULT_SIGBUS;
+
+ ret = bb->vm_ops->fault(vma, vmf);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops)
+ return -EINVAL;
+
+ if (!bb->vm_ops->page_mkwrite)
+ return 0;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return -EINVAL;
+
+ ret = bb->vm_ops->page_mkwrite(vma, page);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+static int bin_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->access)
+ return -EINVAL;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return -EINVAL;
+
+ ret = bb->vm_ops->access(vma, addr, buf, len, write);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+#ifdef CONFIG_NUMA
+static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->set_policy)
+ return 0;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return -EINVAL;
+
+ ret = bb->vm_ops->set_policy(vma, new);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ struct mempolicy *pol;
+
+ if (!bb->vm_ops || !bb->vm_ops->get_policy)
+ return vma->vm_policy;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return vma->vm_policy;
+
+ pol = bb->vm_ops->get_policy(vma, addr);
+
+ sysfs_put_active_two(attr_sd);
+ return pol;
+}
+
+static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
+ const nodemask_t *to, unsigned long flags)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->migrate)
+ return 0;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return 0;
+
+ ret = bb->vm_ops->migrate(vma, from, to, flags);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+#endif
+
+static struct vm_operations_struct bin_vm_ops = {
+ .open = bin_vma_open,
+ .close = bin_vma_close,
+ .fault = bin_fault,
+ .page_mkwrite = bin_page_mkwrite,
+ .access = bin_access,
+#ifdef CONFIG_NUMA
+ .set_policy = bin_set_policy,
+ .get_policy = bin_get_policy,
+ .migrate = bin_migrate,
+#endif
+};
+
static int mmap(struct file *file, struct vm_area_struct *vma)
{
struct bin_buffer *bb = file->private_data;
@@ -179,18 +361,37 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
mutex_lock(&bb->mutex);
/* need attr_sd for attr, its parent for kobj */
+ rc = -ENODEV;
if (!sysfs_get_active_two(attr_sd))
- return -ENODEV;
+ goto out_unlock;
rc = -EINVAL;
- if (attr->mmap)
- rc = attr->mmap(kobj, attr, vma);
+ if (!attr->mmap)
+ goto out_put;
+
+ rc = attr->mmap(kobj, attr, vma);
+ if (rc)
+ goto out_put;
- if (rc == 0 && !bb->mmapped)
- bb->mmapped = 1;
- else
- sysfs_put_active_two(attr_sd);
+ /*
+ * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+ * to satisfy versions of X which crash if the mmap fails: that
+ * substitutes a new vm_file, and we don't then want bin_vm_ops.
+ */
+ if (vma->vm_file != file)
+ goto out_put;
+ rc = -EINVAL;
+ if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+ goto out_put;
+
+ rc = 0;
+ bb->mmapped = 1;
+ bb->vm_ops = vma->vm_ops;
+ vma->vm_ops = &bin_vm_ops;
+out_put:
+ sysfs_put_active_two(attr_sd);
+out_unlock:
mutex_unlock(&bb->mutex);
return rc;
@@ -223,8 +424,13 @@ static int open(struct inode * inode, struct file * file)
goto err_out;
mutex_init(&bb->mutex);
+ bb->file = file;
file->private_data = bb;
+ mutex_lock(&sysfs_bin_lock);
+ hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
+ mutex_unlock(&sysfs_bin_lock);
+
/* open succeeded, put active references */
sysfs_put_active_two(attr_sd);
return 0;
@@ -237,11 +443,12 @@ static int open(struct inode * inode, struct file * file)
static int release(struct inode * inode, struct file * file)
{
- struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_buffer *bb = file->private_data;
- if (bb->mmapped)
- sysfs_put_active_two(attr_sd);
+ mutex_lock(&sysfs_bin_lock);
+ hlist_del(&bb->list);
+ mutex_unlock(&sysfs_bin_lock);
+
kfree(bb->buffer);
kfree(bb);
return 0;
@@ -256,6 +463,26 @@ const struct file_operations bin_fops = {
.release = release,
};
+
+void unmap_bin_file(struct sysfs_dirent *attr_sd)
+{
+ struct bin_buffer *bb;
+ struct hlist_node *tmp;
+
+ if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
+ return;
+
+ mutex_lock(&sysfs_bin_lock);
+
+ hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+ struct inode *inode = bb->file->f_path.dentry->d_inode;
+
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ }
+
+ mutex_unlock(&sysfs_bin_lock);
+}
+
/**
* sysfs_create_bin_file - create binary file for object.
* @kobj: object.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 82d3b79d0e08..66aeb4fff0c3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -434,6 +434,26 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
}
/**
+ * sysfs_pathname - return full path to sysfs dirent
+ * @sd: sysfs_dirent whose path we want
+ * @path: caller allocated buffer
+ *
+ * Gives the name "/" to the sysfs_root entry; any path returned
+ * is relative to wherever sysfs is mounted.
+ *
+ * XXX: does no error checking on @path size
+ */
+static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
+{
+ if (sd->s_parent) {
+ sysfs_pathname(sd->s_parent, path);
+ strcat(path, "/");
+ }
+ strcat(path, sd->s_name);
+ return path;
+}
+
+/**
* sysfs_add_one - add sysfs_dirent to parent
* @acxt: addrm context to use
* @sd: sysfs_dirent to be added
@@ -458,8 +478,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
int ret;
ret = __sysfs_add_one(acxt, sd);
- WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' "
- "can not be created\n", sd->s_name);
+ if (ret == -EEXIST) {
+ char *path = kzalloc(PATH_MAX, GFP_KERNEL);
+ WARN(1, KERN_WARNING
+ "sysfs: cannot create duplicate filename '%s'\n",
+ (path == NULL) ? sd->s_name :
+ strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
+ sd->s_name));
+ kfree(path);
+ }
+
return ret;
}
@@ -581,6 +609,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
sysfs_drop_dentry(sd);
sysfs_deactivate(sd);
+ unmap_bin_file(sd);
sysfs_put(sd);
}
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1f4a3f877262..289c43a47263 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -659,13 +659,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
struct sysfs_schedule_callback_struct {
- struct kobject *kobj;
+ struct list_head workq_list;
+ struct kobject *kobj;
void (*func)(void *);
void *data;
struct module *owner;
struct work_struct work;
};
+static DEFINE_MUTEX(sysfs_workq_mutex);
+static LIST_HEAD(sysfs_workq);
static void sysfs_schedule_callback_work(struct work_struct *work)
{
struct sysfs_schedule_callback_struct *ss = container_of(work,
@@ -674,6 +677,9 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
(ss->func)(ss->data);
kobject_put(ss->kobj);
module_put(ss->owner);
+ mutex_lock(&sysfs_workq_mutex);
+ list_del(&ss->workq_list);
+ mutex_unlock(&sysfs_workq_mutex);
kfree(ss);
}
@@ -695,15 +701,25 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
* until @func returns.
*
* Returns 0 if the request was submitted, -ENOMEM if storage could not
- * be allocated, -ENODEV if a reference to @owner isn't available.
+ * be allocated, -ENODEV if a reference to @owner isn't available,
+ * -EAGAIN if a callback has already been scheduled for @kobj.
*/
int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
void *data, struct module *owner)
{
- struct sysfs_schedule_callback_struct *ss;
+ struct sysfs_schedule_callback_struct *ss, *tmp;
if (!try_module_get(owner))
return -ENODEV;
+
+ mutex_lock(&sysfs_workq_mutex);
+ list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
+ if (ss->kobj == kobj) {
+ mutex_unlock(&sysfs_workq_mutex);
+ return -EAGAIN;
+ }
+ mutex_unlock(&sysfs_workq_mutex);
+
ss = kmalloc(sizeof(*ss), GFP_KERNEL);
if (!ss) {
module_put(owner);
@@ -715,6 +731,10 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
ss->data = data;
ss->owner = owner;
INIT_WORK(&ss->work, sysfs_schedule_callback_work);
+ INIT_LIST_HEAD(&ss->workq_list);
+ mutex_lock(&sysfs_workq_mutex);
+ list_add_tail(&ss->workq_list, &sysfs_workq);
+ mutex_unlock(&sysfs_workq_mutex);
schedule_work(&ss->work);
return 0;
}
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index dfa3d94cfc74..555f0ff988df 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -147,6 +147,7 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
struct bin_attribute *bin_attr;
+ inode->i_private = sysfs_get(sd);
inode->i_mapping->a_ops = &sysfs_aops;
inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
inode->i_op = &sysfs_inode_operations;
@@ -214,6 +215,22 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
return inode;
}
+/*
+ * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
+ * To prevent the sysfs inode numbers from being freed prematurely we take a
+ * reference to sysfs_dirent from the sysfs inode. A
+ * super_operations.delete_inode() implementation is needed to drop that
+ * reference upon inode destruction.
+ */
+void sysfs_delete_inode(struct inode *inode)
+{
+ struct sysfs_dirent *sd = inode->i_private;
+
+ truncate_inode_pages(&inode->i_data, 0);
+ clear_inode(inode);
+ sysfs_put(sd);
+}
+
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
{
struct sysfs_addrm_cxt acxt;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index ab343e371d64..49749955ccaf 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -17,11 +17,10 @@
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/magic.h>
#include "sysfs.h"
-/* Random magic number */
-#define SYSFS_MAGIC 0x62656572
static struct vfsmount *sysfs_mount;
struct super_block * sysfs_sb = NULL;
@@ -30,6 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
static const struct super_operations sysfs_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
+ .delete_inode = sysfs_delete_inode,
};
struct sysfs_dirent sysfs_root = {
@@ -53,7 +53,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
sysfs_sb = sb;
/* get root inode, initialize and unlock it */
+ mutex_lock(&sysfs_mutex);
inode = sysfs_get_inode(&sysfs_root);
+ mutex_unlock(&sysfs_mutex);
if (!inode) {
pr_debug("sysfs: could not get root inode\n");
return -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 93c6d6b27c4d..3fa0d98481e2 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -28,6 +28,7 @@ struct sysfs_elem_attr {
struct sysfs_elem_bin_attr {
struct bin_attribute *bin_attr;
+ struct hlist_head buffers;
};
/*
@@ -145,6 +146,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
* inode.c
*/
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
+void sysfs_delete_inode(struct inode *inode);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
int sysfs_inode_init(void);
@@ -163,6 +165,7 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
* bin.c
*/
extern const struct file_operations bin_fops;
+void unmap_bin_file(struct sysfs_dirent *attr_sd);
/*
* symlink.c
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index e65212dfb60e..261a1c2f22dd 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -41,7 +41,7 @@
* Stefan Reinauer <stepan@home.culture.mipt.ru>
*
* Module usage counts added on 96/04/29 by
- * Gertjan van Wingerde <gertjan@cs.vu.nl>
+ * Gertjan van Wingerde <gwingerde@gmail.com>
*
* Clean swab support on 19970406 by
* Francois-Rene Rideau <fare@tunes.org>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index cb329edc925b..aa1016bb9134 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -34,6 +34,12 @@
#include <linux/backing-dev.h>
#include <linux/freezer.h>
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
@@ -1435,10 +1441,12 @@ xfs_unregister_buftarg(
void
xfs_free_buftarg(
- xfs_buftarg_t *btp)
+ struct xfs_mount *mp,
+ struct xfs_buftarg *btp)
{
xfs_flush_buftarg(btp, 1);
- xfs_blkdev_issue_flush(btp);
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ xfs_blkdev_issue_flush(btp);
xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 288ae7c4c800..9b4d666ad31f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -413,7 +413,7 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
* Handling of buftargs.
*/
extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
-extern void xfs_free_buftarg(xfs_buftarg_t *);
+extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index c71e226da7f5..32ae5028e96b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -734,15 +734,15 @@ xfs_close_devices(
{
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
- xfs_free_buftarg(mp->m_logdev_targp);
+ xfs_free_buftarg(mp, mp->m_logdev_targp);
xfs_blkdev_put(logdev);
}
if (mp->m_rtdev_targp) {
struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
- xfs_free_buftarg(mp->m_rtdev_targp);
+ xfs_free_buftarg(mp, mp->m_rtdev_targp);
xfs_blkdev_put(rtdev);
}
- xfs_free_buftarg(mp->m_ddev_targp);
+ xfs_free_buftarg(mp, mp->m_ddev_targp);
}
/*
@@ -811,9 +811,9 @@ xfs_open_devices(
out_free_rtdev_targ:
if (mp->m_rtdev_targp)
- xfs_free_buftarg(mp->m_rtdev_targp);
+ xfs_free_buftarg(mp, mp->m_rtdev_targp);
out_free_ddev_targ:
- xfs_free_buftarg(mp->m_ddev_targp);
+ xfs_free_buftarg(mp, mp->m_ddev_targp);
out_close_rtdev:
if (rtdev)
xfs_blkdev_put(rtdev);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e2fb6210d4c5..478e587087fe 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -246,9 +246,6 @@ xfs_iget_cache_miss(
goto out_destroy;
}
- if (lock_flags)
- xfs_ilock(ip, lock_flags);
-
/*
* Preload the radix tree so we can insert safely under the
* write spinlock. Note that we cannot sleep inside the preload
@@ -256,7 +253,16 @@ xfs_iget_cache_miss(
*/
if (radix_tree_preload(GFP_KERNEL)) {
error = EAGAIN;
- goto out_unlock;
+ goto out_destroy;
+ }
+
+ /*
+ * Because the inode hasn't been added to the radix-tree yet it can't
+ * be found by another thread, so we can do the non-sleeping lock here.
+ */
+ if (lock_flags) {
+ if (!xfs_ilock_nowait(ip, lock_flags))
+ BUG();
}
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
@@ -284,7 +290,6 @@ xfs_iget_cache_miss(
out_preload_end:
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
-out_unlock:
if (lock_flags)
xfs_iunlock(ip, lock_flags);
out_destroy:
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b1047de2fffd..61af610d79b3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1455,10 +1455,19 @@ xlog_recover_add_to_trans(
item = item->ri_prev;
if (item->ri_total == 0) { /* first region to be added */
- item->ri_total = in_f->ilf_size;
- ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
- item->ri_buf = kmem_zalloc((item->ri_total *
- sizeof(xfs_log_iovec_t)), KM_SLEEP);
+ if (in_f->ilf_size == 0 ||
+ in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+ xlog_warn(
+ "XFS: bad number of regions (%d) in inode log format",
+ in_f->ilf_size);
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
+
+ item->ri_total = in_f->ilf_size;
+ item->ri_buf =
+ kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+ KM_SLEEP);
}
ASSERT(item->ri_total > item->ri_cnt);
/* Description region is ri_buf[0] */