summaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/Kconfig85
-rw-r--r--fs/ocfs2/alloc.c3
-rw-r--r--fs/ocfs2/dcache.c42
-rw-r--r--fs/ocfs2/dcache.h9
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/ocfs2.h6
-rw-r--r--fs/ocfs2/quota_global.c173
-rw-r--r--fs/ocfs2/super.c3
-rw-r--r--fs/ocfs2/xattr.c17
9 files changed, 161 insertions, 181 deletions
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
new file mode 100644
index 000000000000..701b7a3a872e
--- /dev/null
+++ b/fs/ocfs2/Kconfig
@@ -0,0 +1,85 @@
+config OCFS2_FS
+ tristate "OCFS2 file system support"
+ depends on NET && SYSFS
+ select CONFIGFS_FS
+ select JBD2
+ select CRC32
+ select QUOTA
+ select QUOTA_TREE
+ help
+ OCFS2 is a general purpose extent based shared disk cluster file
+ system with many similarities to ext3. It supports 64 bit inode
+ numbers, and has automatically extending metadata groups which may
+ also make it attractive for non-clustered use.
+
+ You'll want to install the ocfs2-tools package in order to at least
+ get "mount.ocfs2".
+
+ Project web page: http://oss.oracle.com/projects/ocfs2
+ Tools web page: http://oss.oracle.com/projects/ocfs2-tools
+ OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+
+ For more information on OCFS2, see the file
+ <file:Documentation/filesystems/ocfs2.txt>.
+
+config OCFS2_FS_O2CB
+ tristate "O2CB Kernelspace Clustering"
+ depends on OCFS2_FS
+ default y
+ help
+ OCFS2 includes a simple kernelspace clustering package, the OCFS2
+ Cluster Base. It only requires a very small userspace component
+ to configure it. This comes with the standard ocfs2-tools package.
+ O2CB is limited to maintaining a cluster for OCFS2 file systems.
+ It cannot manage any other cluster applications.
+
+ It is always safe to say Y here, as the clustering method is
+ run-time selectable.
+
+config OCFS2_FS_USERSPACE_CLUSTER
+ tristate "OCFS2 Userspace Clustering"
+ depends on OCFS2_FS && DLM
+ default y
+ help
+ This option will allow OCFS2 to use userspace clustering services
+ in conjunction with the DLM in fs/dlm. If you are using a
+ userspace cluster manager, say Y here.
+
+ It is safe to say Y, as the clustering method is run-time
+ selectable.
+
+config OCFS2_FS_STATS
+ bool "OCFS2 statistics"
+ depends on OCFS2_FS
+ default y
+ help
+ This option allows some fs statistics to be captured. Enabling
+ this option may increase the memory consumption.
+
+config OCFS2_DEBUG_MASKLOG
+ bool "OCFS2 logging support"
+ depends on OCFS2_FS
+ default y
+ help
+ The ocfs2 filesystem has an extensive logging system. The system
+ allows selection of events to log via files in /sys/o2cb/logmask/.
+ This option will enlarge your kernel, but it allows debugging of
+ ocfs2 filesystem issues.
+
+config OCFS2_DEBUG_FS
+ bool "OCFS2 expensive checks"
+ depends on OCFS2_FS
+ default n
+ help
+ This option will enable expensive consistency checks. Enable
+ this option for debugging only as it is likely to decrease
+ performance of the filesystem.
+
+config OCFS2_FS_POSIX_ACL
+ bool "OCFS2 POSIX Access Control Lists"
+ depends on OCFS2_FS
+ select FS_POSIX_ACL
+ default n
+ help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d861096c9d81..60fe74035db5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5390,6 +5390,9 @@ int ocfs2_remove_btree_range(struct inode *inode,
goto out;
}
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(inode->i_sb, len));
+
ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
dealloc);
if (ret) {
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b1cc7c381e88..e9d7c2038c0f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -38,6 +38,7 @@
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
+#include "super.h"
static int ocfs2_dentry_revalidate(struct dentry *dentry,
@@ -294,6 +295,34 @@ out_attach:
return ret;
}
+static DEFINE_SPINLOCK(dentry_list_lock);
+
+/* We limit the number of dentry locks to drop in one go. We have
+ * this limit so that we don't starve other users of ocfs2_wq. */
+#define DL_INODE_DROP_COUNT 64
+
+/* Drop inode references from dentry locks */
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+ struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+ dentry_lock_work);
+ struct ocfs2_dentry_lock *dl;
+ int drop_count = DL_INODE_DROP_COUNT;
+
+ spin_lock(&dentry_list_lock);
+ while (osb->dentry_lock_list && drop_count--) {
+ dl = osb->dentry_lock_list;
+ osb->dentry_lock_list = dl->dl_next;
+ spin_unlock(&dentry_list_lock);
+ iput(dl->dl_inode);
+ kfree(dl);
+ spin_lock(&dentry_list_lock);
+ }
+ if (osb->dentry_lock_list)
+ queue_work(ocfs2_wq, &osb->dentry_lock_work);
+ spin_unlock(&dentry_list_lock);
+}
+
/*
* ocfs2_dentry_iput() and friends.
*
@@ -318,16 +347,23 @@ out_attach:
static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
- iput(dl->dl_inode);
ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
ocfs2_lock_res_free(&dl->dl_lockres);
- kfree(dl);
+
+ /* We leave dropping of inode reference to ocfs2_wq as that can
+ * possibly lead to inode deletion which gets tricky */
+ spin_lock(&dentry_list_lock);
+ if (!osb->dentry_lock_list)
+ queue_work(ocfs2_wq, &osb->dentry_lock_work);
+ dl->dl_next = osb->dentry_lock_list;
+ osb->dentry_lock_list = dl;
+ spin_unlock(&dentry_list_lock);
}
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl)
{
- int unlock = 0;
+ int unlock;
BUG_ON(dl->dl_count == 0);
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index c091c34d9883..d06e16c06640 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,8 +29,13 @@
extern struct dentry_operations ocfs2_dentry_ops;
struct ocfs2_dentry_lock {
+ /* Use count of dentry lock */
unsigned int dl_count;
- u64 dl_parent_blkno;
+ union {
+ /* Linked list of dentry locks to release */
+ struct ocfs2_dentry_lock *dl_next;
+ u64 dl_parent_blkno;
+ };
/*
* The ocfs2_dentry_lock keeps an inode reference until
@@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
struct ocfs2_dentry_lock *dl);
+void ocfs2_drop_dl_inodes(struct work_struct *work);
+
struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
int skip_unhashed);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b0c4cadd4c45..206a2370876a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2860,6 +2860,10 @@ static void ocfs2_unlock_ast(void *opaque, int error)
case OCFS2_UNLOCK_CANCEL_CONVERT:
mlog(0, "Cancel convert success for %s\n", lockres->l_name);
lockres->l_action = OCFS2_AST_INVALID;
+ /* Downconvert thread may have requeued this lock, we
+ * need to wake it. */
+ if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
+ ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
break;
case OCFS2_UNLOCK_DROP_LOCK:
lockres->l_level = DLM_LOCK_IV;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ad5c24a29edd..077384135f4e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -210,6 +210,7 @@ struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
struct ocfs2_quota_recovery;
+struct ocfs2_dentry_lock;
struct ocfs2_super
{
struct task_struct *commit_task;
@@ -325,6 +326,11 @@ struct ocfs2_super
struct list_head blocked_lock_list;
unsigned long blocked_lock_count;
+ /* List of dentry locks to release. Anyone can add locks to
+ * the list, ocfs2_wq processes the list */
+ struct ocfs2_dentry_lock *dentry_lock_list;
+ struct work_struct dentry_lock_work;
+
wait_queue_head_t osb_mount_event;
/* Truncate log info */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 6aff8f2d3e49..1ed0f7c86869 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
if (dquot->dq_flags & mask)
sync = 1;
spin_unlock(&dq_data_lock);
- if (!sync) {
+ /* This is a slight hack but we can't afford getting global quota
+ * lock if we already have a transaction started. */
+ if (!sync || journal_current_handle()) {
status = ocfs2_write_dquot(dquot);
goto out;
}
@@ -810,171 +812,6 @@ out:
return status;
}
-/* This is difficult. We have to lock quota inode and start transaction
- * in this function but we don't want to take the penalty of exlusive
- * quota file lock when we are just going to use cached structures. So
- * we just take read lock check whether we have dquot cached and if so,
- * we don't have to take the write lock... */
-static int ocfs2_dquot_initialize(struct inode *inode, int type)
-{
- handle_t *handle = NULL;
- int status = 0;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
- int exclusive = 0;
- int cnt;
- qid_t id;
-
- mlog_entry_void();
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (type != -1 && cnt != type)
- continue;
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 0);
- if (status < 0)
- goto out;
- /* This is just a performance optimization not a reliable test.
- * Since we hold an inode lock, noone can actually release
- * the structure until we are finished with initialization. */
- if (inode->i_dquot[cnt] != NODQUOT) {
- ocfs2_unlock_global_qf(oinfo, 0);
- continue;
- }
- /* When we have inode lock, we know that no dquot_release() can
- * run and thus we can safely check whether we need to
- * read+modify global file to get quota information or whether
- * our node already has it. */
- if (cnt == USRQUOTA)
- id = inode->i_uid;
- else if (cnt == GRPQUOTA)
- id = inode->i_gid;
- else
- BUG();
- /* Obtain exclusion from quota off... */
- down_write(&sb_dqopt(sb)->dqptr_sem);
- exclusive = !dquot_is_cached(sb, id, cnt);
- up_write(&sb_dqopt(sb)->dqptr_sem);
- if (exclusive) {
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0) {
- exclusive = 0;
- mlog_errno(status);
- goto out_ilock;
- }
- handle = ocfs2_start_trans(OCFS2_SB(sb),
- ocfs2_calc_qinit_credits(sb, cnt));
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out_ilock;
- }
- }
- dquot_initialize(inode, cnt);
- if (exclusive) {
- ocfs2_commit_trans(OCFS2_SB(sb), handle);
- ocfs2_unlock_global_qf(oinfo, 1);
- }
- ocfs2_unlock_global_qf(oinfo, 0);
- }
- mlog_exit(0);
- return 0;
-out_ilock:
- if (exclusive)
- ocfs2_unlock_global_qf(oinfo, 1);
- ocfs2_unlock_global_qf(oinfo, 0);
-out:
- mlog_exit(status);
- return status;
-}
-
-static int ocfs2_dquot_drop_slow(struct inode *inode)
-{
- int status = 0;
- int cnt;
- int got_lock[MAXQUOTAS] = {0, 0};
- handle_t *handle;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0)
- goto out;
- got_lock[cnt] = 1;
- }
- handle = ocfs2_start_trans(OCFS2_SB(sb),
- ocfs2_calc_qinit_credits(sb, USRQUOTA) +
- ocfs2_calc_qinit_credits(sb, GRPQUOTA));
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out;
- }
- dquot_drop(inode);
- ocfs2_commit_trans(OCFS2_SB(sb), handle);
-out:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (got_lock[cnt]) {
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- ocfs2_unlock_global_qf(oinfo, 1);
- }
- return status;
-}
-
-/* See the comment before ocfs2_dquot_initialize. */
-static int ocfs2_dquot_drop(struct inode *inode)
-{
- int status = 0;
- struct super_block *sb = inode->i_sb;
- struct ocfs2_mem_dqinfo *oinfo;
- int exclusive = 0;
- int cnt;
- int got_lock[MAXQUOTAS] = {0, 0};
-
- mlog_entry_void();
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!sb_has_quota_active(sb, cnt))
- continue;
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 0);
- if (status < 0)
- goto out;
- got_lock[cnt] = 1;
- }
- /* Lock against anyone releasing references so that when when we check
- * we know we are not going to be last ones to release dquot */
- down_write(&sb_dqopt(sb)->dqptr_sem);
- /* Urgh, this is a terrible hack :( */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt] != NODQUOT &&
- atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
- exclusive = 1;
- break;
- }
- }
- if (!exclusive)
- dquot_drop_locked(inode);
- up_write(&sb_dqopt(sb)->dqptr_sem);
-out:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (got_lock[cnt]) {
- oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
- ocfs2_unlock_global_qf(oinfo, 0);
- }
- /* In case we bailed out because we had to do expensive locking
- * do it now... */
- if (exclusive)
- status = ocfs2_dquot_drop_slow(inode);
- mlog_exit(status);
- return status;
-}
-
static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
{
struct ocfs2_dquot *dquot =
@@ -991,8 +828,8 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
}
struct dquot_operations ocfs2_quota_operations = {
- .initialize = ocfs2_dquot_initialize,
- .drop = ocfs2_dquot_drop,
+ .initialize = dquot_initialize,
+ .drop = dquot_drop,
.alloc_space = dquot_alloc_space,
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 43ed11345b59..b1cb38fbe807 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1887,6 +1887,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
journal->j_state = OCFS2_JOURNAL_FREE;
+ INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
+ osb->dentry_lock_list = NULL;
+
/* get some pseudo constants for clustersize bits */
osb->s_clustersize_bits =
le32_to_cpu(di->id2.i_super.s_clustersize_bits);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e1d638af6ac3..915039fffe6e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -4729,13 +4729,6 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
vb.vb_xv = (struct ocfs2_xattr_value_root *)
(vb.vb_bh->b_data + offset % blocksize);
- ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
/*
* From here on out we have to dirty the bucket. The generic
* value calls only modify one of the bucket's bhs, but we need
@@ -4748,12 +4741,18 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
if (ret) {
mlog_errno(ret);
- goto out_dirty;
+ goto out;
+ }
+
+ ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
}
xe->xe_value_size = cpu_to_le64(len);
-out_dirty:
ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
out: