27 files changed, 1168 insertions, 509 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 09e6be8aa036..2e939f5fe751 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -21,6 +21,7 @@
 #include "glock.h"
 #include "inode.h"
 #include "meta_io.h"
+#include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
@@ -116,14 +117,14 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
 		return -E2BIG;
 
-	ret = gfs2_rsqa_alloc(ip);
+	ret = gfs2_qa_get(ip);
 	if (ret)
 		return ret;
 
 	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
 		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 		if (ret)
-			return ret;
+			goto out;
 		need_unlock = true;
 	}
 
@@ -143,5 +144,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 unlock:
 	if (need_unlock)
 		gfs2_glock_dq_uninit(&gh);
+out:
+	gfs2_qa_put(ip);
 	return ret;
 }
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ba83b49ce18c..786c1ce8f030 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -805,11 +805,16 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
 		bd = bh->b_private;
 		if (bd) {
 			gfs2_assert_warn(sdp, bd->bd_bh == bh);
-			if (!list_empty(&bd->bd_list))
-				list_del_init(&bd->bd_list);
 			bd->bd_bh = NULL;
 			bh->b_private = NULL;
-			kmem_cache_free(gfs2_bufdata_cachep, bd);
+			/*
+			 * The bd may still be queued as a revoke, in which
+			 * case we must not dequeue nor free it.
+			 */
+			if (!bd->bd_blkno && !list_empty(&bd->bd_list))
+				list_del_init(&bd->bd_list);
+			if (list_empty(&bd->bd_list))
+				kmem_cache_free(gfs2_bufdata_cachep, bd);
 		}
 
 		bh = bh->b_this_page;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 08f6fbb3655e..936a8ec6b48e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2183,7 +2183,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 
 	inode_dio_wait(inode);
 
-	ret = gfs2_rsqa_alloc(ip);
+	ret = gfs2_qa_get(ip);
 	if (ret)
 		goto out;
 
@@ -2194,7 +2194,8 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 
 	ret = do_shrink(inode, newsize);
 out:
-	gfs2_rsqa_delete(ip, NULL);
+	gfs2_rs_delete(ip, NULL);
+	gfs2_qa_put(ip);
 	return ret;
 }
 
@@ -2223,7 +2224,7 @@ void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
 	struct gfs2_journal_extent *jext;
 
 	while(!list_empty(&jd->extent_list)) {
-		jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
+		jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
 		list_del(&jext->list);
 		kfree(jext);
 	}
@@ -2244,7 +2245,7 @@ static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 b
 	struct gfs2_journal_extent *jext;
 
 	if (!list_empty(&jd->extent_list)) {
-		jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
+		jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
 		if ((jext->dblock + jext->blocks) == dblock) {
 			jext->blocks += blocks;
 			return 0;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c8b62577e2f2..c3f7732415be 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -2028,7 +2028,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 
 	error = gfs2_trans_begin(sdp,
 			rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
-			RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
+			RES_DINODE + RES_STATFS + RES_QUOTA, RES_DINODE +
+				 l_blocks);
 	if (error)
 		goto out_rg_gunlock;
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index cb26be6f4351..fe305e4bfd37 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -458,10 +458,6 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 
 	sb_start_pagefault(inode->i_sb);
 
-	ret = gfs2_rsqa_alloc(ip);
-	if (ret)
-		goto out;
-
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret)
@@ -558,7 +554,6 @@ out_uninit:
 		set_page_dirty(page);
 		wait_for_stable_page(page);
 	}
-out:
 	sb_end_pagefault(inode->i_sb);
 	return block_page_mkwrite_return(ret);
 }
@@ -635,7 +630,17 @@ int gfs2_open_common(struct inode *inode, struct file *file)
 
 	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
 	file->private_data = fp;
+	if (file->f_mode & FMODE_WRITE) {
+		ret = gfs2_qa_get(GFS2_I(inode));
+		if (ret)
+			goto fail;
+	}
 	return 0;
+
+fail:
+	kfree(file->private_data);
+	file->private_data = NULL;
+	return ret;
 }
 
 /**
@@ -690,10 +695,10 @@ static int gfs2_release(struct inode *inode, struct file *file)
 	kfree(file->private_data);
 	file->private_data = NULL;
 
-	if (!(file->f_mode & FMODE_WRITE))
-		return 0;
-
-	gfs2_rsqa_delete(ip, &inode->i_writecount);
+	if (file->f_mode & FMODE_WRITE) {
+		gfs2_rs_delete(ip, &inode->i_writecount);
+		gfs2_qa_put(ip);
+	}
 	return 0;
 }
 
@@ -849,10 +854,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	ssize_t ret;
 
-	ret = gfs2_rsqa_alloc(ip);
-	if (ret)
-		return ret;
-
 	gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
 
 	if (iocb->ki_flags & IOCB_APPEND) {
@@ -1149,17 +1150,11 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		ret = __gfs2_punch_hole(file, offset, len);
 	} else {
-		ret = gfs2_rsqa_alloc(ip);
-		if (ret)
-			goto out_putw;
-
 		ret = __gfs2_fallocate(file, mode, offset, len);
-
 		if (ret)
 			gfs2_rs_deltree(&ip->i_res);
 	}
 
-out_putw:
 	put_write_access(inode);
 out_unlock:
 	gfs2_glock_dq(&gh);
@@ -1173,16 +1168,12 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
 				      struct file *out, loff_t *ppos,
 				      size_t len, unsigned int flags)
 {
-	int error;
-	struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
-
-	error = gfs2_rsqa_alloc(ip);
-	if (error)
-		return (ssize_t)error;
+	ssize_t ret;
 
 	gfs2_size_hint(out, *ppos, len);
 
-	return iter_file_splice_write(pipe, out, ppos, len, flags);
+	ret = iter_file_splice_write(pipe, out, ppos, len, flags);
+	return ret;
 }
 
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d0eceaff3cea..29f9b6684b74 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -133,6 +133,33 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
 	}
 }
 
+/**
+ * glock_blocked_by_withdraw - determine if we can still use a glock
+ * @gl: the glock
+ *
+ * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
+ * when we're withdrawn. For example, to maintain metadata integrity, we should
+ * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
+ * iopen or the transaction glocks may be safely used because none of their
+ * metadata goes through the journal. So in general, we should disallow all
+ * glocks that are journaled, and allow all the others. One exception is:
+ * we need to allow our active journal to be promoted and demoted so others
+ * may recover it and we can reacquire it when they're done.
+ */
+static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+	if (likely(!gfs2_withdrawn(sdp)))
+		return false;
+	if (gl->gl_ops->go_flags & GLOF_NONDISK)
+		return false;
+	if (!sdp->sd_jdesc ||
+	    gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
+		return false;
+	return true;
+}
+
 void gfs2_glock_free(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -244,7 +271,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
 	gfs2_glock_remove_from_lru(gl);
 	spin_unlock(&gl->gl_lockref.lock);
 	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
-	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+	GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp));
 	trace_gfs2_glock_put(gl);
 	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
 }
@@ -281,7 +308,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
 
 static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
 {
-	const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
+	const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list);
 	if ((gh->gh_state == LM_ST_EXCLUSIVE ||
 	     gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
 		return 0;
@@ -549,8 +576,8 @@ __acquires(&gl->gl_lockref.lock)
 	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
 	int ret;
 
-	if (unlikely(gfs2_withdrawn(sdp)) &&
-	    target != LM_ST_UNLOCKED)
+	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
+	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
 		return;
 	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
 		      LM_FLAG_PRIORITY);
@@ -575,13 +602,64 @@ __acquires(&gl->gl_lockref.lock)
 	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
 		clear_bit(GLF_BLOCKING, &gl->gl_flags);
 	spin_unlock(&gl->gl_lockref.lock);
-	if (glops->go_sync)
-		glops->go_sync(gl);
-	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
+	if (glops->go_sync) {
+		ret = glops->go_sync(gl);
+		/* If we had a problem syncing (due to io errors or whatever,
+		 * we should not invalidate the metadata or tell dlm to
+		 * release the glock to other nodes.
+		 */
+		if (ret) {
+			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
+				fs_err(sdp, "Error %d syncing glock \n", ret);
+				gfs2_dump_glock(NULL, gl, true);
+			}
+			return;
+		}
+	}
+	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
+		/*
+		 * The call to go_sync should have cleared out the ail list.
+		 * If there are still items, we have a problem. We ought to
+		 * withdraw, but we can't because the withdraw code also uses
+		 * glocks. Warn about the error, dump the glock, then fall
+		 * through and wait for logd to do the withdraw for us.
+		 */
+		if ((atomic_read(&gl->gl_ail_count) != 0) &&
+		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
+			gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count));
+			gfs2_dump_glock(NULL, gl, true);
+		}
 		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
-	clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
+		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
+	}
 
 	gfs2_glock_hold(gl);
+	/*
+	 * Check for an error encountered since we called go_sync and go_inval.
+	 * If so, we can't withdraw from the glock code because the withdraw
+	 * code itself uses glocks (see function signal_our_withdraw) to
+	 * change the mount to read-only. Most importantly, we must not call
+	 * dlm to unlock the glock until the journal is in a known good state
+	 * (after journal replay) otherwise other nodes may use the object
+	 * (rgrp or dinode) and then later, journal replay will corrupt the
+	 * file system. The best we can do here is wait for the logd daemon
+	 * to see sd_log_error and withdraw, and in the meantime, requeue the
+	 * work for later.
+	 *
+	 * However, if we're just unlocking the lock (say, for unmount, when
+	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
+	 * then it's okay to tell dlm to unlock it.
+	 */
+	if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
+		gfs2_withdraw_delayed(sdp);
+	if (glock_blocked_by_withdraw(gl)) {
+		if (target != LM_ST_UNLOCKED ||
+		    test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) {
+			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
+			goto out;
+		}
+	}
+
 	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
 		/* lock_dlm */
 		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -590,8 +668,7 @@ __acquires(&gl->gl_lockref.lock)
 		    test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
 			finish_xmote(gl, target);
 			gfs2_glock_queue_work(gl, 0);
-		}
-		else if (ret) {
+		} else if (ret) {
 			fs_err(sdp, "lm_lock ret %d\n", ret);
 			GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
 		}
@@ -599,7 +676,7 @@ __acquires(&gl->gl_lockref.lock)
 		finish_xmote(gl, target);
 		gfs2_glock_queue_work(gl, 0);
 	}
-
+out:
 	spin_lock(&gl->gl_lockref.lock);
 }
 
@@ -613,7 +690,7 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
 	struct gfs2_holder *gh;
 
 	if (!list_empty(&gl->gl_holders)) {
-		gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+		gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
 		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
 			return gh;
 	}
@@ -645,6 +722,9 @@ __acquires(&gl->gl_lockref.lock)
 			goto out_unlock;
 		if (nonblock)
 			goto out_sched;
+		smp_mb();
+		if (atomic_read(&gl->gl_revokes) != 0)
+			goto out_sched;
 		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
 		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
 		gl->gl_target = gl->gl_demote_state;
@@ -1160,7 +1240,7 @@ fail:
 	}
 	list_add_tail(&gh->gh_list, insert_pt);
 do_cancel:
-	gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+	gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
 	if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
 		spin_unlock(&gl->gl_lockref.lock);
 		if (sdp->sd_lockstruct.ls_ops->lm_cancel)
@@ -1194,10 +1274,9 @@ trap_recursive:
 int gfs2_glock_nq(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
-	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	int error = 0;
 
-	if (unlikely(gfs2_withdrawn(sdp)))
+	if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
 		return -EIO;
 
 	if (test_bit(GLF_LRU, &gl->gl_flags))
@@ -1241,24 +1320,32 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
 void gfs2_glock_dq(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
-	const struct gfs2_glock_operations *glops = gl->gl_ops;
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	unsigned delay = 0;
 	int fast_path = 0;
 
 	spin_lock(&gl->gl_lockref.lock);
+	/*
+	 * If we're in the process of file system withdraw, we cannot just
+	 * dequeue any glocks until our journal is recovered, lest we
+	 * introduce file system corruption. We need two exceptions to this
+	 * rule: We need to allow unlocking of nondisk glocks and the glock
+	 * for our own journal that needs recovery.
+	 */
+	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
+	    glock_blocked_by_withdraw(gl) &&
+	    gh->gh_gl != sdp->sd_jinode_gl) {
+		sdp->sd_glock_dqs_held++;
+		might_sleep();
+		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
+			    TASK_UNINTERRUPTIBLE);
+	}
 	if (gh->gh_flags & GL_NOCACHE)
 		handle_callback(gl, LM_ST_UNLOCKED, 0, false);
 
 	list_del_init(&gh->gh_list);
 	clear_bit(HIF_HOLDER, &gh->gh_iflags);
 	if (find_first_holder(gl) == NULL) {
-		if (glops->go_unlock) {
-			GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
-			spin_unlock(&gl->gl_lockref.lock);
-			glops->go_unlock(gh);
-			spin_lock(&gl->gl_lockref.lock);
-			clear_bit(GLF_LOCK, &gl->gl_flags);
-		}
 		if (list_empty(&gl->gl_holders) &&
 		    !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1555,7 +1642,7 @@ __acquires(&lru_lock)
 	list_sort(NULL, list, glock_cmp);
 
 	while(!list_empty(list)) {
-		gl = list_entry(list->next, struct gfs2_glock, gl_lru);
+		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
 		list_del_init(&gl->gl_lru);
 		if (!spin_trylock(&gl->gl_lockref.lock)) {
 add_back_to_lru:
@@ -1596,7 +1683,7 @@ static long gfs2_scan_glock_lru(int nr)
 
 	spin_lock(&lru_lock);
 	while ((nr-- >= 0) && !list_empty(&lru_list)) {
-		gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
+		gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru);
 
 		/* Test for being demotable */
 		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 061d22e1ceb6..9e9c7a4b8c66 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -29,6 +29,8 @@
 
 struct workqueue_struct *gfs2_freeze_wq;
 
+extern struct workqueue_struct *gfs2_control_wq;
+
 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
 {
 	fs_err(gl->gl_name.ln_sbd,
@@ -39,7 +41,8 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
 	fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n",
 	       gl->gl_name.ln_type, gl->gl_name.ln_number,
 	       gfs2_glock2aspace(gl));
-	gfs2_lm_withdraw(gl->gl_name.ln_sbd, "AIL error\n");
+	gfs2_lm(gl->gl_name.ln_sbd, "AIL error\n");
+	gfs2_withdraw(gl->gl_name.ln_sbd);
 }
 
 /**
@@ -79,34 +82,62 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
 }
 
 
-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	struct gfs2_trans tr;
+	int ret;
 
 	memset(&tr, 0, sizeof(tr));
 	INIT_LIST_HEAD(&tr.tr_buf);
 	INIT_LIST_HEAD(&tr.tr_databuf);
 	tr.tr_revokes = atomic_read(&gl->gl_ail_count);
 
-	if (!tr.tr_revokes)
-		return;
+	if (!tr.tr_revokes) {
+		bool have_revokes;
+		bool log_in_flight;
+
+		/*
+		 * We have nothing on the ail, but there could be revokes on
+		 * the sdp revoke queue, in which case, we still want to flush
+		 * the log and wait for it to finish.
+		 *
+		 * If the sdp revoke list is empty too, we might still have an
+		 * io outstanding for writing revokes, so we should wait for
+		 * it before returning.
+		 *
+		 * If none of these conditions are true, our revokes are all
+		 * flushed and we can return.
+		 */
+		gfs2_log_lock(sdp);
+		have_revokes = !list_empty(&sdp->sd_log_revokes);
+		log_in_flight = atomic_read(&sdp->sd_log_in_flight);
+		gfs2_log_unlock(sdp);
+		if (have_revokes)
+			goto flush;
+		if (log_in_flight)
+			log_flush_wait(sdp);
+		return 0;
+	}
 
 	/* A shortened, inline version of gfs2_trans_begin()
          * tr->alloced is not set since the transaction structure is
          * on the stack */
 	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes);
 	tr.tr_ip = _RET_IP_;
-	if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0)
-		return;
+	ret = gfs2_log_reserve(sdp, tr.tr_reserved);
+	if (ret < 0)
+		return ret;
 	WARN_ON_ONCE(current->journal_info);
 	current->journal_info = &tr;
 
 	__gfs2_ail_flush(gl, 0, tr.tr_revokes);
 
 	gfs2_trans_end(sdp);
+flush:
 	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
 		       GFS2_LFC_AIL_EMPTY_GL);
+	return 0;
 }
 
 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
@@ -140,35 +171,32 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
  * return to caller to demote/unlock the glock until I/O is complete.
  */
 
-static void rgrp_go_sync(struct gfs2_glock *gl)
+static int rgrp_go_sync(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	struct address_space *mapping = &sdp->sd_aspace;
-	struct gfs2_rgrpd *rgd;
+	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
 	int error;
 
-	spin_lock(&gl->gl_lockref.lock);
-	rgd = gl->gl_object;
-	if (rgd)
-		gfs2_rgrp_brelse(rgd);
-	spin_unlock(&gl->gl_lockref.lock);
-
 	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
-		return;
+		return 0;
 	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
 
 	gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
 		       GFS2_LFC_RGRP_GO_SYNC);
 	filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
 	error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
+	WARN_ON_ONCE(error);
 	mapping_set_error(mapping, error);
-	gfs2_ail_empty_gl(gl);
+	if (!error)
+		error = gfs2_ail_empty_gl(gl);
 
 	spin_lock(&gl->gl_lockref.lock);
 	rgd = gl->gl_object;
 	if (rgd)
 		gfs2_free_clones(rgd);
 	spin_unlock(&gl->gl_lockref.lock);
+	return error;
 }
 
 /**
@@ -191,7 +219,6 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
 		gfs2_rgrp_brelse(rgd);
 
 	WARN_ON_ONCE(!(flags & DIO_METADATA));
-	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
 	truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
 
 	if (rgd)
@@ -236,12 +263,12 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip)
  *
  */
 
-static void inode_go_sync(struct gfs2_glock *gl)
+static int inode_go_sync(struct gfs2_glock *gl)
 {
 	struct gfs2_inode *ip = gfs2_glock2inode(gl);
 	int isreg = ip && S_ISREG(ip->i_inode.i_mode);
 	struct address_space *metamapping = gfs2_glock2aspace(gl);
-	int error;
+	int error = 0;
 
 	if (isreg) {
 		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
@@ -274,6 +301,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
 out:
 	gfs2_clear_glop_pending(ip);
+	return error;
 }
 
 /**
@@ -291,8 +319,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 {
 	struct gfs2_inode *ip = gfs2_glock2inode(gl);
 
-	gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count));
-
 	if (flags & DIO_METADATA) {
 		struct address_space *mapping = gfs2_glock2aspace(gl);
 		truncate_inode_pages(mapping, 0);
@@ -496,24 +522,29 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
  *
  */
 
-static void freeze_go_sync(struct gfs2_glock *gl)
+static int freeze_go_sync(struct gfs2_glock *gl)
 {
 	int error = 0;
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 
-	if (gl->gl_state == LM_ST_SHARED &&
+	if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
 	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
 		atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
 		error = freeze_super(sdp->sd_vfs);
 		if (error) {
 			fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
 				error);
+			if (gfs2_withdrawn(sdp)) {
+				atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+				return 0;
+			}
 			gfs2_assert_withdraw(sdp, 0);
 		}
 		queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
 		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
 			       GFS2_LFC_FREEZE_GO_SYNC);
 	}
+	return 0;
 }
 
 /**
@@ -582,8 +613,76 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 	}
 }
 
+/**
+ * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
+ * @gl: glock being freed
+ *
+ * For now, this is only used for the journal inode glock. In withdraw
+ * situations, we need to wait for the glock to be freed so that we know
+ * other nodes may proceed with recovery / journal replay.
+ */
+static void inode_go_free(struct gfs2_glock *gl)
+{
+	/* Note that we cannot reference gl_object because it's already set
+	 * to NULL by this point in its lifecycle. */
+	if (!test_bit(GLF_FREEING, &gl->gl_flags))
+		return;
+	clear_bit_unlock(GLF_FREEING, &gl->gl_flags);
+	wake_up_bit(&gl->gl_flags, GLF_FREEING);
+}
+
+/**
+ * nondisk_go_callback - used to signal when a node did a withdraw
+ * @gl: the nondisk glock
+ * @remote: true if this came from a different cluster node
+ *
+ */
+static void nondisk_go_callback(struct gfs2_glock *gl, bool remote)
+{
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+	/* Ignore the callback unless it's from another node, and it's the
+	   live lock. */
+	if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK)
+		return;
+
+	/* First order of business is to cancel the demote request. We don't
+	 * really want to demote a nondisk glock. At best it's just to inform
+	 * us of another node's withdraw. We'll keep it in SH mode. */
+	clear_bit(GLF_DEMOTE, &gl->gl_flags);
+	clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
+
+	/* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */
+	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) ||
+	    test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
+	    test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags))
+		return;
+
+	/* We only care when a node wants us to unlock, because that means
+	 * they want a journal recovered. */
+	if (gl->gl_demote_state != LM_ST_UNLOCKED)
+		return;
+
+	if (sdp->sd_args.ar_spectator) {
+		fs_warn(sdp, "Spectator node cannot recover journals.\n");
+		return;
+	}
+
+	fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n");
+	set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
+	/*
+	 * We can't call remote_withdraw directly here or gfs2_recover_journal
+	 * because this is called from the glock unlock function and the
+	 * remote_withdraw needs to enqueue and dequeue the same "live" glock
+	 * we were called from. So we queue it to the control work queue in
+	 * lock_dlm.
+	 */
+	queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
+}
+
 const struct gfs2_glock_operations gfs2_meta_glops = {
 	.go_type = LM_TYPE_META,
+	.go_flags = GLOF_NONDISK,
 };
 
 const struct gfs2_glock_operations gfs2_inode_glops = {
@@ -594,13 +693,13 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
 	.go_dump = inode_go_dump,
 	.go_type = LM_TYPE_INODE,
 	.go_flags = GLOF_ASPACE | GLOF_LRU,
+	.go_free = inode_go_free,
 };
 
 const struct gfs2_glock_operations gfs2_rgrp_glops = {
 	.go_sync = rgrp_go_sync,
 	.go_inval = rgrp_go_inval,
 	.go_lock = gfs2_rgrp_go_lock,
-	.go_unlock = gfs2_rgrp_go_unlock,
 	.go_dump = gfs2_rgrp_dump,
 	.go_type = LM_TYPE_RGRP,
 	.go_flags = GLOF_LVB,
@@ -611,30 +710,34 @@ const struct gfs2_glock_operations gfs2_freeze_glops = {
 	.go_xmote_bh = freeze_go_xmote_bh,
 	.go_demote_ok = freeze_go_demote_ok,
 	.go_type = LM_TYPE_NONDISK,
+	.go_flags = GLOF_NONDISK,
 };
 
 const struct gfs2_glock_operations gfs2_iopen_glops = {
 	.go_type = LM_TYPE_IOPEN,
 	.go_callback = iopen_go_callback,
-	.go_flags = GLOF_LRU,
+	.go_flags = GLOF_LRU | GLOF_NONDISK,
 };
 
 const struct gfs2_glock_operations gfs2_flock_glops = {
 	.go_type = LM_TYPE_FLOCK,
-	.go_flags = GLOF_LRU,
+	.go_flags = GLOF_LRU | GLOF_NONDISK,
 };
 
 const struct gfs2_glock_operations gfs2_nondisk_glops = {
 	.go_type = LM_TYPE_NONDISK,
+	.go_flags = GLOF_NONDISK,
+	.go_callback = nondisk_go_callback,
 };
 
 const struct gfs2_glock_operations gfs2_quota_glops = {
 	.go_type = LM_TYPE_QUOTA,
-	.go_flags = GLOF_LVB | GLOF_LRU,
+	.go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK,
 };
 
 const struct gfs2_glock_operations gfs2_journal_glops = {
 	.go_type = LM_TYPE_JOURNAL,
+	.go_flags = GLOF_NONDISK,
 };
 
 const struct gfs2_glock_operations *gfs2_glops_list[] = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 9fd88ed18807..84a824293a78 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -234,20 +234,21 @@ struct lm_lockname {
 
 
 struct gfs2_glock_operations {
-	void (*go_sync) (struct gfs2_glock *gl);
+	int (*go_sync) (struct gfs2_glock *gl);
 	int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
 	void (*go_inval) (struct gfs2_glock *gl, int flags);
 	int (*go_demote_ok) (const struct gfs2_glock *gl);
 	int (*go_lock) (struct gfs2_holder *gh);
-	void (*go_unlock) (struct gfs2_holder *gh);
 	void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
 			const char *fs_id_buf);
 	void (*go_callback)(struct gfs2_glock *gl, bool remote);
+	void (*go_free)(struct gfs2_glock *gl);
 	const int go_type;
 	const unsigned long go_flags;
-#define GLOF_ASPACE 1
-#define GLOF_LVB    2
-#define GLOF_LRU    4
+#define GLOF_ASPACE 1 /* address space attached */
+#define GLOF_LVB    2 /* Lock Value Block attached */
+#define GLOF_LRU    4 /* LRU managed */
+#define GLOF_NONDISK   8 /* not I/O related */
 };
 
 enum {
@@ -294,6 +295,7 @@ struct gfs2_qadata { /* quota allocation data */
 	struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS];
 	struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS];
 	unsigned int qa_qd_num;
+	int qa_ref;
 };
 
 /* Resource group multi-block reservation, in order of appearance:
@@ -343,6 +345,7 @@ enum {
 	GLF_OBJECT			= 14, /* Used only for tracing */
 	GLF_BLOCKING			= 15,
 	GLF_INODE_CREATING		= 16, /* Inode creation occurring */
+	GLF_FREEING			= 18, /* Wait for glock to be freed */
 };
 
 struct gfs2_glock {
@@ -542,6 +545,7 @@ struct gfs2_jdesc {
 	struct list_head jd_revoke_list;
 	unsigned int jd_replay_tail;
 
+	u64 jd_no_addr;
 };
 
 struct gfs2_statfs_change_host {
@@ -616,8 +620,12 @@ enum {
 	SDF_RORECOVERY		= 7, /* read only recovery */
 	SDF_SKIP_DLM_UNLOCK	= 8,
 	SDF_FORCE_AIL_FLUSH     = 9,
-	SDF_AIL1_IO_ERROR	= 10,
-	SDF_FS_FROZEN           = 11,
+	SDF_FS_FROZEN           = 10,
+	SDF_WITHDRAWING		= 11, /* Will withdraw eventually */
+	SDF_WITHDRAW_IN_PROG	= 12, /* Withdraw is in progress */
+	SDF_REMOTE_WITHDRAW	= 13, /* Performing remote recovery */
+	SDF_WITHDRAW_RECOVERY	= 14, /* Wait for journal recovery when we are
+					 withdrawing */
 };
 
 enum gfs2_freeze_state {
@@ -768,6 +776,7 @@ struct gfs2_sbd {
 	struct gfs2_jdesc *sd_jdesc;
 	struct gfs2_holder sd_journal_gh;
 	struct gfs2_holder sd_jinode_gh;
+	struct gfs2_glock *sd_jinode_gl;
 
 	struct gfs2_holder sd_sc_gh;
 	struct gfs2_holder sd_qc_gh;
@@ -828,7 +837,8 @@ struct gfs2_sbd {
 	atomic_t sd_log_in_flight;
 	struct bio *sd_log_bio;
 	wait_queue_head_t sd_log_flush_wait;
-	int sd_log_error;
+	int sd_log_error; /* First log error */
+	wait_queue_head_t sd_withdraw_wait;
 
 	atomic_t sd_reserving_log;
 	wait_queue_head_t sd_reserving_log_wait;
@@ -852,6 +862,7 @@ struct gfs2_sbd {
 
 	unsigned long sd_last_warning;
 	struct dentry *debugfs_dir;    /* debugfs directory */
+	unsigned long sd_glock_dqs_held;
 };
 
 static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8294851a9dd9..70b2d3a1e866 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -144,7 +144,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 
 		error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
 		if (unlikely(error))
-			goto fail_put;
+			goto fail;
 
 		if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
 			/*
@@ -155,13 +155,13 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
 						   GL_SKIP, &i_gh);
 			if (error)
-				goto fail_put;
+				goto fail;
 
 			if (blktype != GFS2_BLKST_FREE) {
 				error = gfs2_check_blk_type(sdp, no_addr,
 							    blktype);
 				if (error)
-					goto fail_put;
+					goto fail;
 			}
 		}
 
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 		set_bit(GIF_INVALID, &ip->i_flags);
 		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
 		if (unlikely(error))
-			goto fail_put;
+			goto fail;
 		glock_set_object(ip->i_iopen_gh.gh_gl, ip);
 		gfs2_glock_put(io_gl);
 		io_gl = NULL;
@@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 			/* Inode glock must be locked already */
 			error = gfs2_inode_refresh(GFS2_I(inode));
 			if (error)
-				goto fail_refresh;
+				goto fail;
 		} else {
 			ip->i_no_formal_ino = no_formal_ino;
 			inode->i_mode = DT2IF(type);
@@ -197,17 +197,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 		gfs2_glock_dq_uninit(&i_gh);
 	return inode;
 
-fail_refresh:
-	ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
-	glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
-	gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-fail_put:
+fail:
 	if (io_gl)
 		gfs2_glock_put(io_gl);
-	glock_clear_object(ip->i_gl, ip);
 	if (gfs2_holder_initialized(&i_gh))
 		gfs2_glock_dq_uninit(&i_gh);
-fail:
 	iget_failed(inode);
 	return ERR_PTR(error);
 }
@@ -594,13 +588,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (!name->len || name->len > GFS2_FNAMESIZE)
 		return -ENAMETOOLONG;
 
-	error = gfs2_rsqa_alloc(dip);
+	error = gfs2_qa_get(dip);
 	if (error)
 		return error;
 
 	error = gfs2_rindex_update(sdp);
 	if (error)
-		return error;
+		goto fail;
 
 	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	if (error)
@@ -647,7 +641,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		goto fail_gunlock;
 
 	ip = GFS2_I(inode);
-	error = gfs2_rsqa_alloc(ip);
+	error = gfs2_qa_get(ip);
 	if (error)
 		goto fail_free_acls;
 
@@ -782,11 +776,13 @@ fail_gunlock2:
 	clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
 	gfs2_glock_put(io_gl);
 fail_free_inode:
+	gfs2_qa_put(ip);
 	if (ip->i_gl) {
 		glock_clear_object(ip->i_gl, ip);
 		gfs2_glock_put(ip->i_gl);
 	}
-	gfs2_rsqa_delete(ip, NULL);
+	gfs2_rs_delete(ip, NULL);
+	gfs2_qa_put(ip);
 fail_free_acls:
 	posix_acl_release(default_acl);
 	posix_acl_release(acl);
@@ -804,6 +800,7 @@ fail_gunlock:
 	if (gfs2_holder_initialized(ghs + 1))
 		gfs2_glock_dq_uninit(ghs + 1);
 fail:
+	gfs2_qa_put(dip);
 	return error;
 }
 
@@ -905,7 +902,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	error = gfs2_rsqa_alloc(dip);
+	error = gfs2_qa_get(dip);
 	if (error)
 		return error;
 
@@ -1008,6 +1005,7 @@ out_gunlock:
 out_child:
 	gfs2_glock_dq(ghs);
 out_parent:
+	gfs2_qa_put(ip);
 	gfs2_holder_uninit(ghs);
 	gfs2_holder_uninit(ghs + 1);
 	return error;
@@ -1368,7 +1366,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	if (error)
 		return error;
 
-	error = gfs2_rsqa_alloc(ndip);
+	error = gfs2_qa_get(ndip);
 	if (error)
 		return error;
 
@@ -1568,6 +1566,7 @@ out_gunlock_r:
 	if (gfs2_holder_initialized(&r_gh))
 		gfs2_glock_dq_uninit(&r_gh);
 out:
+	gfs2_qa_put(ndip);
 	return error;
 }
 
@@ -1879,10 +1878,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 		ouid = nuid = NO_UID_QUOTA_CHANGE;
 	if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
 		ogid = ngid = NO_GID_QUOTA_CHANGE;
-
-	error = gfs2_rsqa_alloc(ip);
+	error = gfs2_qa_get(ip);
 	if (error)
-		goto out;
+		return error;
 
 	error = gfs2_rindex_update(sdp);
 	if (error)
@@ -1920,6 +1918,7 @@ out_end_trans:
 out_gunlock_q:
 	gfs2_quota_unlock(ip);
 out:
+	gfs2_qa_put(ip);
 	return error;
 }
 
@@ -1941,21 +1940,21 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	struct gfs2_holder i_gh;
 	int error;
 
-	error = gfs2_rsqa_alloc(ip);
+	error = gfs2_qa_get(ip);
 	if (error)
 		return error;
 
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
-		return error;
+		goto out;
 
 	error = -EPERM;
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		goto out;
+		goto error;
 
 	error = setattr_prepare(dentry, attr);
 	if (error)
-		goto out;
+		goto error;
 
 	if (attr->ia_valid & ATTR_SIZE)
 		error = gfs2_setattr_size(inode, attr->ia_size);
@@ -1967,10 +1966,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 			error = posix_acl_chmod(inode, inode->i_mode);
 	}
 
-out:
+error:
 	if (!error)
 		mark_inode_dirty(inode);
 	gfs2_glock_dq_uninit(&i_gh);
+out:
+	gfs2_qa_put(ip);
 	return error;
 }
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 7c7197343ee2..9f2b5609f225 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -16,6 +16,8 @@
 
 #include "incore.h"
 #include "glock.h"
+#include "glops.h"
+#include "recovery.h"
 #include "util.h"
 #include "sys.h"
 #include "trace_gfs2.h"
@@ -124,6 +126,8 @@ static void gdlm_ast(void *arg)
 
 	switch (gl->gl_lksb.sb_status) {
 	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
+		if (gl->gl_ops->go_free)
+			gl->gl_ops->go_free(gl);
 		gfs2_glock_free(gl);
 		return;
 	case -DLM_ECANCEL: /* Cancel while getting lock */
@@ -323,6 +327,7 @@ static void gdlm_cancel(struct gfs2_glock *gl)
 /*
  * dlm/gfs2 recovery coordination using dlm_recover callbacks
  *
+ *  0. gfs2 checks for another cluster node withdraw, needing journal replay
  *  1. dlm_controld sees lockspace members change
  *  2. dlm_controld blocks dlm-kernel locking activity
  *  3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
@@ -571,6 +576,28 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
 			 &ls->ls_control_lksb, "control_lock");
 }
 
+/**
+ * remote_withdraw - react to a node withdrawing from the file system
+ * @sdp: The superblock
+ */
+static void remote_withdraw(struct gfs2_sbd *sdp)
+{
+	struct gfs2_jdesc *jd;
+	int ret = 0, count = 0;
+
+	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+		if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
+			continue;
+		ret = gfs2_recover_journal(jd, true);
+		if (ret)
+			break;
+		count++;
+	}
+
+	/* Now drop the additional reference we acquired */
+	fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret);
+}
+
 static void gfs2_control_func(struct work_struct *work)
 {
 	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
@@ -581,6 +608,13 @@ static void gfs2_control_func(struct work_struct *work)
 	int recover_size;
 	int i, error;
 
+	/* First check for other nodes that may have done a withdraw. */
+	if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
+		remote_withdraw(sdp);
+		clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
+		return;
+	}
+
 	spin_lock(&ls->ls_recover_spin);
 	/*
 	 * No MOUNT_DONE means we're still mounting; control_mount()
@@ -1079,6 +1113,10 @@ static void gdlm_recover_prep(void *arg)
 	struct gfs2_sbd *sdp = arg;
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recover_prep ignored due to withdraw.\n");
+		return;
+	}
 	spin_lock(&ls->ls_recover_spin);
 	ls->ls_recover_block = ls->ls_recover_start;
 	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
@@ -1101,6 +1139,11 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int jid = slot->slot - 1;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
+		       jid);
+		return;
+	}
 	spin_lock(&ls->ls_recover_spin);
 	if (ls->ls_recover_size < jid + 1) {
 		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
@@ -1125,6 +1168,10 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
 	struct gfs2_sbd *sdp = arg;
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recover_done ignored due to withdraw.\n");
+		return;
+	}
 	/* ensure the ls jid arrays are large enough */
 	set_recover_size(sdp, slots, num_slots);
 
@@ -1152,6 +1199,11 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
+		       jid);
+		return;
+	}
 	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
 		return;
 
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 00a2e721a374..3a75843ae580 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -88,8 +88,7 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
 
 static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
 			       struct writeback_control *wbc,
-			       struct gfs2_trans *tr,
-			       bool *withdraw)
+			       struct gfs2_trans *tr)
 __releases(&sdp->sd_ail_lock)
 __acquires(&sdp->sd_ail_lock)
 {
@@ -97,6 +96,7 @@ __acquires(&sdp->sd_ail_lock)
 	struct address_space *mapping;
 	struct gfs2_bufdata *bd, *s;
 	struct buffer_head *bh;
+	int ret = 0;
 
 	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
 		bh = bd->bd_bh;
@@ -104,16 +104,21 @@ __acquires(&sdp->sd_ail_lock)
 		gfs2_assert(sdp, bd->bd_tr == tr);
 
 		if (!buffer_busy(bh)) {
-			if (!buffer_uptodate(bh) &&
-			    !test_and_set_bit(SDF_AIL1_IO_ERROR,
-					      &sdp->sd_flags)) {
+			if (buffer_uptodate(bh)) {
+				list_move(&bd->bd_ail_st_list,
+					  &tr->tr_ail2_list);
+				continue;
+			}
+			if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
 				gfs2_io_error_bh(sdp, bh);
-				*withdraw = true;
+				gfs2_withdraw_delayed(sdp);
 			}
-			list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
-			continue;
 		}
 
+		if (gfs2_withdrawn(sdp)) {
+			gfs2_remove_from_ail(bd);
+			continue;
+		}
 		if (!buffer_dirty(bh))
 			continue;
 		if (gl == bd->bd_gl)
@@ -124,16 +129,50 @@ __acquires(&sdp->sd_ail_lock)
 		if (!mapping)
 			continue;
 		spin_unlock(&sdp->sd_ail_lock);
-		generic_writepages(mapping, wbc);
+		ret = generic_writepages(mapping, wbc);
 		spin_lock(&sdp->sd_ail_lock);
-		if (wbc->nr_to_write <= 0)
+		if (ret || wbc->nr_to_write <= 0)
 			break;
-		return 1;
+		return -EBUSY;
 	}
 
-	return 0;
+	return ret;
 }
 
+static void dump_ail_list(struct gfs2_sbd *sdp)
+{
+	struct gfs2_trans *tr;
+	struct gfs2_bufdata *bd;
+	struct buffer_head *bh;
+
+	fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n",
+	       current->journal_info ? 1 : 0);
+
+	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
+		list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
+					    bd_ail_st_list) {
+			bh = bd->bd_bh;
+			fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd,
+			       (unsigned long long)bd->bd_blkno, bh);
+			if (!bh) {
+				fs_err(sdp, "\n");
+				continue;
+			}
+			fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d "
+			       "map:%d new:%d ar:%d aw:%d delay:%d "
+			       "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n",
+			       (unsigned long long)bh->b_blocknr,
+			       buffer_uptodate(bh), buffer_dirty(bh),
+			       buffer_locked(bh), buffer_req(bh),
+			       buffer_mapped(bh), buffer_new(bh),
+			       buffer_async_read(bh), buffer_async_write(bh),
+			       buffer_delay(bh), buffer_write_io_error(bh),
+			       buffer_unwritten(bh),
+			       buffer_defer_completion(bh),
+			       buffer_pinned(bh), buffer_escaped(bh));
+		}
+	}
+}
 
 /**
  * gfs2_ail1_flush - start writeback of some ail1 entries 
@@ -149,23 +188,36 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
 	struct list_head *head = &sdp->sd_ail1_list;
 	struct gfs2_trans *tr;
 	struct blk_plug plug;
-	bool withdraw = false;
+	int ret;
+	unsigned long flush_start = jiffies;
 
 	trace_gfs2_ail_flush(sdp, wbc, 1);
 	blk_start_plug(&plug);
 	spin_lock(&sdp->sd_ail_lock);
 restart:
+	ret = 0;
+	if (time_after(jiffies, flush_start + (HZ * 600))) {
+		dump_ail_list(sdp);
+		goto out;
+	}
 	list_for_each_entry_reverse(tr, head, tr_list) {
 		if (wbc->nr_to_write <= 0)
 			break;
-		if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw) &&
-		    !gfs2_withdrawn(sdp))
-			goto restart;
+		ret = gfs2_ail1_start_one(sdp, wbc, tr);
+		if (ret) {
+			if (ret == -EBUSY)
+				goto restart;
+			break;
+		}
 	}
+out:
 	spin_unlock(&sdp->sd_ail_lock);
 	blk_finish_plug(&plug);
-	if (withdraw)
-		gfs2_lm_withdraw(sdp, NULL);
+	if (ret) {
+		gfs2_lm(sdp, "gfs2_ail1_start_one (generic_writepages) "
+			"returned: %d\n", ret);
+		gfs2_withdraw(sdp);
+	}
 	trace_gfs2_ail_flush(sdp, wbc, 0);
 }
 
@@ -189,12 +241,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
 /**
  * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
  * @sdp: the filesystem
- * @ai: the AIL entry
+ * @tr: the transaction
+ * @max_revokes: If nonzero, issue revokes for the bd items for written buffers
  *
  */
 
 static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
-				bool *withdraw)
+				int *max_revokes)
 {
 	struct gfs2_bufdata *bd, *s;
 	struct buffer_head *bh;
@@ -203,12 +256,32 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
 					 bd_ail_st_list) {
 		bh = bd->bd_bh;
 		gfs2_assert(sdp, bd->bd_tr == tr);
-		if (buffer_busy(bh))
+		/*
+		 * If another process flagged an io error, e.g. writing to the
+		 * journal, error all other bhs and move them off the ail1 to
+		 * prevent a tight loop when unmount tries to flush ail1,
+		 * regardless of whether they're still busy. If no outside
+		 * errors were found and the buffer is busy, move to the next.
+		 * If the ail buffer is not busy and caught an error, flag it
+		 * for others.
+		 */
+		if (!sdp->sd_log_error && buffer_busy(bh))
 			continue;
 		if (!buffer_uptodate(bh) &&
-		    !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
+		    !cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
 			gfs2_io_error_bh(sdp, bh);
-			*withdraw = true;
+			gfs2_withdraw_delayed(sdp);
+		}
+		/*
+		 * If we have space for revokes and the bd is no longer on any
+		 * buf list, we can just add a revoke for it immediately and
+		 * avoid having to put it on the ail2 list, where it would need
+		 * to be revoked later.
+		 */
+		if (*max_revokes && list_empty(&bd->bd_list)) {
+			gfs2_add_revoke(sdp, bd);
+			(*max_revokes)--;
+			continue;
 		}
 		list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
 	}
@@ -217,20 +290,20 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
 /**
  * gfs2_ail1_empty - Try to empty the ail1 lists
  * @sdp: The superblock
+ * @max_revokes: If non-zero, add revokes where appropriate
  *
  * Tries to empty the ail1 lists, starting with the oldest first
  */
 
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
+static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
 {
 	struct gfs2_trans *tr, *s;
 	int oldest_tr = 1;
 	int ret;
-	bool withdraw = false;
 
 	spin_lock(&sdp->sd_ail_lock);
 	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
-		gfs2_ail1_empty_one(sdp, tr, &withdraw);
+		gfs2_ail1_empty_one(sdp, tr, &max_revokes);
 		if (list_empty(&tr->tr_ail1_list) && oldest_tr)
 			list_move(&tr->tr_list, &sdp->sd_ail2_list);
 		else
@@ -239,8 +312,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
 	ret = list_empty(&sdp->sd_ail1_list);
 	spin_unlock(&sdp->sd_ail_lock);
 
-	if (withdraw)
-		gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n");
+	if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
+		gfs2_lm(sdp, "fatal: I/O error(s)\n");
+		gfs2_withdraw(sdp);
+	}
 
 	return ret;
 }
@@ -268,20 +343,17 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
 }
 
 /**
- * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
- * @sdp: the filesystem
- * @ai: the AIL entry
- *
+ * gfs2_ail_empty_tr - empty one of the ail lists for a transaction
  */
 
-static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+			      struct list_head *head)
 {
-	struct list_head *head = &tr->tr_ail2_list;
 	struct gfs2_bufdata *bd;
 
 	while (!list_empty(head)) {
-		bd = list_entry(head->prev, struct gfs2_bufdata,
-				bd_ail_st_list);
+		bd = list_first_entry(head, struct gfs2_bufdata,
+				      bd_ail_st_list);
 		gfs2_assert(sdp, bd->bd_tr == tr);
 		gfs2_remove_from_ail(bd);
 	}
@@ -303,7 +375,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 		if (!rm)
 			continue;
 
-		gfs2_ail2_empty_one(sdp, tr);
+		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
 		list_del(&tr->tr_list);
 		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
 		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
@@ -487,7 +559,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
 	if (list_empty(&sdp->sd_ail1_list)) {
 		tail = sdp->sd_log_head;
 	} else {
-		tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
+		tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans,
 				tr_list);
 		tail = tr->tr_first;
 	}
@@ -512,7 +584,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 }
 
 
-static void log_flush_wait(struct gfs2_sbd *sdp)
+void log_flush_wait(struct gfs2_sbd *sdp)
 {
 	DEFINE_WAIT(wait);
 
@@ -549,7 +621,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 	spin_lock(&sdp->sd_ordered_lock);
 	list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp);
 	while (!list_empty(&sdp->sd_log_ordered)) {
-		ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered);
+		ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
 		if (ip->i_inode.i_mapping->nrpages == 0) {
 			test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
 			list_del(&ip->i_ordered);
@@ -570,7 +642,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 
 	spin_lock(&sdp->sd_ordered_lock);
 	while (!list_empty(&sdp->sd_log_ordered)) {
-		ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered);
+		ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
 		list_del(&ip->i_ordered);
 		WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
 		if (ip->i_inode.i_mapping->nrpages == 0)
@@ -616,27 +688,24 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
 	}
 }
 
+/**
+ * gfs2_write_revokes - Add as many revokes to the system transaction as we can
+ * @sdp: The GFS2 superblock
+ *
+ * Our usual strategy is to defer writing revokes as much as we can in the hope
+ * that we'll eventually overwrite the journal, which will make those revokes
+ * go away.  This changes when we flush the log: at that point, there will
+ * likely be some left-over space in the last revoke block of that transaction.
+ * We can fill that space with additional revokes for blocks that have already
+ * been written back.  This will basically come at no cost now, and will save
+ * us from having to keep track of those blocks on the AIL2 list later.
+ */
 void gfs2_write_revokes(struct gfs2_sbd *sdp)
 {
-	struct gfs2_trans *tr;
-	struct gfs2_bufdata *bd, *tmp;
-	int have_revokes = 0;
+	/* number of revokes we still have room for */
 	int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
 
-	gfs2_ail1_empty(sdp);
-	spin_lock(&sdp->sd_ail_lock);
-	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
-		list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
-			if (list_empty(&bd->bd_list)) {
-				have_revokes = 1;
-				goto done;
-			}
-		}
-	}
-done:
-	spin_unlock(&sdp->sd_ail_lock);
-	if (have_revokes == 0)
-		return;
+	gfs2_log_lock(sdp);
 	while (sdp->sd_log_num_revoke > max_revokes)
 		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
 	max_revokes -= sdp->sd_log_num_revoke;
@@ -647,20 +716,7 @@ done:
 		if (!sdp->sd_log_blks_reserved)
 			atomic_dec(&sdp->sd_log_blks_free);
 	}
-	gfs2_log_lock(sdp);
-	spin_lock(&sdp->sd_ail_lock);
-	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
-		list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
-			if (max_revokes == 0)
-				goto out_of_blocks;
-			if (!list_empty(&bd->bd_list))
-				continue;
-			gfs2_add_revoke(sdp, bd);
-			max_revokes--;
-		}
-	}
-out_of_blocks:
-	spin_unlock(&sdp->sd_ail_lock);
+	gfs2_ail1_empty(sdp, max_revokes);
 	gfs2_log_unlock(sdp);
 
 	if (!sdp->sd_log_num_revoke) {
@@ -787,6 +843,40 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 }
 
 /**
+ * ail_drain - drain the ail lists after a withdraw
+ * @sdp: Pointer to GFS2 superblock
+ */
+static void ail_drain(struct gfs2_sbd *sdp)
+{
+	struct gfs2_trans *tr;
+
+	spin_lock(&sdp->sd_ail_lock);
+	/*
+	 * For transactions on the sd_ail1_list we need to drain both the
+	 * ail1 and ail2 lists. That's because function gfs2_ail1_start_one
+	 * (temporarily) moves items from its tr_ail1 list to tr_ail2 list
+	 * before revokes are sent for that block. Items on the sd_ail2_list
+	 * should have already gotten beyond that point, so no need.
+	 */
+	while (!list_empty(&sdp->sd_ail1_list)) {
+		tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans,
+				      tr_list);
+		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
+		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
+		list_del(&tr->tr_list);
+		kfree(tr);
+	}
+	while (!list_empty(&sdp->sd_ail2_list)) {
+		tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
+				      tr_list);
+		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
+		list_del(&tr->tr_list);
+		kfree(tr);
+	}
+	spin_unlock(&sdp->sd_ail_lock);
+}
+
+/**
  * gfs2_log_flush - flush incore transaction(s)
  * @sdp: the filesystem
  * @gl: The glock structure to flush.  If NULL, flush the whole incore log
@@ -796,11 +886,18 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 
 void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 {
-	struct gfs2_trans *tr;
+	struct gfs2_trans *tr = NULL;
 	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 
 	down_write(&sdp->sd_log_flush_lock);
 
+	/*
+	 * Do this check while holding the log_flush_lock to prevent new
+	 * buffers from being added to the ail via gfs2_pin()
+	 */
+	if (gfs2_withdrawn(sdp))
+		goto out;
+
 	/* Log might have been flushed while we waited for the flush lock */
 	if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
 		up_write(&sdp->sd_log_flush_lock);
@@ -819,17 +916,27 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 		INIT_LIST_HEAD(&tr->tr_ail2_list);
 		tr->tr_first = sdp->sd_log_flush_head;
 		if (unlikely (state == SFS_FROZEN))
-			gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
+			if (gfs2_assert_withdraw_delayed(sdp,
+			       !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
+				goto out;
 	}
 
 	if (unlikely(state == SFS_FROZEN))
-		gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
-	gfs2_assert_withdraw(sdp,
-			sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke);
+		if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
+			goto out;
+	if (gfs2_assert_withdraw_delayed(sdp,
+			sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
+		goto out;
 
 	gfs2_ordered_write(sdp);
+	if (gfs2_withdrawn(sdp))
+		goto out;
 	lops_before_commit(sdp, tr);
+	if (gfs2_withdrawn(sdp))
+		goto out;
 	gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
+	if (gfs2_withdrawn(sdp))
+		goto out;
 
 	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
 		log_flush_wait(sdp);
@@ -839,6 +946,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 		trace_gfs2_log_blocks(sdp, -1);
 		log_write_header(sdp, flags);
 	}
+	if (gfs2_withdrawn(sdp))
+		goto out;
 	lops_after_commit(sdp, tr);
 
 	gfs2_log_lock(sdp);
@@ -859,9 +968,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 			for (;;) {
 				gfs2_ail1_start(sdp);
 				gfs2_ail1_wait(sdp);
-				if (gfs2_ail1_empty(sdp))
+				if (gfs2_ail1_empty(sdp, 0))
 					break;
 			}
+			if (gfs2_withdrawn(sdp))
+				goto out;
 			atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
 			trace_gfs2_log_blocks(sdp, -1);
 			log_write_header(sdp, flags);
@@ -874,6 +985,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 	}
 
+out:
+	if (gfs2_withdrawn(sdp)) {
+		ail_drain(sdp); /* frees all transactions */
+		tr = NULL;
+	}
+
 	trace_gfs2_log_flush(sdp, 0, flags);
 	up_write(&sdp->sd_log_flush_lock);
 
@@ -1016,16 +1133,17 @@ int gfs2_logd(void *data)
 
 		/* Check for errors writing to the journal */
 		if (sdp->sd_log_error) {
-			gfs2_lm_withdraw(sdp,
-					 "GFS2: fsid=%s: error %d: "
-					 "withdrawing the file system to "
-					 "prevent further damage.\n",
-					 sdp->sd_fsname, sdp->sd_log_error);
+			gfs2_lm(sdp,
+				"GFS2: fsid=%s: error %d: "
+				"withdrawing the file system to "
+				"prevent further damage.\n",
+				sdp->sd_fsname, sdp->sd_log_error);
+			gfs2_withdraw(sdp);
 		}
 
 		did_flush = false;
 		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
-			gfs2_ail1_empty(sdp);
+			gfs2_ail1_empty(sdp, 0);
 			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
 				       GFS2_LFC_LOGD_JFLUSH_REQD);
 			did_flush = true;
@@ -1034,7 +1152,7 @@ int gfs2_logd(void *data)
 		if (gfs2_ail_flush_reqd(sdp)) {
 			gfs2_ail1_start(sdp);
 			gfs2_ail1_wait(sdp);
-			gfs2_ail1_empty(sdp);
+			gfs2_ail1_empty(sdp, 0);
 			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
 				       GFS2_LFC_LOGD_AIL_FLUSH_REQD);
 			did_flush = true;
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index c0a65e5a126b..c1cd6ae17659 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -73,6 +73,7 @@ extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 			   u32 type);
 extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
 extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
+extern void log_flush_wait(struct gfs2_sbd *sdp);
 
 extern int gfs2_logd(void *data);
 extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c090d5ad3f22..5ea96757afc4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -203,8 +203,12 @@ static void gfs2_end_log_write(struct bio *bio)
 	struct bvec_iter_all iter_all;
 
 	if (bio->bi_status) {
-		fs_err(sdp, "Error %d writing to journal, jid=%u\n",
-		       bio->bi_status, sdp->sd_jdesc->jd_jid);
+		if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status))
+			fs_err(sdp, "Error %d writing to journal, jid=%u\n",
+			       bio->bi_status, sdp->sd_jdesc->jd_jid);
+		gfs2_withdraw_delayed(sdp);
+		/* prevent more writes to the journal */
+		clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 		wake_up(&sdp->sd_logd_waitq);
 	}
 
@@ -730,7 +734,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 
 	head = &tr->tr_buf;
 	while (!list_empty(head)) {
-		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
 		list_del_init(&bd->bd_list);
 		gfs2_unpin(sdp, bd->bd_bh, tr);
 	}
@@ -900,7 +904,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	struct gfs2_glock *gl;
 
 	while (!list_empty(head)) {
-		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
 		list_del_init(&bd->bd_list);
 		gl = bd->bd_gl;
 		gfs2_glock_remove_revoke(gl);
@@ -1079,7 +1083,7 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 
 	head = &tr->tr_databuf;
 	while (!list_empty(head)) {
-		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
 		list_del_init(&bd->bd_list);
 		gfs2_unpin(sdp, bd->bd_bh, tr);
 	}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0c3772974030..4b72abcf83b2 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -251,7 +251,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 	struct buffer_head *bh, *bhs[2];
 	int num = 0;
 
-	if (unlikely(gfs2_withdrawn(sdp))) {
+	if (unlikely(gfs2_withdrawn(sdp)) &&
+	    (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) {
 		*bhp = NULL;
 		return -EIO;
 	}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a1a8ef7ed3fd..e2b69ffcc6a8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -552,6 +552,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 	mutex_lock(&sdp->sd_jindex_mutex);
 
 	for (;;) {
+		struct gfs2_inode *jip;
+
 		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
 		if (error)
 			break;
@@ -591,6 +593,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 
 		spin_lock(&sdp->sd_jindex_spin);
 		jd->jd_jid = sdp->sd_journals++;
+		jip = GFS2_I(jd->jd_inode);
+		jd->jd_no_addr = jip->i_no_addr;
 		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
 		spin_unlock(&sdp->sd_jindex_spin);
 	}
@@ -600,48 +604,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 	return error;
 }
 
-/**
- * check_journal_clean - Make sure a journal is clean for a spectator mount
- * @sdp: The GFS2 superblock
- * @jd: The journal descriptor
- *
- * Returns: 0 if the journal is clean or locked, else an error
- */
-static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
-{
-	int error;
-	struct gfs2_holder j_gh;
-	struct gfs2_log_header_host head;
-	struct gfs2_inode *ip;
-
-	ip = GFS2_I(jd->jd_inode);
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
-				   GL_EXACT | GL_NOCACHE, &j_gh);
-	if (error) {
-		fs_err(sdp, "Error locking journal for spectator mount.\n");
-		return -EPERM;
-	}
-	error = gfs2_jdesc_check(jd);
-	if (error) {
-		fs_err(sdp, "Error checking journal for spectator mount.\n");
-		goto out_unlock;
-	}
-	error = gfs2_find_jhead(jd, &head, false);
-	if (error) {
-		fs_err(sdp, "Error parsing journal for spectator mount.\n");
-		goto out_unlock;
-	}
-	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
-		error = -EPERM;
-		fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter "
-		       "must not be a spectator.\n", jd->jd_jid);
-	}
-
-out_unlock:
-	gfs2_glock_dq_uninit(&j_gh);
-	return error;
-}
-
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
 	struct inode *master = d_inode(sdp->sd_master_dir);
@@ -694,7 +656,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 
 		error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
 					  &gfs2_journal_glops,
-					  LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
+					  LM_ST_EXCLUSIVE,
+					  LM_FLAG_NOEXP | GL_NOCACHE,
 					  &sdp->sd_journal_gh);
 		if (error) {
 			fs_err(sdp, "can't acquire journal glock: %d\n", error);
@@ -702,6 +665,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 		}
 
 		ip = GFS2_I(sdp->sd_jdesc->jd_inode);
+		sdp->sd_jinode_gl = ip->i_gl;
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
 					   LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
 					   &sdp->sd_jinode_gh);
@@ -732,7 +696,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 			struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x);
 
 			if (sdp->sd_args.ar_spectator) {
-				error = check_journal_clean(sdp, jd);
+				error = check_journal_clean(sdp, jd, true);
 				if (error)
 					goto fail_jinode_gh;
 				continue;
@@ -762,10 +726,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 	return 0;
 
 fail_jinode_gh:
-	if (!sdp->sd_args.ar_spectator)
+	/* A withdraw may have done dq/uninit so now we need to check it */
+	if (!sdp->sd_args.ar_spectator &&
+	    gfs2_holder_initialized(&sdp->sd_jinode_gh))
 		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
 fail_journal_gh:
-	if (!sdp->sd_args.ar_spectator)
+	if (!sdp->sd_args.ar_spectator &&
+	    gfs2_holder_initialized(&sdp->sd_journal_gh))
 		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
 fail_jindex:
 	gfs2_jindex_free(sdp);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e9f93045eb01..cc0c4b5800be 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -115,7 +115,7 @@ static void gfs2_qd_dispose(struct list_head *list)
 	struct gfs2_sbd *sdp;
 
 	while (!list_empty(list)) {
-		qd = list_entry(list->next, struct gfs2_quota_data, qd_lru);
+		qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
 		sdp = qd->qd_gl->gl_name.ln_sbd;
 
 		list_del(&qd->qd_lru);
@@ -525,11 +525,11 @@ static void qdsb_put(struct gfs2_quota_data *qd)
 }
 
 /**
- * gfs2_qa_alloc - make sure we have a quota allocations data structure,
- *                 if necessary
+ * gfs2_qa_get - make sure we have a quota allocations data structure,
+ *               if necessary
  * @ip: the inode for this reservation
  */
-int gfs2_qa_alloc(struct gfs2_inode *ip)
+int gfs2_qa_get(struct gfs2_inode *ip)
 {
 	int error = 0;
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -540,17 +540,21 @@ int gfs2_qa_alloc(struct gfs2_inode *ip)
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_qadata == NULL) {
 		ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS);
-		if (!ip->i_qadata)
+		if (!ip->i_qadata) {
 			error = -ENOMEM;
+			goto out;
+		}
 	}
+	ip->i_qadata->qa_ref++;
+out:
 	up_write(&ip->i_rw_mutex);
 	return error;
 }
 
-void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_qa_put(struct gfs2_inode *ip)
 {
 	down_write(&ip->i_rw_mutex);
-	if (ip->i_qadata && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
+	if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) {
 		kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata);
 		ip->i_qadata = NULL;
 	}
@@ -566,27 +570,27 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return 0;
 
-	if (ip->i_qadata == NULL) {
-		error = gfs2_rsqa_alloc(ip);
-		if (error)
-			return error;
-	}
+	error = gfs2_qa_get(ip);
+	if (error)
+		return error;
 
 	qd = ip->i_qadata->qa_qd;
 
 	if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) ||
-	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
-		return -EIO;
+	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) {
+		error = -EIO;
+		goto out;
+	}
 
 	error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
 	if (error)
-		goto out;
+		goto out_unhold;
 	ip->i_qadata->qa_qd_num++;
 	qd++;
 
 	error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
 	if (error)
-		goto out;
+		goto out_unhold;
 	ip->i_qadata->qa_qd_num++;
 	qd++;
 
@@ -594,7 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 	    !uid_eq(uid, ip->i_inode.i_uid)) {
 		error = qdsb_get(sdp, make_kqid_uid(uid), qd);
 		if (error)
-			goto out;
+			goto out_unhold;
 		ip->i_qadata->qa_qd_num++;
 		qd++;
 	}
@@ -603,14 +607,15 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
 	    !gid_eq(gid, ip->i_inode.i_gid)) {
 		error = qdsb_get(sdp, make_kqid_gid(gid), qd);
 		if (error)
-			goto out;
+			goto out_unhold;
 		ip->i_qadata->qa_qd_num++;
 		qd++;
 	}
 
-out:
+out_unhold:
 	if (error)
 		gfs2_quota_unhold(ip);
+out:
 	return error;
 }
 
@@ -621,6 +626,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip)
 
 	if (ip->i_qadata == NULL)
 		return;
+
 	gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
 
 	for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
@@ -628,6 +634,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip)
 		ip->i_qadata->qa_qd[x] = NULL;
 	}
 	ip->i_qadata->qa_qd_num = 0;
+	gfs2_qa_put(ip);
 }
 
 static int sort_qd(const void *a, const void *b)
@@ -876,7 +883,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	unsigned int nalloc = 0, blocks;
 	int error;
 
-	error = gfs2_rsqa_alloc(ip);
+	error = gfs2_qa_get(ip);
 	if (error)
 		return error;
 
@@ -884,8 +891,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 			      &data_blocks, &ind_blocks);
 
 	ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
-	if (!ghs)
-		return -ENOMEM;
+	if (!ghs) {
+		error = -ENOMEM;
+		goto out;
+	}
 
 	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
 	inode_lock(&ip->i_inode);
@@ -893,12 +902,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 		error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
 					   GL_NOCACHE, &ghs[qx]);
 		if (error)
-			goto out;
+			goto out_dq;
 	}
 
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
-		goto out;
+		goto out_dq;
 
 	for (x = 0; x < num_qd; x++) {
 		offset = qd2offset(qda[x]);
@@ -950,13 +959,15 @@ out_ipres:
 	gfs2_inplace_release(ip);
 out_alloc:
 	gfs2_glock_dq_uninit(&i_gh);
-out:
+out_dq:
 	while (qx--)
 		gfs2_glock_dq_uninit(&ghs[qx]);
 	inode_unlock(&ip->i_inode);
 	kfree(ghs);
 	gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
 		       GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
+out:
+	gfs2_qa_put(ip);
 	return error;
 }
 
@@ -1259,6 +1270,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 	if (ip->i_diskflags & GFS2_DIF_SYSTEM)
 		return;
 
+	BUG_ON(ip->i_qadata->qa_ref <= 0);
 	for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
 		qd = ip->i_qadata->qa_qd[x];
 
@@ -1441,7 +1453,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
 
 	spin_lock(&qd_lock);
 	while (!list_empty(head)) {
-		qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
+		qd = list_last_entry(head, struct gfs2_quota_data, qd_list);
 
 		list_del(&qd->qd_list);
 
@@ -1476,8 +1488,8 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
 	if (error == 0 || error == -EROFS)
 		return;
 	if (!gfs2_withdrawn(sdp)) {
-		fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
-		sdp->sd_log_error = error;
+		if (!cmpxchg(&sdp->sd_log_error, 0, error))
+			fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
 		wake_up(&sdp->sd_logd_waitq);
 	}
 }
@@ -1504,7 +1516,7 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp)
 		ip = NULL;
 		spin_lock(&sdp->sd_trunc_lock);
 		if (!list_empty(&sdp->sd_trunc_list)) {
-			ip = list_entry(sdp->sd_trunc_list.next,
+			ip = list_first_entry(&sdp->sd_trunc_list,
 					struct gfs2_inode, i_trunc_list);
 			list_del_init(&ip->i_trunc_list);
 		}
@@ -1541,6 +1553,8 @@ int gfs2_quotad(void *data)
 
 	while (!kthread_should_stop()) {
 
+		if (gfs2_withdrawn(sdp))
+			goto bypass;
 		/* Update the master statfs file */
 		if (sdp->sd_statfs_force_sync) {
 			int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
@@ -1561,6 +1575,7 @@ int gfs2_quotad(void *data)
 
 		try_to_freeze();
 
+bypass:
 		t = min(quotad_timeo, statfs_timeo);
 
 		prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
@@ -1674,7 +1689,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
 	if (error)
 		return error;
 
-	error = gfs2_rsqa_alloc(ip);
+	error = gfs2_qa_get(ip);
 	if (error)
 		goto out_put;
 
@@ -1743,6 +1758,7 @@ out_i:
 out_q:
 	gfs2_glock_dq_uninit(&q_gh);
 out_unlockput:
+	gfs2_qa_put(ip);
 	inode_unlock(&ip->i_inode);
 out_put:
 	qd_put(qd);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 765627d9a91e..7f9ca8ef40fc 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -15,8 +15,8 @@ struct gfs2_sbd;
 #define NO_UID_QUOTA_CHANGE INVALID_UID
 #define NO_GID_QUOTA_CHANGE INVALID_GID
 
-extern int gfs2_qa_alloc(struct gfs2_inode *ip);
-extern void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern int gfs2_qa_get(struct gfs2_inode *ip);
+extern void gfs2_qa_put(struct gfs2_inode *ip);
 extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
 extern void gfs2_quota_unhold(struct gfs2_inode *ip);
 
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 85f830e56945..96c345f49273 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -111,7 +111,7 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 	struct gfs2_revoke_replay *rr;
 
 	while (!list_empty(head)) {
-		rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
+		rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list);
 		list_del(&rr->rr_list);
 		kfree(rr);
 	}
@@ -305,6 +305,11 @@ void gfs2_recover_func(struct work_struct *work)
 	int error = 0;
 	int jlocked = 0;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n",
+		       jd->jd_jid);
+		goto fail;
+	}
 	t_start = ktime_get();
 	if (sdp->sd_args.ar_spectator)
 		goto fail;
@@ -393,6 +398,10 @@ void gfs2_recover_func(struct work_struct *work)
 		fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
 			jd->jd_jid, head.lh_tail, head.lh_blkno);
 
+		/* We take the sd_log_flush_lock here primarily to prevent log
+		 * flushes and simultaneous journal replays from stomping on
+		 * each other wrt sd_log_bio. */
+		down_read(&sdp->sd_log_flush_lock);
 		for (pass = 0; pass < 2; pass++) {
 			lops_before_scan(jd, &head, pass);
 			error = foreach_descriptor(jd, head.lh_tail,
@@ -403,6 +412,7 @@ void gfs2_recover_func(struct work_struct *work)
 		}
 
 		clean_journal(jd, &head);
+		up_read(&sdp->sd_log_flush_lock);
 
 		gfs2_glock_dq_uninit(&thaw_gh);
 		t_rep = ktime_get();
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e7bf91ec231c..a321c34e3d6e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -457,24 +457,24 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 	}
 
 	if (count[0] != rgd->rd_free) {
-		if (gfs2_consist_rgrpd(rgd))
-			fs_err(sdp, "free data mismatch:  %u != %u\n",
-			       count[0], rgd->rd_free);
+		gfs2_lm(sdp, "free data mismatch:  %u != %u\n",
+			count[0], rgd->rd_free);
+		gfs2_consist_rgrpd(rgd);
 		return;
 	}
 
 	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 	if (count[1] != tmp) {
-		if (gfs2_consist_rgrpd(rgd))
-			fs_err(sdp, "used data mismatch:  %u != %u\n",
-			       count[1], tmp);
+		gfs2_lm(sdp, "used data mismatch:  %u != %u\n",
+			count[1], tmp);
+		gfs2_consist_rgrpd(rgd);
 		return;
 	}
 
 	if (count[2] + count[3] != rgd->rd_dinodes) {
-		if (gfs2_consist_rgrpd(rgd))
-			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
-			       count[2] + count[3], rgd->rd_dinodes);
+		gfs2_lm(sdp, "used metadata mismatch:  %u != %u\n",
+			count[2] + count[3], rgd->rd_dinodes);
+		gfs2_consist_rgrpd(rgd);
 		return;
 	}
 }
@@ -590,16 +590,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 	}
 }
 
-/**
- * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
- *                 plus a quota allocations data structure, if necessary
- * @ip: the inode for this reservation
- */
-int gfs2_rsqa_alloc(struct gfs2_inode *ip)
-{
-	return gfs2_qa_alloc(ip);
-}
-
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
 		    const char *fs_id_buf)
 {
@@ -672,18 +662,17 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 }
 
 /**
- * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
+ * gfs2_rs_delete - delete a multi-block reservation
  * @ip: The inode for this reservation
  * @wcount: The inode's write count, or NULL
  *
  */
-void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
 {
 	down_write(&ip->i_rw_mutex);
 	if ((wcount == NULL) || (atomic_read(wcount) <= 1))
 		gfs2_rs_deltree(&ip->i_res);
 	up_write(&ip->i_rw_mutex);
-	gfs2_qa_delete(ip, wcount);
 }
 
 /**
@@ -720,8 +709,12 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 		rb_erase(n, &sdp->sd_rindex_tree);
 
 		if (gl) {
-			glock_clear_object(gl, rgd);
+			if (gl->gl_state != LM_ST_UNLOCKED) {
+				gfs2_glock_cb(gl, LM_ST_UNLOCKED);
+				flush_delayed_work(&gl->gl_work);
+			}
 			gfs2_rgrp_brelse(rgd);
+			glock_clear_object(gl, rgd);
 			gfs2_glock_put(gl);
 		}
 
@@ -733,17 +726,6 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 	}
 }
 
-static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
-{
-	struct gfs2_sbd *sdp = rgd->rd_sbd;
-
-	fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
-	fs_info(sdp, "ri_length = %u\n", rgd->rd_length);
-	fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
-	fs_info(sdp, "ri_data = %u\n", rgd->rd_data);
-	fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes);
-}
-
 /**
  * gfs2_compute_bitstructs - Compute the bitmap sizes
  * @rgd: The resource group descriptor
@@ -814,11 +796,20 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 	}
 	bi = rgd->rd_bits + (length - 1);
 	if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
-		if (gfs2_consist_rgrpd(rgd)) {
-			gfs2_rindex_print(rgd);
-			fs_err(sdp, "start=%u len=%u offset=%u\n",
-			       bi->bi_start, bi->bi_bytes, bi->bi_offset);
-		}
+		gfs2_lm(sdp,
+			"ri_addr = %llu\n"
+			"ri_length = %u\n"
+			"ri_data0 = %llu\n"
+			"ri_data = %u\n"
+			"ri_bitbytes = %u\n"
+			"start=%u len=%u offset=%u\n",
+			(unsigned long long)rgd->rd_addr,
+			rgd->rd_length,
+			(unsigned long long)rgd->rd_data0,
+			rgd->rd_data,
+			rgd->rd_bitbytes,
+			bi->bi_start, bi->bi_bytes, bi->bi_offset);
+		gfs2_consist_rgrpd(rgd);
 		return -EIO;
 	}
 
@@ -1286,23 +1277,6 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
 			bi->bi_bh = NULL;
 		}
 	}
-
-}
-
-/**
- * gfs2_rgrp_go_unlock - Unlock a rgrp glock
- * @gh: The glock holder for the resource group
- *
- */
-
-void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
-{
-	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
-	int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
-		test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
-
-	if (rgd && demote_requested)
-		gfs2_rgrp_brelse(rgd);
 }
 
 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
@@ -1832,10 +1806,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
 
 	while (1) {
-		down_write(&sdp->sd_log_flush_lock);
 		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
 				      true);
-		up_write(&sdp->sd_log_flush_lock);
 		if (error == -ENOSPC)
 			break;
 		if (WARN_ON_ONCE(error))
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index c14a673ae36f..a1d7e14fc55b 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -33,7 +33,6 @@ extern int gfs2_rindex_update(struct gfs2_sbd *sdp);
 extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd);
-extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 
 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
 
@@ -45,9 +44,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
-extern int gfs2_rsqa_alloc(struct gfs2_inode *ip);
 extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
 			       u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 68cc7c291a81..37fc41632aa2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -61,11 +61,13 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp)
 	sdp->sd_journals = 0;
 	spin_unlock(&sdp->sd_jindex_spin);
 
+	sdp->sd_jdesc = NULL;
 	while (!list_empty(&list)) {
-		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
+		jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
 		gfs2_free_journal_extents(jd);
 		list_del(&jd->jd_list);
 		iput(jd->jd_inode);
+		jd->jd_inode = NULL;
 		kfree(jd);
 	}
 }
@@ -171,9 +173,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 		goto fail_threads;
 
 	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
+	if (gfs2_withdrawn(sdp)) {
+		error = -EIO;
+		goto fail;
+	}
 
 	error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
-	if (error)
+	if (error || gfs2_withdrawn(sdp))
 		goto fail;
 
 	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
@@ -187,7 +193,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 	gfs2_log_pointers_init(sdp, head.lh_blkno);
 
 	error = gfs2_quota_init(sdp);
-	if (error)
+	if (error || gfs2_withdrawn(sdp))
 		goto fail;
 
 	set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
@@ -446,7 +452,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
 
 out:
 	while (!list_empty(&list)) {
-		lfcc = list_entry(list.next, struct lfcc, list);
+		lfcc = list_first_entry(&list, struct lfcc, list);
 		list_del(&lfcc->list);
 		gfs2_glock_dq_uninit(&lfcc->gh);
 		kfree(lfcc);
@@ -599,34 +605,63 @@ out:
 int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
 	struct gfs2_holder freeze_gh;
-	int error;
-
-	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
-				   &freeze_gh);
-	if (error && !gfs2_withdrawn(sdp))
-		return error;
+	int error = 0;
+	int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+	gfs2_holder_mark_uninitialized(&freeze_gh);
+	if (sdp->sd_freeze_gl &&
+	    !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
+		if (!log_write_allowed) {
+			error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
+						   LM_ST_SHARED, GL_NOCACHE |
+						   LM_FLAG_TRY, &freeze_gh);
+			if (error == GLR_TRYFAILED)
+				error = 0;
+		} else {
+			error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
+						   LM_ST_SHARED, GL_NOCACHE,
+						   &freeze_gh);
+			if (error && !gfs2_withdrawn(sdp))
+				return error;
+		}
+	}
 
 	flush_workqueue(gfs2_delete_workqueue);
-	if (sdp->sd_quotad_process)
+	if (!log_write_allowed && current == sdp->sd_quotad_process)
+		fs_warn(sdp, "The quotad daemon is withdrawing.\n");
+	else if (sdp->sd_quotad_process)
 		kthread_stop(sdp->sd_quotad_process);
 	sdp->sd_quotad_process = NULL;
-	if (sdp->sd_logd_process)
+
+	if (!log_write_allowed && current == sdp->sd_logd_process)
+		fs_warn(sdp, "The logd daemon is withdrawing.\n");
+	else if (sdp->sd_logd_process)
 		kthread_stop(sdp->sd_logd_process);
 	sdp->sd_logd_process = NULL;
 
-	gfs2_quota_sync(sdp->sd_vfs, 0);
-	gfs2_statfs_sync(sdp->sd_vfs, 0);
-
-	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
-		       GFS2_LFC_MAKE_FS_RO);
-	wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
-	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
+	if (log_write_allowed) {
+		gfs2_quota_sync(sdp->sd_vfs, 0);
+		gfs2_statfs_sync(sdp->sd_vfs, 0);
 
+		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
+			       GFS2_LFC_MAKE_FS_RO);
+		wait_event(sdp->sd_reserving_log_wait,
+			   atomic_read(&sdp->sd_reserving_log) == 0);
+		gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) ==
+				 sdp->sd_jdesc->jd_blocks);
+	} else {
+		wait_event_timeout(sdp->sd_reserving_log_wait,
+				   atomic_read(&sdp->sd_reserving_log) == 0,
+				   HZ * 5);
+	}
 	if (gfs2_holder_initialized(&freeze_gh))
 		gfs2_glock_dq_uninit(&freeze_gh);
 
 	gfs2_quota_cleanup(sdp);
 
+	if (!log_write_allowed)
+		sdp->sd_vfs->s_flags |= SB_RDONLY;
+
 	return error;
 }
 
@@ -677,8 +712,10 @@ restart:
 	gfs2_glock_put(sdp->sd_freeze_gl);
 
 	if (!sdp->sd_args.ar_spectator) {
-		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
-		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+		if (gfs2_holder_initialized(&sdp->sd_journal_gh))
+			gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+		if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
+			gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
 		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
 		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
 		iput(sdp->sd_sc_inode);
@@ -1356,14 +1393,6 @@ out_unlock:
 	if (gfs2_rs_active(&ip->i_res))
 		gfs2_rs_deltree(&ip->i_res);
 
-	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
-		glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
-		if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
-			ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
-			gfs2_glock_dq(&ip->i_iopen_gh);
-		}
-		gfs2_holder_uninit(&ip->i_iopen_gh);
-	}
 	if (gfs2_holder_initialized(&gh)) {
 		glock_clear_object(ip->i_gl, ip);
 		gfs2_glock_dq_uninit(&gh);
@@ -1372,22 +1401,30 @@ out_unlock:
 		fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
 out:
 	truncate_inode_pages_final(&inode->i_data);
-	gfs2_rsqa_delete(ip, NULL);
+	if (ip->i_qadata)
+		gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
+	gfs2_rs_delete(ip, NULL);
+	gfs2_qa_put(ip);
 	gfs2_ordered_del_inode(ip);
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
-	glock_clear_object(ip->i_gl, ip);
-	wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
-	gfs2_glock_add_to_lru(ip->i_gl);
-	gfs2_glock_put_eventually(ip->i_gl);
-	ip->i_gl = NULL;
+	if (ip->i_gl) {
+		glock_clear_object(ip->i_gl, ip);
+		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
+		gfs2_glock_add_to_lru(ip->i_gl);
+		gfs2_glock_put_eventually(ip->i_gl);
+		ip->i_gl = NULL;
+	}
 	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
 		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
 
 		glock_clear_object(gl, ip);
-		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+		if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+			ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+			gfs2_glock_dq(&ip->i_iopen_gh);
+		}
 		gfs2_glock_hold(gl);
-		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+		gfs2_holder_uninit(&ip->i_iopen_gh);
 		gfs2_glock_put_eventually(gl);
 	}
 }
@@ -1401,6 +1438,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 		return NULL;
 	ip->i_flags = 0;
 	ip->i_gl = NULL;
+	gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
 	memset(&ip->i_res, 0, sizeof(ip->i_res));
 	RB_CLEAR_NODE(&ip->i_res.rs_node);
 	ip->i_rahead = 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index b8bf811a1305..51900554ed81 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -26,7 +26,6 @@ extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
 
 extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
 extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
-
 extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
 				     struct gfs2_inode **ipp);
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 8ccb68f4ed16..d28c41bd69b0 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -136,7 +136,8 @@ static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 	if (val != 1)
 		return -EINVAL;
 
-	gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n");
+	gfs2_lm(sdp, "withdrawing from cluster at user's request\n");
+	gfs2_withdraw(sdp);
 
 	return len;
 }
@@ -434,6 +435,8 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
 	 * never clear the DFL_BLOCK_LOCKS flag, so all our locks would
 	 * permanently stop working.
 	 */
+	if (!sdp->sd_jdesc)
+		goto out;
 	if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator)
 		goto out;
 	rv = -ENOENT;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index a685637a5b55..ffe840505082 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -228,6 +228,10 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
 		fs_info(sdp, "GFS2:adding buf while frozen\n");
 		gfs2_assert_withdraw(sdp, 0);
 	}
+	if (unlikely(gfs2_withdrawn(sdp))) {
+		fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
+			(unsigned long long)bd->bd_bh->b_blocknr);
+	}
 	gfs2_pin(sdp, bd->bd_bh);
 	mh->__pad0 = cpu_to_be64(0);
 	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index ec600b487498..9b64d40ab379 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -11,12 +11,18 @@
 #include <linux/buffer_head.h>
 #include <linux/crc32.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/delay.h>
 #include <linux/uaccess.h>
 
 #include "gfs2.h"
 #include "incore.h"
 #include "glock.h"
+#include "glops.h"
+#include "log.h"
+#include "lops.h"
+#include "recovery.h"
 #include "rgrp.h"
+#include "super.h"
 #include "util.h"
 
 struct kmem_cache *gfs2_glock_cachep __read_mostly;
@@ -33,32 +39,257 @@ void gfs2_assert_i(struct gfs2_sbd *sdp)
 	fs_emerg(sdp, "fatal assertion failed\n");
 }
 
-int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
+/**
+ * check_journal_clean - Make sure a journal is clean for a spectator mount
+ * @sdp: The GFS2 superblock
+ * @jd: The journal descriptor
+ *
+ * Returns: 0 if the journal is clean or locked, else an error
+ */
+int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+			bool verbose)
+{
+	int error;
+	struct gfs2_holder j_gh;
+	struct gfs2_log_header_host head;
+	struct gfs2_inode *ip;
+
+	ip = GFS2_I(jd->jd_inode);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
+				   GL_EXACT | GL_NOCACHE, &j_gh);
+	if (error) {
+		if (verbose)
+			fs_err(sdp, "Error %d locking journal for spectator "
+			       "mount.\n", error);
+		return -EPERM;
+	}
+	error = gfs2_jdesc_check(jd);
+	if (error) {
+		if (verbose)
+			fs_err(sdp, "Error checking journal for spectator "
+			       "mount.\n");
+		goto out_unlock;
+	}
+	error = gfs2_find_jhead(jd, &head, false);
+	if (error) {
+		if (verbose)
+			fs_err(sdp, "Error parsing journal for spectator "
+			       "mount.\n");
+		goto out_unlock;
+	}
+	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+		error = -EPERM;
+		if (verbose)
+			fs_err(sdp, "jid=%u: Journal is dirty, so the first "
+			       "mounter must not be a spectator.\n",
+			       jd->jd_jid);
+	}
+
+out_unlock:
+	gfs2_glock_dq_uninit(&j_gh);
+	return error;
+}
+
+static void signal_our_withdraw(struct gfs2_sbd *sdp)
+{
+	struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl;
+	struct inode *inode = sdp->sd_jdesc->jd_inode;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	u64 no_formal_ino = ip->i_no_formal_ino;
+	int ret = 0;
+	int tries;
+
+	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
+		return;
+
+	/* Prevent any glock dq until withdraw recovery is complete */
+	set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+	/*
+	 * Don't tell dlm we're bailing until we have no more buffers in the
+	 * wind. If journal had an IO error, the log code should just purge
+	 * the outstanding buffers rather than submitting new IO. Making the
+	 * file system read-only will flush the journal, etc.
+	 *
+	 * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
+	 * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
+	 * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
+	 * therefore we need to clear SDF_JOURNAL_LIVE manually.
+	 */
+	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+	if (!sb_rdonly(sdp->sd_vfs))
+		ret = gfs2_make_fs_ro(sdp);
+
+	/*
+	 * Drop the glock for our journal so another node can recover it.
+	 */
+	if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
+		gfs2_glock_dq_wait(&sdp->sd_journal_gh);
+		gfs2_holder_uninit(&sdp->sd_journal_gh);
+	}
+	sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
+	gfs2_glock_dq(&sdp->sd_jinode_gh);
+	if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
+		/* Make sure gfs2_unfreeze works if partially-frozen */
+		flush_workqueue(gfs2_freeze_wq);
+		atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+		thaw_super(sdp->sd_vfs);
+	} else {
+		wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
+	}
+
+	/*
+	 * holder_uninit to force glock_put, to force dlm to let go
+	 */
+	gfs2_holder_uninit(&sdp->sd_jinode_gh);
+
+	/*
+	 * Note: We need to be careful here:
+	 * Our iput of jd_inode will evict it. The evict will dequeue its
+	 * glock, but the glock dq will wait for the withdraw unless we have
+	 * exception code in glock_dq.
+	 */
+	iput(inode);
+	/*
+	 * Wait until the journal inode's glock is freed. This allows try locks
+	 * on other nodes to be successful, otherwise we remain the owner of
+	 * the glock as far as dlm is concerned.
+	 */
+	if (gl->gl_ops->go_free) {
+		set_bit(GLF_FREEING, &gl->gl_flags);
+		wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
+	}
+
+	if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
+		clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+		goto skip_recovery;
+	}
+	/*
+	 * Dequeue the "live" glock, but keep a reference so it's never freed.
+	 */
+	gfs2_glock_hold(gl);
+	gfs2_glock_dq_wait(&sdp->sd_live_gh);
+	/*
+	 * We enqueue the "live" glock in EX so that all other nodes
+	 * get a demote request and act on it. We don't really want the
+	 * lock in EX, so we send a "try" lock with 1CB to produce a callback.
+	 */
+	fs_warn(sdp, "Requesting recovery of jid %d.\n",
+		sdp->sd_lockstruct.ls_jid);
+	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
+			   &sdp->sd_live_gh);
+	msleep(GL_GLOCK_MAX_HOLD);
+	/*
+	 * This will likely fail in a cluster, but succeed standalone:
+	 */
+	ret = gfs2_glock_nq(&sdp->sd_live_gh);
+
+	/*
+	 * If we actually got the "live" lock in EX mode, there are no other
+	 * nodes available to replay our journal. So we try to replay it
+	 * ourselves. We hold the "live" glock to prevent other mounters
+	 * during recovery, then just dequeue it and reacquire it in our
+	 * normal SH mode. Just in case the problem that caused us to
+	 * withdraw prevents us from recovering our journal (e.g. io errors
+	 * and such) we still check if the journal is clean before proceeding
+	 * but we may wait forever until another mounter does the recovery.
+	 */
+	if (ret == 0) {
+		fs_warn(sdp, "No other mounters found. Trying to recover our "
+			"own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
+		if (gfs2_recover_journal(sdp->sd_jdesc, 1))
+			fs_warn(sdp, "Unable to recover our journal jid %d.\n",
+				sdp->sd_lockstruct.ls_jid);
+		gfs2_glock_dq_wait(&sdp->sd_live_gh);
+		gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
+				   &sdp->sd_live_gh);
+		gfs2_glock_nq(&sdp->sd_live_gh);
+	}
+
+	gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */
+	clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+
+	/*
+	 * At this point our journal is evicted, so we need to get a new inode
+	 * for it. Once done, we need to call gfs2_find_jhead which
+	 * calls gfs2_map_journal_extents to map it for us again.
+	 *
+	 * Note that we don't really want it to look up a FREE block. The
+	 * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
+	 * which would otherwise fail because it requires grabbing an rgrp
+	 * glock, which would fail with -EIO because we're withdrawing.
+	 */
+	inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
+				  sdp->sd_jdesc->jd_no_addr, no_formal_ino,
+				  GFS2_BLKST_FREE);
+	if (IS_ERR(inode)) {
+		fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
+			sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
+		goto skip_recovery;
+	}
+	sdp->sd_jdesc->jd_inode = inode;
+
+	/*
+	 * Now wait until recovery is complete.
+	 */
+	for (tries = 0; tries < 10; tries++) {
+		ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
+		if (!ret)
+			break;
+		msleep(HZ);
+		fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
+			sdp->sd_lockstruct.ls_jid);
+	}
+skip_recovery:
+	if (!ret)
+		fs_warn(sdp, "Journal recovery complete for jid %d.\n",
+			sdp->sd_lockstruct.ls_jid);
+	else
+		fs_warn(sdp, "Journal recovery skipped for %d until next "
+			"mount.\n", sdp->sd_lockstruct.ls_jid);
+	fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
+	sdp->sd_glock_dqs_held = 0;
+	wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
+}
+
+void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
 {
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	const struct lm_lockops *lm = ls->ls_ops;
-	va_list args;
 	struct va_format vaf;
+	va_list args;
 
 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
-	    test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
-		return 0;
-
-	if (fmt) {
-		va_start(args, fmt);
+	    test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
+		return;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	fs_err(sdp, "%pV", &vaf);
+	va_end(args);
+}
 
-		vaf.fmt = fmt;
-		vaf.va = &args;
+int gfs2_withdraw(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	const struct lm_lockops *lm = ls->ls_ops;
 
-		fs_err(sdp, "%pV", &vaf);
+	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
+	    test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
+		if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
+			return -1;
 
-		va_end(args);
+		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
+			    TASK_UNINTERRUPTIBLE);
+		return -1;
 	}
 
+	set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+
 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 		fs_err(sdp, "about to withdraw this file system\n");
 		BUG_ON(sdp->sd_args.ar_debug);
 
+		signal_our_withdraw(sdp);
+
 		kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
 
 		if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
@@ -69,8 +300,11 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
 			lm->lm_unmount(sdp);
 		}
 		set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
-		fs_err(sdp, "withdrawn\n");
+		fs_err(sdp, "File system withdrawn\n");
 		dump_stack();
+		clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+		smp_mb__after_atomic();
+		wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
 	}
 
 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
@@ -81,35 +315,45 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
 
 /**
  * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
- * Returns: -1 if this call withdrew the machine,
- *          -2 if it was already withdrawn
  */
 
-int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
-			   const char *function, char *file, unsigned int line)
+void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
+			    const char *function, char *file, unsigned int line,
+			    bool delayed)
 {
-	int me;
-	me = gfs2_lm_withdraw(sdp,
-			      "fatal: assertion \"%s\" failed\n"
-			      "   function = %s, file = %s, line = %u\n",
-			      assertion, function, file, line);
+	if (gfs2_withdrawn(sdp))
+		return;
+
+	fs_err(sdp,
+	       "fatal: assertion \"%s\" failed\n"
+	       "   function = %s, file = %s, line = %u\n",
+	       assertion, function, file, line);
+
+	/*
+	 * If errors=panic was specified on mount, it won't help to delay the
+	 * withdraw.
+	 */
+	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
+		delayed = false;
+
+	if (delayed)
+		gfs2_withdraw_delayed(sdp);
+	else
+		gfs2_withdraw(sdp);
 	dump_stack();
-	return (me) ? -1 : -2;
 }
 
 /**
  * gfs2_assert_warn_i - Print a message to the console if @assertion is false
- * Returns: -1 if we printed something
- *          -2 if we didn't
  */
 
-int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
-		       const char *function, char *file, unsigned int line)
+void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
+			const char *function, char *file, unsigned int line)
 {
 	if (time_before(jiffies,
 			sdp->sd_last_warning +
 			gfs2_tune_get(sdp, gt_complain_secs) * HZ))
-		return -2;
+		return;
 
 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
 		fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
@@ -127,69 +371,59 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
 		      sdp->sd_fsname, function, file, line);
 
 	sdp->sd_last_warning = jiffies;
-
-	return -1;
 }
 
 /**
  * gfs2_consist_i - Flag a filesystem consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- *          0 if it was already withdrawn
  */
 
-int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
-		   char *file, unsigned int line)
+void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
+		    char *file, unsigned int line)
 {
-	int rv;
-	rv = gfs2_lm_withdraw(sdp,
-			      "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
-			      function, file, line);
-	return rv;
+	gfs2_lm(sdp,
+		"fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
+		function, file, line);
+	gfs2_withdraw(sdp);
 }
 
 /**
  * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- *          0 if it was already withdrawn
  */
 
-int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
-			 const char *function, char *file, unsigned int line)
+void gfs2_consist_inode_i(struct gfs2_inode *ip,
+			  const char *function, char *file, unsigned int line)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	int rv;
-	rv = gfs2_lm_withdraw(sdp,
-			      "fatal: filesystem consistency error\n"
-			      "  inode = %llu %llu\n"
-			      "  function = %s, file = %s, line = %u\n",
-			      (unsigned long long)ip->i_no_formal_ino,
-			      (unsigned long long)ip->i_no_addr,
-			      function, file, line);
-	return rv;
+
+	gfs2_lm(sdp,
+		"fatal: filesystem consistency error\n"
+		"  inode = %llu %llu\n"
+		"  function = %s, file = %s, line = %u\n",
+		(unsigned long long)ip->i_no_formal_ino,
+		(unsigned long long)ip->i_no_addr,
+		function, file, line);
+	gfs2_withdraw(sdp);
 }
 
 /**
  * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- *          0 if it was already withdrawn
  */
 
-int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
-			 const char *function, char *file, unsigned int line)
+void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
+			  const char *function, char *file, unsigned int line)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
-	int rv;
 
 	sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
 	gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
-	rv = gfs2_lm_withdraw(sdp,
-			      "fatal: filesystem consistency error\n"
-			      "  RG = %llu\n"
-			      "  function = %s, file = %s, line = %u\n",
-			      (unsigned long long)rgd->rd_addr,
-			      function, file, line);
-	return rv;
+	gfs2_lm(sdp,
+		"fatal: filesystem consistency error\n"
+		"  RG = %llu\n"
+		"  function = %s, file = %s, line = %u\n",
+		(unsigned long long)rgd->rd_addr,
+		function, file, line);
+	gfs2_withdraw(sdp);
 }
 
 /**
@@ -203,12 +437,14 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 		       unsigned int line)
 {
 	int me;
-	me = gfs2_lm_withdraw(sdp,
-			      "fatal: invalid metadata block\n"
-			      "  bh = %llu (%s)\n"
-			      "  function = %s, file = %s, line = %u\n",
-			      (unsigned long long)bh->b_blocknr, type,
-			      function, file, line);
+
+	gfs2_lm(sdp,
+		"fatal: invalid metadata block\n"
+		"  bh = %llu (%s)\n"
+		"  function = %s, file = %s, line = %u\n",
+		(unsigned long long)bh->b_blocknr, type,
+		function, file, line);
+	me = gfs2_withdraw(sdp);
 	return (me) ? -1 : -2;
 }
 
@@ -223,12 +459,14 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 			   char *file, unsigned int line)
 {
 	int me;
-	me = gfs2_lm_withdraw(sdp,
-			      "fatal: invalid metadata block\n"
-			      "  bh = %llu (type: exp=%u, found=%u)\n"
-			      "  function = %s, file = %s, line = %u\n",
-			      (unsigned long long)bh->b_blocknr, type, t,
-			      function, file, line);
+
+	gfs2_lm(sdp,
+		"fatal: invalid metadata block\n"
+		"  bh = %llu (type: exp=%u, found=%u)\n"
+		"  function = %s, file = %s, line = %u\n",
+		(unsigned long long)bh->b_blocknr, type, t,
+		function, file, line);
+	me = gfs2_withdraw(sdp);
 	return (me) ? -1 : -2;
 }
 
@@ -241,12 +479,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
 		    unsigned int line)
 {
-	int rv;
-	rv = gfs2_lm_withdraw(sdp,
-			      "fatal: I/O error\n"
-			      "  function = %s, file = %s, line = %u\n",
-			      function, file, line);
-	return rv;
+	gfs2_lm(sdp,
+		"fatal: I/O error\n"
+		"  function = %s, file = %s, line = %u\n",
+		function, file, line);
+	return gfs2_withdraw(sdp);
 }
 
 /**
@@ -258,14 +495,14 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 			const char *function, char *file, unsigned int line,
 			bool withdraw)
 {
-	if (!gfs2_withdrawn(sdp))
-		fs_err(sdp,
-		       "fatal: I/O error\n"
-		       "  block = %llu\n"
-		       "  function = %s, file = %s, line = %u\n",
-		       (unsigned long long)bh->b_blocknr,
-		       function, file, line);
+	if (gfs2_withdrawn(sdp))
+		return;
+
+	fs_err(sdp, "fatal: I/O error\n"
+	       "  block = %llu\n"
+	       "  function = %s, file = %s, line = %u\n",
+	       (unsigned long long)bh->b_blocknr, function, file, line);
 	if (withdraw)
-		gfs2_lm_withdraw(sdp, NULL);
+		gfs2_withdraw(sdp);
 }
 
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index f2702bc9837c..a3542560da6f 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -36,41 +36,59 @@ do { \
 } while (0)
 
 
-int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
-			   const char *function, char *file, unsigned int line);
+void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
+			    const char *function, char *file, unsigned int line,
+			    bool delayed);
 
 #define gfs2_assert_withdraw(sdp, assertion) \
-((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
-					__func__, __FILE__, __LINE__))
-
-
-int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
-		       const char *function, char *file, unsigned int line);
+	({ \
+		bool _bool = (assertion); \
+		if (unlikely(!_bool)) \
+			gfs2_assert_withdraw_i((sdp), #assertion, \
+					__func__, __FILE__, __LINE__, false); \
+		!_bool; \
+	})
+
+#define gfs2_assert_withdraw_delayed(sdp, assertion) \
+	({ \
+		bool _bool = (assertion); \
+		if (unlikely(!_bool)) \
+			gfs2_assert_withdraw_i((sdp), #assertion, \
+					__func__, __FILE__, __LINE__, true); \
+		!_bool; \
+	})
+
+void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
+			const char *function, char *file, unsigned int line);
 
 #define gfs2_assert_warn(sdp, assertion) \
-((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
-					__func__, __FILE__, __LINE__))
-
+	({ \
+		bool _bool = (assertion); \
+		if (unlikely(!_bool)) \
+			gfs2_assert_warn_i((sdp), #assertion, \
+					__func__, __FILE__, __LINE__); \
+		!_bool; \
+	})
 
-int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
-		   const char *function, char *file, unsigned int line);
+void gfs2_consist_i(struct gfs2_sbd *sdp,
+		    const char *function, char *file, unsigned int line);
 
 #define gfs2_consist(sdp) \
-gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
+gfs2_consist_i((sdp), __func__, __FILE__, __LINE__)
 
 
-int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
-			 const char *function, char *file, unsigned int line);
+void gfs2_consist_inode_i(struct gfs2_inode *ip,
+			  const char *function, char *file, unsigned int line);
 
 #define gfs2_consist_inode(ip) \
-gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
+gfs2_consist_inode_i((ip), __func__, __FILE__, __LINE__)
 
 
-int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
-			 const char *function, char *file, unsigned int line);
+void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
+			  const char *function, char *file, unsigned int line);
 
 #define gfs2_consist_rgrpd(rgd) \
-gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
+gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__)
 
 
 int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -129,6 +147,9 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
 int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
 		    char *file, unsigned int line);
 
+extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+			       bool verbose);
+
 #define gfs2_io_error(sdp) \
 gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
 
@@ -165,18 +186,29 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
 }
 
 /**
+ * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks
+ * @sdp: the superblock
+ */
+static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp)
+{
+	set_bit(SDF_WITHDRAWING, &sdp->sd_flags);
+}
+
+/**
  * gfs2_withdrawn - test whether the file system is withdrawing or withdrawn
  * @sdp: the superblock
  */
 static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp)
 {
-	return test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
+	return test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
+		test_bit(SDF_WITHDRAWING, &sdp->sd_flags);
 }
 
 #define gfs2_tune_get(sdp, field) \
 gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
 
 __printf(2, 3)
-int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...);
+void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...);
+int gfs2_withdraw(struct gfs2_sbd *sdp);
 
 #endif /* __UTIL_DOT_H__ */
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index bbe593d16bea..9d7667bc4292 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1222,7 +1222,7 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
 	struct gfs2_holder gh;
 	int ret;
 
-	ret = gfs2_rsqa_alloc(ip);
+	ret = gfs2_qa_get(ip);
 	if (ret)
 		return ret;
 
@@ -1231,15 +1231,19 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
 	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
 		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 		if (ret)
-			return ret;
+			goto out;
 	} else {
-		if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
-			return -EIO;
+		if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) {
+			ret = -EIO;
+			goto out;
+		}
 		gfs2_holder_mark_uninitialized(&gh);
 	}
 	ret = __gfs2_xattr_set(inode, name, value, size, flags, handler->flags);
 	if (gfs2_holder_initialized(&gh))
 		gfs2_glock_dq_uninit(&gh);
+out:
+	gfs2_qa_put(ip);
 	return ret;
 }