author		Linus Torvalds <torvalds@linux-foundation.org>	2018-08-14 17:56:02 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-08-14 17:56:02 +0200
commit		781fca5b104693bc9242199cc47c690dcaf6a4cb (patch)
tree		d216d4299ae5715331a535c84bab390a907bebd6 /fs/xfs/libxfs/xfs_defer.c
parent		Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/t... (diff)
parent		xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree (diff)
Merge tag 'xfs-4.19-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
 "This is the second part of the XFS changes for 4.19. The biggest changes
  are the removal of buffer heads from XFS, a massive reworking of the
  deferred transaction operations handling code, the removal of the long
  defunct barrier/nobarrier mount options, and the addition of a few more
  online repair functions.

  Summary:

   - Use extent maps to track pagecache page status instead of bufferhead state.
   - Refactor pagecache read and write paths to use the new iomap library functions, which enable us to drop the old bufferhead code for pagesize == blocksize filesystems.
   - Set up parallel per-block-per-page metadata to track subpage information that was tracked by buffer heads, which enables us to drop the old bufferhead code for pagesize > blocksize filesystems.
   - Tie a deferred ops control structure to a transaction so that we can take advantage of an upper-level dfops without having to plumb pointer passing through the code.
   - Refactor the deferred ops code to track deferred ops as part of the transaction structure (instead of as a separate data structure) so that we can simplify the scoping rules around defer_ops.
   - Refactor twisty delwri buffer submission code to avoid deadlocks.
   - Shorten and fix indenting problems in the scrub code.
   - Detect obviously bad summary counts at mount and fix them.
   - Directly associate deferred ops control structure with a transaction so that callers no longer have to manage it themselves.
   - Remove a couple of IRIX-era inode macros.
   - Remove the long-deprecated 'barrier' and 'nobarrier' mount options.
   - Clean up the inode fork structure a bit.
   - Check for bad fs summary counter values in the superblock.
   - Reduce COW fork lookups during writeback.
   - Refactor the deferred ops control structures into the transaction structure, thereby eliminating the need for transaction users to handle the deferred ops as a separate data structure.
   - Add the ability to repair AG headers online.
   - Fix a crash due to insufficient return value checking.
   - Various fixes and cleanups"

* tag 'xfs-4.19-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (155 commits)
  xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree
  xfs: remove b_last_holder & associated macros
  iomap: Switch to offset_in_page for clarity
  xfs: Close race between direct IO and xfs_break_layouts()
  xfs: repair the AGI
  xfs: repair the AGFL
  xfs: repair the AGF
  xfs: remove dead error handling code in xfs_dquot_disk_alloc()
  xfs: use WRITE_ONCE to update if_seq
  xfs: fix a comment in xfs_log_reserve
  xfs: only validate summary counts on primary superblock
  xfs: substitute spaces with tabs
  xfs: fold dfops into the transaction
  xfs: always defer agfl block frees
  xfs: pass transaction to xfs_defer_add()
  xfs: replace xfs_defer_ops ->dop_pending with on-stack list
  xfs: cancel dfops on xfs_defer_finish() error
  xfs: clean out superfluous dfops dop params/vars
  xfs: drop dop param from xfs_defer_op_type ->finish_item() callback
  xfs: automatic dfops inode relogging
  ...
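For xfs_defer.c, the headline change is folding dfops into the transaction. As a rough, hedged sketch of the caller-visible difference — the old and new signatures are taken from the removed/added lines in the diff below, while the extent-free item type and its xefi_list field come from the wider XFS tree and are assumptions, not shown on this page:

/*
 * Sketch only: before this series, callers carried a struct
 * xfs_defer_ops on the stack and passed it around explicitly.
 */
static int
old_style_free(struct xfs_trans **tpp, struct xfs_extent_free_item *xefi)
{
	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstfsb;
	int			error;

	xfs_defer_init(&dfops, &firstfsb);
	xfs_defer_add(&dfops, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
	error = xfs_defer_finish(tpp, &dfops);	/* pre-4.19 signature */
	if (error)
		xfs_defer_cancel(&dfops);
	return error;
}

/*
 * After the fold, the pending list lives in tp->t_dfops;
 * xfs_defer_finish() takes only the transaction, cancels the list
 * itself on error, and rolls once more so the caller gets back a
 * clean transaction.
 */
static int
new_style_free(struct xfs_trans **tpp, struct xfs_extent_free_item *xefi)
{
	xfs_defer_add(*tpp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
	return xfs_defer_finish(tpp);
}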
Diffstat (limited to 'fs/xfs/libxfs/xfs_defer.c')
-rw-r--r--	fs/xfs/libxfs/xfs_defer.c	323
1 file changed, 169 insertions(+), 154 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index c3e5bffda4f5..e792b167150a 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -14,6 +14,9 @@
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
#include "xfs_trace.h"
/*
@@ -177,146 +180,157 @@ static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
* the pending list.
*/
STATIC void
-xfs_defer_intake_work(
- struct xfs_trans *tp,
- struct xfs_defer_ops *dop)
+xfs_defer_create_intents(
+ struct xfs_trans *tp)
{
struct list_head *li;
struct xfs_defer_pending *dfp;
- list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
+ list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
dfp->dfp_count);
- trace_xfs_defer_intake_work(tp->t_mountp, dfp);
+ trace_xfs_defer_create_intent(tp->t_mountp, dfp);
list_sort(tp->t_mountp, &dfp->dfp_work,
dfp->dfp_type->diff_items);
list_for_each(li, &dfp->dfp_work)
dfp->dfp_type->log_item(tp, dfp->dfp_intent, li);
}
-
- list_splice_tail_init(&dop->dop_intake, &dop->dop_pending);
}
/* Abort all the intents that were committed. */
STATIC void
xfs_defer_trans_abort(
struct xfs_trans *tp,
- struct xfs_defer_ops *dop,
- int error)
+ struct list_head *dop_pending)
{
struct xfs_defer_pending *dfp;
- trace_xfs_defer_trans_abort(tp->t_mountp, dop, _RET_IP_);
+ trace_xfs_defer_trans_abort(tp, _RET_IP_);
/* Abort intent items that don't have a done item. */
- list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
+ list_for_each_entry(dfp, dop_pending, dfp_list) {
trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
if (dfp->dfp_intent && !dfp->dfp_done) {
dfp->dfp_type->abort_intent(dfp->dfp_intent);
dfp->dfp_intent = NULL;
}
}
-
- /* Shut down FS. */
- xfs_force_shutdown(tp->t_mountp, (error == -EFSCORRUPTED) ?
- SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR);
}
/* Roll a transaction so we can do some deferred op processing. */
STATIC int
xfs_defer_trans_roll(
- struct xfs_trans **tp,
- struct xfs_defer_ops *dop)
+ struct xfs_trans **tpp)
{
+ struct xfs_trans *tp = *tpp;
+ struct xfs_buf_log_item *bli;
+ struct xfs_inode_log_item *ili;
+ struct xfs_log_item *lip;
+ struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS];
+ struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES];
+ int bpcount = 0, ipcount = 0;
int i;
int error;
- /* Log all the joined inodes. */
- for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
- xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
-
- /* Hold the (previously bjoin'd) buffer locked across the roll. */
- for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
- xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+ list_for_each_entry(lip, &tp->t_items, li_trans) {
+ switch (lip->li_type) {
+ case XFS_LI_BUF:
+ bli = container_of(lip, struct xfs_buf_log_item,
+ bli_item);
+ if (bli->bli_flags & XFS_BLI_HOLD) {
+ if (bpcount >= XFS_DEFER_OPS_NR_BUFS) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+ xfs_trans_dirty_buf(tp, bli->bli_buf);
+ bplist[bpcount++] = bli->bli_buf;
+ }
+ break;
+ case XFS_LI_INODE:
+ ili = container_of(lip, struct xfs_inode_log_item,
+ ili_item);
+ if (ili->ili_lock_flags == 0) {
+ if (ipcount >= XFS_DEFER_OPS_NR_INODES) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+ xfs_trans_log_inode(tp, ili->ili_inode,
+ XFS_ILOG_CORE);
+ iplist[ipcount++] = ili->ili_inode;
+ }
+ break;
+ default:
+ break;
+ }
+ }
- trace_xfs_defer_trans_roll((*tp)->t_mountp, dop, _RET_IP_);
+ trace_xfs_defer_trans_roll(tp, _RET_IP_);
/* Roll the transaction. */
- error = xfs_trans_roll(tp);
+ error = xfs_trans_roll(tpp);
+ tp = *tpp;
if (error) {
- trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
- xfs_defer_trans_abort(*tp, dop, error);
+ trace_xfs_defer_trans_roll_error(tp, error);
return error;
}
- dop->dop_committed = true;
/* Rejoin the joined inodes. */
- for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
- xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+ for (i = 0; i < ipcount; i++)
+ xfs_trans_ijoin(tp, iplist[i], 0);
/* Rejoin the buffers and dirty them so the log moves forward. */
- for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
- xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
- xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+ for (i = 0; i < bpcount; i++) {
+ xfs_trans_bjoin(tp, bplist[i]);
+ xfs_trans_bhold(tp, bplist[i]);
}
return error;
}
-/* Do we have any work items to finish? */
-bool
-xfs_defer_has_unfinished_work(
- struct xfs_defer_ops *dop)
-{
- return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake);
-}
-
/*
- * Add this inode to the deferred op. Each joined inode is relogged
- * each time we roll the transaction.
+ * Reset an already used dfops after finish.
*/
-int
-xfs_defer_ijoin(
- struct xfs_defer_ops *dop,
- struct xfs_inode *ip)
+static void
+xfs_defer_reset(
+ struct xfs_trans *tp)
{
- int i;
-
- for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) {
- if (dop->dop_inodes[i] == ip)
- return 0;
- else if (dop->dop_inodes[i] == NULL) {
- dop->dop_inodes[i] = ip;
- return 0;
- }
- }
+ ASSERT(list_empty(&tp->t_dfops));
- ASSERT(0);
- return -EFSCORRUPTED;
+ /*
+ * Low mode state transfers across transaction rolls to mirror dfops
+ * lifetime. Clear it now that dfops is reset.
+ */
+ tp->t_flags &= ~XFS_TRANS_LOWMODE;
}
/*
- * Add this buffer to the deferred op. Each joined buffer is relogged
- * each time we roll the transaction.
+ * Free up any items left in the list.
*/
-int
-xfs_defer_bjoin(
- struct xfs_defer_ops *dop,
- struct xfs_buf *bp)
+static void
+xfs_defer_cancel_list(
+ struct xfs_mount *mp,
+ struct list_head *dop_list)
{
- int i;
+ struct xfs_defer_pending *dfp;
+ struct xfs_defer_pending *pli;
+ struct list_head *pwi;
+ struct list_head *n;
- for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
- if (dop->dop_bufs[i] == bp)
- return 0;
- else if (dop->dop_bufs[i] == NULL) {
- dop->dop_bufs[i] = bp;
- return 0;
+ /*
+ * Free the pending items. Caller should already have arranged
+ * for the intent items to be released.
+ */
+ list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
+ trace_xfs_defer_cancel_list(mp, dfp);
+ list_del(&dfp->dfp_list);
+ list_for_each_safe(pwi, n, &dfp->dfp_work) {
+ list_del(pwi);
+ dfp->dfp_count--;
+ dfp->dfp_type->cancel_item(pwi);
}
+ ASSERT(dfp->dfp_count == 0);
+ kmem_free(dfp);
}
-
- ASSERT(0);
- return -EFSCORRUPTED;
}
/*
@@ -328,9 +342,8 @@ xfs_defer_bjoin(
* If an inode is provided, relog it to the new transaction.
*/
int
-xfs_defer_finish(
- struct xfs_trans **tp,
- struct xfs_defer_ops *dop)
+xfs_defer_finish_noroll(
+ struct xfs_trans **tp)
{
struct xfs_defer_pending *dfp;
struct list_head *li;
@@ -338,35 +351,28 @@ xfs_defer_finish(
void *state;
int error = 0;
void (*cleanup_fn)(struct xfs_trans *, void *, int);
- struct xfs_defer_ops *orig_dop;
+ LIST_HEAD(dop_pending);
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
- trace_xfs_defer_finish((*tp)->t_mountp, dop, _RET_IP_);
-
- /*
- * Attach dfops to the transaction during deferred ops processing. This
- * explicitly causes calls into the allocator to defer AGFL block frees.
- * Note that this code can go away once all dfops users attach to the
- * associated tp.
- */
- ASSERT(!(*tp)->t_agfl_dfops || ((*tp)->t_agfl_dfops == dop));
- orig_dop = (*tp)->t_agfl_dfops;
- (*tp)->t_agfl_dfops = dop;
+ trace_xfs_defer_finish(*tp, _RET_IP_);
/* Until we run out of pending work to finish... */
- while (xfs_defer_has_unfinished_work(dop)) {
- /* Log intents for work items sitting in the intake. */
- xfs_defer_intake_work(*tp, dop);
-
- /* Roll the transaction. */
- error = xfs_defer_trans_roll(tp, dop);
+ while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
+ /* log intents and pull in intake items */
+ xfs_defer_create_intents(*tp);
+ list_splice_tail_init(&(*tp)->t_dfops, &dop_pending);
+
+ /*
+ * Roll the transaction.
+ */
+ error = xfs_defer_trans_roll(tp);
if (error)
goto out;
/* Log an intent-done item for the first pending item. */
- dfp = list_first_entry(&dop->dop_pending,
- struct xfs_defer_pending, dfp_list);
+ dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
+ dfp_list);
trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
dfp->dfp_count);
@@ -377,7 +383,7 @@ xfs_defer_finish(
list_for_each_safe(li, n, &dfp->dfp_work) {
list_del(li);
dfp->dfp_count--;
- error = dfp->dfp_type->finish_item(*tp, dop, li,
+ error = dfp->dfp_type->finish_item(*tp, li,
dfp->dfp_done, &state);
if (error == -EAGAIN) {
/*
@@ -396,7 +402,6 @@ xfs_defer_finish(
*/
if (cleanup_fn)
cleanup_fn(*tp, state, error);
- xfs_defer_trans_abort(*tp, dop, error);
goto out;
}
}
@@ -425,72 +430,72 @@ xfs_defer_finish(
}
out:
- (*tp)->t_agfl_dfops = orig_dop;
- if (error)
- trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
- else
- trace_xfs_defer_finish_done((*tp)->t_mountp, dop, _RET_IP_);
- return error;
+ if (error) {
+ xfs_defer_trans_abort(*tp, &dop_pending);
+ xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+ trace_xfs_defer_finish_error(*tp, error);
+ xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
+ xfs_defer_cancel(*tp);
+ return error;
+ }
+
+ trace_xfs_defer_finish_done(*tp, _RET_IP_);
+ return 0;
}
-/*
- * Free up any items left in the list.
- */
-void
-xfs_defer_cancel(
- struct xfs_defer_ops *dop)
+int
+xfs_defer_finish(
+ struct xfs_trans **tp)
{
- struct xfs_defer_pending *dfp;
- struct xfs_defer_pending *pli;
- struct list_head *pwi;
- struct list_head *n;
-
- trace_xfs_defer_cancel(NULL, dop, _RET_IP_);
+ int error;
/*
- * Free the pending items. Caller should already have arranged
- * for the intent items to be released.
+ * Finish and roll the transaction once more to avoid returning to the
+ * caller with a dirty transaction.
*/
- list_for_each_entry_safe(dfp, pli, &dop->dop_intake, dfp_list) {
- trace_xfs_defer_intake_cancel(NULL, dfp);
- list_del(&dfp->dfp_list);
- list_for_each_safe(pwi, n, &dfp->dfp_work) {
- list_del(pwi);
- dfp->dfp_count--;
- dfp->dfp_type->cancel_item(pwi);
- }
- ASSERT(dfp->dfp_count == 0);
- kmem_free(dfp);
- }
- list_for_each_entry_safe(dfp, pli, &dop->dop_pending, dfp_list) {
- trace_xfs_defer_pending_cancel(NULL, dfp);
- list_del(&dfp->dfp_list);
- list_for_each_safe(pwi, n, &dfp->dfp_work) {
- list_del(pwi);
- dfp->dfp_count--;
- dfp->dfp_type->cancel_item(pwi);
+ error = xfs_defer_finish_noroll(tp);
+ if (error)
+ return error;
+ if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
+ error = xfs_defer_trans_roll(tp);
+ if (error) {
+ xfs_force_shutdown((*tp)->t_mountp,
+ SHUTDOWN_CORRUPT_INCORE);
+ return error;
}
- ASSERT(dfp->dfp_count == 0);
- kmem_free(dfp);
}
+ xfs_defer_reset(*tp);
+ return 0;
+}
+
+void
+xfs_defer_cancel(
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+
+ trace_xfs_defer_cancel(tp, _RET_IP_);
+ xfs_defer_cancel_list(mp, &tp->t_dfops);
}
/* Add an item for later deferred processing. */
void
xfs_defer_add(
- struct xfs_defer_ops *dop,
+ struct xfs_trans *tp,
enum xfs_defer_ops_type type,
struct list_head *li)
{
struct xfs_defer_pending *dfp = NULL;
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+
/*
* Add the item to a pending item at the end of the intake list.
* If the last pending item has the same type, reuse it. Else,
* create a new pending item at the end of the intake list.
*/
- if (!list_empty(&dop->dop_intake)) {
- dfp = list_last_entry(&dop->dop_intake,
+ if (!list_empty(&tp->t_dfops)) {
+ dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
if (dfp->dfp_type->type != type ||
(dfp->dfp_type->max_items &&
@@ -505,7 +510,7 @@ xfs_defer_add(
dfp->dfp_done = NULL;
dfp->dfp_count = 0;
INIT_LIST_HEAD(&dfp->dfp_work);
- list_add_tail(&dfp->dfp_list, &dop->dop_intake);
+ list_add_tail(&dfp->dfp_list, &tp->t_dfops);
}
list_add_tail(li, &dfp->dfp_work);
@@ -520,15 +525,25 @@ xfs_defer_init_op_type(
defer_op_types[type->type] = type;
}
-/* Initialize a deferred operation. */
+/*
+ * Move deferred ops from one transaction to another and reset the source to
+ * initial state. This is primarily used to carry state forward across
+ * transaction rolls with pending dfops.
+ */
void
-xfs_defer_init(
- struct xfs_defer_ops *dop,
- xfs_fsblock_t *fbp)
+xfs_defer_move(
+ struct xfs_trans *dtp,
+ struct xfs_trans *stp)
{
- memset(dop, 0, sizeof(struct xfs_defer_ops));
- *fbp = NULLFSBLOCK;
- INIT_LIST_HEAD(&dop->dop_intake);
- INIT_LIST_HEAD(&dop->dop_pending);
- trace_xfs_defer_init(NULL, dop, _RET_IP_);
+ list_splice_init(&stp->t_dfops, &dtp->t_dfops);
+
+ /*
+ * Low free space mode was historically controlled by a dfops field.
+ * This meant that low mode state potentially carried across multiple
+ * transaction rolls. Transfer low mode on a dfops move to preserve
+ * that behavior.
+ */
+ dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
+
+ xfs_defer_reset(stp);
}