Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
-rw-r--r--	fs/xfs/xfs_log_cil.c	371
1 file changed, 190 insertions(+), 181 deletions(-)
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 02b9cf3f8252..cfe97973ba36 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -80,6 +80,83 @@ xlog_cil_init_post_recovery(
log->l_curr_block);
}
+STATIC int
+xlog_cil_lv_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ int index;
+ char *ptr;
+
+ /* format new vectors into array */
+ lip->li_ops->iop_format(lip, lv->lv_iovecp);
+
+ /* copy data into existing array */
+ ptr = lv->lv_buf;
+ for (index = 0; index < lv->lv_niovecs; index++) {
+ struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
+
+ memcpy(ptr, vec->i_addr, vec->i_len);
+ vec->i_addr = ptr;
+ ptr += vec->i_len;
+ }
+
+ /*
+ * some size calculations for log vectors over-estimate, so the caller
+ * doesn't know the amount of space actually used by the item. Return
+ * the byte count to the caller so they can check and store it
+ * appropriately.
+ */
+ return ptr - lv->lv_buf;
+}
+
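The returned byte count matters because ->iop_size is allowed to over-estimate: the caller records what was actually formatted, not the estimate. A minimal sketch of the calling pattern, mirroring the hunks further down (nbytes holds the ->iop_size estimate):

	lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
	ASSERT(lv->lv_buf_len <= nbytes);	/* the estimate is an upper bound */
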
+/*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+ struct xlog *log,
+ struct xfs_log_vec *lv,
+ struct xfs_log_vec *old_lv,
+ int *diff_len,
+ int *diff_iovecs)
+{
+ /* Account for the new LV being passed in */
+ if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+ *diff_len += lv->lv_buf_len;
+ *diff_iovecs += lv->lv_niovecs;
+ }
+
+ /*
+ * If there is no old LV, this is the first time we've seen the item in
+ * this CIL context and so we need to pin it. If we are replacing the
+ * old_lv, then remove the space it accounts for and free it.
+ */
+ if (!old_lv)
+ lv->lv_item->li_ops->iop_pin(lv->lv_item);
+ else if (old_lv != lv) {
+ ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+
+ *diff_len -= old_lv->lv_buf_len;
+ *diff_iovecs -= old_lv->lv_niovecs;
+ kmem_free(old_lv);
+ }
+
+ /* attach new log vector to log item */
+ lv->lv_item->li_lv = lv;
+
+ /*
+ * If this is the first time the item is being committed to the
+ * CIL, store the sequence number on the log item so we can
+ * tell in future commits whether this is the first checkpoint
+ * the item is being committed into.
+ */
+ if (!lv->lv_item->li_seq)
+ lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
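xfs_cil_prepare_item() only reports deltas; the caller aggregates them and charges the ticket once per commit. A sketch of how the two out parameters are consumed, following xlog_cil_insert_items() below (each new iovec also costs an op header in the log):

	int	len = 0;
	int	diff_iovecs = 0;

	/* per dirty item: accumulate the space and vector-count changes */
	xfs_cil_prepare_item(log, lv, old_lv, &len, &diff_iovecs);

	/* once per commit: account for the op headers the new iovecs need */
	len += diff_iovecs * sizeof(xlog_op_header_t);
	ctx->nvecs += diff_iovecs;
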
/*
 * Format a log item into a flat buffer
*
@@ -106,35 +183,39 @@ xlog_cil_init_post_recovery(
* format the regions into the iclog as though they are being formatted
* directly out of the objects themselves.
*/
-static struct xfs_log_vec *
-xlog_cil_prepare_log_vecs(
- struct xfs_trans *tp)
+static void
+xlog_cil_insert_format_items(
+ struct xlog *log,
+ struct xfs_trans *tp,
+ int *diff_len,
+ int *diff_iovecs)
{
struct xfs_log_item_desc *lidp;
- struct xfs_log_vec *lv = NULL;
- struct xfs_log_vec *ret_lv = NULL;
/* Bail out if we didn't find a log item. */
if (list_empty(&tp->t_items)) {
ASSERT(0);
- return NULL;
+ return;
}
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
- struct xfs_log_vec *new_lv;
- void *ptr;
- int index;
- int len = 0;
- uint niovecs;
+ struct xfs_log_item *lip = lidp->lid_item;
+ struct xfs_log_vec *lv;
+ struct xfs_log_vec *old_lv;
+ int niovecs = 0;
+ int nbytes = 0;
+ int buf_size;
bool ordered = false;
/* Skip items which aren't dirty in this transaction. */
if (!(lidp->lid_flags & XFS_LID_DIRTY))
continue;
+ /* get number of vecs and size of data to be stored */
+ lip->li_ops->iop_size(lip, &niovecs, &nbytes);
+
/* Skip items that do not have any vectors for writing */
- niovecs = IOP_SIZE(lidp->lid_item);
if (!niovecs)
continue;
@@ -146,109 +227,63 @@ xlog_cil_prepare_log_vecs(
if (niovecs == XFS_LOG_VEC_ORDERED) {
ordered = true;
niovecs = 0;
+ nbytes = 0;
}
- new_lv = kmem_zalloc(sizeof(*new_lv) +
- niovecs * sizeof(struct xfs_log_iovec),
- KM_SLEEP|KM_NOFS);
-
- new_lv->lv_item = lidp->lid_item;
- new_lv->lv_niovecs = niovecs;
- if (ordered) {
- /* track as an ordered logvec */
- new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
- goto next;
- }
-
- /* The allocated iovec region lies beyond the log vector. */
- new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
+ /* grab the old item if it exists for reservation accounting */
+ old_lv = lip->li_lv;
- /* build the vector array and calculate it's length */
- IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
- for (index = 0; index < new_lv->lv_niovecs; index++)
- len += new_lv->lv_iovecp[index].i_len;
+ /* calc buffer size */
+ buf_size = sizeof(struct xfs_log_vec) + nbytes +
+ niovecs * sizeof(struct xfs_log_iovec);
- new_lv->lv_buf_len = len;
- new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
- KM_SLEEP|KM_NOFS);
- ptr = new_lv->lv_buf;
+ /* compare to existing item size */
+ if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
+ /* same or smaller, optimise common overwrite case */
+ lv = lip->li_lv;
+ lv->lv_next = NULL;
- for (index = 0; index < new_lv->lv_niovecs; index++) {
- struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
+ if (ordered)
+ goto insert;
- memcpy(ptr, vec->i_addr, vec->i_len);
- vec->i_addr = ptr;
- ptr += vec->i_len;
- }
- ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
-
-next:
- if (!ret_lv)
- ret_lv = new_lv;
- else
- lv->lv_next = new_lv;
- lv = new_lv;
- }
-
- return ret_lv;
-}
-
-/*
- * Prepare the log item for insertion into the CIL. Calculate the difference in
- * log space and vectors it will consume, and if it is a new item pin it as
- * well.
- */
-STATIC void
-xfs_cil_prepare_item(
- struct xlog *log,
- struct xfs_log_vec *lv,
- int *len,
- int *diff_iovecs)
-{
- struct xfs_log_vec *old = lv->lv_item->li_lv;
+ /*
+ * set the item up as though it is a new insertion so
+ * that the space reservation accounting is correct.
+ */
+ *diff_iovecs -= lv->lv_niovecs;
+ *diff_len -= lv->lv_buf_len;
- if (old) {
- /* existing lv on log item, space used is a delta */
- ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) ||
- old->lv_buf_len == XFS_LOG_VEC_ORDERED);
+ /* Ensure the lv is set up according to ->iop_size */
+ lv->lv_niovecs = niovecs;
+ lv->lv_buf = (char *)lv + buf_size - nbytes;
- /*
- * If the new item is ordered, keep the old one that is already
- * tracking dirty or ordered regions
- */
- if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
- ASSERT(!lv->lv_buf);
- kmem_free(lv);
- return;
+ lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
+ goto insert;
}
- *len += lv->lv_buf_len - old->lv_buf_len;
- *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
- kmem_free(old->lv_buf);
- kmem_free(old);
- } else {
- /* new lv, must pin the log item */
- ASSERT(!lv->lv_item->li_lv);
-
- if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
- *len += lv->lv_buf_len;
- *diff_iovecs += lv->lv_niovecs;
+ /* allocate new data chunk */
+ lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+ lv->lv_item = lip;
+ lv->lv_size = buf_size;
+ lv->lv_niovecs = niovecs;
+ if (ordered) {
+ /* track as an ordered logvec */
+ ASSERT(lip->li_lv == NULL);
+ lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+ goto insert;
}
- IOP_PIN(lv->lv_item);
- }
+ /* The allocated iovec region lies beyond the log vector. */
+ lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
- /* attach new log vector to log item */
- lv->lv_item->li_lv = lv;
+ /* The allocated data region lies beyond the iovec region */
+ lv->lv_buf = (char *)lv + buf_size - nbytes;
- /*
- * If this is the first time the item is being committed to the
- * CIL, store the sequence number on the log item so we can
- * tell in future commits whether this is the first checkpoint
- * the item is being committed into.
- */
- if (!lv->lv_item->li_seq)
- lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+ lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
+insert:
+ ASSERT(lv->lv_buf_len <= nbytes);
+ xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
+ }
}
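Where the old code made two allocations per log vector (the vector itself plus a separate lv_buf), the new code carves both out of a single buf_size allocation; this is also why xlog_cil_free_logvec() below no longer frees lv->lv_buf separately. A sketch of the layout set up above:

	/*
	 *  lv                    lv->lv_iovecp = &lv[1]     lv->lv_buf
	 *  |                     |                          |
	 *  v                     v                          v
	 *  +---------------------+--------------------------+--------------+
	 *  | struct xfs_log_vec  | niovecs * xfs_log_iovec  | nbytes data  |
	 *  +---------------------+--------------------------+--------------+
	 *  |<------------------------ buf_size ------------------------->|
	 *
	 *  lv->lv_buf = (char *)lv + buf_size - nbytes;
	 */
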
/*
@@ -261,53 +296,47 @@ xfs_cil_prepare_item(
static void
xlog_cil_insert_items(
struct xlog *log,
- struct xfs_log_vec *log_vector,
- struct xlog_ticket *ticket)
+ struct xfs_trans *tp)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx = cil->xc_ctx;
- struct xfs_log_vec *lv;
+ struct xfs_log_item_desc *lidp;
int len = 0;
int diff_iovecs = 0;
int iclog_space;
- ASSERT(log_vector);
+ ASSERT(tp);
/*
- * Do all the accounting aggregation and switching of log vectors
- * around in a separate loop to the insertion of items into the CIL.
- * Then we can do a separate loop to update the CIL within a single
- * lock/unlock pair. This reduces the number of round trips on the CIL
- * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
- * hold time for the transaction commit.
- *
- * If this is the first time the item is being placed into the CIL in
- * this context, pin it so it can't be written to disk until the CIL is
- * flushed to the iclog and the iclog written to disk.
- *
* We can do this safely because the context can't checkpoint until we
* are done so it doesn't matter exactly how we update the CIL.
*/
+ xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
+
+ /*
+ * Now (re-)position everything modified at the tail of the CIL.
+ * We do this here so we only need to take the CIL lock once during
+ * the transaction commit.
+ */
spin_lock(&cil->xc_cil_lock);
- for (lv = log_vector; lv; ) {
- struct xfs_log_vec *next = lv->lv_next;
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
- ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil));
- lv->lv_next = NULL;
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
- /*
- * xfs_cil_prepare_item() may free the lv, so move the item on
- * the CIL first.
- */
- list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
- xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
- lv = next;
+ list_move_tail(&lip->li_cil, &cil->xc_cil);
}
/* account for space used by new iovec headers */
len += diff_iovecs * sizeof(xlog_op_header_t);
ctx->nvecs += diff_iovecs;
+ /* attach the transaction to the CIL if it has any busy extents */
+ if (!list_empty(&tp->t_busy))
+ list_splice_init(&tp->t_busy, &ctx->busy_extents);
+
/*
* Now transfer enough transaction reservation to the context ticket
* for the checkpoint. The context ticket is special - the unit
@@ -316,10 +345,8 @@ xlog_cil_insert_items(
* during the transaction commit.
*/
if (ctx->ticket->t_curr_res == 0) {
- /* first commit in checkpoint, steal the header reservation */
- ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
- ticket->t_curr_res -= ctx->ticket->t_unit_res;
+ tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
}
/* do we need space for more log record headers? */
@@ -333,10 +360,10 @@ xlog_cil_insert_items(
hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
ctx->ticket->t_unit_res += hdrs;
ctx->ticket->t_curr_res += hdrs;
- ticket->t_curr_res -= hdrs;
- ASSERT(ticket->t_curr_res >= len);
+ tp->t_ticket->t_curr_res -= hdrs;
+ ASSERT(tp->t_ticket->t_curr_res >= len);
}
- ticket->t_curr_res -= len;
+ tp->t_ticket->t_curr_res -= len;
ctx->space_used += len;
spin_unlock(&cil->xc_cil_lock);
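
To make the accounting concrete, a purely illustrative example with hypothetical numbers (len is the diff_len accumulated by xlog_cil_insert_format_items()):

	int	len = 480;		/* new formatted bytes, hypothetical */
	int	diff_iovecs = 2;	/* new iovecs, hypothetical */

	/* each new iovec is written with its own op header */
	len += diff_iovecs * sizeof(xlog_op_header_t);

	tp->t_ticket->t_curr_res -= len;	/* charge the transaction */
	ctx->space_used += len;			/* grow the checkpoint */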
@@ -350,7 +377,6 @@ xlog_cil_free_logvec(
for (lv = log_vector; lv; ) {
struct xfs_log_vec *next = lv->lv_next;
- kmem_free(lv->lv_buf);
kmem_free(lv);
lv = next;
}
@@ -376,9 +402,9 @@ xlog_cil_committed(
xfs_extent_busy_clear(mp, &ctx->busy_extents,
(mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
- spin_lock(&ctx->cil->xc_cil_lock);
+ spin_lock(&ctx->cil->xc_push_lock);
list_del(&ctx->committing);
- spin_unlock(&ctx->cil->xc_cil_lock);
+ spin_unlock(&ctx->cil->xc_push_lock);
xlog_cil_free_logvec(ctx->lv_chain);
@@ -433,7 +459,7 @@ xlog_cil_push(
down_write(&cil->xc_ctx_lock);
ctx = cil->xc_ctx;
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
push_seq = cil->xc_push_seq;
ASSERT(push_seq <= ctx->sequence);
@@ -444,10 +470,10 @@ xlog_cil_push(
*/
if (list_empty(&cil->xc_cil)) {
cil->xc_push_seq = 0;
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
goto out_skip;
}
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
	/* check for a previously pushed sequence */
@@ -515,9 +541,9 @@ xlog_cil_push(
* that higher sequences will wait for us to write out a commit record
* before they do.
*/
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
list_add(&ctx->committing, &cil->xc_committing);
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
up_write(&cil->xc_ctx_lock);
/*
@@ -552,7 +578,7 @@ xlog_cil_push(
* order the commit records so replay will get them in the right order.
*/
restart:
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
/*
* Higher sequences will wait for this one so skip them.
@@ -565,11 +591,11 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
goto restart;
}
}
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
/* xfs_log_done always frees the ticket on error. */
commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
@@ -588,10 +614,10 @@ restart:
* callbacks to the iclog we can assign the commit LSN to the context
* and wake up anyone who is waiting for the commit to complete.
*/
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
ctx->commit_lsn = commit_lsn;
wake_up_all(&cil->xc_commit_wait);
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
/* release the hounds! */
return xfs_log_release_iclog(log->l_mp, commit_iclog);
@@ -644,12 +670,12 @@ xlog_cil_push_background(
if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
return;
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
if (cil->xc_push_seq < cil->xc_current_sequence) {
cil->xc_push_seq = cil->xc_current_sequence;
queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
}
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
}
@@ -672,14 +698,14 @@ xlog_cil_push_foreground(
* If the CIL is empty or we've already pushed the sequence then
* there's no work we need to do.
*/
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
return;
}
cil->xc_push_seq = push_seq;
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
/* do the push now */
xlog_cil_push(log);
@@ -706,43 +732,25 @@ xfs_log_commit_cil(
int flags)
{
struct xlog *log = mp->m_log;
+ struct xfs_cil *cil = log->l_cilp;
int log_flags = 0;
- struct xfs_log_vec *log_vector;
if (flags & XFS_TRANS_RELEASE_LOG_RES)
log_flags = XFS_LOG_REL_PERM_RESERV;
- /*
- * Do all the hard work of formatting items (including memory
- * allocation) outside the CIL context lock. This prevents stalling CIL
- * pushes when we are low on memory and a transaction commit spends a
- * lot of time in memory reclaim.
- */
- log_vector = xlog_cil_prepare_log_vecs(tp);
- if (!log_vector)
- return ENOMEM;
-
/* lock out background commit */
- down_read(&log->l_cilp->xc_ctx_lock);
- if (commit_lsn)
- *commit_lsn = log->l_cilp->xc_ctx->sequence;
+ down_read(&cil->xc_ctx_lock);
- /* xlog_cil_insert_items() destroys log_vector list */
- xlog_cil_insert_items(log, log_vector, tp->t_ticket);
+ xlog_cil_insert_items(log, tp);
/* check we didn't blow the reservation */
if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(log->l_mp, tp->t_ticket);
+ xlog_print_tic_res(mp, tp->t_ticket);
- /* attach the transaction to the CIL if it has any busy extents */
- if (!list_empty(&tp->t_busy)) {
- spin_lock(&log->l_cilp->xc_cil_lock);
- list_splice_init(&tp->t_busy,
- &log->l_cilp->xc_ctx->busy_extents);
- spin_unlock(&log->l_cilp->xc_cil_lock);
- }
+ tp->t_commit_lsn = cil->xc_ctx->sequence;
+ if (commit_lsn)
+ *commit_lsn = tp->t_commit_lsn;
- tp->t_commit_lsn = *commit_lsn;
xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
xfs_trans_unreserve_and_mod_sb(tp);
@@ -757,11 +765,11 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, *commit_lsn, 0);
+ xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
xlog_cil_push_background(log);
- up_read(&log->l_cilp->xc_ctx_lock);
+ up_read(&cil->xc_ctx_lock);
return 0;
}
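
With log vector preparation folded into xlog_cil_insert_items(), the whole commit now runs under a single read hold of xc_ctx_lock. A condensed sketch of the resulting xfs_log_commit_cil() flow (unchanged steps elided):

	down_read(&cil->xc_ctx_lock);		/* lock out background commit */
	xlog_cil_insert_items(log, tp);		/* format, insert, account */
	tp->t_commit_lsn = cil->xc_ctx->sequence;
	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
	...
	xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
	xlog_cil_push_background(log);
	up_read(&cil->xc_ctx_lock);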
@@ -800,7 +808,7 @@ xlog_cil_force_lsn(
* on commits for those as well.
*/
restart:
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
list_for_each_entry(ctx, &cil->xc_committing, committing) {
if (ctx->sequence > sequence)
continue;
@@ -809,7 +817,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
goto restart;
}
if (ctx->sequence != sequence)
@@ -817,7 +825,7 @@ restart:
/* found it! */
commit_lsn = ctx->commit_lsn;
}
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
return commit_lsn;
}
@@ -875,6 +883,7 @@ xlog_cil_init(
INIT_LIST_HEAD(&cil->xc_cil);
INIT_LIST_HEAD(&cil->xc_committing);
spin_lock_init(&cil->xc_cil_lock);
+ spin_lock_init(&cil->xc_push_lock);
init_rwsem(&cil->xc_ctx_lock);
init_waitqueue_head(&cil->xc_commit_wait);
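
Taken together, the xc_cil_lock -> xc_push_lock conversions above split what was a single CIL lock into two domains; as far as these hunks show, the split is:

	/*
	 * xc_cil_lock  - insertion side: the xc_cil item list and the
	 *		  per-context space/reservation accounting
	 * xc_push_lock - push side: xc_push_seq, the xc_committing list
	 *		  and the xc_commit_wait waiters
	 */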