Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r--	fs/xfs/xfs_buf.c | 82
1 file changed, 51 insertions(+), 31 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 5fa6cd947dd4..bf4e60871068 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -14,13 +14,14 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_recover.h"
+#include "xfs_log_priv.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_ag.h"
-static kmem_zone_t *xfs_buf_zone;
+static struct kmem_cache *xfs_buf_cache;
/*
* Locking orders
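
Note: the xfs_buf_zone -> xfs_buf_cache rename throughout this patch is part of retiring XFS's legacy kmem_zone_t alias, which was never more than a typedef over the generic slab cache type. For reference, the old alias in fs/xfs/kmem.h looked like this:

	typedef struct kmem_cache kmem_zone_t;	/* old alias, now removed */
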
@@ -220,7 +221,7 @@ _xfs_buf_alloc(
int i;
*bpp = NULL;
- bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL);
+ bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL);
/*
* We don't want certain flags to appear in b_flags unless they are
@@ -247,7 +248,7 @@ _xfs_buf_alloc(
*/
error = xfs_buf_get_maps(bp, nmaps);
if (error) {
- kmem_cache_free(xfs_buf_zone, bp);
+ kmem_cache_free(xfs_buf_cache, bp);
return error;
}
@@ -307,7 +308,7 @@ xfs_buf_free(
kmem_free(bp->b_addr);
xfs_buf_free_maps(bp);
- kmem_cache_free(xfs_buf_zone, bp);
+ kmem_cache_free(xfs_buf_cache, bp);
}
static int
@@ -394,7 +395,7 @@ xfs_buf_alloc_pages(
}
XFS_STATS_INC(bp->b_mount, xb_page_retries);
- congestion_wait(BLK_RW_ASYNC, HZ / 50);
+ memalloc_retry_wait(gfp_mask);
}
return 0;
}
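
Note: memalloc_retry_wait() (include/linux/sched/mm.h, added in v5.17) replaces the removed congestion_wait() back-off. A minimal sketch of the retry pattern it is meant for, assuming a page-at-a-time allocation loop like the one above:

	for (;;) {
		struct page *page = alloc_page(gfp_mask);

		if (page)
			return page;
		/* Short uninterruptible sleep (one jiffy up to HZ/50,
		 * depending on the gfp flags) before retrying. */
		memalloc_retry_wait(gfp_mask);
	}
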
@@ -405,7 +406,7 @@ xfs_buf_alloc_pages(
STATIC int
_xfs_buf_map_pages(
struct xfs_buf *bp,
- uint flags)
+ xfs_buf_flags_t flags)
{
ASSERT(bp->b_flags & _XBF_PAGES);
if (bp->b_page_count == 1) {
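
Note: the flag-argument conversions in this and the following prototypes swap a bare int/uint for the dedicated xfs_buf_flags_t, making the flag word's intent visible in the signature. The typedef itself (longstanding, in fs/xfs/xfs_buf.h) is simply:

	typedef unsigned int	xfs_buf_flags_t;
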
@@ -813,7 +814,15 @@ xfs_buf_read_map(
* buffer.
*/
if (error) {
- if (!xfs_is_shutdown(target->bt_mount))
+ /*
+ * Check against log shutdown for error reporting because
+ * metadata writeback may require a read first and we need to
+ * report errors in metadata writeback until the log is shut
+ * down. High level transaction read functions already check
+ * against mount shutdown, anyway, so we only need to be
+ * concerned about low level IO interactions here.
+ */
+ if (!xlog_is_shutdown(target->bt_mount->m_log))
xfs_buf_ioerror_alert(bp, fa);
bp->b_flags &= ~XBF_DONE;
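
Note: the two predicates being swapped throughout this patch test different state: the mount's operational state versus the log's. Simplified for illustration (the real definitions live in fs/xfs/xfs_mount.h and fs/xfs/xfs_log_priv.h, the latter being the new include above):

	static inline bool xfs_is_shutdown(struct xfs_mount *mp)
	{
		return test_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate);
	}

	static inline bool xlog_is_shutdown(struct xlog *log)
	{
		return test_bit(XLOG_IO_ERROR, &log->l_opstate);
	}
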
@@ -843,9 +852,6 @@ xfs_buf_readahead_map(
{
struct xfs_buf *bp;
- if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
- return;
-
xfs_buf_read_map(target, map, nmaps,
XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
__this_address);
@@ -862,7 +868,7 @@ xfs_buf_read_uncached(
struct xfs_buftarg *target,
xfs_daddr_t daddr,
size_t numblks,
- int flags,
+ xfs_buf_flags_t flags,
struct xfs_buf **bpp,
const struct xfs_buf_ops *ops)
{
@@ -897,7 +903,7 @@ int
xfs_buf_get_uncached(
struct xfs_buftarg *target,
size_t numblks,
- int flags,
+ xfs_buf_flags_t flags,
struct xfs_buf **bpp)
{
int error;
@@ -1177,10 +1183,10 @@ xfs_buf_ioend_handle_error(
struct xfs_error_cfg *cfg;
/*
- * If we've already decided to shutdown the filesystem because of I/O
- * errors, there's no point in giving this a retry.
+ * If we've already shut down the journal because of I/O errors, there's
+ * no point in giving this a retry.
*/
- if (xfs_is_shutdown(mp))
+ if (xlog_is_shutdown(mp->m_log))
goto out_stale;
xfs_buf_ioerror_alert_ratelimited(bp);
@@ -1440,12 +1446,10 @@ next_chunk:
atomic_inc(&bp->b_io_remaining);
nr_pages = bio_max_segs(total_nr_pages);
- bio = bio_alloc(GFP_NOIO, nr_pages);
- bio_set_dev(bio, bp->b_target->bt_bdev);
+ bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
- bio->bi_opf = op;
for (; size && nr_pages; nr_pages--, page_index++) {
int rbytes, nbytes = PAGE_SIZE - offset;
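
Note: the bio_alloc() rework follows the v5.18 block-layer API, which takes the target device and operation at allocation time:

	struct bio *bio_alloc(struct block_device *bdev,
			      unsigned short nr_vecs, unsigned int opf,
			      gfp_t gfp_mask);

so the separate bio_set_dev() call and bi_opf assignment are no longer needed, as the hunk above shows.
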
@@ -1593,8 +1597,23 @@ __xfs_buf_submit(
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
- /* on shutdown we stale and complete the buffer immediately */
- if (xfs_is_shutdown(bp->b_mount)) {
+ /*
+ * On log shutdown we stale and complete the buffer immediately. We can
+ * be called to read the superblock before the log has been set up, so
+ * be careful checking the log state.
+ *
+ * Checking the mount shutdown state here can result in the log tail
+ * moving inappropriately on disk as the log may not yet be shut down.
+ * i.e. failing this buffer on mount shutdown can remove it from the AIL
+ * and move the tail of the log forwards without having written this
+ * buffer to disk. This corrupts the log tail state in memory, and
+ * because the log may not be shut down yet, it can then be propagated
+ * to disk before the log is shut down. Hence we check log shutdown
+ * state here rather than mount state to avoid corrupting the log tail
+ * on shutdown.
+ */
+ if (bp->b_mount->m_log &&
+ xlog_is_shutdown(bp->b_mount->m_log)) {
xfs_buf_ioend_fail(bp);
return -EIO;
}
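
Note: the bp->b_mount->m_log NULL test above matters because the superblock is read through the buffer cache during early mount, before xfs_log_mount() has set up mp->m_log. A hypothetical helper capturing the NULL-safe test (illustration only, not part of the patch):

	/* Hypothetical: NULL-safe "has the log been shut down?" check. */
	static inline bool xfs_buf_log_shutdown(struct xfs_buf *bp)
	{
		struct xlog	*log = bp->b_mount->m_log;

		return log && xlog_is_shutdown(log);
	}
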
@@ -1808,10 +1827,10 @@ xfs_buftarg_drain(
* If one or more failed buffers were freed, that means dirty metadata
* was thrown away. This should only ever happen after I/O completion
* handling has elevated I/O error(s) to permanent failures and shuts
- * down the fs.
+ * down the journal.
*/
if (write_fail) {
- ASSERT(xfs_is_shutdown(btp->bt_mount));
+ ASSERT(xlog_is_shutdown(btp->bt_mount->m_log));
xfs_alert(btp->bt_mount,
"Please run xfs_repair to determine the extent of the problem.");
}
@@ -1892,6 +1911,7 @@ xfs_free_buftarg(
list_lru_destroy(&btp->bt_lru);
blkdev_issue_flush(btp->bt_bdev);
+ fs_put_dax(btp->bt_daxdev);
kmem_free(btp);
}
@@ -1932,11 +1952,10 @@ xfs_setsize_buftarg_early(
return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
}
-xfs_buftarg_t *
+struct xfs_buftarg *
xfs_alloc_buftarg(
struct xfs_mount *mp,
- struct block_device *bdev,
- struct dax_device *dax_dev)
+ struct block_device *bdev)
{
xfs_buftarg_t *btp;
@@ -1945,7 +1964,7 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
- btp->bt_daxdev = dax_dev;
+ btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
/*
* Buffer IO error rate limiting. Limit it to no more than 10 messages
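
Note: the buftarg now takes its own DAX device reference at setup instead of having one passed in, which is why xfs_free_buftarg() above gains the matching fs_put_dax() call. The get/put pair, per the v5.17 DAX API in include/linux/dax.h:

	/* look up the dax_device backing bdev; the offset of the
	 * partition within that device is returned in *start_off */
	struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
					      u64 *start_off);
	/* drop the reference taken above */
	void fs_put_dax(struct dax_device *dax_dev);
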
@@ -2094,12 +2113,13 @@ xfs_buf_delwri_submit_buffers(
blk_start_plug(&plug);
list_for_each_entry_safe(bp, n, buffer_list, b_list) {
if (!wait_list) {
+ if (!xfs_buf_trylock(bp))
+ continue;
if (xfs_buf_ispinned(bp)) {
+ xfs_buf_unlock(bp);
pinned++;
continue;
}
- if (!xfs_buf_trylock(bp))
- continue;
} else {
xfs_buf_lock(bp);
}
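
Note: the reordering in the non-wait path closes a race. A buffer can only be pinned by a transaction that holds the buffer lock, so the old unlocked pin check could go stale before the trylock succeeded, and submission would then stall waiting for the buffer to unpin instead of the caller counting it as pinned and forcing the log. The old ordering, condensed for contrast:

	/* Racy: the buffer can be locked, committed and pinned between
	 * these two checks, so we may submit a pinned buffer. */
	if (xfs_buf_ispinned(bp))
		continue;
	if (!xfs_buf_trylock(bp))
		continue;
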
@@ -2258,12 +2278,12 @@ xfs_buf_delwri_pushbuf(
int __init
xfs_buf_init(void)
{
- xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
+ xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD,
NULL);
- if (!xfs_buf_zone)
+ if (!xfs_buf_cache)
goto out;
return 0;
@@ -2275,7 +2295,7 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
- kmem_cache_destroy(xfs_buf_zone);
+ kmem_cache_destroy(xfs_buf_cache);
}
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)