diff options
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 371 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 7 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_format.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 77 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 50 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 30 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 47 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 564 | ||||
-rw-r--r-- | fs/xfs/xfs_rtalloc.c | 2 |
12 files changed, 763 insertions, 416 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index de2d26d32844..79c981984dca 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5404,39 +5404,237 @@ error0: } /* + * Determine whether an extent shift can be accomplished by a merge with the + * extent that precedes the target hole of the shift. + */ +STATIC bool +xfs_bmse_can_merge( + struct xfs_bmbt_irec *left, /* preceding extent */ + struct xfs_bmbt_irec *got, /* current extent to shift */ + xfs_fileoff_t shift) /* shift fsb */ +{ + xfs_fileoff_t startoff; + + startoff = got->br_startoff - shift; + + /* + * The extent, once shifted, must be adjacent in-file and on-disk with + * the preceding extent. + */ + if ((left->br_startoff + left->br_blockcount != startoff) || + (left->br_startblock + left->br_blockcount != got->br_startblock) || + (left->br_state != got->br_state) || + (left->br_blockcount + got->br_blockcount > MAXEXTLEN)) + return false; + + return true; +} + +/* + * A bmap extent shift adjusts the file offset of an extent to fill a preceding + * hole in the file. If an extent shift would result in the extent being fully + * adjacent to the extent that currently precedes the hole, we can merge with + * the preceding extent rather than do the shift. + * + * This function assumes the caller has verified a shift-by-merge is possible + * with the provided extents via xfs_bmse_can_merge(). + */ +STATIC int +xfs_bmse_merge( + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t shift, /* shift fsb */ + int current_ext, /* idx of gotp */ + struct xfs_bmbt_rec_host *gotp, /* extent to shift */ + struct xfs_bmbt_rec_host *leftp, /* preceding extent */ + struct xfs_btree_cur *cur, + int *logflags) /* output */ +{ + struct xfs_ifork *ifp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + xfs_filblks_t blockcount; + int error, i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_bmbt_get_all(gotp, &got); + xfs_bmbt_get_all(leftp, &left); + blockcount = left.br_blockcount + got.br_blockcount; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(xfs_bmse_can_merge(&left, &got, shift)); + + /* + * Merge the in-core extents. Note that the host record pointers and + * current_ext index are invalid once the extent has been removed via + * xfs_iext_remove(). + */ + xfs_bmbt_set_blockcount(leftp, blockcount); + xfs_iext_remove(ip, current_ext, 1, 0); + + /* + * Update the on-disk extent count, the btree if necessary and log the + * inode. + */ + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + *logflags |= XFS_ILOG_CORE; + if (!cur) { + *logflags |= XFS_ILOG_DEXT; + return 0; + } + + /* lookup and remove the extent to merge */ + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + error = xfs_btree_delete(cur, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + /* lookup and update size of the previous extent */ + error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, + left.br_blockcount, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + left.br_blockcount = blockcount; + + error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, + left.br_blockcount, left.br_state); + if (error) + goto out_error; + + return 0; + +out_error: + return error; +} + +/* + * Shift a single extent. + */ +STATIC int +xfs_bmse_shift_one( + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t offset_shift_fsb, + int *current_ext, + struct xfs_bmbt_rec_host *gotp, + struct xfs_btree_cur *cur, + int *logflags) +{ + struct xfs_ifork *ifp; + xfs_fileoff_t startoff; + struct xfs_bmbt_rec_host *leftp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + int error; + int i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + + xfs_bmbt_get_all(gotp, &got); + startoff = got.br_startoff - offset_shift_fsb; + + /* delalloc extents should be prevented by caller */ + XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock), + out_error); + + /* + * If this is the first extent in the file, make sure there's enough + * room at the start of the file and jump right to the shift as there's + * no left extent to merge. + */ + if (*current_ext == 0) { + if (got.br_startoff < offset_shift_fsb) + return -EINVAL; + goto shift_extent; + } + + /* grab the left extent and check for a large enough hole */ + leftp = xfs_iext_get_ext(ifp, *current_ext - 1); + xfs_bmbt_get_all(leftp, &left); + + if (startoff < left.br_startoff + left.br_blockcount) + return -EINVAL; + + /* check whether to merge the extent or shift it down */ + if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) + goto shift_extent; + + return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext, + gotp, leftp, cur, logflags); + +shift_extent: + /* + * Increment the extent index for the next iteration, update the start + * offset of the in-core extent and update the btree if applicable. + */ + (*current_ext)++; + xfs_bmbt_set_startoff(gotp, startoff); + *logflags |= XFS_ILOG_CORE; + if (!cur) { + *logflags |= XFS_ILOG_DEXT; + return 0; + } + + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + got.br_startoff = startoff; + error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, got.br_state); + if (error) + return error; + + return 0; + +out_error: + return error; +} + +/* * Shift extent records to the left to cover a hole. * - * The maximum number of extents to be shifted in a single operation - * is @num_exts, and @current_ext keeps track of the current extent - * index we have shifted. @offset_shift_fsb is the length by which each - * extent is shifted. If there is no hole to shift the extents - * into, this will be considered invalid operation and we abort immediately. + * The maximum number of extents to be shifted in a single operation is + * @num_exts. @start_fsb specifies the file offset to start the shift and the + * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb + * is the length by which each extent is shifted. If there is no hole to shift + * the extents into, this will be considered invalid operation and we abort + * immediately. */ int xfs_bmap_shift_extents( struct xfs_trans *tp, struct xfs_inode *ip, - int *done, xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, - xfs_extnum_t *current_ext, + int *done, + xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, int num_exts) { - struct xfs_btree_cur *cur; + struct xfs_btree_cur *cur = NULL; struct xfs_bmbt_rec_host *gotp; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec left; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; xfs_extnum_t nexts = 0; - xfs_fileoff_t startoff; + xfs_extnum_t current_ext; int error = 0; - int i; int whichfork = XFS_DATA_FORK; - int logflags; - xfs_filblks_t blockcount = 0; + int logflags = 0; int total_extents; if (unlikely(XFS_TEST_ERROR( @@ -5451,7 +5649,8 @@ xfs_bmap_shift_extents( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - ASSERT(current_ext != NULL); + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ifp = XFS_IFORK_PTR(ip, whichfork); if (!(ifp->if_flags & XFS_IFEXTENTS)) { @@ -5461,142 +5660,62 @@ xfs_bmap_shift_extents( return error; } - /* - * If *current_ext is 0, we would need to lookup the extent - * from where we would start shifting and store it in gotp. - */ - if (!*current_ext) { - gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); - /* - * gotp can be null in 2 cases: 1) if there are no extents - * or 2) start_fsb lies in a hole beyond which there are - * no extents. Either way, we are done. - */ - if (!gotp) { - *done = 1; - return 0; - } - } - - /* We are going to change core inode */ - logflags = XFS_ILOG_CORE; if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = 0; - } else { - cur = NULL; - logflags |= XFS_ILOG_DEXT; + } + + /* + * Look up the extent index for the fsb where we start shifting. We can + * henceforth iterate with current_ext as extent list changes are locked + * out via ilock. + * + * gotp can be null in 2 cases: 1) if there are no extents or 2) + * start_fsb lies in a hole beyond which there are no extents. Either + * way, we are done. + */ + gotp = xfs_iext_bno_to_ext(ifp, start_fsb, ¤t_ext); + if (!gotp) { + *done = 1; + goto del_cursor; } /* * There may be delalloc extents in the data fork before the range we - * are collapsing out, so we cannot - * use the count of real extents here. Instead we have to calculate it - * from the incore fork. + * are collapsing out, so we cannot use the count of real extents here. + * Instead we have to calculate it from the incore fork. */ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - while (nexts++ < num_exts && *current_ext < total_extents) { - - gotp = xfs_iext_get_ext(ifp, *current_ext); - xfs_bmbt_get_all(gotp, &got); - startoff = got.br_startoff - offset_shift_fsb; - - /* - * Before shifting extent into hole, make sure that the hole - * is large enough to accomodate the shift. - */ - if (*current_ext) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - *current_ext - 1), &left); - - if (startoff < left.br_startoff + left.br_blockcount) - error = -EINVAL; - } else if (offset_shift_fsb > got.br_startoff) { - /* - * When first extent is shifted, offset_shift_fsb - * should be less than the stating offset of - * the first extent. - */ - error = -EINVAL; - } - + while (nexts++ < num_exts && current_ext < total_extents) { + error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, + ¤t_ext, gotp, cur, &logflags); if (error) goto del_cursor; - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - /* Check if we can merge 2 adjacent extents */ - if (*current_ext && - left.br_startoff + left.br_blockcount == startoff && - left.br_startblock + left.br_blockcount == - got.br_startblock && - left.br_state == got.br_state && - left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { - blockcount = left.br_blockcount + - got.br_blockcount; - xfs_iext_remove(ip, *current_ext, 1, 0); - if (cur) { - error = xfs_btree_delete(cur, &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); - gotp = xfs_iext_get_ext(ifp, --*current_ext); - xfs_bmbt_get_all(gotp, &got); - - /* Make cursor point to the extent we will update */ - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - xfs_bmbt_set_blockcount(gotp, blockcount); - got.br_blockcount = blockcount; - } else { - /* We have to update the startoff */ - xfs_bmbt_set_startoff(gotp, startoff); - got.br_startoff = startoff; - } - - if (cur) { - error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state); - if (error) - goto del_cursor; - } - - (*current_ext)++; + /* update total extent count and grab the next record */ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + if (current_ext >= total_extents) + break; + gotp = xfs_iext_get_ext(ifp, current_ext); } /* Check if we are done */ - if (*current_ext == total_extents) + if (current_ext == total_extents) { *done = 1; + } else if (next_fsb) { + xfs_bmbt_get_all(gotp, &got); + *next_fsb = got.br_startoff; + } del_cursor: if (cur) xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_trans_log_inode(tp, ip, logflags); + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index b879ca56a64c..44db6db86402 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -178,9 +178,8 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, - int *done, xfs_fileoff_t start_fsb, - xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, - xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, - int num_exts); + xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, + int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, + struct xfs_bmap_free *flist, int num_exts); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index c9aee52a37e2..7e42fdfd2f1d 100644 --- a/fs/xfs/libxfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c @@ -270,7 +270,6 @@ xfs_dir3_data_get_ftype( { __uint8_t ftype = dep->name[dep->namelen]; - ASSERT(ftype < XFS_DIR3_FT_MAX); if (ftype >= XFS_DIR3_FT_MAX) return XFS_DIR3_FT_UNKNOWN; return ftype; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11e9b4caa54f..2f502537a39c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -434,10 +434,22 @@ xfs_start_page_writeback( { ASSERT(PageLocked(page)); ASSERT(!PageWriteback(page)); - if (clear_dirty) + + /* + * if the page was not fully cleaned, we need to ensure that the higher + * layers come back to it correctly. That means we need to keep the page + * dirty, and for WB_SYNC_ALL writeback we need to ensure the + * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to + * write this page in this writeback sweep will be made. + */ + if (clear_dirty) { clear_page_dirty_for_io(page); - set_page_writeback(page); + set_page_writeback(page); + } else + set_page_writeback_keepwrite(page); + unlock_page(page); + /* If no buffers on the page are to be written, finish it here */ if (!buffers) end_page_writeback(page); @@ -1753,11 +1765,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2f1e30d39a35..d8b77b5bf4d9 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1205,6 +1205,7 @@ xfs_free_file_space( xfs_bmap_free_t free_list; xfs_bmbt_irec_t imap; xfs_off_t ioffset; + xfs_off_t iendoffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; @@ -1233,12 +1234,13 @@ xfs_free_file_space( inode_dio_wait(VFS_I(ip)); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); - ioffset = offset & ~(rounding - 1); - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - ioffset, -1); + ioffset = round_down(offset, rounding); + iendoffset = round_up(offset + len, rounding) - 1; + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, + iendoffset); if (error) goto out; - truncate_pagecache_range(VFS_I(ip), ioffset, -1); + truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset); /* * Need to zero the stuff we're not freeing, on disk. @@ -1456,24 +1458,50 @@ xfs_collapse_file_space( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error; - xfs_extnum_t current_ext = 0; struct xfs_bmap_free free_list; xfs_fsblock_t first_block; int committed; xfs_fileoff_t start_fsb; + xfs_fileoff_t next_fsb; xfs_fileoff_t shift_fsb; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); trace_xfs_collapse_file_space(ip); - start_fsb = XFS_B_TO_FSB(mp, offset + len); + next_fsb = XFS_B_TO_FSB(mp, offset + len); shift_fsb = XFS_B_TO_FSB(mp, len); error = xfs_free_file_space(ip, offset, len); if (error) return error; + /* + * Trim eofblocks to avoid shifting uninitialized post-eof preallocation + * into the accessible region of the file. + */ + if (xfs_can_free_eofblocks(ip, true)) { + error = xfs_free_eofblocks(mp, ip, false); + if (error) + return error; + } + + /* + * Writeback and invalidate cache for the remainder of the file as we're + * about to shift down every extent from the collapse range to EOF. The + * free of the collapse range above might have already done some of + * this, but we shouldn't rely on it to do anything outside of the range + * that was freed. + */ + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + offset + len, -1); + if (error) + return error; + error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + (offset + len) >> PAGE_CACHE_SHIFT, -1); + if (error) + return error; + while (!error && !done) { tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); /* @@ -1505,10 +1533,10 @@ xfs_collapse_file_space( * We are using the write transaction in which max 2 bmbt * updates are allowed */ - error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, - shift_fsb, ¤t_ext, - &first_block, &free_list, - XFS_BMAP_MAX_SHIFT_EXTENTS); + start_fsb = next_fsb; + error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, + &done, &next_fsb, &first_block, &free_list, + XFS_BMAP_MAX_SHIFT_EXTENTS); if (error) goto out; @@ -1618,7 +1646,7 @@ xfs_swap_extents_check_format( return 0; } -int +static int xfs_swap_extent_flush( struct xfs_inode *ip) { diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 76007deed31f..30fa5db9aea8 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -501,7 +501,7 @@ xfs_buf_item_unpin( * buffer being bad.. */ -DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); +static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); STATIC uint xfs_buf_item_push( diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0fe36e4d5cef..eb596b419942 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -291,12 +291,22 @@ xfs_file_read_iter( if (inode->i_mapping->nrpages) { ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + size - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } @@ -632,10 +642,19 @@ xfs_file_dio_aio_write( if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + count - 1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + count - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } /* diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 981b2cf51985..b45f7b27b5df 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -33,7 +33,6 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_bmap_util.h" -#include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 72129493e9d3..ec6dcdc181ee 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -849,6 +849,36 @@ xfs_setattr_size( return error; truncate_setsize(inode, newsize); + /* + * The "we can't serialise against page faults" pain gets worse. + * + * If the file is mapped then we have to clean the page at the old EOF + * when extending the file. Extending the file can expose changes the + * underlying page mapping (e.g. from beyond EOF to a hole or + * unwritten), and so on the next attempt to write to that page we need + * to remap it for write. i.e. we need .page_mkwrite() to be called. + * Hence we need to clean the page to clean the pte and so a new write + * fault will be triggered appropriately. + * + * If we do it before we change the inode size, then we can race with a + * page fault that maps the page with exactly the same problem. If we do + * it after we change the file size, then a new page fault can come in + * and allocate space before we've run the rest of the truncate + * transaction. That's kinda grotesque, but it's better than have data + * over a hole, and so that's the lesser evil that has been chosen here. + * + * The real solution, however, is to have some mechanism for locking out + * page faults while a truncate is in progress. + */ + if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { + error = filemap_write_and_wait_range( + VFS_I(ip)->i_mapping, + round_down(oldsize, PAGE_CACHE_SIZE), + round_up(oldsize, PAGE_CACHE_SIZE) - 1); + if (error) + return error; + } + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f6b79e5325dd..f506c457011e 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -463,12 +463,40 @@ xlog_cil_push( spin_unlock(&cil->xc_push_lock); goto out_skip; } - spin_unlock(&cil->xc_push_lock); /* check for a previously pushed seqeunce */ - if (push_seq < cil->xc_ctx->sequence) + if (push_seq < cil->xc_ctx->sequence) { + spin_unlock(&cil->xc_push_lock); goto out_skip; + } + + /* + * We are now going to push this context, so add it to the committing + * list before we do anything else. This ensures that anyone waiting on + * this push can easily detect the difference between a "push in + * progress" and "CIL is empty, nothing to do". + * + * IOWs, a wait loop can now check for: + * the current sequence not being found on the committing list; + * an empty CIL; and + * an unchanged sequence number + * to detect a push that had nothing to do and therefore does not need + * waiting on. If the CIL is not empty, we get put on the committing + * list before emptying the CIL and bumping the sequence number. Hence + * an empty CIL and an unchanged sequence number means we jumped out + * above after doing nothing. + * + * Hence the waiter will either find the commit sequence on the + * committing list or the sequence number will be unchanged and the CIL + * still dirty. In that latter case, the push has not yet started, and + * so the waiter will have to continue trying to check the CIL + * committing list until it is found. In extreme cases of delay, the + * sequence may fully commit between the attempts the wait makes to wait + * on the commit sequence. + */ + list_add(&ctx->committing, &cil->xc_committing); + spin_unlock(&cil->xc_push_lock); /* * pull all the log vectors off the items in the CIL, and @@ -532,7 +560,6 @@ xlog_cil_push( */ spin_lock(&cil->xc_push_lock); cil->xc_current_sequence = new_ctx->sequence; - list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); @@ -855,13 +882,15 @@ restart: * Hence by the time we have got here it our sequence may not have been * pushed yet. This is true if the current sequence still matches the * push sequence after the above wait loop and the CIL still contains - * dirty objects. + * dirty objects. This is guaranteed by the push code first adding the + * context to the committing list before emptying the CIL. * - * When the push occurs, it will empty the CIL and atomically increment - * the currect sequence past the push sequence and move it into the - * committing list. Of course, if the CIL is clean at the time of the - * push, it won't have pushed the CIL at all, so in that case we should - * try the push for this sequence again from the start just in case. + * Hence if we don't find the context in the committing list and the + * current sequence number is unchanged then the CIL contents are + * significant. If the CIL is empty, if means there was nothing to push + * and that means there is nothing to wait for. If the CIL is not empty, + * it means we haven't yet started the push, because if it had started + * we would have found the context on the committing list. */ if (sequence == cil->xc_current_sequence && !list_empty(&cil->xc_cil)) { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 29e101fc32c5..79cfe7e6ec7a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1445,160 +1445,6 @@ xlog_clear_stale_blocks( ****************************************************************************** */ -STATIC xlog_recover_t * -xlog_recover_find_tid( - struct hlist_head *head, - xlog_tid_t tid) -{ - xlog_recover_t *trans; - - hlist_for_each_entry(trans, head, r_list) { - if (trans->r_log_tid == tid) - return trans; - } - return NULL; -} - -STATIC void -xlog_recover_new_tid( - struct hlist_head *head, - xlog_tid_t tid, - xfs_lsn_t lsn) -{ - xlog_recover_t *trans; - - trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); - trans->r_log_tid = tid; - trans->r_lsn = lsn; - INIT_LIST_HEAD(&trans->r_itemq); - - INIT_HLIST_NODE(&trans->r_list); - hlist_add_head(&trans->r_list, head); -} - -STATIC void -xlog_recover_add_item( - struct list_head *head) -{ - xlog_recover_item_t *item; - - item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); - INIT_LIST_HEAD(&item->ri_list); - list_add_tail(&item->ri_list, head); -} - -STATIC int -xlog_recover_add_to_cont_trans( - struct xlog *log, - struct xlog_recover *trans, - xfs_caddr_t dp, - int len) -{ - xlog_recover_item_t *item; - xfs_caddr_t ptr, old_ptr; - int old_len; - - if (list_empty(&trans->r_itemq)) { - /* finish copying rest of trans header */ - xlog_recover_add_item(&trans->r_itemq); - ptr = (xfs_caddr_t) &trans->r_theader + - sizeof(xfs_trans_header_t) - len; - memcpy(ptr, dp, len); /* d, s, l */ - return 0; - } - /* take the tail entry */ - item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); - - old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; - old_len = item->ri_buf[item->ri_cnt-1].i_len; - - ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); - memcpy(&ptr[old_len], dp, len); /* d, s, l */ - item->ri_buf[item->ri_cnt-1].i_len += len; - item->ri_buf[item->ri_cnt-1].i_addr = ptr; - trace_xfs_log_recover_item_add_cont(log, trans, item, 0); - return 0; -} - -/* - * The next region to add is the start of a new region. It could be - * a whole region or it could be the first part of a new region. Because - * of this, the assumption here is that the type and size fields of all - * format structures fit into the first 32 bits of the structure. - * - * This works because all regions must be 32 bit aligned. Therefore, we - * either have both fields or we have neither field. In the case we have - * neither field, the data part of the region is zero length. We only have - * a log_op_header and can throw away the header since a new one will appear - * later. If we have at least 4 bytes, then we can determine how many regions - * will appear in the current log item. - */ -STATIC int -xlog_recover_add_to_trans( - struct xlog *log, - struct xlog_recover *trans, - xfs_caddr_t dp, - int len) -{ - xfs_inode_log_format_t *in_f; /* any will do */ - xlog_recover_item_t *item; - xfs_caddr_t ptr; - - if (!len) - return 0; - if (list_empty(&trans->r_itemq)) { - /* we need to catch log corruptions here */ - if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { - xfs_warn(log->l_mp, "%s: bad header magic number", - __func__); - ASSERT(0); - return -EIO; - } - if (len == sizeof(xfs_trans_header_t)) - xlog_recover_add_item(&trans->r_itemq); - memcpy(&trans->r_theader, dp, len); /* d, s, l */ - return 0; - } - - ptr = kmem_alloc(len, KM_SLEEP); - memcpy(ptr, dp, len); - in_f = (xfs_inode_log_format_t *)ptr; - - /* take the tail entry */ - item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); - if (item->ri_total != 0 && - item->ri_total == item->ri_cnt) { - /* tail item is in use, get a new one */ - xlog_recover_add_item(&trans->r_itemq); - item = list_entry(trans->r_itemq.prev, - xlog_recover_item_t, ri_list); - } - - if (item->ri_total == 0) { /* first region to be added */ - if (in_f->ilf_size == 0 || - in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { - xfs_warn(log->l_mp, - "bad number of regions (%d) in inode log format", - in_f->ilf_size); - ASSERT(0); - kmem_free(ptr); - return -EIO; - } - - item->ri_total = in_f->ilf_size; - item->ri_buf = - kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), - KM_SLEEP); - } - ASSERT(item->ri_total > item->ri_cnt); - /* Description region is ri_buf[0] */ - item->ri_buf[item->ri_cnt].i_addr = ptr; - item->ri_buf[item->ri_cnt].i_len = len; - item->ri_cnt++; - trace_xfs_log_recover_item_add(log, trans, item, 0); - return 0; -} - /* * Sort the log items in the transaction. * @@ -3254,31 +3100,6 @@ xlog_recover_do_icreate_pass2( return 0; } -/* - * Free up any resources allocated by the transaction - * - * Remember that EFIs, EFDs, and IUNLINKs are handled later. - */ -STATIC void -xlog_recover_free_trans( - struct xlog_recover *trans) -{ - xlog_recover_item_t *item, *n; - int i; - - list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { - /* Free the regions in the item. */ - list_del(&item->ri_list); - for (i = 0; i < item->ri_cnt; i++) - kmem_free(item->ri_buf[i].i_addr); - /* Free the item itself */ - kmem_free(item->ri_buf); - kmem_free(item); - } - /* Free the transaction recover structure */ - kmem_free(trans); -} - STATIC void xlog_recover_buffer_ra_pass2( struct xlog *log, @@ -3528,22 +3349,309 @@ out: if (!list_empty(&done_list)) list_splice_init(&done_list, &trans->r_itemq); - xlog_recover_free_trans(trans); - error2 = xfs_buf_delwri_submit(&buffer_list); return error ? error : error2; } +STATIC void +xlog_recover_add_item( + struct list_head *head) +{ + xlog_recover_item_t *item; + + item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); + INIT_LIST_HEAD(&item->ri_list); + list_add_tail(&item->ri_list, head); +} + STATIC int -xlog_recover_unmount_trans( - struct xlog *log) +xlog_recover_add_to_cont_trans( + struct xlog *log, + struct xlog_recover *trans, + xfs_caddr_t dp, + int len) +{ + xlog_recover_item_t *item; + xfs_caddr_t ptr, old_ptr; + int old_len; + + if (list_empty(&trans->r_itemq)) { + /* finish copying rest of trans header */ + xlog_recover_add_item(&trans->r_itemq); + ptr = (xfs_caddr_t) &trans->r_theader + + sizeof(xfs_trans_header_t) - len; + memcpy(ptr, dp, len); + return 0; + } + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); + + old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; + old_len = item->ri_buf[item->ri_cnt-1].i_len; + + ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); + memcpy(&ptr[old_len], dp, len); + item->ri_buf[item->ri_cnt-1].i_len += len; + item->ri_buf[item->ri_cnt-1].i_addr = ptr; + trace_xfs_log_recover_item_add_cont(log, trans, item, 0); + return 0; +} + +/* + * The next region to add is the start of a new region. It could be + * a whole region or it could be the first part of a new region. Because + * of this, the assumption here is that the type and size fields of all + * format structures fit into the first 32 bits of the structure. + * + * This works because all regions must be 32 bit aligned. Therefore, we + * either have both fields or we have neither field. In the case we have + * neither field, the data part of the region is zero length. We only have + * a log_op_header and can throw away the header since a new one will appear + * later. If we have at least 4 bytes, then we can determine how many regions + * will appear in the current log item. + */ +STATIC int +xlog_recover_add_to_trans( + struct xlog *log, + struct xlog_recover *trans, + xfs_caddr_t dp, + int len) { - /* Do nothing now */ - xfs_warn(log->l_mp, "%s: Unmount LR", __func__); + xfs_inode_log_format_t *in_f; /* any will do */ + xlog_recover_item_t *item; + xfs_caddr_t ptr; + + if (!len) + return 0; + if (list_empty(&trans->r_itemq)) { + /* we need to catch log corruptions here */ + if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { + xfs_warn(log->l_mp, "%s: bad header magic number", + __func__); + ASSERT(0); + return -EIO; + } + if (len == sizeof(xfs_trans_header_t)) + xlog_recover_add_item(&trans->r_itemq); + memcpy(&trans->r_theader, dp, len); + return 0; + } + + ptr = kmem_alloc(len, KM_SLEEP); + memcpy(ptr, dp, len); + in_f = (xfs_inode_log_format_t *)ptr; + + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); + if (item->ri_total != 0 && + item->ri_total == item->ri_cnt) { + /* tail item is in use, get a new one */ + xlog_recover_add_item(&trans->r_itemq); + item = list_entry(trans->r_itemq.prev, + xlog_recover_item_t, ri_list); + } + + if (item->ri_total == 0) { /* first region to be added */ + if (in_f->ilf_size == 0 || + in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { + xfs_warn(log->l_mp, + "bad number of regions (%d) in inode log format", + in_f->ilf_size); + ASSERT(0); + kmem_free(ptr); + return -EIO; + } + + item->ri_total = in_f->ilf_size; + item->ri_buf = + kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), + KM_SLEEP); + } + ASSERT(item->ri_total > item->ri_cnt); + /* Description region is ri_buf[0] */ + item->ri_buf[item->ri_cnt].i_addr = ptr; + item->ri_buf[item->ri_cnt].i_len = len; + item->ri_cnt++; + trace_xfs_log_recover_item_add(log, trans, item, 0); return 0; } /* + * Free up any resources allocated by the transaction + * + * Remember that EFIs, EFDs, and IUNLINKs are handled later. + */ +STATIC void +xlog_recover_free_trans( + struct xlog_recover *trans) +{ + xlog_recover_item_t *item, *n; + int i; + + list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { + /* Free the regions in the item. */ + list_del(&item->ri_list); + for (i = 0; i < item->ri_cnt; i++) + kmem_free(item->ri_buf[i].i_addr); + /* Free the item itself */ + kmem_free(item->ri_buf); + kmem_free(item); + } + /* Free the transaction recover structure */ + kmem_free(trans); +} + +/* + * On error or completion, trans is freed. + */ +STATIC int +xlog_recovery_process_trans( + struct xlog *log, + struct xlog_recover *trans, + xfs_caddr_t dp, + unsigned int len, + unsigned int flags, + int pass) +{ + int error = 0; + bool freeit = false; + + /* mask off ophdr transaction container flags */ + flags &= ~XLOG_END_TRANS; + if (flags & XLOG_WAS_CONT_TRANS) + flags &= ~XLOG_CONTINUE_TRANS; + + /* + * Callees must not free the trans structure. We'll decide if we need to + * free it or not based on the operation being done and it's result. + */ + switch (flags) { + /* expected flag values */ + case 0: + case XLOG_CONTINUE_TRANS: + error = xlog_recover_add_to_trans(log, trans, dp, len); + break; + case XLOG_WAS_CONT_TRANS: + error = xlog_recover_add_to_cont_trans(log, trans, dp, len); + break; + case XLOG_COMMIT_TRANS: + error = xlog_recover_commit_trans(log, trans, pass); + /* success or fail, we are now done with this transaction. */ + freeit = true; + break; + + /* unexpected flag values */ + case XLOG_UNMOUNT_TRANS: + /* just skip trans */ + xfs_warn(log->l_mp, "%s: Unmount LR", __func__); + freeit = true; + break; + case XLOG_START_TRANS: + default: + xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); + ASSERT(0); + error = -EIO; + break; + } + if (error || freeit) + xlog_recover_free_trans(trans); + return error; +} + +/* + * Lookup the transaction recovery structure associated with the ID in the + * current ophdr. If the transaction doesn't exist and the start flag is set in + * the ophdr, then allocate a new transaction for future ID matches to find. + * Either way, return what we found during the lookup - an existing transaction + * or nothing. + */ +STATIC struct xlog_recover * +xlog_recover_ophdr_to_trans( + struct hlist_head rhash[], + struct xlog_rec_header *rhead, + struct xlog_op_header *ohead) +{ + struct xlog_recover *trans; + xlog_tid_t tid; + struct hlist_head *rhp; + + tid = be32_to_cpu(ohead->oh_tid); + rhp = &rhash[XLOG_RHASH(tid)]; + hlist_for_each_entry(trans, rhp, r_list) { + if (trans->r_log_tid == tid) + return trans; + } + + /* + * skip over non-start transaction headers - we could be + * processing slack space before the next transaction starts + */ + if (!(ohead->oh_flags & XLOG_START_TRANS)) + return NULL; + + ASSERT(be32_to_cpu(ohead->oh_len) == 0); + + /* + * This is a new transaction so allocate a new recovery container to + * hold the recovery ops that will follow. + */ + trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP); + trans->r_log_tid = tid; + trans->r_lsn = be64_to_cpu(rhead->h_lsn); + INIT_LIST_HEAD(&trans->r_itemq); + INIT_HLIST_NODE(&trans->r_list); + hlist_add_head(&trans->r_list, rhp); + + /* + * Nothing more to do for this ophdr. Items to be added to this new + * transaction will be in subsequent ophdr containers. + */ + return NULL; +} + +STATIC int +xlog_recover_process_ophdr( + struct xlog *log, + struct hlist_head rhash[], + struct xlog_rec_header *rhead, + struct xlog_op_header *ohead, + xfs_caddr_t dp, + xfs_caddr_t end, + int pass) +{ + struct xlog_recover *trans; + unsigned int len; + + /* Do we understand who wrote this op? */ + if (ohead->oh_clientid != XFS_TRANSACTION && + ohead->oh_clientid != XFS_LOG) { + xfs_warn(log->l_mp, "%s: bad clientid 0x%x", + __func__, ohead->oh_clientid); + ASSERT(0); + return -EIO; + } + + /* + * Check the ophdr contains all the data it is supposed to contain. + */ + len = be32_to_cpu(ohead->oh_len); + if (dp + len > end) { + xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); + WARN_ON(1); + return -EIO; + } + + trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); + if (!trans) { + /* nothing to do, so skip over this ophdr */ + return 0; + } + + return xlog_recovery_process_trans(log, trans, dp, len, + ohead->oh_flags, pass); +} + +/* * There are two valid states of the r_state field. 0 indicates that the * transaction structure is in a normal state. We have either seen the * start of the transaction or the last operation we added was not a partial @@ -3560,86 +3668,30 @@ xlog_recover_process_data( xfs_caddr_t dp, int pass) { - xfs_caddr_t lp; + struct xlog_op_header *ohead; + xfs_caddr_t end; int num_logops; - xlog_op_header_t *ohead; - xlog_recover_t *trans; - xlog_tid_t tid; int error; - unsigned long hash; - uint flags; - lp = dp + be32_to_cpu(rhead->h_len); + end = dp + be32_to_cpu(rhead->h_len); num_logops = be32_to_cpu(rhead->h_num_logops); /* check the log format matches our own - else we can't recover */ if (xlog_header_check_recover(log->l_mp, rhead)) return -EIO; - while ((dp < lp) && num_logops) { - ASSERT(dp + sizeof(xlog_op_header_t) <= lp); - ohead = (xlog_op_header_t *)dp; - dp += sizeof(xlog_op_header_t); - if (ohead->oh_clientid != XFS_TRANSACTION && - ohead->oh_clientid != XFS_LOG) { - xfs_warn(log->l_mp, "%s: bad clientid 0x%x", - __func__, ohead->oh_clientid); - ASSERT(0); - return -EIO; - } - tid = be32_to_cpu(ohead->oh_tid); - hash = XLOG_RHASH(tid); - trans = xlog_recover_find_tid(&rhash[hash], tid); - if (trans == NULL) { /* not found; add new tid */ - if (ohead->oh_flags & XLOG_START_TRANS) - xlog_recover_new_tid(&rhash[hash], tid, - be64_to_cpu(rhead->h_lsn)); - } else { - if (dp + be32_to_cpu(ohead->oh_len) > lp) { - xfs_warn(log->l_mp, "%s: bad length 0x%x", - __func__, be32_to_cpu(ohead->oh_len)); - WARN_ON(1); - return -EIO; - } - flags = ohead->oh_flags & ~XLOG_END_TRANS; - if (flags & XLOG_WAS_CONT_TRANS) - flags &= ~XLOG_CONTINUE_TRANS; - switch (flags) { - case XLOG_COMMIT_TRANS: - error = xlog_recover_commit_trans(log, - trans, pass); - break; - case XLOG_UNMOUNT_TRANS: - error = xlog_recover_unmount_trans(log); - break; - case XLOG_WAS_CONT_TRANS: - error = xlog_recover_add_to_cont_trans(log, - trans, dp, - be32_to_cpu(ohead->oh_len)); - break; - case XLOG_START_TRANS: - xfs_warn(log->l_mp, "%s: bad transaction", - __func__); - ASSERT(0); - error = -EIO; - break; - case 0: - case XLOG_CONTINUE_TRANS: - error = xlog_recover_add_to_trans(log, trans, - dp, be32_to_cpu(ohead->oh_len)); - break; - default: - xfs_warn(log->l_mp, "%s: bad flag 0x%x", - __func__, flags); - ASSERT(0); - error = -EIO; - break; - } - if (error) { - xlog_recover_free_trans(trans); - return error; - } - } + while ((dp < end) && num_logops) { + + ohead = (struct xlog_op_header *)dp; + dp += sizeof(*ohead); + ASSERT(dp <= end); + + /* errors will abort recovery */ + error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, + dp, end, pass); + if (error) + return error; + dp += be32_to_cpu(ohead->oh_len); num_logops--; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index d1160cce5dad..d45aebe04dde 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -46,7 +46,7 @@ * Keeps track of a current summary block, so we don't keep reading * it from the buffer cache. */ -int +static int xfs_rtget_summary( xfs_mount_t *mp, /* file system mount structure */ xfs_trans_t *tp, /* transaction pointer */ |