summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c207
1 files changed, 84 insertions, 123 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8cd6e8d8fe9c..68415cb4f23c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -27,13 +27,10 @@
#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
@@ -44,7 +41,6 @@
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
-#include "xfs_rw.h"
#include "xfs_error.h"
#include "xfs_utils.h"
#include "xfs_quota.h"
@@ -177,7 +173,7 @@ xfs_imap_to_bp(
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
XFS_RANDOM_ITOBP_INOTOBP))) {
- if (iget_flags & XFS_IGET_BULKSTAT) {
+ if (iget_flags & XFS_IGET_UNTRUSTED) {
xfs_trans_brelse(tp, bp);
return XFS_ERROR(EINVAL);
}
@@ -426,7 +422,7 @@ xfs_iformat(
if (!XFS_DFORK_Q(dip))
return 0;
ASSERT(ip->i_afp == NULL);
- ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
+ ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
ip->i_afp->if_ext_max =
XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
switch (dip->di_aformat) {
@@ -509,7 +505,7 @@ xfs_iformat_local(
ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
else {
real_size = roundup(size, 4);
- ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+ ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
}
ifp->if_bytes = size;
ifp->if_real_bytes = real_size;
@@ -636,7 +632,7 @@ xfs_iformat_btree(
}
ifp->if_broot_bytes = size;
- ifp->if_broot = kmem_alloc(size, KM_SLEEP);
+ ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
ASSERT(ifp->if_broot != NULL);
/*
* Copy and convert from the on-disk structure
@@ -787,7 +783,6 @@ xfs_iread(
xfs_mount_t *mp,
xfs_trans_t *tp,
xfs_inode_t *ip,
- xfs_daddr_t bno,
uint iget_flags)
{
xfs_buf_t *bp;
@@ -797,11 +792,9 @@ xfs_iread(
/*
* Fill in the location information in the in-core inode.
*/
- ip->i_imap.im_blkno = bno;
error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
if (error)
return error;
- ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
/*
* Get pointers to the on-disk inode and the buffer containing it.
@@ -925,7 +918,6 @@ xfs_iread_extents(
int error;
xfs_ifork_t *ifp;
xfs_extnum_t nextents;
- size_t size;
if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
@@ -933,7 +925,6 @@ xfs_iread_extents(
return XFS_ERROR(EFSCORRUPTED);
}
nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
- size = nextents * sizeof(xfs_bmbt_rec_t);
ifp = XFS_IFORK_PTR(ip, whichfork);
/*
@@ -1229,7 +1220,7 @@ xfs_isize_check(
(xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
map_first),
XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
- NULL, NULL))
+ NULL))
return;
ASSERT(nimaps == 1);
ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
@@ -1463,7 +1454,7 @@ xfs_itruncate_finish(
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_transp == *tp);
ASSERT(ip->i_itemp != NULL);
- ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD);
+ ASSERT(ip->i_itemp->ili_lock_flags == 0);
ntp = *tp;
@@ -1592,11 +1583,10 @@ xfs_itruncate_finish(
xfs_bmap_init(&free_list, &first_block);
error = xfs_bunmapi(ntp, ip,
first_unmap_block, unmap_len,
- xfs_bmapi_aflag(fork) |
- (sync ? 0 : XFS_BMAPI_ASYNC),
+ xfs_bmapi_aflag(fork),
XFS_ITRUNC_MAX_EXTENTS,
&first_block, &free_list,
- NULL, &done);
+ &done);
if (error) {
/*
* If the bunmapi call encounters an error,
@@ -1615,12 +1605,8 @@ xfs_itruncate_finish(
*/
error = xfs_bmap_finish(tp, &free_list, &committed);
ntp = *tp;
- if (committed) {
- /* link the inode into the next xact in the chain */
- xfs_trans_ijoin(ntp, ip,
- XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ihold(ntp, ip);
- }
+ if (committed)
+ xfs_trans_ijoin(ntp, ip);
if (error) {
/*
@@ -1649,9 +1635,7 @@ xfs_itruncate_finish(
error = xfs_trans_commit(*tp, 0);
*tp = ntp;
- /* link the inode into the next transaction in the chain */
- xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ihold(ntp, ip);
+ xfs_trans_ijoin(ntp, ip);
if (error)
return error;
@@ -1940,10 +1924,10 @@ xfs_ifree_cluster(
int blks_per_cluster;
int nbufs;
int ninodes;
- int i, j, found, pre_flushed;
+ int i, j;
xfs_daddr_t blkno;
xfs_buf_t *bp;
- xfs_inode_t *ip, **ip_found;
+ xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
struct xfs_perag *pag;
@@ -1960,114 +1944,97 @@ xfs_ifree_cluster(
nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
}
- ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
-
for (j = 0; j < nbufs; j++, inum += ninodes) {
+ int found = 0;
+
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
+ /*
+ * We obtain and lock the backing buffer first in the process
+ * here, as we have to ensure that any dirty inode that we
+ * can't get the flush lock on is attached to the buffer.
+ * If we scan the in-memory inodes first, then buffer IO can
+ * complete before we get a lock on it, and hence we may fail
+ * to mark all the active inodes on the buffer stale.
+ */
+ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+ mp->m_bsize * blks_per_cluster,
+ XBF_LOCK);
/*
- * Look for each inode in memory and attempt to lock it,
- * we can be racing with flush and tail pushing here.
- * any inode we get the locks on, add to an array of
- * inode items to process later.
+ * Walk the inodes already attached to the buffer and mark them
+ * stale. These will all have the flush locks held, so an
+ * in-memory inode walk can't lock them.
+ */
+ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ while (lip) {
+ if (lip->li_type == XFS_LI_INODE) {
+ iip = (xfs_inode_log_item_t *)lip;
+ ASSERT(iip->ili_logged == 1);
+ lip->li_cb = xfs_istale_done;
+ xfs_trans_ail_copy_lsn(mp->m_ail,
+ &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
+ xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+ found++;
+ }
+ lip = lip->li_bio_list;
+ }
+
+ /*
+ * For each inode in memory attempt to add it to the inode
+ * buffer and set it up for being staled on buffer IO
+ * completion. This is safe as we've locked out tail pushing
+ * and flushing by locking the buffer.
*
- * The get the buffer lock, we could beat a flush
- * or tail pushing thread to the lock here, in which
- * case they will go looking for the inode buffer
- * and fail, we need some other form of interlock
- * here.
+ * We have already marked every inode that was part of a
+ * transaction stale above, which means there is no point in
+ * even trying to lock them.
*/
- found = 0;
for (i = 0; i < ninodes; i++) {
read_lock(&pag->pag_ici_lock);
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
- /* Inode not in memory or we found it already,
- * nothing to do
- */
+ /* Inode not in memory or stale, nothing to do */
if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
- if (xfs_inode_clean(ip)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
-
- /* If we can get the locks then add it to the
- * list, otherwise by the time we get the bp lock
- * below it will already be attached to the
- * inode buffer.
- */
-
- /* This inode will already be locked - by us, lets
- * keep it that way.
- */
-
- if (ip == free_ip) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- } else {
- ip_found[found++] = ip;
- }
- }
+ /* don't try to lock/unlock the current inode */
+ if (ip != free_ip &&
+ !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
+ read_unlock(&pag->pag_ici_lock);
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
-
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- } else {
- ip_found[found++] = ip;
- }
- } else {
+ if (!xfs_iflock_nowait(ip)) {
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
+ continue;
}
- read_unlock(&pag->pag_ici_lock);
- }
-
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * blks_per_cluster,
- XBF_LOCK);
- pre_flushed = 0;
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- while (lip) {
- if (lip->li_type == XFS_LI_INODE) {
- iip = (xfs_inode_log_item_t *)lip;
- ASSERT(iip->ili_logged == 1);
- lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
- xfs_trans_ail_copy_lsn(mp->m_ail,
- &iip->ili_flush_lsn,
- &iip->ili_item.li_lsn);
- xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
- pre_flushed++;
+ xfs_iflags_set(ip, XFS_ISTALE);
+ if (xfs_inode_clean(ip)) {
+ ASSERT(ip != free_ip);
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ continue;
}
- lip = lip->li_bio_list;
- }
- for (i = 0; i < found; i++) {
- ip = ip_found[i];
iip = ip->i_itemp;
-
if (!iip) {
+ /* inode with unlogged changes only */
+ ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
+ found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
@@ -2075,20 +2042,18 @@ xfs_ifree_cluster(
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
- xfs_buf_attach_iodone(bp,
- (void(*)(xfs_buf_t*,xfs_log_item_t*))
- xfs_istale_done, (xfs_log_item_t *)iip);
- if (ip != free_ip) {
+ xfs_buf_attach_iodone(bp, xfs_istale_done,
+ &iip->ili_item);
+
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
}
- if (found || pre_flushed)
+ if (found)
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
- kmem_free(ip_found);
xfs_perag_put(pag);
}
@@ -2224,7 +2189,7 @@ xfs_iroot_realloc(
*/
if (ifp->if_broot_bytes == 0) {
new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
- ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
+ ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
ifp->if_broot_bytes = (int)new_size;
return;
}
@@ -2240,7 +2205,7 @@ xfs_iroot_realloc(
new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
- KM_SLEEP);
+ KM_SLEEP | KM_NOFS);
op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -2266,7 +2231,7 @@ xfs_iroot_realloc(
else
new_size = 0;
if (new_size > 0) {
- new_broot = kmem_alloc(new_size, KM_SLEEP);
+ new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
/*
* First copy over the btree block header.
*/
@@ -2370,7 +2335,8 @@ xfs_idata_realloc(
real_size = roundup(new_size, 4);
if (ifp->if_u1.if_data == NULL) {
ASSERT(ifp->if_real_bytes == 0);
- ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+ ifp->if_u1.if_data = kmem_alloc(real_size,
+ KM_SLEEP | KM_NOFS);
} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
/*
* Only do the realloc if the underlying size
@@ -2381,11 +2347,12 @@ xfs_idata_realloc(
kmem_realloc(ifp->if_u1.if_data,
real_size,
ifp->if_real_bytes,
- KM_SLEEP);
+ KM_SLEEP | KM_NOFS);
}
} else {
ASSERT(ifp->if_real_bytes == 0);
- ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+ ifp->if_u1.if_data = kmem_alloc(real_size,
+ KM_SLEEP | KM_NOFS);
memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
ifp->if_bytes);
}
@@ -2649,8 +2616,6 @@ xfs_iflush_cluster(
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- ASSERT(pag->pagi_inodeok);
- ASSERT(pag->pag_ici_init);
inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
@@ -2754,7 +2719,6 @@ cluster_corrupt_out:
* mark it as stale and brelse.
*/
if (XFS_BUF_IODONE_FUNC(bp)) {
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
XFS_BUF_ERROR(bp,EIO);
@@ -3092,8 +3056,7 @@ xfs_iflush_int(
* and unlock the inode's flush lock when the inode is
* completely written to disk.
*/
- xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*))
- xfs_iflush_done, (xfs_log_item_t *)iip);
+ xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
@@ -3537,13 +3500,11 @@ xfs_iext_remove_indirect(
xfs_extnum_t ext_diff; /* extents to remove in current list */
xfs_extnum_t nex1; /* number of extents before idx */
xfs_extnum_t nex2; /* extents after idx + count */
- int nlists; /* entries in indirection array */
int page_idx = idx; /* index in target extent list */
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
ASSERT(erp != NULL);
- nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
nex1 = page_idx;
ext_cnt = count;
while (ext_cnt) {