summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-14 23:06:06 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-14 23:06:06 +0200
commit2fc61f25fb296827387a5f01129dbc00cbe3ca58 (patch)
tree5659c402b3a5f357158a7d8e4f284db15ee69e7f /fs
parentMerge tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux (diff)
parentxfs: ensure that fpunch, fcollapse, and finsert operations are aligned to rt ... (diff)
downloadlinux-2fc61f25fb296827387a5f01129dbc00cbe3ca58.tar.xz
linux-2fc61f25fb296827387a5f01129dbc00cbe3ca58.zip
Merge tag 'xfs-5.10-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "The biggest changes are two new features for the ondisk metadata: one to record the sizes of the inode btrees in the AG to increase redundancy checks and to improve mount times; and a second new feature to support timestamps until the year 2486. We also fixed a problem where reflinking into a file that requires synchronous writes wouldn't actually flush the updates to disk; clean up a fair amount of cruft; and started fixing some bugs in the realtime volume code. Summary: - Clean up the buffer ioend calling path so that the retry strategy isn't quite so scattered everywhere. - Clean up m_sb_bp handling. - New feature: storing inode btree counts in the AGI to speed up certain mount time per-AG block reservation operatoins and add a little more metadata redundancy. - New feature: Widen inode timestamps and quota grace expiration timestamps to support dates through the year 2486. - Get rid of more of our custom buffer allocation API wrappers. - Use a proper VLA for shortform xattr structure namevals. - Force the log after reflinking or deduping into a file that is opened with O_SYNC or O_DSYNC. - Fix some math errors in the realtime allocator" * tag 'xfs-5.10-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (42 commits) xfs: ensure that fpunch, fcollapse, and finsert operations are aligned to rt extent size xfs: make sure the rt allocator doesn't run off the end xfs: Remove unneeded semicolon xfs: force the log after remapping a synchronous-writes file xfs: Convert xfs_attr_sf macros to inline functions xfs: Use variable-size array for nameval in xfs_attr_sf_entry xfs: Remove typedef xfs_attr_shortform_t xfs: remove typedef xfs_attr_sf_entry_t xfs: Remove kmem_zalloc_large() xfs: enable big timestamps xfs: trace timestamp limits xfs: widen ondisk quota expiration timestamps to handle y2038+ xfs: widen ondisk inode timestamps to deal with y2038+ xfs: redefine xfs_ictimestamp_t xfs: redefine xfs_timestamp_t xfs: move xfs_log_dinode_to_disk to the log recovery code xfs: refactor quota timestamp coding xfs: refactor default quota grace period setting code xfs: refactor quota expiration timer modification xfs: explicitly define inode timestamp range ...
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/kmem.c22
-rw-r--r--fs/xfs/kmem.h7
-rw-r--r--fs/xfs/libxfs/xfs_ag.c5
-rw-r--r--fs/xfs/libxfs/xfs_attr.c14
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c43
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h29
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h6
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c35
-rw-r--r--fs/xfs/libxfs/xfs_format.h211
-rw-r--r--fs/xfs/libxfs/xfs_fs.h1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c5
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c65
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c130
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h15
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c8
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h7
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h1
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h8
-rw-r--r--fs/xfs/libxfs/xfs_sb.c6
-rw-r--r--fs/xfs/libxfs/xfs_shared.h3
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c17
-rw-r--r--fs/xfs/scrub/agheader.c30
-rw-r--r--fs/xfs/scrub/agheader_repair.c24
-rw-r--r--fs/xfs/scrub/inode.c31
-rw-r--r--fs/xfs/scrub/symlink.c2
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_attr_list.c6
-rw-r--r--fs/xfs/xfs_bmap_util.c16
-rw-r--r--fs/xfs/xfs_buf.c208
-rw-r--r--fs/xfs/xfs_buf.h17
-rw-r--r--fs/xfs/xfs_buf_item.c264
-rw-r--r--fs/xfs/xfs_buf_item.h12
-rw-r--r--fs/xfs/xfs_buf_item_recover.c2
-rw-r--r--fs/xfs/xfs_dquot.c66
-rw-r--r--fs/xfs/xfs_dquot.h3
-rw-r--r--fs/xfs/xfs_file.c17
-rw-r--r--fs/xfs/xfs_icache.c19
-rw-r--r--fs/xfs/xfs_inode.c83
-rw-r--r--fs/xfs/xfs_inode.h38
-rw-r--r--fs/xfs/xfs_inode_item.c61
-rw-r--r--fs/xfs/xfs_inode_item.h5
-rw-r--r--fs/xfs/xfs_inode_item_recover.c76
-rw-r--r--fs/xfs/xfs_ioctl.c7
-rw-r--r--fs/xfs/xfs_log_recover.c60
-rw-r--r--fs/xfs/xfs_mount.c32
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_ondisk.h38
-rw-r--r--fs/xfs/xfs_qm.c13
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_qm_syscalls.c18
-rw-r--r--fs/xfs/xfs_quota.h8
-rw-r--r--fs/xfs/xfs_rtalloc.c13
-rw-r--r--fs/xfs/xfs_super.c28
-rw-r--r--fs/xfs/xfs_trace.h29
-rw-r--r--fs/xfs/xfs_trans.c2
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c46
-rw-r--r--fs/xfs/xfs_trans_dquot.c6
59 files changed, 1183 insertions, 746 deletions
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index e841ed781a25..e986b95d94c9 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -93,25 +93,3 @@ kmem_alloc_large(size_t size, xfs_km_flags_t flags)
return ptr;
return __kmem_vmalloc(size, flags);
}
-
-void *
-kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags)
-{
- int retries = 0;
- gfp_t lflags = kmem_flags_convert(flags);
- void *ptr;
-
- trace_kmem_realloc(newsize, flags, _RET_IP_);
-
- do {
- ptr = krealloc(old, newsize, lflags);
- if (ptr || (flags & KM_MAYFAIL))
- return ptr;
- if (!(++retries % 100))
- xfs_err(NULL,
- "%s(%u) possible memory allocation deadlock size %zu in %s (mode:0x%x)",
- current->comm, current->pid,
- newsize, __func__, lflags);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- } while (1);
-}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 8e8555817e6d..38007117697e 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -59,7 +59,6 @@ kmem_flags_convert(xfs_km_flags_t flags)
extern void *kmem_alloc(size_t, xfs_km_flags_t);
extern void *kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags);
extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
-extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
static inline void kmem_free(const void *ptr)
{
kvfree(ptr);
@@ -72,12 +71,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
return kmem_alloc(size, flags | KM_ZERO);
}
-static inline void *
-kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
-{
- return kmem_alloc_large(size, flags | KM_ZERO);
-}
-
/*
* Zone interfaces
*/
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 8cf73fe4338e..9331f3516afa 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -333,6 +333,11 @@ xfs_agiblock_init(
}
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
+ if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+ agi->agi_iblocks = cpu_to_be32(1);
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ agi->agi_fblocks = cpu_to_be32(1);
+ }
}
typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 2e055c079f39..fd8e6418a0d3 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -428,7 +428,7 @@ xfs_attr_set(
*/
if (XFS_IFORK_Q(dp) == 0) {
int sf_size = sizeof(struct xfs_attr_sf_hdr) +
- XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen,
+ xfs_attr_sf_entsize_byname(args->namelen,
args->valuelen);
error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
@@ -523,6 +523,14 @@ out_trans_cancel:
* External routines when attribute list is inside the inode
*========================================================================*/
+static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
+{
+ struct xfs_attr_shortform *sf;
+
+ sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data;
+ return be16_to_cpu(sf->hdr.totsize);
+}
+
/*
* Add a name to the shortform attribute list structure
* This is the external routine.
@@ -555,8 +563,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
return -ENOSPC;
- newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
- newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
+ newsize = xfs_attr_sf_totsize(args->dp);
+ newsize += xfs_attr_sf_entsize_byname(args->namelen, args->valuelen);
forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
if (!forkoff)
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 305d4bc07337..bb128db220ac 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -684,9 +684,9 @@ xfs_attr_sf_findname(
sf = (struct xfs_attr_shortform *)args->dp->i_afp->if_u1.if_data;
sfe = &sf->list[0];
end = sf->hdr.count;
- for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
+ for (i = 0; i < end; sfe = xfs_attr_sf_nextentry(sfe),
base += size, i++) {
- size = XFS_ATTR_SF_ENTSIZE(sfe);
+ size = xfs_attr_sf_entsize(sfe);
if (!xfs_attr_match(args, sfe->namelen, sfe->nameval,
sfe->flags))
continue;
@@ -728,15 +728,15 @@ xfs_attr_shortform_add(
ifp = dp->i_afp;
ASSERT(ifp->if_flags & XFS_IFINLINE);
- sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
+ sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
if (xfs_attr_sf_findname(args, &sfe, NULL) == -EEXIST)
ASSERT(0);
offset = (char *)sfe - (char *)sf;
- size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
+ size = xfs_attr_sf_entsize_byname(args->namelen, args->valuelen);
xfs_idata_realloc(dp, size, XFS_ATTR_FORK);
- sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
- sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset);
+ sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
+ sfe = (struct xfs_attr_sf_entry *)((char *)sf + offset);
sfe->namelen = args->namelen;
sfe->valuelen = args->valuelen;
@@ -787,12 +787,12 @@ xfs_attr_shortform_remove(
dp = args->dp;
mp = dp->i_mount;
- sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
+ sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data;
error = xfs_attr_sf_findname(args, &sfe, &base);
if (error != -EEXIST)
return error;
- size = XFS_ATTR_SF_ENTSIZE(sfe);
+ size = xfs_attr_sf_entsize(sfe);
/*
* Fix up the attribute fork data, covering the hole
@@ -837,8 +837,8 @@ xfs_attr_shortform_remove(
int
xfs_attr_shortform_lookup(xfs_da_args_t *args)
{
- xfs_attr_shortform_t *sf;
- xfs_attr_sf_entry_t *sfe;
+ struct xfs_attr_shortform *sf;
+ struct xfs_attr_sf_entry *sfe;
int i;
struct xfs_ifork *ifp;
@@ -846,10 +846,10 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
ifp = args->dp->i_afp;
ASSERT(ifp->if_flags & XFS_IFINLINE);
- sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
+ sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
sfe = &sf->list[0];
for (i = 0; i < sf->hdr.count;
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+ sfe = xfs_attr_sf_nextentry(sfe), i++) {
if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
sfe->flags))
return -EEXIST;
@@ -873,10 +873,10 @@ xfs_attr_shortform_getvalue(
int i;
ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE);
- sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
+ sf = (struct xfs_attr_shortform *)args->dp->i_afp->if_u1.if_data;
sfe = &sf->list[0];
for (i = 0; i < sf->hdr.count;
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+ sfe = xfs_attr_sf_nextentry(sfe), i++) {
if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
sfe->flags))
return xfs_attr_copy_value(args,
@@ -908,12 +908,12 @@ xfs_attr_shortform_to_leaf(
dp = args->dp;
ifp = dp->i_afp;
- sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
+ sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
size = be16_to_cpu(sf->hdr.totsize);
tmpbuffer = kmem_alloc(size, 0);
ASSERT(tmpbuffer != NULL);
memcpy(tmpbuffer, ifp->if_u1.if_data, size);
- sf = (xfs_attr_shortform_t *)tmpbuffer;
+ sf = (struct xfs_attr_shortform *)tmpbuffer;
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK);
@@ -951,7 +951,7 @@ xfs_attr_shortform_to_leaf(
ASSERT(error != -ENOSPC);
if (error)
goto out;
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
+ sfe = xfs_attr_sf_nextentry(sfe);
}
error = 0;
*leaf_bp = bp;
@@ -992,9 +992,8 @@ xfs_attr_shortform_allfit(
return 0;
if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
return 0;
- bytes += sizeof(struct xfs_attr_sf_entry) - 1
- + name_loc->namelen
- + be16_to_cpu(name_loc->valuelen);
+ bytes += xfs_attr_sf_entsize_byname(name_loc->namelen,
+ be16_to_cpu(name_loc->valuelen));
}
if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
@@ -1039,7 +1038,7 @@ xfs_attr_shortform_verify(
* xfs_attr_sf_entry is defined with a 1-byte variable
* array at the end, so we must subtract that off.
*/
- if (((char *)sfep + sizeof(*sfep) - 1) >= endp)
+ if (((char *)sfep + sizeof(*sfep)) >= endp)
return __this_address;
/* Don't allow names with known bad length. */
@@ -1051,7 +1050,7 @@ xfs_attr_shortform_verify(
* within the data buffer. The next entry starts after the
* name component, so nextentry is an acceptable test.
*/
- next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep);
+ next_sfep = xfs_attr_sf_nextentry(sfep);
if ((char *)next_sfep > endp)
return __this_address;
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index bb004fb7944a..37578b369d9b 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -13,7 +13,6 @@
* to fit into the literal area of the inode.
*/
typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
-typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
/*
* We generate this then sort it, attr_list() must return things in hash-order.
@@ -27,16 +26,26 @@ typedef struct xfs_attr_sf_sort {
unsigned char *name; /* name value, pointer into buffer */
} xfs_attr_sf_sort_t;
-#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \
- (((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \
((1 << (NBBY*(int)sizeof(uint8_t))) - 1)
-#define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \
- ((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
-#define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \
- ((xfs_attr_sf_entry_t *)((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep)))
-#define XFS_ATTR_SF_TOTSIZE(dp) /* total space in use */ \
- (be16_to_cpu(((xfs_attr_shortform_t *) \
- ((dp)->i_afp->if_u1.if_data))->hdr.totsize))
+
+/* space name/value uses */
+static inline int xfs_attr_sf_entsize_byname(uint8_t nlen, uint8_t vlen)
+{
+ return sizeof(struct xfs_attr_sf_entry) + nlen + vlen;
+}
+
+/* space an entry uses */
+static inline int xfs_attr_sf_entsize(struct xfs_attr_sf_entry *sfep)
+{
+ return struct_size(sfep, nameval, sfep->namelen + sfep->valuelen);
+}
+
+/* next entry in struct */
+static inline struct xfs_attr_sf_entry *
+xfs_attr_sf_nextentry(struct xfs_attr_sf_entry *sfep)
+{
+ return (void *)sfep + xfs_attr_sf_entsize(sfep);
+}
#endif /* __XFS_ATTR_SF_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 059ac108b1b3..b40a4e80f5ee 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -579,7 +579,7 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
/*
* Entries are packed toward the top as tight as possible.
*/
-typedef struct xfs_attr_shortform {
+struct xfs_attr_shortform {
struct xfs_attr_sf_hdr { /* constant-structure header block */
__be16 totsize; /* total bytes in shortform list */
__u8 count; /* count of active entries */
@@ -589,9 +589,9 @@ typedef struct xfs_attr_shortform {
uint8_t namelen; /* actual length of name (no NULL) */
uint8_t valuelen; /* actual length of value (no NULL) */
uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
- uint8_t nameval[1]; /* name & value bytes concatenated */
+ uint8_t nameval[]; /* name & value bytes concatenated */
} list[1]; /* variable sized array */
-} xfs_attr_shortform_t;
+};
typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
__be16 base; /* base of free region */
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 5a2db00b9d5f..6766417d5ba4 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -69,6 +69,13 @@ xfs_dquot_verify(
ddq_type != XFS_DQTYPE_GROUP)
return __this_address;
+ if ((ddq->d_type & XFS_DQTYPE_BIGTIME) &&
+ !xfs_sb_version_hasbigtime(&mp->m_sb))
+ return __this_address;
+
+ if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !ddq->d_id)
+ return __this_address;
+
if (id != -1 && id != be32_to_cpu(ddq->d_id))
return __this_address;
@@ -288,3 +295,31 @@ const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
.verify_read = xfs_dquot_buf_readahead_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
+
+/* Convert an on-disk timer value into an incore timer value. */
+time64_t
+xfs_dquot_from_disk_ts(
+ struct xfs_disk_dquot *ddq,
+ __be32 dtimer)
+{
+ uint32_t t = be32_to_cpu(dtimer);
+
+ if (t != 0 && (ddq->d_type & XFS_DQTYPE_BIGTIME))
+ return xfs_dq_bigtime_to_unix(t);
+
+ return t;
+}
+
+/* Convert an incore timer value into an on-disk timer value. */
+__be32
+xfs_dquot_to_disk_ts(
+ struct xfs_dquot *dqp,
+ time64_t timer)
+{
+ uint32_t t = timer;
+
+ if (timer != 0 && (dqp->q_type & XFS_DQTYPE_BIGTIME))
+ t = xfs_dq_unix_to_bigtime(timer);
+
+ return cpu_to_be32(t);
+}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 31b7ece985bb..dd764da08f6f 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -449,10 +449,12 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
+#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
- XFS_SB_FEAT_RO_COMPAT_REFLINK)
+ XFS_SB_FEAT_RO_COMPAT_REFLINK| \
+ XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
@@ -465,10 +467,12 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
#define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */
+#define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */
#define XFS_SB_FEAT_INCOMPAT_ALL \
(XFS_SB_FEAT_INCOMPAT_FTYPE| \
XFS_SB_FEAT_INCOMPAT_SPINODES| \
- XFS_SB_FEAT_INCOMPAT_META_UUID)
+ XFS_SB_FEAT_INCOMPAT_META_UUID| \
+ XFS_SB_FEAT_INCOMPAT_BIGTIME)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
@@ -563,6 +567,23 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
}
+static inline bool xfs_sb_version_hasbigtime(struct xfs_sb *sbp)
+{
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME);
+}
+
+/*
+ * Inode btree block counter. We record the number of inobt and finobt blocks
+ * in the AGI header so that we can skip the finobt walk at mount time when
+ * setting up per-AG reservations.
+ */
+static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp)
+{
+ return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT);
+}
+
/*
* end of superblock version macros
*/
@@ -765,6 +786,9 @@ typedef struct xfs_agi {
__be32 agi_free_root; /* root of the free inode btree */
__be32 agi_free_level;/* levels in free inode btree */
+ __be32 agi_iblocks; /* inobt blocks used */
+ __be32 agi_fblocks; /* finobt blocks used */
+
/* structure must be padded to 64 bit alignment */
} xfs_agi_t;
@@ -785,7 +809,8 @@ typedef struct xfs_agi {
#define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
#define XFS_AGI_FREE_ROOT (1 << 11)
#define XFS_AGI_FREE_LEVEL (1 << 12)
-#define XFS_AGI_NUM_BITS_R2 13
+#define XFS_AGI_IBLOCKS (1 << 13) /* both inobt/finobt block counters */
+#define XFS_AGI_NUM_BITS_R2 14
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
@@ -831,10 +856,87 @@ struct xfs_agfl {
ASSERT(xfs_daddr_to_agno(mp, d) == \
xfs_daddr_to_agno(mp, (d) + (len) - 1)))
-typedef struct xfs_timestamp {
+/*
+ * XFS Timestamps
+ * ==============
+ *
+ * Traditional ondisk inode timestamps consist of signed 32-bit counters for
+ * seconds and nanoseconds; time zero is the Unix epoch, Jan 1 00:00:00 UTC
+ * 1970, which means that the timestamp epoch is the same as the Unix epoch.
+ * Therefore, the ondisk min and max defined here can be used directly to
+ * constrain the incore timestamps on a Unix system. Note that we actually
+ * encode a __be64 value on disk.
+ *
+ * When the bigtime feature is enabled, ondisk inode timestamps become an
+ * unsigned 64-bit nanoseconds counter. This means that the bigtime inode
+ * timestamp epoch is the start of the classic timestamp range, which is
+ * Dec 31 20:45:52 UTC 1901. Because the epochs are not the same, callers
+ * /must/ use the bigtime conversion functions when encoding and decoding raw
+ * timestamps.
+ */
+typedef __be64 xfs_timestamp_t;
+
+/* Legacy timestamp encoding format. */
+struct xfs_legacy_timestamp {
__be32 t_sec; /* timestamp seconds */
__be32 t_nsec; /* timestamp nanoseconds */
-} xfs_timestamp_t;
+};
+
+/*
+ * Smallest possible ondisk seconds value with traditional timestamps. This
+ * corresponds exactly with the incore timestamp Dec 13 20:45:52 UTC 1901.
+ */
+#define XFS_LEGACY_TIME_MIN ((int64_t)S32_MIN)
+
+/*
+ * Largest possible ondisk seconds value with traditional timestamps. This
+ * corresponds exactly with the incore timestamp Jan 19 03:14:07 UTC 2038.
+ */
+#define XFS_LEGACY_TIME_MAX ((int64_t)S32_MAX)
+
+/*
+ * Smallest possible ondisk seconds value with bigtime timestamps. This
+ * corresponds (after conversion to a Unix timestamp) with the traditional
+ * minimum timestamp of Dec 13 20:45:52 UTC 1901.
+ */
+#define XFS_BIGTIME_TIME_MIN ((int64_t)0)
+
+/*
+ * Largest supported ondisk seconds value with bigtime timestamps. This
+ * corresponds (after conversion to a Unix timestamp) with an incore timestamp
+ * of Jul 2 20:20:24 UTC 2486.
+ *
+ * We round down the ondisk limit so that the bigtime quota and inode max
+ * timestamps will be the same.
+ */
+#define XFS_BIGTIME_TIME_MAX ((int64_t)((-1ULL / NSEC_PER_SEC) & ~0x3ULL))
+
+/*
+ * Bigtime epoch is set exactly to the minimum time value that a traditional
+ * 32-bit timestamp can represent when using the Unix epoch as a reference.
+ * Hence the Unix epoch is at a fixed offset into the supported bigtime
+ * timestamp range.
+ *
+ * The bigtime epoch also matches the minimum value an on-disk 32-bit XFS
+ * timestamp can represent so we will not lose any fidelity in converting
+ * to/from unix and bigtime timestamps.
+ *
+ * The following conversion factor converts a seconds counter from the Unix
+ * epoch to the bigtime epoch.
+ */
+#define XFS_BIGTIME_EPOCH_OFFSET (-(int64_t)S32_MIN)
+
+/* Convert a timestamp from the Unix epoch to the bigtime epoch. */
+static inline uint64_t xfs_unix_to_bigtime(time64_t unix_seconds)
+{
+ return (uint64_t)unix_seconds + XFS_BIGTIME_EPOCH_OFFSET;
+}
+
+/* Convert a timestamp from the bigtime epoch to the Unix epoch. */
+static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds)
+{
+ return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET;
+}
/*
* On-disk inode structure.
@@ -1061,12 +1163,22 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */
#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
+#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
+
#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
+#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
#define XFS_DIFLAG2_ANY \
- (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)
+ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
+ XFS_DIFLAG2_BIGTIME)
+
+static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
+{
+ return dip->di_version >= 3 &&
+ (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME));
+}
/*
* Inode number format:
@@ -1152,13 +1264,98 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DQTYPE_USER 0x01 /* user dquot record */
#define XFS_DQTYPE_PROJ 0x02 /* project dquot record */
#define XFS_DQTYPE_GROUP 0x04 /* group dquot record */
+#define XFS_DQTYPE_BIGTIME 0x80 /* large expiry timestamps */
/* bitmask to determine if this is a user/group/project dquot */
#define XFS_DQTYPE_REC_MASK (XFS_DQTYPE_USER | \
XFS_DQTYPE_PROJ | \
XFS_DQTYPE_GROUP)
-#define XFS_DQTYPE_ANY (XFS_DQTYPE_REC_MASK)
+#define XFS_DQTYPE_ANY (XFS_DQTYPE_REC_MASK | \
+ XFS_DQTYPE_BIGTIME)
+
+/*
+ * XFS Quota Timers
+ * ================
+ *
+ * Traditional quota grace period expiration timers are an unsigned 32-bit
+ * seconds counter; time zero is the Unix epoch, Jan 1 00:00:01 UTC 1970.
+ * Note that an expiration value of zero means that the quota limit has not
+ * been reached, and therefore no expiration has been set. Therefore, the
+ * ondisk min and max defined here can be used directly to constrain the incore
+ * quota expiration timestamps on a Unix system.
+ *
+ * When bigtime is enabled, we trade two bits of precision to expand the
+ * expiration timeout range to match that of big inode timestamps. The min and
+ * max recorded here are the on-disk limits, not a Unix timestamp.
+ *
+ * The grace period for each quota type is stored in the root dquot (id = 0)
+ * and is applied to a non-root dquot when it exceeds the soft or hard limits.
+ * The length of quota grace periods are unsigned 32-bit quantities measured in
+ * units of seconds. A value of zero means to use the default period.
+ */
+
+/*
+ * Smallest possible ondisk quota expiration value with traditional timestamps.
+ * This corresponds exactly with the incore expiration Jan 1 00:00:01 UTC 1970.
+ */
+#define XFS_DQ_LEGACY_EXPIRY_MIN ((int64_t)1)
+
+/*
+ * Largest possible ondisk quota expiration value with traditional timestamps.
+ * This corresponds exactly with the incore expiration Feb 7 06:28:15 UTC 2106.
+ */
+#define XFS_DQ_LEGACY_EXPIRY_MAX ((int64_t)U32_MAX)
+
+/*
+ * Smallest possible ondisk quota expiration value with bigtime timestamps.
+ * This corresponds (after conversion to a Unix timestamp) with the incore
+ * expiration of Jan 1 00:00:04 UTC 1970.
+ */
+#define XFS_DQ_BIGTIME_EXPIRY_MIN (XFS_DQ_LEGACY_EXPIRY_MIN)
+
+/*
+ * Largest supported ondisk quota expiration value with bigtime timestamps.
+ * This corresponds (after conversion to a Unix timestamp) with an incore
+ * expiration of Jul 2 20:20:24 UTC 2486.
+ *
+ * The ondisk field supports values up to -1U, which corresponds to an incore
+ * expiration in 2514. This is beyond the maximum the bigtime inode timestamp,
+ * so we cap the maximum bigtime quota expiration to the max inode timestamp.
+ */
+#define XFS_DQ_BIGTIME_EXPIRY_MAX ((int64_t)4074815106U)
+
+/*
+ * The following conversion factors assist in converting a quota expiration
+ * timestamp between the incore and ondisk formats.
+ */
+#define XFS_DQ_BIGTIME_SHIFT (2)
+#define XFS_DQ_BIGTIME_SLACK ((int64_t)(1ULL << XFS_DQ_BIGTIME_SHIFT) - 1)
+
+/* Convert an incore quota expiration timestamp to an ondisk bigtime value. */
+static inline uint32_t xfs_dq_unix_to_bigtime(time64_t unix_seconds)
+{
+ /*
+ * Round the expiration timestamp up to the nearest bigtime timestamp
+ * that we can store, to give users the most time to fix problems.
+ */
+ return ((uint64_t)unix_seconds + XFS_DQ_BIGTIME_SLACK) >>
+ XFS_DQ_BIGTIME_SHIFT;
+}
+
+/* Convert an ondisk bigtime quota expiration value to an incore timestamp. */
+static inline time64_t xfs_dq_bigtime_to_unix(uint32_t ondisk_seconds)
+{
+ return (time64_t)ondisk_seconds << XFS_DQ_BIGTIME_SHIFT;
+}
+
+/*
+ * Default quota grace periods, ranging from zero (use the compiled defaults)
+ * to ~136 years. These are applied to a non-root dquot that has exceeded
+ * either limit.
+ */
+#define XFS_DQ_GRACE_MIN ((int64_t)0)
+#define XFS_DQ_GRACE_MAX ((int64_t)U32_MAX)
/*
* This is the main portion of the on-disk representation of quota information
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 84bcffa87753..2a2e3cfd94f0 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -249,6 +249,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_SPINODES (1 << 18) /* sparse inode chunks */
#define XFS_FSOP_GEOM_FLAGS_RMAPBT (1 << 19) /* reverse mapping btree */
#define XFS_FSOP_GEOM_FLAGS_REFLINK (1 << 20) /* files can share blocks */
+#define XFS_FSOP_GEOM_FLAGS_BIGTIME (1 << 21) /* 64-bit nsec timestamps */
/*
* Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index a6b37db55169..974e71bc4a3a 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2473,6 +2473,7 @@ xfs_ialloc_log_agi(
offsetof(xfs_agi_t, agi_unlinked),
offsetof(xfs_agi_t, agi_free_root),
offsetof(xfs_agi_t, agi_free_level),
+ offsetof(xfs_agi_t, agi_iblocks),
sizeof(xfs_agi_t)
};
#ifdef DEBUG
@@ -2806,6 +2807,10 @@ xfs_ialloc_setup_geometry(
uint64_t icount;
uint inodes;
+ igeo->new_diflags2 = 0;
+ if (xfs_sb_version_hasbigtime(&mp->m_sb))
+ igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME;
+
/* Compute inode btree geometry. */
igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 3c8aebc36e64..cc919a2ee870 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -67,6 +67,25 @@ xfs_finobt_set_root(
XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
}
+/* Update the inode btree block counter for this btree. */
+static inline void
+xfs_inobt_mod_blockcount(
+ struct xfs_btree_cur *cur,
+ int howmuch)
+{
+ struct xfs_buf *agbp = cur->bc_ag.agbp;
+ struct xfs_agi *agi = agbp->b_addr;
+
+ if (!xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb))
+ return;
+
+ if (cur->bc_btnum == XFS_BTNUM_FINO)
+ be32_add_cpu(&agi->agi_fblocks, howmuch);
+ else if (cur->bc_btnum == XFS_BTNUM_INO)
+ be32_add_cpu(&agi->agi_iblocks, howmuch);
+ xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_IBLOCKS);
+}
+
STATIC int
__xfs_inobt_alloc_block(
struct xfs_btree_cur *cur,
@@ -102,6 +121,7 @@ __xfs_inobt_alloc_block(
new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
*stat = 1;
+ xfs_inobt_mod_blockcount(cur, 1);
return 0;
}
@@ -134,6 +154,7 @@ __xfs_inobt_free_block(
struct xfs_buf *bp,
enum xfs_ag_resv_type resv)
{
+ xfs_inobt_mod_blockcount(cur, -1);
return xfs_free_extent(cur->bc_tp,
XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
&XFS_RMAP_OINFO_INOBT, resv);
@@ -480,19 +501,29 @@ xfs_inobt_commit_staged_btree(
{
struct xfs_agi *agi = agbp->b_addr;
struct xbtree_afakeroot *afake = cur->bc_ag.afake;
+ int fields;
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
if (cur->bc_btnum == XFS_BTNUM_INO) {
+ fields = XFS_AGI_ROOT | XFS_AGI_LEVEL;
agi->agi_root = cpu_to_be32(afake->af_root);
agi->agi_level = cpu_to_be32(afake->af_levels);
- xfs_ialloc_log_agi(tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
+ if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) {
+ agi->agi_iblocks = cpu_to_be32(afake->af_blocks);
+ fields |= XFS_AGI_IBLOCKS;
+ }
+ xfs_ialloc_log_agi(tp, agbp, fields);
xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops);
} else {
+ fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL;
agi->agi_free_root = cpu_to_be32(afake->af_root);
agi->agi_free_level = cpu_to_be32(afake->af_levels);
- xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREE_ROOT |
- XFS_AGI_FREE_LEVEL);
+ if (xfs_sb_version_hasinobtcounts(&cur->bc_mp->m_sb)) {
+ agi->agi_fblocks = cpu_to_be32(afake->af_blocks);
+ fields |= XFS_AGI_IBLOCKS;
+ }
+ xfs_ialloc_log_agi(tp, agbp, fields);
xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops);
}
}
@@ -673,6 +704,28 @@ xfs_inobt_count_blocks(
return error;
}
+/* Read finobt block count from AGI header. */
+static int
+xfs_finobt_read_blocks(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_extlen_t *tree_blocks)
+{
+ struct xfs_buf *agbp;
+ struct xfs_agi *agi;
+ int error;
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error)
+ return error;
+
+ agi = agbp->b_addr;
+ *tree_blocks = be32_to_cpu(agi->agi_fblocks);
+ xfs_trans_brelse(tp, agbp);
+ return 0;
+}
+
/*
* Figure out how many blocks to reserve and how many are used by this btree.
*/
@@ -690,7 +743,11 @@ xfs_finobt_calc_reserves(
if (!xfs_sb_version_hasfinobt(&mp->m_sb))
return 0;
- error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len);
+ if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
+ error = xfs_finobt_read_blocks(mp, tp, agno, &tree_len);
+ else
+ error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO,
+ &tree_len);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 52451809c478..b4164256993d 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -603,7 +603,7 @@ xfs_iext_realloc_root(
if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
new_size = NODE_SIZE;
- new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS);
+ new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL);
memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
ifp->if_u1.if_root = new;
cur->leaf = new;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 8d5dd08eab75..c667c63f2cb0 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -157,6 +157,36 @@ xfs_imap_to_bp(
return 0;
}
+static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
+{
+ struct timespec64 tv;
+ uint32_t n;
+
+ tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n));
+ tv.tv_nsec = n;
+
+ return tv;
+}
+
+/* Convert an ondisk timestamp to an incore timestamp. */
+struct timespec64
+xfs_inode_from_disk_ts(
+ struct xfs_dinode *dip,
+ const xfs_timestamp_t ts)
+{
+ struct timespec64 tv;
+ struct xfs_legacy_timestamp *lts;
+
+ if (xfs_dinode_has_bigtime(dip))
+ return xfs_inode_decode_bigtime(be64_to_cpu(ts));
+
+ lts = (struct xfs_legacy_timestamp *)&ts;
+ tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
+ tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);
+
+ return tv;
+}
+
int
xfs_inode_from_disk(
struct xfs_inode *ip,
@@ -211,12 +241,9 @@ xfs_inode_from_disk(
* a time before epoch is converted to a time long after epoch
* on 64 bit systems.
*/
- inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
- inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
- inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
- inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
- inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
- inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
+ inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
+ inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
+ inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
to->di_size = be64_to_cpu(from->di_size);
to->di_nblocks = be64_to_cpu(from->di_nblocks);
@@ -229,8 +256,7 @@ xfs_inode_from_disk(
if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
inode_set_iversion_queried(inode,
be64_to_cpu(from->di_changecount));
- to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec);
- to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec);
+ to->di_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
to->di_flags2 = be64_to_cpu(from->di_flags2);
to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
}
@@ -252,6 +278,25 @@ out_destroy_data_fork:
return error;
}
+/* Convert an incore timestamp to an ondisk timestamp. */
+static inline xfs_timestamp_t
+xfs_inode_to_disk_ts(
+ struct xfs_inode *ip,
+ const struct timespec64 tv)
+{
+ struct xfs_legacy_timestamp *lts;
+ xfs_timestamp_t ts;
+
+ if (xfs_inode_has_bigtime(ip))
+ return cpu_to_be64(xfs_inode_encode_bigtime(tv));
+
+ lts = (struct xfs_legacy_timestamp *)&ts;
+ lts->t_sec = cpu_to_be32(tv.tv_sec);
+ lts->t_nsec = cpu_to_be32(tv.tv_nsec);
+
+ return ts;
+}
+
void
xfs_inode_to_disk(
struct xfs_inode *ip,
@@ -271,12 +316,9 @@ xfs_inode_to_disk(
to->di_projid_hi = cpu_to_be16(from->di_projid >> 16);
memset(to->di_pad, 0, sizeof(to->di_pad));
- to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
- to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
- to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
- to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
- to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
- to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+ to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
+ to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
+ to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
to->di_nlink = cpu_to_be32(inode->i_nlink);
to->di_gen = cpu_to_be32(inode->i_generation);
to->di_mode = cpu_to_be16(inode->i_mode);
@@ -295,8 +337,7 @@ xfs_inode_to_disk(
if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
to->di_version = 3;
to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
- to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec);
- to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec);
+ to->di_crtime = xfs_inode_to_disk_ts(ip, from->di_crtime);
to->di_flags2 = cpu_to_be64(from->di_flags2);
to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
to->di_ino = cpu_to_be64(ip->i_ino);
@@ -310,58 +351,6 @@ xfs_inode_to_disk(
}
}
-void
-xfs_log_dinode_to_disk(
- struct xfs_log_dinode *from,
- struct xfs_dinode *to)
-{
- to->di_magic = cpu_to_be16(from->di_magic);
- to->di_mode = cpu_to_be16(from->di_mode);
- to->di_version = from->di_version;
- to->di_format = from->di_format;
- to->di_onlink = 0;
- to->di_uid = cpu_to_be32(from->di_uid);
- to->di_gid = cpu_to_be32(from->di_gid);
- to->di_nlink = cpu_to_be32(from->di_nlink);
- to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
- to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
- memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-
- to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
- to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
- to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
- to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
- to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
- to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
-
- to->di_size = cpu_to_be64(from->di_size);
- to->di_nblocks = cpu_to_be64(from->di_nblocks);
- to->di_extsize = cpu_to_be32(from->di_extsize);
- to->di_nextents = cpu_to_be32(from->di_nextents);
- to->di_anextents = cpu_to_be16(from->di_anextents);
- to->di_forkoff = from->di_forkoff;
- to->di_aformat = from->di_aformat;
- to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
- to->di_dmstate = cpu_to_be16(from->di_dmstate);
- to->di_flags = cpu_to_be16(from->di_flags);
- to->di_gen = cpu_to_be32(from->di_gen);
-
- if (from->di_version == 3) {
- to->di_changecount = cpu_to_be64(from->di_changecount);
- to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
- to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
- to->di_flags2 = cpu_to_be64(from->di_flags2);
- to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
- to->di_ino = cpu_to_be64(from->di_ino);
- to->di_lsn = cpu_to_be64(from->di_lsn);
- memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
- uuid_copy(&to->di_uuid, &from->di_uuid);
- to->di_flushiter = 0;
- } else {
- to->di_flushiter = cpu_to_be16(from->di_flushiter);
- }
-}
-
static xfs_failaddr_t
xfs_dinode_verify_fork(
struct xfs_dinode *dip,
@@ -568,6 +557,11 @@ xfs_dinode_verify(
if (fa)
return fa;
+ /* bigtime iflag can only happen on bigtime filesystems */
+ if (xfs_dinode_has_bigtime(dip) &&
+ !xfs_sb_version_hasbigtime(&mp->m_sb))
+ return __this_address;
+
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6b08b9d060c2..536666143fe7 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -32,6 +32,11 @@ struct xfs_icdinode {
struct timespec64 di_crtime; /* time created */
};
+static inline bool xfs_icdinode_has_bigtime(const struct xfs_icdinode *icd)
+{
+ return icd->di_flags2 & XFS_DIFLAG2_BIGTIME;
+}
+
/*
* Inode location information. Stored in the inode and passed to
* xfs_imap_to_bp() to get a buffer and dinode for a given inode.
@@ -49,8 +54,6 @@ void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
xfs_lsn_t lsn);
int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
-void xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
- struct xfs_dinode *to);
xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_dinode *dip);
@@ -60,4 +63,12 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
uint32_t cowextsize, uint16_t mode, uint16_t flags,
uint64_t flags2);
+static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv)
+{
+ return xfs_unix_to_bigtime(tv.tv_sec) * NSEC_PER_SEC + tv.tv_nsec;
+}
+
+struct timespec64 xfs_inode_from_disk_ts(struct xfs_dinode *dip,
+ const xfs_timestamp_t ts);
+
#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 0cf853d42d62..7575de5cecb1 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -386,8 +386,8 @@ xfs_iroot_realloc(
cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
new_max = cur_max + rec_diff;
new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
- ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
- KM_NOFS);
+ ifp->if_broot = krealloc(ifp->if_broot, new_size,
+ GFP_NOFS | __GFP_NOFAIL);
op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -496,8 +496,8 @@ xfs_idata_realloc(
* in size so that it can be logged and stay on word boundaries.
* We enforce that here.
*/
- ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data,
- roundup(new_size, 4), KM_NOFS);
+ ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4),
+ GFP_NOFS | __GFP_NOFAIL);
ifp->if_bytes = new_size;
}
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index e3400c9c71cd..8bd00da6d2a4 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -368,10 +368,13 @@ static inline int xfs_ilog_fdata(int w)
* directly mirrors the xfs_dinode structure as it must contain all the same
* information.
*/
-typedef struct xfs_ictimestamp {
+typedef uint64_t xfs_ictimestamp_t;
+
+/* Legacy timestamp encoding format. */
+struct xfs_legacy_ictimestamp {
int32_t t_sec; /* timestamp seconds */
int32_t t_nsec; /* timestamp nanoseconds */
-} xfs_ictimestamp_t;
+};
/*
* Define the format of the inode core that is logged. This structure must be
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 641132d0e39d..3cca2bfe714c 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -121,7 +121,6 @@ struct xlog_recover {
void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len,
const struct xfs_buf_ops *ops);
bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
-void xlog_recover_iodone(struct xfs_buf *bp);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id);
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 076bdc7037ee..0f0af4e35032 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -23,7 +23,8 @@ typedef uint8_t xfs_dqtype_t;
#define XFS_DQTYPE_STRINGS \
{ XFS_DQTYPE_USER, "USER" }, \
{ XFS_DQTYPE_PROJ, "PROJ" }, \
- { XFS_DQTYPE_GROUP, "GROUP" }
+ { XFS_DQTYPE_GROUP, "GROUP" }, \
+ { XFS_DQTYPE_BIGTIME, "BIGTIME" }
/*
* flags for q_flags field in the dquot.
@@ -143,4 +144,9 @@ extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
extern void xfs_dqblk_repair(struct xfs_mount *mp, struct xfs_dqblk *dqb,
xfs_dqid_t id, xfs_dqtype_t type);
+struct xfs_dquot;
+time64_t xfs_dquot_from_disk_ts(struct xfs_disk_dquot *ddq,
+ __be32 dtimer);
+__be32 xfs_dquot_to_disk_ts(struct xfs_dquot *ddq, time64_t timer);
+
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index ae9aaf1f34bf..5aeafa59ed27 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -954,7 +954,7 @@ xfs_log_sb(
struct xfs_trans *tp)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_buf *bp = xfs_trans_getsb(tp, mp);
+ struct xfs_buf *bp = xfs_trans_getsb(tp);
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
@@ -1084,7 +1084,7 @@ xfs_sync_sb_buf(
if (error)
return error;
- bp = xfs_trans_getsb(tp, mp);
+ bp = xfs_trans_getsb(tp);
xfs_log_sb(tp);
xfs_trans_bhold(tp, bp);
xfs_trans_set_sync(tp);
@@ -1166,6 +1166,8 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT;
if (xfs_sb_version_hasreflink(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK;
+ if (xfs_sb_version_hasbigtime(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME;
if (xfs_sb_version_hassector(sbp))
geo->logsectsize = sbp->sb_logsectsize;
else
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 708feb8eac76..c795ae47b3c9 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -176,6 +176,9 @@ struct xfs_ino_geometry {
unsigned int ialloc_align;
unsigned int agino_log; /* #bits for agino in inum */
+
+ /* precomputed value for di_flags2 */
+ uint64_t new_diflags2;
};
#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index b7e222befb08..90f1d5645052 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -132,6 +132,17 @@ xfs_trans_log_inode(
}
/*
+ * If we're updating the inode core or the timestamps and it's possible
+ * to upgrade this inode to bigtime format, do so now.
+ */
+ if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
+ xfs_sb_version_hasbigtime(&ip->i_mount->m_sb) &&
+ !xfs_inode_has_bigtime(ip)) {
+ ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME;
+ flags |= XFS_ILOG_CORE;
+ }
+
+ /*
* Record the specific change for fdatasync optimisation. This allows
* fdatasync to skip log forces for inodes that are only timestamp
* dirty.
@@ -177,9 +188,9 @@ xfs_trans_log_inode(
/*
* Always OR in the bits from the ili_last_fields field. This is to
- * coordinate with the xfs_iflush() and xfs_iflush_done() routines in
- * the eventual clearing of the ili_fields bits. See the big comment in
- * xfs_iflush() for an explanation of this coordination mechanism.
+ * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
+ * in the eventual clearing of the ili_fields bits. See the big comment
+ * in xfs_iflush() for an explanation of this coordination mechanism.
*/
iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
spin_unlock(&iip->ili_lock);
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index e9bcf1faa183..ae8e2e0ac64a 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -781,6 +781,35 @@ xchk_agi_xref_icounts(
xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp);
}
+/* Check agi_[fi]blocks against tree size */
+static inline void
+xchk_agi_xref_fiblocks(
+ struct xfs_scrub *sc)
+{
+ struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
+ xfs_agblock_t blocks;
+ int error = 0;
+
+ if (!xfs_sb_version_hasinobtcounts(&sc->mp->m_sb))
+ return;
+
+ if (sc->sa.ino_cur) {
+ error = xfs_btree_count_blocks(sc->sa.ino_cur, &blocks);
+ if (!xchk_should_check_xref(sc, &error, &sc->sa.ino_cur))
+ return;
+ if (blocks != be32_to_cpu(agi->agi_iblocks))
+ xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp);
+ }
+
+ if (sc->sa.fino_cur) {
+ error = xfs_btree_count_blocks(sc->sa.fino_cur, &blocks);
+ if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur))
+ return;
+ if (blocks != be32_to_cpu(agi->agi_fblocks))
+ xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp);
+ }
+}
+
/* Cross-reference with the other btrees. */
STATIC void
xchk_agi_xref(
@@ -804,6 +833,7 @@ xchk_agi_xref(
xchk_agi_xref_icounts(sc);
xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_FS);
xchk_xref_is_not_shared(sc, agbno, 1);
+ xchk_agi_xref_fiblocks(sc);
/* scrub teardown will take care of sc->sa for us */
}
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index bca2ab1d4be9..401f71579ce6 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -810,10 +810,34 @@ xrep_agi_calc_from_btrees(
error = xfs_ialloc_count_inodes(cur, &count, &freecount);
if (error)
goto err;
+ if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+ xfs_agblock_t blocks;
+
+ error = xfs_btree_count_blocks(cur, &blocks);
+ if (error)
+ goto err;
+ agi->agi_iblocks = cpu_to_be32(blocks);
+ }
xfs_btree_del_cursor(cur, error);
agi->agi_count = cpu_to_be32(count);
agi->agi_freecount = cpu_to_be32(freecount);
+
+ if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
+ xfs_sb_version_hasinobtcounts(&mp->m_sb)) {
+ xfs_agblock_t blocks;
+
+ cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno,
+ XFS_BTNUM_FINO);
+ if (error)
+ goto err;
+ error = xfs_btree_count_blocks(cur, &blocks);
+ if (error)
+ goto err;
+ xfs_btree_del_cursor(cur, error);
+ agi->agi_fblocks = cpu_to_be32(blocks);
+ }
+
return 0;
err:
xfs_btree_del_cursor(cur, error);
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 6d483ab29e63..3aa85b64de36 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -190,11 +190,30 @@ xchk_inode_flags2(
if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
goto bad;
+ /* no bigtime iflag without the bigtime feature */
+ if (xfs_dinode_has_bigtime(dip) &&
+ !xfs_sb_version_hasbigtime(&mp->m_sb))
+ goto bad;
+
return;
bad:
xchk_ino_set_corrupt(sc, ino);
}
+static inline void
+xchk_dinode_nsec(
+ struct xfs_scrub *sc,
+ xfs_ino_t ino,
+ struct xfs_dinode *dip,
+ const xfs_timestamp_t ts)
+{
+ struct timespec64 tv;
+
+ tv = xfs_inode_from_disk_ts(dip, ts);
+ if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC)
+ xchk_ino_set_corrupt(sc, ino);
+}
+
/* Scrub all the ondisk inode fields. */
STATIC void
xchk_dinode(
@@ -293,12 +312,9 @@ xchk_dinode(
}
/* di_[amc]time.nsec */
- if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
- xchk_ino_set_corrupt(sc, ino);
- if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
- xchk_ino_set_corrupt(sc, ino);
- if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
- xchk_ino_set_corrupt(sc, ino);
+ xchk_dinode_nsec(sc, ino, dip, dip->di_atime);
+ xchk_dinode_nsec(sc, ino, dip, dip->di_mtime);
+ xchk_dinode_nsec(sc, ino, dip, dip->di_ctime);
/*
* di_size. xfs_dinode_verify checks for things that screw up
@@ -403,8 +419,7 @@ xchk_dinode(
}
if (dip->di_version >= 3) {
- if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
- xchk_ino_set_corrupt(sc, ino);
+ xchk_dinode_nsec(sc, ino, dip, dip->di_crtime);
xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
xchk_inode_cowextsize(sc, dip, ino, mode, flags,
flags2);
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index 5641ae512c9e..c08be5ede066 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -22,7 +22,7 @@ xchk_setup_symlink(
struct xfs_inode *ip)
{
/* Allocate the buffer without the inode lock held. */
- sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, 0);
+ sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, GFP_KERNEL);
if (!sc->buf)
return -ENOMEM;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index d4c687b5cd06..c544951a0c07 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -192,7 +192,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (acl) {
args.valuelen = XFS_ACL_SIZE(acl->a_count);
- args.value = kmem_zalloc_large(args.valuelen, 0);
+ args.value = kvzalloc(args.valuelen, GFP_KERNEL);
if (!args.value)
return -ENOMEM;
xfs_acl_to_disk(args.value, acl);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 50f922cad91a..8f8837fe21cf 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -61,7 +61,7 @@ xfs_attr_shortform_list(
int error = 0;
ASSERT(dp->i_afp != NULL);
- sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
+ sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data;
ASSERT(sf != NULL);
if (!sf->hdr.count)
return 0;
@@ -96,7 +96,7 @@ xfs_attr_shortform_list(
*/
if (context->seen_enough)
break;
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
+ sfe = xfs_attr_sf_nextentry(sfe);
}
trace_xfs_attr_list_sf_all(context);
return 0;
@@ -136,7 +136,7 @@ xfs_attr_shortform_list(
/* These are bytes, and both on-disk, don't endian-flip */
sbp->valuelen = sfe->valuelen;
sbp->flags = sfe->flags;
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
+ sfe = xfs_attr_sf_nextentry(sfe);
sbp++;
nsbuf++;
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5123f82f2477..f2a8a0e75e1f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -946,6 +946,14 @@ xfs_free_file_space(
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
+ /* We can only free complete realtime extents. */
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ xfs_extlen_t extsz = xfs_get_extsz_hint(ip);
+
+ if ((startoffset_fsb | endoffset_fsb) & (extsz - 1))
+ return -EINVAL;
+ }
+
/*
* Need to zero the stuff we're not freeing, on disk.
*/
@@ -1139,6 +1147,14 @@ xfs_insert_file_space(
trace_xfs_insert_file_space(ip);
+ /* We can only insert complete realtime extents. */
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ xfs_extlen_t extsz = xfs_get_extsz_hint(ip);
+
+ if ((stop_fsb | shift_fsb) & (extsz - 1))
+ return -EINVAL;
+ }
+
error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
if (error)
return error;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d4cdcb6fb2fe..4e4cf91f4f9f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -52,6 +52,15 @@ static kmem_zone_t *xfs_buf_zone;
* b_lock (trylock due to inversion)
*/
+static int __xfs_buf_submit(struct xfs_buf *bp, bool wait);
+
+static inline int
+xfs_buf_submit(
+ struct xfs_buf *bp)
+{
+ return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
+}
+
static inline int
xfs_buf_is_vmapped(
struct xfs_buf *bp)
@@ -751,7 +760,7 @@ found:
return 0;
}
-STATIC int
+int
_xfs_buf_read(
xfs_buf_t *bp,
xfs_buf_flags_t flags)
@@ -759,7 +768,7 @@ _xfs_buf_read(
ASSERT(!(flags & XBF_WRITE));
ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
- bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
+ bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
return xfs_buf_submit(bp);
@@ -1170,20 +1179,145 @@ xfs_buf_wait_unpin(
set_current_state(TASK_RUNNING);
}
+static void
+xfs_buf_ioerror_alert_ratelimited(
+ struct xfs_buf *bp)
+{
+ static unsigned long lasttime;
+ static struct xfs_buftarg *lasttarg;
+
+ if (bp->b_target != lasttarg ||
+ time_after(jiffies, (lasttime + 5*HZ))) {
+ lasttime = jiffies;
+ xfs_buf_ioerror_alert(bp, __this_address);
+ }
+ lasttarg = bp->b_target;
+}
+
/*
- * Buffer Utility Routines
+ * Account for this latest trip around the retry handler, and decide if
+ * we've failed enough times to constitute a permanent failure.
*/
+static bool
+xfs_buf_ioerror_permanent(
+ struct xfs_buf *bp,
+ struct xfs_error_cfg *cfg)
+{
+ struct xfs_mount *mp = bp->b_mount;
-void
+ if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
+ ++bp->b_retries > cfg->max_retries)
+ return true;
+ if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
+ time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
+ return true;
+
+ /* At unmount we may treat errors differently */
+ if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
+ return true;
+
+ return false;
+}
+
+/*
+ * On a sync write or shutdown we just want to stale the buffer and let the
+ * caller handle the error in bp->b_error appropriately.
+ *
+ * If the write was asynchronous then no one will be looking for the error. If
+ * this is the first failure of this type, clear the error state and write the
+ * buffer out again. This means we always retry an async write failure at least
+ * once, but we also need to set the buffer up to behave correctly now for
+ * repeated failures.
+ *
+ * If we get repeated async write failures, then we take action according to the
+ * error configuration we have been set up to use.
+ *
+ * Returns true if this function took care of error handling and the caller must
+ * not touch the buffer again. Return false if the caller should proceed with
+ * normal I/O completion handling.
+ */
+static bool
+xfs_buf_ioend_handle_error(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_error_cfg *cfg;
+
+ /*
+ * If we've already decided to shutdown the filesystem because of I/O
+ * errors, there's no point in giving this a retry.
+ */
+ if (XFS_FORCED_SHUTDOWN(mp))
+ goto out_stale;
+
+ xfs_buf_ioerror_alert_ratelimited(bp);
+
+ /*
+ * We're not going to bother about retrying this during recovery.
+ * One strike!
+ */
+ if (bp->b_flags & _XBF_LOGRECOVERY) {
+ xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+ return false;
+ }
+
+ /*
+ * Synchronous writes will have callers process the error.
+ */
+ if (!(bp->b_flags & XBF_ASYNC))
+ goto out_stale;
+
+ trace_xfs_buf_iodone_async(bp, _RET_IP_);
+
+ cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
+ if (bp->b_last_error != bp->b_error ||
+ !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
+ bp->b_last_error = bp->b_error;
+ if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
+ !bp->b_first_retry_time)
+ bp->b_first_retry_time = jiffies;
+ goto resubmit;
+ }
+
+ /*
+ * Permanent error - we need to trigger a shutdown if we haven't already
+ * to indicate that inconsistency will result from this action.
+ */
+ if (xfs_buf_ioerror_permanent(bp, cfg)) {
+ xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+ goto out_stale;
+ }
+
+ /* Still considered a transient error. Caller will schedule retries. */
+ if (bp->b_flags & _XBF_INODES)
+ xfs_buf_inode_io_fail(bp);
+ else if (bp->b_flags & _XBF_DQUOTS)
+ xfs_buf_dquot_io_fail(bp);
+ else
+ ASSERT(list_empty(&bp->b_li_list));
+ xfs_buf_ioerror(bp, 0);
+ xfs_buf_relse(bp);
+ return true;
+
+resubmit:
+ xfs_buf_ioerror(bp, 0);
+ bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
+ xfs_buf_submit(bp);
+ return true;
+out_stale:
+ xfs_buf_stale(bp);
+ bp->b_flags |= XBF_DONE;
+ bp->b_flags &= ~XBF_WRITE;
+ trace_xfs_buf_error_relse(bp, _RET_IP_);
+ return false;
+}
+
+static void
xfs_buf_ioend(
struct xfs_buf *bp)
{
- bool read = bp->b_flags & XBF_READ;
-
trace_xfs_buf_iodone(bp, _RET_IP_);
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-
/*
* Pull in IO completion errors now. We are guaranteed to be running
* single threaded, so we don't need the lock to read b_io_error.
@@ -1191,39 +1325,47 @@ xfs_buf_ioend(
if (!bp->b_error && bp->b_io_error)
xfs_buf_ioerror(bp, bp->b_io_error);
- if (read) {
+ if (bp->b_flags & XBF_READ) {
if (!bp->b_error && bp->b_ops)
bp->b_ops->verify_read(bp);
if (!bp->b_error)
bp->b_flags |= XBF_DONE;
- xfs_buf_ioend_finish(bp);
- return;
- }
+ } else {
+ if (!bp->b_error) {
+ bp->b_flags &= ~XBF_WRITE_FAIL;
+ bp->b_flags |= XBF_DONE;
+ }
- if (!bp->b_error) {
- bp->b_flags &= ~XBF_WRITE_FAIL;
- bp->b_flags |= XBF_DONE;
- }
+ if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
+ return;
- /*
- * If this is a log recovery buffer, we aren't doing transactional IO
- * yet so we need to let it handle IO completions.
- */
- if (bp->b_flags & _XBF_LOGRECOVERY) {
- xlog_recover_iodone(bp);
- return;
- }
+ /* clear the retry state */
+ bp->b_last_error = 0;
+ bp->b_retries = 0;
+ bp->b_first_retry_time = 0;
- if (bp->b_flags & _XBF_INODES) {
- xfs_buf_inode_iodone(bp);
- return;
- }
+ /*
+ * Note that for things like remote attribute buffers, there may
+ * not be a buffer log item here, so processing the buffer log
+ * item must remain optional.
+ */
+ if (bp->b_log_item)
+ xfs_buf_item_done(bp);
+
+ if (bp->b_flags & _XBF_INODES)
+ xfs_buf_inode_iodone(bp);
+ else if (bp->b_flags & _XBF_DQUOTS)
+ xfs_buf_dquot_iodone(bp);
- if (bp->b_flags & _XBF_DQUOTS) {
- xfs_buf_dquot_iodone(bp);
- return;
}
- xfs_buf_iodone(bp);
+
+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
+ _XBF_LOGRECOVERY);
+
+ if (bp->b_flags & XBF_ASYNC)
+ xfs_buf_relse(bp);
+ else
+ complete(&bp->b_iowait);
}
static void
@@ -1506,7 +1648,7 @@ xfs_buf_iowait(
* safe to reference the buffer after a call to this function unless the caller
* holds an additional reference itself.
*/
-int
+static int
__xfs_buf_submit(
struct xfs_buf *bp,
bool wait)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 755b652e695a..bfd2907e7bc4 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -249,6 +249,7 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags,
int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
size_t numblks, int flags, struct xfs_buf **bpp,
const struct xfs_buf_ops *ops);
+int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags);
void xfs_buf_hold(struct xfs_buf *bp);
/* Releasing Buffers */
@@ -269,28 +270,12 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_buf *bp);
-extern void xfs_buf_ioend(struct xfs_buf *bp);
-static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
-{
- if (bp->b_flags & XBF_ASYNC)
- xfs_buf_relse(bp);
- else
- complete(&bp->b_iowait);
-}
extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
xfs_failaddr_t failaddr);
#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa);
void xfs_buf_ioend_fail(struct xfs_buf *);
-
-extern int __xfs_buf_submit(struct xfs_buf *bp, bool);
-static inline int xfs_buf_submit(struct xfs_buf *bp)
-{
- bool wait = bp->b_flags & XBF_ASYNC ? false : true;
- return __xfs_buf_submit(bp, wait);
-}
-
void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 408d1b572d3f..0356f2e340a1 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -30,8 +30,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_buf_log_item, bli_item);
}
-static void xfs_buf_item_done(struct xfs_buf *bp);
-
/* Is this log iovec plausibly large enough to contain the buffer log format? */
bool
xfs_buf_log_check_iovec(
@@ -463,7 +461,7 @@ xfs_buf_item_unpin(
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_item_done(bp);
- xfs_iflush_done(bp);
+ xfs_buf_inode_iodone(bp);
ASSERT(list_empty(&bp->b_li_list));
} else {
xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
@@ -956,153 +954,10 @@ xfs_buf_item_relse(
xfs_buf_item_free(bip);
}
-/*
- * Decide if we're going to retry the write after a failure, and prepare
- * the buffer for retrying the write.
- */
-static bool
-xfs_buf_ioerror_fail_without_retry(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_mount;
- static ulong lasttime;
- static xfs_buftarg_t *lasttarg;
-
- /*
- * If we've already decided to shutdown the filesystem because of
- * I/O errors, there's no point in giving this a retry.
- */
- if (XFS_FORCED_SHUTDOWN(mp))
- return true;
-
- if (bp->b_target != lasttarg ||
- time_after(jiffies, (lasttime + 5*HZ))) {
- lasttime = jiffies;
- xfs_buf_ioerror_alert(bp, __this_address);
- }
- lasttarg = bp->b_target;
-
- /* synchronous writes will have callers process the error */
- if (!(bp->b_flags & XBF_ASYNC))
- return true;
- return false;
-}
-
-static bool
-xfs_buf_ioerror_retry(
- struct xfs_buf *bp,
- struct xfs_error_cfg *cfg)
-{
- if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
- bp->b_last_error == bp->b_error)
- return false;
-
- bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
- bp->b_last_error = bp->b_error;
- if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
- !bp->b_first_retry_time)
- bp->b_first_retry_time = jiffies;
- return true;
-}
-
-/*
- * Account for this latest trip around the retry handler, and decide if
- * we've failed enough times to constitute a permanent failure.
- */
-static bool
-xfs_buf_ioerror_permanent(
- struct xfs_buf *bp,
- struct xfs_error_cfg *cfg)
-{
- struct xfs_mount *mp = bp->b_mount;
-
- if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
- ++bp->b_retries > cfg->max_retries)
- return true;
- if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
- time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
- return true;
-
- /* At unmount we may treat errors differently */
- if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
- return true;
-
- return false;
-}
-
-/*
- * On a sync write or shutdown we just want to stale the buffer and let the
- * caller handle the error in bp->b_error appropriately.
- *
- * If the write was asynchronous then no one will be looking for the error. If
- * this is the first failure of this type, clear the error state and write the
- * buffer out again. This means we always retry an async write failure at least
- * once, but we also need to set the buffer up to behave correctly now for
- * repeated failures.
- *
- * If we get repeated async write failures, then we take action according to the
- * error configuration we have been set up to use.
- *
- * Multi-state return value:
- *
- * XBF_IOERROR_FINISH: clear IO error retry state and run callback completions
- * XBF_IOERROR_DONE: resubmitted immediately, do not run any completions
- * XBF_IOERROR_FAIL: transient error, run failure callback completions and then
- * release the buffer
- */
-enum {
- XBF_IOERROR_FINISH,
- XBF_IOERROR_DONE,
- XBF_IOERROR_FAIL,
-};
-
-static int
-xfs_buf_iodone_error(
- struct xfs_buf *bp)
-{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_error_cfg *cfg;
-
- if (xfs_buf_ioerror_fail_without_retry(bp))
- goto out_stale;
-
- trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
-
- cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
- if (xfs_buf_ioerror_retry(bp, cfg)) {
- xfs_buf_ioerror(bp, 0);
- xfs_buf_submit(bp);
- return XBF_IOERROR_DONE;
- }
-
- /*
- * Permanent error - we need to trigger a shutdown if we haven't already
- * to indicate that inconsistency will result from this action.
- */
- if (xfs_buf_ioerror_permanent(bp, cfg)) {
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
- goto out_stale;
- }
-
- /* Still considered a transient error. Caller will schedule retries. */
- return XBF_IOERROR_FAIL;
-
-out_stale:
- xfs_buf_stale(bp);
- bp->b_flags |= XBF_DONE;
- trace_xfs_buf_error_relse(bp, _RET_IP_);
- return XBF_IOERROR_FINISH;
-}
-
-static void
+void
xfs_buf_item_done(
struct xfs_buf *bp)
{
- struct xfs_buf_log_item *bip = bp->b_log_item;
-
- if (!bip)
- return;
-
/*
* If we are forcibly shutting down, this may well be off the AIL
* already. That's because we simulate the log-committed callbacks to
@@ -1111,113 +966,12 @@ xfs_buf_item_done(
* xfs_trans_ail_delete() takes care of these.
*
* Either way, AIL is useless if we're forcing a shutdown.
+ *
+ * Note that log recovery writes might have buffer items that are not on
+ * the AIL even when the file system is not shut down.
*/
- xfs_trans_ail_delete(&bip->bli_item, SHUTDOWN_CORRUPT_INCORE);
- bp->b_log_item = NULL;
- xfs_buf_item_free(bip);
- xfs_buf_rele(bp);
-}
-
-static inline void
-xfs_buf_clear_ioerror_retry_state(
- struct xfs_buf *bp)
-{
- bp->b_last_error = 0;
- bp->b_retries = 0;
- bp->b_first_retry_time = 0;
-}
-
-/*
- * Inode buffer iodone callback function.
- */
-void
-xfs_buf_inode_iodone(
- struct xfs_buf *bp)
-{
- if (bp->b_error) {
- struct xfs_log_item *lip;
- int ret = xfs_buf_iodone_error(bp);
-
- if (ret == XBF_IOERROR_FINISH)
- goto finish_iodone;
- if (ret == XBF_IOERROR_DONE)
- return;
- ASSERT(ret == XBF_IOERROR_FAIL);
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
- set_bit(XFS_LI_FAILED, &lip->li_flags);
- }
- xfs_buf_ioerror(bp, 0);
- xfs_buf_relse(bp);
- return;
- }
-
-finish_iodone:
- xfs_buf_clear_ioerror_retry_state(bp);
- xfs_buf_item_done(bp);
- xfs_iflush_done(bp);
- xfs_buf_ioend_finish(bp);
-}
-
-/*
- * Dquot buffer iodone callback function.
- */
-void
-xfs_buf_dquot_iodone(
- struct xfs_buf *bp)
-{
- if (bp->b_error) {
- struct xfs_log_item *lip;
- int ret = xfs_buf_iodone_error(bp);
-
- if (ret == XBF_IOERROR_FINISH)
- goto finish_iodone;
- if (ret == XBF_IOERROR_DONE)
- return;
- ASSERT(ret == XBF_IOERROR_FAIL);
- spin_lock(&bp->b_mount->m_ail->ail_lock);
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
- xfs_set_li_failed(lip, bp);
- }
- spin_unlock(&bp->b_mount->m_ail->ail_lock);
- xfs_buf_ioerror(bp, 0);
- xfs_buf_relse(bp);
- return;
- }
-
-finish_iodone:
- xfs_buf_clear_ioerror_retry_state(bp);
- /* a newly allocated dquot buffer might have a log item attached */
- xfs_buf_item_done(bp);
- xfs_dquot_done(bp);
- xfs_buf_ioend_finish(bp);
-}
-
-/*
- * Dirty buffer iodone callback function.
- *
- * Note that for things like remote attribute buffers, there may not be a buffer
- * log item here, so processing the buffer log item must remain be optional.
- */
-void
-xfs_buf_iodone(
- struct xfs_buf *bp)
-{
- if (bp->b_error) {
- int ret = xfs_buf_iodone_error(bp);
-
- if (ret == XBF_IOERROR_FINISH)
- goto finish_iodone;
- if (ret == XBF_IOERROR_DONE)
- return;
- ASSERT(ret == XBF_IOERROR_FAIL);
- ASSERT(list_empty(&bp->b_li_list));
- xfs_buf_ioerror(bp, 0);
- xfs_buf_relse(bp);
- return;
- }
-
-finish_iodone:
- xfs_buf_clear_ioerror_retry_state(bp);
- xfs_buf_item_done(bp);
- xfs_buf_ioend_finish(bp);
+ xfs_trans_ail_delete(&bp->b_log_item->bli_item,
+ (bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
+ SHUTDOWN_CORRUPT_INCORE);
+ xfs_buf_item_relse(bp);
}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 23507cbb4c41..50aa0f5ef959 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -50,12 +50,24 @@ struct xfs_buf_log_item {
};
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
+void xfs_buf_item_done(struct xfs_buf *bp);
void xfs_buf_item_relse(struct xfs_buf *);
bool xfs_buf_item_put(struct xfs_buf_log_item *);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_inode_iodone(struct xfs_buf *);
+void xfs_buf_inode_io_fail(struct xfs_buf *bp);
+#ifdef CONFIG_XFS_QUOTA
void xfs_buf_dquot_iodone(struct xfs_buf *);
+void xfs_buf_dquot_io_fail(struct xfs_buf *bp);
+#else
+static inline void xfs_buf_dquot_iodone(struct xfs_buf *bp)
+{
+}
+static inline void xfs_buf_dquot_io_fail(struct xfs_buf *bp)
+{
+}
+#endif /* CONFIG_XFS_QUOTA */
void xfs_buf_iodone(struct xfs_buf *);
bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 8f0457d67d77..24c7a8d11e1a 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -414,7 +414,7 @@ xlog_recover_validate_buf_type(
*
* Write verifiers update the metadata LSN from log items attached to
* the buffer. Therefore, initialize a bli purely to carry the LSN to
- * the verifier. We'll clean it up in our ->iodone() callback.
+ * the verifier.
*/
if (bp->b_ops) {
struct xfs_buf_log_item *bip;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index bcd73b9c2994..3072814e407d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -98,12 +98,33 @@ xfs_qm_adjust_dqlimits(
xfs_dquot_set_prealloc_limits(dq);
}
+/* Set the expiration time of a quota's grace period. */
+time64_t
+xfs_dquot_set_timeout(
+ struct xfs_mount *mp,
+ time64_t timeout)
+{
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
+
+ return clamp_t(time64_t, timeout, qi->qi_expiry_min,
+ qi->qi_expiry_max);
+}
+
+/* Set the length of the default grace period. */
+time64_t
+xfs_dquot_set_grace_period(
+ time64_t grace)
+{
+ return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
+}
+
/*
* Determine if this quota counter is over either limit and set the quota
* timers as appropriate.
*/
static inline void
xfs_qm_adjust_res_timer(
+ struct xfs_mount *mp,
struct xfs_dquot_res *res,
struct xfs_quota_limits *qlim)
{
@@ -112,7 +133,8 @@ xfs_qm_adjust_res_timer(
if ((res->softlimit && res->count > res->softlimit) ||
(res->hardlimit && res->count > res->hardlimit)) {
if (res->timer == 0)
- res->timer = ktime_get_real_seconds() + qlim->time;
+ res->timer = xfs_dquot_set_timeout(mp,
+ ktime_get_real_seconds() + qlim->time);
} else {
if (res->timer == 0)
res->warnings = 0;
@@ -145,9 +167,9 @@ xfs_qm_adjust_dqtimers(
ASSERT(dq->q_id);
defq = xfs_get_defquota(qi, xfs_dquot_type(dq));
- xfs_qm_adjust_res_timer(&dq->q_blk, &defq->blk);
- xfs_qm_adjust_res_timer(&dq->q_ino, &defq->ino);
- xfs_qm_adjust_res_timer(&dq->q_rtb, &defq->rtb);
+ xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_blk, &defq->blk);
+ xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_ino, &defq->ino);
+ xfs_qm_adjust_res_timer(dq->q_mount, &dq->q_rtb, &defq->rtb);
}
/*
@@ -201,6 +223,8 @@ xfs_qm_init_dquot_blk(
d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
d->dd_diskdq.d_id = cpu_to_be32(curid);
d->dd_diskdq.d_type = type;
+ if (curid > 0 && xfs_sb_version_hasbigtime(&mp->m_sb))
+ d->dd_diskdq.d_type |= XFS_DQTYPE_BIGTIME;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
@@ -514,9 +538,9 @@ xfs_dquot_from_disk(
dqp->q_ino.warnings = be16_to_cpu(ddqp->d_iwarns);
dqp->q_rtb.warnings = be16_to_cpu(ddqp->d_rtbwarns);
- dqp->q_blk.timer = be32_to_cpu(ddqp->d_btimer);
- dqp->q_ino.timer = be32_to_cpu(ddqp->d_itimer);
- dqp->q_rtb.timer = be32_to_cpu(ddqp->d_rtbtimer);
+ dqp->q_blk.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_btimer);
+ dqp->q_ino.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_itimer);
+ dqp->q_rtb.timer = xfs_dquot_from_disk_ts(ddqp, ddqp->d_rtbtimer);
/*
* Reservation counters are defined as reservation plus current usage
@@ -559,9 +583,9 @@ xfs_dquot_to_disk(
ddqp->d_iwarns = cpu_to_be16(dqp->q_ino.warnings);
ddqp->d_rtbwarns = cpu_to_be16(dqp->q_rtb.warnings);
- ddqp->d_btimer = cpu_to_be32(dqp->q_blk.timer);
- ddqp->d_itimer = cpu_to_be32(dqp->q_ino.timer);
- ddqp->d_rtbtimer = cpu_to_be32(dqp->q_rtb.timer);
+ ddqp->d_btimer = xfs_dquot_to_disk_ts(dqp, dqp->q_blk.timer);
+ ddqp->d_itimer = xfs_dquot_to_disk_ts(dqp, dqp->q_ino.timer);
+ ddqp->d_rtbtimer = xfs_dquot_to_disk_ts(dqp, dqp->q_rtb.timer);
}
/* Allocate and initialize the dquot buffer for this in-core dquot. */
@@ -1107,7 +1131,7 @@ xfs_qm_dqflush_done(
}
void
-xfs_dquot_done(
+xfs_buf_dquot_iodone(
struct xfs_buf *bp)
{
struct xfs_log_item *lip, *n;
@@ -1118,6 +1142,18 @@ xfs_dquot_done(
}
}
+void
+xfs_buf_dquot_io_fail(
+ struct xfs_buf *bp)
+{
+ struct xfs_log_item *lip;
+
+ spin_lock(&bp->b_mount->m_ail->ail_lock);
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+ xfs_set_li_failed(lip, bp);
+ spin_unlock(&bp->b_mount->m_ail->ail_lock);
+}
+
/* Check incore dquot for errors before we flush. */
static xfs_failaddr_t
xfs_qm_dqflush_check(
@@ -1145,6 +1181,14 @@ xfs_qm_dqflush_check(
!dqp->q_rtb.timer)
return __this_address;
+ /* bigtime flag should never be set on root dquots */
+ if (dqp->q_type & XFS_DQTYPE_BIGTIME) {
+ if (!xfs_sb_version_hasbigtime(&dqp->q_mount->m_sb))
+ return __this_address;
+ if (dqp->q_id == 0)
+ return __this_address;
+ }
+
return NULL;
}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 282a65da93c7..f642884a6834 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -237,4 +237,7 @@ typedef int (*xfs_qm_dqiterate_fn)(struct xfs_dquot *dq,
int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
xfs_qm_dqiterate_fn iter_fn, void *priv);
+time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout);
+time64_t xfs_dquot_set_grace_period(time64_t grace);
+
#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a29f78a663ca..3d1b95124744 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1008,6 +1008,21 @@ xfs_file_fadvise(
return ret;
}
+/* Does this file, inode, or mount want synchronous writes? */
+static inline bool xfs_file_sync_writes(struct file *filp)
+{
+ struct xfs_inode *ip = XFS_I(file_inode(filp));
+
+ if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
+ return true;
+ if (filp->f_flags & (__O_SYNC | O_DSYNC))
+ return true;
+ if (IS_SYNC(file_inode(filp)))
+ return true;
+
+ return false;
+}
+
STATIC loff_t
xfs_file_remap_range(
struct file *file_in,
@@ -1065,7 +1080,7 @@ xfs_file_remap_range(
if (ret)
goto out_unlock;
- if (mp->m_flags & XFS_MOUNT_WSYNC)
+ if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
xfs_log_force_inode(dest);
out_unlock:
xfs_iunlock2_io_mmap(src, dest);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 101028ebb571..deb99300d171 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -52,7 +52,6 @@ xfs_inode_alloc(
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
/* initialise the xfs inode */
@@ -123,7 +122,7 @@ void
xfs_inode_free(
struct xfs_inode *ip)
{
- ASSERT(!xfs_isiflocked(ip));
+ ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));
/*
* Because we use RCU freeing we need to ensure the inode always
@@ -1035,23 +1034,21 @@ xfs_reclaim_inode(
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
goto out;
- if (!xfs_iflock_nowait(ip))
+ if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
goto out_iunlock;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_iunpin_wait(ip);
- /* xfs_iflush_abort() drops the flush lock */
xfs_iflush_abort(ip);
goto reclaim;
}
if (xfs_ipincount(ip))
- goto out_ifunlock;
+ goto out_clear_flush;
if (!xfs_inode_clean(ip))
- goto out_ifunlock;
+ goto out_clear_flush;
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
reclaim:
- ASSERT(!xfs_isiflocked(ip));
/*
* Because we use RCU freeing we need to ensure the inode always appears
@@ -1101,8 +1098,8 @@ reclaim:
__xfs_inode_free(ip);
return;
-out_ifunlock:
- xfs_ifunlock(ip);
+out_clear_flush:
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
out_iunlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
@@ -1211,7 +1208,7 @@ xfs_reclaim_inodes(
while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
xfs_ail_push_all_sync(mp->m_ail);
xfs_reclaim_inodes_ag(mp, &nr_to_scan);
- };
+ }
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c06129cffba9..49624973eecc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -598,22 +598,6 @@ xfs_lock_two_inodes(
}
}
-void
-__xfs_iflock(
- struct xfs_inode *ip)
-{
- wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
- DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
-
- do {
- prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- if (xfs_isiflocked(ip))
- io_schedule();
- } while (!xfs_iflock_nowait(ip));
-
- finish_wait(wq, &wait.wq_entry);
-}
-
STATIC uint
_xfs_dic2xflags(
uint16_t di_flags,
@@ -840,7 +824,7 @@ xfs_ialloc(
if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
inode_set_iversion(inode, 1);
- ip->i_d.di_flags2 = 0;
+ ip->i_d.di_flags2 = mp->m_ino_geo.new_diflags2;
ip->i_d.di_cowextsize = 0;
ip->i_d.di_crtime = tv;
}
@@ -2531,11 +2515,8 @@ retry:
* valid, the wrong inode or stale.
*/
spin_lock(&ip->i_flags_lock);
- if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
- spin_unlock(&ip->i_flags_lock);
- rcu_read_unlock();
- return;
- }
+ if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
+ goto out_iflags_unlock;
/*
* Don't try to lock/unlock the current inode, but we _cannot_ skip the
@@ -2552,16 +2533,14 @@ retry:
}
}
ip->i_flags |= XFS_ISTALE;
- spin_unlock(&ip->i_flags_lock);
- rcu_read_unlock();
/*
- * If we can't get the flush lock, the inode is already attached. All
+ * If the inode is flushing, it is already attached to the buffer. All
* we needed to do here is mark the inode stale so buffer IO completion
* will remove it from the AIL.
*/
iip = ip->i_itemp;
- if (!xfs_iflock_nowait(ip)) {
+ if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
ASSERT(!list_empty(&iip->ili_item.li_bio_list));
ASSERT(iip->ili_last_fields);
goto out_iunlock;
@@ -2573,10 +2552,12 @@ retry:
* commit as the flock synchronises removal of the inode from the
* cluster buffer against inode reclaim.
*/
- if (!iip || list_empty(&iip->ili_item.li_bio_list)) {
- xfs_ifunlock(ip);
+ if (!iip || list_empty(&iip->ili_item.li_bio_list))
goto out_iunlock;
- }
+
+ __xfs_iflags_set(ip, XFS_IFLUSHING);
+ spin_unlock(&ip->i_flags_lock);
+ rcu_read_unlock();
/* we have a dirty inode in memory that has not yet been flushed. */
spin_lock(&iip->ili_lock);
@@ -2586,9 +2567,16 @@ retry:
spin_unlock(&iip->ili_lock);
ASSERT(iip->ili_last_fields);
+ if (ip != free_ip)
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return;
+
out_iunlock:
if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_iflags_unlock:
+ spin_unlock(&ip->i_flags_lock);
+ rcu_read_unlock();
}
/*
@@ -2631,8 +2619,9 @@ xfs_ifree_cluster(
/*
* We obtain and lock the backing buffer first in the process
- * here, as we have to ensure that any dirty inode that we
- * can't get the flush lock on is attached to the buffer.
+ * here to ensure dirty inodes attached to the buffer remain in
+ * the flushing state while we mark them stale.
+ *
* If we scan the in-memory inodes first, then buffer IO can
* complete before we get a lock on it, and hence we may fail
* to mark all the active inodes on the buffer stale.
@@ -2717,7 +2706,7 @@ xfs_ifree(
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
- ip->i_d.di_flags2 = 0;
+ ip->i_d.di_flags2 = ip->i_mount->m_ino_geo.new_diflags2;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -3443,7 +3432,7 @@ xfs_iflush(
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(xfs_isiflocked(ip));
+ ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
ASSERT(iip->ili_item.li_buf == bp);
@@ -3553,8 +3542,8 @@ xfs_iflush(
*
* What we do is move the bits to the ili_last_fields field. When
* logging the inode, these bits are moved back to the ili_fields field.
- * In the xfs_iflush_done() routine we clear ili_last_fields, since we
- * know that the information those bits represent is permanently on
+ * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
+ * we know that the information those bits represent is permanently on
* disk. As long as the flush completes before the inode is logged
* again, then both ili_fields and ili_last_fields will be cleared.
*/
@@ -3568,7 +3557,7 @@ flush_out:
/*
* Store the current LSN of the inode so that we can tell whether the
- * item has moved in the AIL from xfs_iflush_done().
+ * item has moved in the AIL from xfs_buf_inode_iodone().
*/
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
@@ -3613,7 +3602,7 @@ xfs_iflush_cluster(
/*
* Quick and dirty check to avoid locks if possible.
*/
- if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK))
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
continue;
if (xfs_ipincount(ip))
continue;
@@ -3627,7 +3616,7 @@ xfs_iflush_cluster(
*/
spin_lock(&ip->i_flags_lock);
ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
- if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) {
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
spin_unlock(&ip->i_flags_lock);
continue;
}
@@ -3635,24 +3624,17 @@ xfs_iflush_cluster(
/*
* ILOCK will pin the inode against reclaim and prevent
* concurrent transactions modifying the inode while we are
- * flushing the inode.
+ * flushing the inode. If we get the lock, set the flushing
+ * state before we drop the i_flags_lock.
*/
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
spin_unlock(&ip->i_flags_lock);
continue;
}
+ __xfs_iflags_set(ip, XFS_IFLUSHING);
spin_unlock(&ip->i_flags_lock);
/*
- * Skip inodes that are already flush locked as they have
- * already been written to the buffer.
- */
- if (!xfs_iflock_nowait(ip)) {
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- continue;
- }
-
- /*
* Abort flushing this inode if we are shut down because the
* inode may not currently be in the AIL. This can occur when
* log I/O failure unpins the inode without inserting into the
@@ -3661,7 +3643,6 @@ xfs_iflush_cluster(
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
xfs_iunpin_wait(ip);
- /* xfs_iflush_abort() drops the flush lock */
xfs_iflush_abort(ip);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
error = -EIO;
@@ -3670,7 +3651,7 @@ xfs_iflush_cluster(
/* don't block waiting on a log force to unpin dirty inodes */
if (xfs_ipincount(ip)) {
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
continue;
}
@@ -3678,7 +3659,7 @@ xfs_iflush_cluster(
if (!xfs_inode_clean(ip))
error = xfs_iflush(ip, bp);
else
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (error)
break;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index e9a8bb184d1f..751a3d1d7d84 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -194,6 +194,11 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
return ip->i_cowfp && ip->i_cowfp->if_bytes;
}
+static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
+{
+ return ip->i_d.di_flags2 & XFS_DIFLAG2_BIGTIME;
+}
+
/*
* Return the buftarg used for data allocations on a given inode.
*/
@@ -211,8 +216,7 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
#define XFS_INEW (1 << __XFS_INEW_BIT)
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
-#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
-#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
+#define XFS_IFLUSHING (1 << 7) /* inode is being flushed */
#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
#define XFS_IEOFBLOCKS (1 << 9) /* has the preallocblocks tag set */
@@ -234,36 +238,6 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
/*
- * Synchronize processes attempting to flush the in-core inode back to disk.
- */
-
-static inline int xfs_isiflocked(struct xfs_inode *ip)
-{
- return xfs_iflags_test(ip, XFS_IFLOCK);
-}
-
-extern void __xfs_iflock(struct xfs_inode *ip);
-
-static inline int xfs_iflock_nowait(struct xfs_inode *ip)
-{
- return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
-}
-
-static inline void xfs_iflock(struct xfs_inode *ip)
-{
- if (!xfs_iflock_nowait(ip))
- __xfs_iflock(ip);
-}
-
-static inline void xfs_ifunlock(struct xfs_inode *ip)
-{
- ASSERT(xfs_isiflocked(ip));
- xfs_iflags_clear(ip, XFS_IFLOCK);
- smp_mb();
- wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
-}
-
-/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
* 1<<16 - 1<<32-1 -- lockdep annotation (integers)
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6c65938cee1c..17e20a6d8b4e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -295,6 +295,28 @@ xfs_inode_item_format_attr_fork(
}
}
+/*
+ * Convert an incore timestamp to a log timestamp. Note that the log format
+ * specifies host endian format!
+ */
+static inline xfs_ictimestamp_t
+xfs_inode_to_log_dinode_ts(
+ struct xfs_inode *ip,
+ const struct timespec64 tv)
+{
+ struct xfs_legacy_ictimestamp *lits;
+ xfs_ictimestamp_t its;
+
+ if (xfs_inode_has_bigtime(ip))
+ return xfs_inode_encode_bigtime(tv);
+
+ lits = (struct xfs_legacy_ictimestamp *)&its;
+ lits->t_sec = tv.tv_sec;
+ lits->t_nsec = tv.tv_nsec;
+
+ return its;
+}
+
static void
xfs_inode_to_log_dinode(
struct xfs_inode *ip,
@@ -313,12 +335,9 @@ xfs_inode_to_log_dinode(
memset(to->di_pad, 0, sizeof(to->di_pad));
memset(to->di_pad3, 0, sizeof(to->di_pad3));
- to->di_atime.t_sec = inode->i_atime.tv_sec;
- to->di_atime.t_nsec = inode->i_atime.tv_nsec;
- to->di_mtime.t_sec = inode->i_mtime.tv_sec;
- to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
- to->di_ctime.t_sec = inode->i_ctime.tv_sec;
- to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
+ to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
+ to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
+ to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime);
to->di_nlink = inode->i_nlink;
to->di_gen = inode->i_generation;
to->di_mode = inode->i_mode;
@@ -340,8 +359,7 @@ xfs_inode_to_log_dinode(
if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
to->di_version = 3;
to->di_changecount = inode_peek_iversion(inode);
- to->di_crtime.t_sec = from->di_crtime.tv_sec;
- to->di_crtime.t_nsec = from->di_crtime.tv_nsec;
+ to->di_crtime = xfs_inode_to_log_dinode_ts(ip, from->di_crtime);
to->di_flags2 = from->di_flags2;
to->di_cowextsize = from->di_cowextsize;
to->di_ino = ip->i_ino;
@@ -491,8 +509,7 @@ xfs_inode_item_push(
(ip->i_flags & XFS_ISTALE))
return XFS_ITEM_PINNED;
- /* If the inode is already flush locked, we're already flushing. */
- if (xfs_isiflocked(ip))
+ if (xfs_iflags_test(ip, XFS_IFLUSHING))
return XFS_ITEM_FLUSHING;
if (!xfs_buf_trylock(bp))
@@ -703,7 +720,7 @@ xfs_iflush_finish(
iip->ili_last_fields = 0;
iip->ili_flush_lsn = 0;
spin_unlock(&iip->ili_lock);
- xfs_ifunlock(iip->ili_inode);
+ xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING);
if (drop_buffer)
xfs_buf_rele(bp);
}
@@ -711,11 +728,11 @@ xfs_iflush_finish(
/*
* Inode buffer IO completion routine. It is responsible for removing inodes
- * attached to the buffer from the AIL if they have not been re-logged, as well
- * as completing the flush and unlocking the inode.
+ * attached to the buffer from the AIL if they have not been re-logged and
+ * completing the inode flush.
*/
void
-xfs_iflush_done(
+xfs_buf_inode_iodone(
struct xfs_buf *bp)
{
struct xfs_log_item *lip, *n;
@@ -754,11 +771,21 @@ xfs_iflush_done(
list_splice_tail(&flushed_inodes, &bp->b_li_list);
}
+void
+xfs_buf_inode_io_fail(
+ struct xfs_buf *bp)
+{
+ struct xfs_log_item *lip;
+
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
+ set_bit(XFS_LI_FAILED, &lip->li_flags);
+}
+
/*
- * This is the inode flushing abort routine. It is called from xfs_iflush when
+ * This is the inode flushing abort routine. It is called when
* the filesystem is shutting down to clean up the inode state. It is
* responsible for removing the inode item from the AIL if it has not been
- * re-logged, and unlocking the inode's flush lock.
+ * re-logged and clearing the inode's flush state.
*/
void
xfs_iflush_abort(
@@ -790,7 +817,7 @@ xfs_iflush_abort(
list_del_init(&iip->ili_item.li_bio_list);
spin_unlock(&iip->ili_lock);
}
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
if (bp)
xfs_buf_rele(bp);
}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 048b5e7dee90..4b926e32831c 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -25,8 +25,8 @@ struct xfs_inode_log_item {
*
* We need atomic changes between inode dirtying, inode flushing and
* inode completion, but these all hold different combinations of
- * ILOCK and iflock and hence we need some other method of serialising
- * updates to the flush state.
+ * ILOCK and IFLUSHING and hence we need some other method of
+ * serialising updates to the flush state.
*/
spinlock_t ili_lock; /* flush state lock */
unsigned int ili_last_fields; /* fields when flushed */
@@ -43,7 +43,6 @@ static inline int xfs_inode_clean(struct xfs_inode *ip)
extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
extern void xfs_inode_item_destroy(struct xfs_inode *);
-extern void xfs_iflush_done(struct xfs_buf *);
extern void xfs_iflush_abort(struct xfs_inode *);
extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
struct xfs_inode_log_format *);
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 5e0d291835b3..cb44f7653f03 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -115,6 +115,82 @@ out_free_ip:
return error;
}
+static inline bool xfs_log_dinode_has_bigtime(const struct xfs_log_dinode *ld)
+{
+ return ld->di_version >= 3 &&
+ (ld->di_flags2 & XFS_DIFLAG2_BIGTIME);
+}
+
+/* Convert a log timestamp to an ondisk timestamp. */
+static inline xfs_timestamp_t
+xfs_log_dinode_to_disk_ts(
+ struct xfs_log_dinode *from,
+ const xfs_ictimestamp_t its)
+{
+ struct xfs_legacy_timestamp *lts;
+ struct xfs_legacy_ictimestamp *lits;
+ xfs_timestamp_t ts;
+
+ if (xfs_log_dinode_has_bigtime(from))
+ return cpu_to_be64(its);
+
+ lts = (struct xfs_legacy_timestamp *)&ts;
+ lits = (struct xfs_legacy_ictimestamp *)&its;
+ lts->t_sec = cpu_to_be32(lits->t_sec);
+ lts->t_nsec = cpu_to_be32(lits->t_nsec);
+
+ return ts;
+}
+
+STATIC void
+xfs_log_dinode_to_disk(
+ struct xfs_log_dinode *from,
+ struct xfs_dinode *to)
+{
+ to->di_magic = cpu_to_be16(from->di_magic);
+ to->di_mode = cpu_to_be16(from->di_mode);
+ to->di_version = from->di_version;
+ to->di_format = from->di_format;
+ to->di_onlink = 0;
+ to->di_uid = cpu_to_be32(from->di_uid);
+ to->di_gid = cpu_to_be32(from->di_gid);
+ to->di_nlink = cpu_to_be32(from->di_nlink);
+ to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+ to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+ memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+
+ to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
+ to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
+ to->di_ctime = xfs_log_dinode_to_disk_ts(from, from->di_ctime);
+
+ to->di_size = cpu_to_be64(from->di_size);
+ to->di_nblocks = cpu_to_be64(from->di_nblocks);
+ to->di_extsize = cpu_to_be32(from->di_extsize);
+ to->di_nextents = cpu_to_be32(from->di_nextents);
+ to->di_anextents = cpu_to_be16(from->di_anextents);
+ to->di_forkoff = from->di_forkoff;
+ to->di_aformat = from->di_aformat;
+ to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
+ to->di_dmstate = cpu_to_be16(from->di_dmstate);
+ to->di_flags = cpu_to_be16(from->di_flags);
+ to->di_gen = cpu_to_be32(from->di_gen);
+
+ if (from->di_version == 3) {
+ to->di_changecount = cpu_to_be64(from->di_changecount);
+ to->di_crtime = xfs_log_dinode_to_disk_ts(from,
+ from->di_crtime);
+ to->di_flags2 = cpu_to_be64(from->di_flags2);
+ to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
+ to->di_ino = cpu_to_be64(from->di_ino);
+ to->di_lsn = cpu_to_be64(from->di_lsn);
+ memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+ uuid_copy(&to->di_uuid, &from->di_uuid);
+ to->di_flushiter = 0;
+ } else {
+ to->di_flushiter = cpu_to_be16(from->di_flushiter);
+ }
+}
+
STATIC int
xlog_recover_inode_commit_pass2(
struct xlog *log,
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6f22a66777cd..bca7659fb5c6 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -404,7 +404,7 @@ xfs_ioc_attr_list(
context.cursor.offset))
return -EINVAL;
- buffer = kmem_zalloc_large(bufsize, 0);
+ buffer = kvzalloc(bufsize, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
@@ -1190,7 +1190,8 @@ xfs_flags2diflags2(
unsigned int xflags)
{
uint64_t di_flags2 =
- (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
+ (ip->i_d.di_flags2 & (XFS_DIFLAG2_REFLINK |
+ XFS_DIFLAG2_BIGTIME));
if (xflags & FS_XFLAG_DAX)
di_flags2 |= XFS_DIFLAG2_DAX;
@@ -1690,7 +1691,7 @@ xfs_ioc_getbmap(
if (bmx.bmv_count > ULONG_MAX / recsize)
return -ENOMEM;
- buf = kmem_zalloc_large(bmx.bmv_count * sizeof(*buf), 0);
+ buf = kvzalloc(bmx.bmv_count * sizeof(*buf), GFP_KERNEL);
if (!buf)
return -ENOMEM;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e2ec91b2d0f4..a17d788921d6 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -265,32 +265,6 @@ xlog_header_check_mount(
return 0;
}
-void
-xlog_recover_iodone(
- struct xfs_buf *bp)
-{
- if (bp->b_error) {
- /*
- * We're not going to bother about retrying
- * this during recovery. One strike!
- */
- if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
- xfs_buf_ioerror_alert(bp, __this_address);
- xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
- }
- }
-
- /*
- * On v5 supers, a bli could be attached to update the metadata LSN.
- * Clean it up.
- */
- if (bp->b_log_item)
- xfs_buf_item_relse(bp);
- ASSERT(bp->b_log_item == NULL);
- bp->b_flags &= ~_XBF_LOGRECOVERY;
- xfs_buf_ioend_finish(bp);
-}
-
/*
* This routine finds (to an approximation) the first block in the physical
* log which contains the given cycle. It uses a binary search algorithm.
@@ -2097,7 +2071,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
- ptr = kmem_realloc(old_ptr, len + old_len, 0);
+ ptr = krealloc(old_ptr, len + old_len, GFP_KERNEL | __GFP_NOFAIL);
memcpy(&ptr[old_len], dp, len);
item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -3294,14 +3268,14 @@ xlog_do_log_recovery(
*/
STATIC int
xlog_do_recover(
- struct xlog *log,
- xfs_daddr_t head_blk,
- xfs_daddr_t tail_blk)
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk)
{
- struct xfs_mount *mp = log->l_mp;
- int error;
- xfs_buf_t *bp;
- xfs_sb_t *sbp;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_buf *bp = mp->m_sb_bp;
+ struct xfs_sb *sbp = &mp->m_sb;
+ int error;
trace_xfs_log_recover(log, head_blk, tail_blk);
@@ -3315,9 +3289,8 @@ xlog_do_recover(
/*
* If IO errors happened during recovery, bail out.
*/
- if (XFS_FORCED_SHUTDOWN(mp)) {
+ if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- }
/*
* We now update the tail_lsn since much of the recovery has completed
@@ -3331,16 +3304,12 @@ xlog_do_recover(
xlog_assign_tail_lsn(mp);
/*
- * Now that we've finished replaying all buffer and inode
- * updates, re-read in the superblock and reverify it.
+ * Now that we've finished replaying all buffer and inode updates,
+ * re-read the superblock and reverify it.
*/
- bp = xfs_getsb(mp);
- bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
- ASSERT(!(bp->b_flags & XBF_WRITE));
- bp->b_flags |= XBF_READ;
- bp->b_ops = &xfs_sb_buf_ops;
-
- error = xfs_buf_submit(bp);
+ xfs_buf_lock(bp);
+ xfs_buf_hold(bp);
+ error = _xfs_buf_read(bp, XBF_READ);
if (error) {
if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_buf_ioerror_alert(bp, __this_address);
@@ -3351,7 +3320,6 @@ xlog_do_recover(
}
/* Convert superblock from on-disk format */
- sbp = &mp->m_sb;
xfs_sb_from_disk(sbp, bp->b_addr);
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c8ae49a1e99c..150ee5cb8645 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -80,9 +80,9 @@ xfs_uuid_mount(
}
if (hole < 0) {
- xfs_uuid_table = kmem_realloc(xfs_uuid_table,
+ xfs_uuid_table = krealloc(xfs_uuid_table,
(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
- 0);
+ GFP_KERNEL | __GFP_NOFAIL);
hole = xfs_uuid_table_size++;
}
xfs_uuid_table[hole] = *uuid;
@@ -1059,11 +1059,12 @@ xfs_unmountfs(
* We can potentially deadlock here if we have an inode cluster
* that has been freed has its buffer still pinned in memory because
* the transaction is still sitting in a iclog. The stale inodes
- * on that buffer will have their flush locks held until the
- * transaction hits the disk and the callbacks run. the inode
- * flush takes the flush lock unconditionally and with nothing to
- * push out the iclog we will never get that unlocked. hence we
- * need to force the log first.
+ * on that buffer will be pinned to the buffer until the
+ * transaction hits the disk and the callbacks run. Pushing the AIL will
+ * skip the stale inodes and may never see the pinned buffer, so
+ * nothing will push out the iclog and unpin the buffer. Hence we
+ * need to force the log here to ensure all items are flushed into the
+ * AIL before we go any further.
*/
xfs_log_force(mp, XFS_LOG_SYNC);
@@ -1289,23 +1290,6 @@ xfs_mod_frextents(
}
/*
- * xfs_getsb() is called to obtain the buffer for the superblock.
- * The buffer is returned locked and read in from disk.
- * The buffer should be released with a call to xfs_brelse().
- */
-struct xfs_buf *
-xfs_getsb(
- struct xfs_mount *mp)
-{
- struct xfs_buf *bp = mp->m_sb_bp;
-
- xfs_buf_lock(bp);
- xfs_buf_hold(bp);
- ASSERT(bp->b_flags & XBF_DONE);
- return bp;
-}
-
-/*
* Used to free the superblock along various error paths.
*/
void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a72cfcaa4ad1..dfa429b77ee2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -410,7 +410,6 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
bool reserved);
extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
-extern struct xfs_buf *xfs_getsb(xfs_mount_t *);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 5f04d8a5ab2a..0aa87c210104 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -15,6 +15,10 @@
"XFS: offsetof(" #structname ", " #member ") is wrong, " \
"expected " #off)
+#define XFS_CHECK_VALUE(value, expected) \
+ BUILD_BUG_ON_MSG((value) != (expected), \
+ "XFS: value of " #value " is wrong, expected " #expected)
+
static inline void __init
xfs_check_ondisk_structs(void)
{
@@ -23,7 +27,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224);
XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36);
- XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 336);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block, 4);
@@ -41,7 +45,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20);
XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24);
- XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8);
XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8);
XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8);
@@ -84,12 +89,12 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9);
XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 40);
- XFS_CHECK_OFFSET(xfs_attr_shortform_t, hdr.totsize, 0);
- XFS_CHECK_OFFSET(xfs_attr_shortform_t, hdr.count, 2);
- XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].namelen, 4);
- XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].valuelen, 5);
- XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].flags, 6);
- XFS_CHECK_OFFSET(xfs_attr_shortform_t, list[0].nameval, 7);
+ XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.totsize, 0);
+ XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.count, 2);
+ XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].namelen, 4);
+ XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].valuelen, 5);
+ XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].flags, 6);
+ XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].nameval, 7);
XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12);
XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16);
XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8);
@@ -121,7 +126,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176);
XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28);
- XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_ictimestamp_t, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_ictimestamp, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52);
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
@@ -152,6 +158,20 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24);
XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64);
+
+ /*
+ * Make sure the incore inode timestamp range corresponds to hand
+ * converted values based on the ondisk format specification.
+ */
+ XFS_CHECK_VALUE(XFS_BIGTIME_TIME_MIN - XFS_BIGTIME_EPOCH_OFFSET,
+ XFS_LEGACY_TIME_MIN);
+ XFS_CHECK_VALUE(XFS_BIGTIME_TIME_MAX - XFS_BIGTIME_EPOCH_OFFSET,
+ 16299260424LL);
+
+ /* Do the same with the incore quota expiration range. */
+ XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4);
+ XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
+ 16299260424LL);
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index be67570badf8..3f82e0c92c2d 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -661,6 +661,17 @@ xfs_qm_init_quotainfo(
/* Precalc some constants */
qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
+ if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+ qinf->qi_expiry_min =
+ xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN);
+ qinf->qi_expiry_max =
+ xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MAX);
+ } else {
+ qinf->qi_expiry_min = XFS_DQ_LEGACY_EXPIRY_MIN;
+ qinf->qi_expiry_max = XFS_DQ_LEGACY_EXPIRY_MAX;
+ }
+ trace_xfs_quota_expiry_range(mp, qinf->qi_expiry_min,
+ qinf->qi_expiry_max);
mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
@@ -879,6 +890,8 @@ xfs_qm_reset_dqcounts(
ddq->d_bwarns = 0;
ddq->d_iwarns = 0;
ddq->d_rtbwarns = 0;
+ if (xfs_sb_version_hasbigtime(&mp->m_sb))
+ ddq->d_type |= XFS_DQTYPE_BIGTIME;
}
if (xfs_sb_version_hascrc(&mp->m_sb)) {
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9c078c35d924..e3dabab44097 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -65,6 +65,10 @@ struct xfs_quotainfo {
struct xfs_def_quota qi_grp_default;
struct xfs_def_quota qi_prj_default;
struct shrinker qi_shrinker;
+
+ /* Minimum and maximum quota expiration timestamp values. */
+ time64_t qi_expiry_min;
+ time64_t qi_expiry_max;
};
static inline struct radix_tree_root *
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 1c542b4a5220..ca1b57d291dc 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -479,13 +479,19 @@ xfs_setqlim_warns(
static inline void
xfs_setqlim_timer(
+ struct xfs_mount *mp,
struct xfs_dquot_res *res,
struct xfs_quota_limits *qlim,
s64 timer)
{
- res->timer = timer;
- if (qlim)
- qlim->time = timer;
+ if (qlim) {
+ /* Set the length of the default grace period. */
+ res->timer = xfs_dquot_set_grace_period(timer);
+ qlim->time = res->timer;
+ } else {
+ /* Set the grace period expiration on a quota. */
+ res->timer = xfs_dquot_set_timeout(mp, timer);
+ }
}
/*
@@ -574,7 +580,7 @@ xfs_qm_scall_setqlim(
if (newlim->d_fieldmask & QC_SPC_WARNS)
xfs_setqlim_warns(res, qlim, newlim->d_spc_warns);
if (newlim->d_fieldmask & QC_SPC_TIMER)
- xfs_setqlim_timer(res, qlim, newlim->d_spc_timer);
+ xfs_setqlim_timer(mp, res, qlim, newlim->d_spc_timer);
/* Blocks on the realtime device. */
hard = (newlim->d_fieldmask & QC_RT_SPC_HARD) ?
@@ -590,7 +596,7 @@ xfs_qm_scall_setqlim(
if (newlim->d_fieldmask & QC_RT_SPC_WARNS)
xfs_setqlim_warns(res, qlim, newlim->d_rt_spc_warns);
if (newlim->d_fieldmask & QC_RT_SPC_TIMER)
- xfs_setqlim_timer(res, qlim, newlim->d_rt_spc_timer);
+ xfs_setqlim_timer(mp, res, qlim, newlim->d_rt_spc_timer);
/* Inodes */
hard = (newlim->d_fieldmask & QC_INO_HARD) ?
@@ -606,7 +612,7 @@ xfs_qm_scall_setqlim(
if (newlim->d_fieldmask & QC_INO_WARNS)
xfs_setqlim_warns(res, qlim, newlim->d_ino_warns);
if (newlim->d_fieldmask & QC_INO_TIMER)
- xfs_setqlim_timer(res, qlim, newlim->d_ino_timer);
+ xfs_setqlim_timer(mp, res, qlim, newlim->d_ino_timer);
if (id != 0) {
/*
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 06b22e35fc90..5a62398940d0 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -108,8 +108,6 @@ extern void xfs_qm_mount_quotas(struct xfs_mount *);
extern void xfs_qm_unmount(struct xfs_mount *);
extern void xfs_qm_unmount_quotas(struct xfs_mount *);
-void xfs_dquot_done(struct xfs_buf *);
-
#else
static inline int
xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid,
@@ -151,12 +149,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
#define xfs_qm_mount_quotas(mp)
#define xfs_qm_unmount(mp)
#define xfs_qm_unmount_quotas(mp)
-
-static inline void xfs_dquot_done(struct xfs_buf *bp)
-{
- return;
-}
-
#endif /* CONFIG_XFS_QUOTA */
#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6209e7b6b895..5b89c12f1566 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -247,6 +247,9 @@ xfs_rtallocate_extent_block(
end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
i <= end;
i++) {
+ /* Make sure we don't scan off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
+
/*
* See if there's a free extent of maxlen starting at i.
* If it's not so then next will contain the first non-free.
@@ -442,6 +445,14 @@ xfs_rtallocate_extent_near(
*/
if (bno >= mp->m_sb.sb_rextents)
bno = mp->m_sb.sb_rextents - 1;
+
+ /* Make sure we don't run off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
+ if (maxlen < minlen) {
+ *rtblock = NULLRTBLOCK;
+ return 0;
+ }
+
/*
* Try the exact allocation first.
*/
@@ -862,7 +873,7 @@ xfs_alloc_rsum_cache(
* lower bound on the minimum level with any free extents. We can
* continue without the cache if it couldn't be allocated.
*/
- mp->m_rsum_cache = kmem_zalloc_large(rbmblocks, 0);
+ mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL);
if (!mp->m_rsum_cache)
xfs_warn(mp, "could not allocate realtime summary cache");
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 71ac6c1cdc36..baf5de30eebb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -654,11 +654,11 @@ xfs_fs_destroy_inode(
ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
/*
- * We always use background reclaim here because even if the
- * inode is clean, it still may be under IO and hence we have
- * to take the flush lock. The background reclaim path handles
- * this more efficiently than we can here, so simply let background
- * reclaim tear down all inodes.
+ * We always use background reclaim here because even if the inode is
+ * clean, it still may be under IO and hence we have wait for IO
+ * completion to occur before we can reclaim the inode. The background
+ * reclaim path handles this more efficiently than we can here, so
+ * simply let background reclaim tear down all inodes.
*/
xfs_inode_set_reclaim_tag(ip);
}
@@ -1484,8 +1484,14 @@ xfs_fc_fill_super(
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1;
- sb->s_time_min = S32_MIN;
- sb->s_time_max = S32_MAX;
+ if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+ sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
+ sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
+ } else {
+ sb->s_time_min = XFS_LEGACY_TIME_MIN;
+ sb->s_time_max = XFS_LEGACY_TIME_MAX;
+ }
+ trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
sb->s_iflags |= SB_I_CGROUPWB;
set_posix_acl_flag(sb);
@@ -1494,6 +1500,10 @@ xfs_fc_fill_super(
if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
sb->s_flags |= SB_I_VERSION;
+ if (xfs_sb_version_hasbigtime(&mp->m_sb))
+ xfs_warn(mp,
+ "EXPERIMENTAL big timestamp feature in use. Use at your own risk!");
+
if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
bool rtdev_is_dax = false, datadev_is_dax;
@@ -1549,6 +1559,10 @@ xfs_fc_fill_super(
goto out_filestream_unmount;
}
+ if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
+ xfs_warn(mp,
+ "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!");
+
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index abb1d859f226..dcdcf99cfa5d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -338,7 +338,7 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
+DEFINE_BUF_EVENT(xfs_buf_iodone_async);
DEFINE_BUF_EVENT(xfs_buf_error_relse);
DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
@@ -3676,7 +3676,6 @@ DEFINE_EVENT(xfs_kmem_class, name, \
DEFINE_KMEM_EVENT(kmem_alloc);
DEFINE_KMEM_EVENT(kmem_alloc_io);
DEFINE_KMEM_EVENT(kmem_alloc_large);
-DEFINE_KMEM_EVENT(kmem_realloc);
TRACE_EVENT(xfs_check_new_dalign,
TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
@@ -3844,6 +3843,32 @@ TRACE_EVENT(xfs_btree_bload_block,
__entry->nr_records)
)
+DECLARE_EVENT_CLASS(xfs_timestamp_range_class,
+ TP_PROTO(struct xfs_mount *mp, time64_t min, time64_t max),
+ TP_ARGS(mp, min, max),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(long long, min)
+ __field(long long, max)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->min = min;
+ __entry->max = max;
+ ),
+ TP_printk("dev %d:%d min %lld max %lld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->min,
+ __entry->max)
+)
+
+#define DEFINE_TIMESTAMP_RANGE_EVENT(name) \
+DEFINE_EVENT(xfs_timestamp_range_class, name, \
+ TP_PROTO(struct xfs_mount *mp, long long min, long long max), \
+ TP_ARGS(mp, min, max))
+DEFINE_TIMESTAMP_RANGE_EVENT(xfs_inode_timestamp_range);
+DEFINE_TIMESTAMP_RANGE_EVENT(xfs_quota_expiry_range);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ed72867b1a19..ca18a040336a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -468,7 +468,7 @@ xfs_trans_apply_sb_deltas(
xfs_buf_t *bp;
int whole = 0;
- bp = xfs_trans_getsb(tp, tp->t_mountp);
+ bp = xfs_trans_getsb(tp);
sbp = bp->b_addr;
/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index b752501818d2..f46534b75236 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -209,7 +209,7 @@ xfs_trans_read_buf(
flags, bpp, ops);
}
-struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *);
+struct xfs_buf *xfs_trans_getsb(struct xfs_trans *);
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 11cd666cd99a..42d63b830cb9 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -166,50 +166,34 @@ xfs_trans_get_buf_map(
}
/*
- * Get and lock the superblock buffer of this file system for the
- * given transaction.
- *
- * We don't need to use incore_match() here, because the superblock
- * buffer is a private buffer which we keep a pointer to in the
- * mount structure.
+ * Get and lock the superblock buffer for the given transaction.
*/
-xfs_buf_t *
+struct xfs_buf *
xfs_trans_getsb(
- xfs_trans_t *tp,
- struct xfs_mount *mp)
+ struct xfs_trans *tp)
{
- xfs_buf_t *bp;
- struct xfs_buf_log_item *bip;
+ struct xfs_buf *bp = tp->t_mountp->m_sb_bp;
/*
- * Default to just trying to lock the superblock buffer
- * if tp is NULL.
+ * Just increment the lock recursion count if the buffer is already
+ * attached to this transaction.
*/
- if (tp == NULL)
- return xfs_getsb(mp);
-
- /*
- * If the superblock buffer already has this transaction
- * pointer in its b_fsprivate2 field, then we know we already
- * have it locked. In this case we just increment the lock
- * recursion count and return the buffer to the caller.
- */
- bp = mp->m_sb_bp;
if (bp->b_transp == tp) {
- bip = bp->b_log_item;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
+
trace_xfs_trans_getsb_recur(bip);
- return bp;
- }
+ } else {
+ xfs_buf_lock(bp);
+ xfs_buf_hold(bp);
+ _xfs_trans_bjoin(tp, bp, 1);
- bp = xfs_getsb(mp);
- if (bp == NULL)
- return NULL;
+ trace_xfs_trans_getsb(bp->b_log_item);
+ }
- _xfs_trans_bjoin(tp, bp, 1);
- trace_xfs_trans_getsb(bp->b_log_item);
return bp;
}
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index c6ba7ef18e06..133fc6fc3edd 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -55,6 +55,12 @@ xfs_trans_log_dquot(
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ /* Upgrade the dquot to bigtime format if possible. */
+ if (dqp->q_id != 0 &&
+ xfs_sb_version_hasbigtime(&tp->t_mountp->m_sb) &&
+ !(dqp->q_type & XFS_DQTYPE_BIGTIME))
+ dqp->q_type |= XFS_DQTYPE_BIGTIME;
+
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &dqp->q_logitem.qli_item.li_flags);
}