summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_buf.h21
-rw-r--r--fs/xfs/xfs_buf_item.c13
-rw-r--r--fs/xfs/xfs_mount.h3
-rw-r--r--fs/xfs/xfs_sysfs.c81
4 files changed, 111 insertions, 7 deletions
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index adef116db0c3..8bfb974f0772 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -183,7 +183,26 @@ typedef struct xfs_buf {
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */
int b_error; /* error code on I/O */
- int b_last_error; /* previous async I/O error */
+
+ /*
+ * async write failure retry count. Initialised to zero on the first
+ * failure, then when it exceeds the maximum configured without a
+ * success the write is considered to be failed permanently and the
+ * iodone handler will take appropriate action.
+ *
+ * For retry timeouts, we record the jiffie of the first failure. This
+ * means that we can change the retry timeout for buffers already under
+ * I/O and thus avoid getting stuck in a retry loop with a long timeout.
+ *
+ * last_error is used to ensure that we are getting repeated errors, not
+ * different errors. e.g. a block device might change ENOSPC to EIO when
+ * a failure timeout occurs, so we want to re-initialise the error
+ * retry behaviour appropriately when that happens.
+ */
+ int b_retries;
+ unsigned long b_first_retry_time; /* in jiffies */
+ int b_last_error;
+
const struct xfs_buf_ops *b_ops;
#ifdef XFS_BUF_LOCK_TRACKING
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index b8d0cd4adb81..0d95c59f7c68 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1085,6 +1085,9 @@ xfs_buf_iodone_callback_error(
bp->b_flags |= (XBF_WRITE | XBF_ASYNC |
XBF_DONE | XBF_WRITE_FAIL);
bp->b_last_error = bp->b_error;
+ bp->b_retries = 0;
+ bp->b_first_retry_time = jiffies;
+
xfs_buf_ioerror(bp, 0);
xfs_buf_submit(bp);
return true;
@@ -1095,8 +1098,13 @@ xfs_buf_iodone_callback_error(
* error configuration we have been set up to use.
*/
cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
- if (!cfg->max_retries)
- goto permanent_error;
+
+ if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
+ ++bp->b_retries > cfg->max_retries)
+ goto permanent_error;
+ if (cfg->retry_timeout &&
+ time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
+ goto permanent_error;
/* still a transient error, higher layers will retry */
xfs_buf_ioerror(bp, 0);
@@ -1139,6 +1147,7 @@ xfs_buf_iodone_callbacks(
* retry state here in preparation for the next error that may occur.
*/
bp->b_last_error = 0;
+ bp->b_retries = 0;
xfs_buf_do_callbacks(bp);
bp->b_fspriv = NULL;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0c5a97644d78..2fafa9438bcf 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -52,9 +52,12 @@ enum {
XFS_ERR_ERRNO_MAX,
};
+#define XFS_ERR_RETRY_FOREVER -1
+
struct xfs_error_cfg {
struct xfs_kobj kobj;
int max_retries;
+ unsigned long retry_timeout; /* in jiffies, 0 = no timeout */
};
typedef struct xfs_mount {
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 71046d904985..918d144febd9 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -374,10 +374,6 @@ struct kobj_type xfs_log_ktype = {
* and any other future type of IO (e.g. special inode or directory error
* handling) we care to support.
*/
-static struct attribute *xfs_error_attrs[] = {
- NULL,
-};
-
static inline struct xfs_error_cfg *
to_error_cfg(struct kobject *kobject)
{
@@ -385,6 +381,79 @@ to_error_cfg(struct kobject *kobject)
return container_of(kobj, struct xfs_error_cfg, kobj);
}
+static ssize_t
+max_retries_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", cfg->max_retries);
+}
+
+static ssize_t
+max_retries_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val < -1)
+ return -EINVAL;
+
+ cfg->max_retries = val;
+ return count;
+}
+XFS_SYSFS_ATTR_RW(max_retries);
+
+static ssize_t
+retry_timeout_seconds_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+
+ return snprintf(buf, PAGE_SIZE, "%ld\n",
+ jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC);
+}
+
+static ssize_t
+retry_timeout_seconds_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_error_cfg *cfg = to_error_cfg(kobject);
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ /* 1 day timeout maximum */
+ if (val < 0 || val > 86400)
+ return -EINVAL;
+
+ cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC);
+ return count;
+}
+XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
+
+static struct attribute *xfs_error_attrs[] = {
+ ATTR_LIST(max_retries),
+ ATTR_LIST(retry_timeout_seconds),
+ NULL,
+};
+
+
struct kobj_type xfs_error_cfg_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
@@ -404,11 +473,13 @@ struct kobj_type xfs_error_ktype = {
struct xfs_error_init {
char *name;
int max_retries;
+ int retry_timeout; /* in seconds */
};
static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = {
{ .name = "default",
.max_retries = -1,
+ .retry_timeout = 0,
},
};
@@ -439,6 +510,8 @@ xfs_error_sysfs_init_class(
goto out_error;
cfg->max_retries = init[i].max_retries;
+ cfg->retry_timeout = msecs_to_jiffies(
+ init[i].retry_timeout * MSEC_PER_SEC);
}
return 0;