1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
|
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_SCRUB_H__
#define __XFS_SCRUB_SCRUB_H__
struct xfs_scrub;
struct xchk_relax {
unsigned long next_resched;
unsigned int resched_nr;
bool interruptible;
};
/* Yield to the scheduler at most 10x per second. */
#define XCHK_RELAX_NEXT (jiffies + (HZ / 10))
#define INIT_XCHK_RELAX \
(struct xchk_relax){ \
.next_resched = XCHK_RELAX_NEXT, \
.resched_nr = 0, \
.interruptible = true, \
}
/*
* Relax during a scrub operation and exit if there's a fatal signal pending.
*
* If preemption is disabled, we need to yield to the scheduler every now and
* then so that we don't run afoul of the soft lockup watchdog or RCU stall
* detector. cond_resched calls are somewhat expensive (~5ns) so we want to
* ratelimit this to 10x per second. Amortize the cost of the other checks by
* only doing it once every 100 calls.
*/
static inline int xchk_maybe_relax(struct xchk_relax *widget)
{
/* Amortize the cost of scheduling and checking signals. */
if (likely(++widget->resched_nr < 100))
return 0;
widget->resched_nr = 0;
if (unlikely(widget->next_resched <= jiffies)) {
cond_resched();
widget->next_resched = XCHK_RELAX_NEXT;
}
if (widget->interruptible && fatal_signal_pending(current))
return -EINTR;
return 0;
}
/*
* Standard flags for allocating memory within scrub. NOFS context is
* configured by the process allocation scope. Scrub and repair must be able
* to back out gracefully if there isn't enough memory. Force-cast to avoid
* complaints from static checkers.
*/
#define XCHK_GFP_FLAGS ((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
__GFP_RETRY_MAYFAIL))
/*
* For opening files by handle for fsck operations, we don't trust the inumber
* or the allocation state; therefore, perform an untrusted lookup. We don't
* want these inodes to pollute the cache, so mark them for immediate removal.
*/
#define XCHK_IGET_FLAGS (XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE)
/* Type info and names for the scrub types. */
enum xchk_type {
ST_NONE = 1, /* disabled */
ST_PERAG, /* per-AG metadata */
ST_FS, /* per-FS metadata */
ST_INODE, /* per-inode metadata */
};
struct xchk_meta_ops {
/* Acquire whatever resources are needed for the operation. */
int (*setup)(struct xfs_scrub *sc);
/* Examine metadata for errors. */
int (*scrub)(struct xfs_scrub *);
/* Repair or optimize the metadata. */
int (*repair)(struct xfs_scrub *);
/*
* Re-scrub the metadata we repaired, in case there's extra work that
* we need to do to check our repair work. If this is NULL, we'll use
* the ->scrub function pointer, assuming that the regular scrub is
* sufficient.
*/
int (*repair_eval)(struct xfs_scrub *sc);
/* Decide if we even have this piece of metadata. */
bool (*has)(struct xfs_mount *);
/* type describing required/allowed inputs */
enum xchk_type type;
};
/* Buffer pointers and btree cursors for an entire AG. */
struct xchk_ag {
struct xfs_perag *pag;
/* AG btree roots */
struct xfs_buf *agf_bp;
struct xfs_buf *agi_bp;
/* AG btrees */
struct xfs_btree_cur *bno_cur;
struct xfs_btree_cur *cnt_cur;
struct xfs_btree_cur *ino_cur;
struct xfs_btree_cur *fino_cur;
struct xfs_btree_cur *rmap_cur;
struct xfs_btree_cur *refc_cur;
};
struct xfs_scrub {
/* General scrub state. */
struct xfs_mount *mp;
struct xfs_scrub_metadata *sm;
const struct xchk_meta_ops *ops;
struct xfs_trans *tp;
/* File that scrub was called with. */
struct file *file;
/*
* File that is undergoing the scrub operation. This can differ from
* the file that scrub was called with if we're checking file-based fs
* metadata (e.g. rt bitmaps) or if we're doing a scrub-by-handle for
* something that can't be opened directly (e.g. symlinks).
*/
struct xfs_inode *ip;
/* Kernel memory buffer used by scrubbers; freed at teardown. */
void *buf;
/*
* Clean up resources owned by whatever is in the buffer. Cleanup can
* be deferred with this hook as a means for scrub functions to pass
* data to repair functions. This function must not free the buffer
* itself.
*/
void (*buf_cleanup)(void *buf);
/* xfile used by the scrubbers; freed at teardown. */
struct xfile *xfile;
/* buffer target for in-memory btrees; also freed at teardown. */
struct xfs_buftarg *xmbtp;
/* Lock flags for @ip. */
uint ilock_flags;
/* The orphanage, for stashing files that have lost their parent. */
uint orphanage_ilock_flags;
struct xfs_inode *orphanage;
/* A temporary file on this filesystem, for staging new metadata. */
struct xfs_inode *tempip;
uint temp_ilock_flags;
/* See the XCHK/XREP state flags below. */
unsigned int flags;
/*
* The XFS_SICK_* flags that correspond to the metadata being scrubbed
* or repaired. We will use this mask to update the in-core fs health
* status with whatever we find.
*/
unsigned int sick_mask;
/* next time we want to cond_resched() */
struct xchk_relax relax;
/* State tracking for single-AG operations. */
struct xchk_ag sa;
};
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */
#define XCHK_HAVE_FREEZE_PROT (1U << 1) /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
#define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */
#define XCHK_FSGATES_DIRENTS (1U << 5) /* directory live update enabled */
#define XCHK_FSGATES_RMAP (1U << 6) /* rmapbt live update enabled */
#define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
/*
* The XCHK_FSGATES* flags reflect functionality in the main filesystem that
* are only enabled for this particular online fsck. When not in use, the
* features are gated off via dynamic code patching, which is why the state
* must be enabled during scrub setup and can only be torn down afterwards.
*/
#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN | \
XCHK_FSGATES_QUOTA | \
XCHK_FSGATES_DIRENTS | \
XCHK_FSGATES_RMAP)
struct xfs_scrub_subord {
struct xfs_scrub sc;
struct xfs_scrub *parent_sc;
unsigned int old_smtype;
unsigned int old_smflags;
};
struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
unsigned int subtype);
void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
/*
* We /could/ terminate a scrub/repair operation early. If we're not
* in a good place to continue (fatal signal, etc.) then bail out.
* Note that we're careful not to make any judgements about *error.
*/
static inline bool
xchk_should_terminate(
struct xfs_scrub *sc,
int *error)
{
if (xchk_maybe_relax(&sc->relax)) {
if (*error == 0)
*error = -EINTR;
return true;
}
return false;
}
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
int xchk_agf(struct xfs_scrub *sc);
int xchk_agfl(struct xfs_scrub *sc);
int xchk_agi(struct xfs_scrub *sc);
int xchk_allocbt(struct xfs_scrub *sc);
int xchk_iallocbt(struct xfs_scrub *sc);
int xchk_rmapbt(struct xfs_scrub *sc);
int xchk_refcountbt(struct xfs_scrub *sc);
int xchk_inode(struct xfs_scrub *sc);
int xchk_bmap_data(struct xfs_scrub *sc);
int xchk_bmap_attr(struct xfs_scrub *sc);
int xchk_bmap_cow(struct xfs_scrub *sc);
int xchk_directory(struct xfs_scrub *sc);
int xchk_xattr(struct xfs_scrub *sc);
int xchk_symlink(struct xfs_scrub *sc);
int xchk_parent(struct xfs_scrub *sc);
int xchk_dirtree(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
#else
static inline int
xchk_rtbitmap(struct xfs_scrub *sc)
{
return -ENOENT;
}
static inline int
xchk_rtsummary(struct xfs_scrub *sc)
{
return -ENOENT;
}
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
int xchk_quotacheck(struct xfs_scrub *sc);
#else
static inline int
xchk_quota(struct xfs_scrub *sc)
{
return -ENOENT;
}
static inline int
xchk_quotacheck(struct xfs_scrub *sc)
{
return -ENOENT;
}
#endif
int xchk_fscounters(struct xfs_scrub *sc);
int xchk_nlinks(struct xfs_scrub *sc);
/* cross-referencing helpers */
void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_only_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
xfs_extlen_t len);
void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
xfs_extlen_t len);
void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno,
xfs_extlen_t len);
void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
xfs_extlen_t len);
#ifdef CONFIG_XFS_RT
void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
xfs_extlen_t len);
#else
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
#endif
#endif /* __XFS_SCRUB_SCRUB_H__ */
|