summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c6
-rw-r--r--fs/afs/dir_edit.c4
-rw-r--r--fs/aio.c15
-rw-r--r--fs/bcachefs/alloc_foreground.c6
-rw-r--r--fs/bcachefs/clock.c50
-rw-r--r--fs/bcachefs/clock_types.h2
-rw-r--r--fs/bcachefs/ec.c76
-rw-r--r--fs/bcachefs/ec_types.h2
-rw-r--r--fs/bcachefs/fsck.c7
-rw-r--r--fs/bcachefs/journal_reclaim.c1
-rw-r--r--fs/bcachefs/mean_and_variance.h6
-rw-r--r--fs/bcachefs/util.c2
-rw-r--r--fs/bcachefs/util.h118
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/btrfs/relocation.c3
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/cachefiles/io.c2
-rw-r--r--fs/ceph/caps.c6
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/super.c3
-rw-r--r--fs/coredump.c16
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/erofs/data.c30
-rw-r--r--fs/erofs/decompressor_lzma.c2
-rw-r--r--fs/erofs/inode.c19
-rw-r--r--fs/erofs/super.c16
-rw-r--r--fs/erofs/zutil.c3
-rw-r--r--fs/exec.c4
-rw-r--r--fs/f2fs/checkpoint.c11
-rw-r--r--fs/f2fs/compress.c2
-rw-r--r--fs/f2fs/data.c27
-rw-r--r--fs/f2fs/extent_cache.c48
-rw-r--r--fs/f2fs/f2fs.h78
-rw-r--r--fs/f2fs/file.c135
-rw-r--r--fs/f2fs/gc.c24
-rw-r--r--fs/f2fs/inline.c28
-rw-r--r--fs/f2fs/inode.c84
-rw-r--r--fs/f2fs/namei.c20
-rw-r--r--fs/f2fs/recovery.c11
-rw-r--r--fs/f2fs/segment.c54
-rw-r--r--fs/f2fs/segment.h3
-rw-r--r--fs/f2fs/super.c11
-rw-r--r--fs/f2fs/sysfs.c12
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/hostfs/hostfs.h7
-rw-r--r--fs/hostfs/hostfs_kern.c78
-rw-r--r--fs/hostfs/hostfs_user.c7
-rw-r--r--fs/hugetlbfs/inode.c20
-rw-r--r--fs/inode.c42
-rw-r--r--fs/jffs2/Kconfig3
-rw-r--r--fs/jfs/jfs_dmap.c2
-rw-r--r--fs/jfs/jfs_dtree.c2
-rw-r--r--fs/jfs/jfs_imap.c5
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/jfs_metapage.c316
-rw-r--r--fs/jfs/jfs_metapage.h16
-rw-r--r--fs/jfs/xattr.c23
-rw-r--r--fs/locks.c9
-rw-r--r--fs/namei.c26
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/netfs/Kconfig18
-rw-r--r--fs/netfs/buffered_read.c18
-rw-r--r--fs/netfs/buffered_write.c14
-rw-r--r--fs/netfs/direct_read.c2
-rw-r--r--fs/netfs/direct_write.c8
-rw-r--r--fs/netfs/fscache_cache.c4
-rw-r--r--fs/netfs/fscache_cookie.c28
-rw-r--r--fs/netfs/fscache_io.c12
-rw-r--r--fs/netfs/fscache_main.c2
-rw-r--r--fs/netfs/fscache_volume.c4
-rw-r--r--fs/netfs/internal.h33
-rw-r--r--fs/netfs/io.c12
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/netfs/misc.c4
-rw-r--r--fs/netfs/write_collect.c16
-rw-r--r--fs/netfs/write_issue.c37
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/iostat.h4
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nilfs2/bmap.c10
-rw-r--r--fs/nilfs2/btnode.c25
-rw-r--r--fs/nilfs2/btree.c4
-rw-r--r--fs/nilfs2/segment.c91
-rw-r--r--fs/nilfs2/sysfs.c6
-rw-r--r--fs/ntfs3/attrib.c132
-rw-r--r--fs/ntfs3/bitmap.c2
-rw-r--r--fs/ntfs3/dir.c57
-rw-r--r--fs/ntfs3/file.c124
-rw-r--r--fs/ntfs3/frecord.c110
-rw-r--r--fs/ntfs3/fslog.c77
-rw-r--r--fs/ntfs3/fsntfs.c11
-rw-r--r--fs/ntfs3/index.c4
-rw-r--r--fs/ntfs3/inode.c119
-rw-r--r--fs/ntfs3/namei.c6
-rw-r--r--fs/ntfs3/ntfs.h15
-rw-r--r--fs/ntfs3/ntfs_fs.h36
-rw-r--r--fs/ntfs3/super.c71
-rw-r--r--fs/ntfs3/xattr.c25
-rw-r--r--fs/ocfs2/dir.c46
-rw-r--r--fs/ocfs2/dlmglue.c28
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/stackglue.h2
-rw-r--r--fs/ocfs2/xattr.c27
-rw-r--r--fs/pidfs.c63
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/proc/internal.h33
-rw-r--r--fs/proc/page.c42
-rw-r--r--fs/proc/task_mmu.c504
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/smb/server/connection.h4
-rw-r--r--fs/smb/server/mgmt/user_session.c2
-rw-r--r--fs/smb/server/oplock.h7
-rw-r--r--fs/smb/server/server.c1
-rw-r--r--fs/smb/server/server.h1
-rw-r--r--fs/smb/server/smb2pdu.c2
-rw-r--r--fs/smb/server/smb2pdu.h2
-rw-r--r--fs/smb/server/transport_rdma.c4
-rw-r--r--fs/smb/server/transport_tcp.c4
-rw-r--r--fs/smb/server/vfs_cache.c173
-rw-r--r--fs/smb/server/vfs_cache.h3
-rw-r--r--fs/super.c11
-rw-r--r--fs/tests/binfmt_elf_kunit.c (renamed from fs/binfmt_elf_test.c)0
-rw-r--r--fs/tests/exec_kunit.c (renamed from fs/exec_test.c)0
-rw-r--r--fs/ufs/super.c1
-rw-r--r--fs/userfaultfd.c2
-rw-r--r--fs/xattr.c91
-rw-r--r--fs/xfs/xfs_sysctl.c6
135 files changed, 2352 insertions, 1405 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 67afe68972d5..f8622ed72e08 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -533,14 +533,14 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
break;
}
- offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
+ offset = round_down(ctx->pos, sizeof(*dblock)) - folio_pos(folio);
size = min_t(loff_t, folio_size(folio),
- req->actual_len - folio_file_pos(folio));
+ req->actual_len - folio_pos(folio));
do {
dblock = kmap_local_folio(folio, offset);
ret = afs_dir_iterate_block(dvnode, ctx, dblock,
- folio_file_pos(folio) + offset);
+ folio_pos(folio) + offset);
kunmap_local(dblock);
if (ret != 1)
goto out;
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index e2fa577b66fe..a71bff10496b 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -256,7 +256,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
folio = folio0;
}
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
@@ -417,7 +417,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
folio = folio0;
}
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
diff --git a/fs/aio.c b/fs/aio.c
index 93ef59d358b3..6066f64967b3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -410,17 +410,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
struct kioctx *ctx;
unsigned long flags;
pgoff_t idx;
- int rc;
-
- /*
- * We cannot support the _NO_COPY case here, because copy needs to
- * happen under the ctx->completion_lock. That does not work with the
- * migration workflow of MIGRATE_SYNC_NO_COPY.
- */
- if (mode == MIGRATE_SYNC_NO_COPY)
- return -EINVAL;
-
- rc = 0;
+ int rc = 0;
/* mapping->i_private_lock here protects against the kioctx teardown. */
spin_lock(&mapping->i_private_lock);
@@ -465,7 +455,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
* events from being lost.
*/
spin_lock_irqsave(&ctx->completion_lock, flags);
- folio_migrate_copy(dst, src);
+ folio_copy(dst, src);
+ folio_migrate_flags(dst, src);
BUG_ON(ctx->ring_folios[idx] != src);
ctx->ring_folios[idx] = dst;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index cabf866c7956..618d2ff0292e 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -496,12 +496,6 @@ again:
for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
alloc_cursor < k.k->p.offset;
alloc_cursor++) {
- ret = btree_trans_too_many_iters(trans);
- if (ret) {
- ob = ERR_PTR(ret);
- break;
- }
-
s->buckets_seen++;
u64 bucket = alloc_cursor & ~(~0ULL << 56);
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index df3763c18c0e..1d6b691e8da6 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -6,15 +6,29 @@
#include <linux/kthread.h>
#include <linux/preempt.h>
-static inline long io_timer_cmp(io_timer_heap *h,
- struct io_timer *l,
- struct io_timer *r)
+static inline bool io_timer_cmp(const void *l, const void *r, void __always_unused *args)
{
- return l->expire - r->expire;
+ struct io_timer **_l = (struct io_timer **)l;
+ struct io_timer **_r = (struct io_timer **)r;
+
+ return (*_l)->expire < (*_r)->expire;
+}
+
+static inline void io_timer_swp(void *l, void *r, void __always_unused *args)
+{
+ struct io_timer **_l = (struct io_timer **)l;
+ struct io_timer **_r = (struct io_timer **)r;
+
+ swap(*_l, *_r);
}
void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = io_timer_cmp,
+ .swp = io_timer_swp,
+ };
+
spin_lock(&clock->timer_lock);
if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) {
@@ -23,22 +37,27 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
return;
}
- for (size_t i = 0; i < clock->timers.used; i++)
+ for (size_t i = 0; i < clock->timers.nr; i++)
if (clock->timers.data[i] == timer)
goto out;
- BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
+ BUG_ON(!min_heap_push(&clock->timers, &timer, &callbacks, NULL));
out:
spin_unlock(&clock->timer_lock);
}
void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = io_timer_cmp,
+ .swp = io_timer_swp,
+ };
+
spin_lock(&clock->timer_lock);
- for (size_t i = 0; i < clock->timers.used; i++)
+ for (size_t i = 0; i < clock->timers.nr; i++)
if (clock->timers.data[i] == timer) {
- heap_del(&clock->timers, i, io_timer_cmp, NULL);
+ min_heap_del(&clock->timers, i, &callbacks, NULL);
break;
}
@@ -123,10 +142,17 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now)
{
struct io_timer *ret = NULL;
+ const struct min_heap_callbacks callbacks = {
+ .less = io_timer_cmp,
+ .swp = io_timer_swp,
+ };
+
+ if (clock->timers.nr &&
+ time_after_eq64(now, clock->timers.data[0]->expire)) {
+ ret = *min_heap_peek(&clock->timers);
+ min_heap_pop(&clock->timers, &callbacks, NULL);
+ }
- if (clock->timers.used &&
- time_after_eq64(now, clock->timers.data[0]->expire))
- heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
return ret;
}
@@ -150,7 +176,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
printbuf_tabstop_push(out, 40);
prt_printf(out, "current time:\t%llu\n", now);
- for (unsigned i = 0; i < clock->timers.used; i++)
+ for (unsigned i = 0; i < clock->timers.nr; i++)
prt_printf(out, "%ps %ps:\t%llu\n",
clock->timers.data[i]->fn,
clock->timers.data[i]->fn2,
diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h
index 9c25d0fcf294..37554e4514fe 100644
--- a/fs/bcachefs/clock_types.h
+++ b/fs/bcachefs/clock_types.h
@@ -24,7 +24,7 @@ struct io_timer {
/* Amount to buffer up on a percpu counter */
#define IO_CLOCK_PCPU_SECTORS 128
-typedef HEAP(struct io_timer *) io_timer_heap;
+typedef DEFINE_MIN_HEAP(struct io_timer *, io_timer_heap) io_timer_heap;
struct io_clock {
atomic64_t now;
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 86948d110f6b..9b5b5c9a6c63 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -901,8 +901,8 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
mutex_lock(&c->ec_stripes_heap_lock);
if (n.size > h->size) {
- memcpy(n.data, h->data, h->used * sizeof(h->data[0]));
- n.used = h->used;
+ memcpy(n.data, h->data, h->nr * sizeof(h->data[0]));
+ n.nr = h->nr;
swap(*h, n);
}
mutex_unlock(&c->ec_stripes_heap_lock);
@@ -993,7 +993,7 @@ static u64 stripe_idx_to_delete(struct bch_fs *c)
lockdep_assert_held(&c->ec_stripes_heap_lock);
- if (h->used &&
+ if (h->nr &&
h->data[0].blocks_nonempty == 0 &&
!bch2_stripe_is_open(c, h->data[0].idx))
return h->data[0].idx;
@@ -1001,14 +1001,6 @@ static u64 stripe_idx_to_delete(struct bch_fs *c)
return 0;
}
-static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
- struct ec_stripe_heap_entry l,
- struct ec_stripe_heap_entry r)
-{
- return ((l.blocks_nonempty > r.blocks_nonempty) -
- (l.blocks_nonempty < r.blocks_nonempty));
-}
-
static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
size_t i)
{
@@ -1017,39 +1009,71 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i;
}
+static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args)
+{
+ struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
+ struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
+
+ return ((_l->blocks_nonempty > _r->blocks_nonempty) <
+ (_l->blocks_nonempty < _r->blocks_nonempty));
+}
+
+static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
+{
+ struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
+ struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
+ ec_stripes_heap *_h = (ec_stripes_heap *)h;
+ size_t i = _l - _h->data;
+ size_t j = _r - _h->data;
+
+ swap(*_l, *_r);
+
+ ec_stripes_heap_set_backpointer(_h, i);
+ ec_stripes_heap_set_backpointer(_h, j);
+}
+
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct stripe *m = genradix_ptr(&c->stripes, idx);
- BUG_ON(m->heap_idx >= h->used);
+ BUG_ON(m->heap_idx >= h->nr);
BUG_ON(h->data[m->heap_idx].idx != idx);
}
void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = ec_stripes_heap_cmp,
+ .swp = ec_stripes_heap_swap,
+ };
+
mutex_lock(&c->ec_stripes_heap_lock);
heap_verify_backpointer(c, idx);
- heap_del(&c->ec_stripes_heap, m->heap_idx,
- ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
+ min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap);
mutex_unlock(&c->ec_stripes_heap_lock);
}
void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = ec_stripes_heap_cmp,
+ .swp = ec_stripes_heap_swap,
+ };
+
mutex_lock(&c->ec_stripes_heap_lock);
- BUG_ON(heap_full(&c->ec_stripes_heap));
+ BUG_ON(min_heap_full(&c->ec_stripes_heap));
- heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
+ genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr;
+ min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) {
.idx = idx,
.blocks_nonempty = m->blocks_nonempty,
}),
- ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
+ &callbacks,
+ &c->ec_stripes_heap);
heap_verify_backpointer(c, idx);
mutex_unlock(&c->ec_stripes_heap_lock);
@@ -1058,6 +1082,10 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
+ const struct min_heap_callbacks callbacks = {
+ .less = ec_stripes_heap_cmp,
+ .swp = ec_stripes_heap_swap,
+ };
ec_stripes_heap *h = &c->ec_stripes_heap;
bool do_deletes;
size_t i;
@@ -1068,10 +1096,8 @@ void bch2_stripes_heap_update(struct bch_fs *c,
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
- heap_sift_up(h, i, ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
- heap_sift_down(h, i, ec_stripes_heap_cmp,
- ec_stripes_heap_set_backpointer);
+ min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap);
+ min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap);
heap_verify_backpointer(c, idx);
@@ -1864,7 +1890,7 @@ static s64 get_existing_stripe(struct bch_fs *c,
return -1;
mutex_lock(&c->ec_stripes_heap_lock);
- for (heap_idx = 0; heap_idx < h->used; heap_idx++) {
+ for (heap_idx = 0; heap_idx < h->nr; heap_idx++) {
/* No blocks worth reusing, stripe will just be deleted: */
if (!h->data[heap_idx].blocks_nonempty)
continue;
@@ -2195,7 +2221,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
size_t i;
mutex_lock(&c->ec_stripes_heap_lock);
- for (i = 0; i < min_t(size_t, h->used, 50); i++) {
+ for (i = 0; i < min_t(size_t, h->nr, 50); i++) {
m = genradix_ptr(&c->stripes, h->data[i].idx);
prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h
index 976426da3a12..1df03dccfc72 100644
--- a/fs/bcachefs/ec_types.h
+++ b/fs/bcachefs/ec_types.h
@@ -36,6 +36,6 @@ struct ec_stripe_heap_entry {
unsigned blocks_nonempty;
};
-typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap;
+typedef DEFINE_MIN_HEAP(struct ec_stripe_heap_entry, ec_stripes_heap) ec_stripes_heap;
#endif /* _BCACHEFS_EC_TYPES_H */
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index cc4f0963c0c5..9138944c5ae6 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -283,6 +283,7 @@ static int reattach_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 inode_snapshot)
{
+ struct bch_fs *c = trans->c;
struct bch_hash_info dir_hash;
struct bch_inode_unpacked lostfound;
char name_buf[20];
@@ -317,7 +318,7 @@ static int reattach_inode(struct btree_trans *trans,
return ret;
}
- dir_hash = bch2_hash_info_init(trans->c, &lostfound);
+ dir_hash = bch2_hash_info_init(c, &lostfound);
name = (struct qstr) QSTR(name_buf);
@@ -330,8 +331,10 @@ static int reattach_inode(struct btree_trans *trans,
inode->bi_subvol ?: inode->bi_inum,
&dir_offset,
STR_HASH_must_create);
- if (ret)
+ if (ret) {
+ bch_err_msg(c, ret, "error creating dirent");
return ret;
+ }
inode->bi_dir = lostfound.bi_inum;
inode->bi_dir_offset = dir_offset;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index d8a630742887..70b998d9f19c 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -206,6 +206,7 @@ void bch2_journal_space_available(struct journal *j)
if (nr_online < metadata_replicas_required(c)) {
struct printbuf buf = PRINTBUF;
+ buf.atomic++;
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
"rw journal devs:", nr_online, metadata_replicas_required(c));
diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h
index 4fcf062dd22c..47e4a3c3d26e 100644
--- a/fs/bcachefs/mean_and_variance.h
+++ b/fs/bcachefs/mean_and_variance.h
@@ -111,11 +111,11 @@ static inline u128_u u128_shl(u128_u i, s8 shift)
{
u128_u r;
- r.lo = i.lo << shift;
+ r.lo = i.lo << (shift & 63);
if (shift < 64)
- r.hi = (i.hi << shift) | (i.lo >> (64 - shift));
+ r.hi = (i.hi << (shift & 63)) | (i.lo >> (-shift & 63));
else {
- r.hi = i.lo << (shift - 64);
+ r.hi = i.lo << (-shift & 63);
r.lo = 0;
}
return r;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 4ec7e44d6e36..138320eaa2ad 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * random utiility code, for bcache but in theory not specific to bcache
+ * random utility code, for bcache but in theory not specific to bcache
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2def4f761ca6..902b7f5406a2 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -8,6 +8,7 @@
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/kernel.h>
+#include <linux/min_heap.h>
#include <linux/sched/clock.h>
#include <linux/llist.h>
#include <linux/log2.h>
@@ -54,17 +55,9 @@ static inline size_t buf_pages(void *p, size_t len)
PAGE_SIZE);
}
-#define HEAP(type) \
-struct { \
- size_t size, used; \
- type *data; \
-}
-
-#define DECLARE_HEAP(type, name) HEAP(type) name
-
#define init_heap(heap, _size, gfp) \
({ \
- (heap)->used = 0; \
+ (heap)->nr = 0; \
(heap)->size = (_size); \
(heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\
(gfp)); \
@@ -76,113 +69,6 @@ do { \
(heap)->data = NULL; \
} while (0)
-#define heap_set_backpointer(h, i, _fn) \
-do { \
- void (*fn)(typeof(h), size_t) = _fn; \
- if (fn) \
- fn(h, i); \
-} while (0)
-
-#define heap_swap(h, i, j, set_backpointer) \
-do { \
- swap((h)->data[i], (h)->data[j]); \
- heap_set_backpointer(h, i, set_backpointer); \
- heap_set_backpointer(h, j, set_backpointer); \
-} while (0)
-
-#define heap_peek(h) \
-({ \
- EBUG_ON(!(h)->used); \
- (h)->data[0]; \
-})
-
-#define heap_full(h) ((h)->used == (h)->size)
-
-#define heap_sift_down(h, i, cmp, set_backpointer) \
-do { \
- size_t _c, _j = i; \
- \
- for (; _j * 2 + 1 < (h)->used; _j = _c) { \
- _c = _j * 2 + 1; \
- if (_c + 1 < (h)->used && \
- cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \
- _c++; \
- \
- if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
- break; \
- heap_swap(h, _c, _j, set_backpointer); \
- } \
-} while (0)
-
-#define heap_sift_up(h, i, cmp, set_backpointer) \
-do { \
- while (i) { \
- size_t p = (i - 1) / 2; \
- if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
- break; \
- heap_swap(h, i, p, set_backpointer); \
- i = p; \
- } \
-} while (0)
-
-#define __heap_add(h, d, cmp, set_backpointer) \
-({ \
- size_t _i = (h)->used++; \
- (h)->data[_i] = d; \
- heap_set_backpointer(h, _i, set_backpointer); \
- \
- heap_sift_up(h, _i, cmp, set_backpointer); \
- _i; \
-})
-
-#define heap_add(h, d, cmp, set_backpointer) \
-({ \
- bool _r = !heap_full(h); \
- if (_r) \
- __heap_add(h, d, cmp, set_backpointer); \
- _r; \
-})
-
-#define heap_add_or_replace(h, new, cmp, set_backpointer) \
-do { \
- if (!heap_add(h, new, cmp, set_backpointer) && \
- cmp(h, new, heap_peek(h)) >= 0) { \
- (h)->data[0] = new; \
- heap_set_backpointer(h, 0, set_backpointer); \
- heap_sift_down(h, 0, cmp, set_backpointer); \
- } \
-} while (0)
-
-#define heap_del(h, i, cmp, set_backpointer) \
-do { \
- size_t _i = (i); \
- \
- BUG_ON(_i >= (h)->used); \
- (h)->used--; \
- if ((_i) < (h)->used) { \
- heap_swap(h, _i, (h)->used, set_backpointer); \
- heap_sift_up(h, _i, cmp, set_backpointer); \
- heap_sift_down(h, _i, cmp, set_backpointer); \
- } \
-} while (0)
-
-#define heap_pop(h, d, cmp, set_backpointer) \
-({ \
- bool _r = (h)->used; \
- if (_r) { \
- (d) = (h)->data[0]; \
- heap_del(h, 0, cmp, set_backpointer); \
- } \
- _r; \
-})
-
-#define heap_resort(heap, cmp, set_backpointer) \
-do { \
- ssize_t _i; \
- for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
- heap_sift_down(heap, _i, cmp, set_backpointer); \
-} while (0)
-
#define ANYSINT_MAX(t) \
((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5ae8045f4df4..19fa49cd9907 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2150,5 +2150,5 @@ core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
-#include "binfmt_elf_test.c"
+#include "tests/binfmt_elf_kunit.c"
#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b592fc8cf368..0533d0f82dc9 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2981,8 +2981,7 @@ static int relocate_one_folio(struct reloc_control *rc,
if (folio_test_readahead(folio))
page_cache_async_readahead(inode->i_mapping, ra, NULL,
- folio, index,
- last_index + 1 - index);
+ folio, last_index + 1 - index);
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fb3675f5bf50..4ca711a773ef 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5306,7 +5306,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
if (folio_test_readahead(folio))
page_cache_async_readahead(mapping, &sctx->ra, NULL, folio,
- index, last_index + 1 - index);
+ last_index + 1 - index);
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index e667dbcd20e8..a91acd03ee12 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -630,7 +630,7 @@ static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq)
_enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start);
- subreq->max_len = ULONG_MAX;
+ subreq->max_len = MAX_RW_COUNT;
subreq->max_nr_segs = BIO_MAX_VECS;
if (!cachefiles_cres_file(cres)) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c4941ba245ac..e98aa8219303 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3067,10 +3067,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
flags, &_got);
WARN_ON_ONCE(ret == -EAGAIN);
if (!ret) {
+#ifdef CONFIG_DEBUG_FS
struct ceph_mds_client *mdsc = fsc->mdsc;
struct cap_wait cw;
+#endif
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+#ifdef CONFIG_DEBUG_FS
cw.ino = ceph_ino(inode);
cw.tgid = current->tgid;
cw.need = need;
@@ -3079,6 +3082,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
spin_lock(&mdsc->caps_list_lock);
list_add(&cw.list, &mdsc->cap_wait_list);
spin_unlock(&mdsc->caps_list_lock);
+#endif
/* make sure used fmode not timeout */
ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
@@ -3097,9 +3101,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
remove_wait_queue(&ci->i_cap_wq, &wait);
ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
+#ifdef CONFIG_DEBUG_FS
spin_lock(&mdsc->caps_list_lock);
list_del(&cw.list);
spin_unlock(&mdsc->caps_list_lock);
+#endif
if (ret == -EAGAIN)
continue;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 82a2e2a06a65..18c72b305858 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -141,7 +141,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
if (ptr_pos >= i_size_read(dir))
return NULL;
- if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) {
+ if (!cache_ctl->page || ptr_pgoff != cache_ctl->page->index) {
ceph_readdir_cache_release(cache_ctl);
cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff);
if (!cache_ctl->page) {
@@ -1589,7 +1589,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
}
spin_lock(&mdsc->dentry_list_lock);
- __dentry_dir_lease_touch(mdsc, di),
+ __dentry_dir_lease_touch(mdsc, di);
spin_unlock(&mdsc->dentry_list_lock);
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 249ddfbb1b03..8f8de8f33abb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1863,7 +1863,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn,
unsigned idx = ctl->index % nsize;
pgoff_t pgoff = ctl->index / nsize;
- if (!ctl->page || pgoff != page_index(ctl->page)) {
+ if (!ctl->page || pgoff != ctl->page->index) {
ceph_readdir_cache_release(ctl);
if (idx == 0)
ctl->page = grab_cache_page(&dir->i_data, pgoff);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c2157f6e0c69..276e34ab3e2c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5446,6 +5446,8 @@ static void delayed_work(struct work_struct *work)
}
mutex_unlock(&mdsc->mutex);
+ ceph_flush_cap_releases(mdsc, s);
+
mutex_lock(&s->s_mutex);
if (renew_caps)
send_renew_caps(mdsc, s);
@@ -5505,7 +5507,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
mdsc->last_renew_caps = jiffies;
INIT_LIST_HEAD(&mdsc->cap_delay_list);
+#ifdef CONFIG_DEBUG_FS
INIT_LIST_HEAD(&mdsc->cap_wait_list);
+#endif
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index cfa18cf915a0..9bcc7f181bfe 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -416,6 +416,8 @@ struct ceph_quotarealm_inode {
struct inode *inode;
};
+#ifdef CONFIG_DEBUG_FS
+
struct cap_wait {
struct list_head list;
u64 ino;
@@ -424,6 +426,8 @@ struct cap_wait {
int want;
};
+#endif
+
enum {
CEPH_MDSC_STOPPING_BEGIN = 1,
CEPH_MDSC_STOPPING_FLUSHING = 2,
@@ -512,7 +516,9 @@ struct ceph_mds_client {
spinlock_t caps_list_lock;
struct list_head caps_list; /* unused (reserved or
unreserved) */
+#ifdef CONFIG_DEBUG_FS
struct list_head cap_wait_list;
+#endif
int caps_total_count; /* total caps allocated */
int caps_use_count; /* in use */
int caps_use_max; /* max used caps */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 885cb5d4e771..0cdf84cd1791 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -961,7 +961,8 @@ static int __init init_caches(void)
if (!ceph_mds_request_cachep)
goto bad_mds_req;
- ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT);
+ ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10,
+ (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *));
if (!ceph_wb_pagevec_pool)
goto bad_pagevec_pool;
diff --git a/fs/coredump.c b/fs/coredump.c
index a57a06b80f57..7f12ff6ad1d3 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -361,17 +361,16 @@ out:
return ispipe;
}
-static int zap_process(struct task_struct *start, int exit_code)
+static int zap_process(struct signal_struct *signal, int exit_code)
{
struct task_struct *t;
int nr = 0;
- /* Allow SIGKILL, see prepare_signal() */
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
+ signal->flags = SIGNAL_GROUP_EXIT;
+ signal->group_exit_code = exit_code;
+ signal->group_stop_count = 0;
- for_each_thread(start, t) {
+ __for_each_thread(signal, t) {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
sigaddset(&t->pending.signal, SIGKILL);
@@ -391,8 +390,9 @@ static int zap_threads(struct task_struct *tsk,
spin_lock_irq(&tsk->sighand->siglock);
if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
+ /* Allow SIGKILL, see prepare_signal() */
signal->core_state = core_state;
- nr = zap_process(tsk, exit_code);
+ nr = zap_process(signal, exit_code);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
tsk->flags |= PF_DUMPCORE;
atomic_set(&core_state->nr_threads, nr);
@@ -991,7 +991,7 @@ void validate_coredump_safety(void)
}
}
-static int proc_dostring_coredump(struct ctl_table *table, int write,
+static int proc_dostring_coredump(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int error = proc_dostring(table, write, buffer, lenp, ppos);
diff --git a/fs/dcache.c b/fs/dcache.c
index 8bdc278a0205..3d8daaecb6d1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -177,7 +177,7 @@ static long get_nr_dentry_negative(void)
return sum < 0 ? 0 : sum;
}
-static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
+static int proc_nr_dentry(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
dentry_stat.nr_dentry = get_nr_dentry();
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index b9575957a7c2..d45ef541d848 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -48,7 +48,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
iput(toput_inode);
}
-int drop_caches_sysctl_handler(struct ctl_table *table, int write,
+int drop_caches_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int ret;
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 8be60797ea2f..1b7eba38ba1e 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -21,38 +21,32 @@ void erofs_put_metabuf(struct erofs_buf *buf)
if (!buf->page)
return;
erofs_unmap_metabuf(buf);
- put_page(buf->page);
+ folio_put(page_folio(buf->page));
buf->page = NULL;
}
-/*
- * Derive the block size from inode->i_blkbits to make compatible with
- * anonymous inode in fscache mode.
- */
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
enum erofs_kmap_type type)
{
pgoff_t index = offset >> PAGE_SHIFT;
- struct page *page = buf->page;
- struct folio *folio;
- unsigned int nofs_flag;
+ struct folio *folio = NULL;
- if (!page || page->index != index) {
+ if (buf->page) {
+ folio = page_folio(buf->page);
+ if (folio_file_page(folio, index) != buf->page)
+ erofs_unmap_metabuf(buf);
+ }
+ if (!folio || !folio_contains(folio, index)) {
erofs_put_metabuf(buf);
-
- nofs_flag = memalloc_nofs_save();
- folio = read_cache_folio(buf->mapping, index, NULL, NULL);
- memalloc_nofs_restore(nofs_flag);
+ folio = read_mapping_folio(buf->mapping, index, NULL);
if (IS_ERR(folio))
return folio;
-
- /* should already be PageUptodate, no need to lock page */
- page = folio_file_page(folio, index);
- buf->page = page;
}
+ buf->page = folio_file_page(folio, index);
+
if (buf->kmap_type == EROFS_NO_KMAP) {
if (type == EROFS_KMAP)
- buf->base = kmap_local_page(page);
+ buf->base = kmap_local_page(buf->page);
buf->kmap_type = type;
} else if (buf->kmap_type != type) {
DBG_BUGON(1);
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 06a722b85a45..40666815046f 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -188,7 +188,7 @@ again:
!rq->partial_decoding);
buf.in_size = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in);
rq->inputsize -= buf.in_size;
- buf.in = dctx.kin + rq->pageofs_in,
+ buf.in = dctx.kin + rq->pageofs_in;
dctx.bounce = strm->bounce;
do {
dctx.avail_out = buf.out_size - buf.out_pos;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 5f6439a63af7..43c09aae2afc 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -334,14 +334,29 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
unsigned int query_flags)
{
struct inode *const inode = d_inode(path->dentry);
+ bool compressed =
+ erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout);
- if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout))
+ if (compressed)
stat->attributes |= STATX_ATTR_COMPRESSED;
-
stat->attributes |= STATX_ATTR_IMMUTABLE;
stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
STATX_ATTR_IMMUTABLE);
+ /*
+ * Return the DIO alignment restrictions if requested.
+ *
+ * In EROFS, STATX_DIOALIGN is not supported in ondemand mode and
+ * compressed files, so in these cases we report no DIO support.
+ */
+ if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
+ stat->result_mask |= STATX_DIOALIGN;
+ if (!erofs_is_fscache_mode(inode->i_sb) && !compressed) {
+ stat->dio_mem_align =
+ bdev_logical_block_size(inode->i_sb->s_bdev);
+ stat->dio_offset_align = stat->dio_mem_align;
+ }
+ }
generic_fillattr(idmap, request_mask, inode, stat);
return 0;
}
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 35268263aaed..32ce5b35e1df 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -576,6 +576,21 @@ static const struct export_operations erofs_export_ops = {
.get_parent = erofs_get_parent,
};
+static void erofs_set_sysfs_name(struct super_block *sb)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+ if (erofs_is_fscache_mode(sb)) {
+ if (sbi->domain_id)
+ super_set_sysfs_name_generic(sb, "%s,%s",sbi->domain_id,
+ sbi->fsid);
+ else
+ super_set_sysfs_name_generic(sb, "%s", sbi->fsid);
+ return;
+ }
+ super_set_sysfs_name_id(sb);
+}
+
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
@@ -643,6 +658,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_POSIXACL;
else
sb->s_flags &= ~SB_POSIXACL;
+ erofs_set_sysfs_name(sb);
#ifdef CONFIG_EROFS_FS_ZIP
xa_init(&sbi->managed_pslots);
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index b80f612867c2..9b53883e5caf 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -38,11 +38,13 @@ void *z_erofs_get_gbuf(unsigned int requiredpages)
{
struct z_erofs_gbuf *gbuf;
+ migrate_disable();
gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
spin_lock(&gbuf->lock);
/* check if the buffer is too small */
if (requiredpages > gbuf->nrpages) {
spin_unlock(&gbuf->lock);
+ migrate_enable();
/* (for sparse checker) pretend gbuf->lock is still taken */
__acquire(gbuf->lock);
return NULL;
@@ -57,6 +59,7 @@ void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
DBG_BUGON(gbuf->ptr != ptr);
spin_unlock(&gbuf->lock);
+ migrate_enable();
}
int z_erofs_gbuf_growsize(unsigned int nrpages)
diff --git a/fs/exec.c b/fs/exec.c
index a47d0e4c54f6..a126e3d1cacb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2204,7 +2204,7 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
#ifdef CONFIG_SYSCTL
-static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
@@ -2236,5 +2236,5 @@ fs_initcall(init_fs_exec_sysctls);
#endif /* CONFIG_SYSCTL */
#ifdef CONFIG_EXEC_KUNIT_TEST
-#include "exec_test.c"
+#include "tests/exec_kunit.c"
#endif
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 55d444bec5c0..bdd96329dddd 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1186,6 +1186,11 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
ckpt->next_free_nid = cpu_to_le32(last_nid);
+
+ /* update user_block_counts */
+ sbi->last_valid_block_count = sbi->total_valid_block_count;
+ percpu_counter_set(&sbi->alloc_valid_block_count, 0);
+ percpu_counter_set(&sbi->rf_node_block_count, 0);
}
static bool __need_flush_quota(struct f2fs_sb_info *sbi)
@@ -1575,11 +1580,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk += NR_CURSEG_NODE_TYPE;
}
- /* update user_block_counts */
- sbi->last_valid_block_count = sbi->total_valid_block_count;
- percpu_counter_set(&sbi->alloc_valid_block_count, 0);
- percpu_counter_set(&sbi->rf_node_block_count, 0);
-
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
/* Wait for all dirty meta pages to be submitted for IO */
@@ -1718,6 +1718,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
f2fs_restore_inmem_curseg(sbi);
+ f2fs_reinit_atgc_curseg(sbi);
stat_inc_cp_count(sbi);
stop:
unblock_operations(sbi);
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 1ef82a546391..990b93689b46 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1100,7 +1100,7 @@ retry:
struct bio *bio = NULL;
ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
- &last_block_in_bio, false, true);
+ &last_block_in_bio, NULL, true);
f2fs_put_rpages(cc);
f2fs_destroy_compress_ctx(cc, true);
if (ret)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b9b0debc6b3d..6457e5bca9c9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -925,6 +925,7 @@ alloc_new:
#ifdef CONFIG_BLK_DEV_ZONED
static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
+ struct block_device *bdev = sbi->sb->s_bdev;
int devi = 0;
if (f2fs_is_multi_device(sbi)) {
@@ -935,8 +936,9 @@ static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
return false;
}
blkaddr -= FDEV(devi).start_blk;
+ bdev = FDEV(devi).bdev;
}
- return bdev_is_zoned(FDEV(devi).bdev) &&
+ return bdev_is_zoned(bdev) &&
f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
(blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
}
@@ -2067,12 +2069,17 @@ static inline loff_t f2fs_readpage_limit(struct inode *inode)
return i_size_read(inode);
}
+static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac)
+{
+ return rac ? REQ_RAHEAD : 0;
+}
+
static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
unsigned nr_pages,
struct f2fs_map_blocks *map,
struct bio **bio_ret,
sector_t *last_block_in_bio,
- bool is_readahead)
+ struct readahead_control *rac)
{
struct bio *bio = *bio_ret;
const unsigned blocksize = blks_to_bytes(inode, 1);
@@ -2148,7 +2155,7 @@ submit_and_realloc:
}
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0, index,
+ f2fs_ra_op_flags(rac), index,
false);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
@@ -2178,7 +2185,7 @@ out:
#ifdef CONFIG_F2FS_FS_COMPRESSION
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
- bool is_readahead, bool for_write)
+ struct readahead_control *rac, bool for_write)
{
struct dnode_of_data dn;
struct inode *inode = cc->inode;
@@ -2301,7 +2308,7 @@ submit_and_realloc:
if (!bio) {
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0,
+ f2fs_ra_op_flags(rac),
page->index, for_write);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
@@ -2399,7 +2406,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
ret = f2fs_read_multi_pages(&cc, &bio,
max_nr_pages,
&last_block_in_bio,
- rac != NULL, false);
+ rac, false);
f2fs_destroy_compress_ctx(&cc, false);
if (ret)
goto set_error_page;
@@ -2449,7 +2456,7 @@ next_page:
ret = f2fs_read_multi_pages(&cc, &bio,
max_nr_pages,
&last_block_in_bio,
- rac != NULL, false);
+ rac, false);
f2fs_destroy_compress_ctx(&cc, false);
}
}
@@ -2601,7 +2608,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (IS_NOQUOTA(inode))
return true;
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_used_in_atomic_write(inode))
return true;
/* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */
if (f2fs_compressed_file(inode) &&
@@ -2688,7 +2695,7 @@ got_it:
}
/* wait for GCed page writeback via META_MAPPING */
- if (fio->post_read)
+ if (fio->meta_gc)
f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
/*
@@ -2783,7 +2790,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
.submitted = 0,
.compr_blocks = compr_blocks,
.need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
- .post_read = f2fs_post_read_required(inode) ? 1 : 0,
+ .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0,
.io_type = io_type,
.io_wbc = wbc,
.bio = bio,
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 48048fa36427..fd1fc06359ee 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -19,33 +19,23 @@
#include "node.h"
#include <trace/events/f2fs.h>
-bool sanity_check_extent_cache(struct inode *inode)
+bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
- struct extent_tree *et = fi->extent_tree[EX_READ];
- struct extent_info *ei;
-
- if (!et)
- return true;
+ struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
+ struct extent_info ei;
- ei = &et->largest;
- if (!ei->len)
- return true;
+ get_read_extent_info(&ei, i_ext);
- /* Let's drop, if checkpoint got corrupted. */
- if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
- ei->len = 0;
- et->largest_updated = true;
+ if (!ei.len)
return true;
- }
- if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
- !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
+ !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
DATA_GENERIC_ENHANCE)) {
f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
__func__, inode->i_ino,
- ei->blk, ei->fofs, ei->len);
+ ei.blk, ei.fofs, ei.len);
return false;
}
return true;
@@ -394,24 +384,22 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
if (!__may_extent_tree(inode, EX_READ)) {
/* drop largest read extent */
- if (i_ext && i_ext->len) {
+ if (i_ext->len) {
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
i_ext->len = 0;
set_page_dirty(ipage);
}
- goto out;
+ set_inode_flag(inode, FI_NO_EXTENT);
+ return;
}
et = __grab_extent_tree(inode, EX_READ);
- if (!i_ext || !i_ext->len)
- goto out;
-
get_read_extent_info(&ei, i_ext);
write_lock(&et->lock);
- if (atomic_read(&et->node_cnt))
- goto unlock_out;
+ if (atomic_read(&et->node_cnt) || !ei.len)
+ goto skip;
en = __attach_extent_node(sbi, et, &ei, NULL,
&et->root.rb_root.rb_node, true);
@@ -423,11 +411,13 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
list_add_tail(&en->list, &eti->extent_list);
spin_unlock(&eti->extent_lock);
}
-unlock_out:
+skip:
+ /* Let's drop, if checkpoint got corrupted. */
+ if (f2fs_cp_error(sbi)) {
+ et->largest.len = 0;
+ et->largest_updated = true;
+ }
write_unlock(&et->lock);
-out:
- if (!F2FS_I(inode)->extent_tree[EX_READ])
- set_inode_flag(inode, FI_NO_EXTENT);
}
void f2fs_init_age_extent_tree(struct inode *inode)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8a9d910aa552..ac19c61f0c3e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -803,6 +803,7 @@ enum {
FI_COW_FILE, /* indicate COW file */
FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */
FI_ATOMIC_REPLACE, /* indicate atomic replace */
+ FI_OPENED_FILE, /* indicate file has been opened */
FI_MAX, /* max flag, never be used */
};
@@ -842,7 +843,11 @@ struct f2fs_inode_info {
struct task_struct *atomic_write_task; /* store atomic write task */
struct extent_tree *extent_tree[NR_EXTENT_CACHES];
/* cached extent_tree entry */
- struct inode *cow_inode; /* copy-on-write inode for atomic write */
+ union {
+ struct inode *cow_inode; /* copy-on-write inode for atomic write */
+ struct inode *atomic_inode;
+ /* point to atomic_inode, available only for cow_inode */
+ };
/* avoid racing between foreground op and gc */
struct f2fs_rwsem i_gc_rwsem[2];
@@ -1210,7 +1215,7 @@ struct f2fs_io_info {
unsigned int in_list:1; /* indicate fio is in io_list */
unsigned int is_por:1; /* indicate IO is from recovery or not */
unsigned int encrypted:1; /* indicate file is encrypted */
- unsigned int post_read:1; /* require post read */
+ unsigned int meta_gc:1; /* require meta inode GC */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
struct bio **bio; /* bio for ipu */
@@ -3222,21 +3227,15 @@ static inline bool f2fs_need_compress_data(struct inode *inode)
return false;
}
-static inline unsigned int addrs_per_inode(struct inode *inode)
+static inline unsigned int addrs_per_page(struct inode *inode,
+ bool is_inode)
{
- unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
- get_inline_xattr_addrs(inode);
+ unsigned int addrs = is_inode ? (CUR_ADDRS_PER_INODE(inode) -
+ get_inline_xattr_addrs(inode)) : DEF_ADDRS_PER_BLOCK;
- if (!f2fs_compressed_file(inode))
- return addrs;
- return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size);
-}
-
-static inline unsigned int addrs_per_block(struct inode *inode)
-{
- if (!f2fs_compressed_file(inode))
- return DEF_ADDRS_PER_BLOCK;
- return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size);
+ if (f2fs_compressed_file(inode))
+ return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size);
+ return addrs;
}
static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -3706,6 +3705,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno);
int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi);
void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
@@ -4163,7 +4163,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab;
* inline.c
*/
bool f2fs_may_inline_data(struct inode *inode);
-bool f2fs_sanity_check_inline_data(struct inode *inode);
+bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage);
bool f2fs_may_inline_dentry(struct inode *inode);
void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage);
void f2fs_truncate_inline_inode(struct inode *inode,
@@ -4204,7 +4204,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
-bool sanity_check_extent_cache(struct inode *inode);
+bool sanity_check_extent_cache(struct inode *inode, struct page *ipage);
void f2fs_init_extent_tree(struct inode *inode);
void f2fs_drop_extent_tree(struct inode *inode);
void f2fs_destroy_extent_node(struct inode *inode);
@@ -4275,6 +4275,16 @@ static inline bool f2fs_post_read_required(struct inode *inode)
f2fs_compressed_file(inode);
}
+static inline bool f2fs_used_in_atomic_write(struct inode *inode)
+{
+ return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode);
+}
+
+static inline bool f2fs_meta_inode_gc_required(struct inode *inode)
+{
+ return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode);
+}
+
/*
* compress.c
*/
@@ -4310,7 +4320,7 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
unsigned int llen, unsigned int c_len);
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
- bool is_readahead, bool for_write);
+ struct readahead_control *rac, bool for_write);
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed,
bool in_task);
@@ -4401,22 +4411,18 @@ static inline int set_compress_context(struct inode *inode)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
- F2FS_I(inode)->i_compress_algorithm =
- F2FS_OPTION(sbi).compress_algorithm;
- F2FS_I(inode)->i_log_cluster_size =
- F2FS_OPTION(sbi).compress_log_size;
- F2FS_I(inode)->i_compress_flag =
- F2FS_OPTION(sbi).compress_chksum ?
- BIT(COMPRESS_CHKSUM) : 0;
- F2FS_I(inode)->i_cluster_size =
- BIT(F2FS_I(inode)->i_log_cluster_size);
- if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
- F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
+ fi->i_compress_algorithm = F2FS_OPTION(sbi).compress_algorithm;
+ fi->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size;
+ fi->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ?
+ BIT(COMPRESS_CHKSUM) : 0;
+ fi->i_cluster_size = BIT(fi->i_log_cluster_size);
+ if ((fi->i_compress_algorithm == COMPRESS_LZ4 ||
+ fi->i_compress_algorithm == COMPRESS_ZSTD) &&
F2FS_OPTION(sbi).compress_level)
- F2FS_I(inode)->i_compress_level =
- F2FS_OPTION(sbi).compress_level;
- F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
+ fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
+ fi->i_flags |= F2FS_COMPR_FL;
set_inode_flag(inode, FI_COMPRESSED_FILE);
stat_inc_compr_inode(inode);
inc_compr_inode_stat(inode);
@@ -4431,15 +4437,15 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- f2fs_down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&fi->i_sem);
if (!f2fs_compressed_file(inode)) {
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return true;
}
if (f2fs_is_mmap_file(inode) ||
(S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) {
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return false;
}
@@ -4448,7 +4454,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
clear_inode_flag(inode, FI_COMPRESSED_FILE);
f2fs_mark_inode_dirty_sync(inode, true);
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return true;
}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c1ad9b278c47..168f08507004 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -554,6 +554,42 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+static int finish_preallocate_blocks(struct inode *inode)
+{
+ int ret;
+
+ inode_lock(inode);
+ if (is_inode_flag_set(inode, FI_OPENED_FILE)) {
+ inode_unlock(inode);
+ return 0;
+ }
+
+ if (!file_should_truncate(inode)) {
+ set_inode_flag(inode, FI_OPENED_FILE);
+ inode_unlock(inode);
+ return 0;
+ }
+
+ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ filemap_invalidate_lock(inode->i_mapping);
+
+ truncate_setsize(inode, i_size_read(inode));
+ ret = f2fs_truncate(inode);
+
+ filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
+ if (!ret)
+ set_inode_flag(inode, FI_OPENED_FILE);
+
+ inode_unlock(inode);
+ if (ret)
+ return ret;
+
+ file_dont_truncate(inode);
+ return 0;
+}
+
static int f2fs_file_open(struct inode *inode, struct file *filp)
{
int err = fscrypt_file_open(inode, filp);
@@ -571,7 +607,11 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_CAN_ODIRECT;
- return dquot_file_open(inode, filp);
+ err = dquot_file_open(inode, filp);
+ if (err)
+ return err;
+
+ return finish_preallocate_blocks(inode);
}
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -825,6 +865,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
return true;
if (f2fs_compressed_file(inode))
return true;
+ if (f2fs_has_inline_data(inode))
+ return true;
/* disallow direct IO if any of devices has unaligned blksize */
if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
@@ -937,6 +979,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
int err;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
@@ -955,7 +998,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return -EOPNOTSUPP;
if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
!IS_ALIGNED(attr->ia_size,
- F2FS_BLK_TO_BYTES(F2FS_I(inode)->i_cluster_size)))
+ F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
return -EINVAL;
}
@@ -1009,7 +1052,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return err;
}
- f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, attr->ia_size);
@@ -1021,14 +1064,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
* larger than i_size.
*/
filemap_invalidate_unlock(inode->i_mapping);
- f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
if (err)
return err;
- spin_lock(&F2FS_I(inode)->i_size_lock);
+ spin_lock(&fi->i_size_lock);
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
- F2FS_I(inode)->last_disk_size = i_size_read(inode);
- spin_unlock(&F2FS_I(inode)->i_size_lock);
+ fi->last_disk_size = i_size_read(inode);
+ spin_unlock(&fi->i_size_lock);
}
__setattr_copy(idmap, inode, attr);
@@ -1038,7 +1081,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
if (!err)
- inode->i_mode = F2FS_I(inode)->i_acl_mode;
+ inode->i_mode = fi->i_acl_mode;
clear_inode_flag(inode, FI_ACL_MODE);
}
}
@@ -1946,15 +1989,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if (err)
return err;
- f2fs_down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&fi->i_sem);
if (!f2fs_may_compress(inode) ||
(S_ISREG(inode->i_mode) &&
F2FS_HAS_BLOCKS(inode))) {
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return -EINVAL;
}
err = set_compress_context(inode);
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
if (err)
return err;
@@ -2139,6 +2182,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
set_inode_flag(fi->cow_inode, FI_COW_FILE);
clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
+
+ /* Set the COW inode's atomic_inode to the atomic inode */
+ F2FS_I(fi->cow_inode)->atomic_inode = inode;
} else {
/* Reuse the already created COW inode */
ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
@@ -3541,6 +3587,7 @@ next:
static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t page_idx = 0, last_idx;
unsigned int released_blocks = 0;
@@ -3578,7 +3625,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
goto out;
- if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ if (!atomic_read(&fi->i_compr_blocks)) {
ret = -EPERM;
goto out;
}
@@ -3587,7 +3634,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, true);
- f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3613,7 +3660,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
- count = round_up(count, F2FS_I(inode)->i_cluster_size);
+ count = round_up(count, fi->i_cluster_size);
ret = release_compress_blocks(&dn, count);
@@ -3629,7 +3676,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
}
filemap_invalidate_unlock(inode->i_mapping);
- f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
out:
if (released_blocks)
f2fs_update_time(sbi, REQ_TIME);
@@ -3640,14 +3687,14 @@ out:
if (ret >= 0) {
ret = put_user(released_blocks, (u64 __user *)arg);
} else if (released_blocks &&
- atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ atomic_read(&fi->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
"iblocks=%llu, released=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
released_blocks,
- atomic_read(&F2FS_I(inode)->i_compr_blocks));
+ atomic_read(&fi->i_compr_blocks));
}
return ret;
@@ -3736,6 +3783,7 @@ next:
static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t page_idx = 0, last_idx;
unsigned int reserved_blocks = 0;
@@ -3761,10 +3809,10 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
goto unlock_inode;
}
- if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
+ if (atomic_read(&fi->i_compr_blocks))
goto unlock_inode;
- f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3790,7 +3838,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
- count = round_up(count, F2FS_I(inode)->i_cluster_size);
+ count = round_up(count, fi->i_cluster_size);
ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
@@ -3805,7 +3853,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
}
filemap_invalidate_unlock(inode->i_mapping);
- f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
if (!ret) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -3821,14 +3869,14 @@ unlock_inode:
if (!ret) {
ret = put_user(reserved_blocks, (u64 __user *)arg);
} else if (reserved_blocks &&
- atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ atomic_read(&fi->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
"iblocks=%llu, reserved=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
reserved_blocks,
- atomic_read(&F2FS_I(inode)->i_compr_blocks));
+ atomic_read(&fi->i_compr_blocks));
}
return ret;
@@ -3891,7 +3939,9 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
return -EOPNOTSUPP;
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
@@ -4017,7 +4067,7 @@ out:
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
@@ -4052,6 +4102,7 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_comp_option option;
int ret = 0;
@@ -4071,7 +4122,9 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
option.algorithm >= COMPRESS_MAX)
return -EINVAL;
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
f2fs_down_write(&F2FS_I(inode)->i_sem);
@@ -4090,27 +4143,27 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
goto out;
}
- F2FS_I(inode)->i_compress_algorithm = option.algorithm;
- F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
- F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size);
+ fi->i_compress_algorithm = option.algorithm;
+ fi->i_log_cluster_size = option.log_cluster_size;
+ fi->i_cluster_size = BIT(option.log_cluster_size);
/* Set default level */
- if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD)
- F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+ if (fi->i_compress_algorithm == COMPRESS_ZSTD)
+ fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
else
- F2FS_I(inode)->i_compress_level = 0;
+ fi->i_compress_level = 0;
/* Adjust mount option level */
if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
F2FS_OPTION(sbi).compress_level)
- F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level;
+ fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
f2fs_mark_inode_dirty_sync(inode, true);
if (!f2fs_is_compress_backend_ready(inode))
f2fs_warn(sbi, "compression algorithm is successfully set, "
"but current kernel doesn't support this algorithm.");
out:
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
@@ -4167,7 +4220,9 @@ static int f2fs_ioc_decompress_file(struct file *filp)
f2fs_balance_fs(sbi, true);
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
if (!f2fs_is_compress_backend_ready(inode)) {
@@ -4222,7 +4277,7 @@ static int f2fs_ioc_decompress_file(struct file *filp)
f2fs_update_time(sbi, REQ_TIME);
out:
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
@@ -4244,7 +4299,9 @@ static int f2fs_ioc_compress_file(struct file *filp)
f2fs_balance_fs(sbi, true);
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
if (!f2fs_is_compress_backend_ready(inode)) {
@@ -4300,7 +4357,7 @@ static int f2fs_ioc_compress_file(struct file *filp)
f2fs_update_time(sbi, REQ_TIME);
out:
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 6066c6eecf41..724bbcb447d3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1171,7 +1171,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static int ra_data_block(struct inode *inode, pgoff_t index)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct address_space *mapping = inode->i_mapping;
+ struct address_space *mapping = f2fs_is_cow_file(inode) ?
+ F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
struct f2fs_io_info fio = {
@@ -1260,6 +1261,8 @@ put_page:
static int move_data_block(struct inode *inode, block_t bidx,
int gc_type, unsigned int segno, int off)
{
+ struct address_space *mapping = f2fs_is_cow_file(inode) ?
+ F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping;
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.ino = inode->i_ino,
@@ -1282,7 +1285,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
/* do not read out */
- page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
+ page = f2fs_grab_cache_page(mapping, bidx, false);
if (!page)
return -ENOMEM;
@@ -1563,6 +1566,16 @@ next_step:
continue;
}
+ if (f2fs_has_inline_data(inode)) {
+ iput(inode);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err_ratelimited(sbi,
+ "inode %lx has both inline_data flag and "
+ "data block, nid=%u, ofs_in_node=%u",
+ inode->i_ino, dni.nid, ofs_in_node);
+ continue;
+ }
+
err = f2fs_gc_pinned_control(inode, gc_type, segno);
if (err == -EAGAIN) {
iput(inode);
@@ -1579,7 +1592,7 @@ next_step:
start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
ofs_in_node;
- if (f2fs_post_read_required(inode)) {
+ if (f2fs_meta_inode_gc_required(inode)) {
int err = ra_data_block(inode, start_bidx);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1630,7 +1643,7 @@ next_step:
start_bidx = f2fs_start_bidx_of_node(nofs, inode)
+ ofs_in_node;
- if (f2fs_post_read_required(inode))
+ if (f2fs_meta_inode_gc_required(inode))
err = move_data_block(inode, start_bidx,
gc_type, segno, off);
else
@@ -1638,7 +1651,7 @@ next_step:
segno, off);
if (!err && (gc_type == FG_GC ||
- f2fs_post_read_required(inode)))
+ f2fs_meta_inode_gc_required(inode)))
submitted++;
if (locked) {
@@ -1742,7 +1755,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
if (type != GET_SUM_TYPE((&sum->footer))) {
f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
- set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_CORRUPTED_SUMMARY);
goto skip;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 7638d0d7b7ee..cca7d448e55c 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -16,7 +16,7 @@
static bool support_inline_data(struct inode *inode)
{
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_used_in_atomic_write(inode))
return false;
if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
return false;
@@ -33,11 +33,29 @@ bool f2fs_may_inline_data(struct inode *inode)
return !f2fs_post_read_required(inode);
}
-bool f2fs_sanity_check_inline_data(struct inode *inode)
+static bool inode_has_blocks(struct inode *inode, struct page *ipage)
+{
+ struct f2fs_inode *ri = F2FS_INODE(ipage);
+ int i;
+
+ if (F2FS_HAS_BLOCKS(inode))
+ return true;
+
+ for (i = 0; i < DEF_NIDS_PER_INODE; i++) {
+ if (ri->i_nid[i])
+ return true;
+ }
+ return false;
+}
+
+bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage)
{
if (!f2fs_has_inline_data(inode))
return false;
+ if (inode_has_blocks(inode, ipage))
+ return false;
+
if (!support_inline_data(inode))
return true;
@@ -203,8 +221,10 @@ int f2fs_convert_inline_inode(struct inode *inode)
struct page *ipage, *page;
int err = 0;
- if (!f2fs_has_inline_data(inode) ||
- f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
+ if (f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ if (!f2fs_has_inline_data(inode))
return 0;
err = f2fs_dquot_initialize(inode);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 005dde72aff3..aef57172014f 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -29,6 +29,9 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
if (is_inode_flag_set(inode, FI_NEW_INODE))
return;
+ if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ return;
+
if (f2fs_inode_dirtied(inode, sync))
return;
@@ -310,10 +313,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
if (!sanity_check_compress_inode(inode, ri))
return false;
}
- } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
- f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
- __func__, inode->i_ino);
- return false;
}
if (!f2fs_sb_has_extra_attr(sbi)) {
@@ -344,7 +343,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
}
- if (f2fs_sanity_check_inline_data(inode)) {
+ if (f2fs_sanity_check_inline_data(inode, node_page)) {
f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
return false;
@@ -508,16 +507,16 @@ static int do_read_inode(struct inode *inode)
init_idisk_time(inode);
- /* Need all the flag bits */
- f2fs_init_read_extent_tree(inode, node_page);
- f2fs_init_age_extent_tree(inode);
-
- if (!sanity_check_extent_cache(inode)) {
+ if (!sanity_check_extent_cache(inode, node_page)) {
f2fs_put_page(node_page, 1);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
return -EFSCORRUPTED;
}
+ /* Need all the flag bits */
+ f2fs_init_read_extent_tree(inode, node_page);
+ f2fs_init_age_extent_tree(inode);
+
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
@@ -610,14 +609,6 @@ make_now:
}
f2fs_set_inode_flags(inode);
- if (file_should_truncate(inode) &&
- !is_sbi_flag_set(sbi, SBI_POR_DOING)) {
- ret = f2fs_truncate(inode);
- if (ret)
- goto bad_inode;
- file_dont_truncate(inode);
- }
-
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
@@ -645,8 +636,9 @@ retry:
void f2fs_update_inode(struct inode *inode, struct page *node_page)
{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_inode *ri;
- struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
+ struct extent_tree *et = fi->extent_tree[EX_READ];
f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
@@ -656,7 +648,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
ri = F2FS_INODE(node_page);
ri->i_mode = cpu_to_le16(inode->i_mode);
- ri->i_advise = F2FS_I(inode)->i_advise;
+ ri->i_advise = fi->i_advise;
ri->i_uid = cpu_to_le32(i_uid_read(inode));
ri->i_gid = cpu_to_le32(i_gid_read(inode));
ri->i_links = cpu_to_le32(inode->i_nlink);
@@ -682,58 +674,49 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
ri->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
if (S_ISDIR(inode->i_mode))
- ri->i_current_depth =
- cpu_to_le32(F2FS_I(inode)->i_current_depth);
+ ri->i_current_depth = cpu_to_le32(fi->i_current_depth);
else if (S_ISREG(inode->i_mode))
- ri->i_gc_failures = cpu_to_le16(F2FS_I(inode)->i_gc_failures);
- ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
- ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
- ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
+ ri->i_gc_failures = cpu_to_le16(fi->i_gc_failures);
+ ri->i_xattr_nid = cpu_to_le32(fi->i_xattr_nid);
+ ri->i_flags = cpu_to_le32(fi->i_flags);
+ ri->i_pino = cpu_to_le32(fi->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
- ri->i_dir_level = F2FS_I(inode)->i_dir_level;
+ ri->i_dir_level = fi->i_dir_level;
if (f2fs_has_extra_attr(inode)) {
- ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
+ ri->i_extra_isize = cpu_to_le16(fi->i_extra_isize);
if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)))
ri->i_inline_xattr_size =
- cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
+ cpu_to_le16(fi->i_inline_xattr_size);
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
- F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
- i_projid)) {
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) {
projid_t i_projid;
- i_projid = from_kprojid(&init_user_ns,
- F2FS_I(inode)->i_projid);
+ i_projid = from_kprojid(&init_user_ns, fi->i_projid);
ri->i_projid = cpu_to_le32(i_projid);
}
if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
- F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
- i_crtime)) {
- ri->i_crtime =
- cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec);
- ri->i_crtime_nsec =
- cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec);
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
+ ri->i_crtime = cpu_to_le64(fi->i_crtime.tv_sec);
+ ri->i_crtime_nsec = cpu_to_le32(fi->i_crtime.tv_nsec);
}
if (f2fs_sb_has_compression(F2FS_I_SB(inode)) &&
- F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_compress_flag)) {
unsigned short compress_flag;
- ri->i_compr_blocks =
- cpu_to_le64(atomic_read(
- &F2FS_I(inode)->i_compr_blocks));
- ri->i_compress_algorithm =
- F2FS_I(inode)->i_compress_algorithm;
- compress_flag = F2FS_I(inode)->i_compress_flag |
- F2FS_I(inode)->i_compress_level <<
+ ri->i_compr_blocks = cpu_to_le64(
+ atomic_read(&fi->i_compr_blocks));
+ ri->i_compress_algorithm = fi->i_compress_algorithm;
+ compress_flag = fi->i_compress_flag |
+ fi->i_compress_level <<
COMPRESS_LEVEL_OFFSET;
ri->i_compress_flag = cpu_to_le16(compress_flag);
- ri->i_log_cluster_size =
- F2FS_I(inode)->i_log_cluster_size;
+ ri->i_log_cluster_size = fi->i_log_cluster_size;
}
}
@@ -813,8 +796,9 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_abort_atomic_write(inode, true);
- if (fi->cow_inode) {
+ if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) {
clear_inode_flag(fi->cow_inode, FI_COW_FILE);
+ F2FS_I(fi->cow_inode)->atomic_inode = NULL;
iput(fi->cow_inode);
fi->cow_inode = NULL;
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1ecde2b45e99..38b4750475db 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -221,6 +221,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
const char *name)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct f2fs_inode_info *fi;
nid_t ino;
struct inode *inode;
bool nid_free = false;
@@ -241,14 +242,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
inode_init_owner(idmap, inode, dir, mode);
+ fi = F2FS_I(inode);
inode->i_ino = ino;
inode->i_blocks = 0;
simple_inode_init_ts(inode);
- F2FS_I(inode)->i_crtime = inode_get_mtime(inode);
+ fi->i_crtime = inode_get_mtime(inode);
inode->i_generation = get_random_u32();
if (S_ISDIR(inode->i_mode))
- F2FS_I(inode)->i_current_depth = 1;
+ fi->i_current_depth = 1;
err = insert_inode_locked(inode);
if (err) {
@@ -258,9 +260,9 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
if (f2fs_sb_has_project_quota(sbi) &&
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
- F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
+ fi->i_projid = F2FS_I(dir)->i_projid;
else
- F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
+ fi->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = fscrypt_prepare_new_inode(dir, inode, &encrypt);
@@ -278,7 +280,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
- F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
+ fi->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
if (test_opt(sbi, INLINE_XATTR))
@@ -296,15 +298,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
f2fs_has_inline_dentry(inode)) {
xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
}
- F2FS_I(inode)->i_inline_xattr_size = xattr_size;
+ fi->i_inline_xattr_size = xattr_size;
- F2FS_I(inode)->i_flags =
+ fi->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
- F2FS_I(inode)->i_flags |= F2FS_INDEX_FL;
+ fi->i_flags |= F2FS_INDEX_FL;
- if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
+ if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
/* Check compression first. */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 8712e264071f..9756f0f2b7f7 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -280,6 +280,7 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
static int recover_inode(struct inode *inode, struct page *page)
{
struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
char *name;
int err;
@@ -302,12 +303,12 @@ static int recover_inode(struct inode *inode, struct page *page)
i_projid = (projid_t)le32_to_cpu(raw->i_projid);
kprojid = make_kprojid(&init_user_ns, i_projid);
- if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) {
+ if (!projid_eq(kprojid, fi->i_projid)) {
err = f2fs_transfer_project_quota(inode,
kprojid);
if (err)
return err;
- F2FS_I(inode)->i_projid = kprojid;
+ fi->i_projid = kprojid;
}
}
}
@@ -320,10 +321,10 @@ static int recover_inode(struct inode *inode, struct page *page)
inode_set_mtime(inode, le64_to_cpu(raw->i_mtime),
le32_to_cpu(raw->i_mtime_nsec));
- F2FS_I(inode)->i_advise = raw->i_advise;
- F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
+ fi->i_advise = raw->i_advise;
+ fi->i_flags = le32_to_cpu(raw->i_flags);
f2fs_set_inode_flags(inode);
- F2FS_I(inode)->i_gc_failures = le16_to_cpu(raw->i_gc_failures);
+ fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures);
recover_inline_flags(inode, raw);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a0ce3d080f80..78c3198a6308 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2784,11 +2784,19 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
unsigned short seg_type = curseg->seg_type;
sanity_check_seg_type(sbi, seg_type);
- if (f2fs_need_rand_seg(sbi))
- return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
+ if (__is_large_section(sbi)) {
+ if (f2fs_need_rand_seg(sbi)) {
+ unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno);
- if (__is_large_section(sbi))
+ if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint)
+ return curseg->segno;
+ return get_random_u32_inclusive(curseg->segno + 1,
+ GET_SEG_FROM_SEC(sbi, hint + 1) - 1);
+ }
return curseg->segno;
+ } else if (f2fs_need_rand_seg(sbi)) {
+ return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
+ }
/* inmem log may not locate on any segment after mount */
if (!curseg->inited)
@@ -2931,12 +2939,12 @@ static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
return ret;
}
-static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
int ret = 0;
- if (!sbi->am.atgc_enabled)
+ if (!sbi->am.atgc_enabled && !force)
return 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
@@ -2953,9 +2961,30 @@ static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
f2fs_up_read(&SM_I(sbi)->curseg_lock);
return ret;
}
+
int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
{
- return __f2fs_init_atgc_curseg(sbi);
+ return __f2fs_init_atgc_curseg(sbi, false);
+}
+
+int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi)
+{
+ int ret;
+
+ if (!test_opt(sbi, ATGC))
+ return 0;
+ if (sbi->am.atgc_enabled)
+ return 0;
+ if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) <
+ sbi->am.age_threshold)
+ return 0;
+
+ ret = __f2fs_init_atgc_curseg(sbi, true);
+ if (!ret) {
+ sbi->am.atgc_enabled = true;
+ f2fs_info(sbi, "reenabled age threshold GC");
+ }
+ return ret;
}
static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
@@ -3483,7 +3512,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (page_private_gcing(fio->page)) {
if (fio->sbi->am.atgc_enabled &&
(fio->io_type == FS_DATA_IO) &&
- (fio->sbi->gc_mode != GC_URGENT_HIGH))
+ (fio->sbi->gc_mode != GC_URGENT_HIGH) &&
+ __is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !is_inode_flag_set(inode, FI_OPU_WRITE))
return CURSEG_ALL_DATA_ATGC;
else
return CURSEG_COLD_DATA;
@@ -3828,7 +3859,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
goto drop_bio;
}
- if (fio->post_read)
+ if (fio->meta_gc)
f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
stat_inc_inplace_blocks(fio->sbi);
@@ -3998,7 +4029,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *cpage;
- if (!f2fs_post_read_required(inode))
+ if (!f2fs_meta_inode_gc_required(inode))
return;
if (!__is_valid_data_blkaddr(blkaddr))
@@ -4017,7 +4048,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
block_t i;
- if (!f2fs_post_read_required(inode))
+ if (!f2fs_meta_inode_gc_required(inode))
return;
for (i = 0; i < len; i++)
@@ -5187,7 +5218,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
}
/* Allocate a new section if it's not new. */
- if (cs->next_blkoff) {
+ if (cs->next_blkoff ||
+ cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) {
unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;
f2fs_allocate_new_section(sbi, type, true);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e1c0f418aa11..bfc01a521cb9 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,7 +347,8 @@ static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
unsigned int segno, bool use_section)
{
if (use_section && __is_large_section(sbi)) {
- unsigned int start_segno = START_SEGNO(segno);
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int blocks = 0;
int i;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index df4cf31f93df..3959fd137cc9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -151,8 +151,6 @@ enum {
Opt_mode,
Opt_fault_injection,
Opt_fault_type,
- Opt_lazytime,
- Opt_nolazytime,
Opt_quota,
Opt_noquota,
Opt_usrquota,
@@ -229,8 +227,6 @@ static match_table_t f2fs_tokens = {
{Opt_mode, "mode=%s"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_fault_type, "fault_type=%u"},
- {Opt_lazytime, "lazytime"},
- {Opt_nolazytime, "nolazytime"},
{Opt_quota, "quota"},
{Opt_noquota, "noquota"},
{Opt_usrquota, "usrquota"},
@@ -918,12 +914,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
f2fs_info(sbi, "fault_type options not supported");
break;
#endif
- case Opt_lazytime:
- sb->s_flags |= SB_LAZYTIME;
- break;
- case Opt_nolazytime:
- sb->s_flags &= ~SB_LAZYTIME;
- break;
#ifdef CONFIG_QUOTA
case Opt_quota:
case Opt_usrquota:
@@ -4481,6 +4471,7 @@ try_onemore:
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_sysfs_name_bdev(sb);
sb->s_iflags |= SB_I_CGROUPWB;
/* init f2fs-specific super block info */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 09d3ecfaa4f1..fee7ee45ceaa 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -340,13 +340,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
struct ckpt_req_control *cprc = &sbi->cprc_info;
int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
- int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
+ int level = IOPRIO_PRIO_LEVEL(cprc->ckpt_thread_ioprio);
if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE)
return -EINVAL;
return sysfs_emit(buf, "%s,%d\n",
- class == IOPRIO_CLASS_RT ? "rt" : "be", data);
+ class == IOPRIO_CLASS_RT ? "rt" : "be", level);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -450,7 +450,7 @@ out:
const char *name = strim((char *)buf);
struct ckpt_req_control *cprc = &sbi->cprc_info;
int class;
- long data;
+ long level;
int ret;
if (!strncmp(name, "rt,", 3))
@@ -461,13 +461,13 @@ out:
return -EINVAL;
name += 3;
- ret = kstrtol(name, 10, &data);
+ ret = kstrtol(name, 10, &level);
if (ret)
return ret;
- if (data >= IOPRIO_NR_LEVELS || data < 0)
+ if (level >= IOPRIO_NR_LEVELS || level < 0)
return -EINVAL;
- cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data);
+ cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, level);
if (test_opt(sbi, MERGE_CHECKPOINT)) {
ret = set_task_ioprio(cprc->f2fs_issue_ckpt,
cprc->ckpt_thread_ioprio);
diff --git a/fs/file_table.c b/fs/file_table.c
index 4f03beed4737..ca7843dde56d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(get_max_files);
/*
* Handle nr_files sysctl
*/
-static int proc_nr_files(struct ctl_table *table, int write, void *buffer,
+static int proc_nr_files(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 92a5b8283528..b865a3fa52f3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2413,7 +2413,7 @@ static int __init start_dirtytime_writeback(void)
}
__initcall(start_dirtytime_writeback);
-int dirtytime_interval_handler(struct ctl_table *table, int write,
+int dirtytime_interval_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 0239e3af3945..8b39c15c408c 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -63,9 +63,10 @@ struct hostfs_stat {
struct hostfs_timespec atime, mtime, ctime;
unsigned int blksize;
unsigned long long blocks;
- unsigned int maj;
- unsigned int min;
- dev_t dev;
+ struct {
+ unsigned int maj;
+ unsigned int min;
+ } rdev, dev;
};
extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3eb747d26924..22df574ca99e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/namei.h>
#include "hostfs.h"
#include <init.h>
@@ -455,7 +456,7 @@ static int hostfs_read_folio(struct file *file, struct folio *folio)
if (bytes_read < 0)
ret = bytes_read;
else
- buffer = folio_zero_tail(folio, bytes_read, buffer);
+ buffer = folio_zero_tail(folio, bytes_read, buffer + bytes_read);
kunmap_local(buffer);
folio_end_read(folio, ret == 0);
@@ -532,10 +533,11 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
static int hostfs_inode_set(struct inode *ino, void *data)
{
struct hostfs_stat *st = data;
- dev_t rdev;
+ dev_t dev, rdev;
/* Reencode maj and min with the kernel encoding.*/
- rdev = MKDEV(st->maj, st->min);
+ rdev = MKDEV(st->rdev.maj, st->rdev.min);
+ dev = MKDEV(st->dev.maj, st->dev.min);
switch (st->mode & S_IFMT) {
case S_IFLNK:
@@ -561,7 +563,7 @@ static int hostfs_inode_set(struct inode *ino, void *data)
return -EIO;
}
- HOSTFS_I(ino)->dev = st->dev;
+ HOSTFS_I(ino)->dev = dev;
ino->i_ino = st->ino;
ino->i_mode = st->mode;
return hostfs_inode_update(ino, st);
@@ -570,8 +572,9 @@ static int hostfs_inode_set(struct inode *ino, void *data)
static int hostfs_inode_test(struct inode *inode, void *data)
{
const struct hostfs_stat *st = data;
+ dev_t dev = MKDEV(st->dev.maj, st->dev.min);
- return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == st->dev;
+ return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev;
}
static struct inode *hostfs_iget(struct super_block *sb, char *name)
@@ -927,7 +930,6 @@ static const struct inode_operations hostfs_link_iops = {
static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct hostfs_fs_info *fsi = sb->s_fs_info;
- const char *host_root = fc->source;
struct inode *root_inode;
int err;
@@ -941,15 +943,6 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
- /* NULL is printed as '(null)' by printf(): avoid that. */
- if (fc->source == NULL)
- host_root = "";
-
- fsi->host_root_path =
- kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root);
- if (fsi->host_root_path == NULL)
- return -ENOMEM;
-
root_inode = hostfs_iget(sb, fsi->host_root_path);
if (IS_ERR(root_inode))
return PTR_ERR(root_inode);
@@ -975,6 +968,58 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc)
return 0;
}
+enum hostfs_parma {
+ Opt_hostfs,
+};
+
+static const struct fs_parameter_spec hostfs_param_specs[] = {
+ fsparam_string_empty("hostfs", Opt_hostfs),
+ {}
+};
+
+static int hostfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct hostfs_fs_info *fsi = fc->s_fs_info;
+ struct fs_parse_result result;
+ char *host_root;
+ int opt;
+
+ opt = fs_parse(fc, hostfs_param_specs, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_hostfs:
+ host_root = param->string;
+ if (!*host_root)
+ host_root = "";
+ fsi->host_root_path =
+ kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root);
+ if (fsi->host_root_path == NULL)
+ return -ENOMEM;
+ break;
+ }
+
+ return 0;
+}
+
+static int hostfs_parse_monolithic(struct fs_context *fc, void *data)
+{
+ struct hostfs_fs_info *fsi = fc->s_fs_info;
+ char *host_root = (char *)data;
+
+ /* NULL is printed as '(null)' by printf(): avoid that. */
+ if (host_root == NULL)
+ host_root = "";
+
+ fsi->host_root_path =
+ kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root);
+ if (fsi->host_root_path == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
static int hostfs_fc_get_tree(struct fs_context *fc)
{
return get_tree_nodev(fc, hostfs_fill_super);
@@ -992,6 +1037,8 @@ static void hostfs_fc_free(struct fs_context *fc)
}
static const struct fs_context_operations hostfs_context_ops = {
+ .parse_monolithic = hostfs_parse_monolithic,
+ .parse_param = hostfs_parse_param,
.get_tree = hostfs_fc_get_tree,
.free = hostfs_fc_free,
};
@@ -1040,4 +1087,5 @@ static void __exit exit_hostfs(void)
module_init(init_hostfs)
module_exit(exit_hostfs)
+MODULE_DESCRIPTION("User-Mode Linux Host filesystem");
MODULE_LICENSE("GPL");
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 840619e39a1a..97e9c40a9448 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -34,9 +34,10 @@ static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
p->mtime.tv_nsec = 0;
p->blksize = buf->st_blksize;
p->blocks = buf->st_blocks;
- p->maj = os_major(buf->st_rdev);
- p->min = os_minor(buf->st_rdev);
- p->dev = buf->st_dev;
+ p->rdev.maj = os_major(buf->st_rdev);
+ p->rdev.min = os_minor(buf->st_rdev);
+ p->dev.maj = os_major(buf->st_dev);
+ p->dev.min = os_minor(buf->st_dev);
}
int stat_file(const char *path, struct hostfs_stat *p, int fd)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 81dab95f67ed..9f6cff356796 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -222,13 +222,13 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct hstate *h = hstate_file(file);
const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
if (len & ~huge_page_mask(h))
return -EINVAL;
- if (len > TASK_SIZE)
+ if (len > mmap_end - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED) {
@@ -239,9 +239,10 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
- vma = find_vma(mm, addr);
- if (mmap_end - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ vma = find_vma_prev(mm, addr, &prev);
+ if (mmap_end - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
return addr;
}
@@ -422,7 +423,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
if (!ptep)
return false;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(vma->vm_mm, addr, ptep);
if (huge_pte_none(pte) || !pte_present(pte))
return false;
@@ -892,7 +893,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
error = PTR_ERR(folio);
goto out;
}
- clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
+ folio_zero_user(folio, ALIGN_DOWN(addr, hpage_size));
__folio_mark_uptodate(folio);
error = hugetlb_add_to_page_cache(folio, mapping, index);
if (unlikely(error)) {
@@ -1128,10 +1129,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
hugetlb_set_folio_subpool(src, NULL);
}
- if (mode != MIGRATE_SYNC_NO_COPY)
- folio_migrate_copy(dst, src);
- else
- folio_migrate_flags(dst, src);
+ folio_migrate_flags(dst, src);
return MIGRATEPAGE_SUCCESS;
}
diff --git a/fs/inode.c b/fs/inode.c
index f356fe2ec2b6..86670941884b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -107,7 +107,7 @@ long get_nr_dirty_inodes(void)
*/
static struct inodes_stat_t inodes_stat;
-static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
+static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
@@ -676,6 +676,16 @@ static void evict(struct inode *inode)
remove_inode_hash(inode);
+ /*
+ * Wake up waiters in __wait_on_freeing_inode().
+ *
+ * Lockless hash lookup may end up finding the inode before we removed
+ * it above, but only lock it *after* we are done with the wakeup below.
+ * In this case the potential waiter cannot safely block.
+ *
+ * The inode being unhashed after the call to remove_inode_hash() is
+ * used as an indicator whether blocking on it is safe.
+ */
spin_lock(&inode->i_lock);
wake_up_bit(&inode->i_state, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
@@ -888,18 +898,18 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
return freed;
}
-static void __wait_on_freeing_inode(struct inode *inode, bool locked);
+static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
/*
* Called with the inode lock held.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
int (*test)(struct inode *, void *),
- void *data, bool locked)
+ void *data, bool is_inode_hash_locked)
{
struct inode *inode = NULL;
- if (locked)
+ if (is_inode_hash_locked)
lockdep_assert_held(&inode_hash_lock);
else
lockdep_assert_not_held(&inode_hash_lock);
@@ -913,7 +923,7 @@ repeat:
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode, locked);
+ __wait_on_freeing_inode(inode, is_inode_hash_locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
@@ -936,11 +946,11 @@ repeat:
*/
static struct inode *find_inode_fast(struct super_block *sb,
struct hlist_head *head, unsigned long ino,
- bool locked)
+ bool is_inode_hash_locked)
{
struct inode *inode = NULL;
- if (locked)
+ if (is_inode_hash_locked)
lockdep_assert_held(&inode_hash_lock);
else
lockdep_assert_not_held(&inode_hash_lock);
@@ -954,7 +964,7 @@ repeat:
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
- __wait_on_freeing_inode(inode, locked);
+ __wait_on_freeing_inode(inode, is_inode_hash_locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
@@ -2287,19 +2297,29 @@ EXPORT_SYMBOL(inode_needs_sync);
* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
* will DTRT.
*/
-static void __wait_on_freeing_inode(struct inode *inode, bool locked)
+static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+
+ /*
+ * Handle racing against evict(), see that routine for more details.
+ */
+ if (unlikely(inode_unhashed(inode))) {
+ WARN_ON(is_inode_hash_locked);
+ spin_unlock(&inode->i_lock);
+ return;
+ }
+
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
- if (locked)
+ if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock);
schedule();
finish_wait(wq, &wait.wq_entry);
- if (locked)
+ if (is_inode_hash_locked)
spin_lock(&inode_hash_lock);
rcu_read_lock();
}
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
index 7c96bc107218..560187d61562 100644
--- a/fs/jffs2/Kconfig
+++ b/fs/jffs2/Kconfig
@@ -151,8 +151,9 @@ config JFFS2_RUBIN
RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
choice
- prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
+ prompt "JFFS2 default compression mode"
default JFFS2_CMODE_PRIORITY
+ depends on JFFS2_COMPRESSION_OPTIONS
depends on JFFS2_FS
help
You can set here the default compression mode of JFFS2 from
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index cb3cda1390ad..5713994328cb 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1626,6 +1626,8 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
} else if (rc == -ENOSPC) {
/* search for next smaller log2 block */
l2nb = BLKSTOL2(nblocks) - 1;
+ if (unlikely(l2nb < 0))
+ break;
nblocks = 1LL << l2nb;
} else {
/* Trim any already allocated blocks */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 031d8f570f58..5d3127ca68a4 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -834,6 +834,8 @@ int dtInsert(tid_t tid, struct inode *ip,
* the full page.
*/
DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
+ if (p->header.freelist == 0)
+ return -EINVAL;
/*
* insert entry for new key
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 2ec35889ad24..1407feccbc2d 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -290,7 +290,7 @@ int diSync(struct inode *ipimap)
int diRead(struct inode *ip)
{
struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
- int iagno, ino, extno, rc;
+ int iagno, ino, extno, rc, agno;
struct inode *ipimap;
struct dinode *dp;
struct iag *iagp;
@@ -339,8 +339,11 @@ int diRead(struct inode *ip)
/* get the ag for the iag */
agstart = le64_to_cpu(iagp->agstart);
+ agno = BLKTOAG(agstart, JFS_SBI(ip->i_sb));
release_metapage(mp);
+ if (agno >= MAXAG || agno < 0)
+ return -EIO;
rel_inode = (ino & (INOSPERPAGE - 1));
pageno = blkno >> sbi->l2nbperpage;
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 9609349e92e5..270808b6219b 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1600,7 +1600,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
mp, sizeof(struct metapage), 0);
print_hex_dump(KERN_ERR, "page: ",
DUMP_PREFIX_ADDRESS, 16,
- sizeof(long), mp->page,
+ sizeof(long), mp->folio,
sizeof(struct page), 0);
} else
print_hex_dump(KERN_ERR, "tblock:",
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 961569c11159..df575a873ec6 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -4,6 +4,7 @@
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*/
+#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -46,9 +47,9 @@ static inline void __lock_metapage(struct metapage *mp)
do {
set_current_state(TASK_UNINTERRUPTIBLE);
if (metapage_locked(mp)) {
- unlock_page(mp->page);
+ folio_unlock(mp->folio);
io_schedule();
- lock_page(mp->page);
+ folio_lock(mp->folio);
}
} while (trylock_metapage(mp));
__set_current_state(TASK_RUNNING);
@@ -56,7 +57,7 @@ static inline void __lock_metapage(struct metapage *mp)
}
/*
- * Must have mp->page locked
+ * Must have mp->folio locked
*/
static inline void lock_metapage(struct metapage *mp)
{
@@ -75,36 +76,36 @@ static mempool_t *metapage_mempool;
struct meta_anchor {
int mp_count;
atomic_t io_count;
+ blk_status_t status;
struct metapage *mp[MPS_PER_PAGE];
};
-#define mp_anchor(page) ((struct meta_anchor *)page_private(page))
-static inline struct metapage *page_to_mp(struct page *page, int offset)
+static inline struct metapage *folio_to_mp(struct folio *folio, int offset)
{
- if (!PagePrivate(page))
+ struct meta_anchor *anchor = folio->private;
+
+ if (!anchor)
return NULL;
- return mp_anchor(page)->mp[offset >> L2PSIZE];
+ return anchor->mp[offset >> L2PSIZE];
}
-static inline int insert_metapage(struct page *page, struct metapage *mp)
+static inline int insert_metapage(struct folio *folio, struct metapage *mp)
{
struct meta_anchor *a;
int index;
int l2mp_blocks; /* log2 blocks per metapage */
- if (PagePrivate(page))
- a = mp_anchor(page);
- else {
+ a = folio->private;
+ if (!a) {
a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
if (!a)
return -ENOMEM;
- set_page_private(page, (unsigned long)a);
- SetPagePrivate(page);
- kmap(page);
+ folio_attach_private(folio, a);
+ kmap(&folio->page);
}
if (mp) {
- l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+ l2mp_blocks = L2PSIZE - folio->mapping->host->i_blkbits;
index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
a->mp_count++;
a->mp[index] = mp;
@@ -113,10 +114,10 @@ static inline int insert_metapage(struct page *page, struct metapage *mp)
return 0;
}
-static inline void remove_metapage(struct page *page, struct metapage *mp)
+static inline void remove_metapage(struct folio *folio, struct metapage *mp)
{
- struct meta_anchor *a = mp_anchor(page);
- int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+ struct meta_anchor *a = folio->private;
+ int l2mp_blocks = L2PSIZE - folio->mapping->host->i_blkbits;
int index;
index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
@@ -126,48 +127,53 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
a->mp[index] = NULL;
if (--a->mp_count == 0) {
kfree(a);
- set_page_private(page, 0);
- ClearPagePrivate(page);
- kunmap(page);
+ folio_detach_private(folio);
+ kunmap(&folio->page);
}
}
-static inline void inc_io(struct page *page)
+static inline void inc_io(struct folio *folio)
{
- atomic_inc(&mp_anchor(page)->io_count);
+ struct meta_anchor *anchor = folio->private;
+
+ atomic_inc(&anchor->io_count);
}
-static inline void dec_io(struct page *page, void (*handler) (struct page *))
+static inline void dec_io(struct folio *folio, blk_status_t status,
+ void (*handler)(struct folio *, blk_status_t))
{
- if (atomic_dec_and_test(&mp_anchor(page)->io_count))
- handler(page);
+ struct meta_anchor *anchor = folio->private;
+
+ if (anchor->status == BLK_STS_OK)
+ anchor->status = status;
+
+ if (atomic_dec_and_test(&anchor->io_count))
+ handler(folio, anchor->status);
}
#else
-static inline struct metapage *page_to_mp(struct page *page, int offset)
+static inline struct metapage *folio_to_mp(struct folio *folio, int offset)
{
- return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
+ return folio->private;
}
-static inline int insert_metapage(struct page *page, struct metapage *mp)
+static inline int insert_metapage(struct folio *folio, struct metapage *mp)
{
if (mp) {
- set_page_private(page, (unsigned long)mp);
- SetPagePrivate(page);
- kmap(page);
+ folio_attach_private(folio, mp);
+ kmap(&folio->page);
}
return 0;
}
-static inline void remove_metapage(struct page *page, struct metapage *mp)
+static inline void remove_metapage(struct folio *folio, struct metapage *mp)
{
- set_page_private(page, 0);
- ClearPagePrivate(page);
- kunmap(page);
+ folio_detach_private(folio);
+ kunmap(&folio->page);
}
-#define inc_io(page) do {} while(0)
-#define dec_io(page, handler) handler(page)
+#define inc_io(folio) do {} while(0)
+#define dec_io(folio, status, handler) handler(folio, status)
#endif
@@ -218,12 +224,12 @@ void metapage_exit(void)
kmem_cache_destroy(metapage_cache);
}
-static inline void drop_metapage(struct page *page, struct metapage *mp)
+static inline void drop_metapage(struct folio *folio, struct metapage *mp)
{
if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
test_bit(META_io, &mp->flag))
return;
- remove_metapage(page, mp);
+ remove_metapage(folio, mp);
INCREMENT(mpStat.pagefree);
free_metapage(mp);
}
@@ -257,23 +263,20 @@ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
return lblock;
}
-static void last_read_complete(struct page *page)
+static void last_read_complete(struct folio *folio, blk_status_t status)
{
- if (!PageError(page))
- SetPageUptodate(page);
- unlock_page(page);
+ if (status)
+ printk(KERN_ERR "Read error %d at %#llx\n", status,
+ folio_pos(folio));
+
+ folio_end_read(folio, status == 0);
}
static void metapage_read_end_io(struct bio *bio)
{
- struct page *page = bio->bi_private;
-
- if (bio->bi_status) {
- printk(KERN_ERR "metapage_read_end_io: I/O error\n");
- SetPageError(page);
- }
+ struct folio *folio = bio->bi_private;
- dec_io(page, last_read_complete);
+ dec_io(folio, bio->bi_status, last_read_complete);
bio_put(bio);
}
@@ -299,13 +302,19 @@ static void remove_from_logsync(struct metapage *mp)
LOGSYNC_UNLOCK(log, flags);
}
-static void last_write_complete(struct page *page)
+static void last_write_complete(struct folio *folio, blk_status_t status)
{
struct metapage *mp;
unsigned int offset;
+ if (status) {
+ int err = blk_status_to_errno(status);
+ printk(KERN_ERR "metapage_write_end_io: I/O error\n");
+ mapping_set_error(folio->mapping, err);
+ }
+
for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
- mp = page_to_mp(page, offset);
+ mp = folio_to_mp(folio, offset);
if (mp && test_bit(META_io, &mp->flag)) {
if (mp->lsn)
remove_from_logsync(mp);
@@ -316,28 +325,25 @@ static void last_write_complete(struct page *page)
* safe unless I have the page locked
*/
}
- end_page_writeback(page);
+ folio_end_writeback(folio);
}
static void metapage_write_end_io(struct bio *bio)
{
- struct page *page = bio->bi_private;
+ struct folio *folio = bio->bi_private;
- BUG_ON(!PagePrivate(page));
+ BUG_ON(!folio->private);
- if (bio->bi_status) {
- printk(KERN_ERR "metapage_write_end_io: I/O error\n");
- SetPageError(page);
- }
- dec_io(page, last_write_complete);
+ dec_io(folio, bio->bi_status, last_write_complete);
bio_put(bio);
}
-static int metapage_writepage(struct page *page, struct writeback_control *wbc)
+static int metapage_write_folio(struct folio *folio,
+ struct writeback_control *wbc, void *unused)
{
struct bio *bio = NULL;
int block_offset; /* block offset of mp within page */
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
int len;
int xlen;
@@ -353,14 +359,13 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
int offset;
int bad_blocks = 0;
- page_start = (sector_t)page->index <<
- (PAGE_SHIFT - inode->i_blkbits);
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
+ page_start = folio_pos(folio) >> inode->i_blkbits;
+ BUG_ON(!folio_test_locked(folio));
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
- mp = page_to_mp(page, offset);
+ mp = folio_to_mp(folio, offset);
if (!mp || !test_bit(META_dirty, &mp->flag))
continue;
@@ -389,22 +394,20 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
continue;
}
/* Not contiguous */
- if (bio_add_page(bio, page, bio_bytes, bio_offset) <
- bio_bytes)
- goto add_failed;
+ bio_add_folio_nofail(bio, folio, bio_bytes, bio_offset);
/*
* Increment counter before submitting i/o to keep
* count from hitting zero before we're through
*/
- inc_io(page);
+ inc_io(folio);
if (!bio->bi_iter.bi_size)
goto dump_bio;
submit_bio(bio);
nr_underway++;
bio = NULL;
} else
- inc_io(page);
- xlen = (PAGE_SIZE - offset) >> inode->i_blkbits;
+ inc_io(folio);
+ xlen = (folio_size(folio) - offset) >> inode->i_blkbits;
pblock = metapage_get_blocks(inode, lblock, &xlen);
if (!pblock) {
printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
@@ -420,7 +423,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOFS);
bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_write_end_io;
- bio->bi_private = page;
+ bio->bi_private = folio;
/* Don't call bio_add_page yet, we may add to this vec */
bio_offset = offset;
@@ -430,8 +433,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
next_block = lblock + len;
}
if (bio) {
- if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
- goto add_failed;
+ bio_add_folio_nofail(bio, folio, bio_bytes, bio_offset);
if (!bio->bi_iter.bi_size)
goto dump_bio;
@@ -439,50 +441,56 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
nr_underway++;
}
if (redirty)
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
- unlock_page(page);
+ folio_unlock(folio);
if (bad_blocks)
goto err_out;
if (nr_underway == 0)
- end_page_writeback(page);
+ folio_end_writeback(folio);
return 0;
-add_failed:
- /* We should never reach here, since we're only adding one vec */
- printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
- goto skip;
dump_bio:
print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
4, bio, sizeof(*bio), 0);
-skip:
bio_put(bio);
- unlock_page(page);
- dec_io(page, last_write_complete);
+ folio_unlock(folio);
+ dec_io(folio, BLK_STS_OK, last_write_complete);
err_out:
while (bad_blocks--)
- dec_io(page, last_write_complete);
+ dec_io(folio, BLK_STS_OK, last_write_complete);
return -EIO;
}
+static int metapage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct blk_plug plug;
+ int err;
+
+ blk_start_plug(&plug);
+ err = write_cache_pages(mapping, wbc, metapage_write_folio, NULL);
+ blk_finish_plug(&plug);
+
+ return err;
+}
+
static int metapage_read_folio(struct file *fp, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct bio *bio = NULL;
int block_offset;
- int blocks_per_page = i_blocks_per_page(inode, page);
+ int blocks_per_page = i_blocks_per_folio(inode, folio);
sector_t page_start; /* address of page in fs blocks */
sector_t pblock;
int xlen;
unsigned int len;
int offset;
- BUG_ON(!PageLocked(page));
- page_start = (sector_t)page->index <<
- (PAGE_SHIFT - inode->i_blkbits);
+ BUG_ON(!folio_test_locked(folio));
+ page_start = folio_pos(folio) >> inode->i_blkbits;
block_offset = 0;
while (block_offset < blocks_per_page) {
@@ -490,9 +498,9 @@ static int metapage_read_folio(struct file *fp, struct folio *folio)
pblock = metapage_get_blocks(inode, page_start + block_offset,
&xlen);
if (pblock) {
- if (!PagePrivate(page))
- insert_metapage(page, NULL);
- inc_io(page);
+ if (!folio->private)
+ insert_metapage(folio, NULL);
+ inc_io(folio);
if (bio)
submit_bio(bio);
@@ -501,11 +509,10 @@ static int metapage_read_folio(struct file *fp, struct folio *folio)
bio->bi_iter.bi_sector =
pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_read_end_io;
- bio->bi_private = page;
+ bio->bi_private = folio;
len = xlen << inode->i_blkbits;
offset = block_offset << inode->i_blkbits;
- if (bio_add_page(bio, page, len, offset) < len)
- goto add_failed;
+ bio_add_folio_nofail(bio, folio, len, offset);
block_offset += xlen;
} else
block_offset++;
@@ -513,15 +520,9 @@ static int metapage_read_folio(struct file *fp, struct folio *folio)
if (bio)
submit_bio(bio);
else
- unlock_page(page);
+ folio_unlock(folio);
return 0;
-
-add_failed:
- printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
- bio_put(bio);
- dec_io(page, last_read_complete);
- return -EIO;
}
static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask)
@@ -531,7 +532,7 @@ static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask)
int offset;
for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
- mp = page_to_mp(&folio->page, offset);
+ mp = folio_to_mp(folio, offset);
if (!mp)
continue;
@@ -546,7 +547,7 @@ static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask)
}
if (mp->lsn)
remove_from_logsync(mp);
- remove_metapage(&folio->page, mp);
+ remove_metapage(folio, mp);
INCREMENT(mpStat.pagefree);
free_metapage(mp);
}
@@ -565,7 +566,7 @@ static void metapage_invalidate_folio(struct folio *folio, size_t offset,
const struct address_space_operations jfs_metapage_aops = {
.read_folio = metapage_read_folio,
- .writepage = metapage_writepage,
+ .writepages = metapage_writepages,
.release_folio = metapage_release_folio,
.invalidate_folio = metapage_invalidate_folio,
.dirty_folio = filemap_dirty_folio,
@@ -579,7 +580,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
int l2bsize;
struct address_space *mapping;
struct metapage *mp = NULL;
- struct page *page;
+ struct folio *folio;
unsigned long page_index;
unsigned long page_offset;
@@ -610,22 +611,22 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
}
if (new && (PSIZE == PAGE_SIZE)) {
- page = grab_cache_page(mapping, page_index);
- if (!page) {
- jfs_err("grab_cache_page failed!");
+ folio = filemap_grab_folio(mapping, page_index);
+ if (IS_ERR(folio)) {
+ jfs_err("filemap_grab_folio failed!");
return NULL;
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
} else {
- page = read_mapping_page(mapping, page_index, NULL);
- if (IS_ERR(page)) {
+ folio = read_mapping_folio(mapping, page_index, NULL);
+ if (IS_ERR(folio)) {
jfs_err("read_mapping_page failed!");
return NULL;
}
- lock_page(page);
+ folio_lock(folio);
}
- mp = page_to_mp(page, page_offset);
+ mp = folio_to_mp(folio, page_offset);
if (mp) {
if (mp->logical_size != size) {
jfs_error(inode->i_sb,
@@ -651,16 +652,16 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
mp = alloc_metapage(GFP_NOFS);
if (!mp)
goto unlock;
- mp->page = page;
+ mp->folio = folio;
mp->sb = inode->i_sb;
mp->flag = 0;
mp->xflag = COMMIT_PAGE;
mp->count = 1;
mp->nohomeok = 0;
mp->logical_size = size;
- mp->data = page_address(page) + page_offset;
+ mp->data = folio_address(folio) + page_offset;
mp->index = lblock;
- if (unlikely(insert_metapage(page, mp))) {
+ if (unlikely(insert_metapage(folio, mp))) {
free_metapage(mp);
goto unlock;
}
@@ -672,28 +673,27 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
memset(mp->data, 0, PSIZE);
}
- unlock_page(page);
+ folio_unlock(folio);
jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
return mp;
unlock:
- unlock_page(page);
+ folio_unlock(folio);
return NULL;
}
void grab_metapage(struct metapage * mp)
{
jfs_info("grab_metapage: mp = 0x%p", mp);
- get_page(mp->page);
- lock_page(mp->page);
+ folio_get(mp->folio);
+ folio_lock(mp->folio);
mp->count++;
lock_metapage(mp);
- unlock_page(mp->page);
+ folio_unlock(mp->folio);
}
-static int metapage_write_one(struct page *page)
+static int metapage_write_one(struct folio *folio)
{
- struct folio *folio = page_folio(page);
struct address_space *mapping = folio->mapping;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
@@ -707,7 +707,7 @@ static int metapage_write_one(struct page *page)
if (folio_clear_dirty_for_io(folio)) {
folio_get(folio);
- ret = metapage_writepage(page, &wbc);
+ ret = metapage_write_folio(folio, &wbc, NULL);
if (ret == 0)
folio_wait_writeback(folio);
folio_put(folio);
@@ -722,71 +722,69 @@ static int metapage_write_one(struct page *page)
void force_metapage(struct metapage *mp)
{
- struct page *page = mp->page;
+ struct folio *folio = mp->folio;
jfs_info("force_metapage: mp = 0x%p", mp);
set_bit(META_forcewrite, &mp->flag);
clear_bit(META_sync, &mp->flag);
- get_page(page);
- lock_page(page);
- set_page_dirty(page);
- if (metapage_write_one(page))
+ folio_get(folio);
+ folio_lock(folio);
+ folio_mark_dirty(folio);
+ if (metapage_write_one(folio))
jfs_error(mp->sb, "metapage_write_one() failed\n");
clear_bit(META_forcewrite, &mp->flag);
- put_page(page);
+ folio_put(folio);
}
void hold_metapage(struct metapage *mp)
{
- lock_page(mp->page);
+ folio_lock(mp->folio);
}
void put_metapage(struct metapage *mp)
{
if (mp->count || mp->nohomeok) {
/* Someone else will release this */
- unlock_page(mp->page);
+ folio_unlock(mp->folio);
return;
}
- get_page(mp->page);
+ folio_get(mp->folio);
mp->count++;
lock_metapage(mp);
- unlock_page(mp->page);
+ folio_unlock(mp->folio);
release_metapage(mp);
}
void release_metapage(struct metapage * mp)
{
- struct page *page = mp->page;
+ struct folio *folio = mp->folio;
jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
- BUG_ON(!page);
-
- lock_page(page);
+ folio_lock(folio);
unlock_metapage(mp);
assert(mp->count);
if (--mp->count || mp->nohomeok) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return;
}
if (test_bit(META_dirty, &mp->flag)) {
- set_page_dirty(page);
+ folio_mark_dirty(folio);
if (test_bit(META_sync, &mp->flag)) {
clear_bit(META_sync, &mp->flag);
- if (metapage_write_one(page))
+ if (metapage_write_one(folio))
jfs_error(mp->sb, "metapage_write_one() failed\n");
- lock_page(page);
+ folio_lock(folio);
}
} else if (mp->lsn) /* discard_metapage doesn't remove it */
remove_from_logsync(mp);
/* Try to keep metapages from using up too much memory */
- drop_metapage(page, mp);
+ drop_metapage(folio, mp);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
@@ -798,7 +796,6 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
struct address_space *mapping =
JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
struct metapage *mp;
- struct page *page;
unsigned int offset;
/*
@@ -807,11 +804,12 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
*/
for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
lblock += BlocksPerPage) {
- page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
- if (!page)
+ struct folio *folio = filemap_lock_folio(mapping,
+ lblock >> l2BlocksPerPage);
+ if (IS_ERR(folio))
continue;
for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
- mp = page_to_mp(page, offset);
+ mp = folio_to_mp(folio, offset);
if (!mp)
continue;
if (mp->index < addr)
@@ -824,8 +822,8 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
if (mp->lsn)
remove_from_logsync(mp);
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
}
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 4179f9df4deb..2e5015c2705b 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -24,7 +24,7 @@ struct metapage {
wait_queue_head_t wait;
/* implementation */
- struct page *page;
+ struct folio *folio;
struct super_block *sb;
unsigned int logical_size;
@@ -90,14 +90,14 @@ static inline void discard_metapage(struct metapage *mp)
static inline void metapage_nohomeok(struct metapage *mp)
{
- struct page *page = mp->page;
- lock_page(page);
+ struct folio *folio = mp->folio;
+ folio_lock(folio);
if (!mp->nohomeok++) {
mark_metapage_dirty(mp);
- get_page(page);
- wait_on_page_writeback(page);
+ folio_get(folio);
+ folio_wait_writeback(folio);
}
- unlock_page(page);
+ folio_unlock(folio);
}
/*
@@ -107,7 +107,7 @@ static inline void metapage_nohomeok(struct metapage *mp)
static inline void metapage_wait_for_io(struct metapage *mp)
{
if (test_bit(META_io, &mp->flag))
- wait_on_page_writeback(mp->page);
+ folio_wait_writeback(mp->folio);
}
/*
@@ -116,7 +116,7 @@ static inline void metapage_wait_for_io(struct metapage *mp)
static inline void _metapage_homeok(struct metapage *mp)
{
if (!--mp->nohomeok)
- put_page(mp->page);
+ folio_put(mp->folio);
}
static inline void metapage_homeok(struct metapage *mp)
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 9987055293b3..2999ed5d83f5 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -797,7 +797,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
size_t buf_size)
{
struct jfs_ea_list *ealist;
- struct jfs_ea *ea;
+ struct jfs_ea *ea, *ealist_end;
struct ea_buffer ea_buf;
int xattr_size;
ssize_t size;
@@ -817,9 +817,16 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
goto not_found;
ealist = (struct jfs_ea_list *) ea_buf.xattr;
+ ealist_end = END_EALIST(ealist);
/* Find the named attribute */
- for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea))
+ for (ea = FIRST_EA(ealist); ea < ealist_end; ea = NEXT_EA(ea)) {
+ if (unlikely(ea + 1 > ealist_end) ||
+ unlikely(NEXT_EA(ea) > ealist_end)) {
+ size = -EUCLEAN;
+ goto release;
+ }
+
if ((namelen == ea->namelen) &&
memcmp(name, ea->name, namelen) == 0) {
/* Found it */
@@ -834,6 +841,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
memcpy(data, value, size);
goto release;
}
+ }
not_found:
size = -ENODATA;
release:
@@ -861,7 +869,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
ssize_t size = 0;
int xattr_size;
struct jfs_ea_list *ealist;
- struct jfs_ea *ea;
+ struct jfs_ea *ea, *ealist_end;
struct ea_buffer ea_buf;
down_read(&JFS_IP(inode)->xattr_sem);
@@ -876,9 +884,16 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
goto release;
ealist = (struct jfs_ea_list *) ea_buf.xattr;
+ ealist_end = END_EALIST(ealist);
/* compute required size of list */
- for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) {
+ for (ea = FIRST_EA(ealist); ea < ealist_end; ea = NEXT_EA(ea)) {
+ if (unlikely(ea + 1 > ealist_end) ||
+ unlikely(NEXT_EA(ea) > ealist_end)) {
+ size = -EUCLEAN;
+ goto release;
+ }
+
if (can_list(ea))
size += name_size(ea) + 1;
}
diff --git a/fs/locks.c b/fs/locks.c
index bdd94c32256f..9afb16e0683f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2570,8 +2570,9 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by releasing the
- * lock that was just acquired. There is no need to do that when we're
+ * Detect close/fcntl races and recover by zapping all POSIX locks
+ * associated with this file and our files_struct, just like on
+ * filp_flush(). There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
if (!error && file_lock->c.flc_type != F_UNLCK &&
@@ -2586,9 +2587,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
- file_lock->c.flc_type = F_UNLCK;
- error = do_lock_file_wait(filp, cmd, file_lock);
- WARN_ON_ONCE(error);
+ locks_remove_posix(filp, files);
error = -EBADF;
}
}
diff --git a/fs/namei.c b/fs/namei.c
index 3a4c40e12f78..5512cb10fa89 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3248,9 +3248,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
/**
* vfs_create - create new file
* @idmap: idmap of the mount the inode was found from
- * @dir: inode of @dentry
- * @dentry: pointer to dentry of the base directory
- * @mode: mode of the new file
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child file
+ * @mode: mode of the child file
* @want_excl: whether the file must not yet exist
*
* Create a new file.
@@ -4047,9 +4047,9 @@ EXPORT_SYMBOL(user_path_create);
/**
* vfs_mknod - create device node or file
* @idmap: idmap of the mount the inode was found from
- * @dir: inode of @dentry
- * @dentry: pointer to dentry of the base directory
- * @mode: mode of the new device node or file
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child device node
+ * @mode: mode of the child device node
* @dev: device number of device to create
*
* Create a device node or file.
@@ -4174,9 +4174,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
/**
* vfs_mkdir - create directory
* @idmap: idmap of the mount the inode was found from
- * @dir: inode of @dentry
- * @dentry: pointer to dentry of the base directory
- * @mode: mode of the new directory
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child directory
+ * @mode: mode of the child directory
*
* Create a directory.
*
@@ -4256,8 +4256,8 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
/**
* vfs_rmdir - remove directory
* @idmap: idmap of the mount the inode was found from
- * @dir: inode of @dentry
- * @dentry: pointer to dentry of the base directory
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child directory
*
* Remove a directory.
*
@@ -4537,8 +4537,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
/**
* vfs_symlink - create symlink
* @idmap: idmap of the mount the inode was found from
- * @dir: inode of @dentry
- * @dentry: pointer to dentry of the base directory
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child symlink file
* @oldname: name of the file to link to
*
* Create a symlink.
diff --git a/fs/namespace.c b/fs/namespace.c
index 221db9de4729..328087a4df8a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -70,7 +70,7 @@ static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
/* Don't allow confusion with old 32bit mount ID */
-#define MNT_UNIQUE_ID_OFFSET (1ULL << 32)
+#define MNT_UNIQUE_ID_OFFSET (1ULL << 31)
static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
static struct hlist_head *mount_hashtable __ro_after_init;
diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig
index bec805e0c44c..1b78e8b65ebc 100644
--- a/fs/netfs/Kconfig
+++ b/fs/netfs/Kconfig
@@ -22,6 +22,14 @@ config NETFS_STATS
between CPUs. On the other hand, the stats are very useful for
debugging purposes. Saying 'Y' here is recommended.
+config NETFS_DEBUG
+ bool "Enable dynamic debugging netfslib and FS-Cache"
+ depends on NETFS
+ help
+ This permits debugging to be dynamically enabled in the local caching
+ management module. If this is set, the debugging output may be
+ enabled by setting bits in /sys/module/netfs/parameters/debug.
+
config FSCACHE
bool "General filesystem local caching manager"
depends on NETFS_SUPPORT
@@ -50,13 +58,3 @@ config FSCACHE_STATS
debugging purposes. Saying 'Y' here is recommended.
See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_DEBUG
- bool "Debug FS-Cache"
- depends on FSCACHE
- help
- This permits debugging to be dynamically enabled in the local caching
- management module. If this is set, the debugging output may be
- enabled by setting bits in /sys/modules/fscache/parameter/debug.
-
- See Documentation/filesystems/caching/fscache.rst for more information.
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 4c0401dbbfcf..a688d4c75d99 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
- kdebug("no unlock");
+ _debug("no unlock");
else
folio_unlock(folio);
}
@@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl)
struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
int ret;
- kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
+ _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
if (readahead_count(ractl) == 0)
return;
@@ -268,10 +268,10 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct folio *sink = NULL;
int ret;
- kenter("%lx", folio->index);
+ _enter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
- folio_file_pos(folio), folio_size(folio),
+ folio_pos(folio), folio_size(folio),
NETFS_READPAGE);
if (IS_ERR(rreq)) {
ret = PTR_ERR(rreq);
@@ -470,7 +470,7 @@ retry:
}
rreq = netfs_alloc_request(mapping, file,
- folio_file_pos(folio), folio_size(folio),
+ folio_pos(folio), folio_size(folio),
NETFS_READ_FOR_WRITE);
if (IS_ERR(rreq)) {
ret = PTR_ERR(rreq);
@@ -508,7 +508,7 @@ retry:
have_folio:
*_folio = folio;
- kleave(" = 0");
+ _leave(" = 0");
return 0;
error_put:
@@ -518,7 +518,7 @@ error:
folio_unlock(folio);
folio_put(folio);
}
- kleave(" = %d", ret);
+ _leave(" = %d", ret);
return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
@@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t flen = folio_size(folio);
int ret;
- kenter("%zx @%llx", flen, start);
+ _enter("%zx @%llx", flen, start);
ret = -ENOMEM;
@@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
- kleave(" = %d", ret);
+ _leave(" = %d", ret);
return ret;
}
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index ecbc99ec7d36..4726c315453c 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -54,9 +54,9 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
{
struct netfs_folio *finfo = netfs_folio_info(folio);
struct netfs_group *group = netfs_folio_group(folio);
- loff_t pos = folio_file_pos(folio);
+ loff_t pos = folio_pos(folio);
- kenter("");
+ _enter("");
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;
@@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
*/
howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
flen, offset, part, maybe_trouble);
- kdebug("howto %u", howto);
+ _debug("howto %u", howto);
switch (howto) {
case NETFS_JUST_PREFETCH:
ret = netfs_prefetch_for_write(file, folio, offset, part);
if (ret < 0) {
- kdebug("prefetch = %zd", ret);
+ _debug("prefetch = %zd", ret);
goto error_folio_unlock;
}
break;
@@ -418,7 +418,7 @@ out:
}
iocb->ki_pos += written;
- kleave(" = %zd [%zd]", written, ret);
+ _leave(" = %zd [%zd]", written, ret);
return written ? written : ret;
error_folio_unlock:
@@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct netfs_inode *ictx = netfs_inode(inode);
ssize_t ret;
- kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
@@ -529,7 +529,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
vm_fault_t ret = VM_FAULT_RETRY;
int err;
- kenter("%lx", folio->index);
+ _enter("%lx", folio->index);
sb_start_pagefault(inode->i_sb);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index b6debac6205f..10a1e4da6bda 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
size_t orig_count = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- kenter("");
+ _enter("");
if (!orig_count)
return 0; /* Don't update atime */
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 792ef17bae21..88f2adfab75e 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
size_t len = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
- kenter("");
+ _enter("");
/* We're going to need a bounce buffer if what we transmit is going to
* be different in some way to the source buffer, e.g. because it gets
@@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
// TODO
- kdebug("uw %llx-%llx", start, end);
+ _debug("uw %llx-%llx", start, end);
wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
iocb->ki_flags & IOCB_DIRECT ?
@@ -96,7 +96,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
- kdebug("begin = %zd", ret);
+ _debug("begin = %zd", ret);
goto out;
}
@@ -143,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t pos = iocb->ki_pos;
unsigned long long end = pos + iov_iter_count(from) - 1;
- kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
+ _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c
index 288a73c3072d..9397ed39b0b4 100644
--- a/fs/netfs/fscache_cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
int n_accesses;
- kenter("{%s,%s}", ops->name, cache->name);
+ _enter("{%s,%s}", ops->name, cache->name);
BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING);
@@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache,
up_write(&fscache_addremove_sem);
pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name);
- kleave(" = 0 [%s]", cache->name);
+ _leave(" = 0 [%s]", cache->name);
return 0;
}
EXPORT_SYMBOL(fscache_add_cache);
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index 4d1e8bf4c615..bce2492186d0 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
{
struct fscache_cookie *cookie;
- kenter("V=%x", volume->debug_id);
+ _enter("V=%x", volume->debug_id);
if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
return NULL;
@@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
trace_fscache_acquire(cookie);
fscache_stat(&fscache_n_acquires_ok);
- kleave(" = c=%08x", cookie->debug_id);
+ _leave(" = c=%08x", cookie->debug_id);
return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
@@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed;
bool need_withdraw = false;
- kenter("");
+ _enter("");
if (!cookie->volume->cache_priv) {
fscache_create_volume(cookie->volume, true);
@@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
if (cookie->state != FSCACHE_COOKIE_STATE_FAILED)
fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT);
need_withdraw = true;
- kleave(" [fail]");
+ _leave(" [fail]");
goto out;
}
@@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify)
bool queue = false;
int n_active;
- kenter("c=%08x", cookie->debug_id);
+ _enter("c=%08x", cookie->debug_id);
if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
"Trying to use relinquished cookie\n"))
@@ -636,7 +636,7 @@ again:
spin_unlock(&cookie->lock);
if (queue)
fscache_queue_cookie(cookie, fscache_cookie_get_use_work);
- kleave("");
+ _leave("");
}
EXPORT_SYMBOL(__fscache_use_cookie);
@@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie)
enum fscache_cookie_state state;
bool wake = false;
- kenter("c=%x", cookie->debug_id);
+ _enter("c=%x", cookie->debug_id);
again:
spin_lock(&cookie->lock);
@@ -820,7 +820,7 @@ out:
spin_unlock(&cookie->lock);
if (wake)
wake_up_cookie_state(cookie);
- kleave("");
+ _leave("");
}
static void fscache_cookie_worker(struct work_struct *work)
@@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie)
set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags);
spin_unlock(&cookie->lock);
fscache_stat(&fscache_n_cookies_lru_expired);
- kdebug("lru c=%x", cookie->debug_id);
+ _debug("lru c=%x", cookie->debug_id);
__fscache_withdraw_cookie(cookie);
}
@@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
if (retire)
fscache_stat(&fscache_n_relinquishes_retire);
- kenter("c=%08x{%d},%d",
+ _enter("c=%08x{%d},%d",
cookie->debug_id, atomic_read(&cookie->n_active), retire);
if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
@@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
{
bool is_caching;
- kenter("c=%x", cookie->debug_id);
+ _enter("c=%x", cookie->debug_id);
fscache_stat(&fscache_n_invalidates);
@@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */
default:
spin_unlock(&cookie->lock);
- kleave(" [no %u]", cookie->state);
+ _leave(" [no %u]", cookie->state);
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
- kleave(" [look %x]", cookie->inval_counter);
+ _leave(" [look %x]", cookie->inval_counter);
return;
case FSCACHE_COOKIE_STATE_ACTIVE:
@@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
if (is_caching)
fscache_queue_cookie(cookie, fscache_cookie_get_inval_work);
- kleave(" [inv]");
+ _leave(" [inv]");
return;
}
}
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index bf4eaeec44fb..38637e5c9b57 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
again:
if (!fscache_cache_is_live(cookie->volume->cache)) {
- kleave(" [broken]");
+ _leave(" [broken]");
return false;
}
state = fscache_cookie_state(cookie);
- kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_CREATING:
@@ -52,7 +52,7 @@ again:
case FSCACHE_COOKIE_STATE_DROPPED:
case FSCACHE_COOKIE_STATE_RELINQUISHING:
default:
- kleave(" [not live]");
+ _leave(" [not live]");
return false;
}
@@ -92,7 +92,7 @@ again:
spin_lock(&cookie->lock);
state = fscache_cookie_state(cookie);
- kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
+ _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_LOOKING_UP:
@@ -140,7 +140,7 @@ failed:
cres->cache_priv = NULL;
cres->ops = NULL;
fscache_end_cookie_access(cookie, fscache_access_io_not_live);
- kleave(" = -ENOBUFS");
+ _leave(" = -ENOBUFS");
return -ENOBUFS;
}
@@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
if (len == 0)
goto abandon;
- kenter("%llx,%zx", start, len);
+ _enter("%llx,%zx", start, len);
wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS);
if (!wreq)
diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c
index bf9b33d26e31..42e98bb523e3 100644
--- a/fs/netfs/fscache_main.c
+++ b/fs/netfs/fscache_main.c
@@ -99,7 +99,7 @@ error_wq:
*/
void __exit fscache_exit(void)
{
- kenter("");
+ _enter("");
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index 2e2a405ca9b0..cb75c07b5281 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -264,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
fscache_see_volume(volume, fscache_volume_new_acquire);
fscache_stat(&fscache_n_volumes);
up_write(&fscache_addremove_sem);
- kleave(" = v=%x", volume->debug_id);
+ _leave(" = v=%x", volume->debug_id);
return volume;
err_vol:
@@ -466,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume)
{
int n_accesses;
- kdebug("withdraw V=%x", volume->debug_id);
+ _debug("withdraw V=%x", volume->debug_id);
/* Allow wakeups on dec-to-0 */
n_accesses = atomic_dec_return(&volume->n_accesses);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 21e46bc9aa49..7773f3d855a9 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -34,6 +34,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
/*
* main.c
*/
+extern unsigned int netfs_debug;
extern struct list_head netfs_io_requests;
extern spinlock_t netfs_proc_lock;
extern mempool_t netfs_request_pool;
@@ -353,12 +354,42 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait);
* debug tracing
*/
#define dbgprintk(FMT, ...) \
- pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
+ printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
+#ifdef __KDEBUG
+#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
+#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
+#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
+
+#elif defined(CONFIG_NETFS_DEBUG)
+#define _enter(FMT, ...) \
+do { \
+ if (netfs_debug) \
+ kenter(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT, ...) \
+do { \
+ if (netfs_debug) \
+ kleave(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT, ...) \
+do { \
+ if (netfs_debug) \
+ kdebug(FMT, ##__VA_ARGS__); \
+} while (0)
+
+#else
+#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
+#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
+#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
+#endif
+
/*
* assertions
*/
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c7576481c321..c93851b98368 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
if (count == remaining)
return;
- kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+ _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
rreq->debug_id, subreq->debug_index,
iov_iter_count(&subreq->io_iter), subreq->transferred,
subreq->len, rreq->i_size,
@@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
- kenter("R=%x[%x]{%llx,%lx},%zd",
+ _enter("R=%x[%x]{%llx,%lx},%zd",
rreq->debug_id, subreq->debug_index,
subreq->start, subreq->flags, transferred_or_error);
@@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
struct netfs_inode *ictx = netfs_inode(rreq->inode);
size_t lsize;
- kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
+ _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
if (rreq->origin != NETFS_DIO_READ) {
source = netfs_cache_prepare_read(subreq, rreq->i_size);
@@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->start = rreq->start + rreq->submitted;
subreq->len = io_iter->count;
- kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
+ _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Call out to the cache to find out what it can do with the remaining
@@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
struct iov_iter io_iter;
int ret;
- kenter("R=%x %llx-%llx",
+ _enter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
if (rreq->len == 0) {
@@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
atomic_set(&rreq->nr_outstanding, 1);
io_iter = rreq->io_iter;
do {
- kdebug("submit %llx + %llx >= %llx",
+ _debug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
if (rreq->origin == NETFS_DIO_READ &&
rreq->start + rreq->submitted >= rreq->i_size)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index db824c372842..5f0f438e5d21 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -20,6 +20,10 @@ MODULE_LICENSE("GPL");
EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
+unsigned netfs_debug;
+module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
+
static struct kmem_cache *netfs_request_slab;
static struct kmem_cache *netfs_subrequest_slab;
mempool_t netfs_request_pool;
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 172808e83ca8..83e644bd518f 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -26,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
struct fscache_cookie *cookie = netfs_i_cookie(ictx);
bool need_use = false;
- kenter("");
+ _enter("");
if (!filemap_dirty_folio(mapping, folio))
return false;
@@ -99,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
struct netfs_folio *finfo;
size_t flen = folio_size(folio);
- kenter("{%lx},%zx,%zx", folio->index, offset, length);
+ _enter("{%lx},%zx,%zx", folio->index, offset, length);
if (!folio_test_private(folio))
return;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 488147439fe0..426cf87aaf2e 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
{
struct list_head *next;
- kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+ _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
if (list_empty(&stream->subrequests))
return;
@@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
unsigned int notes;
int s;
- kenter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+ _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
trace_netfs_collect(wreq);
trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
@@ -409,7 +409,7 @@ reassess_streams:
front = stream->front;
while (front) {
trace_netfs_collect_sreq(wreq, front);
- //kdebug("sreq [%x] %llx %zx/%zx",
+ //_debug("sreq [%x] %llx %zx/%zx",
// front->debug_index, front->start, front->transferred, front->len);
/* Stall if there may be a discontinuity. */
@@ -598,7 +598,7 @@ reassess_streams:
out:
netfs_put_group_many(wreq->group, wreq->nr_group_rel);
wreq->nr_group_rel = 0;
- kleave(" = %x", notes);
+ _leave(" = %x", notes);
return;
need_retry:
@@ -606,7 +606,7 @@ need_retry:
* that any partially completed op will have had any wholly transferred
* folios removed from it.
*/
- kdebug("retry");
+ _debug("retry");
netfs_retry_writes(wreq);
goto out;
}
@@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work)
size_t transferred;
int s;
- kenter("R=%x", wreq->debug_id);
+ _enter("R=%x", wreq->debug_id);
netfs_see_request(wreq, netfs_rreq_trace_see_work);
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
@@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work)
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
- kdebug("finished");
+ _debug("finished");
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
@@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
struct netfs_io_request *wreq = subreq->rreq;
struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
- kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+ _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
switch (subreq->source) {
case NETFS_UPLOAD_TO_SERVER:
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index d7c971df8866..9258d30cffe3 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
if (IS_ERR(wreq))
return wreq;
- kenter("R=%x", wreq->debug_id);
+ _enter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
@@ -122,6 +122,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
wreq->io_streams[1].transferred = LONG_MAX;
if (fscache_resources_valid(&wreq->cache_resources)) {
wreq->io_streams[1].avail = true;
+ wreq->io_streams[1].active = true;
wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
}
@@ -159,7 +160,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
subreq->max_nr_segs = INT_MAX;
subreq->stream_nr = stream->stream_nr;
- kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
+ _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
@@ -215,7 +216,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
{
struct netfs_io_request *wreq = subreq->rreq;
- kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
+ _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
return netfs_write_subrequest_terminated(subreq, subreq->error, false);
@@ -272,11 +273,11 @@ int netfs_advance_write(struct netfs_io_request *wreq,
size_t part;
if (!stream->avail) {
- kleave("no write");
+ _leave("no write");
return len;
}
- kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
+ _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
if (subreq && start != subreq->start + subreq->len) {
netfs_issue_write(wreq, stream);
@@ -288,7 +289,7 @@ int netfs_advance_write(struct netfs_io_request *wreq,
subreq = stream->construct;
part = min(subreq->max_len - subreq->len, len);
- kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
+ _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
subreq->len += part;
subreq->nr_segs++;
@@ -319,7 +320,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
bool to_eof = false, streamw = false;
bool debug = false;
- kenter("");
+ _enter("");
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
@@ -329,7 +330,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (fpos >= i_size) {
/* mmap beyond eof. */
- kdebug("beyond eof");
+ _debug("beyond eof");
folio_start_writeback(folio);
folio_unlock(folio);
wreq->nr_group_rel += netfs_folio_written_back(folio);
@@ -363,7 +364,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
flen -= foff;
- kdebug("folio %zx %zx %zx", foff, flen, fsize);
+ _debug("folio %zx %zx %zx", foff, flen, fsize);
/* Deal with discontinuities in the stream of dirty pages. These can
* arise from a number of sources:
@@ -487,7 +488,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
for (int s = 0; s < NR_IO_STREAMS; s++)
netfs_issue_write(wreq, &wreq->io_streams[s]);
- kleave(" = 0");
+ _leave(" = 0");
return 0;
}
@@ -522,7 +523,7 @@ int netfs_writepages(struct address_space *mapping,
netfs_stat(&netfs_n_wh_writepages);
do {
- kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
+ _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
/* It appears we don't have to handle cyclic writeback wrapping. */
WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted);
@@ -546,14 +547,14 @@ int netfs_writepages(struct address_space *mapping,
mutex_unlock(&ictx->wb_lock);
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- kleave(" = %d", error);
+ _leave(" = %d", error);
return error;
couldnt_start:
netfs_kill_dirty_pages(mapping, wbc, folio);
out:
mutex_unlock(&ictx->wb_lock);
- kleave(" = %d", error);
+ _leave(" = %d", error);
return error;
}
EXPORT_SYMBOL(netfs_writepages);
@@ -590,7 +591,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
- kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
+ _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
@@ -624,7 +625,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
struct netfs_inode *ictx = netfs_inode(wreq->inode);
int ret;
- kenter("R=%x", wreq->debug_id);
+ _enter("R=%x", wreq->debug_id);
if (writethrough_cache)
netfs_write_folio(wreq, wbc, writethrough_cache);
@@ -657,7 +658,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
loff_t start = wreq->start;
int error = 0;
- kenter("%zx", len);
+ _enter("%zx", len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
@@ -665,7 +666,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
while (len) {
// TODO: Prepare content encryption
- kdebug("unbuffered %zx", len);
+ _debug("unbuffered %zx", len);
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
@@ -684,6 +685,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
if (list_empty(&upload->subrequests))
netfs_wake_write_collector(wreq, false);
- kleave(" = %d", error);
+ _leave(" = %d", error);
return error;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9aa2ab218c0a..61a8cdb9f1e1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -591,7 +591,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
- (long long)folio_file_pos(folio));
+ (long long)folio_pos(folio));
sb_start_pagefault(inode->i_sb);
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index b17a9eb9b148..49862c95b224 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -46,6 +46,10 @@ static inline void nfs_add_stats(const struct inode *inode,
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats)
static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 190c1fa8882c..d074d0ceb4f0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -182,7 +182,7 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset,
end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
if (i_size > 0 && folio->index < end_index)
goto out;
- end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count;
+ end = folio_pos(folio) + (loff_t)offset + (loff_t)count;
if (i_size >= end)
goto out;
trace_nfs_size_grow(inode, end);
@@ -1344,7 +1344,7 @@ int nfs_update_folio(struct file *file, struct folio *folio,
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count,
- (long long)(folio_file_pos(folio) + offset));
+ (long long)(folio_pos(folio) + offset));
if (!count)
goto out;
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 383f0afa2cea..cd14ea25968c 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -450,15 +450,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
const struct buffer_head *bh)
{
- struct buffer_head *pbh;
- __u64 key;
+ loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh);
- key = page_index(bh->b_page) << (PAGE_SHIFT -
- bmap->b_inode->i_blkbits);
- for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
- key++;
-
- return key;
+ return pos >> bmap->b_inode->i_blkbits;
}
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 0131d83b912d..c034080c334b 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -51,12 +51,21 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node));
if (unlikely(!bh))
- return NULL;
+ return ERR_PTR(-ENOMEM);
if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
buffer_dirty(bh))) {
- brelse(bh);
- BUG();
+ /*
+ * The block buffer at the specified new address was already
+ * in use. This can happen if it is a virtual block number
+ * and has been reallocated due to corruption of the bitmap
+ * used to manage its allocation state (if not, the buffer
+ * clearing of an abandoned b-tree node is missing somewhere).
+ */
+ nilfs_error(inode->i_sb,
+ "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)",
+ (unsigned long long)blocknr, inode->i_ino);
+ goto failed;
}
memset(bh->b_data, 0, i_blocksize(inode));
bh->b_bdev = inode->i_sb->s_bdev;
@@ -67,6 +76,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
folio_unlock(bh->b_folio);
folio_put(bh->b_folio);
return bh;
+
+failed:
+ folio_unlock(bh->b_folio);
+ folio_put(bh->b_folio);
+ brelse(bh);
+ return ERR_PTR(-EIO);
}
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
@@ -217,8 +232,8 @@ retry:
}
nbh = nilfs_btnode_create_block(btnc, newkey);
- if (!nbh)
- return -ENOMEM;
+ if (IS_ERR(nbh))
+ return PTR_ERR(nbh);
BUG_ON(nbh == obh);
ctxt->newbh = nbh;
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index a139970e4804..862bdf23120e 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
struct buffer_head *bh;
bh = nilfs_btnode_create_block(btnc, ptr);
- if (!bh)
- return -ENOMEM;
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
set_buffer_nilfs_volatile(bh);
*bhp = bh;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6ea81f1d5094..0ca3110d6386 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
#define nilfs_cnt32_ge(a, b) \
(typecheck(__u32, a) && typecheck(__u32, b) && \
- ((__s32)(a) - (__s32)(b) >= 0))
+ ((__s32)((a) - (b)) >= 0))
static int nilfs_prepare_segment_lock(struct super_block *sb,
struct nilfs_transaction_info *ti)
@@ -1639,41 +1639,30 @@ static void nilfs_begin_folio_io(struct folio *folio)
folio_unlock(folio);
}
-static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
+/**
+ * nilfs_prepare_write_logs - prepare to write logs
+ * @logs: logs to prepare for writing
+ * @seed: checksum seed value
+ *
+ * nilfs_prepare_write_logs() adds checksums and prepares the block
+ * buffers/folios for writing logs. In order to stabilize folios of
+ * memory-mapped file blocks by putting them in writeback state before
+ * calculating the checksums, first prepare to write payload blocks other
+ * than segment summary and super root blocks in which the checksums will
+ * be embedded.
+ */
+static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed)
{
struct nilfs_segment_buffer *segbuf;
struct folio *bd_folio = NULL, *fs_folio = NULL;
+ struct buffer_head *bh;
- list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
- struct buffer_head *bh;
-
- list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
- b_assoc_buffers) {
- if (bh->b_folio != bd_folio) {
- if (bd_folio) {
- folio_lock(bd_folio);
- folio_wait_writeback(bd_folio);
- folio_clear_dirty_for_io(bd_folio);
- folio_start_writeback(bd_folio);
- folio_unlock(bd_folio);
- }
- bd_folio = bh->b_folio;
- }
- }
-
+ /* Prepare to write payload blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- if (bh == segbuf->sb_super_root) {
- if (bh->b_folio != bd_folio) {
- folio_lock(bd_folio);
- folio_wait_writeback(bd_folio);
- folio_clear_dirty_for_io(bd_folio);
- folio_start_writeback(bd_folio);
- folio_unlock(bd_folio);
- bd_folio = bh->b_folio;
- }
+ if (bh == segbuf->sb_super_root)
break;
- }
set_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_begin_folio_io(fs_folio);
@@ -1681,6 +1670,42 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
}
}
}
+ nilfs_begin_folio_io(fs_folio);
+
+ nilfs_add_checksums_on_logs(logs, seed);
+
+ /* Prepare to write segment summary blocks */
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
+ b_assoc_buffers) {
+ mark_buffer_dirty(bh);
+ if (bh->b_folio == bd_folio)
+ continue;
+ if (bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+ }
+ bd_folio = bh->b_folio;
+ }
+ }
+
+ /* Prepare to write super root block */
+ bh = NILFS_LAST_SEGBUF(logs)->sb_super_root;
+ if (bh) {
+ mark_buffer_dirty(bh);
+ if (bh->b_folio != bd_folio) {
+ folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
+ folio_clear_dirty_for_io(bd_folio);
+ folio_start_writeback(bd_folio);
+ folio_unlock(bd_folio);
+ bd_folio = bh->b_folio;
+ }
+ }
+
if (bd_folio) {
folio_lock(bd_folio);
folio_wait_writeback(bd_folio);
@@ -1688,7 +1713,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
folio_start_writeback(bd_folio);
folio_unlock(bd_folio);
}
- nilfs_begin_folio_io(fs_folio);
}
static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -2070,10 +2094,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
/* Write partial segments */
- nilfs_segctor_prepare_write(sci);
-
- nilfs_add_checksums_on_logs(&sci->sc_segbufs,
- nilfs->ns_crc_seed);
+ nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed);
err = nilfs_segctor_write(sci, nilfs);
if (unlikely(err))
@@ -2824,8 +2845,6 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
- inode_attach_wb(nilfs->ns_bdev->bd_mapping->host, NULL);
-
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (unlikely(err))
nilfs_detach_log_writer(sb);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 379d22e28ed6..a5569b7f47a3 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \
sg_##name##_kobj); \
complete(&subgroups->sg_##name##_kobj_unregister); \
} \
-static struct kobj_type nilfs_##name##_ktype = { \
+static const struct kobj_type nilfs_##name##_ktype = { \
.default_groups = nilfs_##name##_groups, \
.sysfs_ops = &nilfs_##name##_attr_ops, \
.release = nilfs_##name##_attr_release, \
@@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = {
.store = nilfs_snapshot_attr_store,
};
-static struct kobj_type nilfs_snapshot_ktype = {
+static const struct kobj_type nilfs_snapshot_ktype = {
.default_groups = nilfs_snapshot_groups,
.sysfs_ops = &nilfs_snapshot_attr_ops,
.release = nilfs_snapshot_attr_release,
@@ -967,7 +967,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = {
.store = nilfs_dev_attr_store,
};
-static struct kobj_type nilfs_dev_ktype = {
+static const struct kobj_type nilfs_dev_ktype = {
.default_groups = nilfs_dev_groups,
.sysfs_ops = &nilfs_dev_attr_ops,
.release = nilfs_dev_attr_release,
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index 8e6bcdf99770..6ede3e924dec 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -231,7 +231,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
struct ntfs_sb_info *sbi;
struct ATTRIB *attr_s;
struct MFT_REC *rec;
- u32 used, asize, rsize, aoff, align;
+ u32 used, asize, rsize, aoff;
bool is_data;
CLST len, alen;
char *next;
@@ -252,10 +252,13 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
rsize = le32_to_cpu(attr->res.data_size);
is_data = attr->type == ATTR_DATA && !attr->name_len;
- align = sbi->cluster_size;
- if (is_attr_compressed(attr))
- align <<= COMPRESSION_UNIT;
- len = (rsize + align - 1) >> sbi->cluster_bits;
+ /* len - how many clusters required to store 'rsize' bytes */
+ if (is_attr_compressed(attr)) {
+ u8 shift = sbi->cluster_bits + NTFS_LZNT_CUNIT;
+ len = ((rsize + (1u << shift) - 1) >> shift) << NTFS_LZNT_CUNIT;
+ } else {
+ len = bytes_to_cluster(sbi, rsize);
+ }
run_init(run);
@@ -285,22 +288,21 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
if (err)
goto out2;
} else if (!page) {
- char *kaddr;
-
- page = grab_cache_page(ni->vfs_inode.i_mapping, 0);
- if (!page) {
- err = -ENOMEM;
+ struct address_space *mapping = ni->vfs_inode.i_mapping;
+ struct folio *folio;
+
+ folio = __filemap_get_folio(
+ mapping, 0, FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
goto out2;
}
- kaddr = kmap_atomic(page);
- memcpy(kaddr, data, rsize);
- memset(kaddr + rsize, 0, PAGE_SIZE - rsize);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
- SetPageUptodate(page);
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ folio_fill_tail(folio, 0, data, rsize);
+ folio_mark_uptodate(folio);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
}
}
@@ -670,7 +672,8 @@ pack_runs:
goto undo_2;
}
- if (!is_mft)
+ /* keep runs for $MFT::$ATTR_DATA and $MFT::$ATTR_BITMAP. */
+ if (ni->mi.rno != MFT_REC_MFT)
run_truncate_head(run, evcn + 1);
svcn = le64_to_cpu(attr->nres.svcn);
@@ -972,6 +975,19 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
if (err)
goto out;
+ /* Check for compressed frame. */
+ err = attr_is_frame_compressed(ni, attr, vcn >> NTFS_LZNT_CUNIT, &hint);
+ if (err)
+ goto out;
+
+ if (hint) {
+ /* if frame is compressed - don't touch it. */
+ *lcn = COMPRESSED_LCN;
+ *len = hint;
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
if (!*len) {
if (run_lookup_entry(run, vcn, lcn, len, NULL)) {
if (*lcn != SPARSE_LCN || !new)
@@ -1223,11 +1239,12 @@ undo1:
goto out;
}
-int attr_data_read_resident(struct ntfs_inode *ni, struct page *page)
+int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio)
{
u64 vbo;
struct ATTRIB *attr;
u32 data_size;
+ size_t len;
attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL);
if (!attr)
@@ -1236,30 +1253,20 @@ int attr_data_read_resident(struct ntfs_inode *ni, struct page *page)
if (attr->non_res)
return E_NTFS_NONRESIDENT;
- vbo = page->index << PAGE_SHIFT;
+ vbo = folio->index << PAGE_SHIFT;
data_size = le32_to_cpu(attr->res.data_size);
- if (vbo < data_size) {
- const char *data = resident_data(attr);
- char *kaddr = kmap_atomic(page);
- u32 use = data_size - vbo;
-
- if (use > PAGE_SIZE)
- use = PAGE_SIZE;
+ if (vbo > data_size)
+ len = 0;
+ else
+ len = min(data_size - vbo, folio_size(folio));
- memcpy(kaddr, data + vbo, use);
- memset(kaddr + use, 0, PAGE_SIZE - use);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
- SetPageUptodate(page);
- } else if (!PageUptodate(page)) {
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- }
+ folio_fill_tail(folio, 0, resident_data(attr) + vbo, len);
+ folio_mark_uptodate(folio);
return 0;
}
-int attr_data_write_resident(struct ntfs_inode *ni, struct page *page)
+int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio)
{
u64 vbo;
struct mft_inode *mi;
@@ -1275,17 +1282,13 @@ int attr_data_write_resident(struct ntfs_inode *ni, struct page *page)
return E_NTFS_NONRESIDENT;
}
- vbo = page->index << PAGE_SHIFT;
+ vbo = folio->index << PAGE_SHIFT;
data_size = le32_to_cpu(attr->res.data_size);
if (vbo < data_size) {
char *data = resident_data(attr);
- char *kaddr = kmap_atomic(page);
- u32 use = data_size - vbo;
+ size_t len = min(data_size - vbo, folio_size(folio));
- if (use > PAGE_SIZE)
- use = PAGE_SIZE;
- memcpy(data + vbo, kaddr, use);
- kunmap_atomic(kaddr);
+ memcpy_from_folio(data + vbo, folio, 0, len);
mi->dirty = true;
}
ni->i_valid = data_size;
@@ -1378,7 +1381,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
u32 voff;
u8 bytes_per_off;
char *addr;
- struct page *page;
+ struct folio *folio;
int i, err;
__le32 *off32;
__le64 *off64;
@@ -1423,18 +1426,18 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
wof_size = le64_to_cpu(attr->nres.data_size);
down_write(&ni->file.run_lock);
- page = ni->file.offs_page;
- if (!page) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
+ folio = ni->file.offs_folio;
+ if (!folio) {
+ folio = folio_alloc(GFP_KERNEL, 0);
+ if (!folio) {
err = -ENOMEM;
goto out;
}
- page->index = -1;
- ni->file.offs_page = page;
+ folio->index = -1;
+ ni->file.offs_folio = folio;
}
- lock_page(page);
- addr = page_address(page);
+ folio_lock(folio);
+ addr = folio_address(folio);
if (vbo[1]) {
voff = vbo[1] & (PAGE_SIZE - 1);
@@ -1450,7 +1453,8 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
do {
pgoff_t index = vbo[i] >> PAGE_SHIFT;
- if (index != page->index) {
+ if (index != folio->index) {
+ struct page *page = &folio->page;
u64 from = vbo[i] & ~(u64)(PAGE_SIZE - 1);
u64 to = min(from + PAGE_SIZE, wof_size);
@@ -1463,10 +1467,10 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
err = ntfs_bio_pages(sbi, run, &page, 1, from,
to - from, REQ_OP_READ);
if (err) {
- page->index = -1;
+ folio->index = -1;
goto out1;
}
- page->index = index;
+ folio->index = index;
}
if (i) {
@@ -1504,7 +1508,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr,
*ondisk_size = off[1] - off[0];
out1:
- unlock_page(page);
+ folio_unlock(folio);
out:
up_write(&ni->file.run_lock);
return err;
@@ -1722,6 +1726,7 @@ repack:
attr_b->nres.total_size = cpu_to_le64(total_size);
inode_set_bytes(&ni->vfs_inode, total_size);
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
mi_b->dirty = true;
mark_inode_dirty(&ni->vfs_inode);
@@ -2356,8 +2361,13 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
}
- if (vbo > data_size) {
- /* Insert range after the file size is not allowed. */
+ if (vbo >= data_size) {
+ /*
+ * Insert range after the file size is not allowed.
+ * If the offset is equal to or greater than the end of
+ * file, an error is returned. For such operations (i.e., inserting
+ * a hole at the end of file), ftruncate(2) should be used.
+ */
return -EINVAL;
}
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index c9eb01ccee51..cf4fe21a5039 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -1382,7 +1382,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
if (err)
- break;
+ return err;
bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
if (!bh)
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index 1937e8e612f8..fc6a8aa29e3a 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -272,9 +272,12 @@ out:
return err == -ENOENT ? NULL : err ? ERR_PTR(err) : inode;
}
-static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
- const struct NTFS_DE *e, u8 *name,
- struct dir_context *ctx)
+/*
+ * returns false if 'ctx' if full
+ */
+static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi,
+ struct ntfs_inode *ni, const struct NTFS_DE *e,
+ u8 *name, struct dir_context *ctx)
{
const struct ATTR_FILE_NAME *fname;
unsigned long ino;
@@ -284,29 +287,29 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
fname = Add2Ptr(e, sizeof(struct NTFS_DE));
if (fname->type == FILE_NAME_DOS)
- return 0;
+ return true;
if (!mi_is_ref(&ni->mi, &fname->home))
- return 0;
+ return true;
ino = ino_get(&e->ref);
if (ino == MFT_REC_ROOT)
- return 0;
+ return true;
/* Skip meta files. Unless option to show metafiles is set. */
if (!sbi->options->showmeta && ntfs_is_meta_file(sbi, ino))
- return 0;
+ return true;
if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN))
- return 0;
+ return true;
name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name,
PATH_MAX);
if (name_len <= 0) {
ntfs_warn(sbi->sb, "failed to convert name for inode %lx.",
ino);
- return 0;
+ return true;
}
/*
@@ -326,7 +329,8 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
* It does additional locks/reads just to get the type of name.
* Should we use additional mount option to enable branch below?
*/
- if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ if (((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) ||
+ fname->dup.ea_size) &&
ino != ni->mi.rno) {
struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);
if (!IS_ERR_OR_NULL(inode)) {
@@ -335,17 +339,20 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
}
}
- return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
+ return dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
}
/*
* ntfs_read_hdr - Helper function for ntfs_readdir().
+ *
+ * returns 0 if ok.
+ * returns -EINVAL if directory is corrupted.
+ * returns +1 if 'ctx' is full.
*/
static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
const struct INDEX_HDR *hdr, u64 vbo, u64 pos,
u8 *name, struct dir_context *ctx)
{
- int err;
const struct NTFS_DE *e;
u32 e_size;
u32 end = le32_to_cpu(hdr->used);
@@ -353,12 +360,12 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
for (;; off += e_size) {
if (off + sizeof(struct NTFS_DE) > end)
- return -1;
+ return -EINVAL;
e = Add2Ptr(hdr, off);
e_size = le16_to_cpu(e->size);
if (e_size < sizeof(struct NTFS_DE) || off + e_size > end)
- return -1;
+ return -EINVAL;
if (de_is_last(e))
return 0;
@@ -368,14 +375,15 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
continue;
if (le16_to_cpu(e->key_size) < SIZEOF_ATTRIBUTE_FILENAME)
- return -1;
+ return -EINVAL;
ctx->pos = vbo + off;
/* Submit the name to the filldir callback. */
- err = ntfs_filldir(sbi, ni, e, name, ctx);
- if (err)
- return err;
+ if (!ntfs_dir_emit(sbi, ni, e, name, ctx)) {
+ /* ctx is full. */
+ return +1;
+ }
}
}
@@ -474,8 +482,6 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
vbo = (u64)bit << index_bits;
if (vbo >= i_size) {
- ntfs_inode_err(dir, "Looks like your dir is corrupt");
- ctx->pos = eod;
err = -EINVAL;
goto out;
}
@@ -498,9 +504,16 @@ out:
__putname(name);
put_indx_node(node);
- if (err == -ENOENT) {
+ if (err == 1) {
+ /* 'ctx' is full. */
+ err = 0;
+ } else if (err == -ENOENT) {
err = 0;
ctx->pos = pos;
+ } else if (err < 0) {
+ if (err == -EINVAL)
+ ntfs_inode_err(dir, "directory corrupted");
+ ctx->pos = eod;
}
return err;
@@ -618,10 +631,12 @@ const struct file_operations ntfs_dir_operations = {
#endif
};
+#if IS_ENABLED(CONFIG_NTFS_FS)
const struct file_operations ntfs_legacy_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = ntfs_readdir,
.open = ntfs_file_open,
};
+#endif
// clang-format on
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 2f903b6ce157..ca1ddc46bd86 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -13,6 +13,7 @@
#include <linux/compat.h>
#include <linux/falloc.h>
#include <linux/fiemap.h>
+#include <linux/fileattr.h>
#include "debug.h"
#include "ntfs.h"
@@ -48,6 +49,65 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
return 0;
}
+/*
+ * ntfs_fileattr_get - inode_operations::fileattr_get
+ */
+int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct ntfs_inode *ni = ntfs_i(inode);
+ u32 flags = 0;
+
+ if (inode->i_flags & S_IMMUTABLE)
+ flags |= FS_IMMUTABLE_FL;
+
+ if (inode->i_flags & S_APPEND)
+ flags |= FS_APPEND_FL;
+
+ if (is_compressed(ni))
+ flags |= FS_COMPR_FL;
+
+ if (is_encrypted(ni))
+ flags |= FS_ENCRYPT_FL;
+
+ fileattr_fill_flags(fa, flags);
+
+ return 0;
+}
+
+/*
+ * ntfs_fileattr_set - inode_operations::fileattr_set
+ */
+int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ u32 flags = fa->flags;
+ unsigned int new_fl = 0;
+
+ if (fileattr_has_fsx(fa))
+ return -EOPNOTSUPP;
+
+ if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL))
+ return -EOPNOTSUPP;
+
+ if (flags & FS_IMMUTABLE_FL)
+ new_fl |= S_IMMUTABLE;
+
+ if (flags & FS_APPEND_FL)
+ new_fl |= S_APPEND;
+
+ inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND);
+
+ inode_set_ctime_current(inode);
+ mark_inode_dirty(inode);
+
+ return 0;
+}
+
+/*
+ * ntfs_ioctl - file_operations::unlocked_ioctl
+ */
long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -77,20 +137,27 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct inode *inode = d_inode(path->dentry);
struct ntfs_inode *ni = ntfs_i(inode);
+ stat->result_mask |= STATX_BTIME;
+ stat->btime = ni->i_crtime;
+ stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */
+
+ if (inode->i_flags & S_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+
+ if (inode->i_flags & S_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+
if (is_compressed(ni))
stat->attributes |= STATX_ATTR_COMPRESSED;
if (is_encrypted(ni))
stat->attributes |= STATX_ATTR_ENCRYPTED;
- stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED;
+ stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED |
+ STATX_ATTR_IMMUTABLE | STATX_ATTR_APPEND;
generic_fillattr(idmap, request_mask, inode, stat);
- stat->result_mask |= STATX_BTIME;
- stat->btime = ni->i_crtime;
- stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */
-
return 0;
}
@@ -196,9 +263,9 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
PAGE_SIZE;
iblock = page_off >> inode->i_blkbits;
- folio = __filemap_get_folio(mapping, idx,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
- mapping_gfp_constraint(mapping, ~__GFP_FS));
+ folio = __filemap_get_folio(
+ mapping, idx, FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_constraint(mapping, ~__GFP_FS));
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -253,8 +320,7 @@ out:
*/
static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = file_inode(file);
struct ntfs_inode *ni = ntfs_i(inode);
u64 from = ((u64)vma->vm_pgoff << PAGE_SHIFT);
bool rw = vma->vm_flags & VM_WRITE;
@@ -299,10 +365,7 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
}
if (ni->i_valid < to) {
- if (!inode_trylock(inode)) {
- err = -EAGAIN;
- goto out;
- }
+ inode_lock(inode);
err = ntfs_extend_initialized_size(file, ni,
ni->i_valid, to);
inode_unlock(inode);
@@ -431,7 +494,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
*/
static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
{
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
struct super_block *sb = inode->i_sb;
struct ntfs_sb_info *sbi = sb->s_fs_info;
@@ -744,7 +807,7 @@ out:
static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = file_inode(file);
struct ntfs_inode *ni = ntfs_i(inode);
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
@@ -781,7 +844,7 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- struct inode *inode = in->f_mapping->host;
+ struct inode *inode = file_inode(in);
struct ntfs_inode *ni = ntfs_i(inode);
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
@@ -824,23 +887,25 @@ static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index,
*frame_uptodate = true;
for (npages = 0; npages < pages_per_frame; npages++, index++) {
- struct page *page;
+ struct folio *folio;
- page = find_or_create_page(mapping, index, gfp_mask);
- if (!page) {
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ gfp_mask);
+ if (IS_ERR(folio)) {
while (npages--) {
- page = pages[npages];
- unlock_page(page);
- put_page(page);
+ folio = page_folio(pages[npages]);
+ folio_unlock(folio);
+ folio_put(folio);
}
return -ENOMEM;
}
- if (!PageUptodate(page))
+ if (!folio_test_uptodate(folio))
*frame_uptodate = false;
- pages[npages] = page;
+ pages[npages] = &folio->page;
}
return 0;
@@ -1075,8 +1140,7 @@ out:
static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = file_inode(file);
ssize_t ret;
int err;
struct ntfs_inode *ni = ntfs_i(inode);
@@ -1198,7 +1262,7 @@ static int ntfs_file_release(struct inode *inode, struct file *file)
}
/*
- * ntfs_fiemap - file_operations::fiemap
+ * ntfs_fiemap - inode_operations::fiemap
*/
int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
@@ -1227,6 +1291,8 @@ const struct inode_operations ntfs_file_inode_operations = {
.get_acl = ntfs_get_acl,
.set_acl = ntfs_set_acl,
.fiemap = ntfs_fiemap,
+ .fileattr_get = ntfs_fileattr_get,
+ .fileattr_set = ntfs_fileattr_set,
};
const struct file_operations ntfs_file_operations = {
@@ -1246,6 +1312,7 @@ const struct file_operations ntfs_file_operations = {
.release = ntfs_file_release,
};
+#if IS_ENABLED(CONFIG_NTFS_FS)
const struct file_operations ntfs_legacy_file_operations = {
.llseek = generic_file_llseek,
.read_iter = ntfs_file_read_iter,
@@ -1253,4 +1320,5 @@ const struct file_operations ntfs_legacy_file_operations = {
.open = ntfs_file_open,
.release = ntfs_file_release,
};
+#endif
// clang-format on
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 0008670939a4..a469c608a394 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -122,10 +122,10 @@ void ni_clear(struct ntfs_inode *ni)
else {
run_close(&ni->file.run);
#ifdef CONFIG_NTFS3_LZX_XPRESS
- if (ni->file.offs_page) {
+ if (ni->file.offs_folio) {
/* On-demand allocated page for offsets. */
- put_page(ni->file.offs_page);
- ni->file.offs_page = NULL;
+ folio_put(ni->file.offs_folio);
+ ni->file.offs_folio = NULL;
}
#endif
}
@@ -1501,7 +1501,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
if (is_ext) {
if (flags & ATTR_FLAG_COMPRESSED)
- attr->nres.c_unit = COMPRESSION_UNIT;
+ attr->nres.c_unit = NTFS_LZNT_CUNIT;
attr->nres.total_size = attr->nres.alloc_size;
}
@@ -1601,8 +1601,10 @@ int ni_delete_all(struct ntfs_inode *ni)
asize = le32_to_cpu(attr->size);
roff = le16_to_cpu(attr->nres.run_off);
- if (roff > asize)
+ if (roff > asize) {
+ _ntfs_bad_inode(&ni->vfs_inode);
return -EINVAL;
+ }
/* run==1 means unpack and deallocate. */
run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn,
@@ -1897,6 +1899,47 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr,
}
/*
+ * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent
+ * but it accepts kernel address for fi_extents_start
+ */
+static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo,
+ u64 logical, u64 phys, u64 len, u32 flags)
+{
+ struct fiemap_extent extent;
+ struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
+
+ /* only count the extents */
+ if (fieinfo->fi_extents_max == 0) {
+ fieinfo->fi_extents_mapped++;
+ return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+ }
+
+ if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
+ return 1;
+
+ if (flags & FIEMAP_EXTENT_DELALLOC)
+ flags |= FIEMAP_EXTENT_UNKNOWN;
+ if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
+ flags |= FIEMAP_EXTENT_ENCODED;
+ if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
+ flags |= FIEMAP_EXTENT_NOT_ALIGNED;
+
+ memset(&extent, 0, sizeof(extent));
+ extent.fe_logical = logical;
+ extent.fe_physical = phys;
+ extent.fe_length = len;
+ extent.fe_flags = flags;
+
+ dest += fieinfo->fi_extents_mapped;
+ memcpy(dest, &extent, sizeof(extent));
+
+ fieinfo->fi_extents_mapped++;
+ if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
+ return 1;
+ return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+}
+
+/*
* ni_fiemap - Helper for file_fiemap().
*
* Assumed ni_lock.
@@ -1906,6 +1949,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
__u64 vbo, __u64 len)
{
int err = 0;
+ struct fiemap_extent __user *fe_u = fieinfo->fi_extents_start;
+ struct fiemap_extent *fe_k = NULL;
struct ntfs_sb_info *sbi = ni->mi.sbi;
u8 cluster_bits = sbi->cluster_bits;
struct runs_tree *run;
@@ -1953,6 +1998,18 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
goto out;
}
+ /*
+ * To avoid lock problems replace pointer to user memory by pointer to kernel memory.
+ */
+ fe_k = kmalloc_array(fieinfo->fi_extents_max,
+ sizeof(struct fiemap_extent),
+ GFP_NOFS | __GFP_ZERO);
+ if (!fe_k) {
+ err = -ENOMEM;
+ goto out;
+ }
+ fieinfo->fi_extents_start = fe_k;
+
end = vbo + len;
alloc_size = le64_to_cpu(attr->nres.alloc_size);
if (end > alloc_size)
@@ -2041,8 +2098,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
if (vbo + dlen >= end)
flags |= FIEMAP_EXTENT_LAST;
- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen,
- flags);
+ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, dlen,
+ flags);
+
if (err < 0)
break;
if (err == 1) {
@@ -2062,7 +2120,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
if (vbo + bytes >= end)
flags |= FIEMAP_EXTENT_LAST;
- err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags);
+ err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, bytes,
+ flags);
if (err < 0)
break;
if (err == 1) {
@@ -2075,7 +2134,19 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
up_read(run_lock);
+ /*
+ * Copy to user memory out of lock
+ */
+ if (copy_to_user(fe_u, fe_k,
+ fieinfo->fi_extents_max *
+ sizeof(struct fiemap_extent))) {
+ err = -EFAULT;
+ }
+
out:
+ /* Restore original pointer. */
+ fieinfo->fi_extents_start = fe_u;
+ kfree(fe_k);
return err;
}
@@ -2085,12 +2156,12 @@ out:
* When decompressing, we typically obtain more than one page per reference.
* We inject the additional pages into the page cache.
*/
-int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
+int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio)
{
int err;
struct ntfs_sb_info *sbi = ni->mi.sbi;
- struct address_space *mapping = page->mapping;
- pgoff_t index = page->index;
+ struct address_space *mapping = folio->mapping;
+ pgoff_t index = folio->index;
u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT;
struct page **pages = NULL; /* Array of at most 16 pages. stack? */
u8 frame_bits;
@@ -2100,7 +2171,8 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
struct page *pg;
if (vbo >= i_size_read(&ni->vfs_inode)) {
- SetPageUptodate(page);
+ folio_zero_range(folio, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
err = 0;
goto out;
}
@@ -2124,7 +2196,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
goto out;
}
- pages[idx] = page;
+ pages[idx] = &folio->page;
index = frame_vbo >> PAGE_SHIFT;
gfp_mask = mapping_gfp_mask(mapping);
@@ -2143,9 +2215,6 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame);
out1:
- if (err)
- SetPageError(page);
-
for (i = 0; i < pages_per_frame; i++) {
pg = pages[i];
if (i == idx || !pg)
@@ -2157,7 +2226,7 @@ out1:
out:
/* At this point, err contains 0 or -EIO depending on the "critical" page. */
kfree(pages);
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
@@ -2362,9 +2431,9 @@ remove_wof:
/* Clear cached flag. */
ni->ni_flags &= ~NI_FLAG_COMPRESSED_MASK;
- if (ni->file.offs_page) {
- put_page(ni->file.offs_page);
- ni->file.offs_page = NULL;
+ if (ni->file.offs_folio) {
+ folio_put(ni->file.offs_folio);
+ ni->file.offs_folio = NULL;
}
mapping->a_ops = &ntfs_aops;
@@ -2718,7 +2787,6 @@ out:
for (i = 0; i < pages_per_frame; i++) {
pg = pages[i];
kunmap(pg);
- ClearPageError(pg);
SetPageUptodate(pg);
}
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index d7807d255dfe..c64dd114ac65 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -724,7 +724,8 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
if (!rsize || rsize > bytes ||
rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts ||
- le16_to_cpu(rt->total) > ne || ff > ts || lf > ts ||
+ le16_to_cpu(rt->total) > ne || ff > ts - sizeof(__le32) ||
+ lf > ts - sizeof(__le32) ||
(ff && ff < sizeof(struct RESTART_TABLE)) ||
(lf && lf < sizeof(struct RESTART_TABLE))) {
return false;
@@ -754,6 +755,9 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
return false;
off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off));
+
+ if (off > ts - sizeof(__le32))
+ return false;
}
return true;
@@ -2992,7 +2996,7 @@ static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi,
if (is_ext) {
attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
if (is_attr_compressed(attr))
- attr->nres.c_unit = COMPRESSION_UNIT;
+ attr->nres.c_unit = NTFS_LZNT_CUNIT;
attr->nres.run_off =
cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size);
@@ -3722,6 +3726,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
u64 rec_lsn, checkpt_lsn = 0, rlsn = 0;
struct ATTR_NAME_ENTRY *attr_names = NULL;
+ u32 attr_names_bytes = 0;
+ u32 oatbl_bytes = 0;
struct RESTART_TABLE *dptbl = NULL;
struct RESTART_TABLE *trtbl = NULL;
const struct RESTART_TABLE *rt;
@@ -3736,6 +3742,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
struct NTFS_RESTART *rst = NULL;
struct lcb *lcb = NULL;
struct OPEN_ATTR_ENRTY *oe;
+ struct ATTR_NAME_ENTRY *ane;
struct TRANSACTION_ENTRY *tr;
struct DIR_PAGE_ENTRY *dp;
u32 i, bytes_per_attr_entry;
@@ -3915,6 +3922,9 @@ check_restart_area:
goto out;
}
+ log->page_mask = log->page_size - 1;
+ log->page_bits = blksize_bits(log->page_size);
+
/* If the file size has shrunk then we won't mount it. */
if (log->l_size < le64_to_cpu(ra2->l_size)) {
err = -EINVAL;
@@ -4104,7 +4114,7 @@ process_log:
/* Allocate and Read the Transaction Table. */
if (!rst->transact_table_len)
- goto check_dirty_page_table;
+ goto check_dirty_page_table; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->transact_table_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4144,7 +4154,7 @@ process_log:
check_dirty_page_table:
/* The next record back should be the Dirty Pages Table. */
if (!rst->dirty_pages_len)
- goto check_attribute_names;
+ goto check_attribute_names; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->dirty_pages_table_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4180,7 +4190,7 @@ check_dirty_page_table:
/* Convert Ra version '0' into version '1'. */
if (rst->major_ver)
- goto end_conv_1;
+ goto end_conv_1; /* reduce tab pressure. */
dp = NULL;
while ((dp = enum_rstbl(dptbl, dp))) {
@@ -4200,8 +4210,7 @@ end_conv_1:
* remembering the oldest lsn values.
*/
if (sbi->cluster_size <= log->page_size)
- goto trace_dp_table;
-
+ goto trace_dp_table; /* reduce tab pressure. */
dp = NULL;
while ((dp = enum_rstbl(dptbl, dp))) {
struct DIR_PAGE_ENTRY *next = dp;
@@ -4222,7 +4231,7 @@ trace_dp_table:
check_attribute_names:
/* The next record should be the Attribute Names. */
if (!rst->attr_names_len)
- goto check_attr_table;
+ goto check_attr_table; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->attr_names_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4240,9 +4249,9 @@ check_attribute_names:
}
t32 = lrh_length(lrh);
- rec_len -= t32;
+ attr_names_bytes = rec_len - t32;
- attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS);
+ attr_names = kmemdup(Add2Ptr(lrh, t32), attr_names_bytes, GFP_NOFS);
if (!attr_names) {
err = -ENOMEM;
goto out;
@@ -4254,7 +4263,7 @@ check_attribute_names:
check_attr_table:
/* The next record should be the attribute Table. */
if (!rst->open_attr_len)
- goto check_attribute_names2;
+ goto check_attribute_names2; /* reduce tab pressure. */
t64 = le64_to_cpu(rst->open_attr_table_lsn);
err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
@@ -4274,14 +4283,14 @@ check_attr_table:
t16 = le16_to_cpu(lrh->redo_off);
rt = Add2Ptr(lrh, t16);
- t32 = rec_len - t16;
+ oatbl_bytes = rec_len - t16;
- if (!check_rstbl(rt, t32)) {
+ if (!check_rstbl(rt, oatbl_bytes)) {
err = -EINVAL;
goto out;
}
- oatbl = kmemdup(rt, t32, GFP_NOFS);
+ oatbl = kmemdup(rt, oatbl_bytes, GFP_NOFS);
if (!oatbl) {
err = -ENOMEM;
goto out;
@@ -4314,17 +4323,40 @@ check_attr_table:
lcb = NULL;
check_attribute_names2:
- if (rst->attr_names_len && oatbl) {
- struct ATTR_NAME_ENTRY *ane = attr_names;
- while (ane->off) {
+ if (attr_names && oatbl) {
+ off = 0;
+ for (;;) {
+ /* Check we can use attribute name entry 'ane'. */
+ static_assert(sizeof(*ane) == 4);
+ if (off + sizeof(*ane) > attr_names_bytes) {
+ /* just ignore the rest. */
+ break;
+ }
+
+ ane = Add2Ptr(attr_names, off);
+ t16 = le16_to_cpu(ane->off);
+ if (!t16) {
+ /* this is the only valid exit. */
+ break;
+ }
+
+ /* Check we can use open attribute entry 'oe'. */
+ if (t16 + sizeof(*oe) > oatbl_bytes) {
+ /* just ignore the rest. */
+ break;
+ }
+
/* TODO: Clear table on exit! */
- oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
+ oe = Add2Ptr(oatbl, t16);
t16 = le16_to_cpu(ane->name_bytes);
+ off += t16 + sizeof(*ane);
+ if (off > attr_names_bytes) {
+ /* just ignore the rest. */
+ break;
+ }
oe->name_len = t16 / sizeof(short);
oe->ptr = ane->name;
oe->is_attr_name = 2;
- ane = Add2Ptr(ane,
- sizeof(struct ATTR_NAME_ENTRY) + t16);
}
}
@@ -4520,7 +4552,6 @@ copy_lcns:
}
}
goto next_log_record_analyze;
- ;
}
case OpenNonresidentAttribute:
@@ -4659,7 +4690,7 @@ end_log_records_enumerate:
* table are not empty.
*/
if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total))
- goto end_reply;
+ goto end_replay;
sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
if (is_ro)
@@ -5088,7 +5119,7 @@ undo_action_done:
sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY;
-end_reply:
+end_replay:
err = 0;
if (is_ro)
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 626d3f2c7e2d..0fa636038b4e 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -2650,8 +2650,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len)
{
int err;
struct ATTRIB *attr;
+ u32 uni_bytes;
struct ntfs_inode *ni = sbi->volume.ni;
- const u8 max_ulen = 0x80; /* TODO: use attrdef to get maximum length */
/* Allocate PATH_MAX bytes. */
struct cpu_str *uni = __getname();
@@ -2663,7 +2663,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len)
if (err < 0)
goto out;
- if (uni->len > max_ulen) {
+ uni_bytes = uni->len * sizeof(u16);
+ if (uni_bytes > NTFS_LABEL_MAX_LENGTH * sizeof(u16)) {
ntfs_warn(sbi->sb, "new label is too long");
err = -EFBIG;
goto out;
@@ -2674,13 +2675,13 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len)
/* Ignore any errors. */
ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL);
- err = ni_insert_resident(ni, uni->len * sizeof(u16), ATTR_LABEL, NULL,
- 0, &attr, NULL, NULL);
+ err = ni_insert_resident(ni, uni_bytes, ATTR_LABEL, NULL, 0, &attr,
+ NULL, NULL);
if (err < 0)
goto unlock_out;
/* write new label in on-disk struct. */
- memcpy(resident_data(attr), uni->name, uni->len * sizeof(u16));
+ memcpy(resident_data(attr), uni->name, uni_bytes);
/* update cached value of current label. */
if (len >= ARRAY_SIZE(sbi->volume.label))
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index d0f15bbf78f6..9089c58a005c 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -978,7 +978,7 @@ static struct indx_node *indx_new(struct ntfs_index *indx,
hdr->used =
cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64));
de_set_vbn_le(e, *sub_vbn);
- hdr->flags = 1;
+ hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES;
} else {
e->size = cpu_to_le16(sizeof(struct NTFS_DE));
hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE));
@@ -1683,7 +1683,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64));
e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST;
- hdr->flags = 1;
+ hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES;
hdr->used = hdr->total =
cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr));
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 0f1664db94ad..6b0bdc474e76 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -18,7 +18,7 @@
#include "ntfs_fs.h"
/*
- * ntfs_read_mft - Read record and parses MFT.
+ * ntfs_read_mft - Read record and parse MFT.
*/
static struct inode *ntfs_read_mft(struct inode *inode,
const struct cpu_str *name,
@@ -441,10 +441,9 @@ end_enum:
* Usually a hard links to directories are disabled.
*/
inode->i_op = &ntfs_dir_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_dir_operations;
- else
- inode->i_fop = &ntfs_dir_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_dir_operations :
+ &ntfs_dir_operations;
ni->i_valid = 0;
} else if (S_ISLNK(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
@@ -454,10 +453,9 @@ end_enum:
} else if (S_ISREG(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
inode->i_op = &ntfs_file_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_file_operations;
- else
- inode->i_fop = &ntfs_file_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_file_operations :
+ &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
if (ino != MFT_REC_MFT)
@@ -580,10 +578,11 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
bh->b_blocknr = RESIDENT_LCN;
bh->b_size = block_size;
if (!folio) {
+ /* direct io (read) or bmap call */
err = 0;
} else {
ni_lock(ni);
- err = attr_data_read_resident(ni, &folio->page);
+ err = attr_data_read_resident(ni, folio);
ni_unlock(ni);
if (!err)
@@ -710,25 +709,24 @@ static sector_t ntfs_bmap(struct address_space *mapping, sector_t block)
static int ntfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
int err;
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
if (is_resident(ni)) {
ni_lock(ni);
- err = attr_data_read_resident(ni, page);
+ err = attr_data_read_resident(ni, folio);
ni_unlock(ni);
if (err != E_NTFS_NONRESIDENT) {
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
}
if (is_compressed(ni)) {
ni_lock(ni);
- err = ni_readpage_cmpr(ni, page);
+ err = ni_readpage_cmpr(ni, folio);
ni_unlock(ni);
return err;
}
@@ -872,7 +870,7 @@ static int ntfs_resident_writepage(struct folio *folio,
return -EIO;
ni_lock(ni);
- ret = attr_data_write_resident(ni, &folio->page);
+ ret = attr_data_write_resident(ni, folio);
ni_unlock(ni);
if (ret != E_NTFS_NONRESIDENT)
@@ -914,24 +912,25 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
*pagep = NULL;
if (is_resident(ni)) {
- struct page *page =
- grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT);
+ struct folio *folio = __filemap_get_folio(
+ mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
- if (!page) {
- err = -ENOMEM;
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
goto out;
}
ni_lock(ni);
- err = attr_data_read_resident(ni, page);
+ err = attr_data_read_resident(ni, folio);
ni_unlock(ni);
if (!err) {
- *pagep = page;
+ *pagep = &folio->page;
goto out;
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (err != E_NTFS_NONRESIDENT)
goto out;
@@ -950,6 +949,7 @@ out:
int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
u32 len, u32 copied, struct page *page, void *fsdata)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
@@ -958,26 +958,26 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
if (is_resident(ni)) {
ni_lock(ni);
- err = attr_data_write_resident(ni, page);
+ err = attr_data_write_resident(ni, folio);
ni_unlock(ni);
if (!err) {
+ struct buffer_head *head = folio_buffers(folio);
dirty = true;
- /* Clear any buffers in page. */
- if (page_has_buffers(page)) {
- struct buffer_head *head, *bh;
+ /* Clear any buffers in folio. */
+ if (head) {
+ struct buffer_head *bh = head;
- bh = head = page_buffers(page);
do {
clear_buffer_dirty(bh);
clear_buffer_mapped(bh);
set_buffer_uptodate(bh);
} while (head != (bh = bh->b_this_page));
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
err = copied;
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
} else {
err = generic_write_end(file, mapping, pos, len, copied, page,
fsdata);
@@ -1093,33 +1093,31 @@ int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
if (!ret && i2)
ret = writeback_inode(i2);
if (!ret)
- ret = sync_blockdev_nowait(sb->s_bdev);
+ ret = filemap_flush(sb->s_bdev_file->f_mapping);
return ret;
}
-int inode_write_data(struct inode *inode, const void *data, size_t bytes)
+/*
+ * Helper function to read file.
+ */
+int inode_read_data(struct inode *inode, void *data, size_t bytes)
{
pgoff_t idx;
+ struct address_space *mapping = inode->i_mapping;
- /* Write non resident data. */
for (idx = 0; bytes; idx++) {
size_t op = bytes > PAGE_SIZE ? PAGE_SIZE : bytes;
- struct page *page = ntfs_map_page(inode->i_mapping, idx);
+ struct page *page = read_mapping_page(mapping, idx, NULL);
+ void *kaddr;
if (IS_ERR(page))
return PTR_ERR(page);
- lock_page(page);
- WARN_ON(!PageUptodate(page));
- ClearPageUptodate(page);
-
- memcpy(page_address(page), data, op);
-
- flush_dcache_page(page);
- SetPageUptodate(page);
- unlock_page(page);
+ kaddr = kmap_atomic(page);
+ memcpy(data, kaddr, op);
+ kunmap_atomic(kaddr);
- ntfs_unmap_page(page);
+ put_page(page);
bytes -= op;
data = Add2Ptr(data, PAGE_SIZE);
@@ -1508,7 +1506,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8);
attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
attr->flags = ATTR_FLAG_COMPRESSED;
- attr->nres.c_unit = COMPRESSION_UNIT;
+ attr->nres.c_unit = NTFS_LZNT_CUNIT;
asize = SIZEOF_NONRESIDENT_EX + 8;
} else {
attr->size = cpu_to_le32(SIZEOF_NONRESIDENT + 8);
@@ -1559,7 +1557,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
/*
* Below function 'ntfs_save_wsl_perm' requires 0x78 bytes.
- * It is good idea to keep extened attributes resident.
+ * It is good idea to keep extended attributes resident.
*/
if (asize + t16 + 0x78 + 8 > sbi->record_size) {
CLST alen;
@@ -1628,10 +1626,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
if (S_ISDIR(mode)) {
inode->i_op = &ntfs_dir_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_dir_operations;
- else
- inode->i_fop = &ntfs_dir_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_dir_operations :
+ &ntfs_dir_operations;
} else if (S_ISLNK(mode)) {
inode->i_op = &ntfs_link_inode_operations;
inode->i_fop = NULL;
@@ -1640,10 +1637,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
inode_nohighmem(inode);
} else if (S_ISREG(mode)) {
inode->i_op = &ntfs_file_inode_operations;
- if (is_legacy_ntfs(inode->i_sb))
- inode->i_fop = &ntfs_legacy_file_operations;
- else
- inode->i_fop = &ntfs_file_operations;
+ inode->i_fop = unlikely(is_legacy_ntfs(sb)) ?
+ &ntfs_legacy_file_operations :
+ &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
init_rwsem(&ni->file.run_lock);
@@ -1668,7 +1664,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
* The packed size of extended attribute is stored in direntry too.
* 'fname' here points to inside new_de.
*/
- ntfs_save_wsl_perm(inode, &fname->dup.ea_size);
+ err = ntfs_save_wsl_perm(inode, &fname->dup.ea_size);
+ if (err)
+ goto out6;
/*
* update ea_size in file_name attribute too.
@@ -1712,6 +1710,12 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
goto out2;
out6:
+ attr = ni_find_attr(ni, NULL, NULL, ATTR_EA, NULL, 0, NULL, NULL);
+ if (attr && attr->non_res) {
+ /* Delete ATTR_EA, if non-resident. */
+ attr_set_size(ni, ATTR_EA, NULL, 0, NULL, 0, NULL, false, NULL);
+ }
+
if (rp_inserted)
ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref);
@@ -2133,5 +2137,6 @@ const struct address_space_operations ntfs_aops = {
const struct address_space_operations ntfs_aops_cmpr = {
.read_folio = ntfs_read_folio,
.readahead = ntfs_readahead,
+ .dirty_folio = block_dirty_folio,
};
// clang-format on
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index 71498421ce60..f16d318c4372 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -112,9 +112,7 @@ static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir,
}
/*
- * ntfs_mknod
- *
- * inode_operations::mknod
+ * ntfs_mknod - inode_operations::mknod
*/
static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
@@ -509,6 +507,8 @@ const struct inode_operations ntfs_dir_inode_operations = {
.getattr = ntfs_getattr,
.listxattr = ntfs_listxattr,
.fiemap = ntfs_fiemap,
+ .fileattr_get = ntfs_fileattr_get,
+ .fileattr_set = ntfs_fileattr_set,
};
const struct inode_operations ntfs_special_inode_operations = {
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 3d6143c7abc0..241f2ffdd920 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -82,9 +82,6 @@ typedef u32 CLST;
#define RESIDENT_LCN ((CLST)-2)
#define COMPRESSED_LCN ((CLST)-3)
-#define COMPRESSION_UNIT 4
-#define COMPRESS_MAX_CLUSTER 0x1000
-
enum RECORD_NUM {
MFT_REC_MFT = 0,
MFT_REC_MIRR = 1,
@@ -696,14 +693,15 @@ static inline bool de_has_vcn_ex(const struct NTFS_DE *e)
offsetof(struct ATTR_FILE_NAME, name) + \
NTFS_NAME_LEN * sizeof(short), 8)
+#define NTFS_INDEX_HDR_HAS_SUBNODES cpu_to_le32(1)
+
struct INDEX_HDR {
__le32 de_off; // 0x00: The offset from the start of this structure
// to the first NTFS_DE.
__le32 used; // 0x04: The size of this structure plus all
// entries (quad-word aligned).
__le32 total; // 0x08: The allocated size of for this structure plus all entries.
- u8 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory.
- u8 res[3];
+ __le32 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory.
//
// de_off + used <= total
@@ -751,7 +749,7 @@ static inline struct NTFS_DE *hdr_next_de(const struct INDEX_HDR *hdr,
static inline bool hdr_has_subnode(const struct INDEX_HDR *hdr)
{
- return hdr->flags & 1;
+ return hdr->flags & NTFS_INDEX_HDR_HAS_SUBNODES;
}
struct INDEX_BUFFER {
@@ -771,7 +769,7 @@ static inline bool ib_is_empty(const struct INDEX_BUFFER *ib)
static inline bool ib_is_leaf(const struct INDEX_BUFFER *ib)
{
- return !(ib->ihdr.flags & 1);
+ return !(ib->ihdr.flags & NTFS_INDEX_HDR_HAS_SUBNODES);
}
/* Index root structure ( 0x90 ). */
@@ -1002,9 +1000,6 @@ struct REPARSE_POINT {
static_assert(sizeof(struct REPARSE_POINT) == 0x18);
-/* Maximum allowed size of the reparse data. */
-#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE (16 * 1024)
-
/*
* The value of the following constant needs to satisfy the following
* conditions:
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index f9ed6d2b065d..e5255a251929 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -383,7 +383,7 @@ struct ntfs_inode {
struct rw_semaphore run_lock;
struct runs_tree run;
#ifdef CONFIG_NTFS3_LZX_XPRESS
- struct page *offs_page;
+ struct folio *offs_folio;
#endif
} file;
};
@@ -434,8 +434,8 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
struct ATTRIB **ret);
int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
CLST *len, bool *new, bool zero);
-int attr_data_read_resident(struct ntfs_inode *ni, struct page *page);
-int attr_data_write_resident(struct ntfs_inode *ni, struct page *page);
+int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio);
+int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio);
int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
const __le16 *name, u8 name_len, struct runs_tree *run,
CLST vcn);
@@ -497,6 +497,9 @@ extern const struct file_operations ntfs_dir_operations;
extern const struct file_operations ntfs_legacy_dir_operations;
/* Globals from file.c */
+int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct fileattr *fa);
int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, u32 flags);
int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
@@ -564,7 +567,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint);
#define _ni_write_inode(i, w) ni_write_inode(i, w, __func__)
int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo,
__u64 vbo, __u64 len);
-int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page);
+int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio);
int ni_decompress_file(struct ntfs_inode *ni);
int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
u32 pages_per_frame);
@@ -716,7 +719,7 @@ int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc);
int ntfs_sync_inode(struct inode *inode);
int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
-int inode_write_data(struct inode *inode, const void *data, size_t bytes);
+int inode_read_data(struct inode *inode, void *data, size_t bytes);
int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const struct cpu_str *uni,
umode_t mode, dev_t dev, const char *symname, u32 size,
@@ -910,22 +913,6 @@ static inline bool ntfs_is_meta_file(struct ntfs_sb_info *sbi, CLST rno)
rno == sbi->usn_jrnl_no;
}
-static inline void ntfs_unmap_page(struct page *page)
-{
- kunmap(page);
- put_page(page);
-}
-
-static inline struct page *ntfs_map_page(struct address_space *mapping,
- unsigned long index)
-{
- struct page *page = read_mapping_page(mapping, index, NULL);
-
- if (!IS_ERR(page))
- kmap(page);
- return page;
-}
-
static inline size_t wnd_zone_bit(const struct wnd_bitmap *wnd)
{
return wnd->zone_bit;
@@ -1156,6 +1143,13 @@ static inline void le64_sub_cpu(__le64 *var, u64 val)
*var = cpu_to_le64(le64_to_cpu(*var) - val);
}
+#if IS_ENABLED(CONFIG_NTFS_FS)
bool is_legacy_ntfs(struct super_block *sb);
+#else
+static inline bool is_legacy_ntfs(struct super_block *sb)
+{
+ return false;
+}
+#endif
#endif /* _LINUX_NTFS3_NTFS_FS_H */
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index c5b688c5f984..a8758b85803f 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -275,7 +275,7 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_flag_no("acl", Opt_acl),
fsparam_string("iocharset", Opt_iocharset),
fsparam_flag_no("prealloc", Opt_prealloc),
- fsparam_flag_no("nocase", Opt_nocase),
+ fsparam_flag_no("case", Opt_nocase),
{}
};
// clang-format on
@@ -464,7 +464,7 @@ static int ntfs3_volinfo(struct seq_file *m, void *o)
struct super_block *sb = m->private;
struct ntfs_sb_info *sbi = sb->s_fs_info;
- seq_printf(m, "ntfs%d.%d\n%u\n%zu\n\%zu\n%zu\n%s\n%s\n",
+ seq_printf(m, "ntfs%d.%d\n%u\n%zu\n%zu\n%zu\n%s\n%s\n",
sbi->volume.major_ver, sbi->volume.minor_ver,
sbi->cluster_size, sbi->used.bitmap.nbits,
sbi->mft.bitmap.nbits,
@@ -1159,7 +1159,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
CLST vcn, lcn, len;
struct ATTRIB *attr;
const struct VOLUME_INFO *info;
- u32 idx, done, bytes;
+ u32 done, bytes;
struct ATTR_DEF_ENTRY *t;
u16 *shared;
struct MFT_REF ref;
@@ -1201,7 +1201,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
/*
* Load $Volume. This should be done before $LogFile
- * 'cause 'sbi->volume.ni' is used 'ntfs_set_state'.
+ * 'cause 'sbi->volume.ni' is used in 'ntfs_set_state'.
*/
ref.low = cpu_to_le32(MFT_REC_VOL);
ref.seq = cpu_to_le16(MFT_REC_VOL);
@@ -1431,31 +1431,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto put_inode_out;
}
- for (done = idx = 0; done < bytes; done += PAGE_SIZE, idx++) {
- unsigned long tail = bytes - done;
- struct page *page = ntfs_map_page(inode->i_mapping, idx);
+ /* Read the entire file. */
+ err = inode_read_data(inode, sbi->def_table, bytes);
+ if (err) {
+ ntfs_err(sb, "Failed to read $AttrDef (%d).", err);
+ goto put_inode_out;
+ }
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- ntfs_err(sb, "Failed to read $AttrDef (%d).", err);
- goto put_inode_out;
- }
- memcpy(Add2Ptr(t, done), page_address(page),
- min(PAGE_SIZE, tail));
- ntfs_unmap_page(page);
-
- if (!idx && ATTR_STD != t->type) {
- ntfs_err(sb, "$AttrDef is corrupted.");
- err = -EINVAL;
- goto put_inode_out;
- }
+ if (ATTR_STD != t->type) {
+ ntfs_err(sb, "$AttrDef is corrupted.");
+ err = -EINVAL;
+ goto put_inode_out;
}
t += 1;
sbi->def_entries = 1;
done = sizeof(struct ATTR_DEF_ENTRY);
- sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE;
- sbi->ea_max_size = 0x10000; /* default formatter value */
while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) {
u32 t32 = le32_to_cpu(t->type);
@@ -1491,27 +1482,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto put_inode_out;
}
- for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) {
- const __le16 *src;
- u16 *dst = Add2Ptr(sbi->upcase, idx << PAGE_SHIFT);
- struct page *page = ntfs_map_page(inode->i_mapping, idx);
-
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- ntfs_err(sb, "Failed to read $UpCase (%d).", err);
- goto put_inode_out;
- }
-
- src = page_address(page);
+ /* Read the entire file. */
+ err = inode_read_data(inode, sbi->upcase, 0x10000 * sizeof(short));
+ if (err) {
+ ntfs_err(sb, "Failed to read $UpCase (%d).", err);
+ goto put_inode_out;
+ }
#ifdef __BIG_ENDIAN
- for (i = 0; i < PAGE_SIZE / sizeof(u16); i++)
+ {
+ const __le16 *src = sbi->upcase;
+ u16 *dst = sbi->upcase;
+
+ for (i = 0; i < 0x10000; i++)
*dst++ = le16_to_cpu(*src++);
-#else
- memcpy(dst, src, PAGE_SIZE);
-#endif
- ntfs_unmap_page(page);
}
+#endif
shared = ntfs_set_shared(sbi->upcase, 0x10000 * sizeof(short));
if (shared && sbi->upcase != shared) {
@@ -1847,10 +1833,8 @@ bool is_legacy_ntfs(struct super_block *sb)
#else
static inline void register_as_ntfs_legacy(void) {}
static inline void unregister_as_ntfs_legacy(void) {}
-bool is_legacy_ntfs(struct super_block *sb) { return false; }
#endif
-
// clang-format on
static int __init init_ntfs_fs(void)
@@ -1876,8 +1860,7 @@ static int __init init_ntfs_fs(void)
ntfs_inode_cachep = kmem_cache_create(
"ntfs_inode_cache", sizeof(struct ntfs_inode), 0,
- (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT),
- init_once);
+ (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), init_once);
if (!ntfs_inode_cachep) {
err = -ENOMEM;
goto out1;
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 73785dece7a7..0703e1ae32b2 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -195,10 +195,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
{
const struct EA_INFO *info;
struct EA_FULL *ea_all = NULL;
- const struct EA_FULL *ea;
u32 off, size;
int err;
- int ea_size;
size_t ret;
err = ntfs_read_ea(ni, &ea_all, 0, &info);
@@ -212,16 +210,18 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
/* Enumerate all xattrs. */
ret = 0;
- for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) {
- ea = Add2Ptr(ea_all, off);
- ea_size = unpacked_ea_size(ea);
+ off = 0;
+ while (off + sizeof(struct EA_FULL) < size) {
+ const struct EA_FULL *ea = Add2Ptr(ea_all, off);
+ int ea_size = unpacked_ea_size(ea);
+ u8 name_len = ea->name_len;
- if (!ea->name_len)
+ if (!name_len)
break;
- if (ea->name_len > ea_size) {
+ if (name_len > ea_size) {
ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
- err = -EINVAL; /* corrupted fs */
+ err = -EINVAL; /* corrupted fs. */
break;
}
@@ -230,16 +230,17 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
if (off + ea_size > size)
break;
- if (ret + ea->name_len + 1 > bytes_per_buffer) {
+ if (ret + name_len + 1 > bytes_per_buffer) {
err = -ERANGE;
goto out;
}
- memcpy(buffer + ret, ea->name, ea->name_len);
- buffer[ret + ea->name_len] = 0;
+ memcpy(buffer + ret, ea->name, name_len);
+ buffer[ret + name_len] = 0;
}
- ret += ea->name_len + 1;
+ ret += name_len + 1;
+ off += ea_size;
}
out:
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index d620d4c53c6f..f0beb173dbba 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -294,13 +294,16 @@ out:
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
*/
-static int ocfs2_check_dir_entry(struct inode * dir,
- struct ocfs2_dir_entry * de,
- struct buffer_head * bh,
+static int ocfs2_check_dir_entry(struct inode *dir,
+ struct ocfs2_dir_entry *de,
+ struct buffer_head *bh,
+ char *buf,
+ unsigned int size,
unsigned long offset)
{
const char *error_msg = NULL;
const int rlen = le16_to_cpu(de->rec_len);
+ const unsigned long next_offset = ((char *) de - buf) + rlen;
if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
@@ -308,9 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
error_msg = "rec_len % 4 != 0";
else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (unlikely(
- ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
- error_msg = "directory entry across blocks";
+ else if (unlikely(next_offset > size))
+ error_msg = "directory entry overrun";
+ else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
+ next_offset != size)
+ error_msg = "directory entry too close to end";
if (unlikely(error_msg != NULL))
mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
@@ -352,16 +357,17 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
de_buf = first_de;
dlimit = de_buf + bytes;
- while (de_buf < dlimit) {
+ while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
de = (struct ocfs2_dir_entry *) de_buf;
- if (de_buf + namelen <= dlimit &&
+ if (de->name + namelen <= dlimit &&
ocfs2_match(namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de,
+ bytes, offset)) {
ret = -1;
goto bail;
}
@@ -1138,7 +1144,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
pde = NULL;
de = (struct ocfs2_dir_entry *) first_de;
while (i < bytes) {
- if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
status = -EIO;
mlog_errno(status);
goto bail;
@@ -1635,7 +1641,8 @@ int __ocfs2_add_entry(handle_t *handle,
/* These checks should've already been passed by the
* prepare function, but I guess we can leave them
* here anyway. */
- if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start,
+ size, offset)) {
retval = -ENOENT;
goto bail;
}
@@ -1774,7 +1781,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
}
de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
- if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
+ if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
+ i_size_read(inode), ctx->pos)) {
/* On error, skip the f_pos to the end. */
ctx->pos = i_size_read(inode);
break;
@@ -1867,7 +1875,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
while (ctx->pos < i_size_read(inode)
&& offset < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
- if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
+ sb->s_blocksize, offset)) {
/* On error, skip the f_pos to the
next block. */
ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
@@ -3339,7 +3348,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
struct super_block *sb = dir->i_sb;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_dir_entry *de, *last_de = NULL;
- char *de_buf, *limit;
+ char *first_de, *de_buf, *limit;
unsigned long offset = 0;
unsigned int rec_len, new_rec_len, free_space;
@@ -3352,14 +3361,16 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
else
free_space = dir->i_sb->s_blocksize - i_size_read(dir);
- de_buf = di->id2.i_data.id_data;
+ first_de = di->id2.i_data.id_data;
+ de_buf = first_de;
limit = de_buf + i_size_read(dir);
rec_len = OCFS2_DIR_REC_LEN(namelen);
while (de_buf < limit) {
de = (struct ocfs2_dir_entry *)de_buf;
- if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de,
+ i_size_read(dir), offset)) {
ret = -ENOENT;
goto out;
}
@@ -3441,7 +3452,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
/* move to next block */
de = (struct ocfs2_dir_entry *) bh->b_data;
}
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize,
+ offset)) {
status = -ENOENT;
goto bail;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index cb40cafbc062..da78a04d6f0b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -221,12 +221,12 @@ struct ocfs2_lock_res_ops {
*/
#define LOCK_TYPE_USES_LVB 0x2
-static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.get_osb = ocfs2_get_inode_osb,
.check_downconvert = ocfs2_check_meta_downconvert,
.set_lvb = ocfs2_set_meta_lvb,
@@ -234,50 +234,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_super_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_super_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH,
};
-static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
.downconvert_worker = ocfs2_dentry_convert_worker,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_flock_lops = {
.get_osb = ocfs2_get_file_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
.set_lvb = ocfs2_set_qinfo_lvb,
.get_osb = ocfs2_get_qinfo_osb,
.flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
.check_downconvert = ocfs2_check_refcount_downconvert,
.downconvert_worker = ocfs2_refcount_convert_worker,
.flags = 0,
@@ -510,7 +510,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
- struct ocfs2_lock_res_ops *ops,
+ const struct ocfs2_lock_res_ops *ops,
void *priv)
{
res->l_type = type;
@@ -553,7 +553,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
unsigned int generation,
struct inode *inode)
{
- struct ocfs2_lock_res_ops *ops;
+ const struct ocfs2_lock_res_ops *ops;
switch(type) {
case OCFS2_LOCK_TYPE_RW:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 4d1ea8703fcd..59c92353151a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2189,8 +2189,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
* @osb: ocfs2 file system
* @ret_orphan_dir: Orphan dir inode - returned locked!
* @blkno: Actual block number of the inode to be inserted into orphan dir.
+ * @name: Buffer to store the name of the orphan.
* @lookup: dir lookup result, to be passed back into functions like
* ocfs2_orphan_add
+ * @dio: Flag indicating if direct IO is being used or not.
*
* Returns zero on success and the ret_orphan_dir, name and lookup
* fields will be populated.
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8fe826143d7b..51c52768132d 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -154,7 +154,7 @@ struct ocfs2_lock_stats {
struct ocfs2_lock_res {
void *l_priv;
- struct ocfs2_lock_res_ops *l_ops;
+ const struct ocfs2_lock_res_ops *l_ops;
struct list_head l_blocked_list;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index c973c03f6fd8..10157d9d7a9c 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations o2cb_stack_ops = {
+static const struct ocfs2_stack_operations o2cb_stack_ops = {
.connect = o2cb_cluster_connect,
.disconnect = o2cb_cluster_disconnect,
.this_node = o2cb_cluster_this_node,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index c11406cd87a8..77edcd70f72c 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1065,7 +1065,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
+static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
.connect = user_cluster_connect,
.disconnect = user_cluster_disconnect,
.this_node = user_cluster_this_node,
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 3636847fae19..02ab072c528a 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -223,7 +223,7 @@ struct ocfs2_stack_operations {
*/
struct ocfs2_stack_plugin {
char *sp_name;
- struct ocfs2_stack_operations *sp_ops;
+ const struct ocfs2_stack_operations *sp_ops;
struct module *sp_owner;
/* These are managed by the stackglue code. */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b81213ed7b8..35c0cc2a51af 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1062,13 +1062,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
return i_ret + b_ret;
}
-static int ocfs2_xattr_find_entry(int name_index,
+static int ocfs2_xattr_find_entry(struct inode *inode, int name_index,
const char *name,
struct ocfs2_xattr_search *xs)
{
struct ocfs2_xattr_entry *entry;
size_t name_len;
- int i, cmp = 1;
+ int i, name_offset, cmp = 1;
if (name == NULL)
return -EINVAL;
@@ -1076,13 +1076,22 @@ static int ocfs2_xattr_find_entry(int name_index,
name_len = strlen(name);
entry = xs->here;
for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
+ if ((void *)entry >= xs->end) {
+ ocfs2_error(inode->i_sb, "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
cmp = name_index - ocfs2_xattr_get_type(entry);
if (!cmp)
cmp = name_len - entry->xe_name_len;
- if (!cmp)
- cmp = memcmp(name, (xs->base +
- le16_to_cpu(entry->xe_name_offset)),
- name_len);
+ if (!cmp) {
+ name_offset = le16_to_cpu(entry->xe_name_offset);
+ if ((xs->base + name_offset + name_len) > xs->end) {
+ ocfs2_error(inode->i_sb,
+ "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
+ cmp = memcmp(name, (xs->base + name_offset), name_len);
+ }
if (cmp == 0)
break;
entry += 1;
@@ -1166,7 +1175,7 @@ static int ocfs2_xattr_ibody_get(struct inode *inode,
xs->base = (void *)xs->header;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret)
return ret;
size = le64_to_cpu(xs->here->xe_value_size);
@@ -2698,7 +2707,7 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
/* Find the named attribute. */
if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret && ret != -ENODATA)
return ret;
xs->not_found = ret;
@@ -2833,7 +2842,7 @@ static int ocfs2_xattr_block_find(struct inode *inode,
xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
} else
ret = ocfs2_xattr_index_block_find(inode, blk_bh,
name_index,
diff --git a/fs/pidfs.c b/fs/pidfs.c
index c9cb14181def..7ffdc88dfb52 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -119,7 +119,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct task_struct *task __free(put_task) = NULL;
struct nsproxy *nsp __free(put_nsproxy) = NULL;
struct pid *pid = pidfd_pid(file);
- struct ns_common *ns_common;
+ struct ns_common *ns_common = NULL;
if (arg)
return -EINVAL;
@@ -146,52 +146,73 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (cmd) {
/* Namespaces that hang of nsproxy. */
case PIDFD_GET_CGROUP_NAMESPACE:
- get_cgroup_ns(nsp->cgroup_ns);
- ns_common = to_ns_common(nsp->cgroup_ns);
+ if (IS_ENABLED(CONFIG_CGROUPS)) {
+ get_cgroup_ns(nsp->cgroup_ns);
+ ns_common = to_ns_common(nsp->cgroup_ns);
+ }
break;
case PIDFD_GET_IPC_NAMESPACE:
- get_ipc_ns(nsp->ipc_ns);
- ns_common = to_ns_common(nsp->ipc_ns);
+ if (IS_ENABLED(CONFIG_IPC_NS)) {
+ get_ipc_ns(nsp->ipc_ns);
+ ns_common = to_ns_common(nsp->ipc_ns);
+ }
break;
case PIDFD_GET_MNT_NAMESPACE:
get_mnt_ns(nsp->mnt_ns);
ns_common = to_ns_common(nsp->mnt_ns);
break;
case PIDFD_GET_NET_NAMESPACE:
- ns_common = to_ns_common(nsp->net_ns);
- get_net_ns(ns_common);
+ if (IS_ENABLED(CONFIG_NET_NS)) {
+ ns_common = to_ns_common(nsp->net_ns);
+ get_net_ns(ns_common);
+ }
break;
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
- get_pid_ns(nsp->pid_ns_for_children);
- ns_common = to_ns_common(nsp->pid_ns_for_children);
+ if (IS_ENABLED(CONFIG_PID_NS)) {
+ get_pid_ns(nsp->pid_ns_for_children);
+ ns_common = to_ns_common(nsp->pid_ns_for_children);
+ }
break;
case PIDFD_GET_TIME_NAMESPACE:
- get_time_ns(nsp->time_ns);
- ns_common = to_ns_common(nsp->time_ns);
+ if (IS_ENABLED(CONFIG_TIME_NS)) {
+ get_time_ns(nsp->time_ns);
+ ns_common = to_ns_common(nsp->time_ns);
+ }
break;
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
- get_time_ns(nsp->time_ns_for_children);
- ns_common = to_ns_common(nsp->time_ns_for_children);
+ if (IS_ENABLED(CONFIG_TIME_NS)) {
+ get_time_ns(nsp->time_ns_for_children);
+ ns_common = to_ns_common(nsp->time_ns_for_children);
+ }
break;
case PIDFD_GET_UTS_NAMESPACE:
- get_uts_ns(nsp->uts_ns);
- ns_common = to_ns_common(nsp->uts_ns);
+ if (IS_ENABLED(CONFIG_UTS_NS)) {
+ get_uts_ns(nsp->uts_ns);
+ ns_common = to_ns_common(nsp->uts_ns);
+ }
break;
/* Namespaces that don't hang of nsproxy. */
case PIDFD_GET_USER_NAMESPACE:
- rcu_read_lock();
- ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
- rcu_read_unlock();
+ if (IS_ENABLED(CONFIG_USER_NS)) {
+ rcu_read_lock();
+ ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
+ rcu_read_unlock();
+ }
break;
case PIDFD_GET_PID_NAMESPACE:
- rcu_read_lock();
- ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
- rcu_read_unlock();
+ if (IS_ENABLED(CONFIG_PID_NS)) {
+ rcu_read_lock();
+ ns_common = to_ns_common( get_pid_ns(task_active_pid_ns(task)));
+ rcu_read_unlock();
+ }
break;
default:
return -ENOIOCTLCMD;
}
+ if (!ns_common)
+ return -EOPNOTSUPP;
+
/* open_namespace() unconditionally consumes the reference */
return open_namespace(ns_common);
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 50c8a8596b52..7dff2aa50a6d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1469,7 +1469,7 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
return 0;
}
-static int proc_dopipe_max_size(struct ctl_table *table, int write,
+static int proc_dopipe_max_size(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_douintvec(table, write, buffer, lenp, ppos,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index a71ac5379584..a8a8576d8592 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -13,6 +13,7 @@
#include <linux/binfmts.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
+#include <linux/mm.h>
struct ctl_table_header;
struct mempolicy;
@@ -142,6 +143,38 @@ unsigned name_to_int(const struct qstr *qstr);
/* Worst case buffer size needed for holding an integer. */
#define PROC_NUMBUF 13
+/**
+ * folio_precise_page_mapcount() - Number of mappings of this folio page.
+ * @folio: The folio.
+ * @page: The page.
+ *
+ * The number of present user page table entries that reference this page
+ * as tracked via the RMAP: either referenced directly (PTE) or as part of
+ * a larger area that covers this page (e.g., PMD).
+ *
+ * Use this function only for the calculation of existing statistics
+ * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount).
+ *
+ * Do not add new users.
+ *
+ * Returns: The number of mappings of this folio page. 0 for
+ * folios that are not mapped to user space or are not tracked via the RMAP
+ * (e.g., shared zeropage).
+ */
+static inline int folio_precise_page_mapcount(struct folio *folio,
+ struct page *page)
+{
+ int mapcount = atomic_read(&page->_mapcount) + 1;
+
+ /* Handle page_has_type() pages */
+ if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+ mapcount = 0;
+ if (folio_test_large(folio))
+ mapcount += folio_entire_mapcount(folio);
+
+ return mapcount;
+}
+
/*
* array.c
*/
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2fb64bdb64eb..b7a5c84b5819 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -37,21 +37,19 @@ static inline unsigned long get_max_dump_pfn(void)
#endif
}
-/* /proc/kpagecount - an array exposing page counts
+/* /proc/kpagecount - an array exposing page mapcounts
*
* Each entry is a u64 representing the corresponding
- * physical page count.
+ * physical page mapcount.
*/
static ssize_t kpagecount_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
- struct page *ppage;
unsigned long src = *ppos;
unsigned long pfn;
ssize_t ret = 0;
- u64 pcount;
pfn = src / KPMSIZE;
if (src & KPMMASK || count & KPMMASK)
@@ -61,18 +59,19 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
while (count > 0) {
+ struct page *page;
+ u64 mapcount = 0;
+
/*
* TODO: ZONE_DEVICE support requires to identify
* memmaps that were actually initialized.
*/
- ppage = pfn_to_online_page(pfn);
-
- if (!ppage)
- pcount = 0;
- else
- pcount = page_mapcount(ppage);
+ page = pfn_to_online_page(pfn);
+ if (page)
+ mapcount = folio_precise_page_mapcount(page_folio(page),
+ page);
- if (put_user(pcount, out)) {
+ if (put_user(mapcount, out)) {
ret = -EFAULT;
break;
}
@@ -148,19 +147,16 @@ u64 stable_page_flags(const struct page *page)
u |= 1 << KPF_COMPOUND_TAIL;
if (folio_test_hugetlb(folio))
u |= 1 << KPF_HUGE;
- /*
- * We need to check PageLRU/PageAnon
- * to make sure a given page is a thp, not a non-huge compound page.
- */
- else if (folio_test_large(folio)) {
- if ((k & (1 << PG_lru)) || is_anon)
- u |= 1 << KPF_THP;
- else if (is_huge_zero_folio(folio)) {
- u |= 1 << KPF_ZERO_PAGE;
- u |= 1 << KPF_THP;
- }
- } else if (is_zero_pfn(page_to_pfn(page)))
+ else if (folio_test_large(folio) &&
+ folio_test_large_rmappable(folio)) {
+ /* Note: we indicate any THPs here, not just PMD-sized ones */
+ u |= 1 << KPF_THP;
+ } else if (is_huge_zero_folio(folio)) {
u |= 1 << KPF_ZERO_PAGE;
+ u |= 1 << KPF_THP;
+ } else if (is_zero_folio(folio)) {
+ u |= 1 << KPF_ZERO_PAGE;
+ }
/*
* Caveats on high order pages: PG_buddy and PG_slab will only be set
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 71e5039d940d..5f171ad7b436 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -22,6 +22,7 @@
#include <linux/pkeys.h>
#include <linux/minmax.h>
#include <linux/overflow.h>
+#include <linux/buildid.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -239,6 +240,67 @@ static int do_maps_open(struct inode *inode, struct file *file,
sizeof(struct proc_maps_private));
}
+static void get_vma_name(struct vm_area_struct *vma,
+ const struct path **path,
+ const char **name,
+ const char **name_fmt)
+{
+ struct anon_vma_name *anon_name = vma->vm_mm ? anon_vma_name(vma) : NULL;
+
+ *name = NULL;
+ *path = NULL;
+ *name_fmt = NULL;
+
+ /*
+ * Print the dentry name for named mappings, and a
+ * special [heap] marker for the heap:
+ */
+ if (vma->vm_file) {
+ /*
+ * If user named this anon shared memory via
+ * prctl(PR_SET_VMA ..., use the provided name.
+ */
+ if (anon_name) {
+ *name_fmt = "[anon_shmem:%s]";
+ *name = anon_name->name;
+ } else {
+ *path = file_user_path(vma->vm_file);
+ }
+ return;
+ }
+
+ if (vma->vm_ops && vma->vm_ops->name) {
+ *name = vma->vm_ops->name(vma);
+ if (*name)
+ return;
+ }
+
+ *name = arch_vma_name(vma);
+ if (*name)
+ return;
+
+ if (!vma->vm_mm) {
+ *name = "[vdso]";
+ return;
+ }
+
+ if (vma_is_initial_heap(vma)) {
+ *name = "[heap]";
+ return;
+ }
+
+ if (vma_is_initial_stack(vma)) {
+ *name = "[stack]";
+ return;
+ }
+
+ if (anon_name) {
+ *name_fmt = "[anon:%s]";
+ *name = anon_name->name;
+ return;
+ }
+}
+
static void show_vma_header_prefix(struct seq_file *m,
unsigned long start, unsigned long end,
vm_flags_t flags, unsigned long long pgoff,
@@ -262,17 +324,15 @@ static void show_vma_header_prefix(struct seq_file *m,
static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
- struct anon_vma_name *anon_name = NULL;
- struct mm_struct *mm = vma->vm_mm;
- struct file *file = vma->vm_file;
+ const struct path *path;
+ const char *name_fmt, *name;
vm_flags_t flags = vma->vm_flags;
unsigned long ino = 0;
unsigned long long pgoff = 0;
unsigned long start, end;
dev_t dev = 0;
- const char *name = NULL;
- if (file) {
+ if (vma->vm_file) {
const struct inode *inode = file_user_inode(vma->vm_file);
dev = inode->i_sb->s_dev;
@@ -283,57 +343,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
start = vma->vm_start;
end = vma->vm_end;
show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
- if (mm)
- anon_name = anon_vma_name(vma);
- /*
- * Print the dentry name for named mappings, and a
- * special [heap] marker for the heap:
- */
- if (file) {
+ get_vma_name(vma, &path, &name, &name_fmt);
+ if (path) {
seq_pad(m, ' ');
- /*
- * If user named this anon shared memory via
- * prctl(PR_SET_VMA ..., use the provided name.
- */
- if (anon_name)
- seq_printf(m, "[anon_shmem:%s]", anon_name->name);
- else
- seq_path(m, file_user_path(file), "\n");
- goto done;
- }
-
- if (vma->vm_ops && vma->vm_ops->name) {
- name = vma->vm_ops->name(vma);
- if (name)
- goto done;
- }
-
- name = arch_vma_name(vma);
- if (!name) {
- if (!mm) {
- name = "[vdso]";
- goto done;
- }
-
- if (vma_is_initial_heap(vma)) {
- name = "[heap]";
- goto done;
- }
-
- if (vma_is_initial_stack(vma)) {
- name = "[stack]";
- goto done;
- }
-
- if (anon_name) {
- seq_pad(m, ' ');
- seq_printf(m, "[anon:%s]", anon_name->name);
- }
- }
-
-done:
- if (name) {
+ seq_path(m, path, "\n");
+ } else if (name_fmt) {
+ seq_pad(m, ' ');
+ seq_printf(m, name_fmt, name);
+ } else if (name) {
seq_pad(m, ' ');
seq_puts(m, name);
}
@@ -358,11 +376,268 @@ static int pid_maps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_maps_op);
}
+#define PROCMAP_QUERY_VMA_FLAGS ( \
+ PROCMAP_QUERY_VMA_READABLE | \
+ PROCMAP_QUERY_VMA_WRITABLE | \
+ PROCMAP_QUERY_VMA_EXECUTABLE | \
+ PROCMAP_QUERY_VMA_SHARED \
+)
+
+#define PROCMAP_QUERY_VALID_FLAGS_MASK ( \
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA | \
+ PROCMAP_QUERY_FILE_BACKED_VMA | \
+ PROCMAP_QUERY_VMA_FLAGS \
+)
+
+static int query_vma_setup(struct mm_struct *mm)
+{
+ return mmap_read_lock_killable(mm);
+}
+
+static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+ mmap_read_unlock(mm);
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr)
+{
+ return find_vma(mm, addr);
+}
+
+static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
+ unsigned long addr, u32 flags)
+{
+ struct vm_area_struct *vma;
+
+next_vma:
+ vma = query_vma_find_by_addr(mm, addr);
+ if (!vma)
+ goto no_vma;
+
+ /* user requested only file-backed VMA, keep iterating */
+ if ((flags & PROCMAP_QUERY_FILE_BACKED_VMA) && !vma->vm_file)
+ goto skip_vma;
+
+ /* VMA permissions should satisfy query flags */
+ if (flags & PROCMAP_QUERY_VMA_FLAGS) {
+ u32 perm = 0;
+
+ if (flags & PROCMAP_QUERY_VMA_READABLE)
+ perm |= VM_READ;
+ if (flags & PROCMAP_QUERY_VMA_WRITABLE)
+ perm |= VM_WRITE;
+ if (flags & PROCMAP_QUERY_VMA_EXECUTABLE)
+ perm |= VM_EXEC;
+ if (flags & PROCMAP_QUERY_VMA_SHARED)
+ perm |= VM_MAYSHARE;
+
+ if ((vma->vm_flags & perm) != perm)
+ goto skip_vma;
+ }
+
+ /* found covering VMA or user is OK with the matching next VMA */
+ if ((flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) || vma->vm_start <= addr)
+ return vma;
+
+skip_vma:
+ /*
+ * If the user needs closest matching VMA, keep iterating.
+ */
+ addr = vma->vm_end;
+ if (flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA)
+ goto next_vma;
+
+no_vma:
+ return ERR_PTR(-ENOENT);
+}
+
+static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
+{
+ struct procmap_query karg;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ const char *name = NULL;
+ char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
+ __u64 usize;
+ int err;
+
+ if (copy_from_user(&usize, (void __user *)uarg, sizeof(usize)))
+ return -EFAULT;
+ /* argument struct can never be that large, reject abuse */
+ if (usize > PAGE_SIZE)
+ return -E2BIG;
+ /* argument struct should have at least query_flags and query_addr fields */
+ if (usize < offsetofend(struct procmap_query, query_addr))
+ return -EINVAL;
+ err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize);
+ if (err)
+ return err;
+
+ /* reject unknown flags */
+ if (karg.query_flags & ~PROCMAP_QUERY_VALID_FLAGS_MASK)
+ return -EINVAL;
+ /* either both buffer address and size are set, or both should be zero */
+ if (!!karg.vma_name_size != !!karg.vma_name_addr)
+ return -EINVAL;
+ if (!!karg.build_id_size != !!karg.build_id_addr)
+ return -EINVAL;
+
+ mm = priv->mm;
+ if (!mm || !mmget_not_zero(mm))
+ return -ESRCH;
+
+ err = query_vma_setup(mm);
+ if (err) {
+ mmput(mm);
+ return err;
+ }
+
+ vma = query_matching_vma(mm, karg.query_addr, karg.query_flags);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ vma = NULL;
+ goto out;
+ }
+
+ karg.vma_start = vma->vm_start;
+ karg.vma_end = vma->vm_end;
+
+ karg.vma_flags = 0;
+ if (vma->vm_flags & VM_READ)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_READABLE;
+ if (vma->vm_flags & VM_WRITE)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_WRITABLE;
+ if (vma->vm_flags & VM_EXEC)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_EXECUTABLE;
+ if (vma->vm_flags & VM_MAYSHARE)
+ karg.vma_flags |= PROCMAP_QUERY_VMA_SHARED;
+
+ karg.vma_page_size = vma_kernel_pagesize(vma);
+
+ if (vma->vm_file) {
+ const struct inode *inode = file_user_inode(vma->vm_file);
+
+ karg.vma_offset = ((__u64)vma->vm_pgoff) << PAGE_SHIFT;
+ karg.dev_major = MAJOR(inode->i_sb->s_dev);
+ karg.dev_minor = MINOR(inode->i_sb->s_dev);
+ karg.inode = inode->i_ino;
+ } else {
+ karg.vma_offset = 0;
+ karg.dev_major = 0;
+ karg.dev_minor = 0;
+ karg.inode = 0;
+ }
+
+ if (karg.build_id_size) {
+ __u32 build_id_sz;
+
+ err = build_id_parse(vma, build_id_buf, &build_id_sz);
+ if (err) {
+ karg.build_id_size = 0;
+ } else {
+ if (karg.build_id_size < build_id_sz) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ karg.build_id_size = build_id_sz;
+ }
+ }
+
+ if (karg.build_id_size) {
+ __u32 build_id_sz;
+
+ err = build_id_parse(vma, build_id_buf, &build_id_sz);
+ if (err) {
+ karg.build_id_size = 0;
+ } else {
+ if (karg.build_id_size < build_id_sz) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ karg.build_id_size = build_id_sz;
+ }
+ }
+
+ if (karg.vma_name_size) {
+ size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size);
+ const struct path *path;
+ const char *name_fmt;
+ size_t name_sz = 0;
+
+ get_vma_name(vma, &path, &name, &name_fmt);
+
+ if (path || name_fmt || name) {
+ name_buf = kmalloc(name_buf_sz, GFP_KERNEL);
+ if (!name_buf) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ if (path) {
+ name = d_path(path, name_buf, name_buf_sz);
+ if (IS_ERR(name)) {
+ err = PTR_ERR(name);
+ goto out;
+ }
+ name_sz = name_buf + name_buf_sz - name;
+ } else if (name || name_fmt) {
+ name_sz = 1 + snprintf(name_buf, name_buf_sz, name_fmt ?: "%s", name);
+ name = name_buf;
+ }
+ if (name_sz > name_buf_sz) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ karg.vma_name_size = name_sz;
+ }
+
+ /* unlock vma or mmap_lock, and put mm_struct before copying data to user */
+ query_vma_teardown(mm, vma);
+ mmput(mm);
+
+ if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
+ name, karg.vma_name_size)) {
+ kfree(name_buf);
+ return -EFAULT;
+ }
+ kfree(name_buf);
+
+ if (karg.build_id_size && copy_to_user(u64_to_user_ptr(karg.build_id_addr),
+ build_id_buf, karg.build_id_size))
+ return -EFAULT;
+
+ if (copy_to_user(uarg, &karg, min_t(size_t, sizeof(karg), usize)))
+ return -EFAULT;
+
+ return 0;
+
+out:
+ query_vma_teardown(mm, vma);
+ mmput(mm);
+ kfree(name_buf);
+ return err;
+}
+
+static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct seq_file *seq = file->private_data;
+ struct proc_maps_private *priv = seq->private;
+
+ switch (cmd) {
+ case PROCMAP_QUERY:
+ return do_procmap_query(priv, (void __user *)arg);
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = proc_map_release,
+ .unlocked_ioctl = procfs_procmap_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
/*
@@ -442,7 +717,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty, bool locked,
- bool migration)
+ bool present)
{
struct folio *folio = page_folio(page);
int i, nr = compound ? compound_nr(page) : 1;
@@ -471,24 +746,29 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
* Then accumulate quantities that may depend on sharing, or that may
* differ page-by-page.
*
- * refcount == 1 guarantees the page is mapped exactly once.
- * If any subpage of the compound page mapped with PTE it would elevate
- * the refcount.
+ * refcount == 1 for present entries guarantees that the folio is mapped
+ * exactly once. For large folios this implies that exactly one
+ * PTE/PMD/... maps (a part of) this folio.
*
- * The page_mapcount() is called to get a snapshot of the mapcount.
- * Without holding the page lock this snapshot can be slightly wrong as
- * we cannot always read the mapcount atomically. It is not safe to
- * call page_mapcount() even with PTL held if the page is not mapped,
- * especially for migration entries. Treat regular migration entries
- * as mapcount == 1.
+ * Treat all non-present entries (where relying on the mapcount and
+ * refcount doesn't make sense) as "maybe shared, but not sure how
+ * often". We treat device private entries as being fake-present.
+ *
+ * Note that it would not be safe to read the mapcount especially for
+ * pages referenced by migration entries, even with the PTL held.
*/
- if ((folio_ref_count(folio) == 1) || migration) {
+ if (folio_ref_count(folio) == 1 || !present) {
smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT,
- dirty, locked, true);
+ dirty, locked, present);
return;
}
+ /*
+ * We obtain a snapshot of the mapcount. Without holding the folio lock
+ * this snapshot can be slightly wrong as we cannot always read the
+ * mapcount atomically.
+ */
for (i = 0; i < nr; i++, page++) {
- int mapcount = page_mapcount(page);
+ int mapcount = folio_precise_page_mapcount(folio, page);
unsigned long pss = PAGE_SIZE << PSS_SHIFT;
if (mapcount >= 2)
pss /= mapcount;
@@ -531,13 +811,14 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
- bool migration = false, young = false, dirty = false;
+ bool present = false, young = false, dirty = false;
pte_t ptent = ptep_get(pte);
if (pte_present(ptent)) {
page = vm_normal_page(vma, addr, ptent);
young = pte_young(ptent);
dirty = pte_dirty(ptent);
+ present = true;
} else if (is_swap_pte(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
@@ -555,8 +836,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
}
} else if (is_pfn_swap_entry(swpent)) {
- if (is_migration_entry(swpent))
- migration = true;
+ if (is_device_private_entry(swpent))
+ present = true;
page = pfn_swap_entry_to_page(swpent);
}
} else {
@@ -567,7 +848,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, young, dirty, locked, migration);
+ smaps_account(mss, page, false, young, dirty, locked, present);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -578,18 +859,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
+ bool present = false;
struct folio *folio;
- bool migration = false;
if (pmd_present(*pmd)) {
page = vm_normal_page_pmd(vma, addr, *pmd);
+ present = true;
} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
- if (is_migration_entry(entry)) {
- migration = true;
+ if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
- }
}
if (IS_ERR_OR_NULL(page))
return;
@@ -604,7 +884,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
mss->file_thp += HPAGE_PMD_SIZE;
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
- locked, migration);
+ locked, present);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -708,6 +988,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_SHADOW_STACK)] = "ss",
#endif
#ifdef CONFIG_64BIT
+ [ilog2(VM_DROPPABLE)] = "dp",
[ilog2(VM_SEALED)] = "sl",
#endif
};
@@ -733,19 +1014,23 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
- pte_t ptent = huge_ptep_get(pte);
+ pte_t ptent = huge_ptep_get(walk->mm, addr, pte);
struct folio *folio = NULL;
+ bool present = false;
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
+ present = true;
} else if (is_swap_pte(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (is_pfn_swap_entry(swpent))
folio = pfn_swap_entry_folio(swpent);
}
+
if (folio) {
- if (folio_likely_mapped_shared(folio) ||
+ /* We treat non-present entries as "maybe shared". */
+ if (!present || folio_likely_mapped_shared(folio) ||
hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
@@ -1091,7 +1376,7 @@ struct clear_refs_private {
static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
- struct page *page;
+ struct folio *folio;
if (!pte_write(pte))
return false;
@@ -1099,10 +1384,10 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return false;
if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
return false;
- page = vm_normal_page(vma, addr, pte);
- if (!page)
+ folio = vm_normal_folio(vma, addr, pte);
+ if (!folio)
return false;
- return page_maybe_dma_pinned(page);
+ return folio_maybe_dma_pinned(folio);
}
static inline void clear_soft_dirty(struct vm_area_struct *vma,
@@ -1418,7 +1703,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
{
u64 frame = 0, flags = 0;
struct page *page = NULL;
- bool migration = false;
+ struct folio *folio;
if (pte_present(pte)) {
if (pm->show_pfn)
@@ -1450,17 +1735,20 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
(offset << MAX_SWAPFILES_SHIFT);
}
flags |= PM_SWAP;
- migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
if (pte_marker_entry_uffd_wp(entry))
flags |= PM_UFFD_WP;
}
- if (page && !PageAnon(page))
- flags |= PM_FILE;
- if (page && !migration && page_mapcount(page) == 1)
- flags |= PM_MMAP_EXCLUSIVE;
+ if (page) {
+ folio = page_folio(page);
+ if (!folio_test_anon(folio))
+ flags |= PM_FILE;
+ if ((flags & PM_PRESENT) &&
+ folio_precise_page_mapcount(folio, page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+ }
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1476,13 +1764,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pte_t *pte, *orig_pte;
int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- bool migration = false;
ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
+ unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT;
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
struct page *page = NULL;
+ struct folio *folio = NULL;
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1496,8 +1785,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pmd_uffd_wp(pmd))
flags |= PM_UFFD_WP;
if (pm->show_pfn)
- frame = pmd_pfn(pmd) +
- ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ frame = pmd_pfn(pmd) + idx;
}
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
else if (is_swap_pmd(pmd)) {
@@ -1506,11 +1794,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pm->show_pfn) {
if (is_pfn_swap_entry(entry))
- offset = swp_offset_pfn(entry);
+ offset = swp_offset_pfn(entry) + idx;
else
- offset = swp_offset(entry);
- offset = offset +
- ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ offset = swp_offset(entry) + idx;
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);
}
@@ -1520,17 +1806,25 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (pmd_swp_uffd_wp(pmd))
flags |= PM_UFFD_WP;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
- migration = is_migration_entry(entry);
page = pfn_swap_entry_to_page(entry);
}
#endif
- if (page && !migration && page_mapcount(page) == 1)
- flags |= PM_MMAP_EXCLUSIVE;
+ if (page) {
+ folio = page_folio(page);
+ if (!folio_test_anon(folio))
+ flags |= PM_FILE;
+ }
+
+ for (; addr != end; addr += PAGE_SIZE, idx++) {
+ unsigned long cur_flags = flags;
+ pagemap_entry_t pme;
- for (; addr != end; addr += PAGE_SIZE) {
- pagemap_entry_t pme = make_pme(frame, flags);
+ if (folio && (flags & PM_PRESENT) &&
+ folio_precise_page_mapcount(folio, page + idx) == 1)
+ cur_flags |= PM_MMAP_EXCLUSIVE;
+ pme = make_pme(frame, cur_flags);
err = add_to_pagemap(&pme, pm);
if (err)
break;
@@ -1585,7 +1879,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(walk->mm, addr, ptep);
if (pte_present(pte)) {
struct folio *folio = page_folio(pte_page(pte));
@@ -2274,7 +2568,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
if (~p->arg.flags & PM_SCAN_WP_MATCHING) {
/* Go the short route when not write-protecting pages. */
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(walk->mm, start, ptep);
categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
if (!pagemap_scan_is_interesting_page(categories, p))
@@ -2286,7 +2580,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
i_mmap_lock_write(vma->vm_file->f_mapping);
ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(walk->mm, start, ptep);
categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
if (!pagemap_scan_is_interesting_page(categories, p))
@@ -2566,7 +2860,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
unsigned long nr_pages)
{
struct folio *folio = page_folio(page);
- int count = page_mapcount(page);
+ int count = folio_precise_page_mapcount(folio, page);
md->pages += nr_pages;
if (pte_dirty || folio_test_dirty(folio))
@@ -2682,7 +2976,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
- pte_t huge_pte = huge_ptep_get(pte);
+ pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte);
struct numa_maps *md;
struct page *page;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a2b256dac36e..7ae885e6d5d7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2913,7 +2913,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = {
};
EXPORT_SYMBOL(dquot_quotactl_sysfile_ops);
-static int do_proc_dqstats(struct ctl_table *table, int write,
+static int do_proc_dqstats(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int type = (unsigned long *)table->data - dqstats.stat;
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 0e04cf8b1d89..5c2845e47cf2 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -133,8 +133,8 @@ struct ksmbd_transport_ops {
};
struct ksmbd_transport {
- struct ksmbd_conn *conn;
- struct ksmbd_transport_ops *ops;
+ struct ksmbd_conn *conn;
+ const struct ksmbd_transport_ops *ops;
};
#define KSMBD_TCP_RECV_TIMEOUT (7 * HZ)
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index aec0a7a12405..162a12685d2c 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -149,6 +149,7 @@ void ksmbd_session_destroy(struct ksmbd_session *sess)
ksmbd_tree_conn_session_logoff(sess);
ksmbd_destroy_file_table(&sess->file_table);
+ ksmbd_launch_ksmbd_durable_scavenger();
ksmbd_session_rpc_clear_list(sess);
free_channel_list(sess);
kfree(sess->Preauth_HashValue);
@@ -326,6 +327,7 @@ void destroy_previous_session(struct ksmbd_conn *conn,
ksmbd_destroy_file_table(&prev_sess->file_table);
prev_sess->state = SMB2_SESSION_EXPIRED;
+ ksmbd_launch_ksmbd_durable_scavenger();
out:
up_write(&conn->session_lock);
up_write(&sessions_table_lock);
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index e9da63f25b20..72bc88a63a40 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -11,13 +11,6 @@
#define OPLOCK_WAIT_TIME (35 * HZ)
-/* SMB2 Oplock levels */
-#define SMB2_OPLOCK_LEVEL_NONE 0x00
-#define SMB2_OPLOCK_LEVEL_II 0x01
-#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08
-#define SMB2_OPLOCK_LEVEL_BATCH 0x09
-#define SMB2_OPLOCK_LEVEL_LEASE 0xFF
-
/* Oplock states */
#define OPLOCK_STATE_NONE 0x00
#define OPLOCK_ACK_WAIT 0x01
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index c67fbc8d6683..4d24cc105ef6 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -377,6 +377,7 @@ static void server_ctrl_handle_reset(struct server_ctrl_struct *ctrl)
{
ksmbd_ipc_soft_reset();
ksmbd_conn_transport_destroy();
+ ksmbd_stop_durable_scavenger();
server_conf_free();
server_conf_init();
WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP);
diff --git a/fs/smb/server/server.h b/fs/smb/server/server.h
index db7278181760..4fc529335271 100644
--- a/fs/smb/server/server.h
+++ b/fs/smb/server/server.h
@@ -44,6 +44,7 @@ struct ksmbd_server_config {
unsigned int max_connections;
char *conf[SERVER_CONF_WORK_GROUP + 1];
+ struct task_struct *dh_task;
};
extern struct ksmbd_server_config server_conf;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 840c71c66b30..37a39ab4ee65 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -3526,7 +3526,7 @@ int smb2_open(struct ksmbd_work *work)
SMB2_CREATE_GUID_SIZE);
if (dh_info.timeout)
fp->durable_timeout = min(dh_info.timeout,
- 300000);
+ DURABLE_HANDLE_MAX_TIMEOUT);
else
fp->durable_timeout = 60;
}
diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h
index 643f5e1cfe35..3be7d5ae65a8 100644
--- a/fs/smb/server/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
@@ -72,6 +72,8 @@ struct create_durable_req_v2 {
__u8 CreateGuid[16];
} __packed;
+#define DURABLE_HANDLE_MAX_TIMEOUT 300000
+
struct create_durable_reconn_req {
struct create_context_hdr ccontext;
__u8 Name[8];
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 8faa25c6e129..cf4418f72772 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -164,7 +164,7 @@ enum {
SMB_DIRECT_MSG_DATA_TRANSFER
};
-static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
+static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
struct smb_direct_send_ctx {
struct list_head msg_list;
@@ -2292,7 +2292,7 @@ out:
return rdma_capable;
}
-static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
+static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.prepare = smb_direct_prepare,
.disconnect = smb_direct_disconnect,
.shutdown = smb_direct_shutdown,
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index 6633fa78e9b9..a84788396daa 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -37,7 +37,7 @@ struct tcp_transport {
unsigned int nr_iov;
};
-static struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
+static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
static void tcp_stop_kthread(struct task_struct *kthread);
static struct interface *alloc_iface(char *ifname);
@@ -649,7 +649,7 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz)
return 0;
}
-static struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
+static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
.read = ksmbd_tcp_read,
.writev = ksmbd_tcp_writev,
.disconnect = ksmbd_tcp_disconnect,
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 8b2e37c8716e..4d4ee696e37c 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -8,6 +8,8 @@
#include <linux/filelock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include "glob.h"
#include "vfs_cache.h"
@@ -17,6 +19,7 @@
#include "mgmt/tree_connect.h"
#include "mgmt/user_session.h"
#include "smb_common.h"
+#include "server.h"
#define S_DEL_PENDING 1
#define S_DEL_ON_CLS 2
@@ -31,6 +34,10 @@ static struct ksmbd_file_table global_ft;
static atomic_long_t fd_limit;
static struct kmem_cache *filp_cache;
+static bool durable_scavenger_running;
+static DEFINE_MUTEX(durable_scavenger_lock);
+static wait_queue_head_t dh_wq;
+
void ksmbd_set_fd_limit(unsigned long limit)
{
limit = min(limit, get_max_files());
@@ -280,9 +287,16 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp)
if (!has_file_id(fp->persistent_id))
return;
- write_lock(&global_ft.lock);
idr_remove(global_ft.idr, fp->persistent_id);
+}
+
+static void ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+{
+ write_lock(&global_ft.lock);
+ __ksmbd_remove_durable_fd(fp);
write_unlock(&global_ft.lock);
+ if (waitqueue_active(&dh_wq))
+ wake_up(&dh_wq);
}
static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
@@ -305,7 +319,7 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
struct ksmbd_lock *smb_lock, *tmp_lock;
fd_limit_close();
- __ksmbd_remove_durable_fd(fp);
+ ksmbd_remove_durable_fd(fp);
if (ft)
__ksmbd_remove_fd(ft, fp);
@@ -477,7 +491,10 @@ struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id)
struct ksmbd_file *fp;
fp = __ksmbd_lookup_fd(&global_ft, id);
- if (fp && fp->conn) {
+ if (fp && (fp->conn ||
+ (fp->durable_scavenger_timeout &&
+ (fp->durable_scavenger_timeout <
+ jiffies_to_msecs(jiffies))))) {
ksmbd_put_durable_fd(fp);
fp = NULL;
}
@@ -694,6 +711,142 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon,
return fp->tcon != tcon;
}
+static bool ksmbd_durable_scavenger_alive(void)
+{
+ mutex_lock(&durable_scavenger_lock);
+ if (!durable_scavenger_running) {
+ mutex_unlock(&durable_scavenger_lock);
+ return false;
+ }
+ mutex_unlock(&durable_scavenger_lock);
+
+ if (kthread_should_stop())
+ return false;
+
+ if (idr_is_empty(global_ft.idr))
+ return false;
+
+ return true;
+}
+
+static void ksmbd_scavenger_dispose_dh(struct list_head *head)
+{
+ while (!list_empty(head)) {
+ struct ksmbd_file *fp;
+
+ fp = list_first_entry(head, struct ksmbd_file, node);
+ list_del_init(&fp->node);
+ __ksmbd_close_fd(NULL, fp);
+ }
+}
+
+static int ksmbd_durable_scavenger(void *dummy)
+{
+ struct ksmbd_file *fp = NULL;
+ unsigned int id;
+ unsigned int min_timeout = 1;
+ bool found_fp_timeout;
+ LIST_HEAD(scavenger_list);
+ unsigned long remaining_jiffies;
+
+ __module_get(THIS_MODULE);
+
+ set_freezable();
+ while (ksmbd_durable_scavenger_alive()) {
+ if (try_to_freeze())
+ continue;
+
+ found_fp_timeout = false;
+
+ remaining_jiffies = wait_event_timeout(dh_wq,
+ ksmbd_durable_scavenger_alive() == false,
+ __msecs_to_jiffies(min_timeout));
+ if (remaining_jiffies)
+ min_timeout = jiffies_to_msecs(remaining_jiffies);
+ else
+ min_timeout = DURABLE_HANDLE_MAX_TIMEOUT;
+
+ write_lock(&global_ft.lock);
+ idr_for_each_entry(global_ft.idr, fp, id) {
+ if (!fp->durable_timeout)
+ continue;
+
+ if (atomic_read(&fp->refcount) > 1 ||
+ fp->conn)
+ continue;
+
+ found_fp_timeout = true;
+ if (fp->durable_scavenger_timeout <=
+ jiffies_to_msecs(jiffies)) {
+ __ksmbd_remove_durable_fd(fp);
+ list_add(&fp->node, &scavenger_list);
+ } else {
+ unsigned long durable_timeout;
+
+ durable_timeout =
+ fp->durable_scavenger_timeout -
+ jiffies_to_msecs(jiffies);
+
+ if (min_timeout > durable_timeout)
+ min_timeout = durable_timeout;
+ }
+ }
+ write_unlock(&global_ft.lock);
+
+ ksmbd_scavenger_dispose_dh(&scavenger_list);
+
+ if (found_fp_timeout == false)
+ break;
+ }
+
+ mutex_lock(&durable_scavenger_lock);
+ durable_scavenger_running = false;
+ mutex_unlock(&durable_scavenger_lock);
+
+ module_put(THIS_MODULE);
+
+ return 0;
+}
+
+void ksmbd_launch_ksmbd_durable_scavenger(void)
+{
+ if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE))
+ return;
+
+ mutex_lock(&durable_scavenger_lock);
+ if (durable_scavenger_running == true) {
+ mutex_unlock(&durable_scavenger_lock);
+ return;
+ }
+
+ durable_scavenger_running = true;
+
+ server_conf.dh_task = kthread_run(ksmbd_durable_scavenger,
+ (void *)NULL, "ksmbd-durable-scavenger");
+ if (IS_ERR(server_conf.dh_task))
+ pr_err("cannot start conn thread, err : %ld\n",
+ PTR_ERR(server_conf.dh_task));
+ mutex_unlock(&durable_scavenger_lock);
+}
+
+void ksmbd_stop_durable_scavenger(void)
+{
+ if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE))
+ return;
+
+ mutex_lock(&durable_scavenger_lock);
+ if (!durable_scavenger_running) {
+ mutex_unlock(&durable_scavenger_lock);
+ return;
+ }
+
+ durable_scavenger_running = false;
+ if (waitqueue_active(&dh_wq))
+ wake_up(&dh_wq);
+ mutex_unlock(&durable_scavenger_lock);
+ kthread_stop(server_conf.dh_task);
+}
+
static bool session_fd_check(struct ksmbd_tree_connect *tcon,
struct ksmbd_file *fp)
{
@@ -718,6 +871,10 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon,
fp->tcon = NULL;
fp->volatile_id = KSMBD_NO_FID;
+ if (fp->durable_timeout)
+ fp->durable_scavenger_timeout =
+ jiffies_to_msecs(jiffies) + fp->durable_timeout;
+
return true;
}
@@ -750,11 +907,12 @@ void ksmbd_free_global_file_table(void)
unsigned int id;
idr_for_each_entry(global_ft.idr, fp, id) {
- __ksmbd_remove_durable_fd(fp);
- kmem_cache_free(filp_cache, fp);
+ ksmbd_remove_durable_fd(fp);
+ __ksmbd_close_fd(NULL, fp);
}
- ksmbd_destroy_file_table(&global_ft);
+ idr_destroy(global_ft.idr);
+ kfree(global_ft.idr);
}
int ksmbd_validate_name_reconnect(struct ksmbd_share_config *share,
@@ -810,6 +968,7 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp)
}
up_write(&ci->m_lock);
+ fp->f_state = FP_NEW;
__open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID);
if (!has_file_id(fp->volatile_id)) {
fp->conn = NULL;
@@ -849,6 +1008,8 @@ int ksmbd_init_file_cache(void)
if (!filp_cache)
goto out;
+ init_waitqueue_head(&dh_wq);
+
return 0;
out:
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index 5a225e7055f1..b0f6d0f94cb8 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -101,6 +101,7 @@ struct ksmbd_file {
struct list_head lock_list;
int durable_timeout;
+ int durable_scavenger_timeout;
/* if ls is happening on directory, below is valid*/
struct ksmbd_readdir_data readdir_data;
@@ -152,6 +153,8 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
+void ksmbd_launch_ksmbd_durable_scavenger(void);
+void ksmbd_stop_durable_scavenger(void);
void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
void ksmbd_close_session_fds(struct ksmbd_work *work);
int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
diff --git a/fs/super.c b/fs/super.c
index 095ba793e10c..38d72a3cf6fc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -736,6 +736,17 @@ struct super_block *sget_fc(struct fs_context *fc,
struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
int err;
+ /*
+ * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT is
+ * not set, as the filesystem is likely unprepared to handle it.
+ * This can happen when fsconfig() is called from init_user_ns with
+ * an fs_fd opened in another user namespace.
+ */
+ if (user_ns != &init_user_ns && !(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) {
+ errorfc(fc, "VFS: Mounting from non-initial user namespace is not allowed");
+ return ERR_PTR(-EPERM);
+ }
+
retry:
spin_lock(&sb_lock);
if (test) {
diff --git a/fs/binfmt_elf_test.c b/fs/tests/binfmt_elf_kunit.c
index 11d734fec366..11d734fec366 100644
--- a/fs/binfmt_elf_test.c
+++ b/fs/tests/binfmt_elf_kunit.c
diff --git a/fs/exec_test.c b/fs/tests/exec_kunit.c
index 7c77d039680b..7c77d039680b 100644
--- a/fs/exec_test.c
+++ b/fs/tests/exec_kunit.c
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 44666afc6209..bc625788589c 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1540,4 +1540,5 @@ static void __exit exit_ufs_fs(void)
module_init(init_ufs_fs)
module_exit(exit_ufs_fs)
+MODULE_DESCRIPTION("UFS Filesystem");
MODULE_LICENSE("GPL");
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 17e409ceaa33..27a3e9285fbf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -257,7 +257,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
goto out;
ret = false;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep);
/*
* Lockless access: we're in a wait_event so it's ok if it
diff --git a/fs/xattr.c b/fs/xattr.c
index f8b643f91a98..7672ce5486c5 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -630,10 +630,9 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
ctx->kvalue, ctx->size, ctx->flags);
}
-static long
-setxattr(struct mnt_idmap *idmap, struct dentry *d,
- const char __user *name, const void __user *value, size_t size,
- int flags)
+static int path_setxattr(const char __user *pathname,
+ const char __user *name, const void __user *value,
+ size_t size, int flags, unsigned int lookup_flags)
{
struct xattr_name kname;
struct xattr_ctx ctx = {
@@ -643,33 +642,20 @@ setxattr(struct mnt_idmap *idmap, struct dentry *d,
.kname = &kname,
.flags = flags,
};
+ struct path path;
int error;
error = setxattr_copy(name, &ctx);
if (error)
return error;
- error = do_setxattr(idmap, d, &ctx);
-
- kvfree(ctx.kvalue);
- return error;
-}
-
-static int path_setxattr(const char __user *pathname,
- const char __user *name, const void __user *value,
- size_t size, int flags, unsigned int lookup_flags)
-{
- struct path path;
- int error;
-
retry:
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
if (error)
- return error;
+ goto out;
error = mnt_want_write(path.mnt);
if (!error) {
- error = setxattr(mnt_idmap(path.mnt), path.dentry, name,
- value, size, flags);
+ error = do_setxattr(mnt_idmap(path.mnt), path.dentry, &ctx);
mnt_drop_write(path.mnt);
}
path_put(&path);
@@ -677,6 +663,9 @@ retry:
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+
+out:
+ kvfree(ctx.kvalue);
return error;
}
@@ -697,20 +686,32 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
const void __user *,value, size_t, size, int, flags)
{
- struct fd f = fdget(fd);
- int error = -EBADF;
+ struct xattr_name kname;
+ struct xattr_ctx ctx = {
+ .cvalue = value,
+ .kvalue = NULL,
+ .size = size,
+ .kname = &kname,
+ .flags = flags,
+ };
+ int error;
+ CLASS(fd, f)(fd);
if (!f.file)
- return error;
+ return -EBADF;
+
audit_file(f.file);
+ error = setxattr_copy(name, &ctx);
+ if (error)
+ return error;
+
error = mnt_want_write_file(f.file);
if (!error) {
- error = setxattr(file_mnt_idmap(f.file),
- f.file->f_path.dentry, name,
- value, size, flags);
+ error = do_setxattr(file_mnt_idmap(f.file),
+ f.file->f_path.dentry, &ctx);
mnt_drop_write_file(f.file);
}
- fdput(f);
+ kvfree(ctx.kvalue);
return error;
}
@@ -899,9 +900,17 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
* Extended attribute REMOVE operations
*/
static long
-removexattr(struct mnt_idmap *idmap, struct dentry *d,
- const char __user *name)
+removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name)
{
+ if (is_posix_acl_xattr(name))
+ return vfs_remove_acl(idmap, d, name);
+ return vfs_removexattr(idmap, d, name);
+}
+
+static int path_removexattr(const char __user *pathname,
+ const char __user *name, unsigned int lookup_flags)
+{
+ struct path path;
int error;
char kname[XATTR_NAME_MAX + 1];
@@ -910,25 +919,13 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d,
error = -ERANGE;
if (error < 0)
return error;
-
- if (is_posix_acl_xattr(kname))
- return vfs_remove_acl(idmap, d, kname);
-
- return vfs_removexattr(idmap, d, kname);
-}
-
-static int path_removexattr(const char __user *pathname,
- const char __user *name, unsigned int lookup_flags)
-{
- struct path path;
- int error;
retry:
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
if (error)
return error;
error = mnt_want_write(path.mnt);
if (!error) {
- error = removexattr(mnt_idmap(path.mnt), path.dentry, name);
+ error = removexattr(mnt_idmap(path.mnt), path.dentry, kname);
mnt_drop_write(path.mnt);
}
path_put(&path);
@@ -954,15 +951,23 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
{
struct fd f = fdget(fd);
+ char kname[XATTR_NAME_MAX + 1];
int error = -EBADF;
if (!f.file)
return error;
audit_file(f.file);
+
+ error = strncpy_from_user(kname, name, sizeof(kname));
+ if (error == 0 || error == sizeof(kname))
+ error = -ERANGE;
+ if (error < 0)
+ return error;
+
error = mnt_want_write_file(f.file);
if (!error) {
error = removexattr(file_mnt_idmap(f.file),
- f.file->f_path.dentry, name);
+ f.file->f_path.dentry, kname);
mnt_drop_write_file(f.file);
}
fdput(f);
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index a191f6560f98..c84df23b494d 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -11,7 +11,7 @@ static struct ctl_table_header *xfs_table_header;
#ifdef CONFIG_PROC_FS
STATIC int
xfs_stats_clear_proc_handler(
- struct ctl_table *ctl,
+ const struct ctl_table *ctl,
int write,
void *buffer,
size_t *lenp,
@@ -31,7 +31,7 @@ xfs_stats_clear_proc_handler(
STATIC int
xfs_panic_mask_proc_handler(
- struct ctl_table *ctl,
+ const struct ctl_table *ctl,
int write,
void *buffer,
size_t *lenp,
@@ -52,7 +52,7 @@ xfs_panic_mask_proc_handler(
STATIC int
xfs_deprecated_dointvec_minmax(
- struct ctl_table *ctl,
+ const struct ctl_table *ctl,
int write,
void *buffer,
size_t *lenp,