diff options
Diffstat (limited to 'fs')
135 files changed, 2352 insertions, 1405 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 67afe68972d5..f8622ed72e08 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -533,14 +533,14 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, break; } - offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio); + offset = round_down(ctx->pos, sizeof(*dblock)) - folio_pos(folio); size = min_t(loff_t, folio_size(folio), - req->actual_len - folio_file_pos(folio)); + req->actual_len - folio_pos(folio)); do { dblock = kmap_local_folio(folio, offset); ret = afs_dir_iterate_block(dvnode, ctx, dblock, - folio_file_pos(folio) + offset); + folio_pos(folio) + offset); kunmap_local(dblock); if (ret != 1) goto out; diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index e2fa577b66fe..a71bff10496b 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -256,7 +256,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode, folio = folio0; } - block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); + block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio)); /* Abandon the edit if we got a callback break. */ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -417,7 +417,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, folio = folio0; } - block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); + block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio)); /* Abandon the edit if we got a callback break. */ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -410,17 +410,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, struct kioctx *ctx; unsigned long flags; pgoff_t idx; - int rc; - - /* - * We cannot support the _NO_COPY case here, because copy needs to - * happen under the ctx->completion_lock. That does not work with the - * migration workflow of MIGRATE_SYNC_NO_COPY. - */ - if (mode == MIGRATE_SYNC_NO_COPY) - return -EINVAL; - - rc = 0; + int rc = 0; /* mapping->i_private_lock here protects against the kioctx teardown. */ spin_lock(&mapping->i_private_lock); @@ -465,7 +455,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, * events from being lost. */ spin_lock_irqsave(&ctx->completion_lock, flags); - folio_migrate_copy(dst, src); + folio_copy(dst, src); + folio_migrate_flags(dst, src); BUG_ON(ctx->ring_folios[idx] != src); ctx->ring_folios[idx] = dst; spin_unlock_irqrestore(&ctx->completion_lock, flags); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index cabf866c7956..618d2ff0292e 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -496,12 +496,6 @@ again: for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); alloc_cursor < k.k->p.offset; alloc_cursor++) { - ret = btree_trans_too_many_iters(trans); - if (ret) { - ob = ERR_PTR(ret); - break; - } - s->buckets_seen++; u64 bucket = alloc_cursor & ~(~0ULL << 56); diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index df3763c18c0e..1d6b691e8da6 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -6,15 +6,29 @@ #include <linux/kthread.h> #include <linux/preempt.h> -static inline long io_timer_cmp(io_timer_heap *h, - struct io_timer *l, - struct io_timer *r) +static inline bool io_timer_cmp(const void *l, const void *r, void __always_unused *args) { - return l->expire - r->expire; + struct io_timer **_l = (struct io_timer **)l; + struct io_timer **_r = (struct io_timer **)r; + + return (*_l)->expire < (*_r)->expire; +} + +static inline void io_timer_swp(void *l, void *r, void __always_unused *args) +{ + struct io_timer **_l = (struct io_timer **)l; + struct io_timer **_r = (struct io_timer **)r; + + swap(*_l, *_r); } void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) { + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; + spin_lock(&clock->timer_lock); if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) { @@ -23,22 +37,27 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer) return; } - for (size_t i = 0; i < clock->timers.used; i++) + for (size_t i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) goto out; - BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL)); + BUG_ON(!min_heap_push(&clock->timers, &timer, &callbacks, NULL)); out: spin_unlock(&clock->timer_lock); } void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) { + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; + spin_lock(&clock->timer_lock); - for (size_t i = 0; i < clock->timers.used; i++) + for (size_t i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) { - heap_del(&clock->timers, i, io_timer_cmp, NULL); + min_heap_del(&clock->timers, i, &callbacks, NULL); break; } @@ -123,10 +142,17 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now) { struct io_timer *ret = NULL; + const struct min_heap_callbacks callbacks = { + .less = io_timer_cmp, + .swp = io_timer_swp, + }; + + if (clock->timers.nr && + time_after_eq64(now, clock->timers.data[0]->expire)) { + ret = *min_heap_peek(&clock->timers); + min_heap_pop(&clock->timers, &callbacks, NULL); + } - if (clock->timers.used && - time_after_eq64(now, clock->timers.data[0]->expire)) - heap_pop(&clock->timers, ret, io_timer_cmp, NULL); return ret; } @@ -150,7 +176,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) printbuf_tabstop_push(out, 40); prt_printf(out, "current time:\t%llu\n", now); - for (unsigned i = 0; i < clock->timers.used; i++) + for (unsigned i = 0; i < clock->timers.nr; i++) prt_printf(out, "%ps %ps:\t%llu\n", clock->timers.data[i]->fn, clock->timers.data[i]->fn2, diff --git a/fs/bcachefs/clock_types.h b/fs/bcachefs/clock_types.h index 9c25d0fcf294..37554e4514fe 100644 --- a/fs/bcachefs/clock_types.h +++ b/fs/bcachefs/clock_types.h @@ -24,7 +24,7 @@ struct io_timer { /* Amount to buffer up on a percpu counter */ #define IO_CLOCK_PCPU_SECTORS 128 -typedef HEAP(struct io_timer *) io_timer_heap; +typedef DEFINE_MIN_HEAP(struct io_timer *, io_timer_heap) io_timer_heap; struct io_clock { atomic64_t now; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 86948d110f6b..9b5b5c9a6c63 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -901,8 +901,8 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) mutex_lock(&c->ec_stripes_heap_lock); if (n.size > h->size) { - memcpy(n.data, h->data, h->used * sizeof(h->data[0])); - n.used = h->used; + memcpy(n.data, h->data, h->nr * sizeof(h->data[0])); + n.nr = h->nr; swap(*h, n); } mutex_unlock(&c->ec_stripes_heap_lock); @@ -993,7 +993,7 @@ static u64 stripe_idx_to_delete(struct bch_fs *c) lockdep_assert_held(&c->ec_stripes_heap_lock); - if (h->used && + if (h->nr && h->data[0].blocks_nonempty == 0 && !bch2_stripe_is_open(c, h->data[0].idx)) return h->data[0].idx; @@ -1001,14 +1001,6 @@ static u64 stripe_idx_to_delete(struct bch_fs *c) return 0; } -static inline int ec_stripes_heap_cmp(ec_stripes_heap *h, - struct ec_stripe_heap_entry l, - struct ec_stripe_heap_entry r) -{ - return ((l.blocks_nonempty > r.blocks_nonempty) - - (l.blocks_nonempty < r.blocks_nonempty)); -} - static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, size_t i) { @@ -1017,39 +1009,71 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i; } +static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args) +{ + struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l; + struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r; + + return ((_l->blocks_nonempty > _r->blocks_nonempty) < + (_l->blocks_nonempty < _r->blocks_nonempty)); +} + +static inline void ec_stripes_heap_swap(void *l, void *r, void *h) +{ + struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l; + struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r; + ec_stripes_heap *_h = (ec_stripes_heap *)h; + size_t i = _l - _h->data; + size_t j = _r - _h->data; + + swap(*_l, *_r); + + ec_stripes_heap_set_backpointer(_h, i); + ec_stripes_heap_set_backpointer(_h, j); +} + static void heap_verify_backpointer(struct bch_fs *c, size_t idx) { ec_stripes_heap *h = &c->ec_stripes_heap; struct stripe *m = genradix_ptr(&c->stripes, idx); - BUG_ON(m->heap_idx >= h->used); + BUG_ON(m->heap_idx >= h->nr); BUG_ON(h->data[m->heap_idx].idx != idx); } void bch2_stripes_heap_del(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; + mutex_lock(&c->ec_stripes_heap_lock); heap_verify_backpointer(c, idx); - heap_del(&c->ec_stripes_heap, m->heap_idx, - ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap); mutex_unlock(&c->ec_stripes_heap_lock); } void bch2_stripes_heap_insert(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; + mutex_lock(&c->ec_stripes_heap_lock); - BUG_ON(heap_full(&c->ec_stripes_heap)); + BUG_ON(min_heap_full(&c->ec_stripes_heap)); - heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) { + genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr; + min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) { .idx = idx, .blocks_nonempty = m->blocks_nonempty, }), - ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + &callbacks, + &c->ec_stripes_heap); heap_verify_backpointer(c, idx); mutex_unlock(&c->ec_stripes_heap_lock); @@ -1058,6 +1082,10 @@ void bch2_stripes_heap_insert(struct bch_fs *c, void bch2_stripes_heap_update(struct bch_fs *c, struct stripe *m, size_t idx) { + const struct min_heap_callbacks callbacks = { + .less = ec_stripes_heap_cmp, + .swp = ec_stripes_heap_swap, + }; ec_stripes_heap *h = &c->ec_stripes_heap; bool do_deletes; size_t i; @@ -1068,10 +1096,8 @@ void bch2_stripes_heap_update(struct bch_fs *c, h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty; i = m->heap_idx; - heap_sift_up(h, i, ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); - heap_sift_down(h, i, ec_stripes_heap_cmp, - ec_stripes_heap_set_backpointer); + min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap); + min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap); heap_verify_backpointer(c, idx); @@ -1864,7 +1890,7 @@ static s64 get_existing_stripe(struct bch_fs *c, return -1; mutex_lock(&c->ec_stripes_heap_lock); - for (heap_idx = 0; heap_idx < h->used; heap_idx++) { + for (heap_idx = 0; heap_idx < h->nr; heap_idx++) { /* No blocks worth reusing, stripe will just be deleted: */ if (!h->data[heap_idx].blocks_nonempty) continue; @@ -2195,7 +2221,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) size_t i; mutex_lock(&c->ec_stripes_heap_lock); - for (i = 0; i < min_t(size_t, h->used, 50); i++) { + for (i = 0; i < min_t(size_t, h->nr, 50); i++) { m = genradix_ptr(&c->stripes, h->data[i].idx); prt_printf(out, "%zu %u/%u+%u", h->data[i].idx, diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h index 976426da3a12..1df03dccfc72 100644 --- a/fs/bcachefs/ec_types.h +++ b/fs/bcachefs/ec_types.h @@ -36,6 +36,6 @@ struct ec_stripe_heap_entry { unsigned blocks_nonempty; }; -typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap; +typedef DEFINE_MIN_HEAP(struct ec_stripe_heap_entry, ec_stripes_heap) ec_stripes_heap; #endif /* _BCACHEFS_EC_TYPES_H */ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index cc4f0963c0c5..9138944c5ae6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -283,6 +283,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 inode_snapshot) { + struct bch_fs *c = trans->c; struct bch_hash_info dir_hash; struct bch_inode_unpacked lostfound; char name_buf[20]; @@ -317,7 +318,7 @@ static int reattach_inode(struct btree_trans *trans, return ret; } - dir_hash = bch2_hash_info_init(trans->c, &lostfound); + dir_hash = bch2_hash_info_init(c, &lostfound); name = (struct qstr) QSTR(name_buf); @@ -330,8 +331,10 @@ static int reattach_inode(struct btree_trans *trans, inode->bi_subvol ?: inode->bi_inum, &dir_offset, STR_HASH_must_create); - if (ret) + if (ret) { + bch_err_msg(c, ret, "error creating dirent"); return ret; + } inode->bi_dir = lostfound.bi_inum; inode->bi_dir_offset = dir_offset; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index d8a630742887..70b998d9f19c 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -206,6 +206,7 @@ void bch2_journal_space_available(struct journal *j) if (nr_online < metadata_replicas_required(c)) { struct printbuf buf = PRINTBUF; + buf.atomic++; prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" "rw journal devs:", nr_online, metadata_replicas_required(c)); diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h index 4fcf062dd22c..47e4a3c3d26e 100644 --- a/fs/bcachefs/mean_and_variance.h +++ b/fs/bcachefs/mean_and_variance.h @@ -111,11 +111,11 @@ static inline u128_u u128_shl(u128_u i, s8 shift) { u128_u r; - r.lo = i.lo << shift; + r.lo = i.lo << (shift & 63); if (shift < 64) - r.hi = (i.hi << shift) | (i.lo >> (64 - shift)); + r.hi = (i.hi << (shift & 63)) | (i.lo >> (-shift & 63)); else { - r.hi = i.lo << (shift - 64); + r.hi = i.lo << (-shift & 63); r.lo = 0; } return r; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 4ec7e44d6e36..138320eaa2ad 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * random utiility code, for bcache but in theory not specific to bcache + * random utility code, for bcache but in theory not specific to bcache * * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> * Copyright 2012 Google, Inc. diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 2def4f761ca6..902b7f5406a2 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -8,6 +8,7 @@ #include <linux/errno.h> #include <linux/freezer.h> #include <linux/kernel.h> +#include <linux/min_heap.h> #include <linux/sched/clock.h> #include <linux/llist.h> #include <linux/log2.h> @@ -54,17 +55,9 @@ static inline size_t buf_pages(void *p, size_t len) PAGE_SIZE); } -#define HEAP(type) \ -struct { \ - size_t size, used; \ - type *data; \ -} - -#define DECLARE_HEAP(type, name) HEAP(type) name - #define init_heap(heap, _size, gfp) \ ({ \ - (heap)->used = 0; \ + (heap)->nr = 0; \ (heap)->size = (_size); \ (heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\ (gfp)); \ @@ -76,113 +69,6 @@ do { \ (heap)->data = NULL; \ } while (0) -#define heap_set_backpointer(h, i, _fn) \ -do { \ - void (*fn)(typeof(h), size_t) = _fn; \ - if (fn) \ - fn(h, i); \ -} while (0) - -#define heap_swap(h, i, j, set_backpointer) \ -do { \ - swap((h)->data[i], (h)->data[j]); \ - heap_set_backpointer(h, i, set_backpointer); \ - heap_set_backpointer(h, j, set_backpointer); \ -} while (0) - -#define heap_peek(h) \ -({ \ - EBUG_ON(!(h)->used); \ - (h)->data[0]; \ -}) - -#define heap_full(h) ((h)->used == (h)->size) - -#define heap_sift_down(h, i, cmp, set_backpointer) \ -do { \ - size_t _c, _j = i; \ - \ - for (; _j * 2 + 1 < (h)->used; _j = _c) { \ - _c = _j * 2 + 1; \ - if (_c + 1 < (h)->used && \ - cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \ - _c++; \ - \ - if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \ - break; \ - heap_swap(h, _c, _j, set_backpointer); \ - } \ -} while (0) - -#define heap_sift_up(h, i, cmp, set_backpointer) \ -do { \ - while (i) { \ - size_t p = (i - 1) / 2; \ - if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \ - break; \ - heap_swap(h, i, p, set_backpointer); \ - i = p; \ - } \ -} while (0) - -#define __heap_add(h, d, cmp, set_backpointer) \ -({ \ - size_t _i = (h)->used++; \ - (h)->data[_i] = d; \ - heap_set_backpointer(h, _i, set_backpointer); \ - \ - heap_sift_up(h, _i, cmp, set_backpointer); \ - _i; \ -}) - -#define heap_add(h, d, cmp, set_backpointer) \ -({ \ - bool _r = !heap_full(h); \ - if (_r) \ - __heap_add(h, d, cmp, set_backpointer); \ - _r; \ -}) - -#define heap_add_or_replace(h, new, cmp, set_backpointer) \ -do { \ - if (!heap_add(h, new, cmp, set_backpointer) && \ - cmp(h, new, heap_peek(h)) >= 0) { \ - (h)->data[0] = new; \ - heap_set_backpointer(h, 0, set_backpointer); \ - heap_sift_down(h, 0, cmp, set_backpointer); \ - } \ -} while (0) - -#define heap_del(h, i, cmp, set_backpointer) \ -do { \ - size_t _i = (i); \ - \ - BUG_ON(_i >= (h)->used); \ - (h)->used--; \ - if ((_i) < (h)->used) { \ - heap_swap(h, _i, (h)->used, set_backpointer); \ - heap_sift_up(h, _i, cmp, set_backpointer); \ - heap_sift_down(h, _i, cmp, set_backpointer); \ - } \ -} while (0) - -#define heap_pop(h, d, cmp, set_backpointer) \ -({ \ - bool _r = (h)->used; \ - if (_r) { \ - (d) = (h)->data[0]; \ - heap_del(h, 0, cmp, set_backpointer); \ - } \ - _r; \ -}) - -#define heap_resort(heap, cmp, set_backpointer) \ -do { \ - ssize_t _i; \ - for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \ - heap_sift_down(heap, _i, cmp, set_backpointer); \ -} while (0) - #define ANYSINT_MAX(t) \ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 5ae8045f4df4..19fa49cd9907 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2150,5 +2150,5 @@ core_initcall(init_elf_binfmt); module_exit(exit_elf_binfmt); #ifdef CONFIG_BINFMT_ELF_KUNIT_TEST -#include "binfmt_elf_test.c" +#include "tests/binfmt_elf_kunit.c" #endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b592fc8cf368..0533d0f82dc9 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2981,8 +2981,7 @@ static int relocate_one_folio(struct reloc_control *rc, if (folio_test_readahead(folio)) page_cache_async_readahead(inode->i_mapping, ra, NULL, - folio, index, - last_index + 1 - index); + folio, last_index + 1 - index); if (!folio_test_uptodate(folio)) { btrfs_read_folio(NULL, folio); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fb3675f5bf50..4ca711a773ef 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5306,7 +5306,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) if (folio_test_readahead(folio)) page_cache_async_readahead(mapping, &sctx->ra, NULL, folio, - index, last_index + 1 - index); + last_index + 1 - index); if (!folio_test_uptodate(folio)) { btrfs_read_folio(NULL, folio); diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index e667dbcd20e8..a91acd03ee12 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -630,7 +630,7 @@ static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq) _enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start); - subreq->max_len = ULONG_MAX; + subreq->max_len = MAX_RW_COUNT; subreq->max_nr_segs = BIO_MAX_VECS; if (!cachefiles_cres_file(cres)) { diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c4941ba245ac..e98aa8219303 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3067,10 +3067,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, flags, &_got); WARN_ON_ONCE(ret == -EAGAIN); if (!ret) { +#ifdef CONFIG_DEBUG_FS struct ceph_mds_client *mdsc = fsc->mdsc; struct cap_wait cw; +#endif DEFINE_WAIT_FUNC(wait, woken_wake_function); +#ifdef CONFIG_DEBUG_FS cw.ino = ceph_ino(inode); cw.tgid = current->tgid; cw.need = need; @@ -3079,6 +3082,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, spin_lock(&mdsc->caps_list_lock); list_add(&cw.list, &mdsc->cap_wait_list); spin_unlock(&mdsc->caps_list_lock); +#endif /* make sure used fmode not timeout */ ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS); @@ -3097,9 +3101,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, remove_wait_queue(&ci->i_cap_wq, &wait); ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS); +#ifdef CONFIG_DEBUG_FS spin_lock(&mdsc->caps_list_lock); list_del(&cw.list); spin_unlock(&mdsc->caps_list_lock); +#endif if (ret == -EAGAIN) continue; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 82a2e2a06a65..18c72b305858 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -141,7 +141,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, if (ptr_pos >= i_size_read(dir)) return NULL; - if (!cache_ctl->page || ptr_pgoff != page_index(cache_ctl->page)) { + if (!cache_ctl->page || ptr_pgoff != cache_ctl->page->index) { ceph_readdir_cache_release(cache_ctl); cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff); if (!cache_ctl->page) { @@ -1589,7 +1589,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) } spin_lock(&mdsc->dentry_list_lock); - __dentry_dir_lease_touch(mdsc, di), + __dentry_dir_lease_touch(mdsc, di); spin_unlock(&mdsc->dentry_list_lock); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 249ddfbb1b03..8f8de8f33abb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1863,7 +1863,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, unsigned idx = ctl->index % nsize; pgoff_t pgoff = ctl->index / nsize; - if (!ctl->page || pgoff != page_index(ctl->page)) { + if (!ctl->page || pgoff != ctl->page->index) { ceph_readdir_cache_release(ctl); if (idx == 0) ctl->page = grab_cache_page(&dir->i_data, pgoff); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c2157f6e0c69..276e34ab3e2c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5446,6 +5446,8 @@ static void delayed_work(struct work_struct *work) } mutex_unlock(&mdsc->mutex); + ceph_flush_cap_releases(mdsc, s); + mutex_lock(&s->s_mutex); if (renew_caps) send_renew_caps(mdsc, s); @@ -5505,7 +5507,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; INIT_LIST_HEAD(&mdsc->cap_delay_list); +#ifdef CONFIG_DEBUG_FS INIT_LIST_HEAD(&mdsc->cap_wait_list); +#endif spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); INIT_LIST_HEAD(&mdsc->snap_flush_list); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index cfa18cf915a0..9bcc7f181bfe 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -416,6 +416,8 @@ struct ceph_quotarealm_inode { struct inode *inode; }; +#ifdef CONFIG_DEBUG_FS + struct cap_wait { struct list_head list; u64 ino; @@ -424,6 +426,8 @@ struct cap_wait { int want; }; +#endif + enum { CEPH_MDSC_STOPPING_BEGIN = 1, CEPH_MDSC_STOPPING_FLUSHING = 2, @@ -512,7 +516,9 @@ struct ceph_mds_client { spinlock_t caps_list_lock; struct list_head caps_list; /* unused (reserved or unreserved) */ +#ifdef CONFIG_DEBUG_FS struct list_head cap_wait_list; +#endif int caps_total_count; /* total caps allocated */ int caps_use_count; /* in use */ int caps_use_max; /* max used caps */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 885cb5d4e771..0cdf84cd1791 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -961,7 +961,8 @@ static int __init init_caches(void) if (!ceph_mds_request_cachep) goto bad_mds_req; - ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); + ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, + (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *)); if (!ceph_wb_pagevec_pool) goto bad_pagevec_pool; diff --git a/fs/coredump.c b/fs/coredump.c index a57a06b80f57..7f12ff6ad1d3 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -361,17 +361,16 @@ out: return ispipe; } -static int zap_process(struct task_struct *start, int exit_code) +static int zap_process(struct signal_struct *signal, int exit_code) { struct task_struct *t; int nr = 0; - /* Allow SIGKILL, see prepare_signal() */ - start->signal->flags = SIGNAL_GROUP_EXIT; - start->signal->group_exit_code = exit_code; - start->signal->group_stop_count = 0; + signal->flags = SIGNAL_GROUP_EXIT; + signal->group_exit_code = exit_code; + signal->group_stop_count = 0; - for_each_thread(start, t) { + __for_each_thread(signal, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); @@ -391,8 +390,9 @@ static int zap_threads(struct task_struct *tsk, spin_lock_irq(&tsk->sighand->siglock); if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) { + /* Allow SIGKILL, see prepare_signal() */ signal->core_state = core_state; - nr = zap_process(tsk, exit_code); + nr = zap_process(signal, exit_code); clear_tsk_thread_flag(tsk, TIF_SIGPENDING); tsk->flags |= PF_DUMPCORE; atomic_set(&core_state->nr_threads, nr); @@ -991,7 +991,7 @@ void validate_coredump_safety(void) } } -static int proc_dostring_coredump(struct ctl_table *table, int write, +static int proc_dostring_coredump(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int error = proc_dostring(table, write, buffer, lenp, ppos); diff --git a/fs/dcache.c b/fs/dcache.c index 8bdc278a0205..3d8daaecb6d1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -177,7 +177,7 @@ static long get_nr_dentry_negative(void) return sum < 0 ? 0 : sum; } -static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, +static int proc_nr_dentry(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index b9575957a7c2..d45ef541d848 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -48,7 +48,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) iput(toput_inode); } -int drop_caches_sysctl_handler(struct ctl_table *table, int write, +int drop_caches_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int ret; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 8be60797ea2f..1b7eba38ba1e 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -21,38 +21,32 @@ void erofs_put_metabuf(struct erofs_buf *buf) if (!buf->page) return; erofs_unmap_metabuf(buf); - put_page(buf->page); + folio_put(page_folio(buf->page)); buf->page = NULL; } -/* - * Derive the block size from inode->i_blkbits to make compatible with - * anonymous inode in fscache mode. - */ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, enum erofs_kmap_type type) { pgoff_t index = offset >> PAGE_SHIFT; - struct page *page = buf->page; - struct folio *folio; - unsigned int nofs_flag; + struct folio *folio = NULL; - if (!page || page->index != index) { + if (buf->page) { + folio = page_folio(buf->page); + if (folio_file_page(folio, index) != buf->page) + erofs_unmap_metabuf(buf); + } + if (!folio || !folio_contains(folio, index)) { erofs_put_metabuf(buf); - - nofs_flag = memalloc_nofs_save(); - folio = read_cache_folio(buf->mapping, index, NULL, NULL); - memalloc_nofs_restore(nofs_flag); + folio = read_mapping_folio(buf->mapping, index, NULL); if (IS_ERR(folio)) return folio; - - /* should already be PageUptodate, no need to lock page */ - page = folio_file_page(folio, index); - buf->page = page; } + buf->page = folio_file_page(folio, index); + if (buf->kmap_type == EROFS_NO_KMAP) { if (type == EROFS_KMAP) - buf->base = kmap_local_page(page); + buf->base = kmap_local_page(buf->page); buf->kmap_type = type; } else if (buf->kmap_type != type) { DBG_BUGON(1); diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 06a722b85a45..40666815046f 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -188,7 +188,7 @@ again: !rq->partial_decoding); buf.in_size = min(rq->inputsize, PAGE_SIZE - rq->pageofs_in); rq->inputsize -= buf.in_size; - buf.in = dctx.kin + rq->pageofs_in, + buf.in = dctx.kin + rq->pageofs_in; dctx.bounce = strm->bounce; do { dctx.avail_out = buf.out_size - buf.out_pos; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 5f6439a63af7..43c09aae2afc 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -334,14 +334,29 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, unsigned int query_flags) { struct inode *const inode = d_inode(path->dentry); + bool compressed = + erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout); - if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) + if (compressed) stat->attributes |= STATX_ATTR_COMPRESSED; - stat->attributes |= STATX_ATTR_IMMUTABLE; stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE); + /* + * Return the DIO alignment restrictions if requested. + * + * In EROFS, STATX_DIOALIGN is not supported in ondemand mode and + * compressed files, so in these cases we report no DIO support. + */ + if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { + stat->result_mask |= STATX_DIOALIGN; + if (!erofs_is_fscache_mode(inode->i_sb) && !compressed) { + stat->dio_mem_align = + bdev_logical_block_size(inode->i_sb->s_bdev); + stat->dio_offset_align = stat->dio_mem_align; + } + } generic_fillattr(idmap, request_mask, inode, stat); return 0; } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 35268263aaed..32ce5b35e1df 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -576,6 +576,21 @@ static const struct export_operations erofs_export_ops = { .get_parent = erofs_get_parent, }; +static void erofs_set_sysfs_name(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + if (erofs_is_fscache_mode(sb)) { + if (sbi->domain_id) + super_set_sysfs_name_generic(sb, "%s,%s",sbi->domain_id, + sbi->fsid); + else + super_set_sysfs_name_generic(sb, "%s", sbi->fsid); + return; + } + super_set_sysfs_name_id(sb); +} + static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; @@ -643,6 +658,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_flags |= SB_POSIXACL; else sb->s_flags &= ~SB_POSIXACL; + erofs_set_sysfs_name(sb); #ifdef CONFIG_EROFS_FS_ZIP xa_init(&sbi->managed_pslots); diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index b80f612867c2..9b53883e5caf 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -38,11 +38,13 @@ void *z_erofs_get_gbuf(unsigned int requiredpages) { struct z_erofs_gbuf *gbuf; + migrate_disable(); gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; spin_lock(&gbuf->lock); /* check if the buffer is too small */ if (requiredpages > gbuf->nrpages) { spin_unlock(&gbuf->lock); + migrate_enable(); /* (for sparse checker) pretend gbuf->lock is still taken */ __acquire(gbuf->lock); return NULL; @@ -57,6 +59,7 @@ void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock) gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; DBG_BUGON(gbuf->ptr != ptr); spin_unlock(&gbuf->lock); + migrate_enable(); } int z_erofs_gbuf_growsize(unsigned int nrpages) diff --git a/fs/exec.c b/fs/exec.c index a47d0e4c54f6..a126e3d1cacb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2204,7 +2204,7 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd, #ifdef CONFIG_SYSCTL -static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, +static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -2236,5 +2236,5 @@ fs_initcall(init_fs_exec_sysctls); #endif /* CONFIG_SYSCTL */ #ifdef CONFIG_EXEC_KUNIT_TEST -#include "exec_test.c" +#include "tests/exec_kunit.c" #endif diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 55d444bec5c0..bdd96329dddd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1186,6 +1186,11 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); } static bool __need_flush_quota(struct f2fs_sb_info *sbi) @@ -1575,11 +1580,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - percpu_counter_set(&sbi->rf_node_block_count, 0); - /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); /* Wait for all dirty meta pages to be submitted for IO */ @@ -1718,6 +1718,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } f2fs_restore_inmem_curseg(sbi); + f2fs_reinit_atgc_curseg(sbi); stat_inc_cp_count(sbi); stop: unblock_operations(sbi); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1ef82a546391..990b93689b46 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1100,7 +1100,7 @@ retry: struct bio *bio = NULL; ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, - &last_block_in_bio, false, true); + &last_block_in_bio, NULL, true); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc, true); if (ret) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b9b0debc6b3d..6457e5bca9c9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -925,6 +925,7 @@ alloc_new: #ifdef CONFIG_BLK_DEV_ZONED static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) { + struct block_device *bdev = sbi->sb->s_bdev; int devi = 0; if (f2fs_is_multi_device(sbi)) { @@ -935,8 +936,9 @@ static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) return false; } blkaddr -= FDEV(devi).start_blk; + bdev = FDEV(devi).bdev; } - return bdev_is_zoned(FDEV(devi).bdev) && + return bdev_is_zoned(bdev) && f2fs_blkz_is_seq(sbi, devi, blkaddr) && (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1); } @@ -2067,12 +2069,17 @@ static inline loff_t f2fs_readpage_limit(struct inode *inode) return i_size_read(inode); } +static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac) +{ + return rac ? REQ_RAHEAD : 0; +} + static int f2fs_read_single_page(struct inode *inode, struct folio *folio, unsigned nr_pages, struct f2fs_map_blocks *map, struct bio **bio_ret, sector_t *last_block_in_bio, - bool is_readahead) + struct readahead_control *rac) { struct bio *bio = *bio_ret; const unsigned blocksize = blks_to_bytes(inode, 1); @@ -2148,7 +2155,7 @@ submit_and_realloc: } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, index, + f2fs_ra_op_flags(rac), index, false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); @@ -2178,7 +2185,7 @@ out: #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write) + struct readahead_control *rac, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; @@ -2301,7 +2308,7 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, + f2fs_ra_op_flags(rac), page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); @@ -2399,7 +2406,7 @@ static int f2fs_mpage_readpages(struct inode *inode, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - rac != NULL, false); + rac, false); f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; @@ -2449,7 +2456,7 @@ next_page: ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - rac != NULL, false); + rac, false); f2fs_destroy_compress_ctx(&cc, false); } } @@ -2601,7 +2608,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_NOQUOTA(inode)) return true; - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return true; /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */ if (f2fs_compressed_file(inode) && @@ -2688,7 +2695,7 @@ got_it: } /* wait for GCed page writeback via META_MAPPING */ - if (fio->post_read) + if (fio->meta_gc) f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); /* @@ -2783,7 +2790,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .submitted = 0, .compr_blocks = compr_blocks, .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, - .post_read = f2fs_post_read_required(inode) ? 1 : 0, + .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 48048fa36427..fd1fc06359ee 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -19,33 +19,23 @@ #include "node.h" #include <trace/events/f2fs.h> -bool sanity_check_extent_cache(struct inode *inode) +bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - struct extent_tree *et = fi->extent_tree[EX_READ]; - struct extent_info *ei; - - if (!et) - return true; + struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct extent_info ei; - ei = &et->largest; - if (!ei->len) - return true; + get_read_extent_info(&ei, i_ext); - /* Let's drop, if checkpoint got corrupted. */ - if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) { - ei->len = 0; - et->largest_updated = true; + if (!ei.len) return true; - } - if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) || - !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) || + !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1, DATA_GENERIC_ENHANCE)) { f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", __func__, inode->i_ino, - ei->blk, ei->fofs, ei->len); + ei.blk, ei.fofs, ei.len); return false; } return true; @@ -394,24 +384,22 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ - if (i_ext && i_ext->len) { + if (i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); } - goto out; + set_inode_flag(inode, FI_NO_EXTENT); + return; } et = __grab_extent_tree(inode, EX_READ); - if (!i_ext || !i_ext->len) - goto out; - get_read_extent_info(&ei, i_ext); write_lock(&et->lock); - if (atomic_read(&et->node_cnt)) - goto unlock_out; + if (atomic_read(&et->node_cnt) || !ei.len) + goto skip; en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); @@ -423,11 +411,13 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) list_add_tail(&en->list, &eti->extent_list); spin_unlock(&eti->extent_lock); } -unlock_out: +skip: + /* Let's drop, if checkpoint got corrupted. */ + if (f2fs_cp_error(sbi)) { + et->largest.len = 0; + et->largest_updated = true; + } write_unlock(&et->lock); -out: - if (!F2FS_I(inode)->extent_tree[EX_READ]) - set_inode_flag(inode, FI_NO_EXTENT); } void f2fs_init_age_extent_tree(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8a9d910aa552..ac19c61f0c3e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -803,6 +803,7 @@ enum { FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ + FI_OPENED_FILE, /* indicate file has been opened */ FI_MAX, /* max flag, never be used */ }; @@ -842,7 +843,11 @@ struct f2fs_inode_info { struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached extent_tree entry */ - struct inode *cow_inode; /* copy-on-write inode for atomic write */ + union { + struct inode *cow_inode; /* copy-on-write inode for atomic write */ + struct inode *atomic_inode; + /* point to atomic_inode, available only for cow_inode */ + }; /* avoid racing between foreground op and gc */ struct f2fs_rwsem i_gc_rwsem[2]; @@ -1210,7 +1215,7 @@ struct f2fs_io_info { unsigned int in_list:1; /* indicate fio is in io_list */ unsigned int is_por:1; /* indicate IO is from recovery or not */ unsigned int encrypted:1; /* indicate file is encrypted */ - unsigned int post_read:1; /* require post read */ + unsigned int meta_gc:1; /* require meta inode GC */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ @@ -3222,21 +3227,15 @@ static inline bool f2fs_need_compress_data(struct inode *inode) return false; } -static inline unsigned int addrs_per_inode(struct inode *inode) +static inline unsigned int addrs_per_page(struct inode *inode, + bool is_inode) { - unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - - get_inline_xattr_addrs(inode); + unsigned int addrs = is_inode ? (CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode)) : DEF_ADDRS_PER_BLOCK; - if (!f2fs_compressed_file(inode)) - return addrs; - return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); -} - -static inline unsigned int addrs_per_block(struct inode *inode) -{ - if (!f2fs_compressed_file(inode)) - return DEF_ADDRS_PER_BLOCK; - return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size); + if (f2fs_compressed_file(inode)) + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); + return addrs; } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -3706,6 +3705,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, @@ -4163,7 +4163,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); -bool f2fs_sanity_check_inline_data(struct inode *inode); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage); bool f2fs_may_inline_dentry(struct inode *inode); void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage); void f2fs_truncate_inline_inode(struct inode *inode, @@ -4204,7 +4204,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -bool sanity_check_extent_cache(struct inode *inode); +bool sanity_check_extent_cache(struct inode *inode, struct page *ipage); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); @@ -4275,6 +4275,16 @@ static inline bool f2fs_post_read_required(struct inode *inode) f2fs_compressed_file(inode); } +static inline bool f2fs_used_in_atomic_write(struct inode *inode) +{ + return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode); +} + +static inline bool f2fs_meta_inode_gc_required(struct inode *inode) +{ + return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode); +} + /* * compress.c */ @@ -4310,7 +4320,7 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, unsigned int llen, unsigned int c_len); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write); + struct readahead_control *rac, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); @@ -4401,22 +4411,18 @@ static inline int set_compress_context(struct inode *inode) { #ifdef CONFIG_F2FS_FS_COMPRESSION struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); - F2FS_I(inode)->i_compress_algorithm = - F2FS_OPTION(sbi).compress_algorithm; - F2FS_I(inode)->i_log_cluster_size = - F2FS_OPTION(sbi).compress_log_size; - F2FS_I(inode)->i_compress_flag = - F2FS_OPTION(sbi).compress_chksum ? - BIT(COMPRESS_CHKSUM) : 0; - F2FS_I(inode)->i_cluster_size = - BIT(F2FS_I(inode)->i_log_cluster_size); - if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 || - F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) && + fi->i_compress_algorithm = F2FS_OPTION(sbi).compress_algorithm; + fi->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size; + fi->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ? + BIT(COMPRESS_CHKSUM) : 0; + fi->i_cluster_size = BIT(fi->i_log_cluster_size); + if ((fi->i_compress_algorithm == COMPRESS_LZ4 || + fi->i_compress_algorithm == COMPRESS_ZSTD) && F2FS_OPTION(sbi).compress_level) - F2FS_I(inode)->i_compress_level = - F2FS_OPTION(sbi).compress_level; - F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); inc_compr_inode_stat(inode); @@ -4431,15 +4437,15 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - f2fs_down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&fi->i_sem); if (!f2fs_compressed_file(inode)) { - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return true; } if (f2fs_is_mmap_file(inode) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return false; } @@ -4448,7 +4454,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return true; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c1ad9b278c47..168f08507004 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -554,6 +554,42 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +static int finish_preallocate_blocks(struct inode *inode) +{ + int ret; + + inode_lock(inode); + if (is_inode_flag_set(inode, FI_OPENED_FILE)) { + inode_unlock(inode); + return 0; + } + + if (!file_should_truncate(inode)) { + set_inode_flag(inode, FI_OPENED_FILE); + inode_unlock(inode); + return 0; + } + + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + filemap_invalidate_lock(inode->i_mapping); + + truncate_setsize(inode, i_size_read(inode)); + ret = f2fs_truncate(inode); + + filemap_invalidate_unlock(inode->i_mapping); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + if (!ret) + set_inode_flag(inode, FI_OPENED_FILE); + + inode_unlock(inode); + if (ret) + return ret; + + file_dont_truncate(inode); + return 0; +} + static int f2fs_file_open(struct inode *inode, struct file *filp) { int err = fscrypt_file_open(inode, filp); @@ -571,7 +607,11 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) filp->f_mode |= FMODE_NOWAIT; filp->f_mode |= FMODE_CAN_ODIRECT; - return dquot_file_open(inode, filp); + err = dquot_file_open(inode, filp); + if (err) + return err; + + return finish_preallocate_blocks(inode); } void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) @@ -825,6 +865,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) return true; if (f2fs_compressed_file(inode)) return true; + if (f2fs_has_inline_data(inode)) + return true; /* disallow direct IO if any of devices has unaligned blksize */ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) @@ -937,6 +979,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct f2fs_inode_info *fi = F2FS_I(inode); int err; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) @@ -955,7 +998,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EOPNOTSUPP; if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && !IS_ALIGNED(attr->ia_size, - F2FS_BLK_TO_BYTES(F2FS_I(inode)->i_cluster_size))) + F2FS_BLK_TO_BYTES(fi->i_cluster_size))) return -EINVAL; } @@ -1009,7 +1052,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_setsize(inode, attr->ia_size); @@ -1021,14 +1064,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, * larger than i_size. */ filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; - spin_lock(&F2FS_I(inode)->i_size_lock); + spin_lock(&fi->i_size_lock); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); - F2FS_I(inode)->last_disk_size = i_size_read(inode); - spin_unlock(&F2FS_I(inode)->i_size_lock); + fi->last_disk_size = i_size_read(inode); + spin_unlock(&fi->i_size_lock); } __setattr_copy(idmap, inode, attr); @@ -1038,7 +1081,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) - inode->i_mode = F2FS_I(inode)->i_acl_mode; + inode->i_mode = fi->i_acl_mode; clear_inode_flag(inode, FI_ACL_MODE); } } @@ -1946,15 +1989,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (err) return err; - f2fs_down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&fi->i_sem); if (!f2fs_may_compress(inode) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return -EINVAL; } err = set_compress_context(inode); - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); if (err) return err; @@ -2139,6 +2182,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + /* Set the COW inode's atomic_inode to the atomic inode */ + F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); @@ -3541,6 +3587,7 @@ next: static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int released_blocks = 0; @@ -3578,7 +3625,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) goto out; - if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + if (!atomic_read(&fi->i_compr_blocks)) { ret = -EPERM; goto out; } @@ -3587,7 +3634,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3613,7 +3660,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); ret = release_compress_blocks(&dn, count); @@ -3629,7 +3676,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); out: if (released_blocks) f2fs_update_time(sbi, REQ_TIME); @@ -3640,14 +3687,14 @@ out: if (ret >= 0) { ret = put_user(released_blocks, (u64 __user *)arg); } else if (released_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " "iblocks=%llu, released=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, released_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; @@ -3736,6 +3783,7 @@ next: static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int reserved_blocks = 0; @@ -3761,10 +3809,10 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) + if (atomic_read(&fi->i_compr_blocks)) goto unlock_inode; - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3790,7 +3838,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); ret = reserve_compress_blocks(&dn, count, &reserved_blocks); @@ -3805,7 +3853,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); @@ -3821,14 +3869,14 @@ unlock_inode: if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " "iblocks=%llu, reserved=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, reserved_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; @@ -3891,7 +3939,9 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) return -EOPNOTSUPP; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || @@ -4017,7 +4067,7 @@ out: f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); err: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4052,6 +4102,7 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_comp_option option; int ret = 0; @@ -4071,7 +4122,9 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) option.algorithm >= COMPRESS_MAX) return -EINVAL; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); f2fs_down_write(&F2FS_I(inode)->i_sem); @@ -4090,27 +4143,27 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) goto out; } - F2FS_I(inode)->i_compress_algorithm = option.algorithm; - F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; - F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size); + fi->i_compress_algorithm = option.algorithm; + fi->i_log_cluster_size = option.log_cluster_size; + fi->i_cluster_size = BIT(option.log_cluster_size); /* Set default level */ - if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) - F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + if (fi->i_compress_algorithm == COMPRESS_ZSTD) + fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; else - F2FS_I(inode)->i_compress_level = 0; + fi->i_compress_level = 0; /* Adjust mount option level */ if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && F2FS_OPTION(sbi).compress_level) - F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; f2fs_mark_inode_dirty_sync(inode, true); if (!f2fs_is_compress_backend_ready(inode)) f2fs_warn(sbi, "compression algorithm is successfully set, " "but current kernel doesn't support this algorithm."); out: - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4167,7 +4220,9 @@ static int f2fs_ioc_decompress_file(struct file *filp) f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4222,7 +4277,7 @@ static int f2fs_ioc_decompress_file(struct file *filp) f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4244,7 +4299,9 @@ static int f2fs_ioc_compress_file(struct file *filp) f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4300,7 +4357,7 @@ static int f2fs_ioc_compress_file(struct file *filp) f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6066c6eecf41..724bbcb447d3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1171,7 +1171,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static int ra_data_block(struct inode *inode, pgoff_t index) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct address_space *mapping = inode->i_mapping; + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct dnode_of_data dn; struct page *page; struct f2fs_io_info fio = { @@ -1260,6 +1261,8 @@ put_page: static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .ino = inode->i_ino, @@ -1282,7 +1285,7 @@ static int move_data_block(struct inode *inode, block_t bidx, CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ - page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); + page = f2fs_grab_cache_page(mapping, bidx, false); if (!page) return -ENOMEM; @@ -1563,6 +1566,16 @@ next_step: continue; } + if (f2fs_has_inline_data(inode)) { + iput(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "inode %lx has both inline_data flag and " + "data block, nid=%u, ofs_in_node=%u", + inode->i_ino, dni.nid, ofs_in_node); + continue; + } + err = f2fs_gc_pinned_control(inode, gc_type, segno); if (err == -EAGAIN) { iput(inode); @@ -1579,7 +1592,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) { + if (f2fs_meta_inode_gc_required(inode)) { int err = ra_data_block(inode, start_bidx); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -1630,7 +1643,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) + if (f2fs_meta_inode_gc_required(inode)) err = move_data_block(inode, start_bidx, gc_type, segno, off); else @@ -1638,7 +1651,7 @@ next_step: segno, off); if (!err && (gc_type == FG_GC || - f2fs_post_read_required(inode))) + f2fs_meta_inode_gc_required(inode))) submitted++; if (locked) { @@ -1742,7 +1755,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (type != GET_SUM_TYPE((&sum->footer))) { f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_SUMMARY); goto skip; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7638d0d7b7ee..cca7d448e55c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,7 +16,7 @@ static bool support_inline_data(struct inode *inode) { - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return false; if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; @@ -33,11 +33,29 @@ bool f2fs_may_inline_data(struct inode *inode) return !f2fs_post_read_required(inode); } -bool f2fs_sanity_check_inline_data(struct inode *inode) +static bool inode_has_blocks(struct inode *inode, struct page *ipage) +{ + struct f2fs_inode *ri = F2FS_INODE(ipage); + int i; + + if (F2FS_HAS_BLOCKS(inode)) + return true; + + for (i = 0; i < DEF_NIDS_PER_INODE; i++) { + if (ri->i_nid[i]) + return true; + } + return false; +} + +bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage) { if (!f2fs_has_inline_data(inode)) return false; + if (inode_has_blocks(inode, ipage)) + return false; + if (!support_inline_data(inode)) return true; @@ -203,8 +221,10 @@ int f2fs_convert_inline_inode(struct inode *inode) struct page *ipage, *page; int err = 0; - if (!f2fs_has_inline_data(inode) || - f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + if (f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + return -EROFS; + + if (!f2fs_has_inline_data(inode)) return 0; err = f2fs_dquot_initialize(inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 005dde72aff3..aef57172014f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -29,6 +29,9 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (is_inode_flag_set(inode, FI_NEW_INODE)) return; + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return; + if (f2fs_inode_dirtied(inode, sync)) return; @@ -310,10 +313,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (!sanity_check_compress_inode(inode, ri)) return false; } - } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", - __func__, inode->i_ino); - return false; } if (!f2fs_sb_has_extra_attr(sbi)) { @@ -344,7 +343,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } - if (f2fs_sanity_check_inline_data(inode)) { + if (f2fs_sanity_check_inline_data(inode, node_page)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; @@ -508,16 +507,16 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - /* Need all the flag bits */ - f2fs_init_read_extent_tree(inode, node_page); - f2fs_init_age_extent_tree(inode); - - if (!sanity_check_extent_cache(inode)) { + if (!sanity_check_extent_cache(inode, node_page)) { f2fs_put_page(node_page, 1); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } + /* Need all the flag bits */ + f2fs_init_read_extent_tree(inode, node_page); + f2fs_init_age_extent_tree(inode); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -610,14 +609,6 @@ make_now: } f2fs_set_inode_flags(inode); - if (file_should_truncate(inode) && - !is_sbi_flag_set(sbi, SBI_POR_DOING)) { - ret = f2fs_truncate(inode); - if (ret) - goto bad_inode; - file_dont_truncate(inode); - } - unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; @@ -645,8 +636,9 @@ retry: void f2fs_update_inode(struct inode *inode, struct page *node_page) { + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_inode *ri; - struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_tree *et = fi->extent_tree[EX_READ]; f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); @@ -656,7 +648,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri = F2FS_INODE(node_page); ri->i_mode = cpu_to_le16(inode->i_mode); - ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_advise = fi->i_advise; ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); @@ -682,58 +674,49 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); ri->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); if (S_ISDIR(inode->i_mode)) - ri->i_current_depth = - cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_current_depth = cpu_to_le32(fi->i_current_depth); else if (S_ISREG(inode->i_mode)) - ri->i_gc_failures = cpu_to_le16(F2FS_I(inode)->i_gc_failures); - ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); - ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); - ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_gc_failures = cpu_to_le16(fi->i_gc_failures); + ri->i_xattr_nid = cpu_to_le32(fi->i_xattr_nid); + ri->i_flags = cpu_to_le32(fi->i_flags); + ri->i_pino = cpu_to_le32(fi->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); - ri->i_dir_level = F2FS_I(inode)->i_dir_level; + ri->i_dir_level = fi->i_dir_level; if (f2fs_has_extra_attr(inode)) { - ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + ri->i_extra_isize = cpu_to_le16(fi->i_extra_isize); if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = - cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + cpu_to_le16(fi->i_inline_xattr_size); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_projid)) { + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) { projid_t i_projid; - i_projid = from_kprojid(&init_user_ns, - F2FS_I(inode)->i_projid); + i_projid = from_kprojid(&init_user_ns, fi->i_projid); ri->i_projid = cpu_to_le32(i_projid); } if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_crtime)) { - ri->i_crtime = - cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec); - ri->i_crtime_nsec = - cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + ri->i_crtime = cpu_to_le64(fi->i_crtime.tv_sec); + ri->i_crtime_nsec = cpu_to_le32(fi->i_crtime.tv_nsec); } if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_compress_flag)) { unsigned short compress_flag; - ri->i_compr_blocks = - cpu_to_le64(atomic_read( - &F2FS_I(inode)->i_compr_blocks)); - ri->i_compress_algorithm = - F2FS_I(inode)->i_compress_algorithm; - compress_flag = F2FS_I(inode)->i_compress_flag | - F2FS_I(inode)->i_compress_level << + ri->i_compr_blocks = cpu_to_le64( + atomic_read(&fi->i_compr_blocks)); + ri->i_compress_algorithm = fi->i_compress_algorithm; + compress_flag = fi->i_compress_flag | + fi->i_compress_level << COMPRESS_LEVEL_OFFSET; ri->i_compress_flag = cpu_to_le16(compress_flag); - ri->i_log_cluster_size = - F2FS_I(inode)->i_log_cluster_size; + ri->i_log_cluster_size = fi->i_log_cluster_size; } } @@ -813,8 +796,9 @@ void f2fs_evict_inode(struct inode *inode) f2fs_abort_atomic_write(inode, true); - if (fi->cow_inode) { + if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) { clear_inode_flag(fi->cow_inode, FI_COW_FILE); + F2FS_I(fi->cow_inode)->atomic_inode = NULL; iput(fi->cow_inode); fi->cow_inode = NULL; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1ecde2b45e99..38b4750475db 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -221,6 +221,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, const char *name) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct f2fs_inode_info *fi; nid_t ino; struct inode *inode; bool nid_free = false; @@ -241,14 +242,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, inode_init_owner(idmap, inode, dir, mode); + fi = F2FS_I(inode); inode->i_ino = ino; inode->i_blocks = 0; simple_inode_init_ts(inode); - F2FS_I(inode)->i_crtime = inode_get_mtime(inode); + fi->i_crtime = inode_get_mtime(inode); inode->i_generation = get_random_u32(); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_current_depth = 1; + fi->i_current_depth = 1; err = insert_inode_locked(inode); if (err) { @@ -258,9 +260,9 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) - F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; + fi->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + fi->i_projid = make_kprojid(&init_user_ns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -278,7 +280,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); - F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; + fi->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } if (test_opt(sbi, INLINE_XATTR)) @@ -296,15 +298,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, f2fs_has_inline_dentry(inode)) { xattr_size = DEFAULT_INLINE_XATTR_ADDRS; } - F2FS_I(inode)->i_inline_xattr_size = xattr_size; + fi->i_inline_xattr_size = xattr_size; - F2FS_I(inode)->i_flags = + fi->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; + fi->i_flags |= F2FS_INDEX_FL; - if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) + if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); /* Check compression first. */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 8712e264071f..9756f0f2b7f7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -280,6 +280,7 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; @@ -302,12 +303,12 @@ static int recover_inode(struct inode *inode, struct page *page) i_projid = (projid_t)le32_to_cpu(raw->i_projid); kprojid = make_kprojid(&init_user_ns, i_projid); - if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) { + if (!projid_eq(kprojid, fi->i_projid)) { err = f2fs_transfer_project_quota(inode, kprojid); if (err) return err; - F2FS_I(inode)->i_projid = kprojid; + fi->i_projid = kprojid; } } } @@ -320,10 +321,10 @@ static int recover_inode(struct inode *inode, struct page *page) inode_set_mtime(inode, le64_to_cpu(raw->i_mtime), le32_to_cpu(raw->i_mtime_nsec)); - F2FS_I(inode)->i_advise = raw->i_advise; - F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + fi->i_advise = raw->i_advise; + fi->i_flags = le32_to_cpu(raw->i_flags); f2fs_set_inode_flags(inode); - F2FS_I(inode)->i_gc_failures = le16_to_cpu(raw->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a0ce3d080f80..78c3198a6308 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2784,11 +2784,19 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) unsigned short seg_type = curseg->seg_type; sanity_check_seg_type(sbi, seg_type); - if (f2fs_need_rand_seg(sbi)) - return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); + if (__is_large_section(sbi)) { + if (f2fs_need_rand_seg(sbi)) { + unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno); - if (__is_large_section(sbi)) + if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint) + return curseg->segno; + return get_random_u32_inclusive(curseg->segno + 1, + GET_SEG_FROM_SEC(sbi, hint + 1) - 1); + } return curseg->segno; + } else if (f2fs_need_rand_seg(sbi)) { + return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); + } /* inmem log may not locate on any segment after mount */ if (!curseg->inited) @@ -2931,12 +2939,12 @@ static int get_atssr_segment(struct f2fs_sb_info *sbi, int type, return ret; } -static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) +static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); int ret = 0; - if (!sbi->am.atgc_enabled) + if (!sbi->am.atgc_enabled && !force) return 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); @@ -2953,9 +2961,30 @@ static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) f2fs_up_read(&SM_I(sbi)->curseg_lock); return ret; } + int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) { - return __f2fs_init_atgc_curseg(sbi); + return __f2fs_init_atgc_curseg(sbi, false); +} + +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi) +{ + int ret; + + if (!test_opt(sbi, ATGC)) + return 0; + if (sbi->am.atgc_enabled) + return 0; + if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) < + sbi->am.age_threshold) + return 0; + + ret = __f2fs_init_atgc_curseg(sbi, true); + if (!ret) { + sbi->am.atgc_enabled = true; + f2fs_info(sbi, "reenabled age threshold GC"); + } + return ret; } static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) @@ -3483,7 +3512,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && - (fio->sbi->gc_mode != GC_URGENT_HIGH)) + (fio->sbi->gc_mode != GC_URGENT_HIGH) && + __is_valid_data_blkaddr(fio->old_blkaddr) && + !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; @@ -3828,7 +3859,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } - if (fio->post_read) + if (fio->meta_gc) f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1); stat_inc_inplace_blocks(fio->sbi); @@ -3998,7 +4029,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; if (!__is_valid_data_blkaddr(blkaddr)) @@ -4017,7 +4048,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; for (i = 0; i < len; i++) @@ -5187,7 +5218,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) } /* Allocate a new section if it's not new. */ - if (cs->next_blkoff) { + if (cs->next_blkoff || + cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) { unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff; f2fs_allocate_new_section(sbi, type, true); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e1c0f418aa11..bfc01a521cb9 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -347,7 +347,8 @@ static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno, bool use_section) { if (use_section && __is_large_section(sbi)) { - unsigned int start_segno = START_SEGNO(segno); + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int blocks = 0; int i; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index df4cf31f93df..3959fd137cc9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -151,8 +151,6 @@ enum { Opt_mode, Opt_fault_injection, Opt_fault_type, - Opt_lazytime, - Opt_nolazytime, Opt_quota, Opt_noquota, Opt_usrquota, @@ -229,8 +227,6 @@ static match_table_t f2fs_tokens = { {Opt_mode, "mode=%s"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_fault_type, "fault_type=%u"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, {Opt_quota, "quota"}, {Opt_noquota, "noquota"}, {Opt_usrquota, "usrquota"}, @@ -918,12 +914,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) f2fs_info(sbi, "fault_type options not supported"); break; #endif - case Opt_lazytime: - sb->s_flags |= SB_LAZYTIME; - break; - case Opt_nolazytime: - sb->s_flags &= ~SB_LAZYTIME; - break; #ifdef CONFIG_QUOTA case Opt_quota: case Opt_usrquota: @@ -4481,6 +4471,7 @@ try_onemore: sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid)); + super_set_sysfs_name_bdev(sb); sb->s_iflags |= SB_I_CGROUPWB; /* init f2fs-specific super block info */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 09d3ecfaa4f1..fee7ee45ceaa 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -340,13 +340,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) { struct ckpt_req_control *cprc = &sbi->cprc_info; int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio); - int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio); + int level = IOPRIO_PRIO_LEVEL(cprc->ckpt_thread_ioprio); if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE) return -EINVAL; return sysfs_emit(buf, "%s,%d\n", - class == IOPRIO_CLASS_RT ? "rt" : "be", data); + class == IOPRIO_CLASS_RT ? "rt" : "be", level); } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -450,7 +450,7 @@ out: const char *name = strim((char *)buf); struct ckpt_req_control *cprc = &sbi->cprc_info; int class; - long data; + long level; int ret; if (!strncmp(name, "rt,", 3)) @@ -461,13 +461,13 @@ out: return -EINVAL; name += 3; - ret = kstrtol(name, 10, &data); + ret = kstrtol(name, 10, &level); if (ret) return ret; - if (data >= IOPRIO_NR_LEVELS || data < 0) + if (level >= IOPRIO_NR_LEVELS || level < 0) return -EINVAL; - cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data); + cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, level); if (test_opt(sbi, MERGE_CHECKPOINT)) { ret = set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio); diff --git a/fs/file_table.c b/fs/file_table.c index 4f03beed4737..ca7843dde56d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(get_max_files); /* * Handle nr_files sysctl */ -static int proc_nr_files(struct ctl_table *table, int write, void *buffer, +static int proc_nr_files(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 92a5b8283528..b865a3fa52f3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2413,7 +2413,7 @@ static int __init start_dirtytime_writeback(void) } __initcall(start_dirtytime_writeback); -int dirtytime_interval_handler(struct ctl_table *table, int write, +int dirtytime_interval_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 0239e3af3945..8b39c15c408c 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -63,9 +63,10 @@ struct hostfs_stat { struct hostfs_timespec atime, mtime, ctime; unsigned int blksize; unsigned long long blocks; - unsigned int maj; - unsigned int min; - dev_t dev; + struct { + unsigned int maj; + unsigned int min; + } rdev, dev; }; extern int stat_file(const char *path, struct hostfs_stat *p, int fd); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 3eb747d26924..22df574ca99e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -17,6 +17,7 @@ #include <linux/writeback.h> #include <linux/mount.h> #include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/namei.h> #include "hostfs.h" #include <init.h> @@ -455,7 +456,7 @@ static int hostfs_read_folio(struct file *file, struct folio *folio) if (bytes_read < 0) ret = bytes_read; else - buffer = folio_zero_tail(folio, bytes_read, buffer); + buffer = folio_zero_tail(folio, bytes_read, buffer + bytes_read); kunmap_local(buffer); folio_end_read(folio, ret == 0); @@ -532,10 +533,11 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st) static int hostfs_inode_set(struct inode *ino, void *data) { struct hostfs_stat *st = data; - dev_t rdev; + dev_t dev, rdev; /* Reencode maj and min with the kernel encoding.*/ - rdev = MKDEV(st->maj, st->min); + rdev = MKDEV(st->rdev.maj, st->rdev.min); + dev = MKDEV(st->dev.maj, st->dev.min); switch (st->mode & S_IFMT) { case S_IFLNK: @@ -561,7 +563,7 @@ static int hostfs_inode_set(struct inode *ino, void *data) return -EIO; } - HOSTFS_I(ino)->dev = st->dev; + HOSTFS_I(ino)->dev = dev; ino->i_ino = st->ino; ino->i_mode = st->mode; return hostfs_inode_update(ino, st); @@ -570,8 +572,9 @@ static int hostfs_inode_set(struct inode *ino, void *data) static int hostfs_inode_test(struct inode *inode, void *data) { const struct hostfs_stat *st = data; + dev_t dev = MKDEV(st->dev.maj, st->dev.min); - return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == st->dev; + return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev; } static struct inode *hostfs_iget(struct super_block *sb, char *name) @@ -927,7 +930,6 @@ static const struct inode_operations hostfs_link_iops = { static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct hostfs_fs_info *fsi = sb->s_fs_info; - const char *host_root = fc->source; struct inode *root_inode; int err; @@ -941,15 +943,6 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - /* NULL is printed as '(null)' by printf(): avoid that. */ - if (fc->source == NULL) - host_root = ""; - - fsi->host_root_path = - kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); - if (fsi->host_root_path == NULL) - return -ENOMEM; - root_inode = hostfs_iget(sb, fsi->host_root_path); if (IS_ERR(root_inode)) return PTR_ERR(root_inode); @@ -975,6 +968,58 @@ static int hostfs_fill_super(struct super_block *sb, struct fs_context *fc) return 0; } +enum hostfs_parma { + Opt_hostfs, +}; + +static const struct fs_parameter_spec hostfs_param_specs[] = { + fsparam_string_empty("hostfs", Opt_hostfs), + {} +}; + +static int hostfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct hostfs_fs_info *fsi = fc->s_fs_info; + struct fs_parse_result result; + char *host_root; + int opt; + + opt = fs_parse(fc, hostfs_param_specs, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_hostfs: + host_root = param->string; + if (!*host_root) + host_root = ""; + fsi->host_root_path = + kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); + if (fsi->host_root_path == NULL) + return -ENOMEM; + break; + } + + return 0; +} + +static int hostfs_parse_monolithic(struct fs_context *fc, void *data) +{ + struct hostfs_fs_info *fsi = fc->s_fs_info; + char *host_root = (char *)data; + + /* NULL is printed as '(null)' by printf(): avoid that. */ + if (host_root == NULL) + host_root = ""; + + fsi->host_root_path = + kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); + if (fsi->host_root_path == NULL) + return -ENOMEM; + + return 0; +} + static int hostfs_fc_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, hostfs_fill_super); @@ -992,6 +1037,8 @@ static void hostfs_fc_free(struct fs_context *fc) } static const struct fs_context_operations hostfs_context_ops = { + .parse_monolithic = hostfs_parse_monolithic, + .parse_param = hostfs_parse_param, .get_tree = hostfs_fc_get_tree, .free = hostfs_fc_free, }; @@ -1040,4 +1087,5 @@ static void __exit exit_hostfs(void) module_init(init_hostfs) module_exit(exit_hostfs) +MODULE_DESCRIPTION("User-Mode Linux Host filesystem"); MODULE_LICENSE("GPL"); diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 840619e39a1a..97e9c40a9448 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -34,9 +34,10 @@ static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) p->mtime.tv_nsec = 0; p->blksize = buf->st_blksize; p->blocks = buf->st_blocks; - p->maj = os_major(buf->st_rdev); - p->min = os_minor(buf->st_rdev); - p->dev = buf->st_dev; + p->rdev.maj = os_major(buf->st_rdev); + p->rdev.min = os_minor(buf->st_rdev); + p->dev.maj = os_major(buf->st_dev); + p->dev.min = os_minor(buf->st_dev); } int stat_file(const char *path, struct hostfs_stat *p, int fd) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 81dab95f67ed..9f6cff356796 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -222,13 +222,13 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long flags) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct hstate *h = hstate_file(file); const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); if (len & ~huge_page_mask(h)) return -EINVAL; - if (len > TASK_SIZE) + if (len > mmap_end - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) { @@ -239,9 +239,10 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) { addr = ALIGN(addr, huge_page_size(h)); - vma = find_vma(mm, addr); - if (mmap_end - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) + vma = find_vma_prev(mm, addr, &prev); + if (mmap_end - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) return addr; } @@ -422,7 +423,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma, if (!ptep) return false; - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(vma->vm_mm, addr, ptep); if (huge_pte_none(pte) || !pte_present(pte)) return false; @@ -892,7 +893,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, error = PTR_ERR(folio); goto out; } - clear_huge_page(&folio->page, addr, pages_per_huge_page(h)); + folio_zero_user(folio, ALIGN_DOWN(addr, hpage_size)); __folio_mark_uptodate(folio); error = hugetlb_add_to_page_cache(folio, mapping, index); if (unlikely(error)) { @@ -1128,10 +1129,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping, hugetlb_set_folio_subpool(src, NULL); } - if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(dst, src); - else - folio_migrate_flags(dst, src); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/inode.c b/fs/inode.c index f356fe2ec2b6..86670941884b 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -107,7 +107,7 @@ long get_nr_dirty_inodes(void) */ static struct inodes_stat_t inodes_stat; -static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer, +static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); @@ -676,6 +676,16 @@ static void evict(struct inode *inode) remove_inode_hash(inode); + /* + * Wake up waiters in __wait_on_freeing_inode(). + * + * Lockless hash lookup may end up finding the inode before we removed + * it above, but only lock it *after* we are done with the wakeup below. + * In this case the potential waiter cannot safely block. + * + * The inode being unhashed after the call to remove_inode_hash() is + * used as an indicator whether blocking on it is safe. + */ spin_lock(&inode->i_lock); wake_up_bit(&inode->i_state, __I_NEW); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); @@ -888,18 +898,18 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } -static void __wait_on_freeing_inode(struct inode *inode, bool locked); +static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked); /* * Called with the inode lock held. */ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), - void *data, bool locked) + void *data, bool is_inode_hash_locked) { struct inode *inode = NULL; - if (locked) + if (is_inode_hash_locked) lockdep_assert_held(&inode_hash_lock); else lockdep_assert_not_held(&inode_hash_lock); @@ -913,7 +923,7 @@ repeat: continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode, locked); + __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { @@ -936,11 +946,11 @@ repeat: */ static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino, - bool locked) + bool is_inode_hash_locked) { struct inode *inode = NULL; - if (locked) + if (is_inode_hash_locked) lockdep_assert_held(&inode_hash_lock); else lockdep_assert_not_held(&inode_hash_lock); @@ -954,7 +964,7 @@ repeat: continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode, locked); + __wait_on_freeing_inode(inode, is_inode_hash_locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { @@ -2287,19 +2297,29 @@ EXPORT_SYMBOL(inode_needs_sync); * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. */ -static void __wait_on_freeing_inode(struct inode *inode, bool locked) +static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + + /* + * Handle racing against evict(), see that routine for more details. + */ + if (unlikely(inode_unhashed(inode))) { + WARN_ON(is_inode_hash_locked); + spin_unlock(&inode->i_lock); + return; + } + wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); rcu_read_unlock(); - if (locked) + if (is_inode_hash_locked) spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq, &wait.wq_entry); - if (locked) + if (is_inode_hash_locked) spin_lock(&inode_hash_lock); rcu_read_lock(); } diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig index 7c96bc107218..560187d61562 100644 --- a/fs/jffs2/Kconfig +++ b/fs/jffs2/Kconfig @@ -151,8 +151,9 @@ config JFFS2_RUBIN RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure. choice - prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS + prompt "JFFS2 default compression mode" default JFFS2_CMODE_PRIORITY + depends on JFFS2_COMPRESSION_OPTIONS depends on JFFS2_FS help You can set here the default compression mode of JFFS2 from diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index cb3cda1390ad..5713994328cb 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1626,6 +1626,8 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) } else if (rc == -ENOSPC) { /* search for next smaller log2 block */ l2nb = BLKSTOL2(nblocks) - 1; + if (unlikely(l2nb < 0)) + break; nblocks = 1LL << l2nb; } else { /* Trim any already allocated blocks */ diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 031d8f570f58..5d3127ca68a4 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -834,6 +834,8 @@ int dtInsert(tid_t tid, struct inode *ip, * the full page. */ DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); + if (p->header.freelist == 0) + return -EINVAL; /* * insert entry for new key diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 2ec35889ad24..1407feccbc2d 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -290,7 +290,7 @@ int diSync(struct inode *ipimap) int diRead(struct inode *ip) { struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); - int iagno, ino, extno, rc; + int iagno, ino, extno, rc, agno; struct inode *ipimap; struct dinode *dp; struct iag *iagp; @@ -339,8 +339,11 @@ int diRead(struct inode *ip) /* get the ag for the iag */ agstart = le64_to_cpu(iagp->agstart); + agno = BLKTOAG(agstart, JFS_SBI(ip->i_sb)); release_metapage(mp); + if (agno >= MAXAG || agno < 0) + return -EIO; rel_inode = (ino & (INOSPERPAGE - 1)); pageno = blkno >> sbi->l2nbperpage; diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 9609349e92e5..270808b6219b 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1600,7 +1600,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) mp, sizeof(struct metapage), 0); print_hex_dump(KERN_ERR, "page: ", DUMP_PREFIX_ADDRESS, 16, - sizeof(long), mp->page, + sizeof(long), mp->folio, sizeof(struct page), 0); } else print_hex_dump(KERN_ERR, "tblock:", diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 961569c11159..df575a873ec6 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -4,6 +4,7 @@ * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ +#include <linux/blkdev.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/module.h> @@ -46,9 +47,9 @@ static inline void __lock_metapage(struct metapage *mp) do { set_current_state(TASK_UNINTERRUPTIBLE); if (metapage_locked(mp)) { - unlock_page(mp->page); + folio_unlock(mp->folio); io_schedule(); - lock_page(mp->page); + folio_lock(mp->folio); } } while (trylock_metapage(mp)); __set_current_state(TASK_RUNNING); @@ -56,7 +57,7 @@ static inline void __lock_metapage(struct metapage *mp) } /* - * Must have mp->page locked + * Must have mp->folio locked */ static inline void lock_metapage(struct metapage *mp) { @@ -75,36 +76,36 @@ static mempool_t *metapage_mempool; struct meta_anchor { int mp_count; atomic_t io_count; + blk_status_t status; struct metapage *mp[MPS_PER_PAGE]; }; -#define mp_anchor(page) ((struct meta_anchor *)page_private(page)) -static inline struct metapage *page_to_mp(struct page *page, int offset) +static inline struct metapage *folio_to_mp(struct folio *folio, int offset) { - if (!PagePrivate(page)) + struct meta_anchor *anchor = folio->private; + + if (!anchor) return NULL; - return mp_anchor(page)->mp[offset >> L2PSIZE]; + return anchor->mp[offset >> L2PSIZE]; } -static inline int insert_metapage(struct page *page, struct metapage *mp) +static inline int insert_metapage(struct folio *folio, struct metapage *mp) { struct meta_anchor *a; int index; int l2mp_blocks; /* log2 blocks per metapage */ - if (PagePrivate(page)) - a = mp_anchor(page); - else { + a = folio->private; + if (!a) { a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS); if (!a) return -ENOMEM; - set_page_private(page, (unsigned long)a); - SetPagePrivate(page); - kmap(page); + folio_attach_private(folio, a); + kmap(&folio->page); } if (mp) { - l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + l2mp_blocks = L2PSIZE - folio->mapping->host->i_blkbits; index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); a->mp_count++; a->mp[index] = mp; @@ -113,10 +114,10 @@ static inline int insert_metapage(struct page *page, struct metapage *mp) return 0; } -static inline void remove_metapage(struct page *page, struct metapage *mp) +static inline void remove_metapage(struct folio *folio, struct metapage *mp) { - struct meta_anchor *a = mp_anchor(page); - int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + struct meta_anchor *a = folio->private; + int l2mp_blocks = L2PSIZE - folio->mapping->host->i_blkbits; int index; index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); @@ -126,48 +127,53 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) a->mp[index] = NULL; if (--a->mp_count == 0) { kfree(a); - set_page_private(page, 0); - ClearPagePrivate(page); - kunmap(page); + folio_detach_private(folio); + kunmap(&folio->page); } } -static inline void inc_io(struct page *page) +static inline void inc_io(struct folio *folio) { - atomic_inc(&mp_anchor(page)->io_count); + struct meta_anchor *anchor = folio->private; + + atomic_inc(&anchor->io_count); } -static inline void dec_io(struct page *page, void (*handler) (struct page *)) +static inline void dec_io(struct folio *folio, blk_status_t status, + void (*handler)(struct folio *, blk_status_t)) { - if (atomic_dec_and_test(&mp_anchor(page)->io_count)) - handler(page); + struct meta_anchor *anchor = folio->private; + + if (anchor->status == BLK_STS_OK) + anchor->status = status; + + if (atomic_dec_and_test(&anchor->io_count)) + handler(folio, anchor->status); } #else -static inline struct metapage *page_to_mp(struct page *page, int offset) +static inline struct metapage *folio_to_mp(struct folio *folio, int offset) { - return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; + return folio->private; } -static inline int insert_metapage(struct page *page, struct metapage *mp) +static inline int insert_metapage(struct folio *folio, struct metapage *mp) { if (mp) { - set_page_private(page, (unsigned long)mp); - SetPagePrivate(page); - kmap(page); + folio_attach_private(folio, mp); + kmap(&folio->page); } return 0; } -static inline void remove_metapage(struct page *page, struct metapage *mp) +static inline void remove_metapage(struct folio *folio, struct metapage *mp) { - set_page_private(page, 0); - ClearPagePrivate(page); - kunmap(page); + folio_detach_private(folio); + kunmap(&folio->page); } -#define inc_io(page) do {} while(0) -#define dec_io(page, handler) handler(page) +#define inc_io(folio) do {} while(0) +#define dec_io(folio, status, handler) handler(folio, status) #endif @@ -218,12 +224,12 @@ void metapage_exit(void) kmem_cache_destroy(metapage_cache); } -static inline void drop_metapage(struct page *page, struct metapage *mp) +static inline void drop_metapage(struct folio *folio, struct metapage *mp) { if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) || test_bit(META_io, &mp->flag)) return; - remove_metapage(page, mp); + remove_metapage(folio, mp); INCREMENT(mpStat.pagefree); free_metapage(mp); } @@ -257,23 +263,20 @@ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, return lblock; } -static void last_read_complete(struct page *page) +static void last_read_complete(struct folio *folio, blk_status_t status) { - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); + if (status) + printk(KERN_ERR "Read error %d at %#llx\n", status, + folio_pos(folio)); + + folio_end_read(folio, status == 0); } static void metapage_read_end_io(struct bio *bio) { - struct page *page = bio->bi_private; - - if (bio->bi_status) { - printk(KERN_ERR "metapage_read_end_io: I/O error\n"); - SetPageError(page); - } + struct folio *folio = bio->bi_private; - dec_io(page, last_read_complete); + dec_io(folio, bio->bi_status, last_read_complete); bio_put(bio); } @@ -299,13 +302,19 @@ static void remove_from_logsync(struct metapage *mp) LOGSYNC_UNLOCK(log, flags); } -static void last_write_complete(struct page *page) +static void last_write_complete(struct folio *folio, blk_status_t status) { struct metapage *mp; unsigned int offset; + if (status) { + int err = blk_status_to_errno(status); + printk(KERN_ERR "metapage_write_end_io: I/O error\n"); + mapping_set_error(folio->mapping, err); + } + for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { - mp = page_to_mp(page, offset); + mp = folio_to_mp(folio, offset); if (mp && test_bit(META_io, &mp->flag)) { if (mp->lsn) remove_from_logsync(mp); @@ -316,28 +325,25 @@ static void last_write_complete(struct page *page) * safe unless I have the page locked */ } - end_page_writeback(page); + folio_end_writeback(folio); } static void metapage_write_end_io(struct bio *bio) { - struct page *page = bio->bi_private; + struct folio *folio = bio->bi_private; - BUG_ON(!PagePrivate(page)); + BUG_ON(!folio->private); - if (bio->bi_status) { - printk(KERN_ERR "metapage_write_end_io: I/O error\n"); - SetPageError(page); - } - dec_io(page, last_write_complete); + dec_io(folio, bio->bi_status, last_write_complete); bio_put(bio); } -static int metapage_writepage(struct page *page, struct writeback_control *wbc) +static int metapage_write_folio(struct folio *folio, + struct writeback_control *wbc, void *unused) { struct bio *bio = NULL; int block_offset; /* block offset of mp within page */ - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; int len; int xlen; @@ -353,14 +359,13 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) int offset; int bad_blocks = 0; - page_start = (sector_t)page->index << - (PAGE_SHIFT - inode->i_blkbits); - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); + page_start = folio_pos(folio) >> inode->i_blkbits; + BUG_ON(!folio_test_locked(folio)); + BUG_ON(folio_test_writeback(folio)); + folio_start_writeback(folio); for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { - mp = page_to_mp(page, offset); + mp = folio_to_mp(folio, offset); if (!mp || !test_bit(META_dirty, &mp->flag)) continue; @@ -389,22 +394,20 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) continue; } /* Not contiguous */ - if (bio_add_page(bio, page, bio_bytes, bio_offset) < - bio_bytes) - goto add_failed; + bio_add_folio_nofail(bio, folio, bio_bytes, bio_offset); /* * Increment counter before submitting i/o to keep * count from hitting zero before we're through */ - inc_io(page); + inc_io(folio); if (!bio->bi_iter.bi_size) goto dump_bio; submit_bio(bio); nr_underway++; bio = NULL; } else - inc_io(page); - xlen = (PAGE_SIZE - offset) >> inode->i_blkbits; + inc_io(folio); + xlen = (folio_size(folio) - offset) >> inode->i_blkbits; pblock = metapage_get_blocks(inode, lblock, &xlen); if (!pblock) { printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); @@ -420,7 +423,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) bio = bio_alloc(inode->i_sb->s_bdev, 1, REQ_OP_WRITE, GFP_NOFS); bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_write_end_io; - bio->bi_private = page; + bio->bi_private = folio; /* Don't call bio_add_page yet, we may add to this vec */ bio_offset = offset; @@ -430,8 +433,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) next_block = lblock + len; } if (bio) { - if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes) - goto add_failed; + bio_add_folio_nofail(bio, folio, bio_bytes, bio_offset); if (!bio->bi_iter.bi_size) goto dump_bio; @@ -439,50 +441,56 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) nr_underway++; } if (redirty) - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); - unlock_page(page); + folio_unlock(folio); if (bad_blocks) goto err_out; if (nr_underway == 0) - end_page_writeback(page); + folio_end_writeback(folio); return 0; -add_failed: - /* We should never reach here, since we're only adding one vec */ - printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); - goto skip; dump_bio: print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16, 4, bio, sizeof(*bio), 0); -skip: bio_put(bio); - unlock_page(page); - dec_io(page, last_write_complete); + folio_unlock(folio); + dec_io(folio, BLK_STS_OK, last_write_complete); err_out: while (bad_blocks--) - dec_io(page, last_write_complete); + dec_io(folio, BLK_STS_OK, last_write_complete); return -EIO; } +static int metapage_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct blk_plug plug; + int err; + + blk_start_plug(&plug); + err = write_cache_pages(mapping, wbc, metapage_write_folio, NULL); + blk_finish_plug(&plug); + + return err; +} + static int metapage_read_folio(struct file *fp, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct bio *bio = NULL; int block_offset; - int blocks_per_page = i_blocks_per_page(inode, page); + int blocks_per_page = i_blocks_per_folio(inode, folio); sector_t page_start; /* address of page in fs blocks */ sector_t pblock; int xlen; unsigned int len; int offset; - BUG_ON(!PageLocked(page)); - page_start = (sector_t)page->index << - (PAGE_SHIFT - inode->i_blkbits); + BUG_ON(!folio_test_locked(folio)); + page_start = folio_pos(folio) >> inode->i_blkbits; block_offset = 0; while (block_offset < blocks_per_page) { @@ -490,9 +498,9 @@ static int metapage_read_folio(struct file *fp, struct folio *folio) pblock = metapage_get_blocks(inode, page_start + block_offset, &xlen); if (pblock) { - if (!PagePrivate(page)) - insert_metapage(page, NULL); - inc_io(page); + if (!folio->private) + insert_metapage(folio, NULL); + inc_io(folio); if (bio) submit_bio(bio); @@ -501,11 +509,10 @@ static int metapage_read_folio(struct file *fp, struct folio *folio) bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_read_end_io; - bio->bi_private = page; + bio->bi_private = folio; len = xlen << inode->i_blkbits; offset = block_offset << inode->i_blkbits; - if (bio_add_page(bio, page, len, offset) < len) - goto add_failed; + bio_add_folio_nofail(bio, folio, len, offset); block_offset += xlen; } else block_offset++; @@ -513,15 +520,9 @@ static int metapage_read_folio(struct file *fp, struct folio *folio) if (bio) submit_bio(bio); else - unlock_page(page); + folio_unlock(folio); return 0; - -add_failed: - printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); - bio_put(bio); - dec_io(page, last_read_complete); - return -EIO; } static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask) @@ -531,7 +532,7 @@ static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask) int offset; for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { - mp = page_to_mp(&folio->page, offset); + mp = folio_to_mp(folio, offset); if (!mp) continue; @@ -546,7 +547,7 @@ static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask) } if (mp->lsn) remove_from_logsync(mp); - remove_metapage(&folio->page, mp); + remove_metapage(folio, mp); INCREMENT(mpStat.pagefree); free_metapage(mp); } @@ -565,7 +566,7 @@ static void metapage_invalidate_folio(struct folio *folio, size_t offset, const struct address_space_operations jfs_metapage_aops = { .read_folio = metapage_read_folio, - .writepage = metapage_writepage, + .writepages = metapage_writepages, .release_folio = metapage_release_folio, .invalidate_folio = metapage_invalidate_folio, .dirty_folio = filemap_dirty_folio, @@ -579,7 +580,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, int l2bsize; struct address_space *mapping; struct metapage *mp = NULL; - struct page *page; + struct folio *folio; unsigned long page_index; unsigned long page_offset; @@ -610,22 +611,22 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, } if (new && (PSIZE == PAGE_SIZE)) { - page = grab_cache_page(mapping, page_index); - if (!page) { - jfs_err("grab_cache_page failed!"); + folio = filemap_grab_folio(mapping, page_index); + if (IS_ERR(folio)) { + jfs_err("filemap_grab_folio failed!"); return NULL; } - SetPageUptodate(page); + folio_mark_uptodate(folio); } else { - page = read_mapping_page(mapping, page_index, NULL); - if (IS_ERR(page)) { + folio = read_mapping_folio(mapping, page_index, NULL); + if (IS_ERR(folio)) { jfs_err("read_mapping_page failed!"); return NULL; } - lock_page(page); + folio_lock(folio); } - mp = page_to_mp(page, page_offset); + mp = folio_to_mp(folio, page_offset); if (mp) { if (mp->logical_size != size) { jfs_error(inode->i_sb, @@ -651,16 +652,16 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, mp = alloc_metapage(GFP_NOFS); if (!mp) goto unlock; - mp->page = page; + mp->folio = folio; mp->sb = inode->i_sb; mp->flag = 0; mp->xflag = COMMIT_PAGE; mp->count = 1; mp->nohomeok = 0; mp->logical_size = size; - mp->data = page_address(page) + page_offset; + mp->data = folio_address(folio) + page_offset; mp->index = lblock; - if (unlikely(insert_metapage(page, mp))) { + if (unlikely(insert_metapage(folio, mp))) { free_metapage(mp); goto unlock; } @@ -672,28 +673,27 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, memset(mp->data, 0, PSIZE); } - unlock_page(page); + folio_unlock(folio); jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data); return mp; unlock: - unlock_page(page); + folio_unlock(folio); return NULL; } void grab_metapage(struct metapage * mp) { jfs_info("grab_metapage: mp = 0x%p", mp); - get_page(mp->page); - lock_page(mp->page); + folio_get(mp->folio); + folio_lock(mp->folio); mp->count++; lock_metapage(mp); - unlock_page(mp->page); + folio_unlock(mp->folio); } -static int metapage_write_one(struct page *page) +static int metapage_write_one(struct folio *folio) { - struct folio *folio = page_folio(page); struct address_space *mapping = folio->mapping; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -707,7 +707,7 @@ static int metapage_write_one(struct page *page) if (folio_clear_dirty_for_io(folio)) { folio_get(folio); - ret = metapage_writepage(page, &wbc); + ret = metapage_write_folio(folio, &wbc, NULL); if (ret == 0) folio_wait_writeback(folio); folio_put(folio); @@ -722,71 +722,69 @@ static int metapage_write_one(struct page *page) void force_metapage(struct metapage *mp) { - struct page *page = mp->page; + struct folio *folio = mp->folio; jfs_info("force_metapage: mp = 0x%p", mp); set_bit(META_forcewrite, &mp->flag); clear_bit(META_sync, &mp->flag); - get_page(page); - lock_page(page); - set_page_dirty(page); - if (metapage_write_one(page)) + folio_get(folio); + folio_lock(folio); + folio_mark_dirty(folio); + if (metapage_write_one(folio)) jfs_error(mp->sb, "metapage_write_one() failed\n"); clear_bit(META_forcewrite, &mp->flag); - put_page(page); + folio_put(folio); } void hold_metapage(struct metapage *mp) { - lock_page(mp->page); + folio_lock(mp->folio); } void put_metapage(struct metapage *mp) { if (mp->count || mp->nohomeok) { /* Someone else will release this */ - unlock_page(mp->page); + folio_unlock(mp->folio); return; } - get_page(mp->page); + folio_get(mp->folio); mp->count++; lock_metapage(mp); - unlock_page(mp->page); + folio_unlock(mp->folio); release_metapage(mp); } void release_metapage(struct metapage * mp) { - struct page *page = mp->page; + struct folio *folio = mp->folio; jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); - BUG_ON(!page); - - lock_page(page); + folio_lock(folio); unlock_metapage(mp); assert(mp->count); if (--mp->count || mp->nohomeok) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return; } if (test_bit(META_dirty, &mp->flag)) { - set_page_dirty(page); + folio_mark_dirty(folio); if (test_bit(META_sync, &mp->flag)) { clear_bit(META_sync, &mp->flag); - if (metapage_write_one(page)) + if (metapage_write_one(folio)) jfs_error(mp->sb, "metapage_write_one() failed\n"); - lock_page(page); + folio_lock(folio); } } else if (mp->lsn) /* discard_metapage doesn't remove it */ remove_from_logsync(mp); /* Try to keep metapages from using up too much memory */ - drop_metapage(page, mp); + drop_metapage(folio, mp); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } void __invalidate_metapages(struct inode *ip, s64 addr, int len) @@ -798,7 +796,6 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) struct address_space *mapping = JFS_SBI(ip->i_sb)->direct_inode->i_mapping; struct metapage *mp; - struct page *page; unsigned int offset; /* @@ -807,11 +804,12 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) */ for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len; lblock += BlocksPerPage) { - page = find_lock_page(mapping, lblock >> l2BlocksPerPage); - if (!page) + struct folio *folio = filemap_lock_folio(mapping, + lblock >> l2BlocksPerPage); + if (IS_ERR(folio)) continue; for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) { - mp = page_to_mp(page, offset); + mp = folio_to_mp(folio, offset); if (!mp) continue; if (mp->index < addr) @@ -824,8 +822,8 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) if (mp->lsn) remove_from_logsync(mp); } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } } diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 4179f9df4deb..2e5015c2705b 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -24,7 +24,7 @@ struct metapage { wait_queue_head_t wait; /* implementation */ - struct page *page; + struct folio *folio; struct super_block *sb; unsigned int logical_size; @@ -90,14 +90,14 @@ static inline void discard_metapage(struct metapage *mp) static inline void metapage_nohomeok(struct metapage *mp) { - struct page *page = mp->page; - lock_page(page); + struct folio *folio = mp->folio; + folio_lock(folio); if (!mp->nohomeok++) { mark_metapage_dirty(mp); - get_page(page); - wait_on_page_writeback(page); + folio_get(folio); + folio_wait_writeback(folio); } - unlock_page(page); + folio_unlock(folio); } /* @@ -107,7 +107,7 @@ static inline void metapage_nohomeok(struct metapage *mp) static inline void metapage_wait_for_io(struct metapage *mp) { if (test_bit(META_io, &mp->flag)) - wait_on_page_writeback(mp->page); + folio_wait_writeback(mp->folio); } /* @@ -116,7 +116,7 @@ static inline void metapage_wait_for_io(struct metapage *mp) static inline void _metapage_homeok(struct metapage *mp) { if (!--mp->nohomeok) - put_page(mp->page); + folio_put(mp->folio); } static inline void metapage_homeok(struct metapage *mp) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 9987055293b3..2999ed5d83f5 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -797,7 +797,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, size_t buf_size) { struct jfs_ea_list *ealist; - struct jfs_ea *ea; + struct jfs_ea *ea, *ealist_end; struct ea_buffer ea_buf; int xattr_size; ssize_t size; @@ -817,9 +817,16 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, goto not_found; ealist = (struct jfs_ea_list *) ea_buf.xattr; + ealist_end = END_EALIST(ealist); /* Find the named attribute */ - for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) + for (ea = FIRST_EA(ealist); ea < ealist_end; ea = NEXT_EA(ea)) { + if (unlikely(ea + 1 > ealist_end) || + unlikely(NEXT_EA(ea) > ealist_end)) { + size = -EUCLEAN; + goto release; + } + if ((namelen == ea->namelen) && memcmp(name, ea->name, namelen) == 0) { /* Found it */ @@ -834,6 +841,7 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, memcpy(data, value, size); goto release; } + } not_found: size = -ENODATA; release: @@ -861,7 +869,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) ssize_t size = 0; int xattr_size; struct jfs_ea_list *ealist; - struct jfs_ea *ea; + struct jfs_ea *ea, *ealist_end; struct ea_buffer ea_buf; down_read(&JFS_IP(inode)->xattr_sem); @@ -876,9 +884,16 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) goto release; ealist = (struct jfs_ea_list *) ea_buf.xattr; + ealist_end = END_EALIST(ealist); /* compute required size of list */ - for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { + for (ea = FIRST_EA(ealist); ea < ealist_end; ea = NEXT_EA(ea)) { + if (unlikely(ea + 1 > ealist_end) || + unlikely(NEXT_EA(ea) > ealist_end)) { + size = -EUCLEAN; + goto release; + } + if (can_list(ea)) size += name_size(ea) + 1; } diff --git a/fs/locks.c b/fs/locks.c index bdd94c32256f..9afb16e0683f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2570,8 +2570,9 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, error = do_lock_file_wait(filp, cmd, file_lock); /* - * Attempt to detect a close/fcntl race and recover by releasing the - * lock that was just acquired. There is no need to do that when we're + * Detect close/fcntl races and recover by zapping all POSIX locks + * associated with this file and our files_struct, just like on + * filp_flush(). There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && @@ -2586,9 +2587,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { - file_lock->c.flc_type = F_UNLCK; - error = do_lock_file_wait(filp, cmd, file_lock); - WARN_ON_ONCE(error); + locks_remove_posix(filp, files); error = -EBADF; } } diff --git a/fs/namei.c b/fs/namei.c index 3a4c40e12f78..5512cb10fa89 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3248,9 +3248,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new file + * @dir: inode of the parent directory + * @dentry: dentry of the child file + * @mode: mode of the child file * @want_excl: whether the file must not yet exist * * Create a new file. @@ -4047,9 +4047,9 @@ EXPORT_SYMBOL(user_path_create); /** * vfs_mknod - create device node or file * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new device node or file + * @dir: inode of the parent directory + * @dentry: dentry of the child device node + * @mode: mode of the child device node * @dev: device number of device to create * * Create a device node or file. @@ -4174,9 +4174,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d /** * vfs_mkdir - create directory * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory - * @mode: mode of the new directory + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @mode: mode of the child directory * * Create a directory. * @@ -4256,8 +4256,8 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /** * vfs_rmdir - remove directory * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory + * @dir: inode of the parent directory + * @dentry: dentry of the child directory * * Remove a directory. * @@ -4537,8 +4537,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) /** * vfs_symlink - create symlink * @idmap: idmap of the mount the inode was found from - * @dir: inode of @dentry - * @dentry: pointer to dentry of the base directory + * @dir: inode of the parent directory + * @dentry: dentry of the child symlink file * @oldname: name of the file to link to * * Create a symlink. diff --git a/fs/namespace.c b/fs/namespace.c index 221db9de4729..328087a4df8a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -70,7 +70,7 @@ static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); /* Don't allow confusion with old 32bit mount ID */ -#define MNT_UNIQUE_ID_OFFSET (1ULL << 32) +#define MNT_UNIQUE_ID_OFFSET (1ULL << 31) static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET); static struct hlist_head *mount_hashtable __ro_after_init; diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig index bec805e0c44c..1b78e8b65ebc 100644 --- a/fs/netfs/Kconfig +++ b/fs/netfs/Kconfig @@ -22,6 +22,14 @@ config NETFS_STATS between CPUs. On the other hand, the stats are very useful for debugging purposes. Saying 'Y' here is recommended. +config NETFS_DEBUG + bool "Enable dynamic debugging netfslib and FS-Cache" + depends on NETFS + help + This permits debugging to be dynamically enabled in the local caching + management module. If this is set, the debugging output may be + enabled by setting bits in /sys/module/netfs/parameters/debug. + config FSCACHE bool "General filesystem local caching manager" depends on NETFS_SUPPORT @@ -50,13 +58,3 @@ config FSCACHE_STATS debugging purposes. Saying 'Y' here is recommended. See Documentation/filesystems/caching/fscache.rst for more information. - -config FSCACHE_DEBUG - bool "Debug FS-Cache" - depends on FSCACHE - help - This permits debugging to be dynamically enabled in the local caching - management module. If this is set, the debugging output may be - enabled by setting bits in /sys/modules/fscache/parameter/debug. - - See Documentation/filesystems/caching/fscache.rst for more information. diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 4c0401dbbfcf..a688d4c75d99 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { if (folio->index == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) - kdebug("no unlock"); + _debug("no unlock"); else folio_unlock(folio); } @@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl) struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); int ret; - kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); + _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); if (readahead_count(ractl) == 0) return; @@ -268,10 +268,10 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct folio *sink = NULL; int ret; - kenter("%lx", folio->index); + _enter("%lx", folio->index); rreq = netfs_alloc_request(mapping, file, - folio_file_pos(folio), folio_size(folio), + folio_pos(folio), folio_size(folio), NETFS_READPAGE); if (IS_ERR(rreq)) { ret = PTR_ERR(rreq); @@ -470,7 +470,7 @@ retry: } rreq = netfs_alloc_request(mapping, file, - folio_file_pos(folio), folio_size(folio), + folio_pos(folio), folio_size(folio), NETFS_READ_FOR_WRITE); if (IS_ERR(rreq)) { ret = PTR_ERR(rreq); @@ -508,7 +508,7 @@ retry: have_folio: *_folio = folio; - kleave(" = 0"); + _leave(" = 0"); return 0; error_put: @@ -518,7 +518,7 @@ error: folio_unlock(folio); folio_put(folio); } - kleave(" = %d", ret); + _leave(" = %d", ret); return ret; } EXPORT_SYMBOL(netfs_write_begin); @@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t flen = folio_size(folio); int ret; - kenter("%zx @%llx", flen, start); + _enter("%zx @%llx", flen, start); ret = -ENOMEM; @@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, error_put: netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); error: - kleave(" = %d", ret); + _leave(" = %d", ret); return ret; } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index ecbc99ec7d36..4726c315453c 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -54,9 +54,9 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx, { struct netfs_folio *finfo = netfs_folio_info(folio); struct netfs_group *group = netfs_folio_group(folio); - loff_t pos = folio_file_pos(folio); + loff_t pos = folio_pos(folio); - kenter(""); + _enter(""); if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) return NETFS_FLUSH_CONTENT; @@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, */ howto = netfs_how_to_modify(ctx, file, folio, netfs_group, flen, offset, part, maybe_trouble); - kdebug("howto %u", howto); + _debug("howto %u", howto); switch (howto) { case NETFS_JUST_PREFETCH: ret = netfs_prefetch_for_write(file, folio, offset, part); if (ret < 0) { - kdebug("prefetch = %zd", ret); + _debug("prefetch = %zd", ret); goto error_folio_unlock; } break; @@ -418,7 +418,7 @@ out: } iocb->ki_pos += written; - kleave(" = %zd [%zd]", written, ret); + _leave(" = %zd [%zd]", written, ret); return written ? written : ret; error_folio_unlock: @@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct netfs_inode *ictx = netfs_inode(inode); ssize_t ret; - kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; @@ -529,7 +529,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr vm_fault_t ret = VM_FAULT_RETRY; int err; - kenter("%lx", folio->index); + _enter("%lx", folio->index); sb_start_pagefault(inode->i_sb); diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index b6debac6205f..10a1e4da6bda 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i size_t orig_count = iov_iter_count(iter); bool async = !is_sync_kiocb(iocb); - kenter(""); + _enter(""); if (!orig_count) return 0; /* Don't update atime */ diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 792ef17bae21..88f2adfab75e 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * size_t len = iov_iter_count(iter); bool async = !is_sync_kiocb(iocb); - kenter(""); + _enter(""); /* We're going to need a bounce buffer if what we transmit is going to * be different in some way to the source buffer, e.g. because it gets @@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * */ // TODO - kdebug("uw %llx-%llx", start, end); + _debug("uw %llx-%llx", start, end); wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start, iocb->ki_flags & IOCB_DIRECT ? @@ -96,7 +96,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * wreq->cleanup = netfs_cleanup_dio_write; ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { - kdebug("begin = %zd", ret); + _debug("begin = %zd", ret); goto out; } @@ -143,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) loff_t pos = iocb->ki_pos; unsigned long long end = pos + iov_iter_count(from) - 1; - kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); + _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c index 288a73c3072d..9397ed39b0b4 100644 --- a/fs/netfs/fscache_cache.c +++ b/fs/netfs/fscache_cache.c @@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache, { int n_accesses; - kenter("{%s,%s}", ops->name, cache->name); + _enter("{%s,%s}", ops->name, cache->name); BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING); @@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache, up_write(&fscache_addremove_sem); pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name); - kleave(" = 0 [%s]", cache->name); + _leave(" = 0 [%s]", cache->name); return 0; } EXPORT_SYMBOL(fscache_add_cache); diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index 4d1e8bf4c615..bce2492186d0 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie( { struct fscache_cookie *cookie; - kenter("V=%x", volume->debug_id); + _enter("V=%x", volume->debug_id); if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255) return NULL; @@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie( trace_fscache_acquire(cookie); fscache_stat(&fscache_n_acquires_ok); - kleave(" = c=%08x", cookie->debug_id); + _leave(" = c=%08x", cookie->debug_id); return cookie; } EXPORT_SYMBOL(__fscache_acquire_cookie); @@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed; bool need_withdraw = false; - kenter(""); + _enter(""); if (!cookie->volume->cache_priv) { fscache_create_volume(cookie->volume, true); @@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) if (cookie->state != FSCACHE_COOKIE_STATE_FAILED) fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); need_withdraw = true; - kleave(" [fail]"); + _leave(" [fail]"); goto out; } @@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) bool queue = false; int n_active; - kenter("c=%08x", cookie->debug_id); + _enter("c=%08x", cookie->debug_id); if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), "Trying to use relinquished cookie\n")) @@ -636,7 +636,7 @@ again: spin_unlock(&cookie->lock); if (queue) fscache_queue_cookie(cookie, fscache_cookie_get_use_work); - kleave(""); + _leave(""); } EXPORT_SYMBOL(__fscache_use_cookie); @@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie) enum fscache_cookie_state state; bool wake = false; - kenter("c=%x", cookie->debug_id); + _enter("c=%x", cookie->debug_id); again: spin_lock(&cookie->lock); @@ -820,7 +820,7 @@ out: spin_unlock(&cookie->lock); if (wake) wake_up_cookie_state(cookie); - kleave(""); + _leave(""); } static void fscache_cookie_worker(struct work_struct *work) @@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie) set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags); spin_unlock(&cookie->lock); fscache_stat(&fscache_n_cookies_lru_expired); - kdebug("lru c=%x", cookie->debug_id); + _debug("lru c=%x", cookie->debug_id); __fscache_withdraw_cookie(cookie); } @@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) if (retire) fscache_stat(&fscache_n_relinquishes_retire); - kenter("c=%08x{%d},%d", + _enter("c=%08x{%d},%d", cookie->debug_id, atomic_read(&cookie->n_active), retire); if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), @@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, { bool is_caching; - kenter("c=%x", cookie->debug_id); + _enter("c=%x", cookie->debug_id); fscache_stat(&fscache_n_invalidates); @@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */ default: spin_unlock(&cookie->lock); - kleave(" [no %u]", cookie->state); + _leave(" [no %u]", cookie->state); return; case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, fallthrough; case FSCACHE_COOKIE_STATE_CREATING: spin_unlock(&cookie->lock); - kleave(" [look %x]", cookie->inval_counter); + _leave(" [look %x]", cookie->inval_counter); return; case FSCACHE_COOKIE_STATE_ACTIVE: @@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, if (is_caching) fscache_queue_cookie(cookie, fscache_cookie_get_inval_work); - kleave(" [inv]"); + _leave(" [inv]"); return; } } diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index bf4eaeec44fb..38637e5c9b57 100644 --- a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres, again: if (!fscache_cache_is_live(cookie->volume->cache)) { - kleave(" [broken]"); + _leave(" [broken]"); return false; } state = fscache_cookie_state(cookie); - kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_CREATING: @@ -52,7 +52,7 @@ again: case FSCACHE_COOKIE_STATE_DROPPED: case FSCACHE_COOKIE_STATE_RELINQUISHING: default: - kleave(" [not live]"); + _leave(" [not live]"); return false; } @@ -92,7 +92,7 @@ again: spin_lock(&cookie->lock); state = fscache_cookie_state(cookie); - kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -140,7 +140,7 @@ failed: cres->cache_priv = NULL; cres->ops = NULL; fscache_end_cookie_access(cookie, fscache_access_io_not_live); - kleave(" = -ENOBUFS"); + _leave(" = -ENOBUFS"); return -ENOBUFS; } @@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, if (len == 0) goto abandon; - kenter("%llx,%zx", start, len); + _enter("%llx,%zx", start, len); wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS); if (!wreq) diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c index bf9b33d26e31..42e98bb523e3 100644 --- a/fs/netfs/fscache_main.c +++ b/fs/netfs/fscache_main.c @@ -99,7 +99,7 @@ error_wq: */ void __exit fscache_exit(void) { - kenter(""); + _enter(""); kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c index 2e2a405ca9b0..cb75c07b5281 100644 --- a/fs/netfs/fscache_volume.c +++ b/fs/netfs/fscache_volume.c @@ -264,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, fscache_see_volume(volume, fscache_volume_new_acquire); fscache_stat(&fscache_n_volumes); up_write(&fscache_addremove_sem); - kleave(" = v=%x", volume->debug_id); + _leave(" = v=%x", volume->debug_id); return volume; err_vol: @@ -466,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume) { int n_accesses; - kdebug("withdraw V=%x", volume->debug_id); + _debug("withdraw V=%x", volume->debug_id); /* Allow wakeups on dec-to-0 */ n_accesses = atomic_dec_return(&volume->n_accesses); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 21e46bc9aa49..7773f3d855a9 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -34,6 +34,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync); /* * main.c */ +extern unsigned int netfs_debug; extern struct list_head netfs_io_requests; extern spinlock_t netfs_proc_lock; extern mempool_t netfs_request_pool; @@ -353,12 +354,42 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait); * debug tracing */ #define dbgprintk(FMT, ...) \ - pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) +#ifdef __KDEBUG +#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) +#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) +#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) + +#elif defined(CONFIG_NETFS_DEBUG) +#define _enter(FMT, ...) \ +do { \ + if (netfs_debug) \ + kenter(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT, ...) \ +do { \ + if (netfs_debug) \ + kleave(FMT, ##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT, ...) \ +do { \ + if (netfs_debug) \ + kdebug(FMT, ##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) +#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) +#endif + /* * assertions */ diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c7576481c321..c93851b98368 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq, if (count == remaining) return; - kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", + _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", rreq->debug_id, subreq->debug_index, iov_iter_count(&subreq->io_iter), subreq->transferred, subreq->len, rreq->i_size, @@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq, struct netfs_io_request *rreq = subreq->rreq; int u; - kenter("R=%x[%x]{%llx,%lx},%zd", + _enter("R=%x[%x]{%llx,%lx},%zd", rreq->debug_id, subreq->debug_index, subreq->start, subreq->flags, transferred_or_error); @@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, struct netfs_inode *ictx = netfs_inode(rreq->inode); size_t lsize; - kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); + _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); if (rreq->origin != NETFS_DIO_READ) { source = netfs_cache_prepare_read(subreq, rreq->i_size); @@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, subreq->start = rreq->start + rreq->submitted; subreq->len = io_iter->count; - kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); + _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); list_add_tail(&subreq->rreq_link, &rreq->subrequests); /* Call out to the cache to find out what it can do with the remaining @@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) struct iov_iter io_iter; int ret; - kenter("R=%x %llx-%llx", + _enter("R=%x %llx-%llx", rreq->debug_id, rreq->start, rreq->start + rreq->len - 1); if (rreq->len == 0) { @@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) atomic_set(&rreq->nr_outstanding, 1); io_iter = rreq->io_iter; do { - kdebug("submit %llx + %llx >= %llx", + _debug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); if (rreq->origin == NETFS_DIO_READ && rreq->start + rreq->submitted >= rreq->i_size) diff --git a/fs/netfs/main.c b/fs/netfs/main.c index db824c372842..5f0f438e5d21 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -20,6 +20,10 @@ MODULE_LICENSE("GPL"); EXPORT_TRACEPOINT_SYMBOL(netfs_sreq); +unsigned netfs_debug; +module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); + static struct kmem_cache *netfs_request_slab; static struct kmem_cache *netfs_subrequest_slab; mempool_t netfs_request_pool; diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 172808e83ca8..83e644bd518f 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -26,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) struct fscache_cookie *cookie = netfs_i_cookie(ictx); bool need_use = false; - kenter(""); + _enter(""); if (!filemap_dirty_folio(mapping, folio)) return false; @@ -99,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) struct netfs_folio *finfo; size_t flen = folio_size(folio); - kenter("{%lx},%zx,%zx", folio->index, offset, length); + _enter("{%lx},%zx,%zx", folio->index, offset, length); if (!folio_test_private(folio)) return; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 488147439fe0..426cf87aaf2e 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, { struct list_head *next; - kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); + _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); if (list_empty(&stream->subrequests)) return; @@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) unsigned int notes; int s; - kenter("%llx-%llx", wreq->start, wreq->start + wreq->len); + _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); trace_netfs_collect(wreq); trace_netfs_rreq(wreq, netfs_rreq_trace_collect); @@ -409,7 +409,7 @@ reassess_streams: front = stream->front; while (front) { trace_netfs_collect_sreq(wreq, front); - //kdebug("sreq [%x] %llx %zx/%zx", + //_debug("sreq [%x] %llx %zx/%zx", // front->debug_index, front->start, front->transferred, front->len); /* Stall if there may be a discontinuity. */ @@ -598,7 +598,7 @@ reassess_streams: out: netfs_put_group_many(wreq->group, wreq->nr_group_rel); wreq->nr_group_rel = 0; - kleave(" = %x", notes); + _leave(" = %x", notes); return; need_retry: @@ -606,7 +606,7 @@ need_retry: * that any partially completed op will have had any wholly transferred * folios removed from it. */ - kdebug("retry"); + _debug("retry"); netfs_retry_writes(wreq); goto out; } @@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work) size_t transferred; int s; - kenter("R=%x", wreq->debug_id); + _enter("R=%x", wreq->debug_id); netfs_see_request(wreq, netfs_rreq_trace_see_work); if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { @@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work) if (wreq->origin == NETFS_DIO_WRITE) inode_dio_end(wreq->inode); - kdebug("finished"); + _debug("finished"); trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); @@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, struct netfs_io_request *wreq = subreq->rreq; struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; - kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); + _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); switch (subreq->source) { case NETFS_UPLOAD_TO_SERVER: diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index d7c971df8866..9258d30cffe3 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, if (IS_ERR(wreq)) return wreq; - kenter("R=%x", wreq->debug_id); + _enter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) @@ -122,6 +122,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, wreq->io_streams[1].transferred = LONG_MAX; if (fscache_resources_valid(&wreq->cache_resources)) { wreq->io_streams[1].avail = true; + wreq->io_streams[1].active = true; wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq; wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write; } @@ -159,7 +160,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq, subreq->max_nr_segs = INT_MAX; subreq->stream_nr = stream->stream_nr; - kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index); + _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), @@ -215,7 +216,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream, { struct netfs_io_request *wreq = subreq->rreq; - kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); + _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) return netfs_write_subrequest_terminated(subreq, subreq->error, false); @@ -272,11 +273,11 @@ int netfs_advance_write(struct netfs_io_request *wreq, size_t part; if (!stream->avail) { - kleave("no write"); + _leave("no write"); return len; } - kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); + _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); if (subreq && start != subreq->start + subreq->len) { netfs_issue_write(wreq, stream); @@ -288,7 +289,7 @@ int netfs_advance_write(struct netfs_io_request *wreq, subreq = stream->construct; part = min(subreq->max_len - subreq->len, len); - kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); + _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); subreq->len += part; subreq->nr_segs++; @@ -319,7 +320,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, bool to_eof = false, streamw = false; bool debug = false; - kenter(""); + _enter(""); /* netfs_perform_write() may shift i_size around the page or from out * of the page to beyond it, but cannot move i_size into or through the @@ -329,7 +330,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (fpos >= i_size) { /* mmap beyond eof. */ - kdebug("beyond eof"); + _debug("beyond eof"); folio_start_writeback(folio); folio_unlock(folio); wreq->nr_group_rel += netfs_folio_written_back(folio); @@ -363,7 +364,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, } flen -= foff; - kdebug("folio %zx %zx %zx", foff, flen, fsize); + _debug("folio %zx %zx %zx", foff, flen, fsize); /* Deal with discontinuities in the stream of dirty pages. These can * arise from a number of sources: @@ -487,7 +488,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, for (int s = 0; s < NR_IO_STREAMS; s++) netfs_issue_write(wreq, &wreq->io_streams[s]); - kleave(" = 0"); + _leave(" = 0"); return 0; } @@ -522,7 +523,7 @@ int netfs_writepages(struct address_space *mapping, netfs_stat(&netfs_n_wh_writepages); do { - kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); + _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); /* It appears we don't have to handle cyclic writeback wrapping. */ WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted); @@ -546,14 +547,14 @@ int netfs_writepages(struct address_space *mapping, mutex_unlock(&ictx->wb_lock); netfs_put_request(wreq, false, netfs_rreq_trace_put_return); - kleave(" = %d", error); + _leave(" = %d", error); return error; couldnt_start: netfs_kill_dirty_pages(mapping, wbc, folio); out: mutex_unlock(&ictx->wb_lock); - kleave(" = %d", error); + _leave(" = %d", error); return error; } EXPORT_SYMBOL(netfs_writepages); @@ -590,7 +591,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache) { - kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u", + _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end); if (!*writethrough_cache) { @@ -624,7 +625,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr struct netfs_inode *ictx = netfs_inode(wreq->inode); int ret; - kenter("R=%x", wreq->debug_id); + _enter("R=%x", wreq->debug_id); if (writethrough_cache) netfs_write_folio(wreq, wbc, writethrough_cache); @@ -657,7 +658,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t loff_t start = wreq->start; int error = 0; - kenter("%zx", len); + _enter("%zx", len); if (wreq->origin == NETFS_DIO_WRITE) inode_dio_begin(wreq->inode); @@ -665,7 +666,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t while (len) { // TODO: Prepare content encryption - kdebug("unbuffered %zx", len); + _debug("unbuffered %zx", len); part = netfs_advance_write(wreq, upload, start, len, false); start += part; len -= part; @@ -684,6 +685,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t if (list_empty(&upload->subrequests)) netfs_wake_write_collector(wreq, false); - kleave(" = %d", error); + _leave(" = %d", error); return error; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9aa2ab218c0a..61a8cdb9f1e1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -591,7 +591,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n", filp, filp->f_mapping->host->i_ino, - (long long)folio_file_pos(folio)); + (long long)folio_pos(folio)); sb_start_pagefault(inode->i_sb); diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index b17a9eb9b148..49862c95b224 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -46,6 +46,10 @@ static inline void nfs_add_stats(const struct inode *inode, nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). + */ #define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats) static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 190c1fa8882c..d074d0ceb4f0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -182,7 +182,7 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset, end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio); if (i_size > 0 && folio->index < end_index) goto out; - end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count; + end = folio_pos(folio) + (loff_t)offset + (loff_t)count; if (i_size >= end) goto out; trace_nfs_size_grow(inode, end); @@ -1344,7 +1344,7 @@ int nfs_update_folio(struct file *file, struct folio *folio, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count, - (long long)(folio_file_pos(folio) + offset)); + (long long)(folio_pos(folio) + offset)); if (!count) goto out; diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 383f0afa2cea..cd14ea25968c 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -450,15 +450,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { - struct buffer_head *pbh; - __u64 key; + loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh); - key = page_index(bh->b_page) << (PAGE_SHIFT - - bmap->b_inode->i_blkbits); - for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) - key++; - - return key; + return pos >> bmap->b_inode->i_blkbits; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 0131d83b912d..c034080c334b 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -51,12 +51,21 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) - return NULL; + return ERR_PTR(-ENOMEM); if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || buffer_dirty(bh))) { - brelse(bh); - BUG(); + /* + * The block buffer at the specified new address was already + * in use. This can happen if it is a virtual block number + * and has been reallocated due to corruption of the bitmap + * used to manage its allocation state (if not, the buffer + * clearing of an abandoned b-tree node is missing somewhere). + */ + nilfs_error(inode->i_sb, + "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)", + (unsigned long long)blocknr, inode->i_ino); + goto failed; } memset(bh->b_data, 0, i_blocksize(inode)); bh->b_bdev = inode->i_sb->s_bdev; @@ -67,6 +76,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) folio_unlock(bh->b_folio); folio_put(bh->b_folio); return bh; + +failed: + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + brelse(bh); + return ERR_PTR(-EIO); } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, @@ -217,8 +232,8 @@ retry: } nbh = nilfs_btnode_create_block(btnc, newkey); - if (!nbh) - return -ENOMEM; + if (IS_ERR(nbh)) + return PTR_ERR(nbh); BUG_ON(nbh == obh); ctxt->newbh = nbh; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index a139970e4804..862bdf23120e 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -63,8 +63,8 @@ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); - if (!bh) - return -ENOMEM; + if (IS_ERR(bh)) + return PTR_ERR(bh); set_buffer_nilfs_volatile(bh); *bhp = bh; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6ea81f1d5094..0ca3110d6386 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -136,7 +136,7 @@ static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int); #define nilfs_cnt32_ge(a, b) \ (typecheck(__u32, a) && typecheck(__u32, b) && \ - ((__s32)(a) - (__s32)(b) >= 0)) + ((__s32)((a) - (b)) >= 0)) static int nilfs_prepare_segment_lock(struct super_block *sb, struct nilfs_transaction_info *ti) @@ -1639,41 +1639,30 @@ static void nilfs_begin_folio_io(struct folio *folio) folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. + */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; - - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, - b_assoc_buffers) { - if (bh->b_folio != bd_folio) { - if (bd_folio) { - folio_lock(bd_folio); - folio_wait_writeback(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - } - bd_folio = bh->b_folio; - } - } - + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == segbuf->sb_super_root) { - if (bh->b_folio != bd_folio) { - folio_lock(bd_folio); - folio_wait_writeback(bd_folio); - folio_clear_dirty_for_io(bd_folio); - folio_start_writeback(bd_folio); - folio_unlock(bd_folio); - bd_folio = bh->b_folio; - } + if (bh == segbuf->sb_super_root) break; - } set_buffer_async_write(bh); if (bh->b_folio != fs_folio) { nilfs_begin_folio_io(fs_folio); @@ -1681,6 +1670,42 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } } } + nilfs_begin_folio_io(fs_folio); + + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { + mark_buffer_dirty(bh); + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + } + bd_folio = bh->b_folio; + } + } + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + mark_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + if (bd_folio) { folio_lock(bd_folio); folio_wait_writeback(bd_folio); @@ -1688,7 +1713,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) folio_start_writeback(bd_folio); folio_unlock(bd_folio); } - nilfs_begin_folio_io(fs_folio); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -2070,10 +2094,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) @@ -2824,8 +2845,6 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (!nilfs->ns_writer) return -ENOMEM; - inode_attach_wb(nilfs->ns_bdev->bd_mapping->host, NULL); - err = nilfs_segctor_start_thread(nilfs->ns_writer); if (unlikely(err)) nilfs_detach_log_writer(sb); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 379d22e28ed6..a5569b7f47a3 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \ sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ -static struct kobj_type nilfs_##name##_ktype = { \ +static const struct kobj_type nilfs_##name##_ktype = { \ .default_groups = nilfs_##name##_groups, \ .sysfs_ops = &nilfs_##name##_attr_ops, \ .release = nilfs_##name##_attr_release, \ @@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = { .store = nilfs_snapshot_attr_store, }; -static struct kobj_type nilfs_snapshot_ktype = { +static const struct kobj_type nilfs_snapshot_ktype = { .default_groups = nilfs_snapshot_groups, .sysfs_ops = &nilfs_snapshot_attr_ops, .release = nilfs_snapshot_attr_release, @@ -967,7 +967,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = { .store = nilfs_dev_attr_store, }; -static struct kobj_type nilfs_dev_ktype = { +static const struct kobj_type nilfs_dev_ktype = { .default_groups = nilfs_dev_groups, .sysfs_ops = &nilfs_dev_attr_ops, .release = nilfs_dev_attr_release, diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 8e6bcdf99770..6ede3e924dec 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -231,7 +231,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, struct ntfs_sb_info *sbi; struct ATTRIB *attr_s; struct MFT_REC *rec; - u32 used, asize, rsize, aoff, align; + u32 used, asize, rsize, aoff; bool is_data; CLST len, alen; char *next; @@ -252,10 +252,13 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, rsize = le32_to_cpu(attr->res.data_size); is_data = attr->type == ATTR_DATA && !attr->name_len; - align = sbi->cluster_size; - if (is_attr_compressed(attr)) - align <<= COMPRESSION_UNIT; - len = (rsize + align - 1) >> sbi->cluster_bits; + /* len - how many clusters required to store 'rsize' bytes */ + if (is_attr_compressed(attr)) { + u8 shift = sbi->cluster_bits + NTFS_LZNT_CUNIT; + len = ((rsize + (1u << shift) - 1) >> shift) << NTFS_LZNT_CUNIT; + } else { + len = bytes_to_cluster(sbi, rsize); + } run_init(run); @@ -285,22 +288,21 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, if (err) goto out2; } else if (!page) { - char *kaddr; - - page = grab_cache_page(ni->vfs_inode.i_mapping, 0); - if (!page) { - err = -ENOMEM; + struct address_space *mapping = ni->vfs_inode.i_mapping; + struct folio *folio; + + folio = __filemap_get_folio( + mapping, 0, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_mask(mapping)); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out2; } - kaddr = kmap_atomic(page); - memcpy(kaddr, data, rsize); - memset(kaddr + rsize, 0, PAGE_SIZE - rsize); - kunmap_atomic(kaddr); - flush_dcache_page(page); - SetPageUptodate(page); - set_page_dirty(page); - unlock_page(page); - put_page(page); + folio_fill_tail(folio, 0, data, rsize); + folio_mark_uptodate(folio); + folio_mark_dirty(folio); + folio_unlock(folio); + folio_put(folio); } } @@ -670,7 +672,8 @@ pack_runs: goto undo_2; } - if (!is_mft) + /* keep runs for $MFT::$ATTR_DATA and $MFT::$ATTR_BITMAP. */ + if (ni->mi.rno != MFT_REC_MFT) run_truncate_head(run, evcn + 1); svcn = le64_to_cpu(attr->nres.svcn); @@ -972,6 +975,19 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, if (err) goto out; + /* Check for compressed frame. */ + err = attr_is_frame_compressed(ni, attr, vcn >> NTFS_LZNT_CUNIT, &hint); + if (err) + goto out; + + if (hint) { + /* if frame is compressed - don't touch it. */ + *lcn = COMPRESSED_LCN; + *len = hint; + err = -EOPNOTSUPP; + goto out; + } + if (!*len) { if (run_lookup_entry(run, vcn, lcn, len, NULL)) { if (*lcn != SPARSE_LCN || !new) @@ -1223,11 +1239,12 @@ undo1: goto out; } -int attr_data_read_resident(struct ntfs_inode *ni, struct page *page) +int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio) { u64 vbo; struct ATTRIB *attr; u32 data_size; + size_t len; attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL); if (!attr) @@ -1236,30 +1253,20 @@ int attr_data_read_resident(struct ntfs_inode *ni, struct page *page) if (attr->non_res) return E_NTFS_NONRESIDENT; - vbo = page->index << PAGE_SHIFT; + vbo = folio->index << PAGE_SHIFT; data_size = le32_to_cpu(attr->res.data_size); - if (vbo < data_size) { - const char *data = resident_data(attr); - char *kaddr = kmap_atomic(page); - u32 use = data_size - vbo; - - if (use > PAGE_SIZE) - use = PAGE_SIZE; + if (vbo > data_size) + len = 0; + else + len = min(data_size - vbo, folio_size(folio)); - memcpy(kaddr, data + vbo, use); - memset(kaddr + use, 0, PAGE_SIZE - use); - kunmap_atomic(kaddr); - flush_dcache_page(page); - SetPageUptodate(page); - } else if (!PageUptodate(page)) { - zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); - } + folio_fill_tail(folio, 0, resident_data(attr) + vbo, len); + folio_mark_uptodate(folio); return 0; } -int attr_data_write_resident(struct ntfs_inode *ni, struct page *page) +int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio) { u64 vbo; struct mft_inode *mi; @@ -1275,17 +1282,13 @@ int attr_data_write_resident(struct ntfs_inode *ni, struct page *page) return E_NTFS_NONRESIDENT; } - vbo = page->index << PAGE_SHIFT; + vbo = folio->index << PAGE_SHIFT; data_size = le32_to_cpu(attr->res.data_size); if (vbo < data_size) { char *data = resident_data(attr); - char *kaddr = kmap_atomic(page); - u32 use = data_size - vbo; + size_t len = min(data_size - vbo, folio_size(folio)); - if (use > PAGE_SIZE) - use = PAGE_SIZE; - memcpy(data + vbo, kaddr, use); - kunmap_atomic(kaddr); + memcpy_from_folio(data + vbo, folio, 0, len); mi->dirty = true; } ni->i_valid = data_size; @@ -1378,7 +1381,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, u32 voff; u8 bytes_per_off; char *addr; - struct page *page; + struct folio *folio; int i, err; __le32 *off32; __le64 *off64; @@ -1423,18 +1426,18 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, wof_size = le64_to_cpu(attr->nres.data_size); down_write(&ni->file.run_lock); - page = ni->file.offs_page; - if (!page) { - page = alloc_page(GFP_KERNEL); - if (!page) { + folio = ni->file.offs_folio; + if (!folio) { + folio = folio_alloc(GFP_KERNEL, 0); + if (!folio) { err = -ENOMEM; goto out; } - page->index = -1; - ni->file.offs_page = page; + folio->index = -1; + ni->file.offs_folio = folio; } - lock_page(page); - addr = page_address(page); + folio_lock(folio); + addr = folio_address(folio); if (vbo[1]) { voff = vbo[1] & (PAGE_SIZE - 1); @@ -1450,7 +1453,8 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, do { pgoff_t index = vbo[i] >> PAGE_SHIFT; - if (index != page->index) { + if (index != folio->index) { + struct page *page = &folio->page; u64 from = vbo[i] & ~(u64)(PAGE_SIZE - 1); u64 to = min(from + PAGE_SIZE, wof_size); @@ -1463,10 +1467,10 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, err = ntfs_bio_pages(sbi, run, &page, 1, from, to - from, REQ_OP_READ); if (err) { - page->index = -1; + folio->index = -1; goto out1; } - page->index = index; + folio->index = index; } if (i) { @@ -1504,7 +1508,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, *ondisk_size = off[1] - off[0]; out1: - unlock_page(page); + folio_unlock(folio); out: up_write(&ni->file.run_lock); return err; @@ -1722,6 +1726,7 @@ repack: attr_b->nres.total_size = cpu_to_le64(total_size); inode_set_bytes(&ni->vfs_inode, total_size); + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; mi_b->dirty = true; mark_inode_dirty(&ni->vfs_inode); @@ -2356,8 +2361,13 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1; } - if (vbo > data_size) { - /* Insert range after the file size is not allowed. */ + if (vbo >= data_size) { + /* + * Insert range after the file size is not allowed. + * If the offset is equal to or greater than the end of + * file, an error is returned. For such operations (i.e., inserting + * a hole at the end of file), ftruncate(2) should be used. + */ return -EINVAL; } diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index c9eb01ccee51..cf4fe21a5039 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -1382,7 +1382,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes); if (err) - break; + return err; bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 1937e8e612f8..fc6a8aa29e3a 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -272,9 +272,12 @@ out: return err == -ENOENT ? NULL : err ? ERR_PTR(err) : inode; } -static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, - const struct NTFS_DE *e, u8 *name, - struct dir_context *ctx) +/* + * returns false if 'ctx' if full + */ +static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi, + struct ntfs_inode *ni, const struct NTFS_DE *e, + u8 *name, struct dir_context *ctx) { const struct ATTR_FILE_NAME *fname; unsigned long ino; @@ -284,29 +287,29 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, fname = Add2Ptr(e, sizeof(struct NTFS_DE)); if (fname->type == FILE_NAME_DOS) - return 0; + return true; if (!mi_is_ref(&ni->mi, &fname->home)) - return 0; + return true; ino = ino_get(&e->ref); if (ino == MFT_REC_ROOT) - return 0; + return true; /* Skip meta files. Unless option to show metafiles is set. */ if (!sbi->options->showmeta && ntfs_is_meta_file(sbi, ino)) - return 0; + return true; if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN)) - return 0; + return true; name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name, PATH_MAX); if (name_len <= 0) { ntfs_warn(sbi->sb, "failed to convert name for inode %lx.", ino); - return 0; + return true; } /* @@ -326,7 +329,8 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, * It does additional locks/reads just to get the type of name. * Should we use additional mount option to enable branch below? */ - if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + if (((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) || + fname->dup.ea_size) && ino != ni->mi.rno) { struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); if (!IS_ERR_OR_NULL(inode)) { @@ -335,17 +339,20 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, } } - return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); + return dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } /* * ntfs_read_hdr - Helper function for ntfs_readdir(). + * + * returns 0 if ok. + * returns -EINVAL if directory is corrupted. + * returns +1 if 'ctx' is full. */ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, const struct INDEX_HDR *hdr, u64 vbo, u64 pos, u8 *name, struct dir_context *ctx) { - int err; const struct NTFS_DE *e; u32 e_size; u32 end = le32_to_cpu(hdr->used); @@ -353,12 +360,12 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, for (;; off += e_size) { if (off + sizeof(struct NTFS_DE) > end) - return -1; + return -EINVAL; e = Add2Ptr(hdr, off); e_size = le16_to_cpu(e->size); if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) - return -1; + return -EINVAL; if (de_is_last(e)) return 0; @@ -368,14 +375,15 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, continue; if (le16_to_cpu(e->key_size) < SIZEOF_ATTRIBUTE_FILENAME) - return -1; + return -EINVAL; ctx->pos = vbo + off; /* Submit the name to the filldir callback. */ - err = ntfs_filldir(sbi, ni, e, name, ctx); - if (err) - return err; + if (!ntfs_dir_emit(sbi, ni, e, name, ctx)) { + /* ctx is full. */ + return +1; + } } } @@ -474,8 +482,6 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) vbo = (u64)bit << index_bits; if (vbo >= i_size) { - ntfs_inode_err(dir, "Looks like your dir is corrupt"); - ctx->pos = eod; err = -EINVAL; goto out; } @@ -498,9 +504,16 @@ out: __putname(name); put_indx_node(node); - if (err == -ENOENT) { + if (err == 1) { + /* 'ctx' is full. */ + err = 0; + } else if (err == -ENOENT) { err = 0; ctx->pos = pos; + } else if (err < 0) { + if (err == -EINVAL) + ntfs_inode_err(dir, "directory corrupted"); + ctx->pos = eod; } return err; @@ -618,10 +631,12 @@ const struct file_operations ntfs_dir_operations = { #endif }; +#if IS_ENABLED(CONFIG_NTFS_FS) const struct file_operations ntfs_legacy_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = ntfs_readdir, .open = ntfs_file_open, }; +#endif // clang-format on diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 2f903b6ce157..ca1ddc46bd86 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -13,6 +13,7 @@ #include <linux/compat.h> #include <linux/falloc.h> #include <linux/fiemap.h> +#include <linux/fileattr.h> #include "debug.h" #include "ntfs.h" @@ -48,6 +49,65 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) return 0; } +/* + * ntfs_fileattr_get - inode_operations::fileattr_get + */ +int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct ntfs_inode *ni = ntfs_i(inode); + u32 flags = 0; + + if (inode->i_flags & S_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; + + if (inode->i_flags & S_APPEND) + flags |= FS_APPEND_FL; + + if (is_compressed(ni)) + flags |= FS_COMPR_FL; + + if (is_encrypted(ni)) + flags |= FS_ENCRYPT_FL; + + fileattr_fill_flags(fa, flags); + + return 0; +} + +/* + * ntfs_fileattr_set - inode_operations::fileattr_set + */ +int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, + struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + u32 flags = fa->flags; + unsigned int new_fl = 0; + + if (fileattr_has_fsx(fa)) + return -EOPNOTSUPP; + + if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL)) + return -EOPNOTSUPP; + + if (flags & FS_IMMUTABLE_FL) + new_fl |= S_IMMUTABLE; + + if (flags & FS_APPEND_FL) + new_fl |= S_APPEND; + + inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND); + + inode_set_ctime_current(inode); + mark_inode_dirty(inode); + + return 0; +} + +/* + * ntfs_ioctl - file_operations::unlocked_ioctl + */ long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -77,20 +137,27 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct inode *inode = d_inode(path->dentry); struct ntfs_inode *ni = ntfs_i(inode); + stat->result_mask |= STATX_BTIME; + stat->btime = ni->i_crtime; + stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */ + + if (inode->i_flags & S_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + + if (inode->i_flags & S_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (is_compressed(ni)) stat->attributes |= STATX_ATTR_COMPRESSED; if (is_encrypted(ni)) stat->attributes |= STATX_ATTR_ENCRYPTED; - stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED; + stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | STATX_ATTR_APPEND; generic_fillattr(idmap, request_mask, inode, stat); - stat->result_mask |= STATX_BTIME; - stat->btime = ni->i_crtime; - stat->blksize = ni->mi.sbi->cluster_size; /* 512, 1K, ..., 2M */ - return 0; } @@ -196,9 +263,9 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) PAGE_SIZE; iblock = page_off >> inode->i_blkbits; - folio = __filemap_get_folio(mapping, idx, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - mapping_gfp_constraint(mapping, ~__GFP_FS)); + folio = __filemap_get_folio( + mapping, idx, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_constraint(mapping, ~__GFP_FS)); if (IS_ERR(folio)) return PTR_ERR(folio); @@ -253,8 +320,7 @@ out: */ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) { - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); u64 from = ((u64)vma->vm_pgoff << PAGE_SHIFT); bool rw = vma->vm_flags & VM_WRITE; @@ -299,10 +365,7 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) } if (ni->i_valid < to) { - if (!inode_trylock(inode)) { - err = -EAGAIN; - goto out; - } + inode_lock(inode); err = ntfs_extend_initialized_size(file, ni, ni->i_valid, to); inode_unlock(inode); @@ -431,7 +494,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) */ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) { - struct inode *inode = file->f_mapping->host; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; struct super_block *sb = inode->i_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; @@ -744,7 +807,7 @@ out: static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) @@ -781,7 +844,7 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - struct inode *inode = in->f_mapping->host; + struct inode *inode = file_inode(in); struct ntfs_inode *ni = ntfs_i(inode); if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) @@ -824,23 +887,25 @@ static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index, *frame_uptodate = true; for (npages = 0; npages < pages_per_frame; npages++, index++) { - struct page *page; + struct folio *folio; - page = find_or_create_page(mapping, index, gfp_mask); - if (!page) { + folio = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + gfp_mask); + if (IS_ERR(folio)) { while (npages--) { - page = pages[npages]; - unlock_page(page); - put_page(page); + folio = page_folio(pages[npages]); + folio_unlock(folio); + folio_put(folio); } return -ENOMEM; } - if (!PageUptodate(page)) + if (!folio_test_uptodate(folio)) *frame_uptodate = false; - pages[npages] = page; + pages[npages] = &folio->page; } return 0; @@ -1075,8 +1140,7 @@ out: static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; + struct inode *inode = file_inode(file); ssize_t ret; int err; struct ntfs_inode *ni = ntfs_i(inode); @@ -1198,7 +1262,7 @@ static int ntfs_file_release(struct inode *inode, struct file *file) } /* - * ntfs_fiemap - file_operations::fiemap + * ntfs_fiemap - inode_operations::fiemap */ int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) @@ -1227,6 +1291,8 @@ const struct inode_operations ntfs_file_inode_operations = { .get_acl = ntfs_get_acl, .set_acl = ntfs_set_acl, .fiemap = ntfs_fiemap, + .fileattr_get = ntfs_fileattr_get, + .fileattr_set = ntfs_fileattr_set, }; const struct file_operations ntfs_file_operations = { @@ -1246,6 +1312,7 @@ const struct file_operations ntfs_file_operations = { .release = ntfs_file_release, }; +#if IS_ENABLED(CONFIG_NTFS_FS) const struct file_operations ntfs_legacy_file_operations = { .llseek = generic_file_llseek, .read_iter = ntfs_file_read_iter, @@ -1253,4 +1320,5 @@ const struct file_operations ntfs_legacy_file_operations = { .open = ntfs_file_open, .release = ntfs_file_release, }; +#endif // clang-format on diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 0008670939a4..a469c608a394 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -122,10 +122,10 @@ void ni_clear(struct ntfs_inode *ni) else { run_close(&ni->file.run); #ifdef CONFIG_NTFS3_LZX_XPRESS - if (ni->file.offs_page) { + if (ni->file.offs_folio) { /* On-demand allocated page for offsets. */ - put_page(ni->file.offs_page); - ni->file.offs_page = NULL; + folio_put(ni->file.offs_folio); + ni->file.offs_folio = NULL; } #endif } @@ -1501,7 +1501,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, if (is_ext) { if (flags & ATTR_FLAG_COMPRESSED) - attr->nres.c_unit = COMPRESSION_UNIT; + attr->nres.c_unit = NTFS_LZNT_CUNIT; attr->nres.total_size = attr->nres.alloc_size; } @@ -1601,8 +1601,10 @@ int ni_delete_all(struct ntfs_inode *ni) asize = le32_to_cpu(attr->size); roff = le16_to_cpu(attr->nres.run_off); - if (roff > asize) + if (roff > asize) { + _ntfs_bad_inode(&ni->vfs_inode); return -EINVAL; + } /* run==1 means unpack and deallocate. */ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, @@ -1897,6 +1899,47 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, } /* + * fiemap_fill_next_extent_k - a copy of fiemap_fill_next_extent + * but it accepts kernel address for fi_extents_start + */ +static int fiemap_fill_next_extent_k(struct fiemap_extent_info *fieinfo, + u64 logical, u64 phys, u64 len, u32 flags) +{ + struct fiemap_extent extent; + struct fiemap_extent __user *dest = fieinfo->fi_extents_start; + + /* only count the extents */ + if (fieinfo->fi_extents_max == 0) { + fieinfo->fi_extents_mapped++; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; + } + + if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) + return 1; + + if (flags & FIEMAP_EXTENT_DELALLOC) + flags |= FIEMAP_EXTENT_UNKNOWN; + if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED) + flags |= FIEMAP_EXTENT_ENCODED; + if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE)) + flags |= FIEMAP_EXTENT_NOT_ALIGNED; + + memset(&extent, 0, sizeof(extent)); + extent.fe_logical = logical; + extent.fe_physical = phys; + extent.fe_length = len; + extent.fe_flags = flags; + + dest += fieinfo->fi_extents_mapped; + memcpy(dest, &extent, sizeof(extent)); + + fieinfo->fi_extents_mapped++; + if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) + return 1; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; +} + +/* * ni_fiemap - Helper for file_fiemap(). * * Assumed ni_lock. @@ -1906,6 +1949,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, __u64 vbo, __u64 len) { int err = 0; + struct fiemap_extent __user *fe_u = fieinfo->fi_extents_start; + struct fiemap_extent *fe_k = NULL; struct ntfs_sb_info *sbi = ni->mi.sbi; u8 cluster_bits = sbi->cluster_bits; struct runs_tree *run; @@ -1953,6 +1998,18 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, goto out; } + /* + * To avoid lock problems replace pointer to user memory by pointer to kernel memory. + */ + fe_k = kmalloc_array(fieinfo->fi_extents_max, + sizeof(struct fiemap_extent), + GFP_NOFS | __GFP_ZERO); + if (!fe_k) { + err = -ENOMEM; + goto out; + } + fieinfo->fi_extents_start = fe_k; + end = vbo + len; alloc_size = le64_to_cpu(attr->nres.alloc_size); if (end > alloc_size) @@ -2041,8 +2098,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, if (vbo + dlen >= end) flags |= FIEMAP_EXTENT_LAST; - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, - flags); + err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, dlen, + flags); + if (err < 0) break; if (err == 1) { @@ -2062,7 +2120,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, if (vbo + bytes >= end) flags |= FIEMAP_EXTENT_LAST; - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); + err = fiemap_fill_next_extent_k(fieinfo, vbo, lbo, bytes, + flags); if (err < 0) break; if (err == 1) { @@ -2075,7 +2134,19 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, up_read(run_lock); + /* + * Copy to user memory out of lock + */ + if (copy_to_user(fe_u, fe_k, + fieinfo->fi_extents_max * + sizeof(struct fiemap_extent))) { + err = -EFAULT; + } + out: + /* Restore original pointer. */ + fieinfo->fi_extents_start = fe_u; + kfree(fe_k); return err; } @@ -2085,12 +2156,12 @@ out: * When decompressing, we typically obtain more than one page per reference. * We inject the additional pages into the page cache. */ -int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) +int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio) { int err; struct ntfs_sb_info *sbi = ni->mi.sbi; - struct address_space *mapping = page->mapping; - pgoff_t index = page->index; + struct address_space *mapping = folio->mapping; + pgoff_t index = folio->index; u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT; struct page **pages = NULL; /* Array of at most 16 pages. stack? */ u8 frame_bits; @@ -2100,7 +2171,8 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) struct page *pg; if (vbo >= i_size_read(&ni->vfs_inode)) { - SetPageUptodate(page); + folio_zero_range(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); err = 0; goto out; } @@ -2124,7 +2196,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) goto out; } - pages[idx] = page; + pages[idx] = &folio->page; index = frame_vbo >> PAGE_SHIFT; gfp_mask = mapping_gfp_mask(mapping); @@ -2143,9 +2215,6 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame); out1: - if (err) - SetPageError(page); - for (i = 0; i < pages_per_frame; i++) { pg = pages[i]; if (i == idx || !pg) @@ -2157,7 +2226,7 @@ out1: out: /* At this point, err contains 0 or -EIO depending on the "critical" page. */ kfree(pages); - unlock_page(page); + folio_unlock(folio); return err; } @@ -2362,9 +2431,9 @@ remove_wof: /* Clear cached flag. */ ni->ni_flags &= ~NI_FLAG_COMPRESSED_MASK; - if (ni->file.offs_page) { - put_page(ni->file.offs_page); - ni->file.offs_page = NULL; + if (ni->file.offs_folio) { + folio_put(ni->file.offs_folio); + ni->file.offs_folio = NULL; } mapping->a_ops = &ntfs_aops; @@ -2718,7 +2787,6 @@ out: for (i = 0; i < pages_per_frame; i++) { pg = pages[i]; kunmap(pg); - ClearPageError(pg); SetPageUptodate(pg); } diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index d7807d255dfe..c64dd114ac65 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -724,7 +724,8 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes) if (!rsize || rsize > bytes || rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts || - le16_to_cpu(rt->total) > ne || ff > ts || lf > ts || + le16_to_cpu(rt->total) > ne || ff > ts - sizeof(__le32) || + lf > ts - sizeof(__le32) || (ff && ff < sizeof(struct RESTART_TABLE)) || (lf && lf < sizeof(struct RESTART_TABLE))) { return false; @@ -754,6 +755,9 @@ static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes) return false; off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off)); + + if (off > ts - sizeof(__le32)) + return false; } return true; @@ -2992,7 +2996,7 @@ static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi, if (is_ext) { attr->name_off = SIZEOF_NONRESIDENT_EX_LE; if (is_attr_compressed(attr)) - attr->nres.c_unit = COMPRESSION_UNIT; + attr->nres.c_unit = NTFS_LZNT_CUNIT; attr->nres.run_off = cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size); @@ -3722,6 +3726,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) u64 rec_lsn, checkpt_lsn = 0, rlsn = 0; struct ATTR_NAME_ENTRY *attr_names = NULL; + u32 attr_names_bytes = 0; + u32 oatbl_bytes = 0; struct RESTART_TABLE *dptbl = NULL; struct RESTART_TABLE *trtbl = NULL; const struct RESTART_TABLE *rt; @@ -3736,6 +3742,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) struct NTFS_RESTART *rst = NULL; struct lcb *lcb = NULL; struct OPEN_ATTR_ENRTY *oe; + struct ATTR_NAME_ENTRY *ane; struct TRANSACTION_ENTRY *tr; struct DIR_PAGE_ENTRY *dp; u32 i, bytes_per_attr_entry; @@ -3915,6 +3922,9 @@ check_restart_area: goto out; } + log->page_mask = log->page_size - 1; + log->page_bits = blksize_bits(log->page_size); + /* If the file size has shrunk then we won't mount it. */ if (log->l_size < le64_to_cpu(ra2->l_size)) { err = -EINVAL; @@ -4104,7 +4114,7 @@ process_log: /* Allocate and Read the Transaction Table. */ if (!rst->transact_table_len) - goto check_dirty_page_table; + goto check_dirty_page_table; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->transact_table_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4144,7 +4154,7 @@ process_log: check_dirty_page_table: /* The next record back should be the Dirty Pages Table. */ if (!rst->dirty_pages_len) - goto check_attribute_names; + goto check_attribute_names; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->dirty_pages_table_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4180,7 +4190,7 @@ check_dirty_page_table: /* Convert Ra version '0' into version '1'. */ if (rst->major_ver) - goto end_conv_1; + goto end_conv_1; /* reduce tab pressure. */ dp = NULL; while ((dp = enum_rstbl(dptbl, dp))) { @@ -4200,8 +4210,7 @@ end_conv_1: * remembering the oldest lsn values. */ if (sbi->cluster_size <= log->page_size) - goto trace_dp_table; - + goto trace_dp_table; /* reduce tab pressure. */ dp = NULL; while ((dp = enum_rstbl(dptbl, dp))) { struct DIR_PAGE_ENTRY *next = dp; @@ -4222,7 +4231,7 @@ trace_dp_table: check_attribute_names: /* The next record should be the Attribute Names. */ if (!rst->attr_names_len) - goto check_attr_table; + goto check_attr_table; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->attr_names_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4240,9 +4249,9 @@ check_attribute_names: } t32 = lrh_length(lrh); - rec_len -= t32; + attr_names_bytes = rec_len - t32; - attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS); + attr_names = kmemdup(Add2Ptr(lrh, t32), attr_names_bytes, GFP_NOFS); if (!attr_names) { err = -ENOMEM; goto out; @@ -4254,7 +4263,7 @@ check_attribute_names: check_attr_table: /* The next record should be the attribute Table. */ if (!rst->open_attr_len) - goto check_attribute_names2; + goto check_attribute_names2; /* reduce tab pressure. */ t64 = le64_to_cpu(rst->open_attr_table_lsn); err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); @@ -4274,14 +4283,14 @@ check_attr_table: t16 = le16_to_cpu(lrh->redo_off); rt = Add2Ptr(lrh, t16); - t32 = rec_len - t16; + oatbl_bytes = rec_len - t16; - if (!check_rstbl(rt, t32)) { + if (!check_rstbl(rt, oatbl_bytes)) { err = -EINVAL; goto out; } - oatbl = kmemdup(rt, t32, GFP_NOFS); + oatbl = kmemdup(rt, oatbl_bytes, GFP_NOFS); if (!oatbl) { err = -ENOMEM; goto out; @@ -4314,17 +4323,40 @@ check_attr_table: lcb = NULL; check_attribute_names2: - if (rst->attr_names_len && oatbl) { - struct ATTR_NAME_ENTRY *ane = attr_names; - while (ane->off) { + if (attr_names && oatbl) { + off = 0; + for (;;) { + /* Check we can use attribute name entry 'ane'. */ + static_assert(sizeof(*ane) == 4); + if (off + sizeof(*ane) > attr_names_bytes) { + /* just ignore the rest. */ + break; + } + + ane = Add2Ptr(attr_names, off); + t16 = le16_to_cpu(ane->off); + if (!t16) { + /* this is the only valid exit. */ + break; + } + + /* Check we can use open attribute entry 'oe'. */ + if (t16 + sizeof(*oe) > oatbl_bytes) { + /* just ignore the rest. */ + break; + } + /* TODO: Clear table on exit! */ - oe = Add2Ptr(oatbl, le16_to_cpu(ane->off)); + oe = Add2Ptr(oatbl, t16); t16 = le16_to_cpu(ane->name_bytes); + off += t16 + sizeof(*ane); + if (off > attr_names_bytes) { + /* just ignore the rest. */ + break; + } oe->name_len = t16 / sizeof(short); oe->ptr = ane->name; oe->is_attr_name = 2; - ane = Add2Ptr(ane, - sizeof(struct ATTR_NAME_ENTRY) + t16); } } @@ -4520,7 +4552,6 @@ copy_lcns: } } goto next_log_record_analyze; - ; } case OpenNonresidentAttribute: @@ -4659,7 +4690,7 @@ end_log_records_enumerate: * table are not empty. */ if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total)) - goto end_reply; + goto end_replay; sbi->flags |= NTFS_FLAGS_NEED_REPLAY; if (is_ro) @@ -5088,7 +5119,7 @@ undo_action_done: sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY; -end_reply: +end_replay: err = 0; if (is_ro) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 626d3f2c7e2d..0fa636038b4e 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -2650,8 +2650,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) { int err; struct ATTRIB *attr; + u32 uni_bytes; struct ntfs_inode *ni = sbi->volume.ni; - const u8 max_ulen = 0x80; /* TODO: use attrdef to get maximum length */ /* Allocate PATH_MAX bytes. */ struct cpu_str *uni = __getname(); @@ -2663,7 +2663,8 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) if (err < 0) goto out; - if (uni->len > max_ulen) { + uni_bytes = uni->len * sizeof(u16); + if (uni_bytes > NTFS_LABEL_MAX_LENGTH * sizeof(u16)) { ntfs_warn(sbi->sb, "new label is too long"); err = -EFBIG; goto out; @@ -2674,13 +2675,13 @@ int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len) /* Ignore any errors. */ ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL); - err = ni_insert_resident(ni, uni->len * sizeof(u16), ATTR_LABEL, NULL, - 0, &attr, NULL, NULL); + err = ni_insert_resident(ni, uni_bytes, ATTR_LABEL, NULL, 0, &attr, + NULL, NULL); if (err < 0) goto unlock_out; /* write new label in on-disk struct. */ - memcpy(resident_data(attr), uni->name, uni->len * sizeof(u16)); + memcpy(resident_data(attr), uni->name, uni_bytes); /* update cached value of current label. */ if (len >= ARRAY_SIZE(sbi->volume.label)) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index d0f15bbf78f6..9089c58a005c 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -978,7 +978,7 @@ static struct indx_node *indx_new(struct ntfs_index *indx, hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64)); de_set_vbn_le(e, *sub_vbn); - hdr->flags = 1; + hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES; } else { e->size = cpu_to_le16(sizeof(struct NTFS_DE)); hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE)); @@ -1683,7 +1683,7 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni, e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64)); e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST; - hdr->flags = 1; + hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES; hdr->used = hdr->total = cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr)); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 0f1664db94ad..6b0bdc474e76 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -18,7 +18,7 @@ #include "ntfs_fs.h" /* - * ntfs_read_mft - Read record and parses MFT. + * ntfs_read_mft - Read record and parse MFT. */ static struct inode *ntfs_read_mft(struct inode *inode, const struct cpu_str *name, @@ -441,10 +441,9 @@ end_enum: * Usually a hard links to directories are disabled. */ inode->i_op = &ntfs_dir_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_dir_operations; - else - inode->i_fop = &ntfs_dir_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_dir_operations : + &ntfs_dir_operations; ni->i_valid = 0; } else if (S_ISLNK(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; @@ -454,10 +453,9 @@ end_enum: } else if (S_ISREG(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; inode->i_op = &ntfs_file_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_file_operations; - else - inode->i_fop = &ntfs_file_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_file_operations : + &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; if (ino != MFT_REC_MFT) @@ -580,10 +578,11 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, bh->b_blocknr = RESIDENT_LCN; bh->b_size = block_size; if (!folio) { + /* direct io (read) or bmap call */ err = 0; } else { ni_lock(ni); - err = attr_data_read_resident(ni, &folio->page); + err = attr_data_read_resident(ni, folio); ni_unlock(ni); if (!err) @@ -710,25 +709,24 @@ static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) static int ntfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; int err; - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); if (is_resident(ni)) { ni_lock(ni); - err = attr_data_read_resident(ni, page); + err = attr_data_read_resident(ni, folio); ni_unlock(ni); if (err != E_NTFS_NONRESIDENT) { - unlock_page(page); + folio_unlock(folio); return err; } } if (is_compressed(ni)) { ni_lock(ni); - err = ni_readpage_cmpr(ni, page); + err = ni_readpage_cmpr(ni, folio); ni_unlock(ni); return err; } @@ -872,7 +870,7 @@ static int ntfs_resident_writepage(struct folio *folio, return -EIO; ni_lock(ni); - ret = attr_data_write_resident(ni, &folio->page); + ret = attr_data_write_resident(ni, folio); ni_unlock(ni); if (ret != E_NTFS_NONRESIDENT) @@ -914,24 +912,25 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping, *pagep = NULL; if (is_resident(ni)) { - struct page *page = - grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT); + struct folio *folio = __filemap_get_folio( + mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); - if (!page) { - err = -ENOMEM; + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out; } ni_lock(ni); - err = attr_data_read_resident(ni, page); + err = attr_data_read_resident(ni, folio); ni_unlock(ni); if (!err) { - *pagep = page; + *pagep = &folio->page; goto out; } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); if (err != E_NTFS_NONRESIDENT) goto out; @@ -950,6 +949,7 @@ out: int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, u32 len, u32 copied, struct page *page, void *fsdata) { + struct folio *folio = page_folio(page); struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); u64 valid = ni->i_valid; @@ -958,26 +958,26 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, if (is_resident(ni)) { ni_lock(ni); - err = attr_data_write_resident(ni, page); + err = attr_data_write_resident(ni, folio); ni_unlock(ni); if (!err) { + struct buffer_head *head = folio_buffers(folio); dirty = true; - /* Clear any buffers in page. */ - if (page_has_buffers(page)) { - struct buffer_head *head, *bh; + /* Clear any buffers in folio. */ + if (head) { + struct buffer_head *bh = head; - bh = head = page_buffers(page); do { clear_buffer_dirty(bh); clear_buffer_mapped(bh); set_buffer_uptodate(bh); } while (head != (bh = bh->b_this_page)); } - SetPageUptodate(page); + folio_mark_uptodate(folio); err = copied; } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } else { err = generic_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -1093,33 +1093,31 @@ int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, if (!ret && i2) ret = writeback_inode(i2); if (!ret) - ret = sync_blockdev_nowait(sb->s_bdev); + ret = filemap_flush(sb->s_bdev_file->f_mapping); return ret; } -int inode_write_data(struct inode *inode, const void *data, size_t bytes) +/* + * Helper function to read file. + */ +int inode_read_data(struct inode *inode, void *data, size_t bytes) { pgoff_t idx; + struct address_space *mapping = inode->i_mapping; - /* Write non resident data. */ for (idx = 0; bytes; idx++) { size_t op = bytes > PAGE_SIZE ? PAGE_SIZE : bytes; - struct page *page = ntfs_map_page(inode->i_mapping, idx); + struct page *page = read_mapping_page(mapping, idx, NULL); + void *kaddr; if (IS_ERR(page)) return PTR_ERR(page); - lock_page(page); - WARN_ON(!PageUptodate(page)); - ClearPageUptodate(page); - - memcpy(page_address(page), data, op); - - flush_dcache_page(page); - SetPageUptodate(page); - unlock_page(page); + kaddr = kmap_atomic(page); + memcpy(data, kaddr, op); + kunmap_atomic(kaddr); - ntfs_unmap_page(page); + put_page(page); bytes -= op; data = Add2Ptr(data, PAGE_SIZE); @@ -1508,7 +1506,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8); attr->name_off = SIZEOF_NONRESIDENT_EX_LE; attr->flags = ATTR_FLAG_COMPRESSED; - attr->nres.c_unit = COMPRESSION_UNIT; + attr->nres.c_unit = NTFS_LZNT_CUNIT; asize = SIZEOF_NONRESIDENT_EX + 8; } else { attr->size = cpu_to_le32(SIZEOF_NONRESIDENT + 8); @@ -1559,7 +1557,7 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, /* * Below function 'ntfs_save_wsl_perm' requires 0x78 bytes. - * It is good idea to keep extened attributes resident. + * It is good idea to keep extended attributes resident. */ if (asize + t16 + 0x78 + 8 > sbi->record_size) { CLST alen; @@ -1628,10 +1626,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, if (S_ISDIR(mode)) { inode->i_op = &ntfs_dir_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_dir_operations; - else - inode->i_fop = &ntfs_dir_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_dir_operations : + &ntfs_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &ntfs_link_inode_operations; inode->i_fop = NULL; @@ -1640,10 +1637,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, inode_nohighmem(inode); } else if (S_ISREG(mode)) { inode->i_op = &ntfs_file_inode_operations; - if (is_legacy_ntfs(inode->i_sb)) - inode->i_fop = &ntfs_legacy_file_operations; - else - inode->i_fop = &ntfs_file_operations; + inode->i_fop = unlikely(is_legacy_ntfs(sb)) ? + &ntfs_legacy_file_operations : + &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; init_rwsem(&ni->file.run_lock); @@ -1668,7 +1664,9 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, * The packed size of extended attribute is stored in direntry too. * 'fname' here points to inside new_de. */ - ntfs_save_wsl_perm(inode, &fname->dup.ea_size); + err = ntfs_save_wsl_perm(inode, &fname->dup.ea_size); + if (err) + goto out6; /* * update ea_size in file_name attribute too. @@ -1712,6 +1710,12 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, goto out2; out6: + attr = ni_find_attr(ni, NULL, NULL, ATTR_EA, NULL, 0, NULL, NULL); + if (attr && attr->non_res) { + /* Delete ATTR_EA, if non-resident. */ + attr_set_size(ni, ATTR_EA, NULL, 0, NULL, 0, NULL, false, NULL); + } + if (rp_inserted) ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref); @@ -2133,5 +2137,6 @@ const struct address_space_operations ntfs_aops = { const struct address_space_operations ntfs_aops_cmpr = { .read_folio = ntfs_read_folio, .readahead = ntfs_readahead, + .dirty_folio = block_dirty_folio, }; // clang-format on diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index 71498421ce60..f16d318c4372 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -112,9 +112,7 @@ static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir, } /* - * ntfs_mknod - * - * inode_operations::mknod + * ntfs_mknod - inode_operations::mknod */ static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) @@ -509,6 +507,8 @@ const struct inode_operations ntfs_dir_inode_operations = { .getattr = ntfs_getattr, .listxattr = ntfs_listxattr, .fiemap = ntfs_fiemap, + .fileattr_get = ntfs_fileattr_get, + .fileattr_set = ntfs_fileattr_set, }; const struct inode_operations ntfs_special_inode_operations = { diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 3d6143c7abc0..241f2ffdd920 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -82,9 +82,6 @@ typedef u32 CLST; #define RESIDENT_LCN ((CLST)-2) #define COMPRESSED_LCN ((CLST)-3) -#define COMPRESSION_UNIT 4 -#define COMPRESS_MAX_CLUSTER 0x1000 - enum RECORD_NUM { MFT_REC_MFT = 0, MFT_REC_MIRR = 1, @@ -696,14 +693,15 @@ static inline bool de_has_vcn_ex(const struct NTFS_DE *e) offsetof(struct ATTR_FILE_NAME, name) + \ NTFS_NAME_LEN * sizeof(short), 8) +#define NTFS_INDEX_HDR_HAS_SUBNODES cpu_to_le32(1) + struct INDEX_HDR { __le32 de_off; // 0x00: The offset from the start of this structure // to the first NTFS_DE. __le32 used; // 0x04: The size of this structure plus all // entries (quad-word aligned). __le32 total; // 0x08: The allocated size of for this structure plus all entries. - u8 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory. - u8 res[3]; + __le32 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory. // // de_off + used <= total @@ -751,7 +749,7 @@ static inline struct NTFS_DE *hdr_next_de(const struct INDEX_HDR *hdr, static inline bool hdr_has_subnode(const struct INDEX_HDR *hdr) { - return hdr->flags & 1; + return hdr->flags & NTFS_INDEX_HDR_HAS_SUBNODES; } struct INDEX_BUFFER { @@ -771,7 +769,7 @@ static inline bool ib_is_empty(const struct INDEX_BUFFER *ib) static inline bool ib_is_leaf(const struct INDEX_BUFFER *ib) { - return !(ib->ihdr.flags & 1); + return !(ib->ihdr.flags & NTFS_INDEX_HDR_HAS_SUBNODES); } /* Index root structure ( 0x90 ). */ @@ -1002,9 +1000,6 @@ struct REPARSE_POINT { static_assert(sizeof(struct REPARSE_POINT) == 0x18); -/* Maximum allowed size of the reparse data. */ -#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE (16 * 1024) - /* * The value of the following constant needs to satisfy the following * conditions: diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index f9ed6d2b065d..e5255a251929 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -383,7 +383,7 @@ struct ntfs_inode { struct rw_semaphore run_lock; struct runs_tree run; #ifdef CONFIG_NTFS3_LZX_XPRESS - struct page *offs_page; + struct folio *offs_folio; #endif } file; }; @@ -434,8 +434,8 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, struct ATTRIB **ret); int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, CLST *len, bool *new, bool zero); -int attr_data_read_resident(struct ntfs_inode *ni, struct page *page); -int attr_data_write_resident(struct ntfs_inode *ni, struct page *page); +int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio); +int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio); int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, u8 name_len, struct runs_tree *run, CLST vcn); @@ -497,6 +497,9 @@ extern const struct file_operations ntfs_dir_operations; extern const struct file_operations ntfs_legacy_dir_operations; /* Globals from file.c */ +int ntfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, + struct fileattr *fa); int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, @@ -564,7 +567,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint); #define _ni_write_inode(i, w) ni_write_inode(i, w, __func__) int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, __u64 vbo, __u64 len); -int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page); +int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio); int ni_decompress_file(struct ntfs_inode *ni); int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, u32 pages_per_frame); @@ -716,7 +719,7 @@ int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc); int ntfs_sync_inode(struct inode *inode); int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2); -int inode_write_data(struct inode *inode, const void *data, size_t bytes); +int inode_read_data(struct inode *inode, void *data, size_t bytes); int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const struct cpu_str *uni, umode_t mode, dev_t dev, const char *symname, u32 size, @@ -910,22 +913,6 @@ static inline bool ntfs_is_meta_file(struct ntfs_sb_info *sbi, CLST rno) rno == sbi->usn_jrnl_no; } -static inline void ntfs_unmap_page(struct page *page) -{ - kunmap(page); - put_page(page); -} - -static inline struct page *ntfs_map_page(struct address_space *mapping, - unsigned long index) -{ - struct page *page = read_mapping_page(mapping, index, NULL); - - if (!IS_ERR(page)) - kmap(page); - return page; -} - static inline size_t wnd_zone_bit(const struct wnd_bitmap *wnd) { return wnd->zone_bit; @@ -1156,6 +1143,13 @@ static inline void le64_sub_cpu(__le64 *var, u64 val) *var = cpu_to_le64(le64_to_cpu(*var) - val); } +#if IS_ENABLED(CONFIG_NTFS_FS) bool is_legacy_ntfs(struct super_block *sb); +#else +static inline bool is_legacy_ntfs(struct super_block *sb) +{ + return false; +} +#endif #endif /* _LINUX_NTFS3_NTFS_FS_H */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index c5b688c5f984..a8758b85803f 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -275,7 +275,7 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = { fsparam_flag_no("acl", Opt_acl), fsparam_string("iocharset", Opt_iocharset), fsparam_flag_no("prealloc", Opt_prealloc), - fsparam_flag_no("nocase", Opt_nocase), + fsparam_flag_no("case", Opt_nocase), {} }; // clang-format on @@ -464,7 +464,7 @@ static int ntfs3_volinfo(struct seq_file *m, void *o) struct super_block *sb = m->private; struct ntfs_sb_info *sbi = sb->s_fs_info; - seq_printf(m, "ntfs%d.%d\n%u\n%zu\n\%zu\n%zu\n%s\n%s\n", + seq_printf(m, "ntfs%d.%d\n%u\n%zu\n%zu\n%zu\n%s\n%s\n", sbi->volume.major_ver, sbi->volume.minor_ver, sbi->cluster_size, sbi->used.bitmap.nbits, sbi->mft.bitmap.nbits, @@ -1159,7 +1159,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) CLST vcn, lcn, len; struct ATTRIB *attr; const struct VOLUME_INFO *info; - u32 idx, done, bytes; + u32 done, bytes; struct ATTR_DEF_ENTRY *t; u16 *shared; struct MFT_REF ref; @@ -1201,7 +1201,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) /* * Load $Volume. This should be done before $LogFile - * 'cause 'sbi->volume.ni' is used 'ntfs_set_state'. + * 'cause 'sbi->volume.ni' is used in 'ntfs_set_state'. */ ref.low = cpu_to_le32(MFT_REC_VOL); ref.seq = cpu_to_le16(MFT_REC_VOL); @@ -1431,31 +1431,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) goto put_inode_out; } - for (done = idx = 0; done < bytes; done += PAGE_SIZE, idx++) { - unsigned long tail = bytes - done; - struct page *page = ntfs_map_page(inode->i_mapping, idx); + /* Read the entire file. */ + err = inode_read_data(inode, sbi->def_table, bytes); + if (err) { + ntfs_err(sb, "Failed to read $AttrDef (%d).", err); + goto put_inode_out; + } - if (IS_ERR(page)) { - err = PTR_ERR(page); - ntfs_err(sb, "Failed to read $AttrDef (%d).", err); - goto put_inode_out; - } - memcpy(Add2Ptr(t, done), page_address(page), - min(PAGE_SIZE, tail)); - ntfs_unmap_page(page); - - if (!idx && ATTR_STD != t->type) { - ntfs_err(sb, "$AttrDef is corrupted."); - err = -EINVAL; - goto put_inode_out; - } + if (ATTR_STD != t->type) { + ntfs_err(sb, "$AttrDef is corrupted."); + err = -EINVAL; + goto put_inode_out; } t += 1; sbi->def_entries = 1; done = sizeof(struct ATTR_DEF_ENTRY); - sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE; - sbi->ea_max_size = 0x10000; /* default formatter value */ while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) { u32 t32 = le32_to_cpu(t->type); @@ -1491,27 +1482,22 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) goto put_inode_out; } - for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) { - const __le16 *src; - u16 *dst = Add2Ptr(sbi->upcase, idx << PAGE_SHIFT); - struct page *page = ntfs_map_page(inode->i_mapping, idx); - - if (IS_ERR(page)) { - err = PTR_ERR(page); - ntfs_err(sb, "Failed to read $UpCase (%d).", err); - goto put_inode_out; - } - - src = page_address(page); + /* Read the entire file. */ + err = inode_read_data(inode, sbi->upcase, 0x10000 * sizeof(short)); + if (err) { + ntfs_err(sb, "Failed to read $UpCase (%d).", err); + goto put_inode_out; + } #ifdef __BIG_ENDIAN - for (i = 0; i < PAGE_SIZE / sizeof(u16); i++) + { + const __le16 *src = sbi->upcase; + u16 *dst = sbi->upcase; + + for (i = 0; i < 0x10000; i++) *dst++ = le16_to_cpu(*src++); -#else - memcpy(dst, src, PAGE_SIZE); -#endif - ntfs_unmap_page(page); } +#endif shared = ntfs_set_shared(sbi->upcase, 0x10000 * sizeof(short)); if (shared && sbi->upcase != shared) { @@ -1847,10 +1833,8 @@ bool is_legacy_ntfs(struct super_block *sb) #else static inline void register_as_ntfs_legacy(void) {} static inline void unregister_as_ntfs_legacy(void) {} -bool is_legacy_ntfs(struct super_block *sb) { return false; } #endif - // clang-format on static int __init init_ntfs_fs(void) @@ -1876,8 +1860,7 @@ static int __init init_ntfs_fs(void) ntfs_inode_cachep = kmem_cache_create( "ntfs_inode_cache", sizeof(struct ntfs_inode), 0, - (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), - init_once); + (SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), init_once); if (!ntfs_inode_cachep) { err = -ENOMEM; goto out1; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 73785dece7a7..0703e1ae32b2 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -195,10 +195,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, { const struct EA_INFO *info; struct EA_FULL *ea_all = NULL; - const struct EA_FULL *ea; u32 off, size; int err; - int ea_size; size_t ret; err = ntfs_read_ea(ni, &ea_all, 0, &info); @@ -212,16 +210,18 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, /* Enumerate all xattrs. */ ret = 0; - for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) { - ea = Add2Ptr(ea_all, off); - ea_size = unpacked_ea_size(ea); + off = 0; + while (off + sizeof(struct EA_FULL) < size) { + const struct EA_FULL *ea = Add2Ptr(ea_all, off); + int ea_size = unpacked_ea_size(ea); + u8 name_len = ea->name_len; - if (!ea->name_len) + if (!name_len) break; - if (ea->name_len > ea_size) { + if (name_len > ea_size) { ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); - err = -EINVAL; /* corrupted fs */ + err = -EINVAL; /* corrupted fs. */ break; } @@ -230,16 +230,17 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (off + ea_size > size) break; - if (ret + ea->name_len + 1 > bytes_per_buffer) { + if (ret + name_len + 1 > bytes_per_buffer) { err = -ERANGE; goto out; } - memcpy(buffer + ret, ea->name, ea->name_len); - buffer[ret + ea->name_len] = 0; + memcpy(buffer + ret, ea->name, name_len); + buffer[ret + name_len] = 0; } - ret += ea->name_len + 1; + ret += name_len + 1; + off += ea_size; } out: diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index d620d4c53c6f..f0beb173dbba 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -294,13 +294,16 @@ out: * bh passed here can be an inode block or a dir data block, depending * on the inode inline data flag. */ -static int ocfs2_check_dir_entry(struct inode * dir, - struct ocfs2_dir_entry * de, - struct buffer_head * bh, +static int ocfs2_check_dir_entry(struct inode *dir, + struct ocfs2_dir_entry *de, + struct buffer_head *bh, + char *buf, + unsigned int size, unsigned long offset) { const char *error_msg = NULL; const int rlen = le16_to_cpu(de->rec_len); + const unsigned long next_offset = ((char *) de - buf) + rlen; if (unlikely(rlen < OCFS2_DIR_REC_LEN(1))) error_msg = "rec_len is smaller than minimal"; @@ -308,9 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir, error_msg = "rec_len % 4 != 0"; else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len))) error_msg = "rec_len is too small for name_len"; - else if (unlikely( - ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)) - error_msg = "directory entry across blocks"; + else if (unlikely(next_offset > size)) + error_msg = "directory entry overrun"; + else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) && + next_offset != size) + error_msg = "directory entry too close to end"; if (unlikely(error_msg != NULL)) mlog(ML_ERROR, "bad entry in directory #%llu: %s - " @@ -352,16 +357,17 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh, de_buf = first_de; dlimit = de_buf + bytes; - while (de_buf < dlimit) { + while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ de = (struct ocfs2_dir_entry *) de_buf; - if (de_buf + namelen <= dlimit && + if (de->name + namelen <= dlimit && ocfs2_match(namelen, name, de)) { /* found a match - just to be sure, do a full check */ - if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, bh, first_de, + bytes, offset)) { ret = -1; goto bail; } @@ -1138,7 +1144,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, pde = NULL; de = (struct ocfs2_dir_entry *) first_de; while (i < bytes) { - if (!ocfs2_check_dir_entry(dir, de, bh, i)) { + if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) { status = -EIO; mlog_errno(status); goto bail; @@ -1635,7 +1641,8 @@ int __ocfs2_add_entry(handle_t *handle, /* These checks should've already been passed by the * prepare function, but I guess we can leave them * here anyway. */ - if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start, + size, offset)) { retval = -ENOENT; goto bail; } @@ -1774,7 +1781,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, } de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos); - if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) { + if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data, + i_size_read(inode), ctx->pos)) { /* On error, skip the f_pos to the end. */ ctx->pos = i_size_read(inode); break; @@ -1867,7 +1875,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, while (ctx->pos < i_size_read(inode) && offset < sb->s_blocksize) { de = (struct ocfs2_dir_entry *) (bh->b_data + offset); - if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { + if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data, + sb->s_blocksize, offset)) { /* On error, skip the f_pos to the next block. */ ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; @@ -3339,7 +3348,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, struct super_block *sb = dir->i_sb; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_dir_entry *de, *last_de = NULL; - char *de_buf, *limit; + char *first_de, *de_buf, *limit; unsigned long offset = 0; unsigned int rec_len, new_rec_len, free_space; @@ -3352,14 +3361,16 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh, else free_space = dir->i_sb->s_blocksize - i_size_read(dir); - de_buf = di->id2.i_data.id_data; + first_de = di->id2.i_data.id_data; + de_buf = first_de; limit = de_buf + i_size_read(dir); rec_len = OCFS2_DIR_REC_LEN(namelen); while (de_buf < limit) { de = (struct ocfs2_dir_entry *)de_buf; - if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de, + i_size_read(dir), offset)) { ret = -ENOENT; goto out; } @@ -3441,7 +3452,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, /* move to next block */ de = (struct ocfs2_dir_entry *) bh->b_data; } - if (!ocfs2_check_dir_entry(dir, de, bh, offset)) { + if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize, + offset)) { status = -ENOENT; goto bail; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index cb40cafbc062..da78a04d6f0b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -221,12 +221,12 @@ struct ocfs2_lock_res_ops { */ #define LOCK_TYPE_USES_LVB 0x2 -static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .get_osb = ocfs2_get_inode_osb, .check_downconvert = ocfs2_check_meta_downconvert, .set_lvb = ocfs2_set_meta_lvb, @@ -234,50 +234,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_super_lops = { +static const struct ocfs2_lock_res_ops ocfs2_super_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH, }; -static struct ocfs2_lock_res_ops ocfs2_rename_lops = { +static const struct ocfs2_lock_res_ops ocfs2_rename_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { +static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { +static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { +static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { +static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = { .get_osb = ocfs2_get_dentry_osb, .post_unlock = ocfs2_dentry_post_unlock, .downconvert_worker = ocfs2_dentry_convert_worker, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_flock_lops = { +static const struct ocfs2_lock_res_ops ocfs2_flock_lops = { .get_osb = ocfs2_get_file_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { +static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { .set_lvb = ocfs2_set_qinfo_lvb, .get_osb = ocfs2_get_qinfo_osb, .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { +static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { .check_downconvert = ocfs2_check_refcount_downconvert, .downconvert_worker = ocfs2_refcount_convert_worker, .flags = 0, @@ -510,7 +510,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, - struct ocfs2_lock_res_ops *ops, + const struct ocfs2_lock_res_ops *ops, void *priv) { res->l_type = type; @@ -553,7 +553,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, unsigned int generation, struct inode *inode) { - struct ocfs2_lock_res_ops *ops; + const struct ocfs2_lock_res_ops *ops; switch(type) { case OCFS2_LOCK_TYPE_RW: diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 4d1ea8703fcd..59c92353151a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2189,8 +2189,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, * @osb: ocfs2 file system * @ret_orphan_dir: Orphan dir inode - returned locked! * @blkno: Actual block number of the inode to be inserted into orphan dir. + * @name: Buffer to store the name of the orphan. * @lookup: dir lookup result, to be passed back into functions like * ocfs2_orphan_add + * @dio: Flag indicating if direct IO is being used or not. * * Returns zero on success and the ret_orphan_dir, name and lookup * fields will be populated. diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8fe826143d7b..51c52768132d 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -154,7 +154,7 @@ struct ocfs2_lock_stats { struct ocfs2_lock_res { void *l_priv; - struct ocfs2_lock_res_ops *l_ops; + const struct ocfs2_lock_res_ops *l_ops; struct list_head l_blocked_list; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index c973c03f6fd8..10157d9d7a9c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations o2cb_stack_ops = { +static const struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, .this_node = o2cb_cluster_this_node, diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index c11406cd87a8..77edcd70f72c 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1065,7 +1065,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { +static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 3636847fae19..02ab072c528a 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -223,7 +223,7 @@ struct ocfs2_stack_operations { */ struct ocfs2_stack_plugin { char *sp_name; - struct ocfs2_stack_operations *sp_ops; + const struct ocfs2_stack_operations *sp_ops; struct module *sp_owner; /* These are managed by the stackglue code. */ diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3b81213ed7b8..35c0cc2a51af 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1062,13 +1062,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry, return i_ret + b_ret; } -static int ocfs2_xattr_find_entry(int name_index, +static int ocfs2_xattr_find_entry(struct inode *inode, int name_index, const char *name, struct ocfs2_xattr_search *xs) { struct ocfs2_xattr_entry *entry; size_t name_len; - int i, cmp = 1; + int i, name_offset, cmp = 1; if (name == NULL) return -EINVAL; @@ -1076,13 +1076,22 @@ static int ocfs2_xattr_find_entry(int name_index, name_len = strlen(name); entry = xs->here; for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { + if ((void *)entry >= xs->end) { + ocfs2_error(inode->i_sb, "corrupted xattr entries"); + return -EFSCORRUPTED; + } cmp = name_index - ocfs2_xattr_get_type(entry); if (!cmp) cmp = name_len - entry->xe_name_len; - if (!cmp) - cmp = memcmp(name, (xs->base + - le16_to_cpu(entry->xe_name_offset)), - name_len); + if (!cmp) { + name_offset = le16_to_cpu(entry->xe_name_offset); + if ((xs->base + name_offset + name_len) > xs->end) { + ocfs2_error(inode->i_sb, + "corrupted xattr entries"); + return -EFSCORRUPTED; + } + cmp = memcmp(name, (xs->base + name_offset), name_len); + } if (cmp == 0) break; entry += 1; @@ -1166,7 +1175,7 @@ static int ocfs2_xattr_ibody_get(struct inode *inode, xs->base = (void *)xs->header; xs->here = xs->header->xh_entries; - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret) return ret; size = le64_to_cpu(xs->here->xe_value_size); @@ -2698,7 +2707,7 @@ static int ocfs2_xattr_ibody_find(struct inode *inode, /* Find the named attribute. */ if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); if (ret && ret != -ENODATA) return ret; xs->not_found = ret; @@ -2833,7 +2842,7 @@ static int ocfs2_xattr_block_find(struct inode *inode, xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; xs->here = xs->header->xh_entries; - ret = ocfs2_xattr_find_entry(name_index, name, xs); + ret = ocfs2_xattr_find_entry(inode, name_index, name, xs); } else ret = ocfs2_xattr_index_block_find(inode, blk_bh, name_index, diff --git a/fs/pidfs.c b/fs/pidfs.c index c9cb14181def..7ffdc88dfb52 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -119,7 +119,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct task_struct *task __free(put_task) = NULL; struct nsproxy *nsp __free(put_nsproxy) = NULL; struct pid *pid = pidfd_pid(file); - struct ns_common *ns_common; + struct ns_common *ns_common = NULL; if (arg) return -EINVAL; @@ -146,52 +146,73 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) switch (cmd) { /* Namespaces that hang of nsproxy. */ case PIDFD_GET_CGROUP_NAMESPACE: - get_cgroup_ns(nsp->cgroup_ns); - ns_common = to_ns_common(nsp->cgroup_ns); + if (IS_ENABLED(CONFIG_CGROUPS)) { + get_cgroup_ns(nsp->cgroup_ns); + ns_common = to_ns_common(nsp->cgroup_ns); + } break; case PIDFD_GET_IPC_NAMESPACE: - get_ipc_ns(nsp->ipc_ns); - ns_common = to_ns_common(nsp->ipc_ns); + if (IS_ENABLED(CONFIG_IPC_NS)) { + get_ipc_ns(nsp->ipc_ns); + ns_common = to_ns_common(nsp->ipc_ns); + } break; case PIDFD_GET_MNT_NAMESPACE: get_mnt_ns(nsp->mnt_ns); ns_common = to_ns_common(nsp->mnt_ns); break; case PIDFD_GET_NET_NAMESPACE: - ns_common = to_ns_common(nsp->net_ns); - get_net_ns(ns_common); + if (IS_ENABLED(CONFIG_NET_NS)) { + ns_common = to_ns_common(nsp->net_ns); + get_net_ns(ns_common); + } break; case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: - get_pid_ns(nsp->pid_ns_for_children); - ns_common = to_ns_common(nsp->pid_ns_for_children); + if (IS_ENABLED(CONFIG_PID_NS)) { + get_pid_ns(nsp->pid_ns_for_children); + ns_common = to_ns_common(nsp->pid_ns_for_children); + } break; case PIDFD_GET_TIME_NAMESPACE: - get_time_ns(nsp->time_ns); - ns_common = to_ns_common(nsp->time_ns); + if (IS_ENABLED(CONFIG_TIME_NS)) { + get_time_ns(nsp->time_ns); + ns_common = to_ns_common(nsp->time_ns); + } break; case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: - get_time_ns(nsp->time_ns_for_children); - ns_common = to_ns_common(nsp->time_ns_for_children); + if (IS_ENABLED(CONFIG_TIME_NS)) { + get_time_ns(nsp->time_ns_for_children); + ns_common = to_ns_common(nsp->time_ns_for_children); + } break; case PIDFD_GET_UTS_NAMESPACE: - get_uts_ns(nsp->uts_ns); - ns_common = to_ns_common(nsp->uts_ns); + if (IS_ENABLED(CONFIG_UTS_NS)) { + get_uts_ns(nsp->uts_ns); + ns_common = to_ns_common(nsp->uts_ns); + } break; /* Namespaces that don't hang of nsproxy. */ case PIDFD_GET_USER_NAMESPACE: - rcu_read_lock(); - ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); - rcu_read_unlock(); + if (IS_ENABLED(CONFIG_USER_NS)) { + rcu_read_lock(); + ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); + rcu_read_unlock(); + } break; case PIDFD_GET_PID_NAMESPACE: - rcu_read_lock(); - ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task))); - rcu_read_unlock(); + if (IS_ENABLED(CONFIG_PID_NS)) { + rcu_read_lock(); + ns_common = to_ns_common( get_pid_ns(task_active_pid_ns(task))); + rcu_read_unlock(); + } break; default: return -ENOIOCTLCMD; } + if (!ns_common) + return -EOPNOTSUPP; + /* open_namespace() unconditionally consumes the reference */ return open_namespace(ns_common); } diff --git a/fs/pipe.c b/fs/pipe.c index 50c8a8596b52..7dff2aa50a6d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1469,7 +1469,7 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, return 0; } -static int proc_dopipe_max_size(struct ctl_table *table, int write, +static int proc_dopipe_max_size(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_douintvec(table, write, buffer, lenp, ppos, diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a71ac5379584..a8a8576d8592 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -13,6 +13,7 @@ #include <linux/binfmts.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> +#include <linux/mm.h> struct ctl_table_header; struct mempolicy; @@ -142,6 +143,38 @@ unsigned name_to_int(const struct qstr *qstr); /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 +/** + * folio_precise_page_mapcount() - Number of mappings of this folio page. + * @folio: The folio. + * @page: The page. + * + * The number of present user page table entries that reference this page + * as tracked via the RMAP: either referenced directly (PTE) or as part of + * a larger area that covers this page (e.g., PMD). + * + * Use this function only for the calculation of existing statistics + * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount). + * + * Do not add new users. + * + * Returns: The number of mappings of this folio page. 0 for + * folios that are not mapped to user space or are not tracked via the RMAP + * (e.g., shared zeropage). + */ +static inline int folio_precise_page_mapcount(struct folio *folio, + struct page *page) +{ + int mapcount = atomic_read(&page->_mapcount) + 1; + + /* Handle page_has_type() pages */ + if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) + mapcount = 0; + if (folio_test_large(folio)) + mapcount += folio_entire_mapcount(folio); + + return mapcount; +} + /* * array.c */ diff --git a/fs/proc/page.c b/fs/proc/page.c index 2fb64bdb64eb..b7a5c84b5819 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -37,21 +37,19 @@ static inline unsigned long get_max_dump_pfn(void) #endif } -/* /proc/kpagecount - an array exposing page counts +/* /proc/kpagecount - an array exposing page mapcounts * * Each entry is a u64 representing the corresponding - * physical page count. + * physical page mapcount. */ static ssize_t kpagecount_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { const unsigned long max_dump_pfn = get_max_dump_pfn(); u64 __user *out = (u64 __user *)buf; - struct page *ppage; unsigned long src = *ppos; unsigned long pfn; ssize_t ret = 0; - u64 pcount; pfn = src / KPMSIZE; if (src & KPMMASK || count & KPMMASK) @@ -61,18 +59,19 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); while (count > 0) { + struct page *page; + u64 mapcount = 0; + /* * TODO: ZONE_DEVICE support requires to identify * memmaps that were actually initialized. */ - ppage = pfn_to_online_page(pfn); - - if (!ppage) - pcount = 0; - else - pcount = page_mapcount(ppage); + page = pfn_to_online_page(pfn); + if (page) + mapcount = folio_precise_page_mapcount(page_folio(page), + page); - if (put_user(pcount, out)) { + if (put_user(mapcount, out)) { ret = -EFAULT; break; } @@ -148,19 +147,16 @@ u64 stable_page_flags(const struct page *page) u |= 1 << KPF_COMPOUND_TAIL; if (folio_test_hugetlb(folio)) u |= 1 << KPF_HUGE; - /* - * We need to check PageLRU/PageAnon - * to make sure a given page is a thp, not a non-huge compound page. - */ - else if (folio_test_large(folio)) { - if ((k & (1 << PG_lru)) || is_anon) - u |= 1 << KPF_THP; - else if (is_huge_zero_folio(folio)) { - u |= 1 << KPF_ZERO_PAGE; - u |= 1 << KPF_THP; - } - } else if (is_zero_pfn(page_to_pfn(page))) + else if (folio_test_large(folio) && + folio_test_large_rmappable(folio)) { + /* Note: we indicate any THPs here, not just PMD-sized ones */ + u |= 1 << KPF_THP; + } else if (is_huge_zero_folio(folio)) { u |= 1 << KPF_ZERO_PAGE; + u |= 1 << KPF_THP; + } else if (is_zero_folio(folio)) { + u |= 1 << KPF_ZERO_PAGE; + } /* * Caveats on high order pages: PG_buddy and PG_slab will only be set diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 71e5039d940d..5f171ad7b436 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -22,6 +22,7 @@ #include <linux/pkeys.h> #include <linux/minmax.h> #include <linux/overflow.h> +#include <linux/buildid.h> #include <asm/elf.h> #include <asm/tlb.h> @@ -239,6 +240,67 @@ static int do_maps_open(struct inode *inode, struct file *file, sizeof(struct proc_maps_private)); } +static void get_vma_name(struct vm_area_struct *vma, + const struct path **path, + const char **name, + const char **name_fmt) +{ + struct anon_vma_name *anon_name = vma->vm_mm ? anon_vma_name(vma) : NULL; + + *name = NULL; + *path = NULL; + *name_fmt = NULL; + + /* + * Print the dentry name for named mappings, and a + * special [heap] marker for the heap: + */ + if (vma->vm_file) { + /* + * If user named this anon shared memory via + * prctl(PR_SET_VMA ..., use the provided name. + */ + if (anon_name) { + *name_fmt = "[anon_shmem:%s]"; + *name = anon_name->name; + } else { + *path = file_user_path(vma->vm_file); + } + return; + } + + if (vma->vm_ops && vma->vm_ops->name) { + *name = vma->vm_ops->name(vma); + if (*name) + return; + } + + *name = arch_vma_name(vma); + if (*name) + return; + + if (!vma->vm_mm) { + *name = "[vdso]"; + return; + } + + if (vma_is_initial_heap(vma)) { + *name = "[heap]"; + return; + } + + if (vma_is_initial_stack(vma)) { + *name = "[stack]"; + return; + } + + if (anon_name) { + *name_fmt = "[anon:%s]"; + *name = anon_name->name; + return; + } +} + static void show_vma_header_prefix(struct seq_file *m, unsigned long start, unsigned long end, vm_flags_t flags, unsigned long long pgoff, @@ -262,17 +324,15 @@ static void show_vma_header_prefix(struct seq_file *m, static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { - struct anon_vma_name *anon_name = NULL; - struct mm_struct *mm = vma->vm_mm; - struct file *file = vma->vm_file; + const struct path *path; + const char *name_fmt, *name; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; - const char *name = NULL; - if (file) { + if (vma->vm_file) { const struct inode *inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; @@ -283,57 +343,15 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) start = vma->vm_start; end = vma->vm_end; show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino); - if (mm) - anon_name = anon_vma_name(vma); - /* - * Print the dentry name for named mappings, and a - * special [heap] marker for the heap: - */ - if (file) { + get_vma_name(vma, &path, &name, &name_fmt); + if (path) { seq_pad(m, ' '); - /* - * If user named this anon shared memory via - * prctl(PR_SET_VMA ..., use the provided name. - */ - if (anon_name) - seq_printf(m, "[anon_shmem:%s]", anon_name->name); - else - seq_path(m, file_user_path(file), "\n"); - goto done; - } - - if (vma->vm_ops && vma->vm_ops->name) { - name = vma->vm_ops->name(vma); - if (name) - goto done; - } - - name = arch_vma_name(vma); - if (!name) { - if (!mm) { - name = "[vdso]"; - goto done; - } - - if (vma_is_initial_heap(vma)) { - name = "[heap]"; - goto done; - } - - if (vma_is_initial_stack(vma)) { - name = "[stack]"; - goto done; - } - - if (anon_name) { - seq_pad(m, ' '); - seq_printf(m, "[anon:%s]", anon_name->name); - } - } - -done: - if (name) { + seq_path(m, path, "\n"); + } else if (name_fmt) { + seq_pad(m, ' '); + seq_printf(m, name_fmt, name); + } else if (name) { seq_pad(m, ' '); seq_puts(m, name); } @@ -358,11 +376,268 @@ static int pid_maps_open(struct inode *inode, struct file *file) return do_maps_open(inode, file, &proc_pid_maps_op); } +#define PROCMAP_QUERY_VMA_FLAGS ( \ + PROCMAP_QUERY_VMA_READABLE | \ + PROCMAP_QUERY_VMA_WRITABLE | \ + PROCMAP_QUERY_VMA_EXECUTABLE | \ + PROCMAP_QUERY_VMA_SHARED \ +) + +#define PROCMAP_QUERY_VALID_FLAGS_MASK ( \ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA | \ + PROCMAP_QUERY_FILE_BACKED_VMA | \ + PROCMAP_QUERY_VMA_FLAGS \ +) + +static int query_vma_setup(struct mm_struct *mm) +{ + return mmap_read_lock_killable(mm); +} + +static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma) +{ + mmap_read_unlock(mm); +} + +static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr) +{ + return find_vma(mm, addr); +} + +static struct vm_area_struct *query_matching_vma(struct mm_struct *mm, + unsigned long addr, u32 flags) +{ + struct vm_area_struct *vma; + +next_vma: + vma = query_vma_find_by_addr(mm, addr); + if (!vma) + goto no_vma; + + /* user requested only file-backed VMA, keep iterating */ + if ((flags & PROCMAP_QUERY_FILE_BACKED_VMA) && !vma->vm_file) + goto skip_vma; + + /* VMA permissions should satisfy query flags */ + if (flags & PROCMAP_QUERY_VMA_FLAGS) { + u32 perm = 0; + + if (flags & PROCMAP_QUERY_VMA_READABLE) + perm |= VM_READ; + if (flags & PROCMAP_QUERY_VMA_WRITABLE) + perm |= VM_WRITE; + if (flags & PROCMAP_QUERY_VMA_EXECUTABLE) + perm |= VM_EXEC; + if (flags & PROCMAP_QUERY_VMA_SHARED) + perm |= VM_MAYSHARE; + + if ((vma->vm_flags & perm) != perm) + goto skip_vma; + } + + /* found covering VMA or user is OK with the matching next VMA */ + if ((flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) || vma->vm_start <= addr) + return vma; + +skip_vma: + /* + * If the user needs closest matching VMA, keep iterating. + */ + addr = vma->vm_end; + if (flags & PROCMAP_QUERY_COVERING_OR_NEXT_VMA) + goto next_vma; + +no_vma: + return ERR_PTR(-ENOENT); +} + +static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg) +{ + struct procmap_query karg; + struct vm_area_struct *vma; + struct mm_struct *mm; + const char *name = NULL; + char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL; + __u64 usize; + int err; + + if (copy_from_user(&usize, (void __user *)uarg, sizeof(usize))) + return -EFAULT; + /* argument struct can never be that large, reject abuse */ + if (usize > PAGE_SIZE) + return -E2BIG; + /* argument struct should have at least query_flags and query_addr fields */ + if (usize < offsetofend(struct procmap_query, query_addr)) + return -EINVAL; + err = copy_struct_from_user(&karg, sizeof(karg), uarg, usize); + if (err) + return err; + + /* reject unknown flags */ + if (karg.query_flags & ~PROCMAP_QUERY_VALID_FLAGS_MASK) + return -EINVAL; + /* either both buffer address and size are set, or both should be zero */ + if (!!karg.vma_name_size != !!karg.vma_name_addr) + return -EINVAL; + if (!!karg.build_id_size != !!karg.build_id_addr) + return -EINVAL; + + mm = priv->mm; + if (!mm || !mmget_not_zero(mm)) + return -ESRCH; + + err = query_vma_setup(mm); + if (err) { + mmput(mm); + return err; + } + + vma = query_matching_vma(mm, karg.query_addr, karg.query_flags); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + vma = NULL; + goto out; + } + + karg.vma_start = vma->vm_start; + karg.vma_end = vma->vm_end; + + karg.vma_flags = 0; + if (vma->vm_flags & VM_READ) + karg.vma_flags |= PROCMAP_QUERY_VMA_READABLE; + if (vma->vm_flags & VM_WRITE) + karg.vma_flags |= PROCMAP_QUERY_VMA_WRITABLE; + if (vma->vm_flags & VM_EXEC) + karg.vma_flags |= PROCMAP_QUERY_VMA_EXECUTABLE; + if (vma->vm_flags & VM_MAYSHARE) + karg.vma_flags |= PROCMAP_QUERY_VMA_SHARED; + + karg.vma_page_size = vma_kernel_pagesize(vma); + + if (vma->vm_file) { + const struct inode *inode = file_user_inode(vma->vm_file); + + karg.vma_offset = ((__u64)vma->vm_pgoff) << PAGE_SHIFT; + karg.dev_major = MAJOR(inode->i_sb->s_dev); + karg.dev_minor = MINOR(inode->i_sb->s_dev); + karg.inode = inode->i_ino; + } else { + karg.vma_offset = 0; + karg.dev_major = 0; + karg.dev_minor = 0; + karg.inode = 0; + } + + if (karg.build_id_size) { + __u32 build_id_sz; + + err = build_id_parse(vma, build_id_buf, &build_id_sz); + if (err) { + karg.build_id_size = 0; + } else { + if (karg.build_id_size < build_id_sz) { + err = -ENAMETOOLONG; + goto out; + } + karg.build_id_size = build_id_sz; + } + } + + if (karg.build_id_size) { + __u32 build_id_sz; + + err = build_id_parse(vma, build_id_buf, &build_id_sz); + if (err) { + karg.build_id_size = 0; + } else { + if (karg.build_id_size < build_id_sz) { + err = -ENAMETOOLONG; + goto out; + } + karg.build_id_size = build_id_sz; + } + } + + if (karg.vma_name_size) { + size_t name_buf_sz = min_t(size_t, PATH_MAX, karg.vma_name_size); + const struct path *path; + const char *name_fmt; + size_t name_sz = 0; + + get_vma_name(vma, &path, &name, &name_fmt); + + if (path || name_fmt || name) { + name_buf = kmalloc(name_buf_sz, GFP_KERNEL); + if (!name_buf) { + err = -ENOMEM; + goto out; + } + } + if (path) { + name = d_path(path, name_buf, name_buf_sz); + if (IS_ERR(name)) { + err = PTR_ERR(name); + goto out; + } + name_sz = name_buf + name_buf_sz - name; + } else if (name || name_fmt) { + name_sz = 1 + snprintf(name_buf, name_buf_sz, name_fmt ?: "%s", name); + name = name_buf; + } + if (name_sz > name_buf_sz) { + err = -ENAMETOOLONG; + goto out; + } + karg.vma_name_size = name_sz; + } + + /* unlock vma or mmap_lock, and put mm_struct before copying data to user */ + query_vma_teardown(mm, vma); + mmput(mm); + + if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr), + name, karg.vma_name_size)) { + kfree(name_buf); + return -EFAULT; + } + kfree(name_buf); + + if (karg.build_id_size && copy_to_user(u64_to_user_ptr(karg.build_id_addr), + build_id_buf, karg.build_id_size)) + return -EFAULT; + + if (copy_to_user(uarg, &karg, min_t(size_t, sizeof(karg), usize))) + return -EFAULT; + + return 0; + +out: + query_vma_teardown(mm, vma); + mmput(mm); + kfree(name_buf); + return err; +} + +static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct seq_file *seq = file->private_data; + struct proc_maps_private *priv = seq->private; + + switch (cmd) { + case PROCMAP_QUERY: + return do_procmap_query(priv, (void __user *)arg); + default: + return -ENOIOCTLCMD; + } +} + const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, .release = proc_map_release, + .unlocked_ioctl = procfs_procmap_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; /* @@ -442,7 +717,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool dirty, bool locked, - bool migration) + bool present) { struct folio *folio = page_folio(page); int i, nr = compound ? compound_nr(page) : 1; @@ -471,24 +746,29 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, * Then accumulate quantities that may depend on sharing, or that may * differ page-by-page. * - * refcount == 1 guarantees the page is mapped exactly once. - * If any subpage of the compound page mapped with PTE it would elevate - * the refcount. + * refcount == 1 for present entries guarantees that the folio is mapped + * exactly once. For large folios this implies that exactly one + * PTE/PMD/... maps (a part of) this folio. * - * The page_mapcount() is called to get a snapshot of the mapcount. - * Without holding the page lock this snapshot can be slightly wrong as - * we cannot always read the mapcount atomically. It is not safe to - * call page_mapcount() even with PTL held if the page is not mapped, - * especially for migration entries. Treat regular migration entries - * as mapcount == 1. + * Treat all non-present entries (where relying on the mapcount and + * refcount doesn't make sense) as "maybe shared, but not sure how + * often". We treat device private entries as being fake-present. + * + * Note that it would not be safe to read the mapcount especially for + * pages referenced by migration entries, even with the PTL held. */ - if ((folio_ref_count(folio) == 1) || migration) { + if (folio_ref_count(folio) == 1 || !present) { smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT, - dirty, locked, true); + dirty, locked, present); return; } + /* + * We obtain a snapshot of the mapcount. Without holding the folio lock + * this snapshot can be slightly wrong as we cannot always read the + * mapcount atomically. + */ for (i = 0; i < nr; i++, page++) { - int mapcount = page_mapcount(page); + int mapcount = folio_precise_page_mapcount(folio, page); unsigned long pss = PAGE_SIZE << PSS_SHIFT; if (mapcount >= 2) pss /= mapcount; @@ -531,13 +811,14 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; - bool migration = false, young = false, dirty = false; + bool present = false, young = false, dirty = false; pte_t ptent = ptep_get(pte); if (pte_present(ptent)) { page = vm_normal_page(vma, addr, ptent); young = pte_young(ptent); dirty = pte_dirty(ptent); + present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); @@ -555,8 +836,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } } else if (is_pfn_swap_entry(swpent)) { - if (is_migration_entry(swpent)) - migration = true; + if (is_device_private_entry(swpent)) + present = true; page = pfn_swap_entry_to_page(swpent); } } else { @@ -567,7 +848,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, young, dirty, locked, migration); + smaps_account(mss, page, false, young, dirty, locked, present); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -578,18 +859,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool present = false; struct folio *folio; - bool migration = false; if (pmd_present(*pmd)) { page = vm_normal_page_pmd(vma, addr, *pmd); + present = true; } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); - if (is_migration_entry(entry)) { - migration = true; + if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); - } } if (IS_ERR_OR_NULL(page)) return; @@ -604,7 +884,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, mss->file_thp += HPAGE_PMD_SIZE; smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), - locked, migration); + locked, present); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -708,6 +988,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_SHADOW_STACK)] = "ss", #endif #ifdef CONFIG_64BIT + [ilog2(VM_DROPPABLE)] = "dp", [ilog2(VM_SEALED)] = "sl", #endif }; @@ -733,19 +1014,23 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; - pte_t ptent = huge_ptep_get(pte); + pte_t ptent = huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; + bool present = false; if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); + present = true; } else if (is_swap_pte(ptent)) { swp_entry_t swpent = pte_to_swp_entry(ptent); if (is_pfn_swap_entry(swpent)) folio = pfn_swap_entry_folio(swpent); } + if (folio) { - if (folio_likely_mapped_shared(folio) || + /* We treat non-present entries as "maybe shared". */ + if (!present || folio_likely_mapped_shared(folio) || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else @@ -1091,7 +1376,7 @@ struct clear_refs_private { static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - struct page *page; + struct folio *folio; if (!pte_write(pte)) return false; @@ -1099,10 +1384,10 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, return false; if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))) return false; - page = vm_normal_page(vma, addr, pte); - if (!page) + folio = vm_normal_folio(vma, addr, pte); + if (!folio) return false; - return page_maybe_dma_pinned(page); + return folio_maybe_dma_pinned(folio); } static inline void clear_soft_dirty(struct vm_area_struct *vma, @@ -1418,7 +1703,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, { u64 frame = 0, flags = 0; struct page *page = NULL; - bool migration = false; + struct folio *folio; if (pte_present(pte)) { if (pm->show_pfn) @@ -1450,17 +1735,20 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; - migration = is_migration_entry(entry); if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); if (pte_marker_entry_uffd_wp(entry)) flags |= PM_UFFD_WP; } - if (page && !PageAnon(page)) - flags |= PM_FILE; - if (page && !migration && page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; + if (page) { + folio = page_folio(page); + if (!folio_test_anon(folio)) + flags |= PM_FILE; + if ((flags & PM_PRESENT) && + folio_precise_page_mapcount(folio, page) == 1) + flags |= PM_MMAP_EXCLUSIVE; + } if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1476,13 +1764,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, pte_t *pte, *orig_pte; int err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - bool migration = false; ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { + unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; u64 flags = 0, frame = 0; pmd_t pmd = *pmdp; struct page *page = NULL; + struct folio *folio = NULL; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1496,8 +1785,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_uffd_wp(pmd)) flags |= PM_UFFD_WP; if (pm->show_pfn) - frame = pmd_pfn(pmd) + - ((addr & ~PMD_MASK) >> PAGE_SHIFT); + frame = pmd_pfn(pmd) + idx; } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { @@ -1506,11 +1794,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pm->show_pfn) { if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry); + offset = swp_offset_pfn(entry) + idx; else - offset = swp_offset(entry); - offset = offset + - ((addr & ~PMD_MASK) >> PAGE_SHIFT); + offset = swp_offset(entry) + idx; frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } @@ -1520,17 +1806,25 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_BUG_ON(!is_pmd_migration_entry(pmd)); - migration = is_migration_entry(entry); page = pfn_swap_entry_to_page(entry); } #endif - if (page && !migration && page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; + if (page) { + folio = page_folio(page); + if (!folio_test_anon(folio)) + flags |= PM_FILE; + } + + for (; addr != end; addr += PAGE_SIZE, idx++) { + unsigned long cur_flags = flags; + pagemap_entry_t pme; - for (; addr != end; addr += PAGE_SIZE) { - pagemap_entry_t pme = make_pme(frame, flags); + if (folio && (flags & PM_PRESENT) && + folio_precise_page_mapcount(folio, page + idx) == 1) + cur_flags |= PM_MMAP_EXCLUSIVE; + pme = make_pme(frame, cur_flags); err = add_to_pagemap(&pme, pm); if (err) break; @@ -1585,7 +1879,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); @@ -2274,7 +2568,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask, if (~p->arg.flags & PM_SCAN_WP_MATCHING) { /* Go the short route when not write-protecting pages. */ - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) @@ -2286,7 +2580,7 @@ static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask, i_mmap_lock_write(vma->vm_file->f_mapping); ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep); - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(walk->mm, start, ptep); categories = p->cur_vma_category | pagemap_hugetlb_category(pte); if (!pagemap_scan_is_interesting_page(categories, p)) @@ -2566,7 +2860,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, unsigned long nr_pages) { struct folio *folio = page_folio(page); - int count = page_mapcount(page); + int count = folio_precise_page_mapcount(folio, page); md->pages += nr_pages; if (pte_dirty || folio_test_dirty(folio)) @@ -2682,7 +2976,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { - pte_t huge_pte = huge_ptep_get(pte); + pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); struct numa_maps *md; struct page *page; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index a2b256dac36e..7ae885e6d5d7 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2913,7 +2913,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = { }; EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); -static int do_proc_dqstats(struct ctl_table *table, int write, +static int do_proc_dqstats(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int type = (unsigned long *)table->data - dqstats.stat; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 0e04cf8b1d89..5c2845e47cf2 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -133,8 +133,8 @@ struct ksmbd_transport_ops { }; struct ksmbd_transport { - struct ksmbd_conn *conn; - struct ksmbd_transport_ops *ops; + struct ksmbd_conn *conn; + const struct ksmbd_transport_ops *ops; }; #define KSMBD_TCP_RECV_TIMEOUT (7 * HZ) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index aec0a7a12405..162a12685d2c 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -149,6 +149,7 @@ void ksmbd_session_destroy(struct ksmbd_session *sess) ksmbd_tree_conn_session_logoff(sess); ksmbd_destroy_file_table(&sess->file_table); + ksmbd_launch_ksmbd_durable_scavenger(); ksmbd_session_rpc_clear_list(sess); free_channel_list(sess); kfree(sess->Preauth_HashValue); @@ -326,6 +327,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); up_write(&sessions_table_lock); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index e9da63f25b20..72bc88a63a40 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -11,13 +11,6 @@ #define OPLOCK_WAIT_TIME (35 * HZ) -/* SMB2 Oplock levels */ -#define SMB2_OPLOCK_LEVEL_NONE 0x00 -#define SMB2_OPLOCK_LEVEL_II 0x01 -#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 -#define SMB2_OPLOCK_LEVEL_BATCH 0x09 -#define SMB2_OPLOCK_LEVEL_LEASE 0xFF - /* Oplock states */ #define OPLOCK_STATE_NONE 0x00 #define OPLOCK_ACK_WAIT 0x01 diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index c67fbc8d6683..4d24cc105ef6 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -377,6 +377,7 @@ static void server_ctrl_handle_reset(struct server_ctrl_struct *ctrl) { ksmbd_ipc_soft_reset(); ksmbd_conn_transport_destroy(); + ksmbd_stop_durable_scavenger(); server_conf_free(); server_conf_init(); WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP); diff --git a/fs/smb/server/server.h b/fs/smb/server/server.h index db7278181760..4fc529335271 100644 --- a/fs/smb/server/server.h +++ b/fs/smb/server/server.h @@ -44,6 +44,7 @@ struct ksmbd_server_config { unsigned int max_connections; char *conf[SERVER_CONF_WORK_GROUP + 1]; + struct task_struct *dh_task; }; extern struct ksmbd_server_config server_conf; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 840c71c66b30..37a39ab4ee65 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3526,7 +3526,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_GUID_SIZE); if (dh_info.timeout) fp->durable_timeout = min(dh_info.timeout, - 300000); + DURABLE_HANDLE_MAX_TIMEOUT); else fp->durable_timeout = 60; } diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h index 643f5e1cfe35..3be7d5ae65a8 100644 --- a/fs/smb/server/smb2pdu.h +++ b/fs/smb/server/smb2pdu.h @@ -72,6 +72,8 @@ struct create_durable_req_v2 { __u8 CreateGuid[16]; } __packed; +#define DURABLE_HANDLE_MAX_TIMEOUT 300000 + struct create_durable_reconn_req { struct create_context_hdr ccontext; __u8 Name[8]; diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8faa25c6e129..cf4418f72772 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -164,7 +164,7 @@ enum { SMB_DIRECT_MSG_DATA_TRANSFER }; -static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; +static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops; struct smb_direct_send_ctx { struct list_head msg_list; @@ -2292,7 +2292,7 @@ out: return rdma_capable; } -static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { +static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .prepare = smb_direct_prepare, .disconnect = smb_direct_disconnect, .shutdown = smb_direct_shutdown, diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 6633fa78e9b9..a84788396daa 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -37,7 +37,7 @@ struct tcp_transport { unsigned int nr_iov; }; -static struct ksmbd_transport_ops ksmbd_tcp_transport_ops; +static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops; static void tcp_stop_kthread(struct task_struct *kthread); static struct interface *alloc_iface(char *ifname); @@ -649,7 +649,7 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz) return 0; } -static struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { +static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { .read = ksmbd_tcp_read, .writev = ksmbd_tcp_writev, .disconnect = ksmbd_tcp_disconnect, diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 8b2e37c8716e..4d4ee696e37c 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -8,6 +8,8 @@ #include <linux/filelock.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/kthread.h> +#include <linux/freezer.h> #include "glob.h" #include "vfs_cache.h" @@ -17,6 +19,7 @@ #include "mgmt/tree_connect.h" #include "mgmt/user_session.h" #include "smb_common.h" +#include "server.h" #define S_DEL_PENDING 1 #define S_DEL_ON_CLS 2 @@ -31,6 +34,10 @@ static struct ksmbd_file_table global_ft; static atomic_long_t fd_limit; static struct kmem_cache *filp_cache; +static bool durable_scavenger_running; +static DEFINE_MUTEX(durable_scavenger_lock); +static wait_queue_head_t dh_wq; + void ksmbd_set_fd_limit(unsigned long limit) { limit = min(limit, get_max_files()); @@ -280,9 +287,16 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) if (!has_file_id(fp->persistent_id)) return; - write_lock(&global_ft.lock); idr_remove(global_ft.idr, fp->persistent_id); +} + +static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) +{ + write_lock(&global_ft.lock); + __ksmbd_remove_durable_fd(fp); write_unlock(&global_ft.lock); + if (waitqueue_active(&dh_wq)) + wake_up(&dh_wq); } static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) @@ -305,7 +319,7 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) struct ksmbd_lock *smb_lock, *tmp_lock; fd_limit_close(); - __ksmbd_remove_durable_fd(fp); + ksmbd_remove_durable_fd(fp); if (ft) __ksmbd_remove_fd(ft, fp); @@ -477,7 +491,10 @@ struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id) struct ksmbd_file *fp; fp = __ksmbd_lookup_fd(&global_ft, id); - if (fp && fp->conn) { + if (fp && (fp->conn || + (fp->durable_scavenger_timeout && + (fp->durable_scavenger_timeout < + jiffies_to_msecs(jiffies))))) { ksmbd_put_durable_fd(fp); fp = NULL; } @@ -694,6 +711,142 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon, return fp->tcon != tcon; } +static bool ksmbd_durable_scavenger_alive(void) +{ + mutex_lock(&durable_scavenger_lock); + if (!durable_scavenger_running) { + mutex_unlock(&durable_scavenger_lock); + return false; + } + mutex_unlock(&durable_scavenger_lock); + + if (kthread_should_stop()) + return false; + + if (idr_is_empty(global_ft.idr)) + return false; + + return true; +} + +static void ksmbd_scavenger_dispose_dh(struct list_head *head) +{ + while (!list_empty(head)) { + struct ksmbd_file *fp; + + fp = list_first_entry(head, struct ksmbd_file, node); + list_del_init(&fp->node); + __ksmbd_close_fd(NULL, fp); + } +} + +static int ksmbd_durable_scavenger(void *dummy) +{ + struct ksmbd_file *fp = NULL; + unsigned int id; + unsigned int min_timeout = 1; + bool found_fp_timeout; + LIST_HEAD(scavenger_list); + unsigned long remaining_jiffies; + + __module_get(THIS_MODULE); + + set_freezable(); + while (ksmbd_durable_scavenger_alive()) { + if (try_to_freeze()) + continue; + + found_fp_timeout = false; + + remaining_jiffies = wait_event_timeout(dh_wq, + ksmbd_durable_scavenger_alive() == false, + __msecs_to_jiffies(min_timeout)); + if (remaining_jiffies) + min_timeout = jiffies_to_msecs(remaining_jiffies); + else + min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; + + write_lock(&global_ft.lock); + idr_for_each_entry(global_ft.idr, fp, id) { + if (!fp->durable_timeout) + continue; + + if (atomic_read(&fp->refcount) > 1 || + fp->conn) + continue; + + found_fp_timeout = true; + if (fp->durable_scavenger_timeout <= + jiffies_to_msecs(jiffies)) { + __ksmbd_remove_durable_fd(fp); + list_add(&fp->node, &scavenger_list); + } else { + unsigned long durable_timeout; + + durable_timeout = + fp->durable_scavenger_timeout - + jiffies_to_msecs(jiffies); + + if (min_timeout > durable_timeout) + min_timeout = durable_timeout; + } + } + write_unlock(&global_ft.lock); + + ksmbd_scavenger_dispose_dh(&scavenger_list); + + if (found_fp_timeout == false) + break; + } + + mutex_lock(&durable_scavenger_lock); + durable_scavenger_running = false; + mutex_unlock(&durable_scavenger_lock); + + module_put(THIS_MODULE); + + return 0; +} + +void ksmbd_launch_ksmbd_durable_scavenger(void) +{ + if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE)) + return; + + mutex_lock(&durable_scavenger_lock); + if (durable_scavenger_running == true) { + mutex_unlock(&durable_scavenger_lock); + return; + } + + durable_scavenger_running = true; + + server_conf.dh_task = kthread_run(ksmbd_durable_scavenger, + (void *)NULL, "ksmbd-durable-scavenger"); + if (IS_ERR(server_conf.dh_task)) + pr_err("cannot start conn thread, err : %ld\n", + PTR_ERR(server_conf.dh_task)); + mutex_unlock(&durable_scavenger_lock); +} + +void ksmbd_stop_durable_scavenger(void) +{ + if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE)) + return; + + mutex_lock(&durable_scavenger_lock); + if (!durable_scavenger_running) { + mutex_unlock(&durable_scavenger_lock); + return; + } + + durable_scavenger_running = false; + if (waitqueue_active(&dh_wq)) + wake_up(&dh_wq); + mutex_unlock(&durable_scavenger_lock); + kthread_stop(server_conf.dh_task); +} + static bool session_fd_check(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp) { @@ -718,6 +871,10 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, fp->tcon = NULL; fp->volatile_id = KSMBD_NO_FID; + if (fp->durable_timeout) + fp->durable_scavenger_timeout = + jiffies_to_msecs(jiffies) + fp->durable_timeout; + return true; } @@ -750,11 +907,12 @@ void ksmbd_free_global_file_table(void) unsigned int id; idr_for_each_entry(global_ft.idr, fp, id) { - __ksmbd_remove_durable_fd(fp); - kmem_cache_free(filp_cache, fp); + ksmbd_remove_durable_fd(fp); + __ksmbd_close_fd(NULL, fp); } - ksmbd_destroy_file_table(&global_ft); + idr_destroy(global_ft.idr); + kfree(global_ft.idr); } int ksmbd_validate_name_reconnect(struct ksmbd_share_config *share, @@ -810,6 +968,7 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) } up_write(&ci->m_lock); + fp->f_state = FP_NEW; __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); if (!has_file_id(fp->volatile_id)) { fp->conn = NULL; @@ -849,6 +1008,8 @@ int ksmbd_init_file_cache(void) if (!filp_cache) goto out; + init_waitqueue_head(&dh_wq); + return 0; out: diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 5a225e7055f1..b0f6d0f94cb8 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -101,6 +101,7 @@ struct ksmbd_file { struct list_head lock_list; int durable_timeout; + int durable_scavenger_timeout; /* if ls is happening on directory, below is valid*/ struct ksmbd_readdir_data readdir_data; @@ -152,6 +153,8 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid); struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry); unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp); struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp); +void ksmbd_launch_ksmbd_durable_scavenger(void); +void ksmbd_stop_durable_scavenger(void); void ksmbd_close_tree_conn_fds(struct ksmbd_work *work); void ksmbd_close_session_fds(struct ksmbd_work *work); int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode); diff --git a/fs/super.c b/fs/super.c index 095ba793e10c..38d72a3cf6fc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -736,6 +736,17 @@ struct super_block *sget_fc(struct fs_context *fc, struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns; int err; + /* + * Never allow s_user_ns != &init_user_ns when FS_USERNS_MOUNT is + * not set, as the filesystem is likely unprepared to handle it. + * This can happen when fsconfig() is called from init_user_ns with + * an fs_fd opened in another user namespace. + */ + if (user_ns != &init_user_ns && !(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) { + errorfc(fc, "VFS: Mounting from non-initial user namespace is not allowed"); + return ERR_PTR(-EPERM); + } + retry: spin_lock(&sb_lock); if (test) { diff --git a/fs/binfmt_elf_test.c b/fs/tests/binfmt_elf_kunit.c index 11d734fec366..11d734fec366 100644 --- a/fs/binfmt_elf_test.c +++ b/fs/tests/binfmt_elf_kunit.c diff --git a/fs/exec_test.c b/fs/tests/exec_kunit.c index 7c77d039680b..7c77d039680b 100644 --- a/fs/exec_test.c +++ b/fs/tests/exec_kunit.c diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 44666afc6209..bc625788589c 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1540,4 +1540,5 @@ static void __exit exit_ufs_fs(void) module_init(init_ufs_fs) module_exit(exit_ufs_fs) +MODULE_DESCRIPTION("UFS Filesystem"); MODULE_LICENSE("GPL"); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 17e409ceaa33..27a3e9285fbf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -257,7 +257,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, goto out; ret = false; - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep); /* * Lockless access: we're in a wait_event so it's ok if it diff --git a/fs/xattr.c b/fs/xattr.c index f8b643f91a98..7672ce5486c5 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -630,10 +630,9 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, ctx->kvalue, ctx->size, ctx->flags); } -static long -setxattr(struct mnt_idmap *idmap, struct dentry *d, - const char __user *name, const void __user *value, size_t size, - int flags) +static int path_setxattr(const char __user *pathname, + const char __user *name, const void __user *value, + size_t size, int flags, unsigned int lookup_flags) { struct xattr_name kname; struct xattr_ctx ctx = { @@ -643,33 +642,20 @@ setxattr(struct mnt_idmap *idmap, struct dentry *d, .kname = &kname, .flags = flags, }; + struct path path; int error; error = setxattr_copy(name, &ctx); if (error) return error; - error = do_setxattr(idmap, d, &ctx); - - kvfree(ctx.kvalue); - return error; -} - -static int path_setxattr(const char __user *pathname, - const char __user *name, const void __user *value, - size_t size, int flags, unsigned int lookup_flags) -{ - struct path path; - int error; - retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) - return error; + goto out; error = mnt_want_write(path.mnt); if (!error) { - error = setxattr(mnt_idmap(path.mnt), path.dentry, name, - value, size, flags); + error = do_setxattr(mnt_idmap(path.mnt), path.dentry, &ctx); mnt_drop_write(path.mnt); } path_put(&path); @@ -677,6 +663,9 @@ retry: lookup_flags |= LOOKUP_REVAL; goto retry; } + +out: + kvfree(ctx.kvalue); return error; } @@ -697,20 +686,32 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { - struct fd f = fdget(fd); - int error = -EBADF; + struct xattr_name kname; + struct xattr_ctx ctx = { + .cvalue = value, + .kvalue = NULL, + .size = size, + .kname = &kname, + .flags = flags, + }; + int error; + CLASS(fd, f)(fd); if (!f.file) - return error; + return -EBADF; + audit_file(f.file); + error = setxattr_copy(name, &ctx); + if (error) + return error; + error = mnt_want_write_file(f.file); if (!error) { - error = setxattr(file_mnt_idmap(f.file), - f.file->f_path.dentry, name, - value, size, flags); + error = do_setxattr(file_mnt_idmap(f.file), + f.file->f_path.dentry, &ctx); mnt_drop_write_file(f.file); } - fdput(f); + kvfree(ctx.kvalue); return error; } @@ -899,9 +900,17 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) * Extended attribute REMOVE operations */ static long -removexattr(struct mnt_idmap *idmap, struct dentry *d, - const char __user *name) +removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name) { + if (is_posix_acl_xattr(name)) + return vfs_remove_acl(idmap, d, name); + return vfs_removexattr(idmap, d, name); +} + +static int path_removexattr(const char __user *pathname, + const char __user *name, unsigned int lookup_flags) +{ + struct path path; int error; char kname[XATTR_NAME_MAX + 1]; @@ -910,25 +919,13 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d, error = -ERANGE; if (error < 0) return error; - - if (is_posix_acl_xattr(kname)) - return vfs_remove_acl(idmap, d, kname); - - return vfs_removexattr(idmap, d, kname); -} - -static int path_removexattr(const char __user *pathname, - const char __user *name, unsigned int lookup_flags) -{ - struct path path; - int error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); if (!error) { - error = removexattr(mnt_idmap(path.mnt), path.dentry, name); + error = removexattr(mnt_idmap(path.mnt), path.dentry, kname); mnt_drop_write(path.mnt); } path_put(&path); @@ -954,15 +951,23 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { struct fd f = fdget(fd); + char kname[XATTR_NAME_MAX + 1]; int error = -EBADF; if (!f.file) return error; audit_file(f.file); + + error = strncpy_from_user(kname, name, sizeof(kname)); + if (error == 0 || error == sizeof(kname)) + error = -ERANGE; + if (error < 0) + return error; + error = mnt_want_write_file(f.file); if (!error) { error = removexattr(file_mnt_idmap(f.file), - f.file->f_path.dentry, name); + f.file->f_path.dentry, kname); mnt_drop_write_file(f.file); } fdput(f); diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index a191f6560f98..c84df23b494d 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -11,7 +11,7 @@ static struct ctl_table_header *xfs_table_header; #ifdef CONFIG_PROC_FS STATIC int xfs_stats_clear_proc_handler( - struct ctl_table *ctl, + const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, @@ -31,7 +31,7 @@ xfs_stats_clear_proc_handler( STATIC int xfs_panic_mask_proc_handler( - struct ctl_table *ctl, + const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, @@ -52,7 +52,7 @@ xfs_panic_mask_proc_handler( STATIC int xfs_deprecated_dointvec_minmax( - struct ctl_table *ctl, + const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, |