diff options
author | Yan, Zheng <zyan@redhat.com> | 2017-04-24 05:56:50 +0200 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2017-05-04 09:19:24 +0200 |
commit | b50c2de51e611da90cf3cf04c058f7e9bbe79e93 (patch) | |
tree | 709577ea2cd61fc1e8402ca7e2db67043a843ff4 | |
parent | rbd: exclusive map option (diff) | |
download | linux-b50c2de51e611da90cf3cf04c058f7e9bbe79e93.tar.xz linux-b50c2de51e611da90cf3cf04c058f7e9bbe79e93.zip |
ceph: choose readdir frag based on previous readdir reply
The dirfragtree is lazily updated, so it is not always accurate. An infinite
loop can happen in the following circumstance:
- client sends a request to read frag A
- frag A has been fragmented into frag B and C, so the mds fills the reply
with the contents of frag B
- client wants to read the next frag, C, but ceph_choose_frag(frag value of C)
returns frag A.
The fix is to use the previous readdir reply to calculate the next readdir
frag when possible.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r-- | fs/ceph/dir.c | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index ae61cdf7d489..e071d23f6148 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_mds_client *mdsc = fsc->mdsc; int i; int err; - u32 ftype; + unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); @@ -341,7 +341,6 @@ more: /* do we have the correct frag content buffered? */ if (need_send_readdir(fi, ctx->pos)) { struct ceph_mds_request *req; - unsigned frag; int op = ceph_snap(inode) == CEPH_SNAPDIR ? CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; @@ -352,8 +351,11 @@ more: } if (is_hash_order(ctx->pos)) { - frag = ceph_choose_frag(ci, fpos_hash(ctx->pos), - NULL, NULL); + /* fragtree isn't always accurate. choose frag + * based on previous reply when possible. */ + if (frag == (unsigned)-1) + frag = ceph_choose_frag(ci, fpos_hash(ctx->pos), + NULL, NULL); } else { frag = fpos_frag(ctx->pos); } @@ -480,6 +482,7 @@ more: struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; struct ceph_vino vino; ino_t ino; + u32 ftype; BUG_ON(rde->offset < ctx->pos); @@ -502,15 +505,17 @@ more: ctx->pos++; } + ceph_mdsc_put_request(fi->last_readdir); + fi->last_readdir = NULL; + if (fi->next_offset > 2) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + frag = fi->frag; goto more; } /* more frags? */ if (!ceph_frag_is_rightmost(fi->frag)) { - unsigned frag = ceph_frag_next(fi->frag); + frag = ceph_frag_next(fi->frag); if (is_hash_order(ctx->pos)) { loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), fi->next_offset, true); |