From dccbf08005df800f5c8e948ab6132ed5536134bc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 17 Feb 2018 09:29:58 +0100 Subject: libceph, ceph: change ceph_calc_file_object_mapping() signature - make it void - xlen (object extent length) out parameter should be u32 because only a single stripe unit is mapped at a time Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- fs/ceph/addr.c | 16 ++++++---------- fs/ceph/ioctl.c | 11 +++-------- 2 files changed, 9 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b4336b42ce3b..c0fe1b6f47ac 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -945,19 +945,15 @@ get_more_pages: if (locked_pages == 0) { u64 objnum; u64 objoff; + u32 xlen; /* prepare async write request */ offset = (u64)page_offset(page); - len = wsize; - - rc = ceph_calc_file_object_mapping(&ci->i_layout, - offset, len, - &objnum, &objoff, - &len); - if (rc < 0) { - unlock_page(page); - break; - } + ceph_calc_file_object_mapping(&ci->i_layout, + offset, wsize, + &objnum, &objoff, + &xlen); + len = xlen; num_ops = 1; strip_unit_end = page->index + diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 851aa69ec8f0..b855d24a895a 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -185,7 +185,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; struct ceph_object_locator oloc; CEPH_DEFINE_OID_ONSTACK(oid); - u64 len = 1, olen; + u32 xlen; u64 tmp; struct ceph_pg pgid; int r; @@ -195,13 +195,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->lock); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, - &dl.object_no, &dl.object_offset, - &olen); - if (r < 0) { - up_read(&osdc->lock); - return -EIO; - } + ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, 1, + &dl.object_no, &dl.object_offset, &xlen); dl.file_offset -= dl.object_offset; dl.object_size = ci->i_layout.object_size; dl.block_size = ci->i_layout.stripe_unit; -- cgit v1.2.3 From 08c1ac508b6dc20ac866e7cdb7279245437c7d26 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 17 Feb 2018 10:41:20 +0100 Subject: libceph, ceph: move ceph_calc_file_object_mapping() to striper.c ceph_calc_file_object_mapping() has nothing to do with osdmaps. Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 1 + fs/ceph/ioctl.c | 2 +- include/linux/ceph/osdmap.h | 5 ----- include/linux/ceph/striper.h | 4 ++++ net/ceph/osd_client.c | 1 + net/ceph/osdmap.c | 37 ------------------------------------- net/ceph/striper.c | 37 ++++++++++++++++++++++++++++++++++++- 7 files changed, 43 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c0fe1b6f47ac..c3557a9ea73d 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -15,6 +15,7 @@ #include "mds_client.h" #include "cache.h" #include +#include /* * Ceph address space ops. diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index b855d24a895a..c90f03beb15d 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -5,7 +5,7 @@ #include "super.h" #include "mds_client.h" #include "ioctl.h" - +#include /* * ioctls diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 92314035dac1..e71fb222c7c3 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -5,7 +5,6 @@ #include #include #include -#include #include /* @@ -280,10 +279,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, bool any_change); -void ceph_calc_file_object_mapping(struct ceph_file_layout *l, - u64 off, u64 len, - u64 *objno, u64 *objoff, u32 *xlen); - int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, const struct ceph_object_id *oid, const struct ceph_object_locator *oloc, diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h index 74134ee5fdc8..cbd0d24b7148 100644 --- a/include/linux/ceph/striper.h +++ b/include/linux/ceph/striper.h @@ -7,6 +7,10 @@ struct ceph_file_layout; +void ceph_calc_file_object_mapping(struct ceph_file_layout *l, + u64 off, u64 len, + u64 *objno, u64 *objoff, u32 *xlen); + struct ceph_object_extent { struct list_head oe_item; u64 oe_objno; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 407be0533c18..4a3af96dc057 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -20,6 +20,7 @@ #include #include #include +#include #define OSD_OPREPLY_FRONT_LEN 512 diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index e3ebbe2ecdad..9645ffd6acfb 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -4,7 +4,6 @@ #include #include -#include #include #include @@ -2140,42 +2139,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, return false; } -/* - * Map a file extent to a stripe unit within an object. - * Fill in objno, offset into object, and object extent length (i.e. the - * number of bytes mapped, less than or equal to @l->stripe_unit). - * - * Example for stripe_count = 3, stripes_per_object = 4: - * - * blockno | 0 3 6 9 | 1 4 7 10 | 2 5 8 11 | 12 15 18 21 | 13 16 19 - * stripeno | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 4 5 6 7 | 4 5 6 - * stripepos | 0 | 1 | 2 | 0 | 1 - * objno | 0 | 1 | 2 | 3 | 4 - * objsetno | 0 | 1 - */ -void ceph_calc_file_object_mapping(struct ceph_file_layout *l, - u64 off, u64 len, - u64 *objno, u64 *objoff, u32 *xlen) -{ - u32 stripes_per_object = l->object_size / l->stripe_unit; - u64 blockno; /* which su in the file (i.e. globally) */ - u32 blockoff; /* offset into su */ - u64 stripeno; /* which stripe */ - u32 stripepos; /* which su in the stripe, - which object in the object set */ - u64 objsetno; /* which object set */ - u32 objsetpos; /* which stripe in the object set */ - - blockno = div_u64_rem(off, l->stripe_unit, &blockoff); - stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos); - objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos); - - *objno = objsetno * l->stripe_count + stripepos; - *objoff = objsetpos * l->stripe_unit + blockoff; - *xlen = min_t(u64, len, l->stripe_unit - blockoff); -} -EXPORT_SYMBOL(ceph_calc_file_object_mapping); - /* * Map an object into a PG. * diff --git a/net/ceph/striper.c b/net/ceph/striper.c index bc1e4de30df9..c36462dc86b7 100644 --- a/net/ceph/striper.c +++ b/net/ceph/striper.c @@ -5,10 +5,45 @@ #include #include -#include #include #include +/* + * Map a file extent to a stripe unit within an object. + * Fill in objno, offset into object, and object extent length (i.e. the + * number of bytes mapped, less than or equal to @l->stripe_unit). + * + * Example for stripe_count = 3, stripes_per_object = 4: + * + * blockno | 0 3 6 9 | 1 4 7 10 | 2 5 8 11 | 12 15 18 21 | 13 16 19 + * stripeno | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 4 5 6 7 | 4 5 6 + * stripepos | 0 | 1 | 2 | 0 | 1 + * objno | 0 | 1 | 2 | 3 | 4 + * objsetno | 0 | 1 + */ +void ceph_calc_file_object_mapping(struct ceph_file_layout *l, + u64 off, u64 len, + u64 *objno, u64 *objoff, u32 *xlen) +{ + u32 stripes_per_object = l->object_size / l->stripe_unit; + u64 blockno; /* which su in the file (i.e. globally) */ + u32 blockoff; /* offset into su */ + u64 stripeno; /* which stripe */ + u32 stripepos; /* which su in the stripe, + which object in the object set */ + u64 objsetno; /* which object set */ + u32 objsetpos; /* which stripe in the object set */ + + blockno = div_u64_rem(off, l->stripe_unit, &blockoff); + stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos); + objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos); + + *objno = objsetno * l->stripe_count + stripepos; + *objoff = objsetpos * l->stripe_unit + blockoff; + *xlen = min_t(u64, len, l->stripe_unit - blockoff); +} +EXPORT_SYMBOL(ceph_calc_file_object_mapping); + /* * Return the last extent with given objno (@object_extents is sorted * by objno). If not found, return NULL and set @add_pos so that the -- cgit v1.2.3 From 4c069a5821ddc568e9509f49fcc9481c8a43712f Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 30 Jan 2018 16:29:17 +0800 Subject: ceph: add newline to end of debug message format Some of dout format do not include newline in the end, fix for the files which are in fs/ceph and net/ceph directories, and changing printk to dout for printing debug info in super.c Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/cache.c | 2 +- fs/ceph/dir.c | 2 +- fs/ceph/file.c | 2 +- fs/ceph/locks.c | 20 ++++++++++---------- fs/ceph/mds_client.c | 10 +++++----- fs/ceph/super.c | 1 - net/ceph/ceph_common.c | 2 +- 7 files changed, 19 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index a3ab265d3215..3781f723ff31 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -190,7 +190,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; - dout("ceph inode 0x%p cached okay", ci); + dout("ceph inode 0x%p cached okay\n", ci); return FSCACHE_CHECKAUX_OKAY; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f1d9c6cc0491..b7771e9e0c71 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -451,7 +451,7 @@ more: fi->dir_ordered_count = req->r_dir_ordered_cnt; } } else { - dout("readdir !did_prepopulate"); + dout("readdir !did_prepopulate\n"); /* disable readdir cache */ fi->readdir_cache_idx = -1; /* preclude from marking dir complete */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b67eec3532a1..c80eb1d93b41 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -41,7 +41,7 @@ static __le32 ceph_flags_sys2wire(u32 flags) #undef ceph_sys2wire if (flags) - dout("unused open flags: %x", flags); + dout("unused open flags: %x\n", flags); return cpu_to_le32(wire_flags); } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 9e66f69ee8a5..9dae2ec7e1fa 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -95,7 +95,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, owner = secure_addr(fl->fl_owner); dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " - "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, + "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type, (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, wait, fl->fl_type); @@ -132,7 +132,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, } ceph_mdsc_put_request(req); dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, + "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type, (int)operation, (u64)fl->fl_pid, fl->fl_start, length, wait, fl->fl_type, err); return err; @@ -226,7 +226,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) return -ENOLCK; - dout("ceph_lock, fl_owner: %p", fl->fl_owner); + dout("ceph_lock, fl_owner: %p\n", fl->fl_owner); /* set wait bit as appropriate, then make command as Ceph expects it*/ if (IS_GETLK(cmd)) @@ -264,7 +264,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op == CEPH_MDS_OP_SETFILELOCK) { - dout("mds locked, locking locally"); + dout("mds locked, locking locally\n"); err = posix_lock_file(file, fl, NULL); if (err) { /* undo! This should only happen if @@ -272,7 +272,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * deadlock. */ ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on posix_lock_file, undid lock", + dout("got %d on posix_lock_file, undid lock\n", err); } } @@ -294,7 +294,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) if (fl->fl_type & LOCK_MAND) return -EOPNOTSUPP; - dout("ceph_flock, fl_file: %p", fl->fl_file); + dout("ceph_flock, fl_file: %p\n", fl->fl_file); spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { @@ -329,7 +329,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on locks_lock_file_wait, undid lock", err); + dout("got %d on locks_lock_file_wait, undid lock\n", err); } } return err; @@ -356,7 +356,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ++(*flock_count); spin_unlock(&ctx->flc_lock); } - dout("counted %d flock locks and %d fcntl locks", + dout("counted %d flock locks and %d fcntl locks\n", *flock_count, *fcntl_count); } @@ -384,7 +384,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, cephlock->type = CEPH_LOCK_UNLOCK; break; default: - dout("Have unknown lock type %d", lock->fl_type); + dout("Have unknown lock type %d\n", lock->fl_type); err = -EINVAL; } @@ -407,7 +407,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, int seen_flock = 0; int l = 0; - dout("encoding %d flock and %d fcntl locks", num_flock_locks, + dout("encoding %d flock and %d fcntl locks\n", num_flock_locks, num_fcntl_locks); if (!ctx) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2e8f90f96540..b9cbeeb32ab3 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -384,7 +384,7 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s) refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref)); return s; } else { - dout("mdsc get_session %p 0 -- FAIL", s); + dout("mdsc get_session %p 0 -- FAIL\n", s); return NULL; } } @@ -2531,10 +2531,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * Otherwise we just have to return an ESTALE */ if (result == -ESTALE) { - dout("got ESTALE on request %llu", req->r_tid); + dout("got ESTALE on request %llu\n", req->r_tid); req->r_resend_mds = -1; if (req->r_direct_mode != USE_AUTH_MDS) { - dout("not using auth, setting for that now"); + dout("not using auth, setting for that now\n"); req->r_direct_mode = USE_AUTH_MDS; __do_request(mdsc, req); mutex_unlock(&mdsc->mutex); @@ -2542,13 +2542,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } else { int mds = __choose_mds(mdsc, req); if (mds >= 0 && mds != req->r_session->s_mds) { - dout("but auth changed, so resending"); + dout("but auth changed, so resending\n"); __do_request(mdsc, req); mutex_unlock(&mdsc->mutex); goto out; } } - dout("have to return ESTALE on request %llu", req->r_tid); + dout("have to return ESTALE on request %llu\n", req->r_tid); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fb2bc9c15a23..fb32379cd42c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -320,7 +320,6 @@ static int parse_fsopt_token(char *c, void *private) break; case Opt_poolperm: fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; - printk ("pool perm"); break; case Opt_nopoolperm: fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4d4c82229e9e..627ffab77695 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -217,7 +217,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid) if (i == 16) err = 0; - dout("parse_fsid ret %d got fsid %pU", err, fsid); + dout("parse_fsid ret %d got fsid %pU\n", err, fsid); return err; } -- cgit v1.2.3 From 7ae7a828d9ac249b175f1b6d1c21c77720cd6098 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 7 Feb 2018 10:27:06 +0800 Subject: ceph: keep consistent semantic in fscache related option combination When specifying multiple fscache related options, the result isn't always the same as option order, this fix will keep strict consistent meaning by order. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fb32379cd42c..ca8a830c07ad 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -314,9 +314,13 @@ static int parse_fsopt_token(char *c, void *private) break; case Opt_fscache: fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; + kfree(fsopt->fscache_uniq); + fsopt->fscache_uniq = NULL; break; case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; + kfree(fsopt->fscache_uniq); + fsopt->fscache_uniq = NULL; break; case Opt_poolperm: fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; -- cgit v1.2.3 From 11e1478df91cba4919c2bb011c49f659ebed3d16 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 10 Feb 2018 14:18:06 +0800 Subject: libceph, ceph: change permission for readonly debugfs entries Remove write permission for debugfs entries which only have readonly function. Signed-off-by: Chengguang Xu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/debugfs.c | 8 ++++---- net/ceph/debugfs.c | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 644def813754..abdf98deeec4 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -260,7 +260,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mdsmap_show_fops); @@ -268,7 +268,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mds_sessions_show_fops); @@ -276,7 +276,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mdsc = debugfs_create_file("mdsc", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mdsc_show_fops); @@ -292,7 +292,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &dentry_lru_show_fops); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 1eef6806aa1a..0ef0f32a4570 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->monc.debugfs_file = debugfs_create_file("monc", - 0600, + 0400, client->debugfs_dir, client, &monc_show_fops); @@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->osdc.debugfs_file = debugfs_create_file("osdc", - 0600, + 0400, client->debugfs_dir, client, &osdc_show_fops); @@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_monmap = debugfs_create_file("monmap", - 0600, + 0400, client->debugfs_dir, client, &monmap_show_fops); @@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_osdmap = debugfs_create_file("osdmap", - 0600, + 0400, client->debugfs_dir, client, &osdmap_show_fops); @@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_options = debugfs_create_file("client_options", - 0600, + 0400, client->debugfs_dir, client, &client_options_show_fops); -- cgit v1.2.3 From 4d8969af28a5c1bccd9f42cf98ab5cff3c0fe140 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 15 Feb 2018 15:39:05 +0800 Subject: ceph: use seq_show_option for string type options Using seq_show_option to replace seq_printf for string type options. Signed-off-by: Chengguang Xu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index ca8a830c07ad..52fec209f0a6 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -516,10 +516,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) seq_puts(m, ",nodcache"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { - if (fsopt->fscache_uniq) - seq_printf(m, ",fsc=%s", fsopt->fscache_uniq); - else - seq_puts(m, ",fsc"); + seq_show_option(m, "fsc", fsopt->fscache_uniq); } if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) seq_puts(m, ",nopoolperm"); @@ -532,7 +529,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) #endif if (fsopt->mds_namespace) - seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace); + seq_show_option(m, "mds_namespace", fsopt->mds_namespace); if (fsopt->wsize) seq_printf(m, ",wsize=%d", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE) -- cgit v1.2.3 From b884014a91a49ed0e7198d276b28887cc48363bd Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 23 Feb 2018 17:09:38 +0800 Subject: ceph: adding protection for showing cap reservation info Adding spinlock protection during getting cap reservation ralated fields so that the numbers match below BUG_ON condition in the code. BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0e5bd3e3344e..1d02648788e3 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -341,6 +341,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc, { struct ceph_mds_client *mdsc = fsc->mdsc; + spin_lock(&mdsc->caps_list_lock); + if (total) *total = mdsc->caps_total_count; if (avail) @@ -351,6 +353,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc, *reserved = mdsc->caps_reserve_count; if (min) *min = mdsc->caps_min_count; + + spin_unlock(&mdsc->caps_list_lock); } /* -- cgit v1.2.3 From e327ce068518e38c0182739e879b9dce477c8d85 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 24 Feb 2018 18:35:29 +0800 Subject: ceph: optimizing cap allocation When setting high volume of caps_min_count or having many unreserved caps, unused caps may always keep in the ->caps_list even can't get new cap from kmem_cache_alloc because lack of maximum limitation of caps_avail_count. Hence reuse caps in ->caps_list if available, it's maybe better than setting max limitation of caps_avail_count and releasing unused caps when reaching the limit. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 1d02648788e3..421cdce71fb0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -285,7 +285,23 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, mdsc->caps_use_count++; mdsc->caps_total_count++; spin_unlock(&mdsc->caps_list_lock); + } else { + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_avail_count) { + BUG_ON(list_empty(&mdsc->caps_list)); + + mdsc->caps_avail_count--; + mdsc->caps_use_count++; + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + mdsc->caps_avail_count); + } + spin_unlock(&mdsc->caps_list_lock); } + return cap; } -- cgit v1.2.3 From b517c1d87faafba0c33a38ffdd551e8b399f0a31 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sun, 25 Feb 2018 13:39:09 +0800 Subject: ceph: release unreserved caps if having enough available caps When unreserving caps check if there is too mamy available caps in the ->caps_list, if so release unreserved caps. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 421cdce71fb0..b9b5c47efd06 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -254,12 +254,26 @@ out_nomem: int ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + int i; + struct ceph_cap *cap; + dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); if (ctx->count) { spin_lock(&mdsc->caps_list_lock); BUG_ON(mdsc->caps_reserve_count < ctx->count); mdsc->caps_reserve_count -= ctx->count; - mdsc->caps_avail_count += ctx->count; + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + mdsc->caps_total_count -= ctx->count; + for (i = 0; i < ctx->count; i++) { + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + } else { + mdsc->caps_avail_count += ctx->count; + } ctx->count = 0; dout("unreserve caps %d = %d used + %d resv + %d avail\n", mdsc->caps_total_count, mdsc->caps_use_count, -- cgit v1.2.3 From 79cd674aed7363a043222af3c8f97d0ea0a078cf Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 24 Feb 2018 18:36:02 +0800 Subject: ceph: optimizing cap reservation When caps_avail_count is in a low level, most newly trimmed caps will probably go into ->caps_list and caps_avail_count will be increased. Hence after trimming, should recheck caps_avail_count to effectly reuse newly trimmed caps. Also, when releasing unnecessary caps follow the same rule of ceph_put_cap. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 88 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b9b5c47efd06..25141b6683e9 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -184,36 +184,54 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, mdsc->caps_avail_count); spin_unlock(&mdsc->caps_list_lock); - for (i = have; i < need; i++) { -retry: + for (i = have; i < need; ) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); - if (!cap) { - if (!trimmed) { - for (j = 0; j < mdsc->max_sessions; j++) { - s = __ceph_lookup_mds_session(mdsc, j); - if (!s) - continue; - mutex_unlock(&mdsc->mutex); + if (cap) { + list_add(&cap->caps_item, &newcaps); + alloc++; + i++; + continue; + } - mutex_lock(&s->s_mutex); - max_caps = s->s_nr_caps - (need - i); - ceph_trim_caps(mdsc, s, max_caps); - mutex_unlock(&s->s_mutex); + if (!trimmed) { + for (j = 0; j < mdsc->max_sessions; j++) { + s = __ceph_lookup_mds_session(mdsc, j); + if (!s) + continue; + mutex_unlock(&mdsc->mutex); - ceph_put_mds_session(s); - mutex_lock(&mdsc->mutex); - } - trimmed = true; - goto retry; - } else { - pr_warn("reserve caps ctx=%p ENOMEM " - "need=%d got=%d\n", - ctx, need, have + alloc); - goto out_nomem; + mutex_lock(&s->s_mutex); + max_caps = s->s_nr_caps - (need - i); + ceph_trim_caps(mdsc, s, max_caps); + mutex_unlock(&s->s_mutex); + + ceph_put_mds_session(s); + mutex_lock(&mdsc->mutex); } + trimmed = true; + + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_avail_count) { + int more_have; + if (mdsc->caps_avail_count >= need - i) + more_have = need - i; + else + more_have = mdsc->caps_avail_count; + + i += more_have; + have += more_have; + mdsc->caps_avail_count -= more_have; + mdsc->caps_reserve_count += more_have; + + } + spin_unlock(&mdsc->caps_list_lock); + + continue; } - list_add(&cap->caps_item, &newcaps); - alloc++; + + pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", + ctx, need, have + alloc); + goto out_nomem; } BUG_ON(have + alloc != need); @@ -234,16 +252,28 @@ retry: return 0; out_nomem: + + spin_lock(&mdsc->caps_list_lock); + mdsc->caps_avail_count += have; + mdsc->caps_reserve_count -= have; + while (!list_empty(&newcaps)) { cap = list_first_entry(&newcaps, struct ceph_cap, caps_item); list_del(&cap->caps_item); - kmem_cache_free(ceph_cap_cachep, cap); + + /* Keep some preallocated caps around (ceph_min_count), to + * avoid lots of free/alloc churn. */ + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + kmem_cache_free(ceph_cap_cachep, cap); + } else { + mdsc->caps_avail_count++; + mdsc->caps_total_count++; + list_add(&cap->caps_item, &mdsc->caps_list); + } } - spin_lock(&mdsc->caps_list_lock); - mdsc->caps_avail_count += have; - mdsc->caps_reserve_count -= have; BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); -- cgit v1.2.3 From 73737682e0598ae73bd4c481e478a0d75884aa8a Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 28 Feb 2018 19:43:47 +0800 Subject: ceph: change variable name to follow common rule Variable name ci is mostly used for ceph_inode_info. Variable name fi is mostly used for ceph_file_info. Variable name cf is mostly used for ceph_cap_flush. Change variable name to follow above common rules in case of confusing. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 4 ++-- fs/ceph/dir.c | 18 +++++++++--------- fs/ceph/file.c | 38 +++++++++++++++++++------------------- 3 files changed, 30 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c3557a9ea73d..4b3cf53d5641 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -439,7 +439,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, { struct inode *inode = file_inode(file); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_file_info *ci = file->private_data; + struct ceph_file_info *fi = file->private_data; struct ceph_rw_context *rw_ctx; int rc = 0; int max = 0; @@ -453,7 +453,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc == 0) goto out; - rw_ctx = ceph_find_rw_context(ci); + rw_ctx = ceph_find_rw_context(fi); max = fsc->mount_options->rsize >> PAGE_SHIFT; dout("readpages %p file %p ctx %p nr_pages %d max %d\n", inode, file, rw_ctx, nr_pages, max); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index b7771e9e0c71..1aa3bfc9ef35 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1352,7 +1352,7 @@ static void ceph_d_prune(struct dentry *dentry) static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - struct ceph_file_info *cf = file->private_data; + struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); int left; @@ -1361,12 +1361,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) return -EISDIR; - if (!cf->dir_info) { - cf->dir_info = kmalloc(bufsize, GFP_KERNEL); - if (!cf->dir_info) + if (!fi->dir_info) { + fi->dir_info = kmalloc(bufsize, GFP_KERNEL); + if (!fi->dir_info) return -ENOMEM; - cf->dir_info_len = - snprintf(cf->dir_info, bufsize, + fi->dir_info_len = + snprintf(fi->dir_info, bufsize, "entries: %20lld\n" " files: %20lld\n" " subdirs: %20lld\n" @@ -1386,10 +1386,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, (long)ci->i_rctime.tv_nsec); } - if (*ppos >= cf->dir_info_len) + if (*ppos >= fi->dir_info_len) return 0; - size = min_t(unsigned, size, cf->dir_info_len-*ppos); - left = copy_to_user(buf, cf->dir_info + *ppos, size); + size = min_t(unsigned, size, fi->dir_info_len-*ppos); + left = copy_to_user(buf, fi->dir_info + *ppos, size); if (left == size) return -EFAULT; *ppos += (size - left); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index c80eb1d93b41..df9f435c5260 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -165,7 +165,7 @@ out: */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { - struct ceph_file_info *cf; + struct ceph_file_info *fi; int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -175,19 +175,19 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) case S_IFDIR: dout("init_file %p %p 0%o (regular)\n", inode, file, inode->i_mode); - cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); - if (!cf) { + fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); + if (!fi) { ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ return -ENOMEM; } - cf->fmode = fmode; + fi->fmode = fmode; - spin_lock_init(&cf->rw_contexts_lock); - INIT_LIST_HEAD(&cf->rw_contexts); + spin_lock_init(&fi->rw_contexts_lock); + INIT_LIST_HEAD(&fi->rw_contexts); - cf->next_offset = 2; - cf->readdir_cache_idx = -1; - file->private_data = cf; + fi->next_offset = 2; + fi->readdir_cache_idx = -1; + file->private_data = fi; BUG_ON(inode->i_fop->release != ceph_release); break; @@ -278,11 +278,11 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct ceph_file_info *cf = file->private_data; + struct ceph_file_info *fi = file->private_data; int err; int flags, fmode, wanted; - if (cf) { + if (fi) { dout("open file %p is already opened\n", file); return 0; } @@ -460,16 +460,16 @@ out_acl: int ceph_release(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_file_info *cf = file->private_data; + struct ceph_file_info *fi = file->private_data; dout("release inode %p file %p\n", inode, file); - ceph_put_fmode(ci, cf->fmode); - if (cf->last_readdir) - ceph_mdsc_put_request(cf->last_readdir); - kfree(cf->last_name); - kfree(cf->dir_info); - WARN_ON(!list_empty(&cf->rw_contexts)); - kmem_cache_free(ceph_file_cachep, cf); + ceph_put_fmode(ci, fi->fmode); + if (fi->last_readdir) + ceph_mdsc_put_request(fi->last_readdir); + kfree(fi->last_name); + kfree(fi->dir_info); + WARN_ON(!list_empty(&fi->rw_contexts)); + kmem_cache_free(ceph_file_cachep, fi); /* wake up anyone waiting for caps on this inode */ wake_up_all(&ci->i_cap_wq); -- cgit v1.2.3 From bc4b5ad3a6a1cfe11494380affc4bfc4521af499 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 27 Feb 2018 13:49:44 +0800 Subject: ceph: mark the cap cache as unreclaimable Releasing cap is affected by many factors (e.g., avail_count/reserve_count/min_count) and min_count could be specified high volume in client mount option. Hence it's better to mark cap cache as unreclaimable in case of non-trivial discrepancies between memory shown as reclaimable and what is actually reclaimed. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 52fec209f0a6..9bf9e54259dd 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -698,8 +698,7 @@ static int __init init_caches(void) if (!ceph_inode_cachep) return -ENOMEM; - ceph_cap_cachep = KMEM_CACHE(ceph_cap, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); if (!ceph_cap_cachep) goto bad_cap; ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, -- cgit v1.2.3 From af9cc401ce7452f9d965ba4553d8ffe7f0ed42ee Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 4 Mar 2018 16:36:01 +0800 Subject: ceph: invalidate pages that beyond EOF in ceph_writepages_start() Dirty pages can be associated with different capsnap. Different capsnap may have different EOF value. So invalidating dirty pages according to the largest EOF value is wrong. Dirty pages beyond EOF, but associated with other capsnap, do not get invalidated. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 36 ++++++++++++++++++------------------ fs/ceph/inode.c | 11 ----------- 2 files changed, 18 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4b3cf53d5641..3376822a624e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -801,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct ceph_osd_request *req = NULL; struct ceph_writeback_ctl ceph_wbc; bool should_loop, range_whole = false; - bool stop, done = false; + bool done = false; dout("writepages_start %p (mode=%s)\n", inode, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : @@ -865,8 +865,7 @@ retry: ceph_put_snap_context(last_snapc); last_snapc = snapc; - stop = false; - while (!stop && index <= end) { + while (!done && index <= end) { int num_ops = 0, op_idx; unsigned i, pvec_pages, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; @@ -899,16 +898,26 @@ get_more_pages: unlock_page(page); continue; } - if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); + /* only if matching snap context */ + pgsnapc = page_snap_context(page); + if (pgsnapc != snapc) { + dout("page snapc %p %lld != oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); unlock_page(page); - break; + continue; } if (page_offset(page) >= ceph_wbc.i_size) { dout("%p page eof %llu\n", page, ceph_wbc.i_size); - /* not done if range_cyclic */ - stop = true; + if (ceph_wbc.size_stable || + page_offset(page) >= i_size_read(inode)) + mapping->a_ops->invalidatepage(page, + 0, PAGE_SIZE); + unlock_page(page); + continue; + } + if (strip_unit_end && (page->index > strip_unit_end)) { + dout("end of strip unit %p\n", page); unlock_page(page); break; } @@ -922,15 +931,6 @@ get_more_pages: wait_on_page_writeback(page); } - /* only if matching snap context */ - pgsnapc = page_snap_context(page); - if (pgsnapc != snapc) { - dout("page snapc %p %lld != oldest %p %lld\n", - pgsnapc, pgsnapc->seq, snapc, snapc->seq); - unlock_page(page); - continue; - } - if (!clear_page_dirty_for_io(page)) { dout("%p !clear_page_dirty_for_io\n", page); unlock_page(page); @@ -1143,7 +1143,7 @@ new_request: * we tagged for writeback prior to entering this loop. */ if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) - done = stop = true; + done = true; release_pvec_pages: dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index c6ec5aa46100..be5f12d0d637 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1867,20 +1867,9 @@ retry: * possibly truncate them.. so write AND block! */ if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { - struct ceph_cap_snap *capsnap; - to = ci->i_truncate_size; - list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - // MDS should have revoked Frw caps - WARN_ON_ONCE(capsnap->writing); - if (capsnap->dirty_pages && capsnap->size > to) - to = capsnap->size; - } spin_unlock(&ci->i_ceph_lock); dout("__do_pending_vmtruncate %p flushing snaps first\n", inode); - - truncate_pagecache(inode, to); - filemap_write_and_wait_range(&inode->i_data, 0, inode->i_sb->s_maxbytes); goto retry; -- cgit v1.2.3 From 1582af2eaaf17cbcd7864172347c1db10b6b2210 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 6 Mar 2018 15:14:54 +0800 Subject: ceph: don't wait on writeback when there is no more dirty pages In sync mode, writepages() needs to write all dirty pages. But it can only write dirty pages associated with the oldest snapc. To write dirty pages associated with next snapc, it needs to wait until current writes complete. If there is no more dirty pages, writepages() should not wait on writeback. Otherwise, dirty page writeback becomes very slow. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3376822a624e..5f7ad3d0df2e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -857,7 +857,7 @@ retry: * in that range can be associated with newer snapc. * They are not writeable until we write all dirty pages * associated with 'snapc' get written */ - if (index > 0 || wbc->sync_mode != WB_SYNC_NONE) + if (index > 0) should_loop = true; dout(" non-head snapc, range whole\n"); } @@ -903,6 +903,10 @@ get_more_pages: if (pgsnapc != snapc) { dout("page snapc %p %lld != oldest %p %lld\n", pgsnapc, pgsnapc->seq, snapc, snapc->seq); + if (!should_loop && + !ceph_wbc.head_snapc && + wbc->sync_mode != WB_SYNC_NONE) + should_loop = true; unlock_page(page); continue; } -- cgit v1.2.3 From 51b10f3fe446f536b6edf90ce6941882033dd93b Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 9 Mar 2018 15:12:40 +0800 Subject: ceph: filter out used flags when printing unused open flags Filter out used access mode flags when printing unused open flags. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index df9f435c5260..a1f0aee29c27 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -30,6 +30,8 @@ static __le32 ceph_flags_sys2wire(u32 flags) break; } + flags &= ~O_ACCMODE; + #define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; } ceph_sys2wire(O_CREAT); -- cgit v1.2.3 From 57a35dfb522c8bbac622d49f5217906f9b5eceb0 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 10 Mar 2018 20:32:05 +0800 Subject: libceph, ceph: add __init attribution to init funcitons Add __init attribution to the functions which are called only once during initiating/registering operations and deleting unnecessary symbol exports. Signed-off-by: Chengguang Xu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/cache.c | 2 +- net/ceph/crypto.c | 6 ++++-- net/ceph/debugfs.c | 7 ++----- net/ceph/messenger.c | 4 +--- net/ceph/osd_client.c | 4 +--- 5 files changed, 9 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 3781f723ff31..797cc8160199 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -71,7 +71,7 @@ static const struct fscache_cookie_def ceph_fscache_fsid_object_def = { .get_key = ceph_fscache_session_get_key, }; -int ceph_fscache_register(void) +int __init ceph_fscache_register(void) { return fscache_register_netfs(&ceph_cache_netfs); } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index bf9d079cbafd..02172c408ff2 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -347,10 +347,12 @@ struct key_type key_type_ceph = { .destroy = ceph_key_destroy, }; -int ceph_crypto_init(void) { +int __init ceph_crypto_init(void) +{ return register_key_type(&key_type_ceph); } -void ceph_crypto_shutdown(void) { +void ceph_crypto_shutdown(void) +{ unregister_key_type(&key_type_ceph); } diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 0ef0f32a4570..02952605d121 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show) CEPH_DEFINE_SHOW_FUNC(osdc_show) CEPH_DEFINE_SHOW_FUNC(client_options_show) -int ceph_debugfs_init(void) +int __init ceph_debugfs_init(void) { ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); if (!ceph_debugfs_dir) @@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) #else /* CONFIG_DEBUG_FS */ -int ceph_debugfs_init(void) +int __init ceph_debugfs_init(void) { return 0; } @@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) } #endif /* CONFIG_DEBUG_FS */ - -EXPORT_SYMBOL(ceph_debugfs_init); -EXPORT_SYMBOL(ceph_debugfs_cleanup); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index cee4b3d307de..fcb40c12b1f8 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void) ceph_msgr_slab_exit(); } -int ceph_msgr_init(void) +int __init ceph_msgr_init(void) { if (ceph_msgr_slab_init()) return -ENOMEM; @@ -299,7 +299,6 @@ int ceph_msgr_init(void) return -ENOMEM; } -EXPORT_SYMBOL(ceph_msgr_init); void ceph_msgr_exit(void) { @@ -307,7 +306,6 @@ void ceph_msgr_exit(void) _ceph_msgr_exit(); } -EXPORT_SYMBOL(ceph_msgr_exit); void ceph_msgr_flush(void) { diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4a3af96dc057..ea2a6c9fb7ce 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5106,7 +5106,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, } EXPORT_SYMBOL(ceph_osdc_writepages); -int ceph_osdc_setup(void) +int __init ceph_osdc_setup(void) { size_t size = sizeof(struct ceph_osd_request) + CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); @@ -5117,7 +5117,6 @@ int ceph_osdc_setup(void) return ceph_osd_request_cache ? 0 : -ENOMEM; } -EXPORT_SYMBOL(ceph_osdc_setup); void ceph_osdc_cleanup(void) { @@ -5125,7 +5124,6 @@ void ceph_osdc_cleanup(void) kmem_cache_destroy(ceph_osd_request_cache); ceph_osd_request_cache = NULL; } -EXPORT_SYMBOL(ceph_osdc_cleanup); /* * handle incoming message -- cgit v1.2.3 From 47474d0b011bb385719e91a60bb9ff7649d66526 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 13 Mar 2018 23:01:07 +0800 Subject: ceph: optimize mds session register Do memory allocation first, so that avoid unnecessary initialization of newly allocated session in error case. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b9cbeeb32ab3..a511a777ffb9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -448,6 +448,25 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) return ERR_PTR(-ENOMEM); + + if (mds >= mdsc->max_sessions) { + int newmax = 1 << get_count_order(mds + 1); + struct ceph_mds_session **sa; + + dout("%s: realloc to %d\n", __func__, newmax); + sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); + if (!sa) + goto fail_realloc; + if (mdsc->sessions) { + memcpy(sa, mdsc->sessions, + mdsc->max_sessions * sizeof(void *)); + kfree(mdsc->sessions); + } + mdsc->sessions = sa; + mdsc->max_sessions = newmax; + } + + dout("%s: mds%d\n", __func__, mds); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -476,23 +495,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_cap_releases); INIT_LIST_HEAD(&s->s_cap_flushing); - dout("register_session mds%d\n", mds); - if (mds >= mdsc->max_sessions) { - int newmax = 1 << get_count_order(mds+1); - struct ceph_mds_session **sa; - - dout("register_session realloc to %d\n", newmax); - sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); - if (!sa) - goto fail_realloc; - if (mdsc->sessions) { - memcpy(sa, mdsc->sessions, - mdsc->max_sessions * sizeof(void *)); - kfree(mdsc->sessions); - } - mdsc->sessions = sa; - mdsc->max_sessions = newmax; - } mdsc->sessions[mds] = s; atomic_inc(&mdsc->num_sessions); refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */ -- cgit v1.2.3 From bb48bd4dc45f9ee1e44d8e9fcb01023e0d0ba80d Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 13 Mar 2018 10:42:44 +0800 Subject: ceph: optimize memory usage In current code, regular file and directory use same struct ceph_file_info to store fs specific data so the struct has to include some fields which are only used for directory (e.g., readdir related info), when having plenty of regular files, it will lead to memory waste. This patch introduces dedicated ceph_dir_file_info cache for readdir related thins. So that regular file does not include those unused fields anymore. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 185 ++++++++++++++++++++++--------------------- fs/ceph/file.c | 88 ++++++++++++++------ fs/ceph/super.c | 8 ++ fs/ceph/super.h | 4 + include/linux/ceph/libceph.h | 1 + 5 files changed, 169 insertions(+), 117 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1aa3bfc9ef35..16405e0774a6 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -102,18 +102,18 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. */ -static int note_last_dentry(struct ceph_file_info *fi, const char *name, +static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, int len, unsigned next_offset) { char *buf = kmalloc(len+1, GFP_KERNEL); if (!buf) return -ENOMEM; - kfree(fi->last_name); - fi->last_name = buf; - memcpy(fi->last_name, name, len); - fi->last_name[len] = 0; - fi->next_offset = next_offset; - dout("note_last_dentry '%s'\n", fi->last_name); + kfree(dfi->last_name); + dfi->last_name = buf; + memcpy(dfi->last_name, name, len); + dfi->last_name[len] = 0; + dfi->next_offset = next_offset; + dout("note_last_dentry '%s'\n", dfi->last_name); return 0; } @@ -175,7 +175,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, static int __dcache_readdir(struct file *file, struct dir_context *ctx, int shared_gen) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); struct dentry *dentry, *last = NULL; @@ -222,7 +222,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, bool emit_dentry = false; dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl); if (!dentry) { - fi->flags |= CEPH_F_ATEND; + dfi->file_info.flags |= CEPH_F_ATEND; err = 0; break; } @@ -273,33 +273,33 @@ out: if (last) { int ret; di = ceph_dentry(last); - ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, + ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len, fpos_off(di->offset) + 1); if (ret < 0) err = ret; dput(last); /* last_name no longer match cache index */ - if (fi->readdir_cache_idx >= 0) { - fi->readdir_cache_idx = -1; - fi->dir_release_count = 0; + if (dfi->readdir_cache_idx >= 0) { + dfi->readdir_cache_idx = -1; + dfi->dir_release_count = 0; } } return err; } -static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos) +static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos) { - if (!fi->last_readdir) + if (!dfi->last_readdir) return true; if (is_hash_order(pos)) - return !ceph_frag_contains_value(fi->frag, fpos_hash(pos)); + return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos)); else - return fi->frag != fpos_frag(pos); + return dfi->frag != fpos_frag(pos); } static int ceph_readdir(struct file *file, struct dir_context *ctx) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); @@ -310,7 +310,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_mds_reply_info_parsed *rinfo; dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); - if (fi->flags & CEPH_F_ATEND) + if (dfi->file_info.flags & CEPH_F_ATEND) return 0; /* always start with . and .. */ @@ -351,15 +351,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* proceed with a normal readdir */ more: /* do we have the correct frag content buffered? */ - if (need_send_readdir(fi, ctx->pos)) { + if (need_send_readdir(dfi, ctx->pos)) { struct ceph_mds_request *req; int op = ceph_snap(inode) == CEPH_SNAPDIR ? CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; /* discard old result, if any */ - if (fi->last_readdir) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + if (dfi->last_readdir) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; } if (is_hash_order(ctx->pos)) { @@ -373,7 +373,7 @@ more: } dout("readdir fetching %llx.%llx frag %x offset '%s'\n", - ceph_vinop(inode), frag, fi->last_name); + ceph_vinop(inode), frag, dfi->last_name); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -389,8 +389,8 @@ more: __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); req->r_inode_drop = CEPH_CAP_FILE_EXCL; } - if (fi->last_name) { - req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); + if (dfi->last_name) { + req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL); if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; @@ -400,10 +400,10 @@ more: cpu_to_le32(fpos_hash(ctx->pos)); } - req->r_dir_release_cnt = fi->dir_release_count; - req->r_dir_ordered_cnt = fi->dir_ordered_count; - req->r_readdir_cache_idx = fi->readdir_cache_idx; - req->r_readdir_offset = fi->next_offset; + req->r_dir_release_cnt = dfi->dir_release_count; + req->r_dir_ordered_cnt = dfi->dir_ordered_count; + req->r_readdir_cache_idx = dfi->readdir_cache_idx; + req->r_readdir_offset = dfi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); req->r_args.readdir.flags = cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS); @@ -427,35 +427,35 @@ more: if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { frag = le32_to_cpu(rinfo->dir_dir->frag); if (!rinfo->hash_order) { - fi->next_offset = req->r_readdir_offset; + dfi->next_offset = req->r_readdir_offset; /* adjust ctx->pos to beginning of frag */ ctx->pos = ceph_make_fpos(frag, - fi->next_offset, + dfi->next_offset, false); } } - fi->frag = frag; - fi->last_readdir = req; + dfi->frag = frag; + dfi->last_readdir = req; if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) { - fi->readdir_cache_idx = req->r_readdir_cache_idx; - if (fi->readdir_cache_idx < 0) { + dfi->readdir_cache_idx = req->r_readdir_cache_idx; + if (dfi->readdir_cache_idx < 0) { /* preclude from marking dir ordered */ - fi->dir_ordered_count = 0; + dfi->dir_ordered_count = 0; } else if (ceph_frag_is_leftmost(frag) && - fi->next_offset == 2) { + dfi->next_offset == 2) { /* note dir version at start of readdir so * we can tell if any dentries get dropped */ - fi->dir_release_count = req->r_dir_release_cnt; - fi->dir_ordered_count = req->r_dir_ordered_cnt; + dfi->dir_release_count = req->r_dir_release_cnt; + dfi->dir_ordered_count = req->r_dir_ordered_cnt; } } else { dout("readdir !did_prepopulate\n"); /* disable readdir cache */ - fi->readdir_cache_idx = -1; + dfi->readdir_cache_idx = -1; /* preclude from marking dir complete */ - fi->dir_release_count = 0; + dfi->dir_release_count = 0; } /* note next offset and last dentry name */ @@ -464,19 +464,19 @@ more: rinfo->dir_entries + (rinfo->dir_nr-1); unsigned next_offset = req->r_reply_info.dir_end ? 2 : (fpos_off(rde->offset) + 1); - err = note_last_dentry(fi, rde->name, rde->name_len, + err = note_last_dentry(dfi, rde->name, rde->name_len, next_offset); if (err) return err; } else if (req->r_reply_info.dir_end) { - fi->next_offset = 2; + dfi->next_offset = 2; /* keep last name */ } } - rinfo = &fi->last_readdir->r_reply_info; + rinfo = &dfi->last_readdir->r_reply_info; dout("readdir frag %x num %d pos %llx chunk first %llx\n", - fi->frag, rinfo->dir_nr, ctx->pos, + dfi->frag, rinfo->dir_nr, ctx->pos, rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); i = 0; @@ -520,52 +520,55 @@ more: ctx->pos++; } - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; - if (fi->next_offset > 2) { - frag = fi->frag; + if (dfi->next_offset > 2) { + frag = dfi->frag; goto more; } /* more frags? */ - if (!ceph_frag_is_rightmost(fi->frag)) { - frag = ceph_frag_next(fi->frag); + if (!ceph_frag_is_rightmost(dfi->frag)) { + frag = ceph_frag_next(dfi->frag); if (is_hash_order(ctx->pos)) { loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), - fi->next_offset, true); + dfi->next_offset, true); if (new_pos > ctx->pos) ctx->pos = new_pos; /* keep last_name */ } else { - ctx->pos = ceph_make_fpos(frag, fi->next_offset, false); - kfree(fi->last_name); - fi->last_name = NULL; + ctx->pos = ceph_make_fpos(frag, dfi->next_offset, + false); + kfree(dfi->last_name); + dfi->last_name = NULL; } dout("readdir next frag is %x\n", frag); goto more; } - fi->flags |= CEPH_F_ATEND; + dfi->file_info.flags |= CEPH_F_ATEND; /* * if dir_release_count still matches the dir, no dentries * were released during the whole readdir, and we should have * the complete dir contents in our cache. */ - if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { + if (atomic64_read(&ci->i_release_count) == + dfi->dir_release_count) { spin_lock(&ci->i_ceph_lock); - if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { + if (dfi->dir_ordered_count == + atomic64_read(&ci->i_ordered_count)) { dout(" marking %p complete and ordered\n", inode); /* use i_size to track number of entries in * readdir cache */ - BUG_ON(fi->readdir_cache_idx < 0); - i_size_write(inode, fi->readdir_cache_idx * + BUG_ON(dfi->readdir_cache_idx < 0); + i_size_write(inode, dfi->readdir_cache_idx * sizeof(struct dentry*)); } else { dout(" marking %p complete\n", inode); } - __ceph_dir_set_complete(ci, fi->dir_release_count, - fi->dir_ordered_count); + __ceph_dir_set_complete(ci, dfi->dir_release_count, + dfi->dir_ordered_count); spin_unlock(&ci->i_ceph_lock); } @@ -573,25 +576,25 @@ more: return 0; } -static void reset_readdir(struct ceph_file_info *fi) +static void reset_readdir(struct ceph_dir_file_info *dfi) { - if (fi->last_readdir) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + if (dfi->last_readdir) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; } - kfree(fi->last_name); - fi->last_name = NULL; - fi->dir_release_count = 0; - fi->readdir_cache_idx = -1; - fi->next_offset = 2; /* compensate for . and .. */ - fi->flags &= ~CEPH_F_ATEND; + kfree(dfi->last_name); + dfi->last_name = NULL; + dfi->dir_release_count = 0; + dfi->readdir_cache_idx = -1; + dfi->next_offset = 2; /* compensate for . and .. */ + dfi->file_info.flags &= ~CEPH_F_ATEND; } /* * discard buffered readdir content on seekdir(0), or seek to new frag, * or seek prior to current chunk */ -static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) +static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos) { struct ceph_mds_reply_info_parsed *rinfo; loff_t chunk_offset; @@ -600,10 +603,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) if (is_hash_order(new_pos)) { /* no need to reset last_name for a forward seek when * dentries are sotred in hash order */ - } else if (fi->frag != fpos_frag(new_pos)) { + } else if (dfi->frag != fpos_frag(new_pos)) { return true; } - rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL; + rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL; if (!rinfo || !rinfo->dir_nr) return true; chunk_offset = rinfo->dir_entries[0].offset; @@ -613,7 +616,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file->f_mapping->host; loff_t retval; @@ -631,20 +634,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) } if (offset >= 0) { - if (need_reset_readdir(fi, offset)) { + if (need_reset_readdir(dfi, offset)) { dout("dir_llseek dropping %p content\n", file); - reset_readdir(fi); + reset_readdir(dfi); } else if (is_hash_order(offset) && offset > file->f_pos) { /* for hash offset, we don't know if a forward seek * is within same frag */ - fi->dir_release_count = 0; - fi->readdir_cache_idx = -1; + dfi->dir_release_count = 0; + dfi->readdir_cache_idx = -1; } if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; - fi->flags &= ~CEPH_F_ATEND; + dfi->file_info.flags &= ~CEPH_F_ATEND; } retval = offset; } @@ -1352,7 +1355,7 @@ static void ceph_d_prune(struct dentry *dentry) static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); int left; @@ -1361,12 +1364,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) return -EISDIR; - if (!fi->dir_info) { - fi->dir_info = kmalloc(bufsize, GFP_KERNEL); - if (!fi->dir_info) + if (!dfi->dir_info) { + dfi->dir_info = kmalloc(bufsize, GFP_KERNEL); + if (!dfi->dir_info) return -ENOMEM; - fi->dir_info_len = - snprintf(fi->dir_info, bufsize, + dfi->dir_info_len = + snprintf(dfi->dir_info, bufsize, "entries: %20lld\n" " files: %20lld\n" " subdirs: %20lld\n" @@ -1386,10 +1389,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, (long)ci->i_rctime.tv_nsec); } - if (*ppos >= fi->dir_info_len) + if (*ppos >= dfi->dir_info_len) return 0; - size = min_t(unsigned, size, fi->dir_info_len-*ppos); - left = copy_to_user(buf, fi->dir_info + *ppos, size); + size = min_t(unsigned, size, dfi->dir_info_len-*ppos); + left = copy_to_user(buf, dfi->dir_info + *ppos, size); if (left == size) return -EFAULT; *ppos += (size - left); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a1f0aee29c27..4a92acba1e9c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -161,13 +161,50 @@ out: return req; } +static int ceph_init_file_info(struct inode *inode, struct file *file, + int fmode, bool isdir) +{ + struct ceph_file_info *fi; + + dout("%s %p %p 0%o (%s)\n", __func__, inode, file, + inode->i_mode, isdir ? "dir" : "regular"); + BUG_ON(inode->i_fop->release != ceph_release); + + if (isdir) { + struct ceph_dir_file_info *dfi = + kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL); + if (!dfi) { + ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ + return -ENOMEM; + } + + file->private_data = dfi; + fi = &dfi->file_info; + dfi->next_offset = 2; + dfi->readdir_cache_idx = -1; + } else { + fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); + if (!fi) { + ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ + return -ENOMEM; + } + + file->private_data = fi; + } + + fi->fmode = fmode; + spin_lock_init(&fi->rw_contexts_lock); + INIT_LIST_HEAD(&fi->rw_contexts); + + return 0; +} + /* * initialize private struct file data. * if we fail, clean up by dropping fmode reference on the ceph_inode */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { - struct ceph_file_info *fi; int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -175,22 +212,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) ceph_fscache_register_inode_cookie(inode); ceph_fscache_file_set_cookie(inode, file); case S_IFDIR: - dout("init_file %p %p 0%o (regular)\n", inode, file, - inode->i_mode); - fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); - if (!fi) { - ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ - return -ENOMEM; - } - fi->fmode = fmode; - - spin_lock_init(&fi->rw_contexts_lock); - INIT_LIST_HEAD(&fi->rw_contexts); - - fi->next_offset = 2; - fi->readdir_cache_idx = -1; - file->private_data = fi; - BUG_ON(inode->i_fop->release != ceph_release); + ret = ceph_init_file_info(inode, file, fmode, + S_ISDIR(inode->i_mode)); + if (ret) + return ret; break; case S_IFLNK: @@ -462,16 +487,27 @@ out_acl: int ceph_release(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_file_info *fi = file->private_data; - dout("release inode %p file %p\n", inode, file); - ceph_put_fmode(ci, fi->fmode); - if (fi->last_readdir) - ceph_mdsc_put_request(fi->last_readdir); - kfree(fi->last_name); - kfree(fi->dir_info); - WARN_ON(!list_empty(&fi->rw_contexts)); - kmem_cache_free(ceph_file_cachep, fi); + if (S_ISDIR(inode->i_mode)) { + struct ceph_dir_file_info *dfi = file->private_data; + dout("release inode %p dir file %p\n", inode, file); + WARN_ON(!list_empty(&dfi->file_info.rw_contexts)); + + ceph_put_fmode(ci, dfi->file_info.fmode); + + if (dfi->last_readdir) + ceph_mdsc_put_request(dfi->last_readdir); + kfree(dfi->last_name); + kfree(dfi->dir_info); + kmem_cache_free(ceph_dir_file_cachep, dfi); + } else { + struct ceph_file_info *fi = file->private_data; + dout("release inode %p regular file %p\n", inode, file); + WARN_ON(!list_empty(&fi->rw_contexts)); + + ceph_put_fmode(ci, fi->fmode); + kmem_cache_free(ceph_file_cachep, fi); + } /* wake up anyone waiting for caps on this inode */ wake_up_all(&ci->i_cap_wq); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9bf9e54259dd..0fc03c456c50 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -679,6 +679,7 @@ struct kmem_cache *ceph_cap_cachep; struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; +struct kmem_cache *ceph_dir_file_cachep; static void ceph_inode_init_once(void *foo) { @@ -715,6 +716,10 @@ static int __init init_caches(void) if (!ceph_file_cachep) goto bad_file; + ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); + if (!ceph_dir_file_cachep) + goto bad_dir_file; + error = ceph_fscache_register(); if (error) goto bad_fscache; @@ -722,6 +727,8 @@ static int __init init_caches(void) return 0; bad_fscache: + kmem_cache_destroy(ceph_dir_file_cachep); +bad_dir_file: kmem_cache_destroy(ceph_file_cachep); bad_file: kmem_cache_destroy(ceph_dentry_cachep); @@ -747,6 +754,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); + kmem_cache_destroy(ceph_dir_file_cachep); ceph_fscache_unregister(); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1c2086e0fec2..ff49433014e9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -671,6 +671,10 @@ struct ceph_file_info { spinlock_t rw_contexts_lock; struct list_head rw_contexts; +}; + +struct ceph_dir_file_info { + struct ceph_file_info file_info; /* readdir: position within the dir */ u32 frag; diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c2ec44cf5098..49c93b9308d7 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -262,6 +262,7 @@ extern struct kmem_cache *ceph_cap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; +extern struct kmem_cache *ceph_dir_file_cachep; /* ceph_common.c */ extern bool libceph_compatible(void *data); -- cgit v1.2.3 From 98cfda81040f4a6925a98567d036a918e855d4ec Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 13 Mar 2018 10:43:45 +0800 Subject: ceph: return proper bool type to caller instead of pointer Change to return true/false only for bool type return code. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a511a777ffb9..3a555b604441 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -419,9 +419,10 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, static bool __have_session(struct ceph_mds_client *mdsc, int mds) { - if (mds >= mdsc->max_sessions) + if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) return false; - return mdsc->sessions[mds]; + else + return true; } static int __verify_registered_session(struct ceph_mds_client *mdsc, -- cgit v1.2.3 From 50c55aeca27f09ceda0cd58b72b0addeecef74eb Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 14 Mar 2018 13:47:33 +0800 Subject: ceph: fix invalid point dereference for error case in mdsc destroy 1. set fsc->mdsc after successfully allocate all necessary memory in mdsc init. 2. if fsc->mdsc is NULL, just skip destroy operation in mdsc destroy. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 3a555b604441..b50044374947 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3575,7 +3575,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) if (!mdsc) return -ENOMEM; mdsc->fsc = fsc; - fsc->mdsc = mdsc; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); if (!mdsc->mdsmap) { @@ -3583,6 +3582,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) return -ENOMEM; } + fsc->mdsc = mdsc; init_completion(&mdsc->safe_umount_waiters); init_waitqueue_head(&mdsc->session_close_wq); INIT_LIST_HEAD(&mdsc->waiting_for_map); @@ -3861,6 +3861,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) struct ceph_mds_client *mdsc = fsc->mdsc; dout("mdsc_destroy %p\n", mdsc); + if (!mdsc) + return; + /* flush out any connection work with references to us */ ceph_msgr_flush(); -- cgit v1.2.3 From 7aac453a03a2216fcda4f4d72ae4d2089a013bb3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 13 Mar 2018 19:52:20 +0800 Subject: ceph: rename function drop_leases() to a more descriptive name Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b50044374947..537048b4a4d5 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3473,13 +3473,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, } /* - * drop all leases (and dentry refs) in preparation for umount + * lock unlock sessions, to wait ongoing session activities */ -static void drop_leases(struct ceph_mds_client *mdsc) +static void lock_unlock_sessions(struct ceph_mds_client *mdsc) { int i; - dout("drop_leases\n"); mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); @@ -3663,7 +3662,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) dout("pre_umount\n"); mdsc->stopping = 1; - drop_leases(mdsc); + lock_unlock_sessions(mdsc); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); -- cgit v1.2.3 From fb18a57568c2b84cd611e242c0f6fa97b45e4907 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 5 Jan 2018 10:47:18 +0000 Subject: ceph: quota: add initial infrastructure to support cephfs quotas This patch adds the infrastructure required to support cephfs quotas as it is currently implemented in the ceph fuse client. Cephfs quotas can be set on any directory, and can restrict the number of bytes or the number of files stored beneath that point in the directory hierarchy. Quotas are set using the extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', and can be removed by setting these attributes to '0'. Link: http://tracker.ceph.com/issues/22372 Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- Documentation/filesystems/ceph.txt | 12 +++++++ fs/ceph/Makefile | 2 +- fs/ceph/inode.c | 6 ++++ fs/ceph/mds_client.c | 23 ++++++++++++++ fs/ceph/mds_client.h | 2 ++ fs/ceph/quota.c | 65 ++++++++++++++++++++++++++++++++++++++ fs/ceph/super.h | 8 +++++ fs/ceph/xattr.c | 44 ++++++++++++++++++++++++++ include/linux/ceph/ceph_features.h | 1 + include/linux/ceph/ceph_fs.h | 17 ++++++++++ net/ceph/ceph_common.c | 1 + 11 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 fs/ceph/quota.c (limited to 'fs') diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index 0b302a11718a..094772481263 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -62,6 +62,18 @@ subdirectories, and a summation of all nested file sizes. This makes the identification of large disk space consumers relatively quick, as no 'du' or similar recursive scan of the file system is required. +Finally, Ceph also allows quotas to be set on any directory in the system. +The quota can restrict the number of bytes or the number of files stored +beneath that point in the directory hierarchy. Quotas can be set using +extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg: + + setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir + getfattr -n ceph.quota.max_bytes /some/dir + +A limitation of the current quotas implementation is that it relies on the +cooperation of the client mounting the file system to stop writers when a +limit is reached. A modified or adversarial client cannot be prevented +from writing as much data as it needs. Mount Syntax ============ diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 174f5709e508..a699e320393f 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ - export.o caps.o snap.o xattr.o \ + export.o caps.o snap.o xattr.o quota.o \ mds_client.o mdsmap.o strings.o ceph_frag.o \ debugfs.o diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index be5f12d0d637..2c6f8be4ed63 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -441,6 +441,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb) atomic64_set(&ci->i_complete_seq[1], 0); ci->i_symlink = NULL; + ci->i_max_bytes = 0; + ci->i_max_files = 0; + memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); @@ -790,6 +793,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + ci->i_max_bytes = iinfo->max_bytes; + ci->i_max_files = iinfo->max_files; + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 537048b4a4d5..1c9877c1149f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -100,6 +100,26 @@ static int parse_reply_info_in(void **p, void *end, } else info->inline_version = CEPH_INLINE_NONE; + if (features & CEPH_FEATURE_MDS_QUOTA) { + u8 struct_v, struct_compat; + u32 struct_len; + + /* + * both struct_v and struct_compat are expected to be >= 1 + */ + ceph_decode_8_safe(p, end, struct_v, bad); + ceph_decode_8_safe(p, end, struct_compat, bad); + if (!struct_v || !struct_compat) + goto bad; + ceph_decode_32_safe(p, end, struct_len, bad); + ceph_decode_need(p, end, struct_len, bad); + ceph_decode_64_safe(p, end, info->max_bytes, bad); + ceph_decode_64_safe(p, end, info->max_files, bad); + } else { + info->max_bytes = 0; + info->max_files = 0; + } + info->pool_ns_len = 0; info->pool_ns_data = NULL; if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { @@ -4082,6 +4102,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) case CEPH_MSG_CLIENT_LEASE: handle_lease(mdsc, s, msg); break; + case CEPH_MSG_CLIENT_QUOTA: + ceph_handle_quota(mdsc, s, msg); + break; default: pr_err("received unknown message type %d %s\n", type, diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 71e3b783ee6f..2a67c8b01ae6 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -49,6 +49,8 @@ struct ceph_mds_reply_info_in { char *inline_data; u32 pool_ns_len; char *pool_ns_data; + u64 max_bytes; + u64 max_files; }; struct ceph_mds_reply_dir_entry { diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c new file mode 100644 index 000000000000..1b69d8365ec2 --- /dev/null +++ b/fs/ceph/quota.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * quota.c - CephFS quota + * + * Copyright (C) 2017-2018 SUSE + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "super.h" +#include "mds_client.h" + +void ceph_handle_quota(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_msg *msg) +{ + struct super_block *sb = mdsc->fsc->sb; + struct ceph_mds_quota *h = msg->front.iov_base; + struct ceph_vino vino; + struct inode *inode; + struct ceph_inode_info *ci; + + if (msg->front.iov_len != sizeof(*h)) { + pr_err("%s corrupt message mds%d len %d\n", __func__, + session->s_mds, (int)msg->front.iov_len); + ceph_msg_dump(msg); + return; + } + + /* increment msg sequence number */ + mutex_lock(&session->s_mutex); + session->s_seq++; + mutex_unlock(&session->s_mutex); + + /* lookup inode */ + vino.ino = le64_to_cpu(h->ino); + vino.snap = CEPH_NOSNAP; + inode = ceph_find_inode(sb, vino); + if (!inode) { + pr_warn("Failed to find inode %llu\n", vino.ino); + return; + } + ci = ceph_inode(inode); + + spin_lock(&ci->i_ceph_lock); + ci->i_rbytes = le64_to_cpu(h->rbytes); + ci->i_rfiles = le64_to_cpu(h->rfiles); + ci->i_rsubdirs = le64_to_cpu(h->rsubdirs); + ci->i_max_bytes = le64_to_cpu(h->max_bytes); + ci->i_max_files = le64_to_cpu(h->max_files); + spin_unlock(&ci->i_ceph_lock); + + iput(inode); +} diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ff49433014e9..0c95a929bab7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -310,6 +310,9 @@ struct ceph_inode_info { u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; + /* quotas */ + u64 i_max_bytes, i_max_files; + struct rb_root i_fragtree; int i_fragtree_nsplits; struct mutex i_fragtree_mutex; @@ -1070,4 +1073,9 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); +/* quota.c */ +extern void ceph_handle_quota(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_msg *msg); + #endif /* _FS_CEPH_SUPER_H */ diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index e1c4e0b12b4c..7e72348639e4 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -224,6 +224,31 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, (long)ci->i_rctime.tv_nsec); } +/* quotas */ + +static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci) +{ + return (ci->i_max_files || ci->i_max_bytes); +} + +static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val, + size_t size) +{ + return snprintf(val, size, "max_bytes=%llu max_files=%llu", + ci->i_max_bytes, ci->i_max_files); +} + +static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%llu", ci->i_max_bytes); +} + +static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%llu", ci->i_max_files); +} #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name #define CEPH_XATTR_NAME2(_type, _name, _name2) \ @@ -247,6 +272,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, .hidden = true, \ .exists_cb = ceph_vxattrcb_layout_exists, \ } +#define XATTR_QUOTA_FIELD(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .readonly = false, \ + .hidden = true, \ + .exists_cb = ceph_vxattrcb_quota_exists, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { { @@ -270,6 +304,16 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, rsubdirs), XATTR_NAME_CEPH(dir, rbytes), XATTR_NAME_CEPH(dir, rctime), + { + .name = "ceph.quota", + .name_size = sizeof("ceph.quota"), + .getxattr_cb = ceph_vxattrcb_quota, + .readonly = false, + .hidden = true, + .exists_cb = ceph_vxattrcb_quota_exists, + }, + XATTR_QUOTA_FIELD(quota, max_bytes), + XATTR_QUOTA_FIELD(quota, max_files), { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 59042d5ac520..3901927cf6a0 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_OSD_POOLRESEND | \ + CEPH_FEATURE_MDS_QUOTA | \ CEPH_FEATURE_CRUSH_V4 | \ CEPH_FEATURE_NEW_OSDOP_ENCODING | \ CEPH_FEATURE_SERVER_JEWEL | \ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 88dd51381aaf..7ecfc88314d8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -134,6 +134,7 @@ struct ceph_dir_layout { #define CEPH_MSG_CLIENT_LEASE 0x311 #define CEPH_MSG_CLIENT_SNAP 0x312 #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 +#define CEPH_MSG_CLIENT_QUOTA 0x314 /* pool ops */ #define CEPH_MSG_POOLOP_REPLY 48 @@ -807,4 +808,20 @@ struct ceph_mds_snap_realm { } __attribute__ ((packed)); /* followed by my snap list, then prior parent snap list */ +/* + * quotas + */ +struct ceph_mds_quota { + __le64 ino; /* ino */ + struct ceph_timespec rctime; + __le64 rbytes; /* dir stats */ + __le64 rfiles; + __le64 rsubdirs; + __u8 struct_v; /* compat */ + __u8 struct_compat; + __le32 struct_len; + __le64 max_bytes; /* quota max. bytes */ + __le64 max_files; /* quota max. files */ +} __attribute__ ((packed)); + #endif diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index c15e2699090c..ffbcc7f5e740 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -80,6 +80,7 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_CLIENT_REPLY: return "client_reply"; case CEPH_MSG_CLIENT_CAPS: return "client_caps"; case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; + case CEPH_MSG_CLIENT_QUOTA: return "client_quota"; case CEPH_MSG_CLIENT_SNAP: return "client_snap"; case CEPH_MSG_CLIENT_LEASE: return "client_lease"; case CEPH_MSG_POOLOP_REPLY: return "poolop_reply"; -- cgit v1.2.3 From b7a2921765cf796280baf653a52b22b52e0ba266 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 5 Jan 2018 10:47:19 +0000 Subject: ceph: quota: support for ceph.quota.max_files This patch adds support for the max_files quota. It hooks into all the ceph functions that add new filesystem objects that need to be checked against the quota limits. When these limits are hit, -EDQUOT is returned. Note that we're not checking quotas on ceph_link(). ceph_link doesn't really create a new inode, and since the MDS doesn't update the directory statistics when a new (hard) link is created (only with symlinks), they are not accounted as a new file. Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 11 ++++++++ fs/ceph/file.c | 4 ++- fs/ceph/quota.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/super.h | 1 + 4 files changed, 95 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 16405e0774a6..7d9851cd51bb 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -828,6 +828,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; + err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) return err; @@ -881,6 +884,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; + dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { @@ -930,6 +936,11 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } + if (ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } + mode |= S_IFDIR; err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4a92acba1e9c..c3042330e0e9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -402,7 +402,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct ceph_mds_request *req; struct dentry *dn; struct ceph_acls_info acls = {}; - int mask; + int mask; int err; dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", @@ -413,6 +413,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, return -ENAMETOOLONG; if (flags & O_CREAT) { + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) return err; diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 1b69d8365ec2..cf1c78c4a4d2 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -63,3 +63,83 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, iput(inode); } + +enum quota_check_op { + QUOTA_CHECK_MAX_FILES_OP /* check quota max_files limit */ +}; + +/* + * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each + * realm, it will execute quota check operation defined by the 'op' parameter. + * The snaprealm walk is interrupted if the quota check detects that the quota + * is exceeded or if the root inode is reached. + */ +static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, + loff_t delta) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_inode_info *ci; + struct ceph_snap_realm *realm, *next; + struct ceph_vino vino; + struct inode *in; + u64 max, rvalue; + bool is_root; + bool exceeded = false; + + down_read(&mdsc->snap_rwsem); + realm = ceph_inode(inode)->i_snap_realm; + ceph_get_snap_realm(mdsc, realm); + while (realm) { + vino.ino = realm->ino; + vino.snap = CEPH_NOSNAP; + in = ceph_find_inode(inode->i_sb, vino); + if (!in) { + pr_warn("Failed to find inode for %llu\n", vino.ino); + break; + } + ci = ceph_inode(in); + spin_lock(&ci->i_ceph_lock); + if (op == QUOTA_CHECK_MAX_FILES_OP) { + max = ci->i_max_files; + rvalue = ci->i_rfiles + ci->i_rsubdirs; + } + is_root = (ci->i_vino.ino == CEPH_INO_ROOT); + spin_unlock(&ci->i_ceph_lock); + switch (op) { + case QUOTA_CHECK_MAX_FILES_OP: + exceeded = (max && (rvalue >= max)); + break; + default: + /* Shouldn't happen */ + pr_warn("Invalid quota check op (%d)\n", op); + exceeded = true; /* Just break the loop */ + } + iput(in); + + if (is_root || exceeded) + break; + next = realm->parent; + ceph_get_snap_realm(mdsc, next); + ceph_put_snap_realm(mdsc, realm); + realm = next; + } + ceph_put_snap_realm(mdsc, realm); + up_read(&mdsc->snap_rwsem); + + return exceeded; +} + +/* + * ceph_quota_is_max_files_exceeded - check if we can create a new file + * @inode: directory where a new file is being created + * + * This functions returns true is max_files quota allows a new file to be + * created. It is necessary to walk through the snaprealm hierarchy (until the + * FS root) to check all realms with quotas set. + */ +bool ceph_quota_is_max_files_exceeded(struct inode *inode) +{ + WARN_ON(!S_ISDIR(inode->i_mode)); + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0); +} diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 0c95a929bab7..4afc6cca8786 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1077,5 +1077,6 @@ extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); extern void ceph_handle_quota(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg); +extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); #endif /* _FS_CEPH_SUPER_H */ -- cgit v1.2.3 From cafe21a4fb3075fb2980caba8fdb533a1bdb52b0 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 5 Jan 2018 10:47:20 +0000 Subject: ceph: quota: don't allow cross-quota renames This patch changes ceph_rename so that -EXDEV is returned if an attempt is made to mv a file between two different dir trees with different quotas setup. Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 5 +++++ fs/ceph/quota.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/super.h | 1 + 3 files changed, 75 insertions(+) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 7d9851cd51bb..1f60498c4631 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1080,6 +1080,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, else return -EROFS; } + /* don't allow cross-quota renames */ + if ((old_dir != new_dir) && + (!ceph_quota_is_same_realm(old_dir, new_dir))) + return -EXDEV; + dout("rename dir %p dentry %p to dir %p dentry %p\n", old_dir, old_dentry, new_dir, new_dentry); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index cf1c78c4a4d2..5d7dada91a57 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -21,6 +21,11 @@ #include "super.h" #include "mds_client.h" +static inline bool ceph_has_quota(struct ceph_inode_info *ci) +{ + return (ci && (ci->i_max_files || ci->i_max_bytes)); +} + void ceph_handle_quota(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) @@ -64,6 +69,70 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, iput(inode); } +/* + * This function walks through the snaprealm for an inode and returns the + * ceph_snap_realm for the first snaprealm that has quotas set (either max_files + * or max_bytes). If the root is reached, return the root ceph_snap_realm + * instead. + * + * Note that the caller is responsible for calling ceph_put_snap_realm() on the + * returned realm. + */ +static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, + struct inode *inode) +{ + struct ceph_inode_info *ci = NULL; + struct ceph_snap_realm *realm, *next; + struct ceph_vino vino; + struct inode *in; + + realm = ceph_inode(inode)->i_snap_realm; + ceph_get_snap_realm(mdsc, realm); + while (realm) { + vino.ino = realm->ino; + vino.snap = CEPH_NOSNAP; + in = ceph_find_inode(inode->i_sb, vino); + if (!in) { + pr_warn("Failed to find inode for %llu\n", vino.ino); + break; + } + ci = ceph_inode(in); + if (ceph_has_quota(ci) || (ci->i_vino.ino == CEPH_INO_ROOT)) { + iput(in); + return realm; + } + iput(in); + next = realm->parent; + ceph_get_snap_realm(mdsc, next); + ceph_put_snap_realm(mdsc, realm); + realm = next; + } + if (realm) + ceph_put_snap_realm(mdsc, realm); + + return NULL; +} + +bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc; + struct ceph_snap_realm *old_realm, *new_realm; + bool is_same; + + down_read(&mdsc->snap_rwsem); + old_realm = get_quota_realm(mdsc, old); + new_realm = get_quota_realm(mdsc, new); + is_same = (old_realm == new_realm); + up_read(&mdsc->snap_rwsem); + + if (old_realm) + ceph_put_snap_realm(mdsc, old_realm); + if (new_realm) + ceph_put_snap_realm(mdsc, new_realm); + + return is_same; +} + enum quota_check_op { QUOTA_CHECK_MAX_FILES_OP /* check quota max_files limit */ }; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 4afc6cca8786..eea6f70f3bf9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1078,5 +1078,6 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg); extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); +extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new); #endif /* _FS_CEPH_SUPER_H */ -- cgit v1.2.3 From 2b83845f8bd711e66e1c367a9bd56c9df3410236 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 5 Jan 2018 10:47:21 +0000 Subject: ceph: quota: support for ceph.quota.max_bytes Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 11 +++++++++++ fs/ceph/inode.c | 4 ++++ fs/ceph/quota.c | 28 +++++++++++++++++++++++++++- fs/ceph/super.h | 2 ++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index c3042330e0e9..0a2843fdebbd 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1378,6 +1378,11 @@ retry_snap: pos = iocb->ki_pos; count = iov_iter_count(from); + if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) { + err = -EDQUOT; + goto out; + } + err = file_remove_privs(file); if (err) goto out; @@ -1708,6 +1713,12 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } + if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) && + ceph_quota_is_max_bytes_exceeded(inode, offset + length)) { + ret = -EDQUOT; + goto unlock; + } + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 2c6f8be4ed63..50ccae151ea0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2147,6 +2147,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + if ((attr->ia_valid & ATTR_SIZE) && + ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) + return -EDQUOT; + err = __ceph_setattr(inode, attr); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 5d7dada91a57..745f9f47027b 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -134,7 +134,8 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) } enum quota_check_op { - QUOTA_CHECK_MAX_FILES_OP /* check quota max_files limit */ + QUOTA_CHECK_MAX_FILES_OP, /* check quota max_files limit */ + QUOTA_CHECK_MAX_BYTES_OP /* check quota max_files limit */ }; /* @@ -171,6 +172,9 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, if (op == QUOTA_CHECK_MAX_FILES_OP) { max = ci->i_max_files; rvalue = ci->i_rfiles + ci->i_rsubdirs; + } else { + max = ci->i_max_bytes; + rvalue = ci->i_rbytes; } is_root = (ci->i_vino.ino == CEPH_INO_ROOT); spin_unlock(&ci->i_ceph_lock); @@ -178,6 +182,9 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, case QUOTA_CHECK_MAX_FILES_OP: exceeded = (max && (rvalue >= max)); break; + case QUOTA_CHECK_MAX_BYTES_OP: + exceeded = (max && (rvalue + delta > max)); + break; default: /* Shouldn't happen */ pr_warn("Invalid quota check op (%d)\n", op); @@ -212,3 +219,22 @@ bool ceph_quota_is_max_files_exceeded(struct inode *inode) return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0); } + +/* + * ceph_quota_is_max_bytes_exceeded - check if we can write to a file + * @inode: inode being written + * @newsize: new size if write succeeds + * + * This functions returns true is max_bytes quota allows a file size to reach + * @newsize; it returns false otherwise. + */ +bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize) +{ + loff_t size = i_size_read(inode); + + /* return immediately if we're decreasing file size */ + if (newsize <= size) + return false; + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size)); +} diff --git a/fs/ceph/super.h b/fs/ceph/super.h index eea6f70f3bf9..5870c225c2fc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1079,5 +1079,7 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc, struct ceph_msg *msg); extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new); +extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, + loff_t newlen); #endif /* _FS_CEPH_SUPER_H */ -- cgit v1.2.3 From 1ab302a0cb1455631646aa66b7fc02afd617ea4f Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 5 Jan 2018 10:47:22 +0000 Subject: ceph: quota: update MDS when max_bytes is approaching When we're reaching the ceph.quota.max_bytes limit, i.e., when writing more than 1/16th of the space left in a quota realm, update the MDS with the new file size. This mirrors the fuse-client approach with commit 122c50315ed1 ("client: Inform mds file size when approaching quota limit"), in the ceph git tree. Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 6 ++++++ fs/ceph/quota.c | 38 +++++++++++++++++++++++++++++++++++++- fs/ceph/super.h | 2 ++ 3 files changed, 45 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0a2843fdebbd..f85040d73e3d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1464,6 +1464,7 @@ retry_snap: if (written >= 0) { int dirty; + spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, @@ -1471,6 +1472,8 @@ retry_snap: spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); + if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos)) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL); } dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", @@ -1767,6 +1770,9 @@ static long ceph_fallocate(struct file *file, int mode, spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); + if ((endoff > size) && + ceph_quota_is_max_bytes_approaching(inode, endoff)) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL); } ceph_put_cap_refs(ci, got); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 745f9f47027b..7d1e18e2249f 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -135,7 +135,9 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) enum quota_check_op { QUOTA_CHECK_MAX_FILES_OP, /* check quota max_files limit */ - QUOTA_CHECK_MAX_BYTES_OP /* check quota max_files limit */ + QUOTA_CHECK_MAX_BYTES_OP, /* check quota max_files limit */ + QUOTA_CHECK_MAX_BYTES_APPROACHING_OP /* check if quota max_files + limit is approaching */ }; /* @@ -185,6 +187,20 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, case QUOTA_CHECK_MAX_BYTES_OP: exceeded = (max && (rvalue + delta > max)); break; + case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP: + if (max) { + if (rvalue >= max) + exceeded = true; + else { + /* + * when we're writing more that 1/16th + * of the available space + */ + exceeded = + (((max - rvalue) >> 4) < delta); + } + } + break; default: /* Shouldn't happen */ pr_warn("Invalid quota check op (%d)\n", op); @@ -238,3 +254,23 @@ bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize) return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size)); } + +/* + * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes + * @inode: inode being written + * @newsize: new size if write succeeds + * + * This function returns true if the new file size @newsize will be consuming + * more than 1/16th of the available quota space; it returns false otherwise. + */ +bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize) +{ + loff_t size = ceph_inode(inode)->i_reported_size; + + /* return immediately if we're decreasing file size */ + if (newsize <= size) + return false; + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP, + (newsize - size)); +} diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 5870c225c2fc..e4ff485d24c7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1081,5 +1081,7 @@ extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new); extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newlen); +extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, + loff_t newlen); #endif /* _FS_CEPH_SUPER_H */ -- cgit v1.2.3 From 2596366907f872de7be0557720bb55bccf7489d9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 12 Jan 2018 16:26:17 +0800 Subject: ceph: don't check quota for snap inode snap inode's i_snap_realm is not pointing to ceph_snap_realm. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 3 ++- fs/ceph/quota.c | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1f60498c4631..6cd92a0e2af9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -936,7 +936,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } - if (ceph_quota_is_max_files_exceeded(dir)) { + if (op == CEPH_MDS_OP_MKDIR && + ceph_quota_is_max_files_exceeded(dir)) { err = -EDQUOT; goto out; } diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 7d1e18e2249f..529de67393a9 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -86,8 +86,15 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, struct ceph_vino vino; struct inode *in; + if (ceph_snap(inode) != CEPH_NOSNAP) + return NULL; + realm = ceph_inode(inode)->i_snap_realm; - ceph_get_snap_realm(mdsc, realm); + if (realm) + ceph_get_snap_realm(mdsc, realm); + else + pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " + "null i_snap_realm\n", ceph_vinop(inode)); while (realm) { vino.ino = realm->ino; vino.snap = CEPH_NOSNAP; @@ -158,9 +165,16 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, bool is_root; bool exceeded = false; + if (ceph_snap(inode) != CEPH_NOSNAP) + return false; + down_read(&mdsc->snap_rwsem); realm = ceph_inode(inode)->i_snap_realm; - ceph_get_snap_realm(mdsc, realm); + if (realm) + ceph_get_snap_realm(mdsc, realm); + else + pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " + "null i_snap_realm\n", ceph_vinop(inode)); while (realm) { vino.ino = realm->ino; vino.snap = CEPH_NOSNAP; -- cgit v1.2.3 From 0eb6bbe4d9cf02f639d661edf7c02defc3453a69 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 12 Jan 2018 16:55:31 +0800 Subject: ceph: fix root quota realm check Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/quota.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 529de67393a9..121819baeb58 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -85,6 +85,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, *next; struct ceph_vino vino; struct inode *in; + bool has_quota; if (ceph_snap(inode) != CEPH_NOSNAP) return NULL; @@ -104,12 +105,13 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, break; } ci = ceph_inode(in); - if (ceph_has_quota(ci) || (ci->i_vino.ino == CEPH_INO_ROOT)) { - iput(in); - return realm; - } + has_quota = ceph_has_quota(ci); iput(in); + next = realm->parent; + if (has_quota || !next) + return realm; + ceph_get_snap_realm(mdsc, next); ceph_put_snap_realm(mdsc, realm); realm = next; @@ -162,7 +164,6 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, struct ceph_vino vino; struct inode *in; u64 max, rvalue; - bool is_root; bool exceeded = false; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -192,7 +193,6 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, max = ci->i_max_bytes; rvalue = ci->i_rbytes; } - is_root = (ci->i_vino.ino == CEPH_INO_ROOT); spin_unlock(&ci->i_ceph_lock); switch (op) { case QUOTA_CHECK_MAX_FILES_OP: @@ -222,9 +222,9 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, } iput(in); - if (is_root || exceeded) - break; next = realm->parent; + if (exceeded || !next) + break; ceph_get_snap_realm(mdsc, next); ceph_put_snap_realm(mdsc, realm); realm = next; -- cgit v1.2.3 From e3161f17d92699ce6ca3b7988131b10ad4035cf9 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 12 Jan 2018 17:19:28 +0000 Subject: ceph: quota: cache inode pointer in ceph_snap_realm Keep a pointer to the inode in struct ceph_snap_realm. This allows to optimize functions that walk the realms hierarchy (e.g. in quotas). Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 4 +++- fs/ceph/inode.c | 3 +++ fs/ceph/quota.c | 24 ++++++++++-------------- fs/ceph/snap.c | 2 ++ fs/ceph/super.h | 1 + 5 files changed, 19 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 25141b6683e9..23dbfae16156 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -703,9 +703,11 @@ void ceph_add_cap(struct inode *inode, } spin_lock(&realm->inodes_with_caps_lock); - ci->i_snap_realm = realm; list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); + ci->i_snap_realm = realm; + if (realm->ino == ci->i_vino.ino) + realm->inode = inode; spin_unlock(&realm->inodes_with_caps_lock); if (oldrealm) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 50ccae151ea0..8ea11b1a2e23 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -551,6 +551,9 @@ void ceph_destroy_inode(struct inode *inode) dout(" dropping residual ref to snap realm %p\n", realm); spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + ci->i_snap_realm = NULL; + if (realm->ino == ci->i_vino.ino) + realm->inode = NULL; spin_unlock(&realm->inodes_with_caps_lock); ceph_put_snap_realm(mdsc, realm); } diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 121819baeb58..c561a85ea8b1 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -83,7 +83,6 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, { struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; - struct ceph_vino vino; struct inode *in; bool has_quota; @@ -97,13 +96,12 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " "null i_snap_realm\n", ceph_vinop(inode)); while (realm) { - vino.ino = realm->ino; - vino.snap = CEPH_NOSNAP; - in = ceph_find_inode(inode->i_sb, vino); - if (!in) { - pr_warn("Failed to find inode for %llu\n", vino.ino); + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (!in) break; - } + ci = ceph_inode(in); has_quota = ceph_has_quota(ci); iput(in); @@ -161,7 +159,6 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_inode_info *ci; struct ceph_snap_realm *realm, *next; - struct ceph_vino vino; struct inode *in; u64 max, rvalue; bool exceeded = false; @@ -177,13 +174,12 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " "null i_snap_realm\n", ceph_vinop(inode)); while (realm) { - vino.ino = realm->ino; - vino.snap = CEPH_NOSNAP; - in = ceph_find_inode(inode->i_sb, vino); - if (!in) { - pr_warn("Failed to find inode for %llu\n", vino.ino); + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (!in) break; - } + ci = ceph_inode(in); spin_lock(&ci->i_ceph_lock); if (op == QUOTA_CHECK_MAX_FILES_OP) { diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 07cf95e6413d..041c27ea8de1 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -931,6 +931,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); ci->i_snap_realm = realm; + if (realm->ino == ci->i_vino.ino) + realm->inode = inode; spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e4ff485d24c7..8217abf46182 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -755,6 +755,7 @@ struct ceph_readdir_cache_control { */ struct ceph_snap_realm { u64 ino; + struct inode *inode; atomic_t nref; struct rb_node node; -- cgit v1.2.3 From d557c48db730eaab6b75d4af332c135309b7a6a4 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 12 Jan 2018 17:19:29 +0000 Subject: ceph: quota: add counter for snaprealms with quota By keeping a counter with the number of snaprealms that have quota set allows to optimize the functions that need to walk throught the realms hierarchy looking for quotas. Thus, if this counter is zero it's safe to assume that there are no realms with quota. Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 6 ++++-- fs/ceph/mds_client.c | 1 + fs/ceph/mds_client.h | 2 ++ fs/ceph/quota.c | 29 ++++++++++++++++++++++++----- fs/ceph/super.h | 20 ++++++++++++++++++++ 5 files changed, 51 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8ea11b1a2e23..8bf60250309e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -539,6 +539,9 @@ void ceph_destroy_inode(struct inode *inode) ceph_queue_caps_release(inode); + if (__ceph_has_any_quota(ci)) + ceph_adjust_quota_realms_count(inode, false); + /* * we may still have a snap_realm reference if there are stray * caps in i_snap_caps. @@ -796,8 +799,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; - ci->i_max_bytes = iinfo->max_bytes; - ci->i_max_files = iinfo->max_files; + __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && (issued & CEPH_CAP_AUTH_EXCL) == 0) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1c9877c1149f..5ece2e6ad154 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3609,6 +3609,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) atomic_set(&mdsc->num_sessions, 0); mdsc->max_sessions = 0; mdsc->stopping = 0; + atomic64_set(&mdsc->quotarealms_count, 0); mdsc->last_snap_seq = 0; init_rwsem(&mdsc->snap_rwsem); mdsc->snap_realms = RB_ROOT; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 2a67c8b01ae6..2ec3b5b35067 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -314,6 +314,8 @@ struct ceph_mds_client { int max_sessions; /* len of s_mds_sessions */ int stopping; /* true if shutting down */ + atomic64_t quotarealms_count; /* # realms with quota */ + /* * snap_rwsem will cover cap linkage into snaprealms, and * realm snap contexts. (later, we can do per-realm snap diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index c561a85ea8b1..588744b4665f 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -21,9 +21,19 @@ #include "super.h" #include "mds_client.h" -static inline bool ceph_has_quota(struct ceph_inode_info *ci) +void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) { - return (ci && (ci->i_max_files || ci->i_max_bytes)); + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + if (inc) + atomic64_inc(&mdsc->quotarealms_count); + else + atomic64_dec(&mdsc->quotarealms_count); +} + +static inline bool ceph_has_realms_with_quotas(struct inode *inode) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + return atomic64_read(&mdsc->quotarealms_count) > 0; } void ceph_handle_quota(struct ceph_mds_client *mdsc, @@ -62,8 +72,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, ci->i_rbytes = le64_to_cpu(h->rbytes); ci->i_rfiles = le64_to_cpu(h->rfiles); ci->i_rsubdirs = le64_to_cpu(h->rsubdirs); - ci->i_max_bytes = le64_to_cpu(h->max_bytes); - ci->i_max_files = le64_to_cpu(h->max_files); + __ceph_update_quota(ci, le64_to_cpu(h->max_bytes), + le64_to_cpu(h->max_files)); spin_unlock(&ci->i_ceph_lock); iput(inode); @@ -103,7 +113,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, break; ci = ceph_inode(in); - has_quota = ceph_has_quota(ci); + has_quota = __ceph_has_any_quota(ci); iput(in); next = realm->parent; @@ -241,6 +251,9 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, */ bool ceph_quota_is_max_files_exceeded(struct inode *inode) { + if (!ceph_has_realms_with_quotas(inode)) + return false; + WARN_ON(!S_ISDIR(inode->i_mode)); return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0); @@ -258,6 +271,9 @@ bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize) { loff_t size = i_size_read(inode); + if (!ceph_has_realms_with_quotas(inode)) + return false; + /* return immediately if we're decreasing file size */ if (newsize <= size) return false; @@ -277,6 +293,9 @@ bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize) { loff_t size = ceph_inode(inode)->i_reported_size; + if (!ceph_has_realms_with_quotas(inode)) + return false; + /* return immediately if we're decreasing file size */ if (newsize <= size) return false; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 8217abf46182..1321a6749953 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1075,6 +1075,26 @@ extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); /* quota.c */ +static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci) +{ + return ci->i_max_files || ci->i_max_bytes; +} + +extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc); + +static inline void __ceph_update_quota(struct ceph_inode_info *ci, + u64 max_bytes, u64 max_files) +{ + bool had_quota, has_quota; + had_quota = __ceph_has_any_quota(ci); + ci->i_max_bytes = max_bytes; + ci->i_max_files = max_files; + has_quota = __ceph_has_any_quota(ci); + + if (had_quota != has_quota) + ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota); +} + extern void ceph_handle_quota(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg); -- cgit v1.2.3 From 9122eed5281e89bdb02162a8ecb3cc13ffc8985e Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Wed, 31 Jan 2018 10:53:13 +0000 Subject: ceph: quota: report root dir quota usage in statfs This commit changes statfs default behaviour when reporting usage statistics. Instead of using the overall filesystem usage, statfs now reports the quota for the filesystem root, if ceph.quota.max_bytes has been set for this inode. If quota hasn't been set, it falls back to the old statfs behaviour. A new mount option is also added ('noquotadf') to disable this behaviour. Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- Documentation/filesystems/ceph.txt | 4 +++ fs/ceph/quota.c | 56 ++++++++++++++++++++++++++++++++++++++ fs/ceph/super.c | 27 ++++++++++++++++-- fs/ceph/super.h | 3 ++ 4 files changed, 87 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index 094772481263..d7f011ddc150 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -149,6 +149,10 @@ Mount Options noasyncreaddir Do not use the dcache as above for readdir. + noquotadf + Report overall filesystem usage in statfs instead of using the root + directory quota. + More Information ================ diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 588744b4665f..242bfa5c0539 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -18,6 +18,8 @@ * along with this program; if not, see . */ +#include + #include "super.h" #include "mds_client.h" @@ -303,3 +305,57 @@ bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize) return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP, (newsize - size)); } + +/* + * ceph_quota_update_statfs - if root has quota update statfs with quota status + * @fsc: filesystem client instance + * @buf: statfs to update + * + * If the mounted filesystem root has max_bytes quota set, update the filesystem + * statistics with the quota status. + * + * This function returns true if the stats have been updated, false otherwise. + */ +bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) +{ + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_inode_info *ci; + struct ceph_snap_realm *realm; + struct inode *in; + u64 total = 0, used, free; + bool is_updated = false; + + down_read(&mdsc->snap_rwsem); + realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); + up_read(&mdsc->snap_rwsem); + if (!realm) + return false; + + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (in) { + ci = ceph_inode(in); + spin_lock(&ci->i_ceph_lock); + if (ci->i_max_bytes) { + total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT; + used = ci->i_rbytes >> CEPH_BLOCK_SHIFT; + /* It is possible for a quota to be exceeded. + * Report 'zero' in that case + */ + free = total > used ? total - used : 0; + } + spin_unlock(&ci->i_ceph_lock); + if (total) { + buf->f_blocks = total; + buf->f_bfree = free; + buf->f_bavail = free; + is_updated = true; + } + iput(in); + } + ceph_put_snap_realm(mdsc, realm); + + return is_updated; +} + diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 0fc03c456c50..b33082e6878f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -76,9 +76,18 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; - buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); - buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); - buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + + /* + * By default use root quota for stats; fallback to overall filesystem + * usage if using 'noquotadf' mount option or if the root dir doesn't + * have max_bytes quota set. + */ + if (ceph_test_mount_opt(fsc, NOQUOTADF) || + !ceph_quota_update_statfs(fsc, buf)) { + buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); + buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + } buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; @@ -151,6 +160,8 @@ enum { Opt_acl, #endif Opt_noacl, + Opt_quotadf, + Opt_noquotadf, }; static match_table_t fsopt_tokens = { @@ -187,6 +198,8 @@ static match_table_t fsopt_tokens = { {Opt_acl, "acl"}, #endif {Opt_noacl, "noacl"}, + {Opt_quotadf, "quotadf"}, + {Opt_noquotadf, "noquotadf"}, {-1, NULL} }; @@ -334,6 +347,12 @@ static int parse_fsopt_token(char *c, void *private) case Opt_norequire_active_mds: fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; break; + case Opt_quotadf: + fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; + break; + case Opt_noquotadf: + fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; + break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: fsopt->sb_flags |= SB_POSIXACL; @@ -520,6 +539,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) } if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) seq_puts(m, ",nopoolperm"); + if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) + seq_puts(m, ",noquotadf"); #ifdef CONFIG_CEPH_FS_POSIX_ACL if (fsopt->sb_flags & SB_POSIXACL) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1321a6749953..a7077a0c989f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -39,6 +39,7 @@ #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ +#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */ #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE @@ -1104,5 +1105,7 @@ extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newlen); extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newlen); +extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, + struct kstatfs *buf); #endif /* _FS_CEPH_SUPER_H */ -- cgit v1.2.3