summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-08-21 03:26:55 +0200
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-08-21 03:26:55 +0200
commit: 0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad (patch)
tree: f010b008554ceb5f649c735684bdab2f84f894c2 /fs/ceph
parent: Merge tag 'rtc-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/abellon... (diff)
parent: ceph: don't drop message if it contains more data than expected (diff)
download: linux-0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad.tar.xz
download: linux-0a78ac4b9bb15b2a00dc5a5aba22b0e48834e1ad.zip
Merge tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
 "The main things are support for cephx v2 authentication protocol and
  basic support for rbd images within namespaces (myself).

  Also included are y2038 conversion patches from Arnd, a pile of
  miscellaneous fixes from Chengguang and Zheng's feature bit
  infrastructure for the filesystem"

* tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client: (40 commits)
  ceph: don't drop message if it contains more data than expected
  ceph: support cephfs' own feature bits
  crush: fix using plain integer as NULL warning
  libceph: remove unnecessary non NULL check for request_key
  ceph: refactor error handling code in ceph_reserve_caps()
  ceph: refactor ceph_unreserve_caps()
  ceph: change to void return type for __do_request()
  ceph: compare fsc->max_file_size and inode->i_size for max file size limit
  ceph: add additional size check in ceph_setattr()
  ceph: add additional offset check in ceph_write_iter()
  ceph: add additional range check in ceph_fallocate()
  ceph: add new field max_file_size in ceph_fs_client
  libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
  libceph: check authorizer reply/challenge length before reading
  libceph: implement CEPHX_V2 calculation mode
  libceph: add authorizer challenge
  libceph: factor out encrypt_authorizer()
  libceph: factor out __ceph_x_decrypt()
  libceph: factor out __prepare_write_connect()
  libceph: store ceph_auth_handshake pointer in ceph_connection
  ...
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/acl.c 30
-rw-r--r-- fs/ceph/addr.c 74
-rw-r--r-- fs/ceph/cache.c 11
-rw-r--r-- fs/ceph/caps.c 138
-rw-r--r-- fs/ceph/dir.c 20
-rw-r--r-- fs/ceph/file.c 34
-rw-r--r-- fs/ceph/inode.c 83
-rw-r--r-- fs/ceph/mds_client.c 98
-rw-r--r-- fs/ceph/mds_client.h 14
-rw-r--r-- fs/ceph/quota.c 2
-rw-r--r-- fs/ceph/snap.c 6
-rw-r--r-- fs/ceph/super.c 6
-rw-r--r-- fs/ceph/super.h 12
-rw-r--r-- fs/ceph/xattr.c 4
14 files changed, 302 insertions, 230 deletions
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 59cb307b15fb..027408d55aee 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -45,6 +45,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
struct posix_acl *ceph_get_acl(struct inode *inode, int type)
{
int size;
+ unsigned int retry_cnt = 0;
const char *name;
char *value = NULL;
struct posix_acl *acl;
@@ -60,6 +61,7 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
BUG();
}
+retry:
size = __ceph_getxattr(inode, name, "", 0);
if (size > 0) {
value = kzalloc(size, GFP_NOFS);
@@ -68,12 +70,22 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
size = __ceph_getxattr(inode, name, value, size);
}
- if (size > 0)
+ if (size == -ERANGE && retry_cnt < 10) {
+ retry_cnt++;
+ kfree(value);
+ value = NULL;
+ goto retry;
+ }
+
+ if (size > 0) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
- else if (size == -ERANGE || size == -ENODATA || size == 0)
+ } else if (size == -ENODATA || size == 0) {
acl = NULL;
- else
+ } else {
+ pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n",
+ ceph_vinop(inode), size);
acl = ERR_PTR(-EIO);
+ }
kfree(value);
@@ -89,6 +101,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
const char *name = NULL;
char *value = NULL;
struct iattr newattrs;
+ struct timespec64 old_ctime = inode->i_ctime;
umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
switch (type) {
@@ -133,7 +146,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (new_mode != old_mode) {
newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
- newattrs.ia_valid = ATTR_MODE;
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
ret = __ceph_setattr(inode, &newattrs);
if (ret)
goto out_free;
@@ -142,8 +155,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
ret = __ceph_setxattr(inode, name, value, size, 0);
if (ret) {
if (new_mode != old_mode) {
+ newattrs.ia_ctime = old_ctime;
newattrs.ia_mode = old_mode;
- newattrs.ia_valid = ATTR_MODE;
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
__ceph_setattr(inode, &newattrs);
}
goto out_free;
@@ -171,10 +185,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
return err;
if (acl) {
- int ret = posix_acl_equiv_mode(acl, mode);
- if (ret < 0)
+ err = posix_acl_equiv_mode(acl, mode);
+ if (err < 0)
goto out_err;
- if (ret == 0) {
+ if (err == 0) {
posix_acl_release(acl);
acl = NULL;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 292b3d72d725..9c332a6f6667 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode,
*/
static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
{
- struct timespec ts;
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_fs_client *fsc;
@@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
set_page_writeback(page);
- ts = timespec64_to_timespec(inode->i_mtime);
err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, snapc, page_off, len,
ceph_wbc.truncate_seq,
ceph_wbc.truncate_size,
- &ts, &page, 1);
+ &inode->i_mtime, &page, 1);
if (err < 0) {
struct writeback_control tmp_wbc;
if (!wbc)
@@ -1134,7 +1132,7 @@ new_request:
pages = NULL;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
BUG_ON(rc);
req = NULL;
@@ -1431,7 +1429,7 @@ static void ceph_restore_sigs(sigset_t *oldset)
/*
* vm ops
*/
-static int ceph_filemap_fault(struct vm_fault *vmf)
+static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
@@ -1439,8 +1437,9 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
loff_t off = vmf->pgoff << PAGE_SHIFT;
- int want, got, ret;
+ int want, got, err;
sigset_t oldset;
+ vm_fault_t ret = VM_FAULT_SIGBUS;
ceph_block_sigs(&oldset);
@@ -1452,8 +1451,8 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
want = CEPH_CAP_FILE_CACHE;
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
- if (ret < 0)
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
+ if (err < 0)
goto out_restore;
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
@@ -1465,16 +1464,17 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
ceph_del_rw_context(fi, &rw_ctx);
+ dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
+ inode, off, (size_t)PAGE_SIZE,
+ ceph_cap_string(got), ret);
} else
- ret = -EAGAIN;
+ err = -EAGAIN;
- dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
- inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret);
if (pinned_page)
put_page(pinned_page);
ceph_put_cap_refs(ci, got);
- if (ret != -EAGAIN)
+ if (err != -EAGAIN)
goto out_restore;
/* read inline data */
@@ -1482,7 +1482,6 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
/* does not support inline data > PAGE_SIZE */
ret = VM_FAULT_SIGBUS;
} else {
- int ret1;
struct address_space *mapping = inode->i_mapping;
struct page *page = find_or_create_page(mapping, 0,
mapping_gfp_constraint(mapping,
@@ -1491,32 +1490,32 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
ret = VM_FAULT_OOM;
goto out_inline;
}
- ret1 = __ceph_do_getattr(inode, page,
+ err = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, true);
- if (ret1 < 0 || off >= i_size_read(inode)) {
+ if (err < 0 || off >= i_size_read(inode)) {
unlock_page(page);
put_page(page);
- if (ret1 < 0)
- ret = ret1;
+ if (err == -ENOMEM)
+ ret = VM_FAULT_OOM;
else
ret = VM_FAULT_SIGBUS;
goto out_inline;
}
- if (ret1 < PAGE_SIZE)
- zero_user_segment(page, ret1, PAGE_SIZE);
+ if (err < PAGE_SIZE)
+ zero_user_segment(page, err, PAGE_SIZE);
else
flush_dcache_page(page);
SetPageUptodate(page);
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
- dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
+ dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
inode, off, (size_t)PAGE_SIZE, ret);
}
out_restore:
ceph_restore_sigs(&oldset);
- if (ret < 0)
- ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ if (err < 0)
+ ret = vmf_error(err);
return ret;
}
@@ -1524,7 +1523,7 @@ out_restore:
/*
* Reuse write_begin here for simplicity.
*/
-static int ceph_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
@@ -1535,8 +1534,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
loff_t off = page_offset(page);
loff_t size = i_size_read(inode);
size_t len;
- int want, got, ret;
+ int want, got, err;
sigset_t oldset;
+ vm_fault_t ret = VM_FAULT_SIGBUS;
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
@@ -1550,10 +1550,10 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
lock_page(page);
locked_page = page;
}
- ret = ceph_uninline_data(vma->vm_file, locked_page);
+ err = ceph_uninline_data(vma->vm_file, locked_page);
if (locked_page)
unlock_page(locked_page);
- if (ret < 0)
+ if (err < 0)
goto out_free;
}
@@ -1570,9 +1570,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
&got, NULL);
- if (ret < 0)
+ if (err < 0)
goto out_free;
dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
@@ -1590,13 +1590,13 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
break;
}
- ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
- if (ret >= 0) {
+ err = ceph_update_writeable_page(vma->vm_file, off, len, page);
+ if (err >= 0) {
/* success. we'll keep the page locked. */
set_page_dirty(page);
ret = VM_FAULT_LOCKED;
}
- } while (ret == -EAGAIN);
+ } while (err == -EAGAIN);
if (ret == VM_FAULT_LOCKED ||
ci->i_inline_version != CEPH_INLINE_NONE) {
@@ -1610,14 +1610,14 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
__mark_inode_dirty(inode, dirty);
}
- dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
+ dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n",
inode, off, len, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
out_free:
ceph_restore_sigs(&oldset);
ceph_free_cap_flush(prealloc_cf);
- if (ret < 0)
- ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ if (err < 0)
+ ret = vmf_error(err);
return ret;
}
@@ -1734,7 +1734,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
goto out;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1776,7 +1776,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
goto out_put;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1937,7 +1937,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
0, false, true);
err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
- wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime);
+ wr_req->r_mtime = ci->vfs_inode.i_mtime;
err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
if (!err)
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 362900e42424..1bf3502bdd6f 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -25,8 +25,9 @@
#include "cache.h"
struct ceph_aux_inode {
- u64 version;
- struct timespec mtime;
+ u64 version;
+ u64 mtime_sec;
+ u64 mtime_nsec;
};
struct fscache_netfs ceph_cache_netfs = {
@@ -130,7 +131,8 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
- aux.mtime = timespec64_to_timespec(inode->i_mtime);
+ aux.mtime_sec = inode->i_mtime.tv_sec;
+ aux.mtime_nsec = inode->i_mtime.tv_nsec;
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
@@ -163,7 +165,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
if (!ci->fscache) {
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
- aux.mtime = timespec64_to_timespec(inode->i_mtime);
+ aux.mtime_sec = inode->i_mtime.tv_sec;
+ aux.mtime_nsec = inode->i_mtime.tv_nsec;
ci->fscache = fscache_acquire_cookie(fsc->fscache,
&ceph_fscache_inode_object_def,
&ci->i_vino, sizeof(ci->i_vino),
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 990258cbd836..dd7dfdd2ba13 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -156,6 +156,37 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
spin_unlock(&mdsc->caps_list_lock);
}
+static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps)
+{
+ struct ceph_cap *cap;
+ int i;
+
+ if (nr_caps) {
+ BUG_ON(mdsc->caps_reserve_count < nr_caps);
+ mdsc->caps_reserve_count -= nr_caps;
+ if (mdsc->caps_avail_count >=
+ mdsc->caps_reserve_count + mdsc->caps_min_count) {
+ mdsc->caps_total_count -= nr_caps;
+ for (i = 0; i < nr_caps; i++) {
+ cap = list_first_entry(&mdsc->caps_list,
+ struct ceph_cap, caps_item);
+ list_del(&cap->caps_item);
+ kmem_cache_free(ceph_cap_cachep, cap);
+ }
+ } else {
+ mdsc->caps_avail_count += nr_caps;
+ }
+
+ dout("%s: caps %d = %d used + %d resv + %d avail\n",
+ __func__,
+ mdsc->caps_total_count, mdsc->caps_use_count,
+ mdsc->caps_reserve_count, mdsc->caps_avail_count);
+ BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+ mdsc->caps_reserve_count +
+ mdsc->caps_avail_count);
+ }
+}
+
/*
* Called under mdsc->mutex.
*/
@@ -167,6 +198,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
int have;
int alloc = 0;
int max_caps;
+ int err = 0;
bool trimmed = false;
struct ceph_mds_session *s;
LIST_HEAD(newcaps);
@@ -233,9 +265,14 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
ctx, need, have + alloc);
- goto out_nomem;
+ err = -ENOMEM;
+ break;
+ }
+
+ if (!err) {
+ BUG_ON(have + alloc != need);
+ ctx->count = need;
}
- BUG_ON(have + alloc != need);
spin_lock(&mdsc->caps_list_lock);
mdsc->caps_total_count += alloc;
@@ -245,77 +282,26 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
mdsc->caps_reserve_count +
mdsc->caps_avail_count);
+
+ if (err)
+ __ceph_unreserve_caps(mdsc, have + alloc);
+
spin_unlock(&mdsc->caps_list_lock);
- ctx->count = need;
dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
ctx, mdsc->caps_total_count, mdsc->caps_use_count,
mdsc->caps_reserve_count, mdsc->caps_avail_count);
- return 0;
-
-out_nomem:
-
- spin_lock(&mdsc->caps_list_lock);
- mdsc->caps_avail_count += have;
- mdsc->caps_reserve_count -= have;
-
- while (!list_empty(&newcaps)) {
- cap = list_first_entry(&newcaps,
- struct ceph_cap, caps_item);
- list_del(&cap->caps_item);
-
- /* Keep some preallocated caps around (ceph_min_count), to
- * avoid lots of free/alloc churn. */
- if (mdsc->caps_avail_count >=
- mdsc->caps_reserve_count + mdsc->caps_min_count) {
- kmem_cache_free(ceph_cap_cachep, cap);
- } else {
- mdsc->caps_avail_count++;
- mdsc->caps_total_count++;
- list_add(&cap->caps_item, &mdsc->caps_list);
- }
- }
-
- BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
- mdsc->caps_reserve_count +
- mdsc->caps_avail_count);
- spin_unlock(&mdsc->caps_list_lock);
- return -ENOMEM;
+ return err;
}
-int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx)
{
- int i;
- struct ceph_cap *cap;
-
dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
- if (ctx->count) {
- spin_lock(&mdsc->caps_list_lock);
- BUG_ON(mdsc->caps_reserve_count < ctx->count);
- mdsc->caps_reserve_count -= ctx->count;
- if (mdsc->caps_avail_count >=
- mdsc->caps_reserve_count + mdsc->caps_min_count) {
- mdsc->caps_total_count -= ctx->count;
- for (i = 0; i < ctx->count; i++) {
- cap = list_first_entry(&mdsc->caps_list,
- struct ceph_cap, caps_item);
- list_del(&cap->caps_item);
- kmem_cache_free(ceph_cap_cachep, cap);
- }
- } else {
- mdsc->caps_avail_count += ctx->count;
- }
- ctx->count = 0;
- dout("unreserve caps %d = %d used + %d resv + %d avail\n",
- mdsc->caps_total_count, mdsc->caps_use_count,
- mdsc->caps_reserve_count, mdsc->caps_avail_count);
- BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
- mdsc->caps_reserve_count +
- mdsc->caps_avail_count);
- spin_unlock(&mdsc->caps_list_lock);
- }
- return 0;
+ spin_lock(&mdsc->caps_list_lock);
+ __ceph_unreserve_caps(mdsc, ctx->count);
+ ctx->count = 0;
+ spin_unlock(&mdsc->caps_list_lock);
}
struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
@@ -1125,7 +1111,7 @@ struct cap_msg_args {
u64 flush_tid, oldest_flush_tid, size, max_size;
u64 xattr_version;
struct ceph_buffer *xattr_buf;
- struct timespec atime, mtime, ctime;
+ struct timespec64 atime, mtime, ctime;
int op, caps, wanted, dirty;
u32 seq, issue_seq, mseq, time_warp_seq;
u32 flags;
@@ -1146,7 +1132,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
struct ceph_msg *msg;
void *p;
size_t extra_len;
- struct timespec zerotime = {0};
+ struct timespec64 zerotime = {0};
struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
@@ -1186,9 +1172,9 @@ static int send_cap_msg(struct cap_msg_args *arg)
fc->size = cpu_to_le64(arg->size);
fc->max_size = cpu_to_le64(arg->max_size);
- ceph_encode_timespec(&fc->mtime, &arg->mtime);
- ceph_encode_timespec(&fc->atime, &arg->atime);
- ceph_encode_timespec(&fc->ctime, &arg->ctime);
+ ceph_encode_timespec64(&fc->mtime, &arg->mtime);
+ ceph_encode_timespec64(&fc->atime, &arg->atime);
+ ceph_encode_timespec64(&fc->ctime, &arg->ctime);
fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq);
fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid));
@@ -1237,7 +1223,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
* We just zero these out for now, as the MDS ignores them unless
* the requisite feature flags are set (which we don't do yet).
*/
- ceph_encode_timespec(p, &zerotime);
+ ceph_encode_timespec64(p, &zerotime);
p += sizeof(struct ceph_timespec);
ceph_encode_64(&p, 0);
@@ -1360,9 +1346,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
arg.xattr_buf = NULL;
}
- arg.mtime = timespec64_to_timespec(inode->i_mtime);
- arg.atime = timespec64_to_timespec(inode->i_atime);
- arg.ctime = timespec64_to_timespec(inode->i_ctime);
+ arg.mtime = inode->i_mtime;
+ arg.atime = inode->i_atime;
+ arg.ctime = inode->i_ctime;
arg.op = op;
arg.caps = cap->implemented;
@@ -3148,11 +3134,11 @@ static void handle_cap_grant(struct inode *inode,
}
if (newcaps & CEPH_CAP_ANY_RD) {
- struct timespec mtime, atime, ctime;
+ struct timespec64 mtime, atime, ctime;
/* ctime/mtime/atime? */
- ceph_decode_timespec(&mtime, &grant->mtime);
- ceph_decode_timespec(&atime, &grant->atime);
- ceph_decode_timespec(&ctime, &grant->ctime);
+ ceph_decode_timespec64(&mtime, &grant->mtime);
+ ceph_decode_timespec64(&atime, &grant->atime);
+ ceph_decode_timespec64(&ctime, &grant->ctime);
ceph_fill_file_time(inode, extra_info->issued,
le32_to_cpu(grant->time_warp_seq),
&ctime, &mtime, &atime);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 036ac0f3a393..82928cea0209 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -827,12 +827,14 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
- if (ceph_quota_is_max_files_exceeded(dir))
- return -EDQUOT;
+ if (ceph_quota_is_max_files_exceeded(dir)) {
+ err = -EDQUOT;
+ goto out;
+ }
err = ceph_pre_init_acls(dir, &mode, &acls);
if (err < 0)
- return err;
+ goto out;
dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
dir, dentry, mode, rdev);
@@ -883,8 +885,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
- if (ceph_quota_is_max_files_exceeded(dir))
- return -EDQUOT;
+ if (ceph_quota_is_max_files_exceeded(dir)) {
+ err = -EDQUOT;
+ goto out;
+ }
dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
@@ -1393,7 +1397,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
" rfiles: %20lld\n"
" rsubdirs: %20lld\n"
"rbytes: %20lld\n"
- "rctime: %10ld.%09ld\n",
+ "rctime: %10lld.%09ld\n",
ci->i_files + ci->i_subdirs,
ci->i_files,
ci->i_subdirs,
@@ -1401,8 +1405,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
ci->i_rfiles,
ci->i_rsubdirs,
ci->i_rbytes,
- (long)ci->i_rctime.tv_sec,
- (long)ci->i_rctime.tv_nsec);
+ ci->i_rctime.tv_sec,
+ ci->i_rctime.tv_nsec);
}
if (*ppos >= dfi->dir_info_len)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e2679e8a2535..92ab20433682 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -720,7 +720,7 @@ struct ceph_aio_request {
struct list_head osd_reqs;
unsigned num_reqs;
atomic_t pending_reqs;
- struct timespec mtime;
+ struct timespec64 mtime;
struct ceph_cap_flush *prealloc_cf;
};
@@ -922,7 +922,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
int num_pages = 0;
int flags;
int ret;
- struct timespec mtime = timespec64_to_timespec(current_time(inode));
+ struct timespec64 mtime = current_time(inode);
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos;
bool write = iov_iter_rw(iter) == WRITE;
@@ -1130,7 +1130,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
int flags;
int ret;
bool check_caps = false;
- struct timespec mtime = timespec64_to_timespec(current_time(inode));
+ struct timespec64 mtime = current_time(inode);
size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -1383,12 +1383,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_osd_client *osdc =
- &ceph_sb_to_client(inode->i_sb)->client->osdc;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
loff_t pos;
+ loff_t limit = max(i_size_read(inode), fsc->max_file_size);
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -1414,6 +1414,13 @@ retry_snap:
goto out;
pos = iocb->ki_pos;
+ if (unlikely(pos >= limit)) {
+ err = -EFBIG;
+ goto out;
+ } else {
+ iov_iter_truncate(from, limit - pos);
+ }
+
count = iov_iter_count(from);
if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) {
err = -EDQUOT;
@@ -1435,7 +1442,7 @@ retry_snap:
}
/* FIXME: not complete since it doesn't account for being at quota */
- if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
+ if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1525,7 +1532,7 @@ retry_snap:
}
if (written >= 0) {
- if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL))
+ if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
}
@@ -1546,6 +1553,7 @@ out_unlocked:
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
loff_t i_size;
loff_t ret;
@@ -1590,7 +1598,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
break;
}
- ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+ ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size));
out:
inode_unlock(inode);
@@ -1662,7 +1670,7 @@ static int ceph_zero_partial_object(struct inode *inode,
goto out;
}
- req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+ req->r_mtime = inode->i_mtime;
ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!ret) {
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1727,8 +1735,7 @@ static long ceph_fallocate(struct file *file, int mode,
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_osd_client *osdc =
- &ceph_inode_to_client(inode)->client->osdc;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_cap_flush *prealloc_cf;
int want, got = 0;
int dirty;
@@ -1736,6 +1743,9 @@ static long ceph_fallocate(struct file *file, int mode,
loff_t endoff = 0;
loff_t size;
+ if ((offset + length) > max(i_size_read(inode), fsc->max_file_size))
+ return -EFBIG;
+
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
@@ -1759,7 +1769,7 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
- if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
+ if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) &&
!(mode & FALLOC_FL_PUNCH_HOLE)) {
ret = -ENOSPC;
goto unlock;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a866be999216..ebc7bdaed2d0 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -658,13 +658,10 @@ int ceph_fill_file_size(struct inode *inode, int issued,
}
void ceph_fill_file_time(struct inode *inode, int issued,
- u64 time_warp_seq, struct timespec *ctime,
- struct timespec *mtime, struct timespec *atime)
+ u64 time_warp_seq, struct timespec64 *ctime,
+ struct timespec64 *mtime, struct timespec64 *atime)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- struct timespec64 ctime64 = timespec_to_timespec64(*ctime);
- struct timespec64 mtime64 = timespec_to_timespec64(*mtime);
- struct timespec64 atime64 = timespec_to_timespec64(*atime);
int warn = 0;
if (issued & (CEPH_CAP_FILE_EXCL|
@@ -673,39 +670,39 @@ void ceph_fill_file_time(struct inode *inode, int issued,
CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) {
if (ci->i_version == 0 ||
- timespec64_compare(&ctime64, &inode->i_ctime) > 0) {
+ timespec64_compare(ctime, &inode->i_ctime) > 0) {
dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
- (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- (long long)ctime->tv_sec, ctime->tv_nsec);
- inode->i_ctime = ctime64;
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ ctime->tv_sec, ctime->tv_nsec);
+ inode->i_ctime = *ctime;
}
if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
/* the MDS did a utimes() */
dout("mtime %lld.%09ld -> %lld.%09ld "
"tw %d -> %d\n",
- (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- (long long)mtime->tv_sec, mtime->tv_nsec,
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ mtime->tv_sec, mtime->tv_nsec,
ci->i_time_warp_seq, (int)time_warp_seq);
- inode->i_mtime = mtime64;
- inode->i_atime = atime64;
+ inode->i_mtime = *mtime;
+ inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
} else if (time_warp_seq == ci->i_time_warp_seq) {
/* nobody did utimes(); take the max */
- if (timespec64_compare(&mtime64, &inode->i_mtime) > 0) {
+ if (timespec64_compare(mtime, &inode->i_mtime) > 0) {
dout("mtime %lld.%09ld -> %lld.%09ld inc\n",
- (long long)inode->i_mtime.tv_sec,
+ inode->i_mtime.tv_sec,
inode->i_mtime.tv_nsec,
- (long long)mtime->tv_sec, mtime->tv_nsec);
- inode->i_mtime = mtime64;
+ mtime->tv_sec, mtime->tv_nsec);
+ inode->i_mtime = *mtime;
}
- if (timespec64_compare(&atime64, &inode->i_atime) > 0) {
+ if (timespec64_compare(atime, &inode->i_atime) > 0) {
dout("atime %lld.%09ld -> %lld.%09ld inc\n",
- (long long)inode->i_atime.tv_sec,
+ inode->i_atime.tv_sec,
inode->i_atime.tv_nsec,
- (long long)atime->tv_sec, atime->tv_nsec);
- inode->i_atime = atime64;
+ atime->tv_sec, atime->tv_nsec);
+ inode->i_atime = *atime;
}
} else if (issued & CEPH_CAP_FILE_EXCL) {
/* we did a utimes(); ignore mds values */
@@ -715,9 +712,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
} else {
/* we have no write|excl caps; whatever the MDS says is true */
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
- inode->i_ctime = ctime64;
- inode->i_mtime = mtime64;
- inode->i_atime = atime64;
+ inode->i_ctime = *ctime;
+ inode->i_mtime = *mtime;
+ inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
} else {
warn = 1;
@@ -743,7 +740,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_inode *info = iinfo->in;
struct ceph_inode_info *ci = ceph_inode(inode);
int issued, new_issued, info_caps;
- struct timespec mtime, atime, ctime;
+ struct timespec64 mtime, atime, ctime;
struct ceph_buffer *xattr_blob = NULL;
struct ceph_string *pool_ns = NULL;
struct ceph_cap *new_cap = NULL;
@@ -823,9 +820,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
/* be careful with mtime, atime, size */
- ceph_decode_timespec(&atime, &info->atime);
- ceph_decode_timespec(&mtime, &info->mtime);
- ceph_decode_timespec(&ctime, &info->ctime);
+ ceph_decode_timespec64(&atime, &info->atime);
+ ceph_decode_timespec64(&mtime, &info->mtime);
+ ceph_decode_timespec64(&ctime, &info->ctime);
ceph_fill_file_time(inode, issued,
le32_to_cpu(info->time_warp_seq),
&ctime, &mtime, &atime);
@@ -872,7 +869,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
ci->i_rbytes = le64_to_cpu(info->rbytes);
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
- ceph_decode_timespec(&ci->i_rctime, &info->rctime);
+ ceph_decode_timespec64(&ci->i_rctime, &info->rctime);
}
}
@@ -1954,7 +1951,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
int err = 0;
int inode_dirty_flags = 0;
bool lock_snap_rwsem = false;
- struct timespec ts;
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
@@ -2030,8 +2026,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
if (ia_valid & ATTR_ATIME) {
dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode,
- (long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
- (long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+ inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
+ attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_atime = attr->ia_atime;
@@ -2043,8 +2039,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
dirtied |= CEPH_CAP_FILE_WR;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
!timespec64_equal(&inode->i_atime, &attr->ia_atime)) {
- ts = timespec64_to_timespec(attr->ia_atime);
- ceph_encode_timespec(&req->r_args.setattr.atime, &ts);
+ ceph_encode_timespec64(&req->r_args.setattr.atime,
+ &attr->ia_atime);
mask |= CEPH_SETATTR_ATIME;
release |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
@@ -2052,8 +2048,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
}
if (ia_valid & ATTR_MTIME) {
dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
- (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
- (long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+ attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_mtime = attr->ia_mtime;
@@ -2065,8 +2061,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
dirtied |= CEPH_CAP_FILE_WR;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
!timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) {
- ts = timespec64_to_timespec(attr->ia_mtime);
- ceph_encode_timespec(&req->r_args.setattr.mtime, &ts);
+ ceph_encode_timespec64(&req->r_args.setattr.mtime,
+ &attr->ia_mtime);
mask |= CEPH_SETATTR_MTIME;
release |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
@@ -2097,8 +2093,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
- (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
- (long long)attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
if (only) {
/*
@@ -2140,7 +2136,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
- req->r_stamp = timespec64_to_timespec(attr->ia_ctime);
+ req->r_stamp = attr->ia_ctime;
err = ceph_mdsc_do_request(mdsc, NULL, req);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
@@ -2161,6 +2157,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int err;
if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -2171,6 +2168,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if ((attr->ia_valid & ATTR_SIZE) &&
+ attr->ia_size > max(inode->i_size, fsc->max_file_size))
+ return -EFBIG;
+
+ if ((attr->ia_valid & ATTR_SIZE) &&
ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
return -EDQUOT;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index dc8bc664a871..bc43c822426a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -902,6 +902,27 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
return msg;
}
+/*
+ * Append the client's supported-feature bitmap to a session message.
+ *
+ * Writes a 32-bit length followed by a zeroed bitmap in which bit N is set
+ * for every feature number N in CEPHFS_FEATURES_CLIENT_SUPPORTED.  The
+ * bitmap length is rounded up to a multiple of 8 bytes.  With an empty
+ * feature list only a zero length is written.  *p is advanced past
+ * everything that was written.
+ *
+ * The bitmap is sized from the last array entry, so the feature list must
+ * end with its highest bit number.  The caller must have reserved enough
+ * room before @end; running out of space trips BUG_ON().
+ */
+static void encode_supported_features(void **p, void *end)
+{
+	static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED;
+	static const size_t count = ARRAY_SIZE(bits);
+
+	if (count > 0) {
+		size_t i;
+		size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8;
+		unsigned char *bitmap;
+
+		BUG_ON(*p + 4 + size > end);
+		ceph_encode_32(p, size);
+		bitmap = *p;
+		memset(bitmap, 0, size);
+		/*
+		 * Select the byte by feature number, not by position in
+		 * bits[], so a non-contiguous feature list still sets the
+		 * right bits.
+		 */
+		for (i = 0; i < count; i++)
+			bitmap[bits[i] / 8] |= 1 << (bits[i] % 8);
+		*p += size;
+	} else {
+		BUG_ON(*p + 4 > end);
+		ceph_encode_32(p, 0);
+	}
+}
+
/*
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
* to include additional client metadata fields.
@@ -911,11 +932,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
int i = -1;
- int metadata_bytes = 0;
+ int extra_bytes = 0;
int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options;
struct ceph_mount_options *fsopt = mdsc->fsc->mount_options;
- void *p;
+ void *p, *end;
const char* metadata[][2] = {
{"hostname", mdsc->nodename},
@@ -926,21 +947,26 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
};
/* Calculate serialized length of metadata */
- metadata_bytes = 4; /* map length */
+ extra_bytes = 4; /* map length */
for (i = 0; metadata[i][0]; ++i) {
- metadata_bytes += 8 + strlen(metadata[i][0]) +
+ extra_bytes += 8 + strlen(metadata[i][0]) +
strlen(metadata[i][1]);
metadata_key_count++;
}
+ /* supported feature */
+ extra_bytes += 4 + 8;
/* Allocate the message */
- msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes,
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
GFP_NOFS, false);
if (!msg) {
pr_err("create_session_msg ENOMEM creating msg\n");
return NULL;
}
- h = msg->front.iov_base;
+ p = msg->front.iov_base;
+ end = p + msg->front.iov_len;
+
+ h = p;
h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
h->seq = cpu_to_le64(seq);
@@ -950,11 +976,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
*
* ClientSession messages with metadata are v2
*/
- msg->hdr.version = cpu_to_le16(2);
+ msg->hdr.version = cpu_to_le16(3);
msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
- p = msg->front.iov_base + sizeof(*h);
+ p += sizeof(*h);
/* Number of entries in the map */
ceph_encode_32(&p, metadata_key_count);
@@ -972,6 +998,10 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
p += val_len;
}
+ encode_supported_features(&p, end);
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
return msg;
}
@@ -1779,6 +1809,7 @@ struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
{
struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
+ struct timespec64 ts;
if (!req)
return ERR_PTR(-ENOMEM);
@@ -1797,7 +1828,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
- req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
+ ktime_get_coarse_real_ts64(&ts);
+ req->r_stamp = timespec64_trunc(ts, mdsc->fsc->sb->s_time_gran);
req->r_op = op;
req->r_direct_mode = mode;
@@ -2094,7 +2126,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
/* time stamp */
{
struct ceph_timespec ts;
- ceph_encode_timespec(&ts, &req->r_stamp);
+ ceph_encode_timespec64(&ts, &req->r_stamp);
ceph_encode_copy(&p, &ts, sizeof(ts));
}
@@ -2187,7 +2219,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
p = msg->front.iov_base + req->r_request_release_offset;
{
struct ceph_timespec ts;
- ceph_encode_timespec(&ts, &req->r_stamp);
+ ceph_encode_timespec64(&ts, &req->r_stamp);
ceph_encode_copy(&p, &ts, sizeof(ts));
}
@@ -2225,7 +2257,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/*
* send request, or put it on the appropriate wait list.
*/
-static int __do_request(struct ceph_mds_client *mdsc,
+static void __do_request(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
struct ceph_mds_session *session = NULL;
@@ -2235,7 +2267,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
__unregister_request(mdsc, req);
- goto out;
+ return;
}
if (req->r_timeout &&
@@ -2258,7 +2290,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (mdsc->mdsmap->m_epoch == 0) {
dout("do_request no mdsmap, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map);
- goto finish;
+ return;
}
if (!(mdsc->fsc->mount_options->flags &
CEPH_MOUNT_OPT_MOUNTWAIT) &&
@@ -2276,7 +2308,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
dout("do_request no mds or not active, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map);
- goto out;
+ return;
}
/* get, open session */
@@ -2326,8 +2358,7 @@ finish:
complete_request(mdsc, req);
__unregister_request(mdsc, req);
}
-out:
- return err;
+ return;
}
/*
@@ -2748,7 +2779,7 @@ static void handle_session(struct ceph_mds_session *session,
int wake = 0;
/* decode */
- if (msg->front.iov_len != sizeof(*h))
+ if (msg->front.iov_len < sizeof(*h))
goto bad;
op = le32_to_cpu(h->op);
seq = le64_to_cpu(h->seq);
@@ -2958,15 +2989,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v2.flock_len = (__force __le32)
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
- struct timespec ts;
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v1.issued = cpu_to_le32(cap->issued);
rec.v1.size = cpu_to_le64(inode->i_size);
- ts = timespec64_to_timespec(inode->i_mtime);
- ceph_encode_timespec(&rec.v1.mtime, &ts);
- ts = timespec64_to_timespec(inode->i_atime);
- ceph_encode_timespec(&rec.v1.atime, &ts);
+ ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
+ ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
rec.v1.pathbase = cpu_to_le64(pathbase);
}
@@ -3378,10 +3406,10 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
seq = le32_to_cpu(h->seq);
- dname.name = (void *)h + sizeof(*h) + sizeof(u32);
- dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
- if (dname.len != get_unaligned_le32(h+1))
+ dname.len = get_unaligned_le32(h + 1);
+ if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len)
goto bad;
+ dname.name = (void *)(h + 1) + sizeof(u32);
/* lookup inode */
inode = ceph_find_inode(sb, vino);
@@ -3644,8 +3672,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_rwsem(&mdsc->pool_perm_rwsem);
mdsc->pool_perm_tree = RB_ROOT;
- strncpy(mdsc->nodename, utsname()->nodename,
- sizeof(mdsc->nodename) - 1);
+ strscpy(mdsc->nodename, utsname()->nodename,
+ sizeof(mdsc->nodename));
return 0;
}
@@ -4019,7 +4047,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
} else {
mdsc->mdsmap = newmap; /* first mds map */
}
- mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
+ mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size,
+ MAX_LFS_FILESIZE);
__wake_requests(mdsc, &mdsc->waiting_for_map);
ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
@@ -4155,6 +4184,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
+/*
+ * Connection callback: hand an authorizer challenge received on @con to
+ * the auth layer, for the authorizer of the MDS session that owns the
+ * connection.  Returns whatever ceph_auth_add_authorizer_challenge()
+ * returns.
+ */
+static int add_authorizer_challenge(struct ceph_connection *con,
+				    void *challenge_buf, int challenge_buf_len)
+{
+	struct ceph_mds_session *session = con->private;
+	struct ceph_auth_client *auth_client =
+		session->s_mdsc->fsc->client->monc.auth;
+
+	return ceph_auth_add_authorizer_challenge(auth_client,
+						  session->s_auth.authorizer,
+						  challenge_buf,
+						  challenge_buf_len);
+}
static int verify_authorizer_reply(struct ceph_connection *con)
{
@@ -4218,6 +4257,7 @@ static const struct ceph_connection_operations mds_con_ops = {
.put = con_put,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
+ .add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 2ec3b5b35067..32fcce0d4d3c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -16,6 +16,18 @@
#include <linux/ceph/mdsmap.h>
#include <linux/ceph/auth.h>
+/*
+ * CephFS feature bit numbers.  The first 8 bits are reserved for old ceph
+ * releases.
+ *
+ * The feature lists below are brace-enclosed initializers of bit numbers.
+ * encode_supported_features() in mds_client.c sizes its bitmap from the
+ * last entry of CEPHFS_FEATURES_CLIENT_SUPPORTED, so keep the highest bit
+ * number last when adding entries.
+ */
+#define CEPHFS_FEATURE_MIMIC 8
+
+#define CEPHFS_FEATURES_ALL { \
+ 0, 1, 2, 3, 4, 5, 6, 7, \
+ CEPHFS_FEATURE_MIMIC, \
+}
+
+#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL
+#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+
+
/*
* Some lock dependencies:
*
@@ -229,7 +241,7 @@ struct ceph_mds_request {
int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid;
kgid_t r_gid;
- struct timespec r_stamp;
+ struct timespec64 r_stamp;
/* for choosing which mds to send this request to */
int r_direct_mode;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 242bfa5c0539..32d4f13784ba 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -48,7 +48,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct inode *inode;
struct ceph_inode_info *ci;
- if (msg->front.iov_len != sizeof(*h)) {
+ if (msg->front.iov_len < sizeof(*h)) {
pr_err("%s corrupt message mds%d len %d\n", __func__,
session->s_mds, (int)msg->front.iov_len);
ceph_msg_dump(msg);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index af81555c14fd..041c27ea8de1 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -594,9 +594,9 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
BUG_ON(capsnap->writing);
capsnap->size = inode->i_size;
- capsnap->mtime = timespec64_to_timespec(inode->i_mtime);
- capsnap->atime = timespec64_to_timespec(inode->i_atime);
- capsnap->ctime = timespec64_to_timespec(inode->i_ctime);
+ capsnap->mtime = inode->i_mtime;
+ capsnap->atime = inode->i_atime;
+ capsnap->ctime = inode->i_ctime;
capsnap->time_warp_seq = ci->i_time_warp_seq;
capsnap->truncate_size = ci->i_truncate_size;
capsnap->truncate_seq = ci->i_truncate_seq;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 95a3b3ac9b6e..43ca3b763875 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -219,8 +219,7 @@ static int parse_fsopt_token(char *c, void *private)
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
- pr_err("bad mount option arg (not int) "
- "at '%s'\n", c);
+ pr_err("bad option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
@@ -941,11 +940,12 @@ static int ceph_set_super(struct super_block *s, void *data)
dout("set_super %p data %p\n", s, data);
s->s_flags = fsc->mount_options->sb_flags;
- s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
+ s->s_maxbytes = MAX_LFS_FILESIZE;
s->s_xattr = ceph_xattr_handlers;
s->s_fs_info = fsc;
fsc->sb = s;
+ fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
s->s_op = &ceph_super_ops;
s->s_d_op = &ceph_dentry_ops;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 971328b99ede..582e28fd1b7b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -98,6 +98,7 @@ struct ceph_fs_client {
unsigned long mount_state;
int min_caps; /* min caps i added */
+ loff_t max_file_size;
struct ceph_mds_client *mdsc;
@@ -193,7 +194,7 @@ struct ceph_cap_snap {
u64 xattr_version;
u64 size;
- struct timespec mtime, atime, ctime;
+ struct timespec64 mtime, atime, ctime;
u64 time_warp_seq;
u64 truncate_size;
u32 truncate_seq;
@@ -307,7 +308,7 @@ struct ceph_inode_info {
char *i_symlink;
/* for dirs */
- struct timespec i_rctime;
+ struct timespec64 i_rctime;
u64 i_rbytes, i_rfiles, i_rsubdirs;
u64 i_files, i_subdirs;
@@ -655,7 +656,7 @@ extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx, int need);
-extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
+extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx);
extern void ceph_reservation_status(struct ceph_fs_client *client,
int *total, int *avail, int *used,
@@ -857,8 +858,9 @@ extern struct inode *ceph_get_snapdir(struct inode *parent);
extern int ceph_fill_file_size(struct inode *inode, int issued,
u32 truncate_seq, u64 truncate_size, u64 size);
extern void ceph_fill_file_time(struct inode *inode, int issued,
- u64 time_warp_seq, struct timespec *ctime,
- struct timespec *mtime, struct timespec *atime);
+ u64 time_warp_seq, struct timespec64 *ctime,
+ struct timespec64 *mtime,
+ struct timespec64 *atime);
extern int ceph_fill_trace(struct super_block *sb,
struct ceph_mds_request *req);
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5bc8edb4c2a6..5cc8b94f8206 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -224,8 +224,8 @@ static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
+/*
+ * Format the recursive ctime stored in ci->i_rctime into @val as
+ * "<seconds>.<nanoseconds>", with the nanoseconds zero-padded to nine
+ * digits so the fractional part reads correctly.  At most @size bytes are
+ * written; the snprintf() result is returned.
+ */
static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
size_t size)
{
- return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
- (long)ci->i_rctime.tv_nsec);
+	/* "%09ld", not ".09%ld": the latter emits a literal "09" prefix */
+	return snprintf(val, size, "%lld.%09ld", ci->i_rctime.tv_sec,
+			ci->i_rctime.tv_nsec);
}
/* quotas */