From e1518c7c0a67a75727f7285780dbef0ca7121cc9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 May 2010 11:19:06 -0700 Subject: ceph: clean up mds reply, error handling We would occasionally BUG out in the reply handler because r_reply was nonzero, due to a race with ceph_mdsc_do_request temporarily setting r_reply to an ERR_PTR value. This is unnecessary, messy, and also wrong in the EIO case. Clean up by consistently using r_err for errors and r_reply for messages. Also fix the abort logic to trigger consistently for all errors that return to the caller early (e.g., EIO from timeout case). If an abort races with a reply, use the result from the reply. Also fix locking for r_err, r_reply update in the reply handler. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 111 +++++++++++++++++++++++++-------------------------- fs/ceph/mds_client.h | 2 +- 2 files changed, 55 insertions(+), 58 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 24561a557e01..b3b19f05b821 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1517,7 +1517,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, } msg = create_request_message(mdsc, req, mds); if (IS_ERR(msg)) { - req->r_reply = ERR_PTR(PTR_ERR(msg)); + req->r_err = PTR_ERR(msg); complete_request(mdsc, req); return -PTR_ERR(msg); } @@ -1552,7 +1552,7 @@ static int __do_request(struct ceph_mds_client *mdsc, int mds = -1; int err = -EAGAIN; - if (req->r_reply) + if (req->r_err || req->r_got_result) goto out; if (req->r_timeout && @@ -1609,7 +1609,7 @@ out: return err; finish: - req->r_reply = ERR_PTR(err); + req->r_err = err; complete_request(mdsc, req); goto out; } @@ -1689,59 +1689,53 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, __register_request(mdsc, req, dir); __do_request(mdsc, req); - /* wait */ - if (!req->r_reply) { - mutex_unlock(&mdsc->mutex); - if (req->r_timeout) { - err = (long)wait_for_completion_interruptible_timeout( - &req->r_completion, req->r_timeout); - if (err == 0) - req->r_reply = ERR_PTR(-EIO); - else if (err < 0) - req->r_reply = ERR_PTR(err); - } else { - err = wait_for_completion_interruptible( - &req->r_completion); - if (err) - req->r_reply = ERR_PTR(err); - } - mutex_lock(&mdsc->mutex); + if (req->r_err) { + err = req->r_err; + __unregister_request(mdsc, req); + dout("do_request early error %d\n", err); + goto out; } - if (IS_ERR(req->r_reply)) { - err = PTR_ERR(req->r_reply); - req->r_reply = NULL; - - if (err == -ERESTARTSYS) { - /* aborted */ - req->r_aborted = true; + /* wait */ + mutex_unlock(&mdsc->mutex); + dout("do_request waiting\n"); + if (req->r_timeout) { + err = (long)wait_for_completion_interruptible_timeout( + &req->r_completion, req->r_timeout); + if (err == 0) + err = -EIO; + } else { + err = wait_for_completion_interruptible(&req->r_completion); + } + dout("do_request waited, got %d\n", err); + mutex_lock(&mdsc->mutex); - if (req->r_locked_dir && - (req->r_op & CEPH_MDS_OP_WRITE)) { - struct ceph_inode_info *ci = - ceph_inode(req->r_locked_dir); + /* only abort if we didn't race with a real reply */ + if (req->r_got_result) { + err = le32_to_cpu(req->r_reply_info.head->result); + } else if (err < 0) { + dout("aborted request %lld with %d\n", req->r_tid, err); + req->r_err = err; + req->r_aborted = true; - dout("aborted, clearing I_COMPLETE on %p\n", - req->r_locked_dir); - spin_lock(&req->r_locked_dir->i_lock); - ci->i_ceph_flags &= ~CEPH_I_COMPLETE; - ci->i_release_count++; - spin_unlock(&req->r_locked_dir->i_lock); - } - } else { - /* clean up this request */ - __unregister_request(mdsc, req); - if (!list_empty(&req->r_unsafe_item)) - list_del_init(&req->r_unsafe_item); - complete(&req->r_safe_completion); + if (req->r_locked_dir && + (req->r_op & CEPH_MDS_OP_WRITE)) { + struct ceph_inode_info *ci = + ceph_inode(req->r_locked_dir); + + dout("aborted, clearing I_COMPLETE on %p\n", + req->r_locked_dir); + spin_lock(&req->r_locked_dir->i_lock); + ci->i_ceph_flags &= ~CEPH_I_COMPLETE; + ci->i_release_count++; + spin_unlock(&req->r_locked_dir->i_lock); } - } else if (req->r_err) { - err = req->r_err; } else { - err = le32_to_cpu(req->r_reply_info.head->result); + err = req->r_err; } - mutex_unlock(&mdsc->mutex); +out: + mutex_unlock(&mdsc->mutex); dout("do_request %p done, result %d\n", req, err); return err; } @@ -1838,11 +1832,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_unlock(&mdsc->mutex); goto out; } - } - - BUG_ON(req->r_reply); - - if (!head->safe) { + } else { req->r_got_unsafe = true; list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); } @@ -1880,12 +1870,19 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) up_read(&mdsc->snap_rwsem); out_err: - if (err) { - req->r_err = err; + mutex_lock(&mdsc->mutex); + if (!req->r_aborted) { + if (err) { + req->r_err = err; + } else { + req->r_reply = msg; + ceph_msg_get(msg); + req->r_got_result = true; + } } else { - req->r_reply = msg; - ceph_msg_get(msg); + dout("reply arrived after request %lld was aborted\n", tid); } + mutex_unlock(&mdsc->mutex); add_cap_releases(mdsc, req->r_session, -1); mutex_unlock(&session->s_mutex); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 961cc6f65878..0b1dd10be39a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -213,7 +213,7 @@ struct ceph_mds_request { struct completion r_safe_completion; ceph_mds_request_callback_t r_callback; struct list_head r_unsafe_item; /* per-session unsafe list item */ - bool r_got_unsafe, r_got_safe; + bool r_got_unsafe, r_got_safe, r_got_result; bool r_did_prepopulate; u32 r_readdir_offset; -- cgit v1.2.3 From b4556396fac5b3f063d5b8ac54dc02f7612a75e1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 May 2010 12:01:13 -0700 Subject: ceph: fix race between aborted requests and fill_trace When we abort requests we need to prevent fill_trace et al from doing anything that relies on locks held by the VFS caller. This fixes a race between the reply handler and the abort code, ensuring that continue holding the dir mutex until the reply handler completes. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 11 +++++++++++ fs/ceph/mds_client.h | 2 ++ 2 files changed, 13 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b3b19f05b821..c0568fe3c0ba 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1181,6 +1181,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) if (!req) return ERR_PTR(-ENOMEM); + mutex_init(&req->r_fill_mutex); req->r_started = jiffies; req->r_resend_mds = -1; INIT_LIST_HEAD(&req->r_unsafe_dir_item); @@ -1715,8 +1716,16 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, err = le32_to_cpu(req->r_reply_info.head->result); } else if (err < 0) { dout("aborted request %lld with %d\n", req->r_tid, err); + + /* + * ensure we aren't running concurrently with + * ceph_fill_trace or ceph_readdir_prepopulate, which + * rely on locks (dir mutex) held by our caller. + */ + mutex_lock(&req->r_fill_mutex); req->r_err = err; req->r_aborted = true; + mutex_unlock(&req->r_fill_mutex); if (req->r_locked_dir && (req->r_op & CEPH_MDS_OP_WRITE)) { @@ -1861,12 +1870,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } /* insert trace into our cache */ + mutex_lock(&req->r_fill_mutex); err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); if (err == 0) { if (result == 0 && rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(&req->r_caps_reservation); } + mutex_unlock(&req->r_fill_mutex); up_read(&mdsc->snap_rwsem); out_err: diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 0b1dd10be39a..141a265bda75 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -165,6 +165,8 @@ struct ceph_mds_request { struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ struct inode *r_target_inode; /* resulting inode */ + struct mutex r_fill_mutex; + union ceph_mds_request_args r_args; int r_fmode; /* file mode, if expecting cap */ -- cgit v1.2.3 From 81a6cf2d30eac5d790f53cdff110892f7b18c7fe Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 May 2010 09:35:38 -0700 Subject: ceph: invalidate affected dentry leases on aborted requests If we abort a request, we return to caller, but the request may still complete. And if we hold the dir FILE_EXCL bit, we may not release a lease when sending a request. A simple un-tar, control-c, un-tar again will reproduce the bug (manifested as a 'Cannot open: File exists'). Ensure we invalidate affected dentry leases (as well dir I_COMPLETE) so we don't have valid (but incorrect) leases. Do the same, consistently, at other sites where I_COMPLETE is similarly cleared. Signed-off-by: Sage Weil --- fs/ceph/dir.c | 13 +++++++++++-- fs/ceph/inode.c | 13 +++++++++++-- fs/ceph/mds_client.c | 7 ++++++- fs/ceph/super.h | 1 + 4 files changed, 29 insertions(+), 5 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 650d2db5ed26..4b1a7a4bae0b 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -888,13 +888,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ - new_dentry->d_time = jiffies; - ceph_dentry(new_dentry)->lease_shared_gen = 0; + ceph_invalidate_dentry_lease(new_dentry); } ceph_mdsc_put_request(req); return err; } +/* + * Ensure a dentry lease will no longer revalidate. + */ +void ceph_invalidate_dentry_lease(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_time = jiffies; + ceph_dentry(dentry)->lease_shared_gen = 0; + spin_unlock(&dentry->d_lock); +} /* * Check if dentry lease is valid. If not, delete the lease. Try to diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 85b4d2ffdeba..220a2aec0545 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -938,8 +938,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, ceph_inode(req->r_locked_dir); dout(" clearing %p complete (empty trace)\n", req->r_locked_dir); + spin_lock(&req->r_locked_dir->i_lock); ci->i_ceph_flags &= ~CEPH_I_COMPLETE; ci->i_release_count++; + spin_unlock(&req->r_locked_dir->i_lock); + + if (req->r_dentry) + ceph_invalidate_dentry_lease(req->r_dentry); + if (req->r_old_dentry) + ceph_invalidate_dentry_lease(req->r_old_dentry); } return 0; } @@ -1011,13 +1018,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, req->r_old_dentry->d_name.len, req->r_old_dentry->d_name.name, dn, dn->d_name.len, dn->d_name.name); + /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ - dn->d_time = jiffies; - ceph_dentry(dn)->lease_shared_gen = 0; + ceph_invalidate_dentry_lease(dn); + /* take overwritten dentry's readdir offset */ ceph_dentry(req->r_old_dentry)->offset = ceph_dentry(dn)->offset; + dn = req->r_old_dentry; /* use old_dentry */ in = dn->d_inode; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c0568fe3c0ba..76995a960432 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1732,12 +1732,17 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci = ceph_inode(req->r_locked_dir); - dout("aborted, clearing I_COMPLETE on %p\n", + dout("aborted, clearing I_COMPLETE on %p, leases\n", req->r_locked_dir); spin_lock(&req->r_locked_dir->i_lock); ci->i_ceph_flags &= ~CEPH_I_COMPLETE; ci->i_release_count++; spin_unlock(&req->r_locked_dir->i_lock); + + if (req->r_dentry) + ceph_invalidate_dentry_lease(req->r_dentry); + if (req->r_old_dentry) + ceph_invalidate_dentry_lease(req->r_old_dentry); } } else { err = req->r_err; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 13513b80d87f..cfe3b0834d54 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -871,6 +871,7 @@ extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, extern void ceph_dentry_lru_add(struct dentry *dn); extern void ceph_dentry_lru_touch(struct dentry *dn); extern void ceph_dentry_lru_del(struct dentry *dn); +extern void ceph_invalidate_dentry_lease(struct dentry *dentry); /* * our d_ops vary depending on whether the inode is live, -- cgit v1.2.3 From 21b667f69b023979410188d7d94c9b219f216626 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 4 Mar 2010 10:22:59 -0800 Subject: ceph: simplify page setup for incoming data Drop largely useless helper __prepare_pages(), and simplify sanity checks. Signed-off-by: Sage Weil --- fs/ceph/osd_client.c | 56 +++++++++++----------------------------------------- 1 file changed, 12 insertions(+), 44 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 3514f71ff85f..22a33f8c8807 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -1087,45 +1087,6 @@ bad: return; } - -/* - * A read request prepares specific pages that data is to be read into. - * When a message is being read off the wire, we call prepare_pages to - * find those pages. - * 0 = success, -1 failure. - */ -static int __prepare_pages(struct ceph_connection *con, - struct ceph_msg_header *hdr, - struct ceph_osd_request *req, - u64 tid, - struct ceph_msg *m) -{ - struct ceph_osd *osd = con->private; - struct ceph_osd_client *osdc; - int ret = -1; - int data_len = le32_to_cpu(hdr->data_len); - unsigned data_off = le16_to_cpu(hdr->data_off); - - int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); - - if (!osd) - return -1; - - osdc = osd->o_osdc; - - dout("__prepare_pages on msg %p tid %llu, has %d pages, want %d\n", m, - tid, req->r_num_pages, want); - if (unlikely(req->r_num_pages < want)) - goto out; - m->pages = req->r_pages; - m->nr_pages = req->r_num_pages; - ret = 0; /* success */ -out: - BUG_ON(ret < 0 || m->nr_pages < want); - - return ret; -} - /* * Register request, send initial attempt. */ @@ -1394,7 +1355,8 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) } /* - * lookup and return message for incoming reply + * lookup and return message for incoming reply. set up reply message + * pages. */ static struct ceph_msg *get_reply(struct ceph_connection *con, struct ceph_msg_header *hdr, @@ -1407,7 +1369,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, int front = le32_to_cpu(hdr->front_len); int data_len = le32_to_cpu(hdr->data_len); u64 tid; - int err; tid = le64_to_cpu(hdr->tid); mutex_lock(&osdc->request_mutex); @@ -1439,12 +1400,19 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, m = ceph_msg_get(req->r_reply); if (data_len > 0) { - err = __prepare_pages(con, hdr, req, tid, m); - if (err < 0) { + unsigned data_off = le16_to_cpu(hdr->data_off); + int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); + + if (unlikely(req->r_num_pages < want)) { + pr_warning("tid %lld reply %d > expected %d pages\n", + tid, want, m->nr_pages); *skip = 1; ceph_msg_put(m); - m = ERR_PTR(err); + m = ERR_PTR(-EIO); + goto out; } + m->pages = req->r_pages; + m->nr_pages = req->r_num_pages; } *skip = 0; req->r_con_filling_msg = ceph_con_get(con); -- cgit v1.2.3 From f553069e5d7c6f53688ae4470173fcb1be97cbe7 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Mar 2010 08:53:04 -0700 Subject: ceph: update for removal of kref_set Signed-off-by: Stephen Rothwell Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index ca3b44a89f2d..030297f62fb7 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -170,7 +170,7 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); - kref_set(&msg->kref, 1); /* retake a single ref */ + kref_init(&msg->kref); /* retake a single ref */ list_add(&msg->list_head, &pool->msgs); pool->num++; dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, -- cgit v1.2.3 From 31459fe4b24c1e09712eff0d82a5276f4fd0e3cf Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Wed, 17 Mar 2010 13:54:02 -0700 Subject: ceph: use __page_cache_alloc and add_to_page_cache_lru Following Nick Piggin patches in btrfs, pagecache pages should be allocated with __page_cache_alloc, so they obey pagecache memory policies. Also, using add_to_page_cache_lru instead of using a private pagevec where applicable. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/addr.c | 9 ++------- fs/ceph/file.c | 2 +- fs/ceph/messenger.c | 2 +- fs/ceph/pagelist.c | 2 +- fs/ceph/xattr.c | 2 +- 5 files changed, 6 insertions(+), 11 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a9005d862ed4..caf76c7fa77c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; int rc = 0; struct page **pages; - struct pagevec pvec; loff_t offset; u64 len; @@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc < 0) goto out; - /* set uptodate and add to lru in pagevec-sized chunks */ - pagevec_init(&pvec, 0); for (; !list_empty(page_list) && len > 0; rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { struct page *page = @@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, zero_user_segment(page, s, PAGE_CACHE_SIZE); } - if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { page_cache_release(page); dout("readpages %p add_to_page_cache failed %p\n", inode, page); @@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); - if (pagevec_add(&pvec, page) == 0) - pagevec_lru_add_file(&pvec); /* add to lru */ + page_cache_release(page); } - pagevec_lru_add_file(&pvec); rc = 0; out: diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ed6f19721d6e..6230c3de1f06 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -326,7 +326,7 @@ static struct page **alloc_page_vector(int num_pages) if (!pages) return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++) { - pages[i] = alloc_page(GFP_NOFS); + pages[i] = __page_cache_alloc(GFP_NOFS); if (pages[i] == NULL) { ceph_release_page_vector(pages, i); return ERR_PTR(-ENOMEM); diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index cd4fadb6491a..af3b143fbf02 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -1947,7 +1947,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) /* the zero page is needed if a request is "canceled" while the message * is being written over the socket */ - msgr->zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); if (!msgr->zero_page) { kfree(msgr); return ERR_PTR(-ENOMEM); diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index 5f8dbf7c745a..b6859f47d364 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c @@ -20,7 +20,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl) static int ceph_pagelist_addpage(struct ceph_pagelist *pl) { - struct page *page = alloc_page(GFP_NOFS); + struct page *page = __page_cache_alloc(GFP_NOFS); if (!page) return -ENOMEM; pl->room += PAGE_SIZE; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2845422907fc..8938934c4c93 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -641,7 +641,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, return -ENOMEM; err = -ENOMEM; for (i = 0; i < nr_pages; i++) { - pages[i] = alloc_page(GFP_NOFS); + pages[i] = __page_cache_alloc(GFP_NOFS); if (!pages[i]) { nr_pages = i; goto out; -- cgit v1.2.3 From 104648ad3f2ebe8556c020e5f0344853076cd5ee Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 18 Mar 2010 10:14:30 -0700 Subject: ceph: reduce build_path debug output Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 76995a960432..ccb4141e306f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1268,7 +1268,7 @@ retry: struct inode *inode = temp->d_inode; if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { - dout("build_path_dentry path+%d: %p SNAPDIR\n", + dout("build_path path+%d: %p SNAPDIR\n", pos, temp); } else if (stop_on_nosnap && inode && ceph_snap(inode) == CEPH_NOSNAP) { @@ -1279,20 +1279,18 @@ retry: break; strncpy(path + pos, temp->d_name.name, temp->d_name.len); - dout("build_path_dentry path+%d: %p '%.*s'\n", - pos, temp, temp->d_name.len, path + pos); } if (pos) path[--pos] = '/'; temp = temp->d_parent; if (temp == NULL) { - pr_err("build_path_dentry corrupt dentry\n"); + pr_err("build_path corrupt dentry\n"); kfree(path); return ERR_PTR(-EINVAL); } } if (pos != 0) { - pr_err("build_path_dentry did not end path lookup where " + pr_err("build_path did not end path lookup where " "expected, namelen is %d, pos is %d\n", len, pos); /* presumably this is only possible if racing with a rename of one of the parent directories (we can not @@ -1304,7 +1302,7 @@ retry: *base = ceph_ino(temp->d_inode); *plen = len; - dout("build_path_dentry on %p %d built %llx '%.*s'\n", + dout("build_path on %p %d built %llx '%.*s'\n", dentry, atomic_read(&dentry->d_count), *base, len, path); return path; } -- cgit v1.2.3 From c7708075f18086ee7d02df8b891910893e9ea372 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 16:01:27 +0300 Subject: ceph: cleanup: remove dead code "xattr" is never NULL here. We took care of that in the previous if statement block. Signed-off-by: Dan Carpenter Signed-off-by: Sage Weil --- fs/ceph/xattr.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 8938934c4c93..d940d14f6922 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -186,12 +186,6 @@ static int __set_xattr(struct ceph_inode_info *ci, ci->i_xattrs.names_size -= xattr->name_len; ci->i_xattrs.vals_size -= xattr->val_len; } - if (!xattr) { - pr_err("__set_xattr ENOMEM on %p %llx.%llx xattr %s=%s\n", - &ci->vfs_inode, ceph_vinop(&ci->vfs_inode), name, - xattr->val); - return -ENOMEM; - } ci->i_xattrs.names_size += name_len; ci->i_xattrs.vals_size += val_len; if (val) -- cgit v1.2.3 From 6f46cb29350963527b663c9eb4fe964daa9ae707 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 24 Mar 2010 21:30:19 -0700 Subject: ceph: fix theoretically possible double-put on connection This would only trigger if we bailed out before resetting r_con_filling_msg because the server reply was corrupt (oversized). Signed-off-by: Sage Weil --- fs/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ceph') diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 22a33f8c8807..3d2bfbc232dc 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -1386,6 +1386,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, req->r_reply, req->r_con_filling_msg); ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); ceph_con_put(req->r_con_filling_msg); + req->r_con_filling_msg = NULL; } if (front > req->r_reply->front.iov_len) { -- cgit v1.2.3 From 3143edd3a185f1fd370ebdd21b4151aa9f3283a3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 24 Mar 2010 21:43:33 -0700 Subject: ceph: clean up statfs Avoid unnecessary msgpool. Preallocate reply. Fix use-after-free race. Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 149 ++++++++++++++++++++++++++++++++------------------- fs/ceph/mon_client.h | 5 +- 2 files changed, 97 insertions(+), 57 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 8fdc011ca956..43cfab06fcee 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -393,16 +393,64 @@ static void __insert_statfs(struct ceph_mon_client *monc, rb_insert_color(&new->node, &monc->statfs_request_tree); } +static void release_statfs_request(struct kref *kref) +{ + struct ceph_mon_statfs_request *req = + container_of(kref, struct ceph_mon_statfs_request, kref); + + if (req->reply) + ceph_msg_put(req->reply); + if (req->request) + ceph_msg_put(req->request); +} + +static void put_statfs_request(struct ceph_mon_statfs_request *req) +{ + kref_put(&req->kref, release_statfs_request); +} + +static void get_statfs_request(struct ceph_mon_statfs_request *req) +{ + kref_get(&req->kref); +} + +static struct ceph_msg *get_statfs_reply(struct ceph_connection *con, + struct ceph_msg_header *hdr, + int *skip) +{ + struct ceph_mon_client *monc = con->private; + struct ceph_mon_statfs_request *req; + u64 tid = le64_to_cpu(hdr->tid); + struct ceph_msg *m; + + mutex_lock(&monc->mutex); + req = __lookup_statfs(monc, tid); + if (!req) { + dout("get_statfs_reply %lld dne\n", tid); + *skip = 1; + m = NULL; + } else { + dout("get_statfs_reply %lld got %p\n", tid, req->reply); + m = ceph_msg_get(req->reply); + /* + * we don't need to track the connection reading into + * this reply because we only have one open connection + * at a time, ever. + */ + } + mutex_unlock(&monc->mutex); + return m; +} + static void handle_statfs_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) { struct ceph_mon_statfs_request *req; struct ceph_mon_statfs_reply *reply = msg->front.iov_base; - u64 tid; + u64 tid = le64_to_cpu(msg->hdr.tid); if (msg->front.iov_len != sizeof(*reply)) goto bad; - tid = le64_to_cpu(msg->hdr.tid); dout("handle_statfs_reply %p tid %llu\n", msg, tid); mutex_lock(&monc->mutex); @@ -410,10 +458,13 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, if (req) { *req->buf = reply->st; req->result = 0; + get_statfs_request(req); } mutex_unlock(&monc->mutex); - if (req) + if (req) { complete(&req->completion); + put_statfs_request(req); + } return; bad: @@ -422,67 +473,63 @@ bad: } /* - * (re)send a statfs request + * Do a synchronous statfs(). */ -static int send_statfs(struct ceph_mon_client *monc, - struct ceph_mon_statfs_request *req) +int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) { - struct ceph_msg *msg; + struct ceph_mon_statfs_request *req; struct ceph_mon_statfs *h; + int err; + + req = kmalloc(sizeof(*req), GFP_NOFS); + if (!req) + return -ENOMEM; + + memset(req, 0, sizeof(*req)); + kref_init(&req->kref); + req->buf = buf; + init_completion(&req->completion); + + req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); + if (IS_ERR(req->request)) { + err = PTR_ERR(req->request); + goto out; + } + req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, 0, 0, NULL); + if (IS_ERR(req->reply)) { + err = PTR_ERR(req->reply); + goto out; + } - dout("send_statfs tid %llu\n", req->tid); - msg = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); - if (IS_ERR(msg)) - return PTR_ERR(msg); - req->request = msg; - msg->hdr.tid = cpu_to_le64(req->tid); - h = msg->front.iov_base; + /* fill out request */ + h = req->request->front.iov_base; h->monhdr.have_version = 0; h->monhdr.session_mon = cpu_to_le16(-1); h->monhdr.session_mon_tid = 0; h->fsid = monc->monmap->fsid; - ceph_con_send(monc->con, msg); - return 0; -} - -/* - * Do a synchronous statfs(). - */ -int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) -{ - struct ceph_mon_statfs_request req; - int err; - - req.buf = buf; - init_completion(&req.completion); - - /* allocate memory for reply */ - err = ceph_msgpool_resv(&monc->msgpool_statfs_reply, 1); - if (err) - return err; /* register request */ mutex_lock(&monc->mutex); - req.tid = ++monc->last_tid; - req.last_attempt = jiffies; - req.delay = BASE_DELAY_INTERVAL; - __insert_statfs(monc, &req); + req->tid = ++monc->last_tid; + req->request->hdr.tid = cpu_to_le64(req->tid); + __insert_statfs(monc, req); monc->num_statfs_requests++; mutex_unlock(&monc->mutex); /* send request and wait */ - err = send_statfs(monc, &req); - if (!err) - err = wait_for_completion_interruptible(&req.completion); + ceph_con_send(monc->con, ceph_msg_get(req->request)); + err = wait_for_completion_interruptible(&req->completion); mutex_lock(&monc->mutex); - rb_erase(&req.node, &monc->statfs_request_tree); + rb_erase(&req->node, &monc->statfs_request_tree); monc->num_statfs_requests--; - ceph_msgpool_resv(&monc->msgpool_statfs_reply, -1); mutex_unlock(&monc->mutex); if (!err) - err = req.result; + err = req->result; + +out: + kref_put(&req->kref, release_statfs_request); return err; } @@ -496,7 +543,7 @@ static void __resend_statfs(struct ceph_mon_client *monc) for (p = rb_first(&monc->statfs_request_tree); p; p = rb_next(p)) { req = rb_entry(p, struct ceph_mon_statfs_request, node); - send_statfs(monc, req); + ceph_con_send(monc->con, ceph_msg_get(req->request)); } } @@ -591,13 +638,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) sizeof(struct ceph_mon_subscribe_ack), 1, false); if (err < 0) goto out_monmap; - err = ceph_msgpool_init(&monc->msgpool_statfs_reply, - sizeof(struct ceph_mon_statfs_reply), 0, false); - if (err < 0) - goto out_pool1; err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); if (err < 0) - goto out_pool2; + goto out_pool; monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); monc->pending_auth = 0; @@ -624,10 +667,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) out_pool3: ceph_msgpool_destroy(&monc->msgpool_auth_reply); -out_pool2: +out_pool: ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); -out_pool1: - ceph_msgpool_destroy(&monc->msgpool_statfs_reply); out_monmap: kfree(monc->monmap); out: @@ -652,7 +693,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc) ceph_msg_put(monc->m_auth); ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); - ceph_msgpool_destroy(&monc->msgpool_statfs_reply); ceph_msgpool_destroy(&monc->msgpool_auth_reply); kfree(monc->monmap); @@ -773,8 +813,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, m = ceph_msgpool_get(&monc->msgpool_subscribe_ack, front_len); break; case CEPH_MSG_STATFS_REPLY: - m = ceph_msgpool_get(&monc->msgpool_statfs_reply, front_len); - break; + return get_statfs_reply(con, hdr, skip); case CEPH_MSG_AUTH_REPLY: m = ceph_msgpool_get(&monc->msgpool_auth_reply, front_len); break; diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index b958ad5afa06..cc89a86c8589 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h @@ -2,6 +2,7 @@ #define _FS_CEPH_MON_CLIENT_H #include +#include #include #include "messenger.h" @@ -44,13 +45,14 @@ struct ceph_mon_request { * to the caller */ struct ceph_mon_statfs_request { + struct kref kref; u64 tid; struct rb_node node; int result; struct ceph_statfs *buf; struct completion completion; - unsigned long last_attempt, delay; /* jiffies */ struct ceph_msg *request; /* original request */ + struct ceph_msg *reply; /* and reply */ }; struct ceph_mon_client { @@ -72,7 +74,6 @@ struct ceph_mon_client { /* msg pools */ struct ceph_msgpool msgpool_subscribe_ack; - struct ceph_msgpool msgpool_statfs_reply; struct ceph_msgpool msgpool_auth_reply; /* pending statfs requests */ -- cgit v1.2.3 From 6694d6b95cf3b41751e78815d05968fa2084d7bf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 24 Mar 2010 21:48:05 -0700 Subject: ceph: drop unnecessary msgpool for mon_client auth_reply Preallocate a single reply message that we can reuse instead. Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 19 ++++++++++++------- fs/ceph/mon_client.h | 5 ++--- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 43cfab06fcee..5a67732fa6e1 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -638,16 +638,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) sizeof(struct ceph_mon_subscribe_ack), 1, false); if (err < 0) goto out_monmap; - err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); - if (err < 0) + + monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, 0, 0, + NULL); + if (IS_ERR(monc->m_auth_reply)) { + err = PTR_ERR(monc->m_auth_reply); + monc->m_auth_reply = NULL; goto out_pool; + } monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); monc->pending_auth = 0; if (IS_ERR(monc->m_auth)) { err = PTR_ERR(monc->m_auth); monc->m_auth = NULL; - goto out_pool3; + goto out_auth_reply; } monc->cur_mon = -1; @@ -665,8 +670,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->want_next_osdmap = 1; return 0; -out_pool3: - ceph_msgpool_destroy(&monc->msgpool_auth_reply); +out_auth_reply: + ceph_msg_put(monc->m_auth_reply); out_pool: ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); out_monmap: @@ -692,8 +697,8 @@ void ceph_monc_stop(struct ceph_mon_client *monc) ceph_auth_destroy(monc->auth); ceph_msg_put(monc->m_auth); + ceph_msg_put(monc->m_auth_reply); ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); - ceph_msgpool_destroy(&monc->msgpool_auth_reply); kfree(monc->monmap); } @@ -815,7 +820,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_STATFS_REPLY: return get_statfs_reply(con, hdr, skip); case CEPH_MSG_AUTH_REPLY: - m = ceph_msgpool_get(&monc->msgpool_auth_reply, front_len); + m = ceph_msg_get(monc->m_auth_reply); break; case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index cc89a86c8589..2658e3e3b27c 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h @@ -63,7 +63,7 @@ struct ceph_mon_client { struct delayed_work delayed_work; struct ceph_auth_client *auth; - struct ceph_msg *m_auth; + struct ceph_msg *m_auth, *m_auth_reply; int pending_auth; bool hunting; @@ -72,9 +72,8 @@ struct ceph_mon_client { struct ceph_connection *con; bool have_fsid; - /* msg pools */ + /* msgs */ struct ceph_msgpool msgpool_subscribe_ack; - struct ceph_msgpool msgpool_auth_reply; /* pending statfs requests */ struct rb_root statfs_request_tree; -- cgit v1.2.3 From 7c315c552c7442eab73461de61dbcce579a31d3a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 24 Mar 2010 21:52:30 -0700 Subject: ceph: drop unnecessary msgpool for mon_client subscribe_ack Preallocate a single message to reuse instead. Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 20 ++++++++++++-------- fs/ceph/mon_client.h | 6 +----- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 5a67732fa6e1..5bee9250bf2a 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -634,17 +634,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; /* msg pools */ - err = ceph_msgpool_init(&monc->msgpool_subscribe_ack, - sizeof(struct ceph_mon_subscribe_ack), 1, false); - if (err < 0) + monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, + sizeof(struct ceph_mon_subscribe_ack), + 0, 0, NULL); + if (IS_ERR(monc->m_subscribe_ack)) { + err = PTR_ERR(monc->m_subscribe_ack); + monc->m_subscribe_ack = NULL; goto out_monmap; + } monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, 0, 0, NULL); if (IS_ERR(monc->m_auth_reply)) { err = PTR_ERR(monc->m_auth_reply); monc->m_auth_reply = NULL; - goto out_pool; + goto out_subscribe_ack; } monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); @@ -672,8 +676,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) out_auth_reply: ceph_msg_put(monc->m_auth_reply); -out_pool: - ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); +out_subscribe_ack: + ceph_msg_put(monc->m_subscribe_ack); out_monmap: kfree(monc->monmap); out: @@ -698,7 +702,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc) ceph_msg_put(monc->m_auth); ceph_msg_put(monc->m_auth_reply); - ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); + ceph_msg_put(monc->m_subscribe_ack); kfree(monc->monmap); } @@ -815,7 +819,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, switch (type) { case CEPH_MSG_MON_SUBSCRIBE_ACK: - m = ceph_msgpool_get(&monc->msgpool_subscribe_ack, front_len); + m = ceph_msg_get(monc->m_subscribe_ack); break; case CEPH_MSG_STATFS_REPLY: return get_statfs_reply(con, hdr, skip); diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 2658e3e3b27c..0046deed0740 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h @@ -6,7 +6,6 @@ #include #include "messenger.h" -#include "msgpool.h" struct ceph_client; struct ceph_mount_args; @@ -63,7 +62,7 @@ struct ceph_mon_client { struct delayed_work delayed_work; struct ceph_auth_client *auth; - struct ceph_msg *m_auth, *m_auth_reply; + struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe_ack; int pending_auth; bool hunting; @@ -72,9 +71,6 @@ struct ceph_mon_client { struct ceph_connection *con; bool have_fsid; - /* msgs */ - struct ceph_msgpool msgpool_subscribe_ack; - /* pending statfs requests */ struct rb_root statfs_request_tree; int num_statfs_requests; -- cgit v1.2.3 From 2d06eeb877581a7f53209af1582c5f66c799f0bd Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 26 Mar 2010 18:04:40 +0800 Subject: ceph: handle kzalloc() failure Signed-off-by: Cheng Renquan Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ccb4141e306f..d549ab3adfda 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2631,6 +2631,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) mdsc->client = client; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); + if (mdsc->mdsmap == NULL) + return -ENOMEM; + init_completion(&mdsc->safe_umount_waiters); init_completion(&mdsc->session_close_waiters); INIT_LIST_HEAD(&mdsc->waiting_for_map); @@ -2656,6 +2659,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) init_waitqueue_head(&mdsc->cap_flushing_wq); spin_lock_init(&mdsc->dentry_lru_lock); INIT_LIST_HEAD(&mdsc->dentry_lru); + return 0; } -- cgit v1.2.3 From 640ef79d27c81b7a3265a344ec1d25644dd463ad Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Fri, 26 Mar 2010 17:40:33 +0800 Subject: ceph: use ceph_sb_to_client instead of ceph_client ceph_sb_to_client and ceph_client are really identical, we need to dump one; while function ceph_client is confusing with "struct ceph_client", ceph_sb_to_client's definition is more clear; so we'd better switch all call to ceph_sb_to_client. -static inline struct ceph_client *ceph_client(struct super_block *sb) -{ - return sb->s_fs_info; -} Signed-off-by: Cheng Renquan Signed-off-by: Sage Weil --- fs/ceph/addr.c | 2 +- fs/ceph/caps.c | 15 +++++++++------ fs/ceph/dir.c | 10 +++++----- fs/ceph/export.c | 2 +- fs/ceph/file.c | 2 +- fs/ceph/inode.c | 10 +++++----- fs/ceph/ioctl.c | 2 +- fs/ceph/snap.c | 2 +- fs/ceph/super.c | 8 ++++---- fs/ceph/super.h | 6 ------ fs/ceph/xattr.c | 4 ++-- 11 files changed, 30 insertions(+), 33 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index caf76c7fa77c..d9c60b84949a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -563,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req, ceph_release_pages(req->r_pages, req->r_num_pages); if (req->r_pages_from_pool) mempool_free(req->r_pages, - ceph_client(inode->i_sb)->wb_pagevec_pool); + ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); else kfree(req->r_pages); ceph_osdc_put_request(req); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d9400534b279..51fd39da1470 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap) { struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; - struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_mds_client *mdsc = + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; int removed = 0; dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); @@ -1298,7 +1299,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) */ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) { - struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_mds_client *mdsc = + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; struct inode *inode = &ci->vfs_inode; int was = ci->i_dirty_caps; int dirty = 0; @@ -1336,7 +1338,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) static int __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session) { - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing; @@ -1663,7 +1665,7 @@ ack: static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, unsigned *flush_tid) { - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int unlock_session = session ? 0 : 1; int flushing = 0; @@ -1829,7 +1831,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) err = wait_event_interruptible(ci->i_cap_wq, caps_are_flushed(inode, flush_tid)); } else { - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = + &ceph_sb_to_client(inode->i_sb)->mdsc; spin_lock(&inode->i_lock); if (__ceph_caps_dirty(ci)) @@ -2411,7 +2414,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, __releases(inode->i_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4b1a7a4bae0b..33feac75c532 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -478,7 +478,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err) { - struct ceph_client *client = ceph_client(dentry->d_sb); + struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); struct inode *parent = dentry->d_parent->d_inode; /* .snap dir? */ @@ -1059,7 +1059,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, struct ceph_inode_info *ci = ceph_inode(inode); int left; - if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT)) + if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) return -EISDIR; if (!cf->dir_info) { @@ -1161,7 +1161,7 @@ void ceph_dentry_lru_add(struct dentry *dn) dout("dentry_lru_add %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); if (di) { - mdsc = &ceph_client(dn->d_sb)->mdsc; + mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_add_tail(&di->lru, &mdsc->dentry_lru); mdsc->num_dentry++; @@ -1177,7 +1177,7 @@ void ceph_dentry_lru_touch(struct dentry *dn) dout("dentry_lru_touch %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); if (di) { - mdsc = &ceph_client(dn->d_sb)->mdsc; + mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_move_tail(&di->lru, &mdsc->dentry_lru); spin_unlock(&mdsc->dentry_lru_lock); @@ -1192,7 +1192,7 @@ void ceph_dentry_lru_del(struct dentry *dn) dout("dentry_lru_del %p %p '%.*s'\n", di, dn, dn->d_name.len, dn->d_name.name); if (di) { - mdsc = &ceph_client(dn->d_sb)->mdsc; + mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); list_del_init(&di->lru); mdsc->num_dentry--; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 9d67572fb328..15683905a7b6 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -115,7 +115,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, static struct dentry *__cfh_to_dentry(struct super_block *sb, struct ceph_nfs_confh *cfh) { - struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; struct inode *inode; struct dentry *dentry; struct ceph_vino vino; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 6230c3de1f06..834d2b83834a 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -809,7 +809,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; + struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; loff_t endoff = pos + iov->iov_len; int got = 0; int ret, err; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 220a2aec0545..ef917232cf37 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -384,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode) */ if (ci->i_snap_realm) { struct ceph_mds_client *mdsc = - &ceph_client(ci->vfs_inode.i_sb)->mdsc; + &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; struct ceph_snap_realm *realm = ci->i_snap_realm; dout(" dropping residual ref to snap realm %p\n", realm); @@ -623,7 +623,7 @@ static int fill_inode(struct inode *inode, inode->i_mapping->a_ops = &ceph_aops; inode->i_mapping->backing_dev_info = - &ceph_client(inode->i_sb)->backing_dev_info; + &ceph_sb_to_client(inode->i_sb)->backing_dev_info; switch (inode->i_mode & S_IFMT) { case S_IFIFO: @@ -681,7 +681,7 @@ static int fill_inode(struct inode *inode, } /* it may be better to set st_size in getattr instead? */ - if (ceph_test_opt(ceph_client(inode->i_sb), RBYTES)) + if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) inode->i_size = ci->i_rbytes; break; default: @@ -1438,7 +1438,7 @@ void ceph_queue_vmtruncate(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); - if (queue_work(ceph_client(inode->i_sb)->trunc_wq, + if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, &ci->i_vmtruncate_work)) { dout("ceph_queue_vmtruncate %p\n", inode); igrab(inode); @@ -1527,7 +1527,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) struct inode *parent_inode = dentry->d_parent->d_inode; const unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; - struct ceph_mds_client *mdsc = &ceph_client(dentry->d_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; int issued; int release = 0, dirtied = 0; int mask = 0; diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8a5bcae62846..d085f07756b4 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -98,7 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) struct ceph_ioctl_dataloc dl; struct inode *inode = file->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; + struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; u64 len = 1, olen; u64 tmp; struct ceph_object_layout ol; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index d5114db70453..c0b26b6badba 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -512,7 +512,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { struct inode *inode = &ci->vfs_inode; - struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; BUG_ON(capsnap->writing); capsnap->size = inode->i_size; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 110857ba9269..93ad169a6ae1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -107,8 +107,8 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) static int ceph_syncfs(struct super_block *sb, int wait) { dout("sync_fs %d\n", wait); - ceph_osdc_sync(&ceph_client(sb)->osdc); - ceph_mdsc_sync(&ceph_client(sb)->mdsc); + ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); + ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); dout("sync_fs %d done\n", wait); return 0; } @@ -932,9 +932,9 @@ static int ceph_get_sb(struct file_system_type *fs_type, goto out; } - if (ceph_client(sb) != client) { + if (ceph_sb_to_client(sb) != client) { ceph_destroy_client(client); - client = ceph_client(sb); + client = ceph_sb_to_client(sb); dout("get_sb got existing client %p\n", client); } else { dout("get_sb using new client %p\n", client); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index cfe3b0834d54..51b3ff238454 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -160,12 +160,6 @@ struct ceph_client { #endif }; -static inline struct ceph_client *ceph_client(struct super_block *sb) -{ - return sb->s_fs_info; -} - - /* * File i/o capability. This tracks shared state with the metadata * server that allows us to cache or writeback attributes or to read diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index d940d14f6922..3b4c2620030a 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -616,7 +616,7 @@ out: static int ceph_sync_setxattr(struct dentry *dentry, const char *name, const char *value, size_t size, int flags) { - struct ceph_client *client = ceph_client(dentry->d_sb); + struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); struct inode *inode = dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); struct inode *parent_inode = dentry->d_parent->d_inode; @@ -773,7 +773,7 @@ out: static int ceph_send_removexattr(struct dentry *dentry, const char *name) { - struct ceph_client *client = ceph_client(dentry->d_sb); + struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); struct ceph_mds_client *mdsc = &client->mdsc; struct inode *inode = dentry->d_inode; struct inode *parent_inode = dentry->d_parent->d_inode; -- cgit v1.2.3 From d52f847a841bfeba0ea87a7842732d388a1ca2e8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 1 Apr 2010 15:23:14 -0700 Subject: ceph: rewrite msgpool using mempool_t Since we don't need to maintain large pools of messages, we can just use the standard mempool_t. We maintain a msgpool 'wrapper' because we need the mempool_t* in the alloc function, and mempool gives us only pool_data. Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 172 +++++++++--------------------------------------------- fs/ceph/msgpool.h | 8 +-- 2 files changed, 29 insertions(+), 151 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index 030297f62fb7..ca032223e87b 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -7,106 +7,43 @@ #include "msgpool.h" -/* - * We use msg pools to preallocate memory for messages we expect to - * receive over the wire, to avoid getting ourselves into OOM - * conditions at unexpected times. We take use a few different - * strategies: - * - * - for request/response type interactions, we preallocate the - * memory needed for the response when we generate the request. - * - * - for messages we can receive at any time from the MDS, we preallocate - * a pool of messages we can re-use. - * - * - for writeback, we preallocate some number of messages to use for - * requests and their replies, so that we always make forward - * progress. - * - * The msgpool behaves like a mempool_t, but keeps preallocated - * ceph_msgs strung together on a list_head instead of using a pointer - * vector. This avoids vector reallocation when we adjust the number - * of preallocated items (which happens frequently). - */ +static void *alloc_fn(gfp_t gfp_mask, void *arg) +{ + struct ceph_msgpool *pool = arg; + struct ceph_msg *m; + m = ceph_msg_new(0, pool->front_len, 0, 0, NULL); + if (IS_ERR(m)) + return NULL; + return m; +} -/* - * Allocate or release as necessary to meet our target pool size. - */ -static int __fill_msgpool(struct ceph_msgpool *pool) +static void free_fn(void *element, void *arg) { - struct ceph_msg *msg; - - while (pool->num < pool->min) { - dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num, - pool->min); - spin_unlock(&pool->lock); - msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); - spin_lock(&pool->lock); - if (IS_ERR(msg)) - return PTR_ERR(msg); - msg->pool = pool; - list_add(&msg->list_head, &pool->msgs); - pool->num++; - } - while (pool->num > pool->min) { - msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head); - dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num, - pool->min, msg); - list_del_init(&msg->list_head); - pool->num--; - ceph_msg_kfree(msg); - } - return 0; + ceph_msg_put(element); } int ceph_msgpool_init(struct ceph_msgpool *pool, - int front_len, int min, bool blocking) + int front_len, int size, bool blocking) { - int ret; - - dout("msgpool_init %p front_len %d min %d\n", pool, front_len, min); - spin_lock_init(&pool->lock); pool->front_len = front_len; - INIT_LIST_HEAD(&pool->msgs); - pool->num = 0; - pool->min = min; - pool->blocking = blocking; - init_waitqueue_head(&pool->wait); - - spin_lock(&pool->lock); - ret = __fill_msgpool(pool); - spin_unlock(&pool->lock); - return ret; + pool->pool = mempool_create(size, alloc_fn, free_fn, pool); + if (!pool->pool) + return -ENOMEM; + return 0; } void ceph_msgpool_destroy(struct ceph_msgpool *pool) { - dout("msgpool_destroy %p\n", pool); - spin_lock(&pool->lock); - pool->min = 0; - __fill_msgpool(pool); - spin_unlock(&pool->lock); -} - -int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) -{ - int ret; - - spin_lock(&pool->lock); - dout("msgpool_resv %p delta %d\n", pool, delta); - pool->min += delta; - ret = __fill_msgpool(pool); - spin_unlock(&pool->lock); - return ret; + mempool_destroy(pool->pool); } -struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) +struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, + int front_len) { - wait_queue_t wait; - struct ceph_msg *msg; + if (front_len > pool->front_len) { + struct ceph_msg *msg; - if (front_len && front_len > pool->front_len) { pr_err("msgpool_get pool %p need front %d, pool size is %d\n", pool, front_len, pool->front_len); WARN_ON(1); @@ -115,72 +52,17 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) msg = ceph_msg_new(0, front_len, 0, 0, NULL); if (!IS_ERR(msg)) return msg; + return NULL; } - if (!front_len) - front_len = pool->front_len; - - if (pool->blocking) { - /* mempool_t behavior; first try to alloc */ - msg = ceph_msg_new(0, front_len, 0, 0, NULL); - if (!IS_ERR(msg)) - return msg; - } - - while (1) { - spin_lock(&pool->lock); - if (likely(pool->num)) { - msg = list_entry(pool->msgs.next, struct ceph_msg, - list_head); - list_del_init(&msg->list_head); - pool->num--; - dout("msgpool_get %p got %p, now %d/%d\n", pool, msg, - pool->num, pool->min); - spin_unlock(&pool->lock); - return msg; - } - pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num, - pool->min, pool->blocking ? "waiting" : "may fail"); - spin_unlock(&pool->lock); - - if (!pool->blocking) { - WARN_ON(1); - - /* maybe we can allocate it now? */ - msg = ceph_msg_new(0, front_len, 0, 0, NULL); - if (!IS_ERR(msg)) - return msg; - - pr_err("msgpool_get %p empty + alloc failed\n", pool); - return ERR_PTR(-ENOMEM); - } - - init_wait(&wait); - prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); - schedule(); - finish_wait(&pool->wait, &wait); - } + return mempool_alloc(pool->pool, GFP_NOFS); } void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { - spin_lock(&pool->lock); - if (pool->num < pool->min) { - /* reset msg front_len; user may have changed it */ - msg->front.iov_len = pool->front_len; - msg->hdr.front_len = cpu_to_le32(pool->front_len); + /* reset msg front_len; user may have changed it */ + msg->front.iov_len = pool->front_len; + msg->hdr.front_len = cpu_to_le32(pool->front_len); - kref_init(&msg->kref); /* retake a single ref */ - list_add(&msg->list_head, &pool->msgs); - pool->num++; - dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, - pool->num, pool->min); - spin_unlock(&pool->lock); - wake_up(&pool->wait); - } else { - dout("msgpool_put %p drop %p, at %d/%d\n", pool, msg, - pool->num, pool->min); - spin_unlock(&pool->lock); - ceph_msg_kfree(msg); - } + kref_init(&msg->kref); /* retake single ref */ } diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h index bc834bfcd720..62a61c7fca08 100644 --- a/fs/ceph/msgpool.h +++ b/fs/ceph/msgpool.h @@ -1,6 +1,7 @@ #ifndef _FS_CEPH_MSGPOOL #define _FS_CEPH_MSGPOOL +#include #include "messenger.h" /* @@ -8,18 +9,13 @@ * avoid unexpected OOM conditions. */ struct ceph_msgpool { - spinlock_t lock; + mempool_t *pool; int front_len; /* preallocated payload size */ - struct list_head msgs; /* msgs in the pool; each has 1 ref */ - int num, min; /* cur, min # msgs in the pool */ - bool blocking; - wait_queue_head_t wait; }; extern int ceph_msgpool_init(struct ceph_msgpool *pool, int front_len, int size, bool blocking); extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); -extern int ceph_msgpool_resv(struct ceph_msgpool *, int delta); extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, int front_len); extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); -- cgit v1.2.3 From a79832f26be370ee26ea81eecdfd42d10e49d66a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 1 Apr 2010 16:06:19 -0700 Subject: ceph: make ceph_msg_new return NULL on failure; clean up, fix callers Returning ERR_PTR(-ENOMEM) is useless extra work. Return NULL on failure instead, and fix up the callers (about half of which were wrong anyway). Signed-off-by: Sage Weil --- fs/ceph/caps.c | 4 ++-- fs/ceph/file.c | 4 ++-- fs/ceph/mds_client.c | 40 +++++++++++++++++----------------------- fs/ceph/messenger.c | 20 +++++++------------- fs/ceph/mon_client.c | 25 +++++++------------------ fs/ceph/msgpool.c | 13 ++----------- fs/ceph/osd_client.c | 22 +++++++++++----------- 7 files changed, 48 insertions(+), 80 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 51fd39da1470..8755e2d83d4c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -939,8 +939,8 @@ static int send_cap_msg(struct ceph_mds_session *session, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL); - if (IS_ERR(msg)) - return PTR_ERR(msg); + if (!msg) + return -ENOMEM; msg->hdr.tid = cpu_to_le64(flush_tid); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 834d2b83834a..b0426090e8c3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -649,8 +649,8 @@ more: do_sync, ci->i_truncate_seq, ci->i_truncate_size, &mtime, false, 2); - if (IS_ERR(req)) - return PTR_ERR(req); + if (!req) + return -ENOMEM; num_pages = calc_pages_for(pos, len); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d549ab3adfda..7e89c185d38d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -666,9 +666,9 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) struct ceph_mds_session_head *h; msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), 0, 0, NULL); - if (IS_ERR(msg)) { + if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); - return ERR_PTR(PTR_ERR(msg)); + return NULL; } h = msg->front.iov_base; h->op = cpu_to_le32(op); @@ -687,7 +687,6 @@ static int __open_session(struct ceph_mds_client *mdsc, struct ceph_msg *msg; int mstate; int mds = session->s_mds; - int err = 0; /* wait for mds to go active? */ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); @@ -698,13 +697,9 @@ static int __open_session(struct ceph_mds_client *mdsc, /* send connect message */ msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); - if (IS_ERR(msg)) { - err = PTR_ERR(msg); - goto out; - } + if (!msg) + return -ENOMEM; ceph_con_send(&session->s_con, msg); - -out: return 0; } @@ -883,8 +878,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, ceph_mds_state_name(state)); msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); - if (IS_ERR(msg)) - return PTR_ERR(msg); + if (!msg) + return -ENOMEM; ceph_con_send(&session->s_con, msg); return 0; } @@ -931,17 +926,15 @@ static int request_close_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { struct ceph_msg *msg; - int err = 0; dout("request_close_session mds%d state %s seq %lld\n", session->s_mds, session_state_name(session->s_state), session->s_seq); msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); - if (IS_ERR(msg)) - err = PTR_ERR(msg); - else - ceph_con_send(&session->s_con, msg); - return err; + if (!msg) + return -ENOMEM; + ceph_con_send(&session->s_con, msg); + return 0; } /* @@ -1426,8 +1419,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len += req->r_old_dentry->d_name.len; msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, 0, 0, NULL); - if (IS_ERR(msg)) + if (!msg) { + msg = ERR_PTR(-ENOMEM); goto out_free2; + } msg->hdr.tid = cpu_to_le64(req->r_tid); @@ -1518,7 +1513,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, if (IS_ERR(msg)) { req->r_err = PTR_ERR(msg); complete_request(mdsc, req); - return -PTR_ERR(msg); + return PTR_ERR(msg); } req->r_request = msg; @@ -2158,11 +2153,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) goto fail_nopagelist; ceph_pagelist_init(pagelist); + err = -ENOMEM; reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, 0, 0, NULL); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); + if (!reply) goto fail_nomsg; - } /* find session */ session = __ceph_lookup_mds_session(mdsc, mds); @@ -2469,7 +2463,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, len += dnamelen; msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, NULL); - if (IS_ERR(msg)) + if (!msg) return; lease = msg->front.iov_base; lease->action = action; diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index af3b143fbf02..fe7d0c52ae3c 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -1402,19 +1402,17 @@ static int read_partial_message(struct ceph_connection *con) con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); if (skip) { /* skip this message */ - dout("alloc_msg returned NULL, skipping message\n"); + dout("alloc_msg said skip message\n"); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; return 0; } - if (IS_ERR(con->in_msg)) { - ret = PTR_ERR(con->in_msg); - con->in_msg = NULL; + if (!con->in_msg) { con->error_msg = "error allocating memory for incoming message"; - return ret; + return -ENOMEM; } m = con->in_msg; m->front.iov_len = 0; /* haven't read it yet */ @@ -2147,7 +2145,7 @@ out2: ceph_msg_put(m); out: pr_err("msg_new can't create type %d len %d\n", type, front_len); - return ERR_PTR(-ENOMEM); + return NULL; } /* @@ -2190,10 +2188,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, mutex_unlock(&con->mutex); msg = con->ops->alloc_msg(con, hdr, skip); mutex_lock(&con->mutex); - if (IS_ERR(msg)) - return msg; - - if (*skip) + if (!msg || *skip) return NULL; } if (!msg) { @@ -2202,17 +2197,16 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, if (!msg) { pr_err("unable to allocate msg type %d len %d\n", type, front_len); - return ERR_PTR(-ENOMEM); + return NULL; } } memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); if (middle_len) { ret = ceph_alloc_middle(con, msg); - if (ret < 0) { ceph_msg_put(msg); - return msg; + return NULL; } } diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 5bee9250bf2a..35f593e7e364 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -490,16 +490,13 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) req->buf = buf; init_completion(&req->completion); + err = -ENOMEM; req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); - if (IS_ERR(req->request)) { - err = PTR_ERR(req->request); + if (!req->request) goto out; - } req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, 0, 0, NULL); - if (IS_ERR(req->reply)) { - err = PTR_ERR(req->reply); + if (!req->reply) goto out; - } /* fill out request */ h = req->request->front.iov_base; @@ -634,30 +631,22 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; /* msg pools */ + err = -ENOMEM; monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, sizeof(struct ceph_mon_subscribe_ack), 0, 0, NULL); - if (IS_ERR(monc->m_subscribe_ack)) { - err = PTR_ERR(monc->m_subscribe_ack); - monc->m_subscribe_ack = NULL; + if (!monc->m_subscribe_ack) goto out_monmap; - } monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, 0, 0, NULL); - if (IS_ERR(monc->m_auth_reply)) { - err = PTR_ERR(monc->m_auth_reply); - monc->m_auth_reply = NULL; + if (!monc->m_auth_reply) goto out_subscribe_ack; - } monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); monc->pending_auth = 0; - if (IS_ERR(monc->m_auth)) { - err = PTR_ERR(monc->m_auth); - monc->m_auth = NULL; + if (!monc->m_auth) goto out_auth_reply; - } monc->cur_mon = -1; monc->hunting = true; diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index ca032223e87b..04fea84890da 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -10,12 +10,8 @@ static void *alloc_fn(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; - struct ceph_msg *m; - m = ceph_msg_new(0, pool->front_len, 0, 0, NULL); - if (IS_ERR(m)) - return NULL; - return m; + return ceph_msg_new(0, pool->front_len, 0, 0, NULL); } static void free_fn(void *element, void *arg) @@ -42,17 +38,12 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { if (front_len > pool->front_len) { - struct ceph_msg *msg; - pr_err("msgpool_get pool %p need front %d, pool size is %d\n", pool, front_len, pool->front_len); WARN_ON(1); /* try to alloc a fresh message */ - msg = ceph_msg_new(0, front_len, 0, 0, NULL); - if (!IS_ERR(msg)) - return msg; - return NULL; + return ceph_msg_new(0, front_len, 0, 0, NULL); } return mempool_alloc(pool->pool, GFP_NOFS); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 3d2bfbc232dc..a51d0df2af30 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -147,7 +147,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req = kzalloc(sizeof(*req), GFP_NOFS); } if (req == NULL) - return ERR_PTR(-ENOMEM); + return NULL; req->r_osdc = osdc; req->r_mempool = use_mempool; @@ -165,9 +165,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, OSD_OPREPLY_FRONT_LEN, 0, 0, NULL); - if (IS_ERR(msg)) { + if (!msg) { ceph_osdc_put_request(req); - return ERR_PTR(PTR_ERR(msg)); + return NULL; } req->r_reply = msg; @@ -179,9 +179,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, 0, 0, NULL); - if (IS_ERR(msg)) { + if (!msg) { ceph_osdc_put_request(req); - return ERR_PTR(PTR_ERR(msg)); + return NULL; } msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); memset(msg->front.iov_base, 0, msg->front.iov_len); @@ -1263,8 +1263,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, false, 1); - if (IS_ERR(req)) - return PTR_ERR(req); + if (!req) + return -ENOMEM; /* it may be a short read due to an object boundary */ req->r_pages = pages; @@ -1306,8 +1306,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, snapc, do_sync, truncate_seq, truncate_size, mtime, nofail, 1); - if (IS_ERR(req)) - return PTR_ERR(req); + if (!req) + return -ENOMEM; /* it may be a short write due to an object boundary */ req->r_pages = pages; @@ -1393,7 +1393,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, pr_warning("get_reply front %d > preallocated %d\n", front, (int)req->r_reply->front.iov_len); m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, 0, 0, NULL); - if (IS_ERR(m)) + if (!m) goto out; ceph_msg_put(req->r_reply); req->r_reply = m; @@ -1409,7 +1409,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, tid, want, m->nr_pages); *skip = 1; ceph_msg_put(m); - m = ERR_PTR(-EIO); + m = NULL; goto out; } m->pages = req->r_pages; -- cgit v1.2.3 From bb257664f748bcfc80715f85f70f0f560caec3b4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 1 Apr 2010 16:07:23 -0700 Subject: ceph: simplify ceph_msg_new We only need to pass in front_len. Callers can attach any other payload pieces (middle, data) as they see fit. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- fs/ceph/mds_client.c | 11 +++++------ fs/ceph/messenger.c | 20 +++++++++----------- fs/ceph/messenger.h | 4 +--- fs/ceph/mon_client.c | 16 +++++++--------- fs/ceph/msgpool.c | 4 ++-- fs/ceph/osd_client.c | 8 ++++---- 7 files changed, 29 insertions(+), 36 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8755e2d83d4c..74c74842c860 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -938,7 +938,7 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc)); if (!msg) return -ENOMEM; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7e89c185d38d..35dbdad07b1c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -665,7 +665,7 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) struct ceph_msg *msg; struct ceph_mds_session_head *h; - msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h)); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); return NULL; @@ -1051,8 +1051,7 @@ static int add_cap_releases(struct ceph_mds_client *mdsc, while (session->s_num_cap_releases < session->s_nr_caps + extra) { spin_unlock(&session->s_cap_lock); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, - 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE); if (!msg) goto out_unlocked; dout("add_cap_releases %p msg %p now %d\n", session, msg, @@ -1418,7 +1417,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_old_dentry_drop) len += req->r_old_dentry->d_name.len; - msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len); if (!msg) { msg = ERR_PTR(-ENOMEM); goto out_free2; @@ -2154,7 +2153,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) ceph_pagelist_init(pagelist); err = -ENOMEM; - reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, 0, 0, NULL); + reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0); if (!reply) goto fail_nomsg; @@ -2462,7 +2461,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, dnamelen = dentry->d_name.len; len += dnamelen; - msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len); if (!msg) return; lease = msg->front.iov_base; diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index fe7d0c52ae3c..395ce326beda 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -2081,8 +2081,7 @@ void ceph_con_keepalive(struct ceph_connection *con) * construct a new message with given type, size * the new msg has a ref count of 1. */ -struct ceph_msg *ceph_msg_new(int type, int front_len, - int page_len, int page_off, struct page **pages) +struct ceph_msg *ceph_msg_new(int type, int front_len) { struct ceph_msg *m; @@ -2098,8 +2097,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); m->hdr.middle_len = 0; - m->hdr.data_len = cpu_to_le32(page_len); - m->hdr.data_off = cpu_to_le16(page_off); + m->hdr.data_len = 0; + m->hdr.data_off = 0; m->hdr.reserved = 0; m->footer.front_crc = 0; m->footer.middle_crc = 0; @@ -2133,18 +2132,17 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, m->middle = NULL; /* data */ - m->nr_pages = calc_pages_for(page_off, page_len); - m->pages = pages; + m->nr_pages = 0; + m->pages = NULL; m->pagelist = NULL; - dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, - m->nr_pages); + dout("ceph_msg_new %p front %d\n", m, front_len); return m; out2: ceph_msg_put(m); out: - pr_err("msg_new can't create type %d len %d\n", type, front_len); + pr_err("msg_new can't create type %d front %d\n", type, front_len); return NULL; } @@ -2193,7 +2191,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, } if (!msg) { *skip = 0; - msg = ceph_msg_new(type, front_len, 0, 0, NULL); + msg = ceph_msg_new(type, front_len); if (!msg) { pr_err("unable to allocate msg type %d len %d\n", type, front_len); @@ -2202,7 +2200,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, } memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); - if (middle_len) { + if (middle_len && !msg->middle) { ret = ceph_alloc_middle(con, msg); if (ret < 0) { ceph_msg_put(msg); diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a5caf91cc971..27fb69585f63 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -234,9 +234,7 @@ extern void ceph_con_keepalive(struct ceph_connection *con); extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); extern void ceph_con_put(struct ceph_connection *con); -extern struct ceph_msg *ceph_msg_new(int type, int front_len, - int page_len, int page_off, - struct page **pages); +extern struct ceph_msg *ceph_msg_new(int type, int front_len); extern void ceph_msg_kfree(struct ceph_msg *m); diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 35f593e7e364..9900706fee75 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -191,7 +191,7 @@ static void __send_subscribe(struct ceph_mon_client *monc) struct ceph_mon_subscribe_item *i; void *p, *end; - msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96); if (!msg) return; @@ -491,10 +491,10 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) init_completion(&req->completion); err = -ENOMEM; - req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); + req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h)); if (!req->request) goto out; - req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, 0, 0, NULL); + req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024); if (!req->reply) goto out; @@ -633,17 +633,15 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) /* msg pools */ err = -ENOMEM; monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, - sizeof(struct ceph_mon_subscribe_ack), - 0, 0, NULL); + sizeof(struct ceph_mon_subscribe_ack)); if (!monc->m_subscribe_ack) goto out_monmap; - monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, 0, 0, - NULL); + monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096); if (!monc->m_auth_reply) goto out_subscribe_ack; - monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); + monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096); monc->pending_auth = 0; if (!monc->m_auth) goto out_auth_reply; @@ -818,7 +816,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: - m = ceph_msg_new(type, front_len, 0, 0, NULL); + m = ceph_msg_new(type, front_len); break; } diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index 04fea84890da..10d3632cc403 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -11,7 +11,7 @@ static void *alloc_fn(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; - return ceph_msg_new(0, pool->front_len, 0, 0, NULL); + return ceph_msg_new(0, pool->front_len); } static void free_fn(void *element, void *arg) @@ -43,7 +43,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, WARN_ON(1); /* try to alloc a fresh message */ - return ceph_msg_new(0, front_len, 0, 0, NULL); + return ceph_msg_new(0, front_len); } return mempool_alloc(pool->pool, GFP_NOFS); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index a51d0df2af30..a44b3b6374ee 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -164,7 +164,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, 0, 0, NULL); + OSD_OPREPLY_FRONT_LEN); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -178,7 +178,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, 0, 0, NULL); + msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -1392,7 +1392,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (front > req->r_reply->front.iov_len) { pr_warning("get_reply front %d > preallocated %d\n", front, (int)req->r_reply->front.iov_len); - m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, 0, 0, NULL); + m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front); if (!m) goto out; ceph_msg_put(req->r_reply); @@ -1435,7 +1435,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, switch (type) { case CEPH_MSG_OSD_MAP: - return ceph_msg_new(type, front, 0, 0, NULL); + return ceph_msg_new(type, front); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); default: -- cgit v1.2.3 From 6f2bc3ff4cdb03903c79e155e9e1889ce176de09 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 2 Apr 2010 16:16:34 -0700 Subject: ceph: clean up connection reset Reset out_keepalive_pending and peer_global_seq, and drop unused var. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 2 ++ fs/ceph/messenger.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 395ce326beda..8cfca375c6a9 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -340,6 +340,7 @@ static void reset_connection(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } + con->out_keepalive_pending = false; con->in_seq = 0; con->in_seq_acked = 0; } @@ -357,6 +358,7 @@ void ceph_con_close(struct ceph_connection *con) clear_bit(WRITE_PENDING, &con->state); mutex_lock(&con->mutex); reset_connection(con); + con->peer_global_seq = 0; cancel_delayed_work(&con->work); mutex_unlock(&con->mutex); queue_con(con); diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 27fb69585f63..e56564f7e71c 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -158,7 +158,6 @@ struct ceph_connection { struct list_head out_queue; struct list_head out_sent; /* sending or sent but unacked */ u64 out_seq; /* last message queued for send */ - u64 out_seq_sent; /* last message sent */ bool out_keepalive_pending; u64 in_seq, in_seq_acked; /* last message received, acked */ -- cgit v1.2.3 From 6822d00b5462e7a9dfa11dcc60cc25823a2107c5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 7 Apr 2010 11:23:20 -0700 Subject: ceph: wait for both monmap and osdmap when opening session Signed-off-by: Yehuda Sadeh --- fs/ceph/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 93ad169a6ae1..a8124e89dea1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -682,9 +682,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) /* * true if we have the mon map (and have thus joined the cluster) */ -static int have_mon_map(struct ceph_client *client) +static int have_mon_and_osd_map(struct ceph_client *client) { - return client->monc.monmap && client->monc.monmap->epoch; + return client->monc.monmap && client->monc.monmap->epoch && + client->osdc.osdmap && client->osdc.osdmap->epoch; } /* @@ -762,7 +763,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, if (err < 0) goto out; - while (!have_mon_map(client)) { + while (!have_mon_and_osd_map(client)) { err = -EIO; if (timeout && time_after_eq(jiffies, started + timeout)) goto out; @@ -770,8 +771,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, /* wait */ dout("mount waiting for mon_map\n"); err = wait_event_interruptible_timeout(client->auth_wq, - have_mon_map(client) || (client->auth_err < 0), - timeout); + have_mon_and_osd_map(client) || (client->auth_err < 0), + timeout); if (err == -EINTR || err == -ERESTARTSYS) goto out; if (client->auth_err < 0) { -- cgit v1.2.3 From 1bb71637d07d58e993ef3f8e2c6b7ca6f4c0e0b8 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Thu, 8 Apr 2010 19:48:57 +0800 Subject: ceph: remove unused #includes Remove unused #include's in fs/ceph/super.c Signed-off-by: Huang Weiyi Signed-off-by: Sage Weil --- fs/ceph/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a8124e89dea1..b3225bf63f58 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -8,14 +8,11 @@ #include #include #include -#include #include #include #include #include #include -#include -#include #include "decode.h" #include "super.h" -- cgit v1.2.3 From c473ad927e6b3be0bac51ddf312e5d8d2b9220b0 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 13 Apr 2010 19:34:26 +0100 Subject: ceph: wake up mount thread when getting osdmap Now that the mount thread waits for the osdmap, it needs to be awaken. Signed-off-by: Yehuda Sadeh --- fs/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ceph') diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index a44b3b6374ee..b81e6f97010e 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -1078,6 +1078,7 @@ done: if (newmap) kick_requests(osdc, NULL); up_read(&osdc->map_sem); + wake_up(&osdc->client->auth_wq); return; bad: -- cgit v1.2.3 From 0d509c949a4d3f21943bd93863a2e6c2f0d0c004 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Apr 2010 12:31:13 +0200 Subject: ceph: d_obtain_alias() returns ERR_PTR() d_obtain_alias() doesn't return NULL, it returns an ERR_PTR(). Signed-off-by: Dan Carpenter Signed-off-by: Sage Weil --- fs/ceph/export.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 15683905a7b6..17447644d675 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -93,11 +93,11 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, return ERR_PTR(-ESTALE); dentry = d_obtain_alias(inode); - if (!dentry) { + if (IS_ERR(dentry)) { pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", fh->ino, inode); iput(inode); - return ERR_PTR(-ENOMEM); + return dentry; } err = ceph_init_dentry(dentry); @@ -149,11 +149,11 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, } dentry = d_obtain_alias(inode); - if (!dentry) { + if (IS_ERR(dentry)) { pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", cfh->ino, inode); iput(inode); - return ERR_PTR(-ENOMEM); + return dentry; } err = ceph_init_dentry(dentry); if (err < 0) { @@ -202,11 +202,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, return ERR_PTR(-ESTALE); dentry = d_obtain_alias(inode); - if (!dentry) { + if (IS_ERR(dentry)) { pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", cfh->ino, inode); iput(inode); - return ERR_PTR(-ENOMEM); + return dentry; } err = ceph_init_dentry(dentry); if (err < 0) { -- cgit v1.2.3 From f26e681d52fcc4778d8c604ef047487ca9f3df95 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 21 Apr 2010 11:09:38 -0700 Subject: ceph: osdtimeout=0 for now timeout Allow the osd reset timeout to be disabled. Signed-off-by: Sage Weil --- fs/ceph/osd_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index b81e6f97010e..97a3a57fe8cd 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -715,7 +715,7 @@ static void handle_timeout(struct work_struct *work) * should mark the osd as failed and we should find out about * it from an updated osd map. */ - while (!list_empty(&osdc->req_lru)) { + while (timeout && !list_empty(&osdc->req_lru)) { req = list_entry(osdc->req_lru.next, struct ceph_osd_request, r_req_lru_item); -- cgit v1.2.3 From 559c1e0073ae779d60e1c673cda837f3e4295302 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 May 2010 09:55:18 -0700 Subject: ceph: include auth method in error messages Signed-off-by: Sage Weil --- fs/ceph/auth.c | 9 +++++---- fs/ceph/auth.h | 2 ++ fs/ceph/auth_none.c | 1 + fs/ceph/auth_x.c | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 818afe72e6c7..9f46de2ba7a7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -150,7 +150,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac, ret = ac->ops->build_request(ac, p + sizeof(u32), end); if (ret < 0) { - pr_err("error %d building request\n", ret); + pr_err("error %d building auth method %s request\n", ret, + ac->ops->name); return ret; } dout(" built request %d bytes\n", ret); @@ -216,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, if (ac->protocol != protocol) { ret = ceph_auth_init_protocol(ac, protocol); if (ret) { - pr_err("error %d on auth protocol %d init\n", - ret, protocol); + pr_err("error %d on auth method %s init\n", + ret, ac->ops->name); goto out; } } @@ -229,7 +230,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, if (ret == -EAGAIN) { return ceph_build_auth_request(ac, reply_buf, reply_len); } else if (ret) { - pr_err("authentication error %d\n", ret); + pr_err("auth method '%s' error %d\n", ac->ops->name, ret); return ret; } return 0; diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index ca4f57cfb267..4429a707c021 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h @@ -15,6 +15,8 @@ struct ceph_auth_client; struct ceph_authorizer; struct ceph_auth_client_ops { + const char *name; + /* * true if we are authenticated and can connect to * services. diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 8cd9e3af07f7..24407c119291 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -94,6 +94,7 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, } static const struct ceph_auth_client_ops ceph_auth_none_ops = { + .name = "none", .reset = reset, .destroy = destroy, .is_authenticated = is_authenticated, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index fee5a08da881..18c1a1de5547 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -618,6 +618,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static const struct ceph_auth_client_ops ceph_x_ops = { + .name = "x", .is_authenticated = ceph_x_is_authenticated, .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, -- cgit v1.2.3 From 8c6efb58a5bab880d45b2078cb55ec4320707daf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 23 Apr 2010 11:36:54 -0700 Subject: ceph: fix memory leak due to possible dentry init race Free dentry_info in error path. Signed-off-by: Sage Weil --- fs/ceph/dir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 33feac75c532..422794fd10e0 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry) return -ENOMEM; /* oh well */ spin_lock(&dentry->d_lock); - if (dentry->d_fsdata) /* lost a race */ + if (dentry->d_fsdata) { + /* lost a race */ + kmem_cache_free(ceph_dentry_cachep, di); goto out_unlock; + } di->dentry = dentry; di->lease_session = NULL; dentry->d_fsdata = di; -- cgit v1.2.3 From 4f48280ee1d0654390cd50ad0c41ea93309e7c91 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 24 Apr 2010 09:56:35 -0700 Subject: ceph: name msgpools; useful error messages Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 13 +++++++++---- fs/ceph/msgpool.h | 4 +++- fs/ceph/osd_client.c | 6 ++++-- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index 10d3632cc403..50fe5cc5de76 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -10,8 +10,12 @@ static void *alloc_fn(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; + void *p; - return ceph_msg_new(0, pool->front_len); + p = ceph_msg_new(0, pool->front_len); + if (!p) + pr_err("msgpool %s alloc failed\n", pool->name); + return p; } static void free_fn(void *element, void *arg) @@ -20,12 +24,13 @@ static void free_fn(void *element, void *arg) } int ceph_msgpool_init(struct ceph_msgpool *pool, - int front_len, int size, bool blocking) + int front_len, int size, bool blocking, const char *name) { pool->front_len = front_len; pool->pool = mempool_create(size, alloc_fn, free_fn, pool); if (!pool->pool) return -ENOMEM; + pool->name = name; return 0; } @@ -38,8 +43,8 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { if (front_len > pool->front_len) { - pr_err("msgpool_get pool %p need front %d, pool size is %d\n", - pool, front_len, pool->front_len); + pr_err("msgpool_get pool %s need front %d, pool size is %d\n", + pool->name, front_len, pool->front_len); WARN_ON(1); /* try to alloc a fresh message */ diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h index 62a61c7fca08..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/fs/ceph/msgpool.h @@ -9,12 +9,14 @@ * avoid unexpected OOM conditions. */ struct ceph_msgpool { + const char *name; mempool_t *pool; int front_len; /* preallocated payload size */ }; extern int ceph_msgpool_init(struct ceph_msgpool *pool, - int front_len, int size, bool blocking); + int front_len, int size, bool blocking, + const char *name); extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, int front_len); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 97a3a57fe8cd..c0aca732efd3 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -1214,11 +1214,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) if (!osdc->req_mempool) goto out; - err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true); + err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, + "osd_op"); if (err < 0) goto out_mempool; err = ceph_msgpool_init(&osdc->msgpool_op_reply, - OSD_OPREPLY_FRONT_LEN, 10, true); + OSD_OPREPLY_FRONT_LEN, 10, true, + "osd_op_reply"); if (err < 0) goto out_msgpool; return 0; -- cgit v1.2.3 From 9dd4658db1be5ca92c2ed2fd7a100d973125d9c5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 28 Apr 2010 13:51:50 -0700 Subject: ceph: close messenger race Simplify messenger locking, and close race between ceph_con_close() setting the CLOSED bit and con_work() checking the bit, then taking the mutex. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 8cfca375c6a9..bb16edb1aef6 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -1546,7 +1546,6 @@ static int try_write(struct ceph_connection *con) dout("try_write start %p state %lu nref %d\n", con, con->state, atomic_read(&con->nref)); - mutex_lock(&con->mutex); more: dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); @@ -1639,7 +1638,6 @@ do_next: done: ret = 0; out: - mutex_unlock(&con->mutex); dout("try_write done on %p\n", con); return ret; } @@ -1651,7 +1649,6 @@ out: */ static int try_read(struct ceph_connection *con) { - struct ceph_messenger *msgr; int ret = -1; if (!con->sock) @@ -1661,9 +1658,6 @@ static int try_read(struct ceph_connection *con) return 0; dout("try_read start on %p\n", con); - msgr = con->msgr; - - mutex_lock(&con->mutex); more: dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, @@ -1758,7 +1752,6 @@ more: done: ret = 0; out: - mutex_unlock(&con->mutex); dout("try_read done on %p\n", con); return ret; @@ -1830,6 +1823,8 @@ more: dout("con_work %p start, clearing QUEUED\n", con); clear_bit(QUEUED, &con->state); + mutex_lock(&con->mutex); + if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ dout("con_work CLOSED\n"); con_close_socket(con); @@ -1844,11 +1839,16 @@ more: if (test_and_clear_bit(SOCK_CLOSED, &con->state) || try_read(con) < 0 || try_write(con) < 0) { + mutex_unlock(&con->mutex); backoff = 1; ceph_fault(con); /* error/fault path */ + goto done_unlocked; } done: + mutex_unlock(&con->mutex); + +done_unlocked: clear_bit(BUSY, &con->state); dout("con->state=%lu\n", con->state); if (test_bit(QUEUED, &con->state)) { -- cgit v1.2.3 From a6424e48c8d54a5795430b07c4487f1ed280df4e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 29 Apr 2010 09:28:11 -0700 Subject: ceph: fix xattr dangling pointer / double free If we use the xattr_blob, clear the pointer so we don't release the memory at the bottom of the fuction. Reported-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ceph') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ef917232cf37..913cafd70cd0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -619,6 +619,7 @@ static int fill_inode(struct inode *inode, memcpy(ci->i_xattrs.blob->vec.iov_base, iinfo->xattr_data, iinfo->xattr_len); ci->i_xattrs.version = le64_to_cpu(info->xattr_version); + xattr_blob = NULL; } inode->i_mapping->a_ops = &ceph_aops; -- cgit v1.2.3 From bddfa3cc18fcd9c9313a1030b19d3b0ea2639310 Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Thu, 29 Apr 2010 09:32:28 -0700 Subject: ceph: listxattr should compare version by >= If the version hasn't changed, don't rebuild the index. Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 3b4c2620030a..7185d0794df3 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -568,7 +568,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) ci->i_xattrs.version, ci->i_xattrs.index_version); if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && - (ci->i_xattrs.index_version > ci->i_xattrs.version)) { + (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { goto list_xattr; } else { spin_unlock(&inode->i_lock); -- cgit v1.2.3 From f1f2765faedc24f8f2e9fd68521a5ea469801b60 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 3 May 2010 21:50:39 -0700 Subject: ceph: set next_offset on readdir finish Set next_offset to 2 (always 2!), not 0, on readdir finish. Signed-off-by: Sage Weil --- fs/ceph/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 422794fd10e0..0b0a39d05ca6 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -338,7 +338,7 @@ more: if (req->r_reply_info.dir_end) { kfree(fi->last_name); fi->last_name = NULL; - fi->next_offset = 0; + fi->next_offset = 2; } else { rinfo = &req->r_reply_info; err = note_last_dentry(fi, -- cgit v1.2.3 From e8a7498715181ece36130335536e13733a5c3187 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 15 Apr 2010 14:08:49 -0700 Subject: ceph: skip set_dentry_offset work if directory not I_COMPLETE Signed-off-by: Sage Weil --- fs/ceph/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 913cafd70cd0..49a0935c4390 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -861,6 +861,10 @@ static void ceph_set_dentry_offset(struct dentry *dn) di = ceph_dentry(dn); spin_lock(&inode->i_lock); + if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) { + spin_unlock(&inode->i_lock); + return; + } di->offset = ceph_inode(inode)->i_max_offset++; spin_unlock(&inode->i_lock); -- cgit v1.2.3 From 1b7facc41b42c2ab904b2f88b64b1f8ca0ca6cb7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 16 Apr 2010 12:58:02 -0700 Subject: ceph: don't clobber i_max_offset on already complete dir This can screw up offsets assigned to new dentries and break dcache readdir results. Signed-off-by: Sage Weil --- fs/ceph/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 49a0935c4390..aa22a0bce52f 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -675,7 +675,8 @@ static int fill_inode(struct inode *inode, /* set dir completion flag? */ if (ci->i_files == 0 && ci->i_subdirs == 0 && ceph_snap(inode) == CEPH_NOSNAP && - (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED)) { + (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && + (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { dout(" marking %p complete (empty)\n", inode); ci->i_ceph_flags |= CEPH_I_COMPLETE; ci->i_max_offset = 2; -- cgit v1.2.3 From 1cd3935bedccf592d44343890251452a6dd74fc4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 3 May 2010 22:08:02 -0700 Subject: ceph: set dn offset when spliced We want to assign an offset when the dentry goes from null to linked, which is always done by splice_dentry(). Notably, we should NOT assign an offset when a dentry is first created and is still null. BUG if we try to splice a non-null dentry (we shouldn't). Signed-off-by: Sage Weil --- fs/ceph/dir.c | 13 ++++++----- fs/ceph/inode.c | 71 ++++++++++++++++++++++++++++++--------------------------- 2 files changed, 44 insertions(+), 40 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0b0a39d05ca6..d3bb8132a1aa 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -128,7 +128,8 @@ more: dentry = list_entry(p, struct dentry, d_u.d_child); di = ceph_dentry(dentry); while (1) { - dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next, + dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, + d_unhashed(dentry) ? "!hashed" : "hashed", parent->d_subdirs.prev, parent->d_subdirs.next); if (p == &parent->d_subdirs) { fi->at_end = 1; @@ -571,7 +572,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, !is_root_ceph_dentry(dir, dentry) && (ci->i_ceph_flags & CEPH_I_COMPLETE) && (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { - di->offset = ci->i_max_offset++; spin_unlock(&dir->i_lock); dout(" dir %p complete, -ENOENT\n", dir); d_add(dentry, NULL); @@ -984,8 +984,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *dir = dentry->d_parent->d_inode; - dout("d_revalidate %p '%.*s' inode %p\n", dentry, - dentry->d_name.len, dentry->d_name.name, dentry->d_inode); + dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, + dentry->d_name.len, dentry->d_name.name, dentry->d_inode, + ceph_dentry(dentry)->offset); /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { @@ -1177,8 +1178,8 @@ void ceph_dentry_lru_touch(struct dentry *dn) struct ceph_dentry_info *di = ceph_dentry(dn); struct ceph_mds_client *mdsc; - dout("dentry_lru_touch %p %p '%.*s'\n", di, dn, - dn->d_name.len, dn->d_name.name); + dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, + dn->d_name.len, dn->d_name.name, di->offset); if (di) { mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_lru_lock); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index aa22a0bce52f..1bcf98bd0255 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -803,6 +803,37 @@ out_unlock: return; } +/* + * Set dentry's directory position based on the current dir's max, and + * order it in d_subdirs, so that dcache_readdir behaves. + */ +static void ceph_set_dentry_offset(struct dentry *dn) +{ + struct dentry *dir = dn->d_parent; + struct inode *inode = dn->d_parent->d_inode; + struct ceph_dentry_info *di; + + BUG_ON(!inode); + + di = ceph_dentry(dn); + + spin_lock(&inode->i_lock); + if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) { + spin_unlock(&inode->i_lock); + return; + } + di->offset = ceph_inode(inode)->i_max_offset++; + spin_unlock(&inode->i_lock); + + spin_lock(&dcache_lock); + spin_lock(&dn->d_lock); + list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); + dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, + dn->d_u.d_child.prev, dn->d_u.d_child.next); + spin_unlock(&dn->d_lock); + spin_unlock(&dcache_lock); +} + /* * splice a dentry to an inode. * caller must hold directory i_mutex for this to be safe. @@ -816,6 +847,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, { struct dentry *realdn; + BUG_ON(dn->d_inode); + /* dn must be unhashed */ if (!d_unhashed(dn)) d_drop(dn); @@ -837,47 +870,16 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, dn = realdn; } else { BUG_ON(!ceph_dentry(dn)); - dout("dn %p attached to %p ino %llx.%llx\n", dn, dn->d_inode, ceph_vinop(dn->d_inode)); } if ((!prehash || *prehash) && d_unhashed(dn)) d_rehash(dn); + ceph_set_dentry_offset(dn); out: return dn; } -/* - * Set dentry's directory position based on the current dir's max, and - * order it in d_subdirs, so that dcache_readdir behaves. - */ -static void ceph_set_dentry_offset(struct dentry *dn) -{ - struct dentry *dir = dn->d_parent; - struct inode *inode = dn->d_parent->d_inode; - struct ceph_dentry_info *di; - - BUG_ON(!inode); - - di = ceph_dentry(dn); - - spin_lock(&inode->i_lock); - if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) { - spin_unlock(&inode->i_lock); - return; - } - di->offset = ceph_inode(inode)->i_max_offset++; - spin_unlock(&inode->i_lock); - - spin_lock(&dcache_lock); - spin_lock(&dn->d_lock); - list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); - dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, - dn->d_u.d_child.prev, dn->d_u.d_child.next); - spin_unlock(&dn->d_lock); - spin_unlock(&dcache_lock); -} - /* * Incorporate results into the local cache. This is either just * one inode, or a directory, dentry, and possibly linked-to inode (e.g., @@ -1030,6 +1032,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, ceph_invalidate_dentry_lease(dn); /* take overwritten dentry's readdir offset */ + dout("dn %p gets %p offset %lld (old offset %lld)\n", + req->r_old_dentry, dn, ceph_dentry(dn)->offset, + ceph_dentry(req->r_old_dentry)->offset); ceph_dentry(req->r_old_dentry)->offset = ceph_dentry(dn)->offset; @@ -1074,7 +1079,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, goto done; } req->r_dentry = dn; /* may have spliced */ - ceph_set_dentry_offset(dn); igrab(in); } else if (ceph_ino(in) == vino.ino && ceph_snap(in) == vino.snap) { @@ -1117,7 +1121,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, err = PTR_ERR(dn); goto done; } - ceph_set_dentry_offset(dn); req->r_dentry = dn; /* may have spliced */ igrab(in); rinfo->head->is_dentry = 1; /* fool notrace handlers */ -- cgit v1.2.3 From 6e19a16ef28aee09dbcbb9f3ff24ac4f439def7d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 29 Apr 2010 16:38:32 -0700 Subject: ceph: clean up mount options, ->show_options() Ensure all options are included in /proc/mounts. Some cleanup. Signed-off-by: Sage Weil --- fs/ceph/super.c | 90 +++++++++++++++++++++++++++++++++++++-------------------- fs/ceph/super.h | 19 ++++++------ 2 files changed, 69 insertions(+), 40 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b3225bf63f58..34b16cb302fe 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -110,6 +110,34 @@ static int ceph_syncfs(struct super_block *sb, int wait) return 0; } +static int default_congestion_kb(void) +{ + int congestion_kb; + + /* + * Copied from NFS + * + * congestion size, scale with available memory. + * + * 64MB: 8192k + * 128MB: 11585k + * 256MB: 16384k + * 512MB: 23170k + * 1GB: 32768k + * 2GB: 46340k + * 4GB: 65536k + * 8GB: 92681k + * 16GB: 131072k + * + * This allows larger machines to have larger/more transfers. + * Limit the default to 256M + */ + congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + if (congestion_kb > 256*1024) + congestion_kb = 256*1024; + + return congestion_kb; +} /** * ceph_show_options - Show mount options in /proc/mounts @@ -135,6 +163,33 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",nocrc"); if (args->flags & CEPH_OPT_NOASYNCREADDIR) seq_puts(m, ",noasyncreaddir"); + + if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) + seq_printf(m, ",mount_timeout=%d", args->mount_timeout); + if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) + seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); + if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) + seq_printf(m, ",osdtimeout=%d", args->osd_timeout); + if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) + seq_printf(m, ",osdkeepalivetimeout=%d", + args->osd_keepalive_timeout); + if (args->wsize) + seq_printf(m, ",wsize=%d", args->wsize); + if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) + seq_printf(m, ",rsize=%d", args->rsize); + if (args->congestion_kb != default_congestion_kb()) + seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); + if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) + seq_printf(m, ",caps_wanted_delay_min=%d", + args->caps_wanted_delay_min); + if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) + seq_printf(m, ",caps_wanted_delay_max=%d", + args->caps_wanted_delay_max); + if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) + seq_printf(m, ",cap_release_safety=%d", + args->cap_release_safety); + if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) + seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) seq_printf(m, ",snapdirname=%s", args->snapdir_name); if (args->name) @@ -158,35 +213,6 @@ static void ceph_inode_init_once(void *foo) inode_init_once(&ci->vfs_inode); } -static int default_congestion_kb(void) -{ - int congestion_kb; - - /* - * Copied from NFS - * - * congestion size, scale with available memory. - * - * 64MB: 8192k - * 128MB: 11585k - * 256MB: 16384k - * 512MB: 23170k - * 1GB: 32768k - * 2GB: 46340k - * 4GB: 65536k - * 8GB: 92681k - * 16GB: 131072k - * - * This allows larger machines to have larger/more transfers. - * Limit the default to 256M - */ - congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); - if (congestion_kb > 256*1024) - congestion_kb = 256*1024; - - return congestion_kb; -} - static int __init init_caches(void) { ceph_inode_cachep = kmem_cache_create("ceph_inode_info", @@ -305,6 +331,7 @@ enum { Opt_osd_idle_ttl, Opt_caps_wanted_delay_min, Opt_caps_wanted_delay_max, + Opt_cap_release_safety, Opt_readdir_max_entries, Opt_congestion_kb, Opt_last_int, @@ -336,6 +363,7 @@ static match_table_t arg_tokens = { {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, + {Opt_cap_release_safety, "cap_release_safety=%d"}, {Opt_readdir_max_entries, "readdir_max_entries=%d"}, {Opt_congestion_kb, "write_congestion_kb=%d"}, /* int args above */ @@ -385,8 +413,8 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); - args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; - args->max_readdir = 1024; + args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; + args->max_readdir = CEPH_MAX_READDIR_DEFAULT; args->congestion_kb = default_congestion_kb(); /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 51b3ff238454..395adc5fcebb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -52,24 +52,24 @@ struct ceph_mount_args { int sb_flags; + int flags; + struct ceph_fsid fsid; + struct ceph_entity_addr my_addr; int num_mon; struct ceph_entity_addr *mon_addr; - int flags; int mount_timeout; int osd_idle_ttl; - int caps_wanted_delay_min, caps_wanted_delay_max; - struct ceph_fsid fsid; - struct ceph_entity_addr my_addr; + int osd_timeout; + int osd_keepalive_timeout; int wsize; int rsize; /* max readahead */ + int congestion_kb; /* max writeback in flight */ + int caps_wanted_delay_min, caps_wanted_delay_max; + int cap_release_safety; int max_readdir; /* max readdir size */ - int congestion_kb; /* max readdir size */ - int osd_timeout; - int osd_keepalive_timeout; char *snapdir_name; /* default ".snap" */ char *name; char *secret; - int cap_release_safety; }; /* @@ -80,13 +80,13 @@ struct ceph_mount_args { #define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_IDLE_TTL_DEFAULT 60 #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ +#define CEPH_MAX_READDIR_DEFAULT 1024 #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) #define CEPH_SNAPDIRNAME_DEFAULT ".snap" #define CEPH_AUTH_NAME_DEFAULT "guest" - /* * Delay telling the MDS we no longer want caps, in case we reopen * the file. Delay a minimum amount of time, even if we send a cap @@ -96,6 +96,7 @@ struct ceph_mount_args { #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ +#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) /* mount state */ enum { -- cgit v1.2.3 From b736b3d9d0ba52693701373d7cd88aaad8e5bed3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 30 Apr 2010 12:45:02 -0700 Subject: ceph: adjust masked struct_v variable names Reported-by: Bill Pemberton Signed-off-by: Sage Weil --- fs/ceph/auth_x.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 18c1a1de5547..7b206231566d 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -127,7 +127,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, int ret; char *dbuf; char *ticket_buf; - u8 struct_v; + u8 reply_struct_v; dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); if (!dbuf) @@ -139,14 +139,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, goto out_dbuf; ceph_decode_need(&p, end, 1 + sizeof(u32), bad); - struct_v = ceph_decode_8(&p); - if (struct_v != 1) + reply_struct_v = ceph_decode_8(&p); + if (reply_struct_v != 1) goto bad; num = ceph_decode_32(&p); dout("%d tickets\n", num); while (num--) { int type; - u8 struct_v; + u8 tkt_struct_v, blob_struct_v; struct ceph_x_ticket_handler *th; void *dp, *dend; int dlen; @@ -165,8 +165,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, type = ceph_decode_32(&p); dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); - struct_v = ceph_decode_8(&p); - if (struct_v != 1) + tkt_struct_v = ceph_decode_8(&p); + if (tkt_struct_v != 1) goto bad; th = get_ticket_handler(ac, type); @@ -186,8 +186,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, dend = dbuf + dlen; dp = dbuf; - struct_v = ceph_decode_8(&dp); - if (struct_v != 1) + tkt_struct_v = ceph_decode_8(&dp); + if (tkt_struct_v != 1) goto bad; memcpy(&old_key, &th->session_key, sizeof(old_key)); @@ -224,7 +224,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, tpend = tp + dlen; dout(" ticket blob is %d bytes\n", dlen); ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); - struct_v = ceph_decode_8(&tp); + blob_struct_v = ceph_decode_8(&tp); new_secret_id = ceph_decode_64(&tp); ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); if (ret) -- cgit v1.2.3 From 56b7cf9581fa0486657102a6fb8efabc3eadeba1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 3 May 2010 15:22:00 -0700 Subject: ceph: skip mds sync on forced unmount Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 35dbdad07b1c..0d451a83bc83 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2748,6 +2748,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush; + if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) + return; + dout("sync\n"); mutex_lock(&mdsc->mutex); want_tid = mdsc->last_tid; -- cgit v1.2.3 From 31e0cf8f6a1488b6ca69dcdceeaed107ecfd6463 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 4 May 2010 16:39:35 -0700 Subject: ceph: name bdi ceph-%d instead of major:minor The bdi_setup_and_register() helper doesn't help us since we bdi_init() in create_client() and bdi_register() only when sget() succeeds. Signed-off-by: Sage Weil --- fs/ceph/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 34b16cb302fe..7f5b20dc4945 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -910,6 +910,8 @@ static int ceph_compare_super(struct super_block *sb, void *data) /* * construct our own bdi so we can control readahead, etc. */ +static atomic_long_t bdi_seq = ATOMIC_INIT(0); + static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { int err; @@ -919,7 +921,8 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) client->backing_dev_info.ra_pages = (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; - err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); + err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", + atomic_long_inc_return(&bdi_seq)); if (!err) sb->s_bdi = &client->backing_dev_info; return err; -- cgit v1.2.3 From 0f8605f2bde2c69737709765dfc574558ea35d4e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 5 May 2010 15:51:35 -0700 Subject: ceph: clean up cap release loop vs spinlock Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0d451a83bc83..824a9b2cde71 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1143,10 +1143,8 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_msg *msg; dout("send_cap_releases mds%d\n", session->s_mds); - while (1) { - spin_lock(&session->s_cap_lock); - if (list_empty(&session->s_cap_releases_done)) - break; + spin_lock(&session->s_cap_lock); + while (!list_empty(&session->s_cap_releases_done)) { msg = list_first_entry(&session->s_cap_releases_done, struct ceph_msg, list_head); list_del_init(&msg->list_head); @@ -1154,6 +1152,7 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); dout("send_cap_releases mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); + spin_lock(&session->s_cap_lock); } spin_unlock(&session->s_cap_lock); } -- cgit v1.2.3 From a5ee751c15016d0deee0d651e42a3b163ea73ade Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 7 May 2010 10:27:14 +0200 Subject: ceph: cleanup: remove unused assignement We don't ever use "dirty" so we can remove it. Signed-off-by: Dan Carpenter Signed-off-by: Sage Weil --- fs/ceph/caps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 74c74842c860..0e85d3c80790 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1718,10 +1718,9 @@ out_unlocked: static int caps_are_flushed(struct inode *inode, unsigned tid) { struct ceph_inode_info *ci = ceph_inode(inode); - int dirty, i, ret = 1; + int i, ret = 1; spin_lock(&inode->i_lock); - dirty = __ceph_caps_dirty(ci); for (i = 0; i < CEPH_CAP_BITS; i++) if ((ci->i_flushing_caps & (1 << i)) && ci->i_cap_flush_tid[i] <= tid) { -- cgit v1.2.3 From dbad185d4939ffb806f6fa753ef9f470e3b72b62 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 25 Mar 2010 15:45:38 -0700 Subject: ceph: drop src address(es) from message header [new protocol feature] The CEPH_FEATURE_NOSRCADDR protocol feature avoids putting the full source address in each message header (twice). This patch switches the client to the new scheme, and _requires_ this feature on the server. The server will support both the old and new schemes. That means an old client will work with a new server, but a new client will not work with an old server. Signed-off-by: Sage Weil --- fs/ceph/ceph_fs.h | 13 +++++++++++-- fs/ceph/messenger.c | 14 ++++++-------- fs/ceph/msgr.h | 20 +++++++++++++++++++- 3 files changed, 36 insertions(+), 11 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 0c2241ef3653..8b194c6ef7d3 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -53,8 +53,17 @@ /* * feature bits */ -#define CEPH_FEATURE_SUPPORTED 0 -#define CEPH_FEATURE_REQUIRED 0 +#define CEPH_FEATURE_UID 1 +#define CEPH_FEATURE_NOSRCADDR 2 + +#define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID +#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID +#define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID +#define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR /* diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index bb16edb1aef6..fe75ec7d8402 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -663,7 +663,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, con->connect_seq, global_seq, proto); - con->out_connect.features = CEPH_FEATURE_SUPPORTED; + con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); con->out_connect.global_seq = cpu_to_le32(global_seq); @@ -1126,8 +1126,8 @@ static void fail_protocol(struct ceph_connection *con) static int process_connect(struct ceph_connection *con) { - u64 sup_feat = CEPH_FEATURE_SUPPORTED; - u64 req_feat = CEPH_FEATURE_REQUIRED; + u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; + u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; u64 server_feat = le64_to_cpu(con->in_reply.features); dout("process_connect on %p tag %d\n", con, (int)con->in_tag); @@ -1514,14 +1514,14 @@ static void process_message(struct ceph_connection *con) /* if first message, set peer_name */ if (con->peer_name.type == 0) - con->peer_name = msg->hdr.src.name; + con->peer_name = msg->hdr.src; con->in_seq++; mutex_unlock(&con->mutex); dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", msg, le64_to_cpu(msg->hdr.seq), - ENTITY_NAME(msg->hdr.src.name), + ENTITY_NAME(msg->hdr.src), le16_to_cpu(msg->hdr.type), ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), le32_to_cpu(msg->hdr.front_len), @@ -1987,9 +1987,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) } /* set src+dst */ - msg->hdr.src.name = con->msgr->inst.name; - msg->hdr.src.addr = con->msgr->my_enc_addr; - msg->hdr.orig_src = msg->hdr.src; + msg->hdr.src = con->msgr->inst.name; BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 8aaab414f3f8..6baa8e4f8e7f 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h @@ -120,7 +120,7 @@ struct ceph_msg_connect_reply { /* * message header */ -struct ceph_msg_header { +struct ceph_msg_header_old { __le64 seq; /* message seq# for this session */ __le64 tid; /* transaction id */ __le16 type; /* message type */ @@ -138,6 +138,24 @@ struct ceph_msg_header { __le32 crc; /* header crc32c */ } __attribute__ ((packed)); +struct ceph_msg_header { + __le64 seq; /* message seq# for this session */ + __le64 tid; /* transaction id */ + __le16 type; /* message type */ + __le16 priority; /* priority. higher value == higher priority */ + __le16 version; /* version of message encoding */ + + __le32 front_len; /* bytes in main payload */ + __le32 middle_len;/* bytes in middle payload */ + __le32 data_len; /* bytes of data payload */ + __le16 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + + struct ceph_entity_name src; + __le32 reserved; + __le32 crc; /* header crc32c */ +} __attribute__ ((packed)); + #define CEPH_MSG_PRIO_LOW 64 #define CEPH_MSG_PRIO_DEFAULT 127 #define CEPH_MSG_PRIO_HIGH 196 -- cgit v1.2.3 From f8c76f6f250edbdc9d2011b0e05d196a3d8ae895 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Thu, 22 Apr 2010 15:40:37 -0700 Subject: ceph: make mon client statfs handling more generic This is being done so that we could reuse the statfs infrastructure with other requests that return values. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/debugfs.c | 13 ++++++--- fs/ceph/mon_client.c | 82 ++++++++++++++++++++++++++-------------------------- fs/ceph/mon_client.h | 15 +++++----- 3 files changed, 58 insertions(+), 52 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f7048da92acc..3be33fb066cc 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p) static int monc_show(struct seq_file *s, void *p) { struct ceph_client *client = s->private; - struct ceph_mon_statfs_request *req; + struct ceph_mon_generic_request *req; struct ceph_mon_client *monc = &client->monc; struct rb_node *rp; @@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p) if (monc->want_next_osdmap) seq_printf(s, "want next osdmap\n"); - for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) { - req = rb_entry(rp, struct ceph_mon_statfs_request, node); - seq_printf(s, "%lld statfs\n", req->tid); + for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { + __u16 op; + req = rb_entry(rp, struct ceph_mon_generic_request, node); + op = le16_to_cpu(req->request->hdr.type); + if (op == CEPH_MSG_STATFS) + seq_printf(s, "%lld statfs\n", req->tid); + else + seq_printf(s, "%lld unknown\n", req->tid); } mutex_unlock(&monc->mutex); diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 9900706fee75..7df2f8c157f1 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -353,14 +353,14 @@ out: /* * statfs */ -static struct ceph_mon_statfs_request *__lookup_statfs( +static struct ceph_mon_generic_request *__lookup_generic_req( struct ceph_mon_client *monc, u64 tid) { - struct ceph_mon_statfs_request *req; - struct rb_node *n = monc->statfs_request_tree.rb_node; + struct ceph_mon_generic_request *req; + struct rb_node *n = monc->generic_request_tree.rb_node; while (n) { - req = rb_entry(n, struct ceph_mon_statfs_request, node); + req = rb_entry(n, struct ceph_mon_generic_request, node); if (tid < req->tid) n = n->rb_left; else if (tid > req->tid) @@ -371,16 +371,16 @@ static struct ceph_mon_statfs_request *__lookup_statfs( return NULL; } -static void __insert_statfs(struct ceph_mon_client *monc, - struct ceph_mon_statfs_request *new) +static void __insert_generic_request(struct ceph_mon_client *monc, + struct ceph_mon_generic_request *new) { - struct rb_node **p = &monc->statfs_request_tree.rb_node; + struct rb_node **p = &monc->generic_request_tree.rb_node; struct rb_node *parent = NULL; - struct ceph_mon_statfs_request *req = NULL; + struct ceph_mon_generic_request *req = NULL; while (*p) { parent = *p; - req = rb_entry(parent, struct ceph_mon_statfs_request, node); + req = rb_entry(parent, struct ceph_mon_generic_request, node); if (new->tid < req->tid) p = &(*p)->rb_left; else if (new->tid > req->tid) @@ -390,13 +390,13 @@ static void __insert_statfs(struct ceph_mon_client *monc, } rb_link_node(&new->node, parent, p); - rb_insert_color(&new->node, &monc->statfs_request_tree); + rb_insert_color(&new->node, &monc->generic_request_tree); } -static void release_statfs_request(struct kref *kref) +static void release_generic_request(struct kref *kref) { - struct ceph_mon_statfs_request *req = - container_of(kref, struct ceph_mon_statfs_request, kref); + struct ceph_mon_generic_request *req = + container_of(kref, struct ceph_mon_generic_request, kref); if (req->reply) ceph_msg_put(req->reply); @@ -404,33 +404,33 @@ static void release_statfs_request(struct kref *kref) ceph_msg_put(req->request); } -static void put_statfs_request(struct ceph_mon_statfs_request *req) +static void put_generic_request(struct ceph_mon_generic_request *req) { - kref_put(&req->kref, release_statfs_request); + kref_put(&req->kref, release_generic_request); } -static void get_statfs_request(struct ceph_mon_statfs_request *req) +static void get_generic_request(struct ceph_mon_generic_request *req) { kref_get(&req->kref); } -static struct ceph_msg *get_statfs_reply(struct ceph_connection *con, +static struct ceph_msg *get_generic_reply(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip) { struct ceph_mon_client *monc = con->private; - struct ceph_mon_statfs_request *req; + struct ceph_mon_generic_request *req; u64 tid = le64_to_cpu(hdr->tid); struct ceph_msg *m; mutex_lock(&monc->mutex); - req = __lookup_statfs(monc, tid); + req = __lookup_generic_req(monc, tid); if (!req) { - dout("get_statfs_reply %lld dne\n", tid); + dout("get_generic_reply %lld dne\n", tid); *skip = 1; m = NULL; } else { - dout("get_statfs_reply %lld got %p\n", tid, req->reply); + dout("get_generic_reply %lld got %p\n", tid, req->reply); m = ceph_msg_get(req->reply); /* * we don't need to track the connection reading into @@ -445,7 +445,7 @@ static struct ceph_msg *get_statfs_reply(struct ceph_connection *con, static void handle_statfs_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) { - struct ceph_mon_statfs_request *req; + struct ceph_mon_generic_request *req; struct ceph_mon_statfs_reply *reply = msg->front.iov_base; u64 tid = le64_to_cpu(msg->hdr.tid); @@ -454,21 +454,21 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, dout("handle_statfs_reply %p tid %llu\n", msg, tid); mutex_lock(&monc->mutex); - req = __lookup_statfs(monc, tid); + req = __lookup_generic_req(monc, tid); if (req) { - *req->buf = reply->st; + *(struct ceph_statfs *)req->buf = reply->st; req->result = 0; - get_statfs_request(req); + get_generic_request(req); } mutex_unlock(&monc->mutex); if (req) { complete(&req->completion); - put_statfs_request(req); + put_generic_request(req); } return; bad: - pr_err("corrupt statfs reply, no tid\n"); + pr_err("corrupt generic reply, no tid\n"); ceph_msg_dump(msg); } @@ -477,7 +477,7 @@ bad: */ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) { - struct ceph_mon_statfs_request *req; + struct ceph_mon_generic_request *req; struct ceph_mon_statfs *h; int err; @@ -509,8 +509,8 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) mutex_lock(&monc->mutex); req->tid = ++monc->last_tid; req->request->hdr.tid = cpu_to_le64(req->tid); - __insert_statfs(monc, req); - monc->num_statfs_requests++; + __insert_generic_request(monc, req); + monc->num_generic_requests++; mutex_unlock(&monc->mutex); /* send request and wait */ @@ -518,28 +518,28 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) err = wait_for_completion_interruptible(&req->completion); mutex_lock(&monc->mutex); - rb_erase(&req->node, &monc->statfs_request_tree); - monc->num_statfs_requests--; + rb_erase(&req->node, &monc->generic_request_tree); + monc->num_generic_requests--; mutex_unlock(&monc->mutex); if (!err) err = req->result; out: - kref_put(&req->kref, release_statfs_request); + kref_put(&req->kref, release_generic_request); return err; } /* * Resend pending statfs requests. */ -static void __resend_statfs(struct ceph_mon_client *monc) +static void __resend_generic_request(struct ceph_mon_client *monc) { - struct ceph_mon_statfs_request *req; + struct ceph_mon_generic_request *req; struct rb_node *p; - for (p = rb_first(&monc->statfs_request_tree); p; p = rb_next(p)) { - req = rb_entry(p, struct ceph_mon_statfs_request, node); + for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { + req = rb_entry(p, struct ceph_mon_generic_request, node); ceph_con_send(monc->con, ceph_msg_get(req->request)); } } @@ -652,8 +652,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) monc->sub_sent = 0; INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); - monc->statfs_request_tree = RB_ROOT; - monc->num_statfs_requests = 0; + monc->generic_request_tree = RB_ROOT; + monc->num_generic_requests = 0; monc->last_tid = 0; monc->have_mdsmap = 0; @@ -717,7 +717,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, monc->client->msgr->inst.name.num = monc->auth->global_id; __send_subscribe(monc); - __resend_statfs(monc); + __resend_generic_request(monc); } mutex_unlock(&monc->mutex); } @@ -809,7 +809,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, m = ceph_msg_get(monc->m_subscribe_ack); break; case CEPH_MSG_STATFS_REPLY: - return get_statfs_reply(con, hdr, skip); + return get_generic_reply(con, hdr, skip); case CEPH_MSG_AUTH_REPLY: m = ceph_msg_get(monc->m_auth_reply); break; diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 0046deed0740..76887785a4d7 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h @@ -22,7 +22,7 @@ struct ceph_monmap { }; struct ceph_mon_client; -struct ceph_mon_statfs_request; +struct ceph_mon_generic_request; /* @@ -40,15 +40,16 @@ struct ceph_mon_request { }; /* - * statfs() is done a bit differently because we need to get data back + * ceph_mon_generic_request is being used for the statfs and poolop requests + * which are bening done a bit differently because we need to get data back * to the caller */ -struct ceph_mon_statfs_request { +struct ceph_mon_generic_request { struct kref kref; u64 tid; struct rb_node node; int result; - struct ceph_statfs *buf; + void *buf; struct completion completion; struct ceph_msg *request; /* original request */ struct ceph_msg *reply; /* and reply */ @@ -71,9 +72,9 @@ struct ceph_mon_client { struct ceph_connection *con; bool have_fsid; - /* pending statfs requests */ - struct rb_root statfs_request_tree; - int num_statfs_requests; + /* pending generic requests */ + struct rb_root generic_request_tree; + int num_generic_requests; u64 last_tid; /* mds/osd map */ -- cgit v1.2.3 From e01a594646ebbf964b6058e3bf28125379063439 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 10 May 2010 15:36:44 -0700 Subject: ceph: dicard cap releases on mds restart If the MDS restarts, the expire caps state is no longer shared, and can be thrown out. Caps state will be rebuilt on the MDS during the reconnect process that follows. Zero out any release messages and adjust the release counter accordingly. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 824a9b2cde71..a4d9e5b0fd3d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1157,6 +1157,44 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, spin_unlock(&session->s_cap_lock); } +static void discard_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) +{ + struct ceph_msg *msg; + struct ceph_mds_cap_release *head; + unsigned num; + + dout("discard_cap_releases mds%d\n", session->s_mds); + spin_lock(&session->s_cap_lock); + + /* zero out the in-progress message */ + msg = list_first_entry(&session->s_cap_releases, + struct ceph_msg, list_head); + head = msg->front.iov_base; + num = le32_to_cpu(head->num); + dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); + head->num = cpu_to_le32(0); + session->s_num_cap_releases += num; + + /* requeue completed messages */ + while (!list_empty(&session->s_cap_releases_done)) { + msg = list_first_entry(&session->s_cap_releases_done, + struct ceph_msg, list_head); + list_del_init(&msg->list_head); + + head = msg->front.iov_base; + num = le32_to_cpu(head->num); + dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, + num); + session->s_num_cap_releases += num; + head->num = cpu_to_le32(0); + msg->front.iov_len = sizeof(*head); + list_add(&msg->list_head, &session->s_cap_releases); + } + + spin_unlock(&session->s_cap_lock); +} + /* * requests */ @@ -2183,6 +2221,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) dout("session %p state %s\n", session, session_state_name(session->s_state)); + /* drop old cap expires; we're about to reestablish that state */ + discard_cap_releases(mdsc, session); + /* traverse this session's caps */ err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); if (err) -- cgit v1.2.3 From aab53dd9e81ccefa7b8d88eec5138dd73639a783 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 17 Mar 2010 16:30:21 -0700 Subject: ceph: only send cap releases when mds is OPEN|HUNG On OPENING we shouldn't have any caps (or releases). On CLOSING, we should wait until we succeed (and throw it all out), or don't (and are OPEN again). On RECONNECTING we can wait until we are OPEN. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a4d9e5b0fd3d..d45787470fb5 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2647,7 +2647,9 @@ static void delayed_work(struct work_struct *work) else ceph_con_keepalive(&s->s_con); add_cap_releases(mdsc, s, -1); - send_cap_releases(mdsc, s); + if (s->s_state == CEPH_MDS_SESSION_OPEN || + s->s_state == CEPH_MDS_SESSION_HUNG) + send_cap_releases(mdsc, s); mutex_unlock(&s->s_mutex); ceph_put_mds_session(s); -- cgit v1.2.3 From 29790f26ab3e63b2a083f0811b80e2f086e4fcb2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 18 Mar 2010 14:45:05 -0700 Subject: ceph: wait for mds OPEN reply to indicate reconnect success We used to infer reconnect success by watching the MDS state, essentially assuming that hearing nothing meant things were ok. That wasn't particularly reliable. Instead, the MDS replies with an explicit OPEN message to indicate success. Strictly speaking, this is a protocol change, but it is a backwards compatible one that does not break new clients + old servers or old clients + new servers. At least not yet. Drop unused @all argument from kick_requests while we're at it. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d45787470fb5..972ec5ae2ac4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1660,10 +1660,9 @@ static void __wake_requests(struct ceph_mds_client *mdsc, /* * Wake up threads with requests pending for @mds, so that they can - * resubmit their requests to a possibly different mds. If @all is set, - * wake up if their requests has been forwarded to @mds, too. + * resubmit their requests to a possibly different mds. */ -static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) +static void kick_requests(struct ceph_mds_client *mdsc, int mds) { struct ceph_mds_request *req; struct rb_node *p; @@ -2026,6 +2025,8 @@ static void handle_session(struct ceph_mds_session *session, switch (op) { case CEPH_SESSION_OPEN: + if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) + pr_info("mds%d reconnect success\n", session->s_mds); session->s_state = CEPH_MDS_SESSION_OPEN; renewed_caps(mdsc, session, 0); wake = 1; @@ -2039,10 +2040,12 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_CLOSE: + if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) + pr_info("mds%d reconnect denied\n", session->s_mds); remove_session_caps(session); wake = 1; /* for good measure */ complete(&mdsc->session_close_waiters); - kick_requests(mdsc, mds, 0); /* cur only */ + kick_requests(mdsc, mds); break; case CEPH_SESSION_STALE: @@ -2258,7 +2261,6 @@ send: reply->nr_pages = calc_pages_for(0, pagelist->length); ceph_con_send(&session->s_con, reply); - session->s_state = CEPH_MDS_SESSION_OPEN; mutex_unlock(&session->s_mutex); mutex_lock(&mdsc->mutex); @@ -2334,7 +2336,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, } /* kick any requests waiting on the recovering mds */ - kick_requests(mdsc, i, 1); + kick_requests(mdsc, i); } else if (oldstate == newstate) { continue; /* nothing new with this mds */ } @@ -2347,18 +2349,14 @@ static void check_new_map(struct ceph_mds_client *mdsc, send_mds_reconnect(mdsc, i); /* - * kick requests on any mds that has gone active. - * - * kick requests on cur or forwarder: we may have sent - * the request to mds1, mds1 told us it forwarded it - * to mds2, but then we learn mds1 failed and can't be - * sure it successfully forwarded our request before - * it died. + * kick request on any mds that has gone active. */ if (oldstate < CEPH_MDS_STATE_ACTIVE && newstate >= CEPH_MDS_STATE_ACTIVE) { - pr_info("mds%d reconnect completed\n", s->s_mds); - kick_requests(mdsc, i, 1); + if (oldstate != CEPH_MDS_STATE_CREATING && + oldstate != CEPH_MDS_STATE_STARTING) + pr_info("mds%d recovery completed\n", s->s_mds); + kick_requests(mdsc, i); ceph_kick_flushing_caps(mdsc, s); wake_up_session_caps(s, 1); } -- cgit v1.2.3 From 34b6c855fafc54ef130649809cd580f98e3f8416 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 10 May 2010 16:31:25 -0700 Subject: ceph: clean up send_mds_reconnect interface Pass a ceph_mds_session, since the caller has it. Remove the dead code for sending empty reconnects. It used to be used when the MDS contacted _us_ to solicit a reconnect, and we could reply saying "go away, I have no session." Now we only send reconnects based on the mds map, and only when we do in fact have an open session. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 972ec5ae2ac4..e310b4be5588 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2177,50 +2177,38 @@ out: * * called with mdsc->mutex held. */ -static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) +static void send_mds_reconnect(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) { - struct ceph_mds_session *session = NULL; struct ceph_msg *reply; struct rb_node *p; + int mds = session->s_mds; int err = -ENOMEM; struct ceph_pagelist *pagelist; - pr_info("reconnect to recovering mds%d\n", mds); + pr_info("mds%d reconnect start\n", mds); pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); if (!pagelist) goto fail_nopagelist; ceph_pagelist_init(pagelist); - err = -ENOMEM; reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0); if (!reply) goto fail_nomsg; - /* find session */ - session = __ceph_lookup_mds_session(mdsc, mds); - mutex_unlock(&mdsc->mutex); /* drop lock for duration */ - - if (session) { - mutex_lock(&session->s_mutex); - - session->s_state = CEPH_MDS_SESSION_RECONNECTING; - session->s_seq = 0; + mutex_lock(&session->s_mutex); + session->s_state = CEPH_MDS_SESSION_RECONNECTING; + session->s_seq = 0; - ceph_con_open(&session->s_con, - ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); + ceph_con_open(&session->s_con, + ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); - /* replay unsafe requests */ - replay_unsafe_requests(mdsc, session); - } else { - dout("no session for mds%d, will send short reconnect\n", - mds); - } + /* replay unsafe requests */ + replay_unsafe_requests(mdsc, session); down_read(&mdsc->snap_rwsem); - if (!session) - goto send; dout("session %p state %s\n", session, session_state_name(session->s_state)); @@ -2255,7 +2243,6 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) goto fail; } -send: reply->pagelist = pagelist; reply->hdr.data_len = cpu_to_le32(pagelist->length); reply->nr_pages = calc_pages_for(0, pagelist->length); @@ -2267,23 +2254,18 @@ send: __wake_requests(mdsc, &session->s_waiting); mutex_unlock(&mdsc->mutex); - ceph_put_mds_session(session); - up_read(&mdsc->snap_rwsem); - mutex_lock(&mdsc->mutex); return; fail: ceph_msg_put(reply); up_read(&mdsc->snap_rwsem); mutex_unlock(&session->s_mutex); - ceph_put_mds_session(session); fail_nomsg: ceph_pagelist_release(pagelist); kfree(pagelist); fail_nopagelist: pr_err("error %d preparing reconnect for mds%d\n", err, mds); - mutex_lock(&mdsc->mutex); return; } @@ -2345,8 +2327,11 @@ static void check_new_map(struct ceph_mds_client *mdsc, * send reconnect? */ if (s->s_state == CEPH_MDS_SESSION_RESTARTING && - newstate >= CEPH_MDS_STATE_RECONNECT) - send_mds_reconnect(mdsc, i); + newstate >= CEPH_MDS_STATE_RECONNECT) { + mutex_unlock(&mdsc->mutex); + send_mds_reconnect(mdsc, s); + mutex_lock(&mdsc->mutex); + } /* * kick request on any mds that has gone active. -- cgit v1.2.3 From 7e70f0ed9f3ee47394576be86c593f66832413e9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 18 Mar 2010 13:59:12 -0700 Subject: ceph: attempt mds reconnect if mds closes our session Currently, if our session is closed (due to a timeout, or explicit close, or whatever), we just sit there doing nothing unless/until the MDS restarts, at which point we try to reconnect. Change client to attempt an immediate reconnect if our session is closed. Note that currently the MDS doesn't support this, and our attempt will fail. We'll get a session CLOSE, our caps and dirty cap state will be dropped, and the client will be free to attempt to reconnect. That's clearly not as nice as a successful reconnect, but it at least allows us to try to carry on, and in the future the MDS will support a reconnect and we will fare better. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index e310b4be5588..525085f36db9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2958,9 +2958,10 @@ static void con_put(struct ceph_connection *con) static void peer_reset(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; - pr_err("mds%d gave us the boot. IMPLEMENT RECONNECT.\n", - s->s_mds); + pr_warning("mds%d closed our session\n", s->s_mds); + send_mds_reconnect(mdsc, s); } static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) -- cgit v1.2.3 From 6c99f2545dbb9e53afe0d1d037c51ab04ef1ff4e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 10 May 2010 16:12:25 -0700 Subject: ceph: throw out dirty caps metadata, data on session teardown The remove_session_caps() helper is called when an MDS closes out our session (either normally, or as a result of a failed reconnect), and when we tear down state for umount. If we remove the last cap, and there are no cap migrations in progress, then there is little hope of us flushing out that data to the mds (without heroic efforts to reconnect and flush). So, to avoid leaving inodes pinned (due to dirty state) and crashing after umount, throw out dirty caps state and unpin the inodes. Print a warning to the console so we know something was lost. NOTE: Although we drop wrbuffer refs, we don't actually mark pages clean; maybe a truncate should be queued? Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 525085f36db9..114bada97c16 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -799,12 +799,49 @@ out: } static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, - void *arg) + void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + int drop = 0; + dout("removing cap %p, ci is %p, inode is %p\n", cap, ci, &ci->vfs_inode); - ceph_remove_cap(cap); + spin_lock(&inode->i_lock); + __ceph_remove_cap(cap); + if (!__ceph_is_any_real_caps(ci)) { + struct ceph_mds_client *mdsc = + &ceph_sb_to_client(inode->i_sb)->mdsc; + + spin_lock(&mdsc->cap_dirty_lock); + if (!list_empty(&ci->i_dirty_item)) { + pr_info(" dropping dirty %s state for %p %lld\n", + ceph_cap_string(ci->i_dirty_caps), + inode, ceph_ino(inode)); + ci->i_dirty_caps = 0; + list_del_init(&ci->i_dirty_item); + drop = 1; + } + if (!list_empty(&ci->i_flushing_item)) { + pr_info(" dropping dirty+flushing %s state for %p %lld\n", + ceph_cap_string(ci->i_flushing_caps), + inode, ceph_ino(inode)); + ci->i_flushing_caps = 0; + list_del_init(&ci->i_flushing_item); + mdsc->num_cap_flushing--; + drop = 1; + } + if (drop && ci->i_wrbuffer_ref) { + pr_info(" dropping dirty data for %p %lld\n", + inode, ceph_ino(inode)); + ci->i_wrbuffer_ref = 0; + ci->i_wrbuffer_ref_head = 0; + drop++; + } + spin_unlock(&mdsc->cap_dirty_lock); + } + spin_unlock(&inode->i_lock); + while (drop--) + iput(inode); return 0; } @@ -816,6 +853,7 @@ static void remove_session_caps(struct ceph_mds_session *session) dout("remove_session_caps on %p\n", session); iterate_session_caps(session, remove_session_caps_cb, NULL); BUG_ON(session->s_nr_caps > 0); + BUG_ON(!list_empty(&session->s_cap_flushing)); cleanup_cap_releases(session); } @@ -1281,7 +1319,7 @@ retry: len += 1 + temp->d_name.len; temp = temp->d_parent; if (temp == NULL) { - pr_err("build_path_dentry corrupt dentry %p\n", dentry); + pr_err("build_path corrupt dentry %p\n", dentry); return ERR_PTR(-EINVAL); } } -- cgit v1.2.3 From 1a75627896fe67d0124eab6fe2f83dd40188c40c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 11 May 2010 11:40:25 -0700 Subject: ceph: use ceph. prefix for virtual xattrs Drop the 'user.' prefix and use just 'ceph.' for fs virtual xattrs. Signed-off-by: Sage Weil --- fs/ceph/xattr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 7185d0794df3..68aeebc69681 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -7,7 +7,8 @@ static bool ceph_is_valid_xattr(const char *name) { - return !strncmp(name, XATTR_SECURITY_PREFIX, + return !strncmp(name, "ceph.", 5) || + !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); @@ -76,14 +77,14 @@ static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val, } static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { - { true, "user.ceph.dir.entries", ceph_vxattrcb_entries}, - { true, "user.ceph.dir.files", ceph_vxattrcb_files}, - { true, "user.ceph.dir.subdirs", ceph_vxattrcb_subdirs}, - { true, "user.ceph.dir.rentries", ceph_vxattrcb_rentries}, - { true, "user.ceph.dir.rfiles", ceph_vxattrcb_rfiles}, - { true, "user.ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, - { true, "user.ceph.dir.rbytes", ceph_vxattrcb_rbytes}, - { true, "user.ceph.dir.rctime", ceph_vxattrcb_rctime}, + { true, "ceph.dir.entries", ceph_vxattrcb_entries}, + { true, "ceph.dir.files", ceph_vxattrcb_files}, + { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs}, + { true, "ceph.dir.rentries", ceph_vxattrcb_rentries}, + { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles}, + { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, + { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes}, + { true, "ceph.dir.rctime", ceph_vxattrcb_rctime}, { true, NULL, NULL } }; @@ -107,7 +108,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, } static struct ceph_vxattr_cb ceph_file_vxattrs[] = { - { true, "user.ceph.layout", ceph_vxattrcb_layout}, + { true, "ceph.layout", ceph_vxattrcb_layout}, { NULL, NULL } }; -- cgit v1.2.3 From ca9d93a292e327bbcddd8f8ea4197397e35097d4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 12 May 2010 14:48:20 -0700 Subject: ceph: resync headers with userland Notable changes include pool op defines and types, FLOCK feature bit, and new CMPXATTR osd ops. Signed-off-by: Sage Weil --- fs/ceph/ceph_fs.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- fs/ceph/ceph_strings.c | 23 ++++++++++++++++++++++- fs/ceph/messenger.c | 12 ------------ fs/ceph/messenger.h | 4 +--- fs/ceph/msgr.h | 1 - fs/ceph/rados.h | 23 +++++++++++++++++++++-- 6 files changed, 91 insertions(+), 22 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 8b194c6ef7d3..715e39ce517a 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -19,7 +19,7 @@ * Ceph release version */ #define CEPH_VERSION_MAJOR 0 -#define CEPH_VERSION_MINOR 19 +#define CEPH_VERSION_MINOR 20 #define CEPH_VERSION_PATCH 0 #define _CEPH_STRINGIFY(x) #x @@ -36,7 +36,7 @@ * client-facing protocol. */ #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 9 /* cluster internal */ +#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ #define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ @@ -55,10 +55,11 @@ */ #define CEPH_FEATURE_UID 1 #define CEPH_FEATURE_NOSRCADDR 2 +#define CEPH_FEATURE_FLOCK 4 #define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID -#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR +#define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID #define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID @@ -100,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); #define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_CEPHX 0x2 +#define CEPH_AUTH_UID_DEFAULT ((__u64) -1) + /********************************************* * message layer @@ -137,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); #define CEPH_MSG_CLIENT_SNAP 0x312 #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 +/* pool ops */ +#define CEPH_MSG_POOLOP_REPLY 48 +#define CEPH_MSG_POOLOP 49 + + /* osd */ #define CEPH_MSG_OSD_MAP 41 #define CEPH_MSG_OSD_OP 42 #define CEPH_MSG_OSD_OPREPLY 43 +/* pool operations */ +enum { + POOL_OP_CREATE = 0x01, + POOL_OP_DELETE = 0x02, + POOL_OP_AUID_CHANGE = 0x03, + POOL_OP_CREATE_SNAP = 0x11, + POOL_OP_DELETE_SNAP = 0x12, + POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, + POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, +}; + struct ceph_mon_request_header { __le64 have_version; __le16 session_mon; @@ -164,6 +183,31 @@ struct ceph_mon_statfs_reply { struct ceph_statfs st; } __attribute__ ((packed)); +const char *ceph_pool_op_name(int op); + +struct ceph_mon_poolop { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 pool; + __le32 op; + __le64 auid; + __le64 snapid; + __le32 name_len; +} __attribute__ ((packed)); + +struct ceph_mon_poolop_reply { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 reply_code; + __le32 epoch; + char has_data; + char data[0]; +} __attribute__ ((packed)); + +struct ceph_mon_unmanaged_snap { + __le64 snapid; +} __attribute__ ((packed)); + struct ceph_osd_getmap { struct ceph_mon_request_header monhdr; struct ceph_fsid fsid; diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 8e4be6a80c62..78908301ff17 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c @@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type) case CEPH_ENTITY_TYPE_OSD: return "osd"; case CEPH_ENTITY_TYPE_MON: return "mon"; case CEPH_ENTITY_TYPE_CLIENT: return "client"; - case CEPH_ENTITY_TYPE_ADMIN: return "admin"; case CEPH_ENTITY_TYPE_AUTH: return "auth"; default: return "unknown"; } @@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; case CEPH_OSD_OP_RMXATTR: return "rmxattr"; + case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; case CEPH_OSD_OP_PULL: return "pull"; case CEPH_OSD_OP_PUSH: return "push"; @@ -174,3 +174,24 @@ const char *ceph_snap_op_name(int o) } return "???"; } + +const char *ceph_pool_op_name(int op) { + switch (op) { + case POOL_OP_CREATE: + return "create pool"; + case POOL_OP_DELETE: + return "delete pool"; + case POOL_OP_AUID_CHANGE: + return "change auid"; + case POOL_OP_CREATE_SNAP: + return "create snap"; + case POOL_OP_DELETE_SNAP: + return "delete snap"; + case POOL_OP_CREATE_UNMANAGED_SNAP: + return "create unmanaged snap"; + case POOL_OP_DELETE_UNMANAGED_SNAP: + return "delete unmanaged snap"; + default: + return "unknown"; + } +} diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index fe75ec7d8402..d18a544490f6 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -39,18 +39,6 @@ static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); static void ceph_fault(struct ceph_connection *con); -const char *ceph_name_type_str(int t) -{ - switch (t) { - case CEPH_ENTITY_TYPE_MON: return "mon"; - case CEPH_ENTITY_TYPE_MDS: return "mds"; - case CEPH_ENTITY_TYPE_OSD: return "osd"; - case CEPH_ENTITY_TYPE_CLIENT: return "client"; - case CEPH_ENTITY_TYPE_ADMIN: return "admin"; - default: return "???"; - } -} - /* * nicely render a sockaddr as a string. */ diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index e56564f7e71c..4e5764c7fbc2 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -49,10 +49,8 @@ struct ceph_connection_operations { int *skip); }; -extern const char *ceph_name_type_str(int t); - /* use format string %s%d */ -#define ENTITY_NAME(n) ceph_name_type_str((n).type), le64_to_cpu((n).num) +#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) struct ceph_messenger { struct ceph_entity_inst inst; /* my name+address */ diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 6baa8e4f8e7f..892a0298dfdf 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h @@ -50,7 +50,6 @@ struct ceph_entity_name { #define CEPH_ENTITY_TYPE_MDS 0x02 #define CEPH_ENTITY_TYPE_OSD 0x04 #define CEPH_ENTITY_TYPE_CLIENT 0x08 -#define CEPH_ENTITY_TYPE_ADMIN 0x10 #define CEPH_ENTITY_TYPE_AUTH 0x20 #define CEPH_ENTITY_TYPE_ANY 0xFF diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index fd56451a871f..8fcc023056c7 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -101,8 +101,8 @@ struct ceph_pg_pool { __le64 snap_seq; /* seq for per-pool snapshot */ __le32 snap_epoch; /* epoch of last snap */ __le32 num_snaps; - __le32 num_removed_snap_intervals; - __le64 uid; + __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ + __le64 auid; /* who owns the pg */ } __attribute__ ((packed)); /* @@ -208,6 +208,7 @@ enum { /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, + CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, /* write */ CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -305,6 +306,22 @@ enum { #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ #define EBLACKLISTED ESHUTDOWN /* blacklisted */ +/* xattr comparison */ +enum { + CEPH_OSD_CMPXATTR_OP_NOP = 0, + CEPH_OSD_CMPXATTR_OP_EQ = 1, + CEPH_OSD_CMPXATTR_OP_NE = 2, + CEPH_OSD_CMPXATTR_OP_GT = 3, + CEPH_OSD_CMPXATTR_OP_GTE = 4, + CEPH_OSD_CMPXATTR_OP_LT = 5, + CEPH_OSD_CMPXATTR_OP_LTE = 6 +}; + +enum { + CEPH_OSD_CMPXATTR_MODE_STRING = 1, + CEPH_OSD_CMPXATTR_MODE_U64 = 2 +}; + /* * an individual object operation. each may be accompanied by some data * payload @@ -321,6 +338,8 @@ struct ceph_osd_op { struct { __le32 name_len; __le32 value_len; + __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ + __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ } __attribute__ ((packed)) xattr; struct { __u8 class_len; -- cgit v1.2.3 From aba558e28ac40a598542d995c09efa8439ee3ed4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 12 May 2010 15:23:30 -0700 Subject: ceph: save peer feature bits in connection structure These are used for adjusting behavior, such as conditionally encoding a newer message format. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 1 + fs/ceph/messenger.h | 1 + 2 files changed, 2 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index d18a544490f6..50c5f24086fd 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -1223,6 +1223,7 @@ static int process_connect(struct ceph_connection *con) clear_bit(CONNECTING, &con->state); con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); con->connect_seq++; + con->peer_features = server_feat; dout("process_connect got READY gseq %d cseq %d (%d)\n", con->peer_global_seq, le32_to_cpu(con->in_reply.connect_seq), diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 4e5764c7fbc2..889f81f093c9 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -142,6 +142,7 @@ struct ceph_connection { struct ceph_entity_addr peer_addr; /* peer address */ struct ceph_entity_name peer_name; /* peer name */ struct ceph_entity_addr peer_addr_for_me; + unsigned peer_features; u32 connect_seq; /* identify the most recent connection attempt for this connection, client */ u32 peer_global_seq; /* peer's global seq for this connection */ -- cgit v1.2.3 From 85792d0dd6e7a7a18fba55c97e49871211b28fe0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 May 2010 09:06:02 -0700 Subject: ceph: cope with out of order (unsafe after safe) mds reply Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 114bada97c16..40dd437a26a9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1871,6 +1871,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_unlock(&mdsc->mutex); goto out; } + if (req->r_got_safe && !head->safe) { + pr_warning("got unsafe after safe on %llu from mds%d\n", + tid, mds); + mutex_unlock(&mdsc->mutex); + goto out; + } result = le32_to_cpu(head->result); -- cgit v1.2.3 From 167c9e352deb7e25568c926c49c3eafad69cbe76 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 May 2010 10:02:57 -0700 Subject: ceph: use common helper for aborted dir request invalidation We invalidate I_COMPLETE and dentry leases in two places: on aborted mds request and on request replay. Use common helper to avoid duplicate code. Signed-off-by: Sage Weil --- fs/ceph/inode.c | 17 ++--------------- fs/ceph/mds_client.c | 39 +++++++++++++++++++++++---------------- fs/ceph/mds_client.h | 2 ++ 3 files changed, 27 insertions(+), 31 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 1bcf98bd0255..a81b8b662c7b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -941,21 +941,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, if (!rinfo->head->is_target && !rinfo->head->is_dentry) { dout("fill_trace reply is empty!\n"); - if (rinfo->head->result == 0 && req->r_locked_dir) { - struct ceph_inode_info *ci = - ceph_inode(req->r_locked_dir); - dout(" clearing %p complete (empty trace)\n", - req->r_locked_dir); - spin_lock(&req->r_locked_dir->i_lock); - ci->i_ceph_flags &= ~CEPH_I_COMPLETE; - ci->i_release_count++; - spin_unlock(&req->r_locked_dir->i_lock); - - if (req->r_dentry) - ceph_invalidate_dentry_lease(req->r_dentry); - if (req->r_old_dentry) - ceph_invalidate_dentry_lease(req->r_old_dentry); - } + if (rinfo->head->result == 0 && req->r_locked_dir) + ceph_invalidate_dir_request(req); return 0; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 40dd437a26a9..5c17ab44d02d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1794,22 +1794,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_unlock(&req->r_fill_mutex); if (req->r_locked_dir && - (req->r_op & CEPH_MDS_OP_WRITE)) { - struct ceph_inode_info *ci = - ceph_inode(req->r_locked_dir); - - dout("aborted, clearing I_COMPLETE on %p, leases\n", - req->r_locked_dir); - spin_lock(&req->r_locked_dir->i_lock); - ci->i_ceph_flags &= ~CEPH_I_COMPLETE; - ci->i_release_count++; - spin_unlock(&req->r_locked_dir->i_lock); - - if (req->r_dentry) - ceph_invalidate_dentry_lease(req->r_dentry); - if (req->r_old_dentry) - ceph_invalidate_dentry_lease(req->r_old_dentry); - } + (req->r_op & CEPH_MDS_OP_WRITE)) + ceph_invalidate_dir_request(req); } else { err = req->r_err; } @@ -1820,6 +1806,27 @@ out: return err; } +/* + * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS + * namespace request. + */ +void ceph_invalidate_dir_request(struct ceph_mds_request *req) +{ + struct inode *inode = req->r_locked_dir; + struct ceph_inode_info *ci = ceph_inode(inode); + + dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode); + spin_lock(&inode->i_lock); + ci->i_ceph_flags &= ~CEPH_I_COMPLETE; + ci->i_release_count++; + spin_unlock(&inode->i_lock); + + if (req->r_dentry) + ceph_invalidate_dentry_lease(req->r_dentry); + if (req->r_old_dentry) + ceph_invalidate_dentry_lease(req->r_old_dentry); +} + /* * Handle mds reply. * diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 141a265bda75..d9936c4f1212 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -303,6 +303,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, struct dentry *dn, int mask); +extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); + extern struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, -- cgit v1.2.3 From cffe7b6d8cc029d13524acedf4917210dc0102ab Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 13 May 2010 22:07:29 +0200 Subject: ceph: Use kzalloc Use kzalloc rather than the combination of kmalloc and memset. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,size,flags; statement S; @@ -x = kmalloc(size,flags); +x = kzalloc(size,flags); if (x == NULL) S -memset(x, 0, size); // Signed-off-by: Julia Lawall Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 7df2f8c157f1..7062b889ab2f 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -481,11 +481,10 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) struct ceph_mon_statfs *h; int err; - req = kmalloc(sizeof(*req), GFP_NOFS); + req = kzalloc(sizeof(*req), GFP_NOFS); if (!req) return -ENOMEM; - memset(req, 0, sizeof(*req)); kref_init(&req->kref); req->buf = buf; init_completion(&req->completion); -- cgit v1.2.3 From 366837706bae00abc2edd75add2579c1be18b2b8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 May 2010 11:36:48 -0700 Subject: ceph: cleanup pool op strings Signed-off-by: Sage Weil --- fs/ceph/ceph_strings.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 78908301ff17..7503aee828ce 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c @@ -175,23 +175,16 @@ const char *ceph_snap_op_name(int o) return "???"; } -const char *ceph_pool_op_name(int op) { - switch (op) { - case POOL_OP_CREATE: - return "create pool"; - case POOL_OP_DELETE: - return "delete pool"; - case POOL_OP_AUID_CHANGE: - return "change auid"; - case POOL_OP_CREATE_SNAP: - return "create snap"; - case POOL_OP_DELETE_SNAP: - return "delete snap"; - case POOL_OP_CREATE_UNMANAGED_SNAP: - return "create unmanaged snap"; - case POOL_OP_DELETE_UNMANAGED_SNAP: - return "delete unmanaged snap"; - default: - return "unknown"; - } +const char *ceph_pool_op_name(int op) +{ + switch (op) { + case POOL_OP_CREATE: return "create"; + case POOL_OP_DELETE: return "delete"; + case POOL_OP_AUID_CHANGE: return "auid change"; + case POOL_OP_CREATE_SNAP: return "create snap"; + case POOL_OP_DELETE_SNAP: return "delete snap"; + case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; + case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; + } + return "???"; } -- cgit v1.2.3 From 23804d91f112df09b832cd091b71af4dc2831aa8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 May 2010 13:06:30 -0700 Subject: ceph: specify max_bytes on readdir replies Specify max bytes in request to bound size of reply. Add associated mount option with default value of 512 KB. Signed-off-by: Sage Weil --- fs/ceph/ceph_fs.h | 1 + fs/ceph/dir.c | 2 ++ fs/ceph/super.c | 8 ++++++++ fs/ceph/super.h | 4 +++- 4 files changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 715e39ce517a..3b9eeed097b3 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -361,6 +361,7 @@ union ceph_mds_request_args { struct { __le32 frag; /* which dir fragment */ __le32 max_entries; /* how many dentries to grab */ + __le32 max_bytes; } __attribute__ ((packed)) readdir; struct { __le32 mode; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d3bb8132a1aa..4fd30900eff7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -233,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) u32 ftype; struct ceph_mds_reply_info_parsed *rinfo; const int max_entries = client->mount_args->max_readdir; + const int max_bytes = client->mount_args->max_readdir_bytes; dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); if (fi->at_end) @@ -316,6 +317,7 @@ more: req->r_readdir_offset = fi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); req->r_args.readdir.max_entries = cpu_to_le32(max_entries); + req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); req->r_num_caps = max_entries + 1; err = ceph_mdsc_do_request(mdsc, NULL, req); if (err < 0) { diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7f5b20dc4945..bac13898b943 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -190,6 +190,8 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) args->cap_release_safety); if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); + if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) + seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) seq_printf(m, ",snapdirname=%s", args->snapdir_name); if (args->name) @@ -333,6 +335,7 @@ enum { Opt_caps_wanted_delay_max, Opt_cap_release_safety, Opt_readdir_max_entries, + Opt_readdir_max_bytes, Opt_congestion_kb, Opt_last_int, /* int args above */ @@ -365,6 +368,7 @@ static match_table_t arg_tokens = { {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, {Opt_cap_release_safety, "cap_release_safety=%d"}, {Opt_readdir_max_entries, "readdir_max_entries=%d"}, + {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, {Opt_congestion_kb, "write_congestion_kb=%d"}, /* int args above */ {Opt_snapdirname, "snapdirname=%s"}, @@ -415,6 +419,7 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; args->max_readdir = CEPH_MAX_READDIR_DEFAULT; + args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; args->congestion_kb = default_congestion_kb(); /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ @@ -522,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, case Opt_readdir_max_entries: args->max_readdir = intval; break; + case Opt_readdir_max_bytes: + args->max_readdir_bytes = intval; + break; case Opt_congestion_kb: args->congestion_kb = intval; break; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 395adc5fcebb..3725c9ee9d08 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -66,7 +66,8 @@ struct ceph_mount_args { int congestion_kb; /* max writeback in flight */ int caps_wanted_delay_min, caps_wanted_delay_max; int cap_release_safety; - int max_readdir; /* max readdir size */ + int max_readdir; /* max readdir result (entires) */ + int max_readdir_bytes; /* max readdir result (bytes) */ char *snapdir_name; /* default ".snap" */ char *name; char *secret; @@ -81,6 +82,7 @@ struct ceph_mount_args { #define CEPH_OSD_IDLE_TTL_DEFAULT 60 #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ #define CEPH_MAX_READDIR_DEFAULT 1024 +#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) -- cgit v1.2.3 From 34d23762d988b7dcb08390ac72a353df3d60193c Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 6 Apr 2010 14:33:58 -0700 Subject: ceph: all allocation functions should get gfp_mask This is essential, as for the rados block device we'll need to run in different contexts that would need flags that are other than GFP_NOFS. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- fs/ceph/file.c | 10 +++++----- fs/ceph/mds_client.c | 11 ++++++----- fs/ceph/messenger.c | 10 +++++----- fs/ceph/messenger.h | 2 +- fs/ceph/mon_client.c | 15 ++++++++------- fs/ceph/msgpool.c | 4 ++-- fs/ceph/osd_client.c | 8 ++++---- 8 files changed, 32 insertions(+), 30 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0e85d3c80790..0dd0b81e64f7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -938,7 +938,7 @@ static int send_cap_msg(struct ceph_mds_session *session, seq, issue_seq, mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc)); + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS); if (!msg) return -ENOMEM; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b0426090e8c3..0611ec3698aa 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -317,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages) /* * allocate a vector new pages */ -static struct page **alloc_page_vector(int num_pages) +struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) { struct page **pages; int i; - pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); + pages = kmalloc(sizeof(*pages) * num_pages, flags); if (!pages) return ERR_PTR(-ENOMEM); for (i = 0; i < num_pages; i++) { - pages[i] = __page_cache_alloc(GFP_NOFS); + pages[i] = __page_cache_alloc(flags); if (pages[i] == NULL) { ceph_release_page_vector(pages, i); return ERR_PTR(-ENOMEM); @@ -540,7 +540,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, * in sequence. */ } else { - pages = alloc_page_vector(num_pages); + pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); } if (IS_ERR(pages)) return PTR_ERR(pages); @@ -668,7 +668,7 @@ more: truncate_inode_pages_range(inode->i_mapping, pos, (pos+len) | (PAGE_CACHE_SIZE-1)); } else { - pages = alloc_page_vector(num_pages); + pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5c17ab44d02d..1b786cf2c7af 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -665,7 +665,7 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) struct ceph_msg *msg; struct ceph_mds_session_head *h; - msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h)); + msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); return NULL; @@ -1089,7 +1089,8 @@ static int add_cap_releases(struct ceph_mds_client *mdsc, while (session->s_num_cap_releases < session->s_nr_caps + extra) { spin_unlock(&session->s_cap_lock); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE); + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, + GFP_NOFS); if (!msg) goto out_unlocked; dout("add_cap_releases %p msg %p now %d\n", session, msg, @@ -1492,7 +1493,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_old_dentry_drop) len += req->r_old_dentry->d_name.len; - msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len); + msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS); if (!msg) { msg = ERR_PTR(-ENOMEM); goto out_free2; @@ -2244,7 +2245,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, goto fail_nopagelist; ceph_pagelist_init(pagelist); - reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0); + reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS); if (!reply) goto fail_nomsg; @@ -2535,7 +2536,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, dnamelen = dentry->d_name.len; len += dnamelen; - msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len); + msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS); if (!msg) return; lease = msg->front.iov_base; diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 50c5f24086fd..60b74839ebec 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -2070,11 +2070,11 @@ void ceph_con_keepalive(struct ceph_connection *con) * construct a new message with given type, size * the new msg has a ref count of 1. */ -struct ceph_msg *ceph_msg_new(int type, int front_len) +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) { struct ceph_msg *m; - m = kmalloc(sizeof(*m), GFP_NOFS); + m = kmalloc(sizeof(*m), flags); if (m == NULL) goto out; kref_init(&m->kref); @@ -2101,11 +2101,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len) /* front */ if (front_len) { if (front_len > PAGE_CACHE_SIZE) { - m->front.iov_base = __vmalloc(front_len, GFP_NOFS, + m->front.iov_base = __vmalloc(front_len, flags, PAGE_KERNEL); m->front_is_vmalloc = true; } else { - m->front.iov_base = kmalloc(front_len, GFP_NOFS); + m->front.iov_base = kmalloc(front_len, flags); } if (m->front.iov_base == NULL) { pr_err("msg_new can't allocate %d bytes\n", @@ -2180,7 +2180,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, } if (!msg) { *skip = 0; - msg = ceph_msg_new(type, front_len); + msg = ceph_msg_new(type, front_len, GFP_NOFS); if (!msg) { pr_err("unable to allocate msg type %d len %d\n", type, front_len); diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 889f81f093c9..00a9430b1ffc 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -232,7 +232,7 @@ extern void ceph_con_keepalive(struct ceph_connection *con); extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); extern void ceph_con_put(struct ceph_connection *con); -extern struct ceph_msg *ceph_msg_new(int type, int front_len); +extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); extern void ceph_msg_kfree(struct ceph_msg *m); diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 7062b889ab2f..61cb11701f10 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -191,7 +191,7 @@ static void __send_subscribe(struct ceph_mon_client *monc) struct ceph_mon_subscribe_item *i; void *p, *end; - msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96); + msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); if (!msg) return; @@ -490,10 +490,10 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) init_completion(&req->completion); err = -ENOMEM; - req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h)); + req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS); if (!req->request) goto out; - req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024); + req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS); if (!req->reply) goto out; @@ -632,15 +632,16 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) /* msg pools */ err = -ENOMEM; monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, - sizeof(struct ceph_mon_subscribe_ack)); + sizeof(struct ceph_mon_subscribe_ack), + GFP_NOFS); if (!monc->m_subscribe_ack) goto out_monmap; - monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096); + monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); if (!monc->m_auth_reply) goto out_subscribe_ack; - monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096); + monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); monc->pending_auth = 0; if (!monc->m_auth) goto out_auth_reply; @@ -815,7 +816,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: - m = ceph_msg_new(type, front_len); + m = ceph_msg_new(type, front_len, GFP_NOFS); break; } diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index 50fe5cc5de76..dd65a6438131 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -12,7 +12,7 @@ static void *alloc_fn(gfp_t gfp_mask, void *arg) struct ceph_msgpool *pool = arg; void *p; - p = ceph_msg_new(0, pool->front_len); + p = ceph_msg_new(0, pool->front_len, gfp_mask); if (!p) pr_err("msgpool %s alloc failed\n", pool->name); return p; @@ -48,7 +48,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, WARN_ON(1); /* try to alloc a fresh message */ - return ceph_msg_new(0, front_len); + return ceph_msg_new(0, front_len, GFP_NOFS); } return mempool_alloc(pool->pool, GFP_NOFS); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index c0aca732efd3..16141e6e8b73 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -164,7 +164,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN); + OSD_OPREPLY_FRONT_LEN, GFP_NOFS); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -178,7 +178,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size); + msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -1395,7 +1395,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (front > req->r_reply->front.iov_len) { pr_warning("get_reply front %d > preallocated %d\n", front, (int)req->r_reply->front.iov_len); - m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front); + m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); if (!m) goto out; ceph_msg_put(req->r_reply); @@ -1438,7 +1438,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, switch (type) { case CEPH_MSG_OSD_MAP: - return ceph_msg_new(type, front); + return ceph_msg_new(type, front, GFP_NOFS); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); default: -- cgit v1.2.3 From 9e32789f63fc5ad91c8b10f68ec23a86856d5af5 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 20 May 2010 10:40:19 +0200 Subject: ceph: Storage class should be before const qualifier The C99 specification states in section 6.11.5: The placement of a storage-class specifier other than at the beginning of the declaration specifiers in a declaration is an obsolescent feature. Signed-off-by: Tobias Klauser Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 4 ++-- fs/ceph/mon_client.c | 4 ++-- fs/ceph/osd_client.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1b786cf2c7af..885aa5710cfd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -40,7 +40,7 @@ static void __wake_requests(struct ceph_mds_client *mdsc, struct list_head *head); -const static struct ceph_connection_operations mds_con_ops; +static const struct ceph_connection_operations mds_con_ops; /* @@ -3120,7 +3120,7 @@ static int invalidate_authorizer(struct ceph_connection *con) return ceph_monc_validate_auth(&mdsc->client->monc); } -const static struct ceph_connection_operations mds_con_ops = { +static const struct ceph_connection_operations mds_con_ops = { .get = con_get, .put = con_put, .dispatch = dispatch, diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 61cb11701f10..478729a6ffce 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -28,7 +28,7 @@ * resend any outstanding requests. */ -const static struct ceph_connection_operations mon_con_ops; +static const struct ceph_connection_operations mon_con_ops; static int __validate_auth(struct ceph_mon_client *monc); @@ -861,7 +861,7 @@ out: mutex_unlock(&monc->mutex); } -const static struct ceph_connection_operations mon_con_ops = { +static const struct ceph_connection_operations mon_con_ops = { .get = ceph_con_get, .put = ceph_con_put, .dispatch = dispatch, diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 16141e6e8b73..afa7bb3895c4 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -16,7 +16,7 @@ #define OSD_OP_FRONT_LEN 4096 #define OSD_OPREPLY_FRONT_LEN 512 -const static struct ceph_connection_operations osd_con_ops; +static const struct ceph_connection_operations osd_con_ops; static int __kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *kickosd); @@ -1524,7 +1524,7 @@ static int invalidate_authorizer(struct ceph_connection *con) return ceph_monc_validate_auth(&osdc->client->monc); } -const static struct ceph_connection_operations osd_con_ops = { +static const struct ceph_connection_operations osd_con_ops = { .get = get_osd_con, .put = put_osd_con, .dispatch = dispatch, -- cgit v1.2.3 From 970690012c572fc3b7be532080564b730f6a9c02 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 21 May 2010 12:31:49 -0700 Subject: ceph: avoid resending queued message to monitor The auth_reply handler will (re)send any pending requests. For the initial mon authenticate phase, that's correct, but when a auth ticket renewal races with an in-flight request, we may resend a request message that is already in flight. Avoid this by revoking the message before sending it. We should also avoid resending requests at all during ticket renewal; that will come soon. Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 478729a6ffce..12d94f24ee97 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -104,6 +104,7 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) monc->pending_auth = 1; monc->m_auth->front.iov_len = len; monc->m_auth->hdr.front_len = cpu_to_le32(len); + ceph_con_revoke(monc->con, monc->m_auth); ceph_msg_get(monc->m_auth); /* keep our ref */ ceph_con_send(monc->con, monc->m_auth); } @@ -539,6 +540,7 @@ static void __resend_generic_request(struct ceph_mon_client *monc) for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { req = rb_entry(p, struct ceph_mon_generic_request, node); + ceph_con_revoke(monc->con, req->request); ceph_con_send(monc->con, ceph_msg_get(req->request)); } } -- cgit v1.2.3 From 3981f2e2a04df4b95129ddbb8bb869ef1d57bea9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Mar 2010 19:22:29 -0400 Subject: ceph: should use deactivate_locked_super() on failure exits Signed-off-by: Al Viro --- fs/ceph/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 110857ba9269..9307bbee6fbe 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -952,8 +952,7 @@ static int ceph_get_sb(struct file_system_type *fs_type, out_splat: ceph_mdsc_close_sessions(&client->mdsc); - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); goto out_final; out: -- cgit v1.2.3 From 8018ab057480974e7f26a387bf4ce040e9a5f6f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Mar 2010 17:32:25 +0100 Subject: sanitize vfs_fsync calling conventions Now that the last user passing a NULL file pointer is gone we can remove the redundant dentry argument and associated hacks inside vfs_fsynmc_range. The next step will be removig the dentry argument from ->fsync, but given the luck with the last round of method prototype changes I'd rather defer this until after the main merge window. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/block/loop.c | 4 ++-- drivers/md/bitmap.c | 2 +- drivers/usb/gadget/storage_common.c | 2 +- fs/ceph/file.c | 3 +-- fs/coda/file.c | 2 +- fs/ecryptfs/file.c | 4 +--- fs/nfsd/nfs4recover.c | 6 +++--- fs/nfsd/vfs.c | 5 ++--- fs/sync.c | 42 +++++++------------------------------ include/linux/fs.h | 6 +++--- mm/msync.c | 2 +- 11 files changed, 24 insertions(+), 54 deletions(-) (limited to 'fs/ceph') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a90e83c9be96..6120922f459f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -485,7 +485,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) goto out; } - ret = vfs_fsync(file, file->f_path.dentry, 0); + ret = vfs_fsync(file, 0); if (unlikely(ret)) { ret = -EIO; goto out; @@ -495,7 +495,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) ret = lo_send(lo, bio, pos); if (barrier && !ret) { - ret = vfs_fsync(file, file->f_path.dentry, 0); + ret = vfs_fsync(file, 0); if (unlikely(ret)) ret = -EIO; } diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index f084249295d9..53e8bea295e2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1692,7 +1692,7 @@ int bitmap_create(mddev_t *mddev) * and bypass the page cache, we must sync the file * first. */ - vfs_fsync(file, file->f_dentry, 1); + vfs_fsync(file, 1); } /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ if (!mddev->bitmap_info.external) diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c index 868d8ee86756..04c462ff0ea6 100644 --- a/drivers/usb/gadget/storage_common.c +++ b/drivers/usb/gadget/storage_common.c @@ -654,7 +654,7 @@ static int fsg_lun_fsync_sub(struct fsg_lun *curlun) if (curlun->ro || !filp) return 0; - return vfs_fsync(filp, filp->f_path.dentry, 1); + return vfs_fsync(filp, 1); } static void store_cdrom_address(u8 *dest, int msf, u32 addr) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ed6f19721d6e..7d634938edc9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -844,8 +844,7 @@ retry_snap: if ((ret >= 0 || ret == -EIOCBQUEUED) && ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { - err = vfs_fsync_range(file, file->f_path.dentry, - pos, pos + ret - 1, 1); + err = vfs_fsync_range(file, pos, pos + ret - 1, 1); if (err < 0) ret = err; } diff --git a/fs/coda/file.c b/fs/coda/file.c index 4c813f2cdc52..7196077b1688 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -217,7 +217,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - err = vfs_fsync(host_file, host_file->f_path.dentry, datasync); + err = vfs_fsync(host_file, datasync); if ( !err && !datasync ) { lock_kernel(); err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index e7440a6f5ebf..3bdddbcc785f 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -276,9 +276,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) static int ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) { - return vfs_fsync(ecryptfs_file_to_lower(file), - ecryptfs_dentry_to_lower(dentry), - datasync); + return vfs_fsync(ecryptfs_file_to_lower(file), datasync); } static int ecryptfs_fasync(int fd, struct file *file, int flag) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index dada03f2c13b..7e26caab2a26 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -158,7 +158,7 @@ out_unlock: mutex_unlock(&dir->d_inode->i_mutex); if (status == 0) { clp->cl_firststate = 1; - vfs_fsync(rec_file, rec_file->f_path.dentry, 0); + vfs_fsync(rec_file, 0); } nfs4_reset_creds(original_cred); dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); @@ -288,7 +288,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); nfs4_reset_creds(original_cred); if (status == 0) - vfs_fsync(rec_file, rec_file->f_path.dentry, 0); + vfs_fsync(rec_file, 0); mnt_drop_write(rec_file->f_path.mnt); out: if (status) @@ -325,7 +325,7 @@ nfsd4_recdir_purge_old(void) { goto out; status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old); if (status == 0) - vfs_fsync(rec_file, rec_file->f_path.dentry, 0); + vfs_fsync(rec_file, 0); mnt_drop_write(rec_file->f_path.mnt); out: if (status) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 23c06f77f4ca..ebbf3b6b2457 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -999,7 +999,7 @@ static int wait_for_concurrent_writes(struct file *file) if (inode->i_state & I_DIRTY) { dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - err = vfs_fsync(file, file->f_path.dentry, 0); + err = vfs_fsync(file, 0); } last_ino = inode->i_ino; last_dev = inode->i_sb->s_dev; @@ -1175,8 +1175,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; if (EX_ISSYNC(fhp->fh_export)) { - int err2 = vfs_fsync_range(file, file->f_path.dentry, - offset, end, 0); + int err2 = vfs_fsync_range(file, offset, end, 0); if (err2 != -EINVAL) err = nfserrno(err2); diff --git a/fs/sync.c b/fs/sync.c index d5369203f8e4..5a537ccd2e85 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -158,7 +158,6 @@ EXPORT_SYMBOL(file_fsync); /** * vfs_fsync_range - helper to sync a range of data & metadata to disk * @file: file to sync - * @dentry: dentry of @file * @start: offset in bytes of the beginning of data range to sync * @end: offset in bytes of the end of data range (inclusive) * @datasync: perform only datasync @@ -166,32 +165,13 @@ EXPORT_SYMBOL(file_fsync); * Write back data in range @start..@end and metadata for @file to disk. If * @datasync is set only metadata needed to access modified file data is * written. - * - * In case this function is called from nfsd @file may be %NULL and - * only @dentry is set. This can only happen when the filesystem - * implements the export_operations API. */ -int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, - loff_t end, int datasync) +int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { - const struct file_operations *fop; - struct address_space *mapping; + struct address_space *mapping = file->f_mapping; int err, ret; - /* - * Get mapping and operations from the file in case we have - * as file, or get the default values for them in case we - * don't have a struct file available. Damn nfsd.. - */ - if (file) { - mapping = file->f_mapping; - fop = file->f_op; - } else { - mapping = dentry->d_inode->i_mapping; - fop = dentry->d_inode->i_fop; - } - - if (!fop || !fop->fsync) { + if (!file->f_op || !file->f_op->fsync) { ret = -EINVAL; goto out; } @@ -203,7 +183,7 @@ int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, * livelocks in fsync_buffers_list(). */ mutex_lock(&mapping->host->i_mutex); - err = fop->fsync(file, dentry, datasync); + err = file->f_op->fsync(file, file->f_path.dentry, datasync); if (!ret) ret = err; mutex_unlock(&mapping->host->i_mutex); @@ -216,19 +196,14 @@ EXPORT_SYMBOL(vfs_fsync_range); /** * vfs_fsync - perform a fsync or fdatasync on a file * @file: file to sync - * @dentry: dentry of @file * @datasync: only perform a fdatasync operation * * Write back data and metadata for @file to disk. If @datasync is * set only metadata needed to access modified file data is written. - * - * In case this function is called from nfsd @file may be %NULL and - * only @dentry is set. This can only happen when the filesystem - * implements the export_operations API. */ -int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int vfs_fsync(struct file *file, int datasync) { - return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync); + return vfs_fsync_range(file, 0, LLONG_MAX, datasync); } EXPORT_SYMBOL(vfs_fsync); @@ -239,7 +214,7 @@ static int do_fsync(unsigned int fd, int datasync) file = fget(fd); if (file) { - ret = vfs_fsync(file, file->f_path.dentry, datasync); + ret = vfs_fsync(file, datasync); fput(file); } return ret; @@ -267,8 +242,7 @@ int generic_write_sync(struct file *file, loff_t pos, loff_t count) { if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) return 0; - return vfs_fsync_range(file, file->f_path.dentry, pos, - pos + count - 1, + return vfs_fsync_range(file, pos, pos + count - 1, (file->f_flags & __O_SYNC) ? 0 : 1); } EXPORT_SYMBOL(generic_write_sync); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6660db898c41..f3e108314c93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2084,9 +2084,9 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); -extern int vfs_fsync_range(struct file *file, struct dentry *dentry, - loff_t start, loff_t end, int datasync); -extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); +extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, + int datasync); +extern int vfs_fsync(struct file *file, int datasync); extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); extern void sync_supers(void); extern void emergency_sync(void); diff --git a/mm/msync.c b/mm/msync.c index 4083209b7f02..632df4527c01 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -82,7 +82,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) (vma->vm_flags & VM_SHARED)) { get_file(file); up_read(&mm->mmap_sem); - error = vfs_fsync(file, file->f_path.dentry, 0); + error = vfs_fsync(file, 0); fput(file); if (error || start >= end) goto out; -- cgit v1.2.3 From 240ed68eb567d80dd6bab739341999a5ab0ad55d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 21 May 2010 14:57:25 -0700 Subject: ceph: reuse mon subscribe message instead of allocated anew Use the same message, allocated during startup. No need to reallocate a new one each time around (and potentially ENOMEM). Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 22 +++++++++++++--------- fs/ceph/mon_client.h | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 12d94f24ee97..f6510a476e7e 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -188,16 +188,12 @@ static void __send_subscribe(struct ceph_mon_client *monc) monc->want_next_osdmap); if ((__sub_expired(monc) && !monc->sub_sent) || monc->want_next_osdmap == 1) { - struct ceph_msg *msg; + struct ceph_msg *msg = monc->m_subscribe; struct ceph_mon_subscribe_item *i; void *p, *end; - msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); - if (!msg) - return; - p = msg->front.iov_base; - end = p + msg->front.iov_len; + end = p + msg->front_max; dout("__send_subscribe to 'mdsmap' %u+\n", (unsigned)monc->have_mdsmap); @@ -227,7 +223,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - ceph_con_send(monc->con, msg); + ceph_con_revoke(monc->con, msg); + ceph_con_send(monc->con, ceph_msg_get(msg)); monc->sub_sent = jiffies | 1; /* never 0 */ } @@ -631,7 +628,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; - /* msg pools */ + /* msgs */ err = -ENOMEM; monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, sizeof(struct ceph_mon_subscribe_ack), @@ -639,9 +636,13 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) if (!monc->m_subscribe_ack) goto out_monmap; + monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); + if (!monc->m_subscribe) + goto out_subscribe_ack; + monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); if (!monc->m_auth_reply) - goto out_subscribe_ack; + goto out_subscribe; monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); monc->pending_auth = 0; @@ -665,6 +666,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) out_auth_reply: ceph_msg_put(monc->m_auth_reply); +out_subscribe: + ceph_msg_put(monc->m_subscribe); out_subscribe_ack: ceph_msg_put(monc->m_subscribe_ack); out_monmap: @@ -691,6 +694,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc) ceph_msg_put(monc->m_auth); ceph_msg_put(monc->m_auth_reply); + ceph_msg_put(monc->m_subscribe); ceph_msg_put(monc->m_subscribe_ack); kfree(monc->monmap); diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 76887785a4d7..174d794321d0 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h @@ -63,7 +63,7 @@ struct ceph_mon_client { struct delayed_work delayed_work; struct ceph_auth_client *auth; - struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe_ack; + struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack; int pending_auth; bool hunting; -- cgit v1.2.3 From 7ea8085910ef3dd4f3cad6845aaa2b580d39b115 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 May 2010 17:53:25 +0200 Subject: drop unused dentry argument to ->fsync Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- arch/powerpc/platforms/cell/spufs/file.c | 3 +-- drivers/char/ps3flash.c | 3 +-- drivers/mtd/ubi/cdev.c | 3 +-- drivers/staging/pohmelfs/inode.c | 2 +- drivers/usb/gadget/printer.c | 2 +- drivers/video/fb_defio.c | 2 +- fs/9p/vfs_file.c | 6 ++---- fs/affs/affs.h | 2 +- fs/affs/file.c | 4 ++-- fs/afs/internal.h | 2 +- fs/afs/write.c | 3 ++- fs/bad_inode.c | 3 +-- fs/block_dev.c | 7 +------ fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 3 ++- fs/ceph/caps.c | 4 ++-- fs/ceph/dir.c | 5 ++--- fs/ceph/super.h | 2 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/file.c | 4 ++-- fs/coda/coda_int.h | 3 +-- fs/coda/file.c | 4 ++-- fs/ecryptfs/file.c | 2 +- fs/exofs/file.c | 7 +++---- fs/ext2/ext2.h | 2 +- fs/ext2/file.c | 6 +++--- fs/ext3/fsync.c | 4 ++-- fs/ext4/ext4.h | 2 +- fs/ext4/fsync.c | 8 ++++---- fs/fat/fat.h | 3 +-- fs/fat/file.c | 6 +++--- fs/fuse/dir.c | 5 ++--- fs/fuse/file.c | 9 ++++----- fs/fuse/fuse_i.h | 3 +-- fs/gfs2/file.c | 4 ++-- fs/hostfs/hostfs_kern.c | 4 ++-- fs/hpfs/file.c | 4 ++-- fs/hpfs/hpfs_fn.h | 2 +- fs/hppfs/hppfs.c | 2 +- fs/jffs2/file.c | 4 ++-- fs/jffs2/os-linux.h | 2 +- fs/jfs/file.c | 4 ++-- fs/jfs/jfs_inode.h | 2 +- fs/libfs.c | 6 +++--- fs/logfs/file.c | 4 ++-- fs/logfs/logfs.h | 2 +- fs/ncpfs/file.c | 2 +- fs/nfs/dir.c | 6 ++++-- fs/nfs/file.c | 5 +++-- fs/nilfs2/file.c | 4 ++-- fs/nilfs2/nilfs.h | 2 +- fs/ntfs/dir.c | 5 ++--- fs/ntfs/file.c | 9 ++------- fs/ocfs2/file.c | 7 +++---- fs/reiserfs/dir.c | 8 +++----- fs/reiserfs/file.c | 5 ++--- fs/smbfs/file.c | 3 ++- fs/sync.c | 8 +++----- fs/ubifs/file.c | 4 ++-- fs/ubifs/ubifs.h | 2 +- fs/xfs/linux-2.6/xfs_file.c | 10 +++++----- include/linux/buffer_head.h | 2 +- include/linux/ext3_fs.h | 2 +- include/linux/fb.h | 5 +---- include/linux/fs.h | 8 ++++---- include/trace/events/ext4.h | 6 ++++-- ipc/shm.c | 11 ++++------- 69 files changed, 129 insertions(+), 157 deletions(-) (limited to 'fs/ceph') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 61c98f03baa1..96d4293607ec 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -380,7 +380,7 @@ prototypes: int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, struct dentry *, int datasync); + int (*fsync) (struct file *, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b66858538df5..d4f5731dcbbb 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -729,7 +729,7 @@ struct file_operations { int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, struct dentry *, int datasync); + int (*fsync) (struct file *, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 5c2808252516..1a40da92154c 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1849,8 +1849,7 @@ out: return ret; } -static int spufs_mfc_fsync(struct file *file, struct dentry *dentry, - int datasync) +static int spufs_mfc_fsync(struct file *file, int datasync) { return spufs_mfc_flush(file, NULL); } diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 606048b72bcf..85c004a518ee 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -305,8 +305,7 @@ static int ps3flash_flush(struct file *file, fl_owner_t id) return ps3flash_writeback(ps3flash_dev); } -static int ps3flash_fsync(struct file *file, struct dentry *dentry, - int datasync) +static int ps3flash_fsync(struct file *file, int datasync) { return ps3flash_writeback(ps3flash_dev); } diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 72ebb3f06b86..4dfa6b90c21c 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -189,8 +189,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) return new_offset; } -static int vol_cdev_fsync(struct file *file, struct dentry *dentry, - int datasync) +static int vol_cdev_fsync(struct file *file, int datasync) { struct ubi_volume_desc *desc = file->private_data; struct ubi_device *ubi = desc->vol->ubi; diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index 9286e863b0e7..e92595eff1b8 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -880,7 +880,7 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb) /* * We want fsync() to work on POHMELFS. */ -static int pohmelfs_fsync(struct file *file, struct dentry *dentry, int datasync) +static int pohmelfs_fsync(struct file *file, int datasync) { struct inode *inode = file->f_mapping->host; struct writeback_control wbc = { diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c index 6b8bf8c781c4..43abf55d8c60 100644 --- a/drivers/usb/gadget/printer.c +++ b/drivers/usb/gadget/printer.c @@ -794,7 +794,7 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } static int -printer_fsync(struct file *fd, struct dentry *dentry, int datasync) +printer_fsync(struct file *fd, int datasync) { struct printer_dev *dev = fd->private_data; unsigned long flags; diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 1105a591dcc1..073c9b408cf7 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -66,7 +66,7 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma, return 0; } -int fb_deferred_io_fsync(struct file *file, struct dentry *dentry, int datasync) +int fb_deferred_io_fsync(struct file *file, int datasync) { struct fb_info *info = file->private_data; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 25b300e1c9d7..2bedc6c94fc2 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -257,15 +257,13 @@ v9fs_file_write(struct file *filp, const char __user * data, return total; } -static int v9fs_file_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int v9fs_file_fsync(struct file *filp, int datasync) { struct p9_fid *fid; struct p9_wstat wstat; int retval; - P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp, - dentry, datasync); + P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); fid = filp->private_data; v9fs_blank_wstat(&wstat); diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 861dae68ac12..f05b6155ccc8 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -183,7 +183,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent void affs_free_prealloc(struct inode *inode); extern void affs_truncate(struct inode *); -int affs_file_fsync(struct file *, struct dentry *, int); +int affs_file_fsync(struct file *, int); /* dir.c */ diff --git a/fs/affs/file.c b/fs/affs/file.c index 184e55c1c9ba..322710c3eedf 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -916,9 +916,9 @@ affs_truncate(struct inode *inode) affs_free_prealloc(inode); } -int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +int affs_file_fsync(struct file *filp, int datasync) { - struct inode * inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int ret, err; ret = write_inode_now(inode, 0); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 807f284cc75e..5f679b77ce24 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -740,7 +740,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern int afs_writeback_all(struct afs_vnode *); -extern int afs_fsync(struct file *, struct dentry *, int); +extern int afs_fsync(struct file *, int); /*****************************************************************************/ diff --git a/fs/afs/write.c b/fs/afs/write.c index 3bed54a294d4..3dab9e9948d0 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -701,8 +701,9 @@ int afs_writeback_all(struct afs_vnode *vnode) * - the return status from this call provides a reliable indication of * whether any write errors occurred for this process. */ -int afs_fsync(struct file *file, struct dentry *dentry, int datasync) +int afs_fsync(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct afs_writeback *wb, *xwb; struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); int ret; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index a05287a23f62..52e59bf4aa5f 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -93,8 +93,7 @@ static int bad_file_release(struct inode *inode, struct file *filp) return -EIO; } -static int bad_file_fsync(struct file *file, struct dentry *dentry, - int datasync) +static int bad_file_fsync(struct file *file, int datasync) { return -EIO; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 26e5f5026620..d0b37e626a1a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -358,12 +358,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) return retval; } -/* - * Filp is never NULL; the only case when ->fsync() is called with - * NULL first argument is nfsd_sync_dir() and that's not a directory. - */ - -int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) +int blkdev_fsync(struct file *filp, int datasync) { struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e9bf86415e86..29c20092847e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2434,7 +2434,7 @@ void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); /* file.c */ -int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); +int btrfs_sync_file(struct file *file, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); int btrfs_check_file(struct btrfs_root *root, struct inode *inode); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 79437c5eeb1e..787b50a16a14 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1101,8 +1101,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * important optimization for directories because holding the mutex prevents * new operations on the dir while we write to disk. */ -int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) +int btrfs_sync_file(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0dd0b81e64f7..ae3e3a306445 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1776,9 +1776,9 @@ out: spin_unlock(&ci->i_unsafe_lock); } -int ceph_fsync(struct file *file, struct dentry *dentry, int datasync) +int ceph_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); unsigned flush_tid; int ret; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4fd30900eff7..0057f4a07347 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1107,10 +1107,9 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, * an fsync() on a dir will wait for any uncommitted directory * operations to commit. */ -static int ceph_dir_fsync(struct file *file, struct dentry *dentry, - int datasync) +static int ceph_dir_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); struct list_head *head = &ci->i_unsafe_dirops; struct ceph_mds_request *req; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3725c9ee9d08..dd1e7add656b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -811,7 +811,7 @@ extern void ceph_put_cap(struct ceph_cap *cap); extern void ceph_queue_caps_release(struct inode *inode); extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); -extern int ceph_fsync(struct file *file, struct dentry *dentry, int datasync); +extern int ceph_fsync(struct file *file, int datasync); extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern int ceph_get_cap_mds(struct inode *inode); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0242ff9cbf41..a7eb65c84b1c 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -84,7 +84,7 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data, extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, size_t write_size, loff_t *poffset); extern int cifs_lock(struct file *, int, struct file_lock *); -extern int cifs_fsync(struct file *, struct dentry *, int); +extern int cifs_fsync(struct file *, int); extern int cifs_flush(struct file *, fl_owner_t id); extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a83541ec9713..f1ff785b2292 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1676,7 +1676,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, return rc; } -int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) +int cifs_fsync(struct file *file, int datasync) { int xid; int rc = 0; @@ -1688,7 +1688,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) xid = GetXid(); cFYI(1, "Sync file - name: %s datasync: 0x%x", - dentry->d_name.name, datasync); + file->f_path.dentry->d_name.name, datasync); rc = filemap_write_and_wait(inode->i_mapping); if (rc == 0) { diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index d99860a33890..6b443ff43a19 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h @@ -11,8 +11,7 @@ extern int coda_fake_statfs; void coda_destroy_inodecache(void); int coda_init_inodecache(void); -int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, - int datasync); +int coda_fsync(struct file *coda_file, int datasync); void coda_sysctl_init(void); void coda_sysctl_clean(void); diff --git a/fs/coda/file.c b/fs/coda/file.c index 7196077b1688..ad3cd2abeeb4 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -202,10 +202,10 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) return 0; } -int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) +int coda_fsync(struct file *coda_file, int datasync) { struct file *host_file; - struct inode *coda_inode = coda_dentry->d_inode; + struct inode *coda_inode = coda_file->f_path.dentry->d_inode; struct coda_file_info *cfi; int err = 0; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 3bdddbcc785f..e8fcf4e2ed7d 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -274,7 +274,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) } static int -ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) +ecryptfs_fsync(struct file *file, int datasync) { return vfs_fsync(ecryptfs_file_to_lower(file), datasync); } diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 839b9dc1e70f..fef6899be397 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -40,12 +40,11 @@ static int exofs_release_file(struct inode *inode, struct file *filp) return 0; } -static int exofs_file_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int exofs_file_fsync(struct file *filp, int datasync) { int ret; struct address_space *mapping = filp->f_mapping; - struct inode *inode = dentry->d_inode; + struct inode *inode = mapping->host; struct super_block *sb; ret = filemap_write_and_wait(mapping); @@ -66,7 +65,7 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry, static int exofs_flush(struct file *file, fl_owner_t id) { - exofs_file_fsync(file, file->f_path.dentry, 1); + exofs_file_fsync(file, 1); /* TODO: Flush the OSD target */ return 0; } diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 0b038e47ad2f..8e917b686510 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -155,7 +155,7 @@ extern void ext2_write_super (struct super_block *); extern const struct file_operations ext2_dir_operations; /* file.c */ -extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync); +extern int ext2_fsync(struct file *file, int datasync); extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; extern const struct file_operations ext2_xip_file_operations; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5d198d0697fb..48bcfc327014 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp) return 0; } -int ext2_fsync(struct file *file, struct dentry *dentry, int datasync) +int ext2_fsync(struct file *file, int datasync) { int ret; - struct super_block *sb = dentry->d_inode->i_sb; + struct super_block *sb = file->f_mapping->host->i_sb; struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; - ret = simple_fsync(file, dentry, datasync); + ret = simple_fsync(file, datasync); if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { /* We don't really know where the IO error happened... */ ext2_error(sb, __func__, diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index fcf7487734b6..d7e9f74dc3a6 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -43,9 +43,9 @@ * inode to disk. */ -int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) +int ext3_sync_file(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ext3_inode_info *ei = EXT3_I(inode); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; int ret, needs_barrier = 0; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 60bd31026e7c..19a4de57128a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1519,7 +1519,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, extern void ext4_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext4_sync_file(struct file *, struct dentry *, int); +extern int ext4_sync_file(struct file *, int); /* hash.c */ extern int ext4fs_dirhash(const char *name, int len, struct diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index b6a74f991bf4..40f345201737 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -71,9 +71,9 @@ static void ext4_sync_parent(struct inode *inode) * i_mutex lock is held when entering and exiting this function */ -int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) +int ext4_sync_file(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; int ret; @@ -81,7 +81,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) J_ASSERT(ext4_journal_current_handle() == NULL); - trace_ext4_sync_file(file, dentry, datasync); + trace_ext4_sync_file(file, datasync); if (inode->i_sb->s_flags & MS_RDONLY) return 0; @@ -91,7 +91,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) return ret; if (!journal) { - ret = simple_fsync(file, dentry, datasync); + ret = simple_fsync(file, datasync); if (!ret && !list_empty(&inode->i_dentry)) ext4_sync_parent(inode); return ret; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 53dba57b49a1..a1c8c4b44fdb 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -309,8 +309,7 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -extern int fat_file_fsync(struct file *file, struct dentry *dentry, - int datasync); +extern int fat_file_fsync(struct file *file, int datasync); /* fat/inode.c */ extern void fat_attach(struct inode *inode, loff_t i_pos); diff --git a/fs/fat/file.c b/fs/fat/file.c index a14c2f6a489e..29a576944374 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -149,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp) return 0; } -int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +int fat_file_fsync(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int res, err; - res = simple_fsync(filp, dentry, datasync); + res = simple_fsync(filp, datasync); err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); return res ? res : err; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4787ae6c5c1c..3cdc5f78a406 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1156,10 +1156,9 @@ static int fuse_dir_release(struct inode *inode, struct file *file) return 0; } -static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) +static int fuse_dir_fsync(struct file *file, int datasync) { - /* nfsd can call this with no file */ - return file ? fuse_fsync_common(file, de, datasync, 1) : 0; + return fuse_fsync_common(file, datasync, 1); } static bool update_mtime(unsigned ivalid) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a9f5e137f1d3..b5fd6f9905e4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -351,10 +351,9 @@ static void fuse_sync_writes(struct inode *inode) fuse_release_nowrite(inode); } -int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, - int isdir) +int fuse_fsync_common(struct file *file, int datasync, int isdir) { - struct inode *inode = de->d_inode; + struct inode *inode = file->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; struct fuse_req *req; @@ -403,9 +402,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, return err; } -static int fuse_fsync(struct file *file, struct dentry *de, int datasync) +static int fuse_fsync(struct file *file, int datasync) { - return fuse_fsync_common(file, de, datasync, 0); + return fuse_fsync_common(file, datasync, 0); } void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 01cc462ff45d..2c0d14a86779 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -568,8 +568,7 @@ void fuse_release_common(struct file *file, int opcode); /** * Send FSYNC or FSYNCDIR request */ -int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, - int isdir); +int fuse_fsync_common(struct file *file, int datasync, int isdir); /** * Notify poll wakeup diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index b20bfcc9fa2d..ed9a94f0ef15 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -554,9 +554,9 @@ static int gfs2_close(struct inode *inode, struct file *file) * Returns: errno */ -static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) +static int gfs2_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); int ret = 0; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 3a029d8f4cf1..87ac1891a185 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -411,9 +411,9 @@ int hostfs_file_open(struct inode *ino, struct file *file) return 0; } -int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int hostfs_fsync(struct file *file, int datasync) { - return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync); + return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync); } static const struct file_operations hostfs_file_fops = { diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 3efabff00367..a9ae9bfa752f 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -19,9 +19,9 @@ static int hpfs_file_release(struct inode *inode, struct file *file) return 0; } -int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) +int hpfs_file_fsync(struct file *file, int datasync) { - /*return file_fsync(file, dentry);*/ + /*return file_fsync(file, datasync);*/ return 0; /* Don't fsync :-) */ } diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 97bf738cd5d6..75f9d4324851 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *, /* file.c */ -int hpfs_file_fsync(struct file *, struct dentry *, int); +int hpfs_file_fsync(struct file *, int); extern const struct file_operations hpfs_file_ops; extern const struct inode_operations hpfs_file_iops; extern const struct address_space_operations hpfs_aops; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 2e4dfa8593da..826c3f9d29ac 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -587,7 +587,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) return err; } -static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) +static int hppfs_fsync(struct file *file, int datasync) { return 0; } diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index e7291c161a19..813497024437 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -26,9 +26,9 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page **pagep, void **fsdata); static int jffs2_readpage (struct file *filp, struct page *pg); -int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync) +int jffs2_fsync(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); /* Trigger GC to flush any pending writes for this inode */ diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 035a767f958b..4791aacf3084 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations; extern const struct file_operations jffs2_file_operations; extern const struct inode_operations jffs2_file_inode_operations; extern const struct address_space_operations jffs2_file_address_operations; -int jffs2_fsync(struct file *, struct dentry *, int); +int jffs2_fsync(struct file *, int); int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); /* ioctl.c */ diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 85d9ec659225..127263cc8657 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -27,9 +27,9 @@ #include "jfs_acl.h" #include "jfs_debug.h" -int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int jfs_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int rc = 0; if (!(inode->i_state & I_DIRTY) || diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 9e6bda30a6e8..11042b1f44b5 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -21,7 +21,7 @@ struct fid; extern struct inode *ialloc(struct inode *, umode_t); -extern int jfs_fsync(struct file *, struct dentry *, int); +extern int jfs_fsync(struct file *, int); extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); diff --git a/fs/libfs.c b/fs/libfs.c index 232bea425b09..e57ea58bda68 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -58,7 +58,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na return NULL; } -int simple_sync_file(struct file * file, struct dentry *dentry, int datasync) +int simple_sync_file(struct file *file, int datasync) { return 0; } @@ -851,13 +851,13 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, } EXPORT_SYMBOL_GPL(generic_fh_to_parent); -int simple_fsync(struct file *file, struct dentry *dentry, int datasync) +int simple_fsync(struct file *file, int datasync) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, /* metadata-only; caller takes care of data */ }; - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int err; int ret; diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 0de524071870..abe1cafbd4c2 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -219,9 +219,9 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, } } -int logfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int logfs_fsync(struct file *file, int datasync) { - struct super_block *sb = dentry->d_inode->i_sb; + struct super_block *sb = file->f_mapping->host->i_sb; logfs_write_anchor(sb); return 0; diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 1a9db84f8d8f..c838c4d72111 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -506,7 +506,7 @@ extern const struct address_space_operations logfs_reg_aops; int logfs_readpage(struct file *file, struct page *page); int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); -int logfs_fsync(struct file *file, struct dentry *dentry, int datasync); +int logfs_fsync(struct file *file, int datasync); /* gc.c */ u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index b93870892892..3639cc5cbdae 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -22,7 +22,7 @@ #include #include "ncplib_kernel.h" -static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync) +static int ncp_fsync(struct file *file, int datasync) { return 0; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index db64854b7b09..782b431ef91c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -53,7 +53,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *); static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -static int nfs_fsync_dir(struct file *, struct dentry *, int); +static int nfs_fsync_dir(struct file *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); const struct file_operations nfs_dir_operations = { @@ -641,8 +641,10 @@ out: * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. */ -static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) +static int nfs_fsync_dir(struct file *filp, int datasync) { + struct dentry *dentry = filp->f_path.dentry; + dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cac96bcc91e4..36a5e74f51b4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -53,7 +53,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); -static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync); +static int nfs_file_fsync(struct file *, int datasync); static int nfs_check_flags(int flags); static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); @@ -322,8 +322,9 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * whether any write errors occurred for this process. */ static int -nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) +nfs_file_fsync(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 30292df443ce..c9a30d7ff6fc 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -27,7 +27,7 @@ #include "nilfs.h" #include "segment.h" -int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) +int nilfs_sync_file(struct file *file, int datasync) { /* * Called from fsync() system call @@ -37,7 +37,7 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) * This function should be implemented when the writeback function * will be implemented. */ - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int err; if (!nilfs_inode_dirty(inode)) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 8723e5bfd071..47d6d7928122 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -228,7 +228,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, struct page *, struct inode *); /* file.c */ -extern int nilfs_sync_file(struct file *, struct dentry *, int); +extern int nilfs_sync_file(struct file *, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index fe44d3feee4a..0f48e7c5d9e1 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1527,10 +1527,9 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) * this problem for now. We do write the $BITMAP attribute if it is present * which is the important one for a directory so things are not too bad. */ -static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int ntfs_dir_fsync(struct file *filp, int datasync) { - struct inode *bmp_vi, *vi = dentry->d_inode; + struct inode *bmp_vi, *vi = filp->f_mapping->host; int err, ret; ntfs_attr na; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index a1924a0d2ab0..113ebd9f25a4 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2133,7 +2133,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, /** * ntfs_file_fsync - sync a file to disk * @filp: file to be synced - * @dentry: dentry describing the file to sync * @datasync: if non-zero only flush user data and not metadata * * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync @@ -2149,19 +2148,15 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, * Also, if @datasync is true, we do not wait on the inode to be written out * but we always wait on the page cache pages to be written out. * - * Note: In the past @filp could be NULL so we ignore it as we don't need it - * anyway. - * * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore * this problem for now. */ -static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int ntfs_file_fsync(struct file *filp, int datasync) { - struct inode *vi = dentry->d_inode; + struct inode *vi = filp->f_mapping->host; int err, ret = 0; ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 97e54b9e654b..1c6220a8e072 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -175,13 +175,12 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file) return 0; } -static int ocfs2_sync_file(struct file *file, - struct dentry *dentry, - int datasync) +static int ocfs2_sync_file(struct file *file, int datasync) { int err = 0; journal_t *journal; - struct inode *inode = dentry->d_inode; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = file->f_mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 4455fbe269a3..198dabf1b2bb 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -14,8 +14,7 @@ extern const struct reiserfs_key MIN_KEY; static int reiserfs_readdir(struct file *, void *, filldir_t); -static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, - int datasync); +static int reiserfs_dir_fsync(struct file *filp, int datasync); const struct file_operations reiserfs_dir_operations = { .llseek = generic_file_llseek, @@ -28,10 +27,9 @@ const struct file_operations reiserfs_dir_operations = { #endif }; -static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int reiserfs_dir_fsync(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int err; reiserfs_write_lock(inode->i_sb); err = reiserfs_commit_for_inode(inode); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9977df9f3a54..b82cdd8a45dd 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -134,10 +134,9 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) * be removed... */ -static int reiserfs_sync_file(struct file *filp, - struct dentry *dentry, int datasync) +static int reiserfs_sync_file(struct file *filp, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; int err; int barrier_done; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 84ecf0e43f91..8e187a0f94bb 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -28,8 +28,9 @@ #include "proto.h" static int -smb_fsync(struct file *file, struct dentry * dentry, int datasync) +smb_fsync(struct file *file, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct smb_sb_info *server = server_from_dentry(dentry); int result; diff --git a/fs/sync.c b/fs/sync.c index e8cbd415e50a..c9f83f480ec5 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -130,12 +130,10 @@ void emergency_sync(void) /* * Generic function to fsync a file. - * - * filp may be NULL if called via the msync of a vma. */ -int file_fsync(struct file *filp, struct dentry *dentry, int datasync) +int file_fsync(struct file *filp, int datasync) { - struct inode * inode = dentry->d_inode; + struct inode *inode = filp->f_mapping->host; struct super_block * sb; int ret, err; @@ -183,7 +181,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) * livelocks in fsync_buffers_list(). */ mutex_lock(&mapping->host->i_mutex); - err = file->f_op->fsync(file, file->f_path.dentry, datasync); + err = file->f_op->fsync(file, datasync); if (!ret) ret = err; mutex_unlock(&mapping->host->i_mutex); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5692cf72b807..c726da68e6be 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1304,9 +1304,9 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) +int ubifs_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; int err; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index bd2542dad014..b0904536cc1c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1678,7 +1678,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_fsync(struct file *file, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); /* dir.c */ diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index d8fb1b5d6cb5..257a56b127cf 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -100,10 +100,10 @@ xfs_iozero( STATIC int xfs_file_fsync( struct file *file, - struct dentry *dentry, int datasync) { - struct xfs_inode *ip = XFS_I(dentry->d_inode); + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); struct xfs_trans *tp; int error = 0; int log_flushed = 0; @@ -140,8 +140,8 @@ xfs_file_fsync( * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ - if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) || - ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) && + if (((inode->i_state & I_DIRTY_DATASYNC) || + ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the @@ -868,7 +868,7 @@ write_retry: mutex_lock(&inode->i_mutex); xfs_ilock(ip, iolock); - error2 = -xfs_file_fsync(file, file->f_path.dentry, + error2 = -xfs_file_fsync(file, (file->f_flags & __O_SYNC) ? 0 : 1); if (!error) error = error2; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 16ed0284d780..05e5f5996216 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -224,7 +224,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); -int file_fsync(struct file *, struct dentry *, int); +int file_fsync(struct file *, int); int nobh_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 5f494b465097..7fc62d4550b2 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -868,7 +868,7 @@ extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, extern void ext3_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext3_sync_file (struct file *, struct dentry *, int); +extern int ext3_sync_file(struct file *, int); /* hash.c */ extern int ext3fs_dirhash(const char *name, int len, struct diff --git a/include/linux/fb.h b/include/linux/fb.h index f3793ebc241c..907ace3a64c8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -4,8 +4,6 @@ #include #include -struct dentry; - /* Definitions of frame buffers */ #define FB_MAX 32 /* sufficient for now */ @@ -1017,8 +1015,7 @@ extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); extern void fb_deferred_io_cleanup(struct fb_info *info); -extern int fb_deferred_io_fsync(struct file *file, struct dentry *dentry, - int datasync); +extern int fb_deferred_io_fsync(struct file *file, int datasync); static inline bool fb_be_math(struct fb_info *info) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 3d9ed8302402..eb39e5eb77f5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1498,7 +1498,7 @@ struct file_operations { int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, struct dentry *, int datasync); + int (*fsync) (struct file *, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); @@ -2213,7 +2213,7 @@ extern int generic_segment_checks(const struct iovec *iov, /* fs/block_dev.c */ extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); -extern int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync); +extern int blkdev_fsync(struct file *filp, int datasync); /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, @@ -2348,7 +2348,7 @@ extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -extern int simple_sync_file(struct file *, struct dentry *, int); +extern int simple_sync_file(struct file *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); extern int simple_write_begin(struct file *file, struct address_space *mapping, @@ -2373,7 +2373,7 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); -extern int simple_fsync(struct file *, struct dentry *, int); +extern int simple_fsync(struct file *, int); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 5d60ad4ebf78..f5b1ba90e952 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -606,9 +606,9 @@ TRACE_EVENT(ext4_free_blocks, ); TRACE_EVENT(ext4_sync_file, - TP_PROTO(struct file *file, struct dentry *dentry, int datasync), + TP_PROTO(struct file *file, int datasync), - TP_ARGS(file, dentry, datasync), + TP_ARGS(file, datasync), TP_STRUCT__entry( __field( dev_t, dev ) @@ -618,6 +618,8 @@ TRACE_EVENT(ext4_sync_file, ), TP_fast_assign( + struct dentry *dentry = file->f_path.dentry; + __entry->dev = dentry->d_inode->i_sb->s_dev; __entry->ino = dentry->d_inode->i_ino; __entry->datasync = datasync; diff --git a/ipc/shm.c b/ipc/shm.c index 1a314c89f93c..52ed77eb9713 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -273,16 +273,13 @@ static int shm_release(struct inode *ino, struct file *file) return 0; } -static int shm_fsync(struct file *file, struct dentry *dentry, int datasync) +static int shm_fsync(struct file *file, int datasync) { - int (*fsync) (struct file *, struct dentry *, int datasync); struct shm_file_data *sfd = shm_file_data(file); - int ret = -EINVAL; - fsync = sfd->file->f_op->fsync; - if (fsync) - ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync); - return ret; + if (!sfd->file->f_op->fsync) + return -EINVAL; + return sfd->file->f_op->fsync(sfd->file, datasync); } static unsigned long shm_get_unmapped_area(struct file *file, -- cgit v1.2.3 From aa91647c898d62e869fcf35e977ab3c533be8fc1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 May 2010 11:15:51 -0700 Subject: ceph: make mds requests killable, not interruptible The underlying problem is that many mds requests can't be restarted. For example, a restarted create() would return -EEXIST if the original request succeeds. However, we do not want a hung MDS to hang the client too. So, use the _killable wait_for_completion variants to abort on SIGKILL but nothing else. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 885aa5710cfd..08413c8a85b2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1768,12 +1768,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); if (req->r_timeout) { - err = (long)wait_for_completion_interruptible_timeout( + err = (long)wait_for_completion_killable_timeout( &req->r_completion, req->r_timeout); if (err == 0) err = -EIO; } else { - err = wait_for_completion_interruptible(&req->r_completion); + err = wait_for_completion_killable(&req->r_completion); } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); -- cgit v1.2.3 From e95e9a7ae4c1e7655a0438579f891b3c60178d77 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 09:24:42 -0700 Subject: ceph: avoid possible null dereference ac->ops may be null; use protocol id in error message instead. Reported-by: Dan Carpenter Signed-off-by: Sage Weil --- fs/ceph/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 9f46de2ba7a7..01c4a1cd53c0 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -217,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, if (ac->protocol != protocol) { ret = ceph_auth_init_protocol(ac, protocol); if (ret) { - pr_err("error %d on auth method %s init\n", - ret, ac->ops->name); + pr_err("error %d on auth protocol %d init\n", + ret, protocol); goto out; } } -- cgit v1.2.3 From 984c76908efd3c6795aa03dff16a8fc3496af99f Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sun, 23 May 2010 21:47:58 +0200 Subject: ceph: removed duplicated #includes fs/ceph/auth.c: linux/slab.h is included more than once. fs/ceph/super.h: linux/slab.h is included more than once. Acked-by: Christoph Lameter Signed-off-by: Andrea Gelmini Signed-off-by: Sage Weil --- fs/ceph/auth.c | 1 - fs/ceph/super.h | 1 - 2 files changed, 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 01c4a1cd53c0..a28ebdf465d7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -1,7 +1,6 @@ #include "ceph_debug.h" #include -#include #include #include diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3725c9ee9d08..d84a2527046c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 09c4d6a7d40dd26c1b35674c582382b7ea551368 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 15:38:06 -0700 Subject: ceph: do not resend mon requests on auth ticket renewal We only want to send pending mon requests when we successfully authenticate. If we are already authenticated, like when we renew our ticket, there is no need to resend pending requests. Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index f6510a476e7e..21c62e9b7d1d 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -704,8 +704,11 @@ static void handle_auth_reply(struct ceph_mon_client *monc, struct ceph_msg *msg) { int ret; + int was_auth = 0; mutex_lock(&monc->mutex); + if (monc->auth->ops) + was_auth = monc->auth->ops->is_authenticated(monc->auth); monc->pending_auth = 0; ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, msg->front.iov_len, @@ -716,7 +719,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, wake_up(&monc->client->auth_wq); } else if (ret > 0) { __send_prepared_auth_request(monc, ret); - } else if (monc->auth->ops->is_authenticated(monc->auth)) { + } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; -- cgit v1.2.3 From a41359fa355e7b450c610ed8e913d5d75c3c9c3b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 15:39:06 -0700 Subject: ceph: renew auth tickets before they expire We were only requesting renewal after our tickets expire; do so before that. Most of the low-level logic for this was already there; just use it. Signed-off-by: Sage Weil --- fs/ceph/auth.c | 2 +- fs/ceph/auth.h | 6 ++++++ fs/ceph/auth_none.c | 8 ++++++++ fs/ceph/auth_x.c | 12 ++++++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index a28ebdf465d7..89490beaf537 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c @@ -246,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac, if (!ac->protocol) return ceph_auth_build_hello(ac, msg_buf, msg_len); BUG_ON(!ac->ops); - if (!ac->ops->is_authenticated(ac)) + if (ac->ops->should_authenticate(ac)) return ceph_build_auth_request(ac, msg_buf, msg_len); return 0; } diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index 4429a707c021..d38a2fb4a137 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h @@ -23,6 +23,12 @@ struct ceph_auth_client_ops { */ int (*is_authenticated)(struct ceph_auth_client *ac); + /* + * true if we should (re)authenticate, e.g., when our tickets + * are getting old and crusty. + */ + int (*should_authenticate)(struct ceph_auth_client *ac); + /* * build requests and process replies during monitor * handshake. if handle_reply returns -EAGAIN, we build diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 24407c119291..ad1dc21286c7 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c @@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac) return !xi->starting; } +static int should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_auth_none_info *xi = ac->private; + + return xi->starting; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -98,6 +105,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .reset = reset, .destroy = destroy, .is_authenticated = is_authenticated, + .should_authenticate = should_authenticate, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 7b206231566d..83d4d2785ffe 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) return (ac->want_keys & xi->have_keys) == ac->want_keys; } +static int ceph_x_should_authenticate(struct ceph_auth_client *ac) +{ + struct ceph_x_info *xi = ac->private; + int need; + + ceph_x_validate_tickets(ac, &need); + dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", + ac->want_keys, need, xi->have_keys); + return need != 0; +} + static int ceph_x_encrypt_buflen(int ilen) { return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + @@ -620,6 +631,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static const struct ceph_auth_client_ops ceph_x_ops = { .name = "x", .is_authenticated = ceph_x_is_authenticated, + .should_authenticate = ceph_x_should_authenticate, .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, -- cgit v1.2.3 From 7e34bc524ecae3a04d8cc427ee76ddad826a937b Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 May 2010 12:01:14 +0200 Subject: fs/ceph: Use ERR_CAST Use ERR_CAST(x) rather than ERR_PTR(PTR_ERR(x)). The former makes more clear what is the purpose of the operation, which otherwise looks like a no-op. In the case of fs/ceph/inode.c, ERR_CAST is not needed, because the type of the returned value is the same as the type of the enclosing function. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ type T; T x; identifier f; @@ T f (...) { <+... - ERR_PTR(PTR_ERR(x)) + x ...+> } @@ expression x; @@ - ERR_PTR(PTR_ERR(x)) + ERR_CAST(x) // Signed-off-by: Julia Lawall Signed-off-by: Sage Weil --- fs/ceph/dir.c | 2 +- fs/ceph/export.c | 2 +- fs/ceph/file.c | 2 +- fs/ceph/inode.c | 2 +- fs/ceph/osdmap.c | 2 +- fs/ceph/super.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4fd30900eff7..a86c1d5bf84f 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -587,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; /* we only need inode linkage */ diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 17447644d675..4480cb1c63e7 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -133,7 +133,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_ino1 = vino; req->r_ino2.ino = cfh->parent_ino; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0611ec3698aa..f06f902367e1 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -230,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a81b8b662c7b..226f5a50d362 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -69,7 +69,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) BUG_ON(!S_ISDIR(parent->i_mode)); if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return inode; inode->i_mode = parent->i_mode; inode->i_uid = parent->i_uid; inode->i_gid = parent->i_gid; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index cfdd8f4388b7..ddc656fb5c05 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -706,7 +706,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, len, *p, end); newcrush = crush_decode(*p, min(*p+len, end)); if (IS_ERR(newcrush)) - return ERR_PTR(PTR_ERR(newcrush)); + return ERR_CAST(newcrush); } /* new flags? */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bac13898b943..9b46bb951e1f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -738,7 +738,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client, dout("open_root_inode opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); req->r_path1 = kstrdup(path, GFP_NOFS); req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; -- cgit v1.2.3 From dd1c9057366f329911180e9000e2b425f23fc287 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 May 2010 16:45:25 -0700 Subject: ceph: make lease code DN specific The lease code includes a mask in the CEPH_LOCK_* namespace, but that namespace is changing, and only one mask (formerly _DN == 1) is used, so hard code for that value for now. If we ever extend this code to handle leases over different data types we can extend it accordingly. Signed-off-by: Sage Weil --- fs/ceph/ceph_fs.h | 21 +++++++++++---------- fs/ceph/mds_client.c | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 3b9eeed097b3..2fa992eaf7da 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -265,16 +265,17 @@ extern const char *ceph_mds_state_name(int s); * - they also define the lock ordering by the MDS * - a few of these are internal to the mds */ -#define CEPH_LOCK_DN 1 -#define CEPH_LOCK_ISNAP 2 -#define CEPH_LOCK_IVERSION 4 /* mds internal */ -#define CEPH_LOCK_IFILE 8 /* mds internal */ -#define CEPH_LOCK_IAUTH 32 -#define CEPH_LOCK_ILINK 64 -#define CEPH_LOCK_IDFT 128 /* dir frag tree */ -#define CEPH_LOCK_INEST 256 /* mds internal */ -#define CEPH_LOCK_IXATTR 512 -#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_DVERSION 1 +#define CEPH_LOCK_DN 2 +#define CEPH_LOCK_ISNAP 16 +#define CEPH_LOCK_IVERSION 32 /* mds internal */ +#define CEPH_LOCK_IFILE 64 +#define CEPH_LOCK_IAUTH 128 +#define CEPH_LOCK_ILINK 256 +#define CEPH_LOCK_IDFT 512 /* dir frag tree */ +#define CEPH_LOCK_INEST 1024 /* mds internal */ +#define CEPH_LOCK_IXATTR 2048 +#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ /* client_session ops */ enum { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 08413c8a85b2..5a88b7bb3798 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2541,7 +2541,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, return; lease = msg->front.iov_base; lease->action = action; - lease->mask = cpu_to_le16(CEPH_LOCK_DN); + lease->mask = cpu_to_le16(1); lease->ino = cpu_to_le64(ceph_vino(inode).ino); lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->seq = cpu_to_le32(seq); @@ -2571,7 +2571,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, BUG_ON(inode == NULL); BUG_ON(dentry == NULL); - BUG_ON(mask != CEPH_LOCK_DN); + BUG_ON(mask == 0); /* is dentry lease valid? */ spin_lock(&dentry->d_lock); -- cgit v1.2.3 From a922d38fd10d55d5033f10df15baf966e8f5b18c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 29 May 2010 09:41:23 -0700 Subject: ceph: close out mds, osd connections before stopping auth The auth module (part of the mon_client) is needed to free any ceph_authorizer(s) used by the mds and osd connections. Flush the msgr workqueue before stopping monc to ensure that the destroy_authorizer auth op is available when those connections are closed out. Signed-off-by: Sage Weil --- fs/ceph/messenger.c | 6 ++++++ fs/ceph/messenger.h | 1 + fs/ceph/super.c | 10 +++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 60b74839ebec..64b8b1f7863d 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -120,6 +120,12 @@ void ceph_msgr_exit(void) destroy_workqueue(ceph_msgr_wq); } +void ceph_msgr_flush() +{ + flush_workqueue(ceph_msgr_wq); +} + + /* * socket callback functions */ diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 00a9430b1ffc..76fbc957bc13 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -213,6 +213,7 @@ extern int ceph_parse_ips(const char *c, const char *end, extern int ceph_msgr_init(void); extern void ceph_msgr_exit(void); +extern void ceph_msgr_flush(void); extern struct ceph_messenger *ceph_messenger_create( struct ceph_entity_addr *myaddr); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9b46bb951e1f..5cf6fba8b705 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -669,9 +669,17 @@ static void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_mdsc_stop(&client->mdsc); - ceph_monc_stop(&client->monc); ceph_osdc_stop(&client->osdc); + /* + * make sure mds and osd connections close out before destroying + * the auth module, which is needed to free those connections' + * ceph_authorizers. + */ + ceph_msgr_flush(); + + ceph_monc_stop(&client->monc); + ceph_adjust_min_caps(-client->min_caps); ceph_debugfs_client_cleanup(client); -- cgit v1.2.3 From 79494d1b9b92259eb40ea6e939ba5aff4b8de5f1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 27 May 2010 14:15:49 -0700 Subject: ceph: fix leak of osd authorizer Release the ceph_authorizer when releasing osd state. Signed-off-by: Sage Weil --- fs/ceph/osd_client.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index afa7bb3895c4..d25b4add85b4 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -361,8 +361,13 @@ static void put_osd(struct ceph_osd *osd) { dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), atomic_read(&osd->o_ref) - 1); - if (atomic_dec_and_test(&osd->o_ref)) + if (atomic_dec_and_test(&osd->o_ref)) { + struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; + + if (osd->o_authorizer) + ac->ops->destroy_authorizer(ac, osd->o_authorizer); kfree(osd); + } } /* -- cgit v1.2.3 From 2a8e5e3637e2fc058798f5d3626f525729ffaaaf Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 28 May 2010 16:43:16 -0700 Subject: ceph: clean up on forwarded aborted mds request If an mds request is aborted (timeout, SIGKILL), it is left registered to keep our state in sync with the mds. If we get a forward notification, though, we know the request didn't succeed and we can unregister it safely. We were trying to resend it, but then bailing out (and not unregistering) in __do_request. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5a88b7bb3798..b49f12822cbc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2014,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); req = __lookup_request(mdsc, tid); if (!req) { - dout("forward %llu to mds%d - req dne\n", tid, next_mds); + dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); goto out; /* dup reply? */ } - if (fwd_seq <= req->r_num_fwd) { - dout("forward %llu to mds%d - old seq %d <= %d\n", + if (req->r_aborted) { + dout("forward tid %llu aborted, unregistering\n", tid); + __unregister_request(mdsc, req); + } else if (fwd_seq <= req->r_num_fwd) { + dout("forward tid %llu to mds%d - old seq %d <= %d\n", tid, next_mds, req->r_num_fwd, fwd_seq); } else { /* resend. forward race not possible; mds would drop */ - dout("forward %llu to mds%d (we resend)\n", tid, next_mds); + dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + BUG_ON(req->r_err); + BUG_ON(req->r_got_result); req->r_num_fwd = fwd_seq; req->r_resend_mds = next_mds; put_request_session(req); -- cgit v1.2.3 From 13a4214cd9ec14d7b77e98bd3ee51f60f868a6e5 Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Tue, 1 Jun 2010 11:31:08 -0700 Subject: ceph: fix d_subdirs ordering problem We misused list_move_tail() to order the dentry in d_subdirs. This will screw up the d_subdirs order. This bug can be reliably reproduced by: 1. mount ceph fs. 2. on ceph fs, git clone git://ceph.newdream.net/git/ceph.git 3. Run autogen.sh in ceph directory. (Note: Errors only occur at the first time you run autogen.sh.) Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 226f5a50d362..ab47f46ca282 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_lock(&dcache_lock); spin_lock(&dn->d_lock); - list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); + list_move(&dn->d_u.d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); -- cgit v1.2.3 From 205475679a74fe40b63a1c7f41110fdb64daa8b9 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 1 Jun 2010 10:37:40 -0700 Subject: ceph: fix memory leak in statfs Freeing the statfs request structure when required. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 21c62e9b7d1d..07a539906e67 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref) ceph_msg_put(req->reply); if (req->request) ceph_msg_put(req->request); + + kfree(req); } static void put_generic_request(struct ceph_mon_generic_request *req) -- cgit v1.2.3 From 558d3499bd059d4534b1f2b69dc1c562acc733fe Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 1 Jun 2010 12:51:12 -0700 Subject: ceph: fix f_namelen reported by statfs We were setting f_namelen in kstatfs to PATH_MAX instead of NAME_MAX. That disagrees with ceph_lookup behavior (which checks against NAME_MAX), and also makes the pjd posix test suite spit out ugly errors because with can't clean up its temporary files. Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e0bee240b9d..8db88792b5ad 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; - buf->f_namelen = PATH_MAX; + buf->f_namelen = NAME_MAX; buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ -- cgit v1.2.3 From 1e5ea23df11c7c90c7e7268dd3a6603bfa5aadf7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 4 Jun 2010 10:05:40 -0700 Subject: ceph: fix lease revocation when seq doesn't match If the client revokes a lease with a higher seq than what we have, keep the mds's seq, so that it honors our release. Otherwise, we can hang indefinitely. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b49f12822cbc..29b4485cf1ca 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2433,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_dentry_info *di; int mds = session->s_mds; struct ceph_mds_lease *h = msg->front.iov_base; + u32 seq; struct ceph_vino vino; int mask; struct qstr dname; @@ -2446,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; mask = le16_to_cpu(h->mask); + seq = le32_to_cpu(h->seq); dname.name = (void *)h + sizeof(*h) + sizeof(u32); dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); if (dname.len != get_unaligned_le32(h+1)) @@ -2456,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc, /* lookup inode */ inode = ceph_find_inode(sb, vino); - dout("handle_lease '%s', mask %d, ino %llx %p\n", - ceph_lease_op_name(h->action), mask, vino.ino, inode); + dout("handle_lease %s, mask %d, ino %llx %p %.*s\n", + ceph_lease_op_name(h->action), mask, vino.ino, inode, + dname.len, dname.name); if (inode == NULL) { dout("handle_lease no inode %llx\n", vino.ino); goto release; @@ -2482,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc, switch (h->action) { case CEPH_MDS_LEASE_REVOKE: if (di && di->lease_session == session) { - h->seq = cpu_to_le32(di->lease_seq); + if (ceph_seq_cmp(di->lease_seq, seq) > 0) + h->seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); } release = 1; @@ -2496,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, unsigned long duration = le32_to_cpu(h->duration_ms) * HZ / 1000; - di->lease_seq = le32_to_cpu(h->seq); + di->lease_seq = seq; dentry->d_time = di->lease_renew_from + duration; di->lease_renew_after = di->lease_renew_from + (duration >> 1); -- cgit v1.2.3 From 00d5643e7c5ed4ae1bb0b385fe2f41bb951cc3cd Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 10 Jun 2010 11:13:58 -0400 Subject: ceph: fix atomic64_t initialization on ia64 bdi_seq is an atomic_long_t but we're using ATOMIC_INIT, which causes build failures on ia64. This patch fixes it to use ATOMIC_LONG_INIT. Signed-off-by: Jeff Mahoney Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 8db88792b5ad..fa87f51e38e1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -926,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data) /* * construct our own bdi so we can control readahead, etc. */ -static atomic_long_t bdi_seq = ATOMIC_INIT(0); +static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { -- cgit v1.2.3 From 9dbd412f56c453f15014396c6024b895c1485ccb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 10 Jun 2010 13:21:20 -0700 Subject: ceph: fix misleading/incorrect debug message Nothing is released here: the caps message is simply ignored in this case. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ae3e3a306445..da2a0e3cb200 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2714,7 +2714,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, spin_lock(&inode->i_lock); cap = __get_cap_for_mds(ceph_inode(inode), mds); if (!cap) { - dout("no cap on %p ino %llx.%llx from mds%d, releasing\n", + dout(" no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), mds); spin_unlock(&inode->i_lock); goto done; -- cgit v1.2.3 From 3d7ded4d81d807c2f75f310a8d74a5d72be13a1b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 9 Jun 2010 16:47:10 -0700 Subject: ceph: release cap on import if we don't have the inode If we get an IMPORT that give us a cap, but we don't have the inode, queue a release (and try to send it immediately) so that the MDS doesn't get stuck waiting for us. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 90 ++++++++++++++++++++++++++++++++-------------------- fs/ceph/mds_client.c | 6 ++-- fs/ceph/mds_client.h | 3 ++ 3 files changed, 61 insertions(+), 38 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index da2a0e3cb200..7c692e746237 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -981,6 +981,46 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } +static void __queue_cap_release(struct ceph_mds_session *session, + u64 ino, u64 cap_id, u32 migrate_seq, + u32 issue_seq) +{ + struct ceph_msg *msg; + struct ceph_mds_cap_release *head; + struct ceph_mds_cap_item *item; + + spin_lock(&session->s_cap_lock); + BUG_ON(!session->s_num_cap_releases); + msg = list_first_entry(&session->s_cap_releases, + struct ceph_msg, list_head); + + dout(" adding %llx release to mds%d msg %p (%d left)\n", + ino, session->s_mds, msg, session->s_num_cap_releases); + + BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); + head = msg->front.iov_base; + head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); + item = msg->front.iov_base + msg->front.iov_len; + item->ino = cpu_to_le64(ino); + item->cap_id = cpu_to_le64(cap_id); + item->migrate_seq = cpu_to_le32(migrate_seq); + item->seq = cpu_to_le32(issue_seq); + + session->s_num_cap_releases--; + + msg->front.iov_len += sizeof(*item); + if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + dout(" release msg %p full\n", msg); + list_move_tail(&msg->list_head, &session->s_cap_releases_done); + } else { + dout(" release msg %p at %d/%d (%d)\n", msg, + (int)le32_to_cpu(head->num), + (int)CEPH_CAPS_PER_RELEASE, + (int)msg->front.iov_len); + } + spin_unlock(&session->s_cap_lock); +} + /* * Queue cap releases when an inode is dropped from our cache. Since * inode is about to be destroyed, there is no need for i_lock. @@ -994,41 +1034,9 @@ void ceph_queue_caps_release(struct inode *inode) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); struct ceph_mds_session *session = cap->session; - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - struct ceph_mds_cap_item *item; - spin_lock(&session->s_cap_lock); - BUG_ON(!session->s_num_cap_releases); - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - - dout(" adding %p release to mds%d msg %p (%d left)\n", - inode, session->s_mds, msg, session->s_num_cap_releases); - - BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); - head = msg->front.iov_base; - head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); - item = msg->front.iov_base + msg->front.iov_len; - item->ino = cpu_to_le64(ceph_ino(inode)); - item->cap_id = cpu_to_le64(cap->cap_id); - item->migrate_seq = cpu_to_le32(cap->mseq); - item->seq = cpu_to_le32(cap->issue_seq); - - session->s_num_cap_releases--; - - msg->front.iov_len += sizeof(*item); - if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { - dout(" release msg %p full\n", msg); - list_move_tail(&msg->list_head, - &session->s_cap_releases_done); - } else { - dout(" release msg %p at %d/%d (%d)\n", msg, - (int)le32_to_cpu(head->num), - (int)CEPH_CAPS_PER_RELEASE, - (int)msg->front.iov_len); - } - spin_unlock(&session->s_cap_lock); + __queue_cap_release(session, ceph_ino(inode), cap->cap_id, + cap->mseq, cap->issue_seq); p = rb_next(p); __ceph_remove_cap(cap); } @@ -2655,7 +2663,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; int mds = session->s_mds; int op; - u32 seq; + u32 seq, mseq; struct ceph_vino vino; u64 cap_id; u64 size, max_size; @@ -2675,6 +2683,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap = CEPH_NOSNAP; cap_id = le64_to_cpu(h->cap_id); seq = le32_to_cpu(h->seq); + mseq = le32_to_cpu(h->migrate_seq); size = le64_to_cpu(h->size); max_size = le64_to_cpu(h->max_size); @@ -2689,6 +2698,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap, inode); if (!inode) { dout(" i don't have ino %llx\n", vino.ino); + + if (op == CEPH_CAP_OP_IMPORT) + __queue_cap_release(session, vino.ino, cap_id, + mseq, seq); + + /* + * send any full release message to try to move things + * along for the mds (who clearly thinks we still have this + * cap). + */ + ceph_send_cap_releases(mdsc, session); goto done; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 29b4485cf1ca..d28b6a9c0f96 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) /* * called under s_mutex */ -static void send_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) +void ceph_send_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) { struct ceph_msg *msg; @@ -2693,7 +2693,7 @@ static void delayed_work(struct work_struct *work) add_cap_releases(mdsc, s, -1); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) - send_cap_releases(mdsc, s); + ceph_send_cap_releases(mdsc, s); mutex_unlock(&s->s_mutex); ceph_put_mds_session(s); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d9936c4f1212..e43752b52635 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -322,6 +322,9 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session); + extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, -- cgit v1.2.3 From 2b2300d62ea413bec631d5b880effa2cc5363acb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 9 Jun 2010 16:52:04 -0700 Subject: ceph: try to send partial cap release on cap message on missing inode If we have enough memory to allocate a new cap release message, do so, so that we can send a partial release message immediately. This keeps us from making the MDS wait when the cap release it needs is in a partially full release message. If we fail because of ENOMEM, oh well, they'll just have to wait a bit longer. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 1 + fs/ceph/mds_client.c | 10 +++++----- fs/ceph/mds_client.h | 3 +++ 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7c692e746237..619b61655ee5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2708,6 +2708,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, * along for the mds (who clearly thinks we still have this * cap). */ + ceph_add_cap_releases(mdsc, session, -1); ceph_send_cap_releases(mdsc, session); goto done; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d28b6a9c0f96..1766947fc07a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc, * * Called under s_mutex. */ -static int add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session, - int extra) +int ceph_add_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int extra) { struct ceph_msg *msg; struct ceph_mds_cap_release *head; @@ -1980,7 +1980,7 @@ out_err: } mutex_unlock(&mdsc->mutex); - add_cap_releases(mdsc, req->r_session, -1); + ceph_add_cap_releases(mdsc, req->r_session, -1); mutex_unlock(&session->s_mutex); /* kick calling process */ @@ -2690,7 +2690,7 @@ static void delayed_work(struct work_struct *work) send_renew_caps(mdsc, s); else ceph_con_keepalive(&s->s_con); - add_cap_releases(mdsc, s, -1); + ceph_add_cap_releases(mdsc, s, -1); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) ceph_send_cap_releases(mdsc, s); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e43752b52635..b292fa42a66d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -322,6 +322,9 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int extra); extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); -- cgit v1.2.3