summaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c59
-rw-r--r--fs/nfs/blocklayout/blocklayout.h4
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c35
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/callback.h2
-rw-r--r--fs/nfs/callback_proc.c25
-rw-r--r--fs/nfs/callback_xdr.c36
-rw-r--r--fs/nfs/client.c11
-rw-r--r--fs/nfs/delegation.c2
-rw-r--r--fs/nfs/file.c19
-rw-r--r--fs/nfs/fscache-index.c4
-rw-r--r--fs/nfs/idmap.c25
-rw-r--r--fs/nfs/inode.c22
-rw-r--r--fs/nfs/internal.h10
-rw-r--r--fs/nfs/nfs4_fs.h32
-rw-r--r--fs/nfs/nfs4filelayout.c41
-rw-r--r--fs/nfs/nfs4proc.c119
-rw-r--r--fs/nfs/nfs4renewd.c12
-rw-r--r--fs/nfs/nfs4state.c6
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c876
-rw-r--r--fs/nfs/objlayout/objlayout.c209
-rw-r--r--fs/nfs/objlayout/objlayout.h48
-rw-r--r--fs/nfs/objlayout/pnfs_osd_xdr_cli.c3
-rw-r--r--fs/nfs/pagelist.c3
-rw-r--r--fs/nfs/pnfs.c78
-rw-r--r--fs/nfs/pnfs.h5
-rw-r--r--fs/nfs/pnfs_dev.c1
-rw-r--r--fs/nfs/read.c40
-rw-r--r--fs/nfs/super.c42
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfs/write.c79
32 files changed, 649 insertions, 1209 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index e56564d2ef95..281ae95932c9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -36,6 +36,7 @@
#include <linux/namei.h>
#include <linux/bio.h> /* struct bio */
#include <linux/buffer_head.h> /* various write calls */
+#include <linux/prefetch.h>
#include "blocklayout.h"
@@ -175,17 +176,6 @@ retry:
return bio;
}
-static void bl_set_lo_fail(struct pnfs_layout_segment *lseg)
-{
- if (lseg->pls_range.iomode == IOMODE_RW) {
- dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- } else {
- dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
- }
-}
-
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
@@ -205,7 +195,7 @@ static void bl_end_io_read(struct bio *bio, int err)
if (!uptodate) {
if (!rdata->pnfs_error)
rdata->pnfs_error = -EIO;
- bl_set_lo_fail(rdata->lseg);
+ pnfs_set_lo_fail(rdata->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -302,6 +292,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
bl_end_io_read, par);
if (IS_ERR(bio)) {
rdata->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
goto out;
}
}
@@ -369,7 +360,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
if (!uptodate) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
- bl_set_lo_fail(wdata->lseg);
+ pnfs_set_lo_fail(wdata->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -385,7 +376,7 @@ static void bl_end_io_write(struct bio *bio, int err)
if (!uptodate) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
- bl_set_lo_fail(wdata->lseg);
+ pnfs_set_lo_fail(wdata->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -542,6 +533,11 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
fill_invalid_ext:
dprintk("%s need to zero %d pages\n", __func__, npg_zero);
for (;npg_zero > 0; npg_zero--) {
+ if (bl_is_sector_init(be->be_inval, isect)) {
+ dprintk("isect %llu already init\n",
+ (unsigned long long)isect);
+ goto next_page;
+ }
/* page ref released in bl_end_io_write_zero */
index = isect >> PAGE_CACHE_SECTOR_SHIFT;
dprintk("%s zero %dth page: index %lu isect %llu\n",
@@ -561,8 +557,7 @@ fill_invalid_ext:
* PageUptodate: It was read before
* sector_initialized: already written out
*/
- if (PageDirty(page) || PageWriteback(page) ||
- bl_is_sector_init(be->be_inval, isect)) {
+ if (PageDirty(page) || PageWriteback(page)) {
print_page(page);
unlock_page(page);
page_cache_release(page);
@@ -591,6 +586,7 @@ fill_invalid_ext:
bl_end_io_write_zero, par);
if (IS_ERR(bio)) {
wdata->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
goto out;
}
/* FIXME: This should be done in bi_end_io */
@@ -639,6 +635,7 @@ next_page:
bl_end_io_write, par);
if (IS_ERR(bio)) {
wdata->pnfs_error = PTR_ERR(bio);
+ bio = NULL;
goto out;
}
isect += PAGE_CACHE_SECTORS;
@@ -804,7 +801,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
struct nfs4_deviceid *d_id)
{
struct pnfs_device *dev;
- struct pnfs_block_dev *rv = NULL;
+ struct pnfs_block_dev *rv;
u32 max_resp_sz;
int max_pages;
struct page **pages = NULL;
@@ -822,18 +819,20 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
dev = kmalloc(sizeof(*dev), GFP_NOFS);
if (!dev) {
dprintk("%s kmalloc failed\n", __func__);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
kfree(dev);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
for (i = 0; i < max_pages; i++) {
pages[i] = alloc_page(GFP_NOFS);
- if (!pages[i])
+ if (!pages[i]) {
+ rv = ERR_PTR(-ENOMEM);
goto out_free;
+ }
}
memcpy(&dev->dev_id, d_id, sizeof(*d_id));
@@ -846,8 +845,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
rc = nfs4_proc_getdeviceinfo(server, dev);
dprintk("%s getdevice info returns %d\n", __func__, rc);
- if (rc)
+ if (rc) {
+ rv = ERR_PTR(rc);
goto out_free;
+ }
rv = nfs4_blk_decode_device(server, dev);
out_free:
@@ -865,7 +866,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
struct pnfs_devicelist *dlist = NULL;
struct pnfs_block_dev *bdev;
LIST_HEAD(block_disklist);
- int status = 0, i;
+ int status, i;
dprintk("%s enter\n", __func__);
@@ -897,8 +898,8 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
for (i = 0; i < dlist->num_devs; i++) {
bdev = nfs4_blk_get_deviceinfo(server, fh,
&dlist->dev_id[i]);
- if (!bdev) {
- status = -ENODEV;
+ if (IS_ERR(bdev)) {
+ status = PTR_ERR(bdev);
goto out_error;
}
spin_lock(&b_mt_id->bm_lock);
@@ -959,7 +960,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
};
static const struct rpc_pipe_ops bl_upcall_ops = {
- .upcall = bl_pipe_upcall,
+ .upcall = rpc_pipe_generic_upcall,
.downcall = bl_pipe_downcall,
.destroy_msg = bl_pipe_destroy_msg,
};
@@ -988,17 +989,20 @@ static int __init nfs4blocklayout_init(void)
mnt,
NFS_PIPE_DIRNAME, 0, &path);
if (ret)
- goto out_remove;
+ goto out_putrpc;
bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
&bl_upcall_ops, 0);
+ path_put(&path);
if (IS_ERR(bl_device_pipe)) {
ret = PTR_ERR(bl_device_pipe);
- goto out_remove;
+ goto out_putrpc;
}
out:
return ret;
+out_putrpc:
+ rpc_put_mount();
out_remove:
pnfs_unregister_layoutdriver(&blocklayout_type);
return ret;
@@ -1011,6 +1015,7 @@ static void __exit nfs4blocklayout_exit(void)
pnfs_unregister_layoutdriver(&blocklayout_type);
rpc_unlink(bl_device_pipe);
+ rpc_put_mount();
}
MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f27d827960a3..42acf7ef5992 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -150,7 +150,7 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
}
struct bl_dev_msg {
- int status;
+ int32_t status;
uint32_t major, minor;
};
@@ -169,8 +169,6 @@ extern wait_queue_head_t bl_wq;
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* blocklayoutdev.c */
-ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
- char __user *, size_t);
ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
struct block_device *nfs4_blkdev_get(dev_t dev);
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a83b393fb01c..d08ba9107fde 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -79,28 +79,6 @@ int nfs4_blkdev_put(struct block_device *bdev)
return blkdev_put(bdev, FMODE_READ);
}
-/*
- * Shouldn't there be a rpc_generic_upcall() to do this for us?
- */
-ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
- char __user *dst, size_t buflen)
-{
- char *data = (char *)msg->data + msg->copied;
- size_t mlen = min(msg->len - msg->copied, buflen);
- unsigned long left;
-
- left = copy_to_user(dst, data, mlen);
- if (left == mlen) {
- msg->errno = -EFAULT;
- return -EFAULT;
- }
-
- mlen -= left;
- msg->copied += mlen;
- msg->errno = 0;
- return mlen;
-}
-
static struct bl_dev_msg bl_mount_reply;
ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
@@ -131,7 +109,7 @@ struct pnfs_block_dev *
nfs4_blk_decode_device(struct nfs_server *server,
struct pnfs_device *dev)
{
- struct pnfs_block_dev *rv = NULL;
+ struct pnfs_block_dev *rv;
struct block_device *bd = NULL;
struct rpc_pipe_msg msg;
struct bl_msg_hdr bl_msg = {
@@ -141,7 +119,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
uint8_t *dataptr;
DECLARE_WAITQUEUE(wq, current);
struct bl_dev_msg *reply = &bl_mount_reply;
- int offset, len, i;
+ int offset, len, i, rc;
dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
@@ -168,8 +146,10 @@ nfs4_blk_decode_device(struct nfs_server *server,
dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
add_wait_queue(&bl_wq, &wq);
- if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) {
+ rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg);
+ if (rc < 0) {
remove_wait_queue(&bl_wq, &wq);
+ rv = ERR_PTR(rc);
goto out;
}
@@ -187,8 +167,9 @@ nfs4_blk_decode_device(struct nfs_server *server,
bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor));
if (IS_ERR(bd)) {
- dprintk("%s failed to open device : %ld\n",
- __func__, PTR_ERR(bd));
+ rc = PTR_ERR(bd);
+ dprintk("%s failed to open device : %d\n", __func__, rc);
+ rv = ERR_PTR(rc);
goto out;
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e3d294269058..516f3375e067 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -125,7 +125,7 @@ nfs4_callback_up(struct svc_serv *serv)
else
goto out_err;
- return svc_prepare_thread(serv, &serv->sv_pools[0]);
+ return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
out_err:
if (ret == 0)
@@ -199,7 +199,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
INIT_LIST_HEAD(&serv->sv_cb_list);
spin_lock_init(&serv->sv_cb_lock);
init_waitqueue_head(&serv->sv_cb_waitq);
- rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
+ rqstp = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
if (IS_ERR(rqstp)) {
svc_xprt_put(serv->sv_bc_xprt);
serv->sv_bc_xprt = NULL;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b257383bb565..07df5f1d85e5 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum {
struct cb_process_state {
__be32 drc_status;
struct nfs_client *clp;
+ int slotid;
};
struct cb_compound_hdr_arg {
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall(
void *dummy, struct cb_process_state *cps);
extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
-extern void nfs4_cb_take_slot(struct nfs_client *clp);
struct cb_devicenotifyitem {
uint32_t cbd_notify_type;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 74780f9f852c..43926add945b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
/* Normal */
if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
slot->seq_nr++;
- return htonl(NFS4_OK);
+ goto out_ok;
}
/* Replay */
@@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
/* Wraparound */
if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
slot->seq_nr = 1;
- return htonl(NFS4_OK);
+ goto out_ok;
}
/* Misordered request */
return htonl(NFS4ERR_SEQ_MISORDERED);
+out_ok:
+ tbl->highest_used_slotid = args->csa_slotid;
+ return htonl(NFS4_OK);
}
/*
@@ -433,26 +436,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_sequenceres *res,
struct cb_process_state *cps)
{
+ struct nfs4_slot_table *tbl;
struct nfs_client *clp;
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
- cps->clp = NULL;
-
clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
if (clp == NULL)
goto out;
+ tbl = &clp->cl_session->bc_slot_table;
+
+ spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
- status = NFS4ERR_DELAY;
+ spin_unlock(&tbl->slot_tbl_lock);
+ status = htonl(NFS4ERR_DELAY);
+ /* Return NFS4ERR_BADSESSION if we're draining the session
+ * in order to reset it.
+ */
+ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ status = htonl(NFS4ERR_BADSESSION);
goto out;
}
status = validate_seqid(&clp->cl_session->bc_slot_table, args);
+ spin_unlock(&tbl->slot_tbl_lock);
if (status)
goto out;
+ cps->slotid = args->csa_slotid;
+
/*
* Check for pending referring calls. If a match is found, a
* related callback was received before the response to the original
@@ -469,7 +483,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
- nfs4_cb_take_slot(clp);
out:
cps->clp = clp; /* put in nfs4_callback_compound */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c6c86a77e043..726e59a9e50f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -488,17 +488,18 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
struct xdr_stream *xdr,
struct cb_recallanyargs *args)
{
- __be32 *p;
+ uint32_t bitmap[2];
+ __be32 *p, status;
args->craa_addr = svc_addr(rqstp);
p = read_buf(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
args->craa_objs_to_keep = ntohl(*p++);
- p = read_buf(xdr, 4);
- if (unlikely(p == NULL))
- return htonl(NFS4ERR_BADXDR);
- args->craa_type_mask = ntohl(*p);
+ status = decode_bitmap(xdr, bitmap);
+ if (unlikely(status))
+ return status;
+ args->craa_type_mask = bitmap[0];
return 0;
}
@@ -754,26 +755,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
* Let the state manager know callback processing done.
* A single slot, so highest used slotid is either 0 or -1
*/
- tbl->highest_used_slotid--;
+ tbl->highest_used_slotid = -1;
nfs4_check_drain_bc_complete(session);
spin_unlock(&tbl->slot_tbl_lock);
}
-static void nfs4_cb_free_slot(struct nfs_client *clp)
+static void nfs4_cb_free_slot(struct cb_process_state *cps)
{
- if (clp && clp->cl_session)
- nfs4_callback_free_slot(clp->cl_session);
-}
-
-/* A single slot, so highest used slotid is either 0 or -1 */
-void nfs4_cb_take_slot(struct nfs_client *clp)
-{
- struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
-
- spin_lock(&tbl->slot_tbl_lock);
- tbl->highest_used_slotid++;
- BUG_ON(tbl->highest_used_slotid != 0);
- spin_unlock(&tbl->slot_tbl_lock);
+ if (cps->slotid != -1)
+ nfs4_callback_free_slot(cps->clp->cl_session);
}
#else /* CONFIG_NFS_V4_1 */
@@ -784,7 +774,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
}
-static void nfs4_cb_free_slot(struct nfs_client *clp)
+static void nfs4_cb_free_slot(struct cb_process_state *cps)
{
}
#endif /* CONFIG_NFS_V4_1 */
@@ -866,6 +856,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
struct cb_process_state cps = {
.drc_status = 0,
.clp = NULL,
+ .slotid = -1,
};
unsigned int nops = 0;
@@ -906,7 +897,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
*hdr_res.status = status;
*hdr_res.nops = htonl(nops);
- nfs4_cb_free_slot(cps.clp);
+ nfs4_cb_free_slot(&cps);
nfs_put_client(cps.clp);
dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
@@ -996,4 +987,5 @@ struct svc_version nfs4_callback_version4 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
+ .vs_hidden = 1,
};
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5833fbbf59b0..873bf00d51a2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -336,11 +336,12 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
- if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
- sin1->sin6_scope_id != sin2->sin6_scope_id)
+ if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
return 0;
+ else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ return sin1->sin6_scope_id == sin2->sin6_scope_id;
- return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
+ return 1;
}
#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
@@ -1867,6 +1868,10 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
/* display one transport per line on subsequent lines */
clp = list_entry(v, struct nfs_client, cl_share_link);
+ /* Check if the client is initialized */
+ if (clp->cl_cons_state != NFS_CS_READY)
+ return 0;
+
seq_printf(m, "v%u %s %s %3d %s\n",
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 321a66bc3846..7f2654069806 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -240,7 +240,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
sizeof(delegation->stateid.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
- delegation->change_attr = nfsi->change_attr;
+ delegation->change_attr = inode->i_version;
delegation->cred = get_rpccred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 28b8c3f3cda3..0a1f8312b4dc 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -137,11 +137,9 @@ nfs_file_open(struct inode *inode, struct file *filp)
static int
nfs_file_release(struct inode *inode, struct file *filp)
{
- struct dentry *dentry = filp->f_path.dentry;
-
dprintk("NFS: release(%s/%s)\n",
- dentry->d_parent->d_name.name,
- dentry->d_name.name);
+ filp->f_path.dentry->d_parent->d_name.name,
+ filp->f_path.dentry->d_name.name);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return nfs_release(inode, filp);
@@ -180,8 +178,6 @@ force_reval:
static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
{
- loff_t loff;
-
dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
filp->f_path.dentry->d_parent->d_name.name,
filp->f_path.dentry->d_name.name,
@@ -197,13 +193,9 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
int retval = nfs_revalidate_file_size(inode, filp);
if (retval < 0)
return (loff_t)retval;
+ }
- spin_lock(&inode->i_lock);
- loff = generic_file_llseek_unlocked(filp, offset, origin);
- spin_unlock(&inode->i_lock);
- } else
- loff = generic_file_llseek_unlocked(filp, offset, origin);
- return loff;
+ return generic_file_llseek(filp, offset, origin);
}
/*
@@ -234,14 +226,13 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
struct dentry * dentry = iocb->ki_filp->f_path.dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
- size_t count = iov_length(iov, nr_segs);
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_read(iocb, iov, nr_segs, pos);
dprintk("NFS: read(%s/%s, %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long) pos);
+ (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
index 5b1006480bc2..7cf2c4699b08 100644
--- a/fs/nfs/fscache-index.c
+++ b/fs/nfs/fscache-index.c
@@ -212,7 +212,7 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
auxdata.ctime = nfsi->vfs_inode.i_ctime;
if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
- auxdata.change_attr = nfsi->change_attr;
+ auxdata.change_attr = nfsi->vfs_inode.i_version;
if (bufmax > sizeof(auxdata))
bufmax = sizeof(auxdata);
@@ -244,7 +244,7 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
auxdata.ctime = nfsi->vfs_inode.i_ctime;
if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
- auxdata.change_attr = nfsi->change_attr;
+ auxdata.change_attr = nfsi->vfs_inode.i_version;
if (memcmp(data, &auxdata, datalen) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f20801ae0a16..47d1c6ff2d8e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -336,8 +336,6 @@ struct idmap {
struct idmap_hashtable idmap_group_hash;
};
-static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
- char __user *, size_t);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
@@ -345,7 +343,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
static unsigned int fnvhash32(const void *, size_t);
static const struct rpc_pipe_ops idmap_upcall_ops = {
- .upcall = idmap_pipe_upcall,
+ .upcall = rpc_pipe_generic_upcall,
.downcall = idmap_pipe_downcall,
.destroy_msg = idmap_pipe_destroy_msg,
};
@@ -595,27 +593,6 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
return ret;
}
-/* RPC pipefs upcall/downcall routines */
-static ssize_t
-idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
- char __user *dst, size_t buflen)
-{
- char *data = (char *)msg->data + msg->copied;
- size_t mlen = min(msg->len, buflen);
- unsigned long left;
-
- left = copy_to_user(dst, data, mlen);
- if (left == mlen) {
- msg->errno = -EFAULT;
- return -EFAULT;
- }
-
- mlen -= left;
- msg->copied += mlen;
- msg->errno = 0;
- return mlen;
-}
-
static ssize_t
idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index fe1203797b2b..c07a55aec838 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -318,9 +318,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
memset(&inode->i_atime, 0, sizeof(inode->i_atime));
memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
- nfsi->change_attr = 0;
+ inode->i_version = 0;
inode->i_size = 0;
- inode->i_nlink = 0;
+ clear_nlink(inode);
inode->i_uid = -2;
inode->i_gid = -2;
inode->i_blocks = 0;
@@ -344,7 +344,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL;
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
- nfsi->change_attr = fattr->change_attr;
+ inode->i_version = fattr->change_attr;
else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
nfsi->cache_validity |= NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_DATA;
@@ -355,7 +355,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
| NFS_INO_INVALID_DATA
| NFS_INO_REVAL_PAGECACHE;
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
- inode->i_nlink = fattr->nlink;
+ set_nlink(inode, fattr->nlink);
else if (nfs_server_capable(inode, NFS_CAP_NLINK))
nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
@@ -897,8 +897,8 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
- && nfsi->change_attr == fattr->pre_change_attr) {
- nfsi->change_attr = fattr->change_attr;
+ && inode->i_version == fattr->pre_change_attr) {
+ inode->i_version = fattr->change_attr;
if (S_ISDIR(inode->i_mode))
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
ret |= NFS_INO_INVALID_ATTR;
@@ -952,7 +952,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
return -EIO;
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
- nfsi->change_attr != fattr->change_attr)
+ inode->i_version != fattr->change_attr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Verify a few of the more important attributes */
@@ -1163,7 +1163,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
}
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) {
- fattr->pre_change_attr = NFS_I(inode)->change_attr;
+ fattr->pre_change_attr = inode->i_version;
fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
@@ -1244,13 +1244,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/* More cache consistency checks */
if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
- if (nfsi->change_attr != fattr->change_attr) {
+ if (inode->i_version != fattr->change_attr) {
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
- nfsi->change_attr = fattr->change_attr;
+ inode->i_version = fattr->change_attr;
}
} else if (server->caps & NFS_CAP_CHANGE_ATTR)
invalid |= save_cache_validity;
@@ -1361,7 +1361,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR;
if (S_ISDIR(inode->i_mode))
invalid |= NFS_INO_INVALID_DATA;
- inode->i_nlink = fattr->nlink;
+ set_nlink(inode, fattr->nlink);
}
} else if (server->caps & NFS_CAP_NLINK)
invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ab12913dd473..c1a1bd8ddf1c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -457,13 +457,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
PAGE_SIZE - 1) >> PAGE_SHIFT;
}
-/*
- * Helper for restarting RPC calls in the possible presence of NFSv4.1
- * sessions.
- */
-static inline int nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp)
-{
- if (nfs4_has_session(clp))
- return rpc_restart_call_prepare(task);
- return rpc_restart_call(task);
-}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 1ec1a85fa71c..693ae22f8731 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,30 +13,6 @@
struct idmap;
-/*
- * In a seqid-mutating op, this macro controls which error return
- * values trigger incrementation of the seqid.
- *
- * from rfc 3010:
- * The client MUST monotonically increment the sequence number for the
- * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
- * operations. This is true even in the event that the previous
- * operation that used the sequence number received an error. The only
- * exception to this rule is if the previous operation received one of
- * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
- * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
- * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
- *
- */
-#define seqid_mutating_err(err) \
-(((err) != NFSERR_STALE_CLIENTID) && \
- ((err) != NFSERR_STALE_STATEID) && \
- ((err) != NFSERR_BAD_STATEID) && \
- ((err) != NFSERR_BAD_SEQID) && \
- ((err) != NFSERR_BAD_XDR) && \
- ((err) != NFSERR_RESOURCE) && \
- ((err) != NFSERR_NOFILEHANDLE))
-
enum nfs4_client_state {
NFS4CLNT_MANAGER_RUNNING = 0,
NFS4CLNT_CHECK_LEASE,
@@ -56,6 +32,9 @@ enum nfs4_session_state {
NFS4_SESSION_DRAINING,
};
+#define NFS4_RENEW_TIMEOUT 0x01
+#define NFS4_RENEW_DELEGATION_CB 0x02
+
struct nfs4_minor_version_ops {
u32 minor_version;
@@ -225,7 +204,7 @@ struct nfs4_state_recovery_ops {
};
struct nfs4_state_maintenance_ops {
- int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *);
+ int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned);
struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *);
int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
};
@@ -237,8 +216,6 @@ extern const struct inode_operations nfs4_dir_inode_operations;
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
-extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -349,6 +326,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
+extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e8915d4840ad..a62d36b9a99e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -31,6 +31,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
+#include <linux/module.h>
#include "internal.h"
#include "nfs4filelayout.h"
@@ -77,19 +78,6 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
-/* For data server errors we don't recover from */
-static void
-filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
-{
- if (lseg->pls_range.iomode == IOMODE_RW) {
- dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- } else {
- dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
- }
-}
-
static int filelayout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
@@ -135,7 +123,6 @@ static int filelayout_async_handle_error(struct rpc_task *task,
static int filelayout_read_done_cb(struct rpc_task *task,
struct nfs_read_data *data)
{
- struct nfs_client *clp = data->ds_clp;
int reset = 0;
dprintk("%s DS read\n", __func__);
@@ -145,11 +132,10 @@ static int filelayout_read_done_cb(struct rpc_task *task,
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
- filelayout_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(data->lseg);
nfs4_reset_read(task, data);
- clp = NFS_SERVER(data->inode)->nfs_client;
}
- nfs_restart_rpc(task, clp);
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -216,17 +202,13 @@ static int filelayout_write_done_cb(struct rpc_task *task,
if (filelayout_async_handle_error(task, data->args.context->state,
data->ds_clp, &reset) == -EAGAIN) {
- struct nfs_client *clp;
-
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
- filelayout_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(data->lseg);
nfs4_reset_write(task, data);
- clp = NFS_SERVER(data->inode)->nfs_client;
- } else
- clp = data->ds_clp;
- nfs_restart_rpc(task, clp);
+ }
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -256,9 +238,9 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
prepare_to_resend_writes(data);
- filelayout_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(data->lseg);
} else
- nfs_restart_rpc(task, data->ds_clp);
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -468,9 +450,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
fl->dsaddr = dsaddr;
- if (fl->first_stripe_index < 0 ||
- fl->first_stripe_index >= dsaddr->stripe_count) {
- dprintk("%s Bad first_stripe_index %d\n",
+ if (fl->first_stripe_index >= dsaddr->stripe_count) {
+ dprintk("%s Bad first_stripe_index %u\n",
__func__, fl->first_stripe_index);
goto out_put;
}
@@ -571,7 +552,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
* Futher checking is done in filelayout_check_layout */
- if (fl->num_fh < 0 || fl->num_fh >
+ if (fl->num_fh >
max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
goto out_err;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8c77039e7a81..b60fddf606f7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -73,9 +73,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
-static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir,
- const struct qstr *name, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
@@ -753,9 +750,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
spin_lock(&dir->i_lock);
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
- if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
+ if (!cinfo->atomic || cinfo->before != dir->i_version)
nfs_force_lookup_revalidate(dir);
- nfsi->change_attr = cinfo->after;
+ dir->i_version = cinfo->after;
spin_unlock(&dir->i_lock);
}
@@ -1596,8 +1593,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
int status;
status = nfs4_run_open_task(data, 0);
- if (status != 0 || !data->rpc_done)
+ if (!data->rpc_done)
+ return status;
+ if (status != 0) {
+ if (status == -NFS4ERR_BADNAME &&
+ !(o_arg->open_flags & O_CREAT))
+ return -ENOENT;
return status;
+ }
if (o_arg->open_flags & O_CREAT) {
update_changeattr(dir, &o_res->cinfo);
@@ -2408,14 +2411,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
return status;
}
-static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
- const struct nfs_fh *dirfh, const struct qstr *name,
- struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
+ const struct qstr *name, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
{
+ struct nfs_server *server = NFS_SERVER(dir);
int status;
struct nfs4_lookup_arg args = {
.bitmask = server->attr_bitmask,
- .dir_fh = dirfh,
+ .dir_fh = NFS_FH(dir),
.name = name,
};
struct nfs4_lookup_res res = {
@@ -2431,40 +2435,8 @@ static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
nfs_fattr_init(fattr);
- dprintk("NFS call lookupfh %s\n", name->name);
- status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
- dprintk("NFS reply lookupfh: %d\n", status);
- return status;
-}
-
-static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
- struct qstr *name, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
-{
- struct nfs4_exception exception = { };
- int err;
- do {
- err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr);
- /* FIXME: !!!! */
- if (err == -NFS4ERR_MOVED) {
- err = -EREMOTE;
- break;
- }
- err = nfs4_handle_exception(server, err, &exception);
- } while (exception.retry);
- return err;
-}
-
-static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
- const struct qstr *name, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
-{
- int status;
-
dprintk("NFS call lookup %s\n", name->name);
- status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
- if (status == -NFS4ERR_MOVED)
- status = nfs4_get_referral(dir, name, fattr, fhandle);
+ status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -2485,11 +2457,20 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr),
- &exception);
- if (err == -EPERM)
+ int status;
+
+ status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr);
+ switch (status) {
+ case -NFS4ERR_BADNAME:
+ return -ENOENT;
+ case -NFS4ERR_MOVED:
+ err = nfs4_get_referral(dir, name, fattr, fhandle);
+ break;
+ case -NFS4ERR_WRONGSEC:
nfs_fixup_secinfo_attributes(fattr, fhandle);
+ }
+ err = nfs4_handle_exception(NFS_SERVER(dir),
+ status, &exception);
} while (exception.retry);
return err;
}
@@ -3210,7 +3191,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
struct nfs_server *server = NFS_SERVER(data->inode);
if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
- nfs_restart_rpc(task, server->nfs_client);
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -3260,7 +3241,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
- nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
if (task->tk_status >= 0) {
@@ -3317,7 +3298,7 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
struct inode *inode = data->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
- nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
nfs_refresh_inode(inode, data->res.fattr);
@@ -3374,9 +3355,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
- if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
+ if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0)
+ return;
+ if (task->tk_status != NFS4ERR_CB_PATH_DOWN) {
nfs4_schedule_lease_recovery(clp);
- return;
+ return;
+ }
+ nfs4_schedule_path_down_recovery(clp);
}
do_renew_lease(clp, timestamp);
}
@@ -3386,7 +3371,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
.rpc_release = nfs4_renew_release,
};
-int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -3395,9 +3380,11 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
};
struct nfs4_renewdata *data;
+ if (renew_flags == 0)
+ return 0;
if (!atomic_inc_not_zero(&clp->cl_count))
return -EIO;
- data = kmalloc(sizeof(*data), GFP_KERNEL);
+ data = kmalloc(sizeof(*data), GFP_NOFS);
if (data == NULL)
return -ENOMEM;
data->client = clp;
@@ -3406,7 +3393,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
&nfs4_renew_ops, data);
}
-int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -3851,7 +3838,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-EAGAIN) {
- nfs_restart_rpc(task, data->res.server->nfs_client);
+ rpc_restart_call_prepare(task);
return;
}
}
@@ -4105,8 +4092,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
break;
default:
if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
- nfs_restart_rpc(task,
- calldata->server->nfs_client);
+ rpc_restart_call_prepare(task);
}
}
@@ -4939,7 +4925,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
task->tk_status = 0;
/* fall through */
case -NFS4ERR_RETRY_UNCACHED_REP:
- nfs_restart_rpc(task, data->clp);
+ rpc_restart_call_prepare(task);
return;
}
dprintk("<-- %s\n", __func__);
@@ -5504,11 +5490,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
return rpc_run_task(&task_setup_data);
}
-static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
{
struct rpc_task *task;
int ret = 0;
+ if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
+ return 0;
task = _nfs41_proc_sequence(clp, cred);
if (IS_ERR(task))
ret = PTR_ERR(task);
@@ -5778,7 +5766,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
server = NFS_SERVER(lrp->args.inode);
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
- nfs_restart_rpc(task, lrp->clp);
+ rpc_restart_call_prepare(task);
return;
}
spin_lock(&lo->plh_inode->i_lock);
@@ -5949,7 +5937,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
}
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
- nfs_restart_rpc(task, server->nfs_client);
+ rpc_restart_call_prepare(task);
return;
}
@@ -5962,6 +5950,7 @@ static void nfs4_layoutcommit_release(void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
struct pnfs_layout_segment *lseg, *tmp;
+ unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
pnfs_cleanup_layoutcommit(data);
/* Matched by references in pnfs_set_layoutcommit */
@@ -5971,6 +5960,11 @@ static void nfs4_layoutcommit_release(void *calldata)
&lseg->pls_flags))
put_lseg(lseg);
}
+
+ clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+ smp_mb__after_clear_bit();
+ wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+
put_rpccred(data->cred);
kfree(data);
}
@@ -6262,7 +6256,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.getroot = nfs4_proc_get_root,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
- .lookupfh = nfs4_proc_lookupfh,
.lookup = nfs4_proc_lookup,
.access = nfs4_proc_access,
.readlink = nfs4_proc_readlink,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index df8e7f3ca56d..dc484c0eae7f 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work)
struct rpc_cred *cred;
long lease;
unsigned long last, now;
+ unsigned renew_flags = 0;
ops = clp->cl_mvops->state_renewal_ops;
dprintk("%s: start\n", __func__);
@@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work)
last = clp->cl_last_renewal;
now = jiffies;
/* Are we close to a lease timeout? */
- if (time_after(now, last + lease/3)) {
+ if (time_after(now, last + lease/3))
+ renew_flags |= NFS4_RENEW_TIMEOUT;
+ if (nfs_delegations_present(clp))
+ renew_flags |= NFS4_RENEW_DELEGATION_CB;
+
+ if (renew_flags != 0) {
cred = ops->get_state_renewal_cred_locked(clp);
spin_unlock(&clp->cl_lock);
if (cred == NULL) {
- if (!nfs_delegations_present(clp)) {
+ if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) {
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
nfs_expire_all_delegations(clp);
} else {
/* Queue an asynchronous RENEW. */
- ops->sched_state_renewal(clp, cred);
+ ops->sched_state_renewal(clp, cred, renew_flags);
put_rpccred(cred);
goto out_exp;
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 72ab97ef3d61..39914be40b03 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
+void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
+{
+ nfs_handle_cb_pathdown(clp);
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1dce12f41a4f..e6161b213ed1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6602,8 +6602,6 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
if (status)
goto out;
status = decode_secinfo(xdr, res);
- if (status)
- goto out;
out:
return status;
}
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9383ca7245bc..c807ab93140e 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -38,21 +38,15 @@
*/
#include <linux/module.h>
-#include <scsi/osd_initiator.h>
+#include <scsi/osd_ore.h>
#include "objlayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-#define _LLU(x) ((unsigned long long)x)
-
-enum { BIO_MAX_PAGES_KMALLOC =
- (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
-};
-
struct objio_dev_ent {
struct nfs4_deviceid_node id_node;
- struct osd_dev *od;
+ struct ore_dev od;
};
static void
@@ -60,8 +54,8 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d)
{
struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
- dprintk("%s: free od=%p\n", __func__, de->od);
- osduld_put_device(de->od);
+ dprintk("%s: free od=%p\n", __func__, de->od.od);
+ osduld_put_device(de->od.od);
kfree(de);
}
@@ -98,12 +92,12 @@ _dev_list_add(const struct nfs_server *nfss,
nfss->pnfs_curr_ld,
nfss->nfs_client,
d_id);
- de->od = od;
+ de->od.od = od;
d = nfs4_insert_deviceid_node(&de->id_node);
n = container_of(d, struct objio_dev_ent, id_node);
if (n != de) {
- dprintk("%s: Race with other n->od=%p\n", __func__, n->od);
+ dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od);
objio_free_deviceid_node(&de->id_node);
de = n;
}
@@ -111,28 +105,11 @@ _dev_list_add(const struct nfs_server *nfss,
return de;
}
-struct caps_buffers {
- u8 caps_key[OSD_CRYPTO_KEYID_SIZE];
- u8 creds[OSD_CAP_LEN];
-};
-
struct objio_segment {
struct pnfs_layout_segment lseg;
- struct pnfs_osd_object_cred *comps;
-
- unsigned mirrors_p1;
- unsigned stripe_unit;
- unsigned group_width; /* Data stripe_units without integrity comps */
- u64 group_depth;
- unsigned group_count;
-
- unsigned max_io_size;
-
- unsigned comps_index;
- unsigned num_comps;
- /* variable length */
- struct objio_dev_ent *ods[];
+ struct ore_layout layout;
+ struct ore_components oc;
};
static inline struct objio_segment *
@@ -141,59 +118,44 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg)
return container_of(lseg, struct objio_segment, lseg);
}
-struct objio_state;
-typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
-
struct objio_state {
/* Generic layer */
- struct objlayout_io_state ol_state;
-
- struct objio_segment *layout;
-
- struct kref kref;
- objio_done_fn done;
- void *private;
-
- unsigned long length;
- unsigned numdevs; /* Actually used devs in this IO */
- /* A per-device variable array of size numdevs */
- struct _objio_per_comp {
- struct bio *bio;
- struct osd_request *or;
- unsigned long length;
- u64 offset;
- unsigned dev;
- } per_dev[];
+ struct objlayout_io_res oir;
+
+ bool sync;
+ /*FIXME: Support for extra_bytes at ore_get_rw_state() */
+ struct ore_io_state *ios;
};
/* Send and wait for a get_device_info of devices in the layout,
then look them up with the osd_initiator library */
-static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay,
- struct objio_segment *objio_seg, unsigned comp,
- gfp_t gfp_flags)
+static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
+ struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id,
+ gfp_t gfp_flags)
{
struct pnfs_osd_deviceaddr *deviceaddr;
- struct nfs4_deviceid *d_id;
struct objio_dev_ent *ode;
struct osd_dev *od;
struct osd_dev_info odi;
int err;
- d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id;
-
ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
- if (ode)
- return ode;
+ if (ode) {
+ objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
+ return 0;
+ }
err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags);
if (unlikely(err)) {
dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
__func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err);
- return ERR_PTR(err);
+ return err;
}
odi.systemid_len = deviceaddr->oda_systemid.len;
if (odi.systemid_len > sizeof(odi.systemid)) {
+ dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
+ __func__, sizeof(odi.systemid));
err = -EINVAL;
goto out;
} else if (odi.systemid_len)
@@ -218,96 +180,53 @@ static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay,
ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od,
gfp_flags);
-
+ objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
+ dprintk("Adding new dev_id(%llx:%llx)\n",
+ _DEVID_LO(d_id), _DEVID_HI(d_id));
out:
- dprintk("%s: return=%d\n", __func__, err);
objlayout_put_deviceinfo(deviceaddr);
- return err ? ERR_PTR(err) : ode;
+ return err;
}
-static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
- struct objio_segment *objio_seg,
- gfp_t gfp_flags)
+static void copy_single_comp(struct ore_components *oc, unsigned c,
+ struct pnfs_osd_object_cred *src_comp)
{
- unsigned i;
- int err;
+ struct ore_comp *ocomp = &oc->comps[c];
- /* lookup all devices */
- for (i = 0; i < objio_seg->num_comps; i++) {
- struct objio_dev_ent *ode;
+ WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
+ WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
- ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags);
- if (unlikely(IS_ERR(ode))) {
- err = PTR_ERR(ode);
- goto out;
- }
- objio_seg->ods[i] = ode;
- }
- err = 0;
+ ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
+ ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
-out:
- dprintk("%s: return=%d\n", __func__, err);
- return err;
+ memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
}
-static int _verify_data_map(struct pnfs_osd_layout *layout)
+int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
+ struct objio_segment **pseg)
{
- struct pnfs_osd_data_map *data_map = &layout->olo_map;
- u64 stripe_length;
- u32 group_width;
-
-/* FIXME: Only raid0 for now. if not go through MDS */
- if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
- printk(KERN_ERR "Only RAID_0 for now\n");
- return -ENOTSUPP;
- }
- if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
- printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
- data_map->odm_num_comps, data_map->odm_mirror_cnt);
- return -EINVAL;
- }
+ struct __alloc_objio_segment {
+ struct objio_segment olseg;
+ struct ore_dev *ods[numdevs];
+ struct ore_comp comps[numdevs];
+ } *aolseg;
- if (data_map->odm_group_width)
- group_width = data_map->odm_group_width;
- else
- group_width = data_map->odm_num_comps /
- (data_map->odm_mirror_cnt + 1);
-
- stripe_length = (u64)data_map->odm_stripe_unit * group_width;
- if (stripe_length >= (1ULL << 32)) {
- printk(KERN_ERR "Total Stripe length(0x%llx)"
- " >= 32bit is not supported\n", _LLU(stripe_length));
- return -ENOTSUPP;
+ aolseg = kzalloc(sizeof(*aolseg), gfp_flags);
+ if (unlikely(!aolseg)) {
+ dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
+ numdevs, sizeof(*aolseg));
+ return -ENOMEM;
}
- if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
- printk(KERN_ERR "Stripe Unit(0x%llx)"
- " must be Multples of PAGE_SIZE(0x%lx)\n",
- _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
- return -ENOTSUPP;
- }
+ aolseg->olseg.oc.numdevs = numdevs;
+ aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS;
+ aolseg->olseg.oc.comps = aolseg->comps;
+ aolseg->olseg.oc.ods = aolseg->ods;
+ *pseg = &aolseg->olseg;
return 0;
}
-static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp,
- struct pnfs_osd_object_cred *src_comp,
- struct caps_buffers *caps_p)
-{
- WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key));
- WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds));
-
- *cur_comp = *src_comp;
-
- memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred,
- sizeof(caps_p->caps_key));
- cur_comp->oc_cap_key.cred = caps_p->caps_key;
-
- memcpy(caps_p->creds, src_comp->oc_cap.cred,
- sizeof(caps_p->creds));
- cur_comp->oc_cap.cred = caps_p->creds;
-}
-
int objio_alloc_lseg(struct pnfs_layout_segment **outp,
struct pnfs_layout_hdr *pnfslay,
struct pnfs_layout_range *range,
@@ -317,59 +236,43 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp,
struct objio_segment *objio_seg;
struct pnfs_osd_xdr_decode_layout_iter iter;
struct pnfs_osd_layout layout;
- struct pnfs_osd_object_cred *cur_comp, src_comp;
- struct caps_buffers *caps_p;
+ struct pnfs_osd_object_cred src_comp;
+ unsigned cur_comp;
int err;
err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
if (unlikely(err))
return err;
- err = _verify_data_map(&layout);
+ err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
if (unlikely(err))
return err;
- objio_seg = kzalloc(sizeof(*objio_seg) +
- sizeof(objio_seg->ods[0]) * layout.olo_num_comps +
- sizeof(*objio_seg->comps) * layout.olo_num_comps +
- sizeof(struct caps_buffers) * layout.olo_num_comps,
- gfp_flags);
- if (!objio_seg)
- return -ENOMEM;
+ objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
+ objio_seg->layout.group_width = layout.olo_map.odm_group_width;
+ objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
+ objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
+ objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
- objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps);
- cur_comp = objio_seg->comps;
- caps_p = (void *)(cur_comp + layout.olo_num_comps);
- while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err))
- copy_single_comp(cur_comp++, &src_comp, caps_p++);
+ err = ore_verify_layout(layout.olo_map.odm_num_comps,
+ &objio_seg->layout);
if (unlikely(err))
goto err;
- objio_seg->num_comps = layout.olo_num_comps;
- objio_seg->comps_index = layout.olo_comps_index;
- err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags);
- if (err)
- goto err;
-
- objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
- objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit;
- if (layout.olo_map.odm_group_width) {
- objio_seg->group_width = layout.olo_map.odm_group_width;
- objio_seg->group_depth = layout.olo_map.odm_group_depth;
- objio_seg->group_count = layout.olo_map.odm_num_comps /
- objio_seg->mirrors_p1 /
- objio_seg->group_width;
- } else {
- objio_seg->group_width = layout.olo_map.odm_num_comps /
- objio_seg->mirrors_p1;
- objio_seg->group_depth = -1;
- objio_seg->group_count = 1;
+ objio_seg->oc.first_dev = layout.olo_comps_index;
+ cur_comp = 0;
+ while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
+ copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
+ err = objio_devices_lookup(pnfslay, objio_seg, cur_comp,
+ &src_comp.oc_object_id.oid_device_id,
+ gfp_flags);
+ if (err)
+ goto err;
+ ++cur_comp;
}
-
- /* Cache this calculation it will hit for every page */
- objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE -
- objio_seg->stripe_unit) *
- objio_seg->group_width;
+ /* pnfs_osd_xdr_decode_layout_comp returns false on error */
+ if (unlikely(err))
+ goto err;
*outp = &objio_seg->lseg;
return 0;
@@ -386,43 +289,63 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg)
int i;
struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
- for (i = 0; i < objio_seg->num_comps; i++) {
- if (!objio_seg->ods[i])
+ for (i = 0; i < objio_seg->oc.numdevs; i++) {
+ struct ore_dev *od = objio_seg->oc.ods[i];
+ struct objio_dev_ent *ode;
+
+ if (!od)
break;
- nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node);
+ ode = container_of(od, typeof(*ode), od);
+ nfs4_put_deviceid_node(&ode->id_node);
}
kfree(objio_seg);
}
-int objio_alloc_io_state(struct pnfs_layout_segment *lseg,
- struct objlayout_io_state **outp,
- gfp_t gfp_flags)
+static int
+objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
+ struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
+ loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
+ struct objio_state **outp)
{
struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
- struct objio_state *ios;
- const unsigned first_size = sizeof(*ios) +
- objio_seg->num_comps * sizeof(ios->per_dev[0]);
- const unsigned sec_size = objio_seg->num_comps *
- sizeof(ios->ol_state.ioerrs[0]);
-
- ios = kzalloc(first_size + sec_size, gfp_flags);
- if (unlikely(!ios))
+ struct ore_io_state *ios;
+ int ret;
+ struct __alloc_objio_state {
+ struct objio_state objios;
+ struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
+ } *aos;
+
+ aos = kzalloc(sizeof(*aos), gfp_flags);
+ if (unlikely(!aos))
return -ENOMEM;
- ios->layout = objio_seg;
- ios->ol_state.ioerrs = ((void *)ios) + first_size;
- ios->ol_state.num_comps = objio_seg->num_comps;
+ objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
+ aos->ioerrs, rpcdata, pnfs_layout_type);
+
+ ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
+ offset, count, &ios);
+ if (unlikely(ret)) {
+ kfree(aos);
+ return ret;
+ }
- *outp = &ios->ol_state;
+ ios->pages = pages;
+ ios->pgbase = pgbase;
+ ios->private = aos;
+ BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
+
+ aos->objios.sync = 0;
+ aos->objios.ios = ios;
+ *outp = &aos->objios;
return 0;
}
-void objio_free_io_state(struct objlayout_io_state *ol_state)
+void objio_free_result(struct objlayout_io_res *oir)
{
- struct objio_state *ios = container_of(ol_state, struct objio_state,
- ol_state);
+ struct objio_state *objios = container_of(oir, struct objio_state, oir);
- kfree(ios);
+ ore_put_io_state(objios->ios);
+ kfree(objios);
}
enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
@@ -455,543 +378,152 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
}
}
-static void _clear_bio(struct bio *bio)
+static void __on_dev_error(struct ore_io_state *ios,
+ struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
+ u64 dev_offset, u64 dev_len)
{
- struct bio_vec *bv;
- unsigned i;
+ struct objio_state *objios = ios->private;
+ struct pnfs_osd_objid pooid;
+ struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
+ /* FIXME: what to do with more-then-one-group layouts. We need to
+ * translate from ore_io_state index to oc->comps index
+ */
+ unsigned comp = dev_index;
- __bio_for_each_segment(bv, bio, i, 0) {
- unsigned this_count = bv->bv_len;
+ pooid.oid_device_id = ode->id_node.deviceid;
+ pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
+ pooid.oid_object_id = ios->oc->comps[comp].obj.id;
- if (likely(PAGE_SIZE == this_count))
- clear_highpage(bv->bv_page);
- else
- zero_user(bv->bv_page, bv->bv_offset, this_count);
- }
-}
-
-static int _io_check(struct objio_state *ios, bool is_write)
-{
- enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
- int lin_ret = 0;
- int i;
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_sense_info osi;
- struct osd_request *or = ios->per_dev[i].or;
- unsigned dev;
- int ret;
-
- if (!or)
- continue;
-
- ret = osd_req_decode_sense(or, &osi);
- if (likely(!ret))
- continue;
-
- if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
- /* start read offset passed endof file */
- BUG_ON(is_write);
- _clear_bio(ios->per_dev[i].bio);
- dprintk("%s: start read offset passed end of file "
- "offset=0x%llx, length=0x%lx\n", __func__,
- _LLU(ios->per_dev[i].offset),
- ios->per_dev[i].length);
-
- continue; /* we recovered */
- }
- dev = ios->per_dev[i].dev;
- objlayout_io_set_result(&ios->ol_state, dev,
- &ios->layout->comps[dev].oc_object_id,
- osd_pri_2_pnfs_err(osi.osd_err_pri),
- ios->per_dev[i].offset,
- ios->per_dev[i].length,
- is_write);
-
- if (osi.osd_err_pri >= oep) {
- oep = osi.osd_err_pri;
- lin_ret = ret;
- }
- }
-
- return lin_ret;
-}
-
-/*
- * Common IO state helpers.
- */
-static void _io_free(struct objio_state *ios)
-{
- unsigned i;
-
- for (i = 0; i < ios->numdevs; i++) {
- struct _objio_per_comp *per_dev = &ios->per_dev[i];
-
- if (per_dev->or) {
- osd_end_request(per_dev->or);
- per_dev->or = NULL;
- }
-
- if (per_dev->bio) {
- bio_put(per_dev->bio);
- per_dev->bio = NULL;
- }
- }
-}
-
-struct osd_dev *_io_od(struct objio_state *ios, unsigned dev)
-{
- unsigned min_dev = ios->layout->comps_index;
- unsigned max_dev = min_dev + ios->layout->num_comps;
-
- BUG_ON(dev < min_dev || max_dev <= dev);
- return ios->layout->ods[dev - min_dev]->od;
-}
-
-struct _striping_info {
- u64 obj_offset;
- u64 group_length;
- unsigned dev;
- unsigned unit_off;
-};
-
-static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
- struct _striping_info *si)
-{
- u32 stripe_unit = ios->layout->stripe_unit;
- u32 group_width = ios->layout->group_width;
- u64 group_depth = ios->layout->group_depth;
- u32 U = stripe_unit * group_width;
-
- u64 T = U * group_depth;
- u64 S = T * ios->layout->group_count;
- u64 M = div64_u64(file_offset, S);
-
- /*
- G = (L - (M * S)) / T
- H = (L - (M * S)) % T
- */
- u64 LmodU = file_offset - M * S;
- u32 G = div64_u64(LmodU, T);
- u64 H = LmodU - G * T;
-
- u32 N = div_u64(H, U);
-
- div_u64_rem(file_offset, stripe_unit, &si->unit_off);
- si->obj_offset = si->unit_off + (N * stripe_unit) +
- (M * group_depth * stripe_unit);
-
- /* "H - (N * U)" is just "H % U" so it's bound to u32 */
- si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
- si->dev *= ios->layout->mirrors_p1;
-
- si->group_length = T - H;
-}
-
-static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
- unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len,
- gfp_t gfp_flags)
-{
- unsigned pg = *cur_pg;
- struct request_queue *q =
- osd_request_queue(_io_od(ios, per_dev->dev));
-
- per_dev->length += cur_len;
-
- if (per_dev->bio == NULL) {
- unsigned stripes = ios->layout->num_comps /
- ios->layout->mirrors_p1;
- unsigned pages_in_stripe = stripes *
- (ios->layout->stripe_unit / PAGE_SIZE);
- unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
- stripes;
-
- if (BIO_MAX_PAGES_KMALLOC < bio_size)
- bio_size = BIO_MAX_PAGES_KMALLOC;
-
- per_dev->bio = bio_kmalloc(gfp_flags, bio_size);
- if (unlikely(!per_dev->bio)) {
- dprintk("Faild to allocate BIO size=%u\n", bio_size);
- return -ENOMEM;
- }
- }
-
- while (cur_len > 0) {
- unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
- unsigned added_len;
-
- BUG_ON(ios->ol_state.nr_pages <= pg);
- cur_len -= pglen;
-
- added_len = bio_add_pc_page(q, per_dev->bio,
- ios->ol_state.pages[pg], pglen, pgbase);
- if (unlikely(pglen != added_len))
- return -ENOMEM;
- pgbase = 0;
- ++pg;
- }
- BUG_ON(cur_len);
-
- *cur_pg = pg;
- return 0;
-}
-
-static int _prepare_one_group(struct objio_state *ios, u64 length,
- struct _striping_info *si, unsigned *last_pg,
- gfp_t gfp_flags)
-{
- unsigned stripe_unit = ios->layout->stripe_unit;
- unsigned mirrors_p1 = ios->layout->mirrors_p1;
- unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
- unsigned dev = si->dev;
- unsigned first_dev = dev - (dev % devs_in_group);
- unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
- unsigned cur_pg = *last_pg;
- int ret = 0;
-
- while (length) {
- struct _objio_per_comp *per_dev = &ios->per_dev[dev];
- unsigned cur_len, page_off = 0;
-
- if (!per_dev->length) {
- per_dev->dev = dev;
- if (dev < si->dev) {
- per_dev->offset = si->obj_offset + stripe_unit -
- si->unit_off;
- cur_len = stripe_unit;
- } else if (dev == si->dev) {
- per_dev->offset = si->obj_offset;
- cur_len = stripe_unit - si->unit_off;
- page_off = si->unit_off & ~PAGE_MASK;
- BUG_ON(page_off &&
- (page_off != ios->ol_state.pgbase));
- } else { /* dev > si->dev */
- per_dev->offset = si->obj_offset - si->unit_off;
- cur_len = stripe_unit;
- }
-
- if (max_comp < dev)
- max_comp = dev;
- } else {
- cur_len = stripe_unit;
- }
- if (cur_len >= length)
- cur_len = length;
-
- ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
- cur_len, gfp_flags);
- if (unlikely(ret))
- goto out;
-
- dev += mirrors_p1;
- dev = (dev % devs_in_group) + first_dev;
-
- length -= cur_len;
- ios->length += cur_len;
- }
-out:
- ios->numdevs = max_comp + mirrors_p1;
- *last_pg = cur_pg;
- return ret;
-}
-
-static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags)
-{
- u64 length = ios->ol_state.count;
- u64 offset = ios->ol_state.offset;
- struct _striping_info si;
- unsigned last_pg = 0;
- int ret = 0;
-
- while (length) {
- _calc_stripe_info(ios, offset, &si);
-
- if (length < si.group_length)
- si.group_length = length;
-
- ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags);
- if (unlikely(ret))
- goto out;
-
- offset += si.group_length;
- length -= si.group_length;
- }
-
-out:
- if (!ios->length)
- return ret;
-
- return 0;
-}
-
-static ssize_t _sync_done(struct objio_state *ios)
-{
- struct completion *waiting = ios->private;
-
- complete(waiting);
- return 0;
-}
-
-static void _last_io(struct kref *kref)
-{
- struct objio_state *ios = container_of(kref, struct objio_state, kref);
-
- ios->done(ios);
-}
-
-static void _done_io(struct osd_request *or, void *p)
-{
- struct objio_state *ios = p;
-
- kref_put(&ios->kref, _last_io);
-}
-
-static ssize_t _io_exec(struct objio_state *ios)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- ssize_t status = 0; /* sync status */
- unsigned i;
- objio_done_fn saved_done_fn = ios->done;
- bool sync = ios->ol_state.sync;
-
- if (sync) {
- ios->done = _sync_done;
- ios->private = &wait;
- }
-
- kref_init(&ios->kref);
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_request *or = ios->per_dev[i].or;
-
- if (!or)
- continue;
-
- kref_get(&ios->kref);
- osd_execute_request_async(or, _done_io, ios);
- }
-
- kref_put(&ios->kref, _last_io);
-
- if (sync) {
- wait_for_completion(&wait);
- status = saved_done_fn(ios);
- }
-
- return status;
+ objlayout_io_set_result(&objios->oir, comp,
+ &pooid, osd_pri_2_pnfs_err(oep),
+ dev_offset, dev_len, !ios->reading);
}
/*
* read
*/
-static ssize_t _read_done(struct objio_state *ios)
+static void _read_done(struct ore_io_state *ios, void *private)
{
+ struct objio_state *objios = private;
ssize_t status;
- int ret = _io_check(ios, false);
+ int ret = ore_check_io(ios, &__on_dev_error);
- _io_free(ios);
+ /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
if (likely(!ret))
status = ios->length;
else
status = ret;
- objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
- return status;
+ objlayout_read_done(&objios->oir, status, objios->sync);
}
-static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
+int objio_read_pagelist(struct nfs_read_data *rdata)
{
- struct osd_request *or = NULL;
- struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
- unsigned dev = per_dev->dev;
- struct pnfs_osd_object_cred *cred =
- &ios->layout->comps[dev];
- struct osd_obj_id obj = {
- .partition = cred->oc_object_id.oid_partition_id,
- .id = cred->oc_object_id.oid_object_id,
- };
+ struct objio_state *objios;
int ret;
- or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
- if (unlikely(!or)) {
- ret = -ENOMEM;
- goto err;
- }
- per_dev->or = or;
-
- osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
-
- ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
- if (ret) {
- dprintk("%s: Faild to osd_finalize_request() => %d\n",
- __func__, ret);
- goto err;
- }
-
- dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
- __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
- per_dev->length);
-
-err:
- return ret;
-}
-
-static ssize_t _read_exec(struct objio_state *ios)
-{
- unsigned i;
- int ret;
-
- for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
- if (!ios->per_dev[i].length)
- continue;
- ret = _read_mirrors(ios, i);
- if (unlikely(ret))
- goto err;
- }
-
- ios->done = _read_done;
- return _io_exec(ios); /* In sync mode exec returns the io status */
-
-err:
- _io_free(ios);
- return ret;
-}
-
-ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
-{
- struct objio_state *ios = container_of(ol_state, struct objio_state,
- ol_state);
- int ret;
-
- ret = _io_rw_pagelist(ios, GFP_KERNEL);
+ ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
+ rdata->lseg, rdata->args.pages, rdata->args.pgbase,
+ rdata->args.offset, rdata->args.count, rdata,
+ GFP_KERNEL, &objios);
if (unlikely(ret))
return ret;
- return _read_exec(ios);
+ objios->ios->done = _read_done;
+ dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
+ rdata->args.offset, rdata->args.count);
+ return ore_read(objios->ios);
}
/*
* write
*/
-static ssize_t _write_done(struct objio_state *ios)
+static void _write_done(struct ore_io_state *ios, void *private)
{
+ struct objio_state *objios = private;
ssize_t status;
- int ret = _io_check(ios, true);
+ int ret = ore_check_io(ios, &__on_dev_error);
- _io_free(ios);
+ /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
if (likely(!ret)) {
/* FIXME: should be based on the OSD's persistence model
* See OSD2r05 Section 4.13 Data persistence model */
- ios->ol_state.committed = NFS_FILE_SYNC;
+ objios->oir.committed = NFS_FILE_SYNC;
status = ios->length;
} else {
status = ret;
}
- objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
- return status;
+ objlayout_write_done(&objios->oir, status, objios->sync);
}
-static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
+static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
- struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
- unsigned dev = ios->per_dev[cur_comp].dev;
- unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
- int ret;
+ struct objio_state *objios = priv;
+ struct nfs_write_data *wdata = objios->oir.rpcdata;
+ pgoff_t index = offset / PAGE_SIZE;
+ struct page *page = find_get_page(wdata->inode->i_mapping, index);
- for (; cur_comp < last_comp; ++cur_comp, ++dev) {
- struct osd_request *or = NULL;
- struct pnfs_osd_object_cred *cred =
- &ios->layout->comps[dev];
- struct osd_obj_id obj = {
- .partition = cred->oc_object_id.oid_partition_id,
- .id = cred->oc_object_id.oid_object_id,
- };
- struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
- struct bio *bio;
-
- or = osd_start_request(_io_od(ios, dev), GFP_NOFS);
- if (unlikely(!or)) {
- ret = -ENOMEM;
- goto err;
- }
- per_dev->or = or;
-
- if (per_dev != master_dev) {
- bio = bio_kmalloc(GFP_NOFS,
- master_dev->bio->bi_max_vecs);
- if (unlikely(!bio)) {
- dprintk("Faild to allocate BIO size=%u\n",
- master_dev->bio->bi_max_vecs);
- ret = -ENOMEM;
- goto err;
- }
-
- __bio_clone(bio, master_dev->bio);
- bio->bi_bdev = NULL;
- bio->bi_next = NULL;
- per_dev->bio = bio;
- per_dev->dev = dev;
- per_dev->length = master_dev->length;
- per_dev->offset = master_dev->offset;
- } else {
- bio = master_dev->bio;
- bio->bi_rw |= REQ_WRITE;
+ if (!page) {
+ page = find_or_create_page(wdata->inode->i_mapping,
+ index, GFP_NOFS);
+ if (unlikely(!page)) {
+ dprintk("%s: grab_cache_page Failed index=0x%lx\n",
+ __func__, index);
+ return NULL;
}
-
- osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
-
- ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
- if (ret) {
- dprintk("%s: Faild to osd_finalize_request() => %d\n",
- __func__, ret);
- goto err;
- }
-
- dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
- __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
- per_dev->length);
+ unlock_page(page);
}
+ if (PageDirty(page) || PageWriteback(page))
+ *uptodate = true;
+ else
+ *uptodate = PageUptodate(page);
+ dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
+ return page;
+}
-err:
- return ret;
+static void __r4w_put_page(void *priv, struct page *page)
+{
+ dprintk("%s: index=0x%lx\n", __func__, page->index);
+ page_cache_release(page);
+ return;
}
-static ssize_t _write_exec(struct objio_state *ios)
+static const struct _ore_r4w_op _r4w_op = {
+ .get_page = &__r4w_get_page,
+ .put_page = &__r4w_put_page,
+};
+
+int objio_write_pagelist(struct nfs_write_data *wdata, int how)
{
- unsigned i;
+ struct objio_state *objios;
int ret;
- for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
- if (!ios->per_dev[i].length)
- continue;
- ret = _write_mirrors(ios, i);
- if (unlikely(ret))
- goto err;
- }
-
- ios->done = _write_done;
- return _io_exec(ios); /* In sync mode exec returns the io->status */
+ ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
+ wdata->lseg, wdata->args.pages, wdata->args.pgbase,
+ wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
+ &objios);
+ if (unlikely(ret))
+ return ret;
-err:
- _io_free(ios);
- return ret;
-}
+ objios->sync = 0 != (how & FLUSH_SYNC);
+ objios->ios->r4w = &_r4w_op;
-ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
-{
- struct objio_state *ios = container_of(ol_state, struct objio_state,
- ol_state);
- int ret;
+ if (!objios->sync)
+ objios->ios->done = _write_done;
- /* TODO: ios->stable = stable; */
- ret = _io_rw_pagelist(ios, GFP_NOFS);
+ dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
+ wdata->args.offset, wdata->args.count);
+ ret = ore_write(objios->ios);
if (unlikely(ret))
return ret;
- return _write_exec(ios);
+ if (objios->sync)
+ _write_done(objios->ios, objios);
+
+ return 0;
}
static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
@@ -1001,7 +533,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
return false;
return pgio->pg_count + req->wb_bytes <=
- OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
+ OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
}
static const struct nfs_pageio_ops objio_pg_read_ops = {
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 1d06f8e2adea..72074e3a04f9 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -156,77 +156,39 @@ last_byte_offset(u64 start, u64 len)
return end > start ? end - 1 : NFS4_MAX_UINT64;
}
-static struct objlayout_io_state *
-objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
- struct page **pages,
- unsigned pgbase,
- loff_t offset,
- size_t count,
- struct pnfs_layout_segment *lseg,
- void *rpcdata,
- gfp_t gfp_flags)
+void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
+ struct page ***p_pages, unsigned *p_pgbase,
+ u64 offset, unsigned long count)
{
- struct objlayout_io_state *state;
u64 lseg_end_offset;
- dprintk("%s: allocating io_state\n", __func__);
- if (objio_alloc_io_state(lseg, &state, gfp_flags))
- return NULL;
-
BUG_ON(offset < lseg->pls_range.offset);
lseg_end_offset = end_offset(lseg->pls_range.offset,
lseg->pls_range.length);
BUG_ON(offset >= lseg_end_offset);
- if (offset + count > lseg_end_offset) {
- count = lseg->pls_range.length -
- (offset - lseg->pls_range.offset);
- dprintk("%s: truncated count %Zd\n", __func__, count);
- }
+ WARN_ON(offset + count > lseg_end_offset);
- if (pgbase > PAGE_SIZE) {
- pages += pgbase >> PAGE_SHIFT;
- pgbase &= ~PAGE_MASK;
+ if (*p_pgbase > PAGE_SIZE) {
+ dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
+ *p_pages += *p_pgbase >> PAGE_SHIFT;
+ *p_pgbase &= ~PAGE_MASK;
}
-
- INIT_LIST_HEAD(&state->err_list);
- state->lseg = lseg;
- state->rpcdata = rpcdata;
- state->pages = pages;
- state->pgbase = pgbase;
- state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
- state->offset = offset;
- state->count = count;
- state->sync = 0;
-
- return state;
-}
-
-static void
-objlayout_free_io_state(struct objlayout_io_state *state)
-{
- dprintk("%s: freeing io_state\n", __func__);
- if (unlikely(!state))
- return;
-
- objio_free_io_state(state);
}
/*
* I/O done common code
*/
static void
-objlayout_iodone(struct objlayout_io_state *state)
+objlayout_iodone(struct objlayout_io_res *oir)
{
- dprintk("%s: state %p status\n", __func__, state);
-
- if (likely(state->status >= 0)) {
- objlayout_free_io_state(state);
+ if (likely(oir->status >= 0)) {
+ objio_free_result(oir);
} else {
- struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
+ struct objlayout *objlay = oir->objlay;
spin_lock(&objlay->lock);
objlay->delta_space_valid = OBJ_DSU_INVALID;
- list_add(&objlay->err_list, &state->err_list);
+ list_add(&objlay->err_list, &oir->err_list);
spin_unlock(&objlay->lock);
}
}
@@ -238,13 +200,13 @@ objlayout_iodone(struct objlayout_io_state *state)
* the error for later reporting at layout-return.
*/
void
-objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
+objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
struct pnfs_osd_objid *pooid, int osd_error,
u64 offset, u64 length, bool is_write)
{
- struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
+ struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
- BUG_ON(index >= state->num_comps);
+ BUG_ON(index >= oir->num_comps);
if (osd_error) {
ioerr->oer_component = *pooid;
ioerr->oer_comp_offset = offset;
@@ -285,21 +247,18 @@ static void _rpc_read_complete(struct work_struct *work)
}
void
-objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
+objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- int eof = state->eof;
- struct nfs_read_data *rdata;
+ struct nfs_read_data *rdata = oir->rpcdata;
- state->status = status;
- dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof);
- rdata = state->rpcdata;
- rdata->task.tk_status = status;
- if (status >= 0) {
+ oir->status = rdata->task.tk_status = status;
+ if (status >= 0)
rdata->res.count = status;
- rdata->res.eof = eof;
- }
- objlayout_iodone(state);
- /* must not use state after this point */
+ objlayout_iodone(oir);
+ /* must not use oir after this point */
+
+ dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
+ status, rdata->res.eof, sync);
if (sync)
pnfs_ld_read_done(rdata);
@@ -317,40 +276,36 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
{
loff_t offset = rdata->args.offset;
size_t count = rdata->args.count;
- struct objlayout_io_state *state;
- ssize_t status = 0;
+ int err;
loff_t eof;
- dprintk("%s: Begin inode %p offset %llu count %d\n",
- __func__, rdata->inode, offset, (int)count);
-
eof = i_size_read(rdata->inode);
if (unlikely(offset + count > eof)) {
if (offset >= eof) {
- status = 0;
+ err = 0;
rdata->res.count = 0;
rdata->res.eof = 1;
+ /*FIXME: do we need to call pnfs_ld_read_done() */
goto out;
}
count = eof - offset;
}
- state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
- rdata->args.pages, rdata->args.pgbase,
- offset, count,
- rdata->lseg, rdata,
- GFP_KERNEL);
- if (unlikely(!state)) {
- status = -ENOMEM;
- goto out;
- }
+ rdata->res.eof = (offset + count) >= eof;
+ _fix_verify_io_params(rdata->lseg, &rdata->args.pages,
+ &rdata->args.pgbase,
+ rdata->args.offset, rdata->args.count);
- state->eof = state->offset + state->count >= eof;
+ dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
+ __func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
- status = objio_read_pagelist(state);
+ err = objio_read_pagelist(rdata);
out:
- dprintk("%s: Return status %Zd\n", __func__, status);
- rdata->pnfs_error = status;
+ if (unlikely(err)) {
+ rdata->pnfs_error = err;
+ dprintk("%s: Returned Error %d\n", __func__, err);
+ return PNFS_NOT_ATTEMPTED;
+ }
return PNFS_ATTEMPTED;
}
@@ -371,26 +326,20 @@ static void _rpc_write_complete(struct work_struct *work)
}
void
-objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
- bool sync)
+objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_write_data *wdata;
+ struct nfs_write_data *wdata = oir->rpcdata;
- dprintk("%s: Begin\n", __func__);
- wdata = state->rpcdata;
- state->status = status;
- wdata->task.tk_status = status;
+ oir->status = wdata->task.tk_status = status;
if (status >= 0) {
wdata->res.count = status;
- wdata->verf.committed = state->committed;
- dprintk("%s: Return status %d committed %d\n",
- __func__, wdata->task.tk_status,
- wdata->verf.committed);
- } else
- dprintk("%s: Return status %d\n",
- __func__, wdata->task.tk_status);
- objlayout_iodone(state);
- /* must not use state after this point */
+ wdata->verf.committed = oir->committed;
+ }
+ objlayout_iodone(oir);
+ /* must not use oir after this point */
+
+ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
+ status, wdata->verf.committed, sync);
if (sync)
pnfs_ld_write_done(wdata);
@@ -407,30 +356,18 @@ enum pnfs_try_status
objlayout_write_pagelist(struct nfs_write_data *wdata,
int how)
{
- struct objlayout_io_state *state;
- ssize_t status;
-
- dprintk("%s: Begin inode %p offset %llu count %u\n",
- __func__, wdata->inode, wdata->args.offset, wdata->args.count);
-
- state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
- wdata->args.pages,
- wdata->args.pgbase,
- wdata->args.offset,
- wdata->args.count,
- wdata->lseg, wdata,
- GFP_NOFS);
- if (unlikely(!state)) {
- status = -ENOMEM;
- goto out;
- }
+ int err;
- state->sync = how & FLUSH_SYNC;
+ _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
+ &wdata->args.pgbase,
+ wdata->args.offset, wdata->args.count);
- status = objio_write_pagelist(state, how & FLUSH_STABLE);
- out:
- dprintk("%s: Return status %Zd\n", __func__, status);
- wdata->pnfs_error = status;
+ err = objio_write_pagelist(wdata, how);
+ if (unlikely(err)) {
+ wdata->pnfs_error = err;
+ dprintk("%s: Returned Error %d\n", __func__, err);
+ return PNFS_NOT_ATTEMPTED;
+ }
return PNFS_ATTEMPTED;
}
@@ -537,14 +474,14 @@ merge_ioerr(struct pnfs_osd_ioerr *dest_err,
static void
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
{
- struct objlayout_io_state *state, *tmp;
+ struct objlayout_io_res *oir, *tmp;
struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
- list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
+ list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
unsigned i;
- for (i = 0; i < state->num_comps; i++) {
- struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
+ for (i = 0; i < oir->num_comps; i++) {
+ struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
if (!ioerr->oer_errno)
continue;
@@ -563,8 +500,8 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p)
merge_ioerr(&accumulated_err, ioerr);
}
- list_del(&state->err_list);
- objlayout_free_io_state(state);
+ list_del(&oir->err_list);
+ objio_free_result(oir);
}
pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
@@ -576,7 +513,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
const struct nfs4_layoutreturn_args *args)
{
struct objlayout *objlay = OBJLAYOUT(pnfslay);
- struct objlayout_io_state *state, *tmp;
+ struct objlayout_io_res *oir, *tmp;
__be32 *start;
dprintk("%s: Begin\n", __func__);
@@ -585,13 +522,13 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
spin_lock(&objlay->lock);
- list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
+ list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
__be32 *last_xdr = NULL, *p;
unsigned i;
int res = 0;
- for (i = 0; i < state->num_comps; i++) {
- struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
+ for (i = 0; i < oir->num_comps; i++) {
+ struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
if (!ioerr->oer_errno)
continue;
@@ -615,7 +552,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
}
last_xdr = p;
- pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]);
+ pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
}
/* TODO: use xdr_write_pages */
@@ -631,8 +568,8 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
encode_accumulated_error(objlay, last_xdr);
goto loop_done;
}
- list_del(&state->err_list);
- objlayout_free_io_state(state);
+ list_del(&oir->err_list);
+ objio_free_result(oir);
}
loop_done:
spin_unlock(&objlay->lock);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index a8244c8e042d..8ec34727ed21 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -74,19 +74,11 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo)
* per-I/O operation state
* embedded in objects provider io_state data structure
*/
-struct objlayout_io_state {
- struct pnfs_layout_segment *lseg;
-
- struct page **pages;
- unsigned pgbase;
- unsigned nr_pages;
- unsigned long count;
- loff_t offset;
- bool sync;
+struct objlayout_io_res {
+ struct objlayout *objlay;
void *rpcdata;
int status; /* res */
- int eof; /* res */
int committed; /* res */
/* Error reporting (layout_return) */
@@ -100,6 +92,18 @@ struct objlayout_io_state {
struct pnfs_osd_ioerr *ioerrs;
};
+static inline
+void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
+ struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
+ struct pnfs_layout_hdr *pnfs_layout_type)
+{
+ oir->objlay = OBJLAYOUT(pnfs_layout_type);
+ oir->rpcdata = rpcdata;
+ INIT_LIST_HEAD(&oir->err_list);
+ oir->num_comps = num_comps;
+ oir->ioerrs = ioerrs;
+}
+
/*
* Raid engine I/O API
*/
@@ -110,28 +114,24 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
gfp_t gfp_flags);
extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
-extern int objio_alloc_io_state(
- struct pnfs_layout_segment *lseg,
- struct objlayout_io_state **outp,
- gfp_t gfp_flags);
-extern void objio_free_io_state(struct objlayout_io_state *state);
+/* objio_free_result will free these @oir structs recieved from
+ * objlayout_{read,write}_done
+ */
+extern void objio_free_result(struct objlayout_io_res *oir);
-extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
-extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
- bool stable);
+extern int objio_read_pagelist(struct nfs_read_data *rdata);
+extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
/*
* callback API
*/
-extern void objlayout_io_set_result(struct objlayout_io_state *state,
+extern void objlayout_io_set_result(struct objlayout_io_res *oir,
unsigned index, struct pnfs_osd_objid *pooid,
int osd_error, u64 offset, u64 length, bool is_write);
static inline void
-objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
+objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
{
- struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
-
/* If one of the I/Os errored out and the delta_space_used was
* invalid we render the complete report as invalid. Protocol mandate
* the DSU be accurate or not reported.
@@ -144,9 +144,9 @@ objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
spin_unlock(&objlay->lock);
}
-extern void objlayout_read_done(struct objlayout_io_state *state,
+extern void objlayout_read_done(struct objlayout_io_res *oir,
ssize_t status, bool sync);
-extern void objlayout_write_done(struct objlayout_io_state *state,
+extern void objlayout_write_done(struct objlayout_io_res *oir,
ssize_t status, bool sync);
extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index 16fc758e9123..b3918f7ac34d 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
p = _osd_xdr_decode_data_map(p, &layout->olo_map);
layout->olo_comps_index = be32_to_cpup(p++);
layout->olo_num_comps = be32_to_cpup(p++);
+ dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
+ layout->olo_comps_index, layout->olo_num_comps);
+
iter->total_comps = layout->olo_num_comps;
return 0;
}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b60970cc7f1f..5668f7c54c41 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -18,6 +18,7 @@
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
+#include <linux/export.h>
#include "internal.h"
#include "pnfs.h"
@@ -41,7 +42,7 @@ nfs_page_free(struct nfs_page *p)
/**
* nfs_create_request - Create an NFS read/write request.
- * @file: file descriptor to use
+ * @ctx: open context to use
* @inode: inode to which the request is attached
* @page: page to write
* @offset: starting offset within the page for the write
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e550e8836c37..baf73536bc04 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -29,6 +29,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
+#include <linux/module.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
@@ -1168,23 +1169,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
/*
* Called by non rpc-based layout drivers
*/
-int
-pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_write_data *data)
{
- int status;
-
- if (!data->pnfs_error) {
+ if (likely(!data->pnfs_error)) {
pnfs_set_layoutcommit(data);
data->mds_ops->rpc_call_done(&data->task, data);
- data->mds_ops->rpc_release(data);
- return 0;
+ } else {
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ dprintk("pnfs write error = %d\n", data->pnfs_error);
}
-
- dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
- data->pnfs_error);
- status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
- data->mds_ops, NFS_FILE_SYNC);
- return status ? : -EAGAIN;
+ data->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1268,23 +1263,17 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
/*
* Called by non rpc-based layout drivers
*/
-int
-pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_read_data *data)
{
- int status;
-
- if (!data->pnfs_error) {
+ if (likely(!data->pnfs_error)) {
__nfs4_read_done_cb(data);
data->mds_ops->rpc_call_done(&data->task, data);
- data->mds_ops->rpc_release(data);
- return 0;
+ } else {
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ dprintk("pnfs write error = %d\n", data->pnfs_error);
}
-
- dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
- data->pnfs_error);
- status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
- data->mds_ops);
- return status ? : -EAGAIN;
+ data->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1381,6 +1370,18 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
}
}
+void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
+{
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+ } else {
+ dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ }
+}
+EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
+
void
pnfs_set_layoutcommit(struct nfs_write_data *wdata)
{
@@ -1443,17 +1444,31 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
data = kzalloc(sizeof(*data), GFP_NOFS);
if (!data) {
- mark_inode_dirty_sync(inode);
status = -ENOMEM;
goto out;
}
+ if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ goto out_free;
+
+ if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
+ if (!sync) {
+ status = -EAGAIN;
+ goto out_free;
+ }
+ status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+ if (status)
+ goto out_free;
+ }
+
INIT_LIST_HEAD(&data->lseg_list);
spin_lock(&inode->i_lock);
if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
+ clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
spin_unlock(&inode->i_lock);
- kfree(data);
- goto out;
+ wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
+ goto out_free;
}
pnfs_list_write_lseg(inode, &data->lseg_list);
@@ -1475,6 +1490,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
status = nfs4_proc_layoutcommit(data, sync);
out:
+ if (status)
+ mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
+out_free:
+ kfree(data);
+ goto out;
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 01cbfd54f3cb..1509530cb111 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -178,6 +178,7 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
@@ -200,8 +201,8 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
-int pnfs_ld_write_done(struct nfs_write_data *);
-int pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_write_done(struct nfs_write_data *);
+void pnfs_ld_read_done(struct nfs_read_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 6fda5228ef56..4f359d2a26eb 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -28,6 +28,7 @@
* such damages.
*/
+#include <linux/export.h>
#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2171c043ab08..8b48ec63f722 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,16 +35,13 @@ static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
static struct kmem_cache *nfs_rdata_cachep;
-static mempool_t *nfs_rdata_mempool;
-
-#define MIN_POOL_READ (32)
struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
- struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
+ struct nfs_read_data *p;
+ p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
if (p) {
- memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
p->npages = pagecount;
if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -52,7 +49,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
else {
p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
if (!p->pagevec) {
- mempool_free(p, nfs_rdata_mempool);
+ kmem_cache_free(nfs_rdata_cachep, p);
p = NULL;
}
}
@@ -64,7 +61,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
{
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
- mempool_free(p, nfs_rdata_mempool);
+ kmem_cache_free(nfs_rdata_cachep, p);
}
void nfs_readdata_release(struct nfs_read_data *rdata)
@@ -276,7 +273,6 @@ nfs_async_read_error(struct list_head *head)
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- SetPageError(req->wb_page);
nfs_readpage_release(req);
}
}
@@ -322,7 +318,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
offset += len;
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
- ClearPageError(page);
desc->pg_rpc_callops = &nfs_read_partial_ops;
return ret;
out_bad:
@@ -331,7 +326,6 @@ out_bad:
list_del(&data->list);
nfs_readdata_free(data);
}
- SetPageError(page);
nfs_readpage_release(req);
return -ENOMEM;
}
@@ -357,7 +351,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &data->pages);
- ClearPageError(req->wb_page);
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
@@ -435,7 +428,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
- nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
+ rpc_restart_call_prepare(task);
}
/*
@@ -462,10 +455,10 @@ static void nfs_readpage_release_partial(void *calldata)
int status = data->task.tk_status;
if (status < 0)
- SetPageError(page);
+ set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
if (atomic_dec_and_test(&req->wb_complete)) {
- if (!PageError(page))
+ if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
SetPageUptodate(page);
nfs_readpage_release(req);
}
@@ -541,13 +534,23 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
static void nfs_readpage_release_full(void *calldata)
{
struct nfs_read_data *data = calldata;
+ struct nfs_pageio_descriptor pgio;
+ if (data->pnfs_error) {
+ nfs_pageio_init_read_mds(&pgio, data->inode);
+ pgio.pg_recoalesce = 1;
+ }
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- nfs_readpage_release(req);
+ if (!data->pnfs_error)
+ nfs_readpage_release(req);
+ else
+ nfs_pageio_add_request(&pgio, req);
}
+ if (data->pnfs_error)
+ nfs_pageio_complete(&pgio);
nfs_readdata_release(calldata);
}
@@ -648,7 +651,6 @@ readpage_async_filler(void *data, struct page *page)
return 0;
out_error:
error = PTR_ERR(new);
- SetPageError(page);
out_unlock:
unlock_page(page);
return error;
@@ -711,16 +713,10 @@ int __init nfs_init_readpagecache(void)
if (nfs_rdata_cachep == NULL)
return -ENOMEM;
- nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
- nfs_rdata_cachep);
- if (nfs_rdata_mempool == NULL)
- return -ENOMEM;
-
return 0;
}
void nfs_destroy_readpagecache(void)
{
- mempool_destroy(nfs_rdata_mempool);
kmem_cache_destroy(nfs_rdata_cachep);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b961ceac66b4..480b3b6bf71e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -733,18 +733,22 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
+
+#ifdef CONFIG_NFS_V4
#ifdef CONFIG_NFS_V4_1
-void show_sessions(struct seq_file *m, struct nfs_server *server)
+static void show_sessions(struct seq_file *m, struct nfs_server *server)
{
if (nfs4_has_session(server->nfs_client))
seq_printf(m, ",sessions");
}
#else
-void show_sessions(struct seq_file *m, struct nfs_server *server) {}
+static void show_sessions(struct seq_file *m, struct nfs_server *server) {}
+#endif
#endif
+#ifdef CONFIG_NFS_V4
#ifdef CONFIG_NFS_V4_1
-void show_pnfs(struct seq_file *m, struct nfs_server *server)
+static void show_pnfs(struct seq_file *m, struct nfs_server *server)
{
seq_printf(m, ",pnfs=");
if (server->pnfs_curr_ld)
@@ -752,9 +756,10 @@ void show_pnfs(struct seq_file *m, struct nfs_server *server)
else
seq_printf(m, "not configured");
}
-#else /* CONFIG_NFS_V4_1 */
-void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
-#endif /* CONFIG_NFS_V4_1 */
+#else
+static void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
+#endif
+#endif
static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
{
@@ -2035,9 +2040,6 @@ static inline void nfs_initialise_sb(struct super_block *sb)
sb->s_blocksize = nfs_block_bits(server->wsize,
&sb->s_blocksize_bits);
- if (server->flags & NFS_MOUNT_NOAC)
- sb->s_flags |= MS_SYNCHRONOUS;
-
sb->s_bdi = &server->backing_dev_info;
nfs_super_set_maxbytes(sb, server->maxfilesize);
@@ -2249,6 +2251,10 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
+ /* -o noac implies -o sync */
+ if (server->flags & NFS_MOUNT_NOAC)
+ sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
@@ -2361,6 +2367,10 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
+ /* -o noac implies -o sync */
+ if (server->flags & NFS_MOUNT_NOAC)
+ sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
@@ -2628,6 +2638,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
if (server->flags & NFS4_MOUNT_UNSHARED)
compare_super = NULL;
+ /* -o noac implies -o sync */
+ if (server->flags & NFS_MOUNT_NOAC)
+ sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
@@ -2789,7 +2803,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
goto out_put_mnt_ns;
ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
- export_path, LOOKUP_FOLLOW, &path);
+ export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
nfs_referral_loop_unprotect();
put_mnt_ns(ns_private);
@@ -2916,6 +2930,10 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
if (server->flags & NFS4_MOUNT_UNSHARED)
compare_super = NULL;
+ /* -o noac implies -o sync */
+ if (server->flags & NFS_MOUNT_NOAC)
+ sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
@@ -3003,6 +3021,10 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
if (server->flags & NFS4_MOUNT_UNSHARED)
compare_super = NULL;
+ /* -o noac implies -o sync */
+ if (server->flags & NFS_MOUNT_NOAC)
+ sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index b2fbbde58e44..4f9319a2e567 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -87,7 +87,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
struct inode *dir = data->dir;
if (!NFS_PROTO(dir)->unlink_done(task, dir))
- nfs_restart_rpc(task, NFS_SERVER(dir)->nfs_client);
+ rpc_restart_call_prepare(task);
}
/**
@@ -369,7 +369,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
struct dentry *new_dentry = data->new_dentry;
if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
- nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
+ rpc_restart_call_prepare(task);
return;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b39b37f80913..1dda78db6a73 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -20,6 +20,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
+#include <linux/export.h>
#include <asm/uaccess.h>
@@ -390,7 +391,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
BUG_ON(error);
if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
- nfsi->change_attr++;
+ inode->i_version++;
set_bit(PG_MAPPED, &req->wb_flags);
SetPagePrivate(req->wb_page);
set_page_private(req->wb_page, (unsigned long)req);
@@ -428,7 +429,6 @@ static void
nfs_mark_request_dirty(struct nfs_page *req)
{
__set_page_dirty_nobuffers(req->wb_page);
- __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
}
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -762,6 +762,8 @@ int nfs_updatepage(struct file *file, struct page *page,
status = nfs_writepage_setup(ctx, page, offset, count);
if (status < 0)
nfs_set_pageerror(page);
+ else
+ __set_page_dirty_nobuffers(page);
dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
status, (long long)i_size_read(inode));
@@ -958,7 +960,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
if (!data)
goto out_bad;
data->pagevec[0] = page;
- nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
+ nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
list_add(&data->list, res);
requests++;
nbytes -= len;
@@ -1010,7 +1012,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &data->pages);
- ClearPageError(req->wb_page);
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
@@ -1165,7 +1166,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
static void nfs_writeback_release_full(void *calldata)
{
struct nfs_write_data *data = calldata;
- int status = data->task.tk_status;
+ int ret, status = data->task.tk_status;
+ struct nfs_pageio_descriptor pgio;
+
+ if (data->pnfs_error) {
+ nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
+ pgio.pg_recoalesce = 1;
+ }
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
@@ -1181,6 +1188,11 @@ static void nfs_writeback_release_full(void *calldata)
req->wb_bytes,
(long long)req_offset(req));
+ if (data->pnfs_error) {
+ dprintk(", pnfs error = %d\n", data->pnfs_error);
+ goto next;
+ }
+
if (status < 0) {
nfs_set_pageerror(page);
nfs_context_set_write_error(req->wb_context, status);
@@ -1200,7 +1212,19 @@ remove_request:
next:
nfs_clear_page_tag_locked(req);
nfs_end_page_writeback(page);
+ if (data->pnfs_error) {
+ lock_page(page);
+ nfs_pageio_cond_complete(&pgio, page->index);
+ ret = nfs_page_async_flush(&pgio, page, 0);
+ if (ret) {
+ nfs_set_pageerror(page);
+ dprintk("rewrite to MDS error = %d\n", ret);
+ }
+ unlock_page(page);
+ }
}
+ if (data->pnfs_error)
+ nfs_pageio_complete(&pgio);
nfs_writedata_release(calldata);
}
@@ -1220,7 +1244,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
- struct nfs_server *server = NFS_SERVER(data->inode);
int status;
dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1254,7 +1277,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
- server->nfs_client->cl_hostname,
+ NFS_SERVER(data->inode)->nfs_client->cl_hostname,
resp->verf->committed, argp->stable);
complain = jiffies + 300 * HZ;
}
@@ -1281,7 +1304,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
argp->stable = NFS_FILE_SYNC;
}
- nfs_restart_rpc(task, server->nfs_client);
+ rpc_restart_call_prepare(task);
return;
}
if (time_before(complain, jiffies)) {
@@ -1553,6 +1576,10 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
int flags = FLUSH_SYNC;
int ret = 0;
+ /* no commits means nothing needs to be done */
+ if (!nfsi->ncommit)
+ return ret;
+
if (wbc->sync_mode == WB_SYNC_NONE) {
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
@@ -1686,34 +1713,20 @@ out_error:
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
struct page *page)
{
- struct nfs_page *req;
- int ret;
+ /*
+ * If PagePrivate is set, then the page is currently associated with
+ * an in-progress read or write request. Don't try to migrate it.
+ *
+ * FIXME: we could do this in principle, but we'll need a way to ensure
+ * that we can safely release the inode reference while holding
+ * the page lock.
+ */
+ if (PagePrivate(page))
+ return -EBUSY;
nfs_fscache_release_page(page, GFP_KERNEL);
- req = nfs_find_and_lock_request(page, false);
- ret = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
-
- ret = migrate_page(mapping, newpage, page);
- if (!req)
- goto out;
- if (ret)
- goto out_unlock;
- page_cache_get(newpage);
- spin_lock(&mapping->host->i_lock);
- req->wb_page = newpage;
- SetPagePrivate(newpage);
- set_page_private(newpage, (unsigned long)req);
- ClearPagePrivate(page);
- set_page_private(page, 0);
- spin_unlock(&mapping->host->i_lock);
- page_cache_release(page);
-out_unlock:
- nfs_clear_page_tag_locked(req);
-out:
- return ret;
+ return migrate_page(mapping, newpage, page);
}
#endif