summaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/client.c131
-rw-r--r--fs/nfs/dir.c13
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/getroot.c42
-rw-r--r--fs/nfs/idmap.c90
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/internal.h43
-rw-r--r--fs/nfs/namespace.c66
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs4_fs.h38
-rw-r--r--fs/nfs/nfs4filelayout.c361
-rw-r--r--fs/nfs/nfs4filelayout.h19
-rw-r--r--fs/nfs/nfs4filelayoutdev.c256
-rw-r--r--fs/nfs/nfs4namespace.c41
-rw-r--r--fs/nfs/nfs4proc.c254
-rw-r--r--fs/nfs/nfs4renewd.c6
-rw-r--r--fs/nfs/nfs4state.c35
-rw-r--r--fs/nfs/nfs4xdr.c42
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/pagelist.c22
-rw-r--r--fs/nfs/pnfs.c330
-rw-r--r--fs/nfs/pnfs.h118
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/read.c127
-rw-r--r--fs/nfs/super.c478
-rw-r--r--fs/nfs/unlink.c22
-rw-r--r--fs/nfs/write.c155
29 files changed, 1821 insertions, 922 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 89587573fe50..2f41dccea18e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
rv = NFS4ERR_DELAY;
list_del_init(&lo->plh_bulk_recall);
spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&free_me_list);
put_layout_hdr(lo);
iput(ino);
}
- pnfs_free_lseg_list(&free_me_list);
return rv;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index bd3ca32879e7..139be9647d80 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -82,6 +82,11 @@ retry:
#endif /* CONFIG_NFS_V4 */
/*
+ * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
+ */
+static int nfs4_disable_idmapping = 0;
+
+/*
* RPC cruft for NFS
*/
static struct rpc_version *nfs_version[5] = {
@@ -481,7 +486,12 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
* Look up a client by IP address and protocol version
* - creates a new record if one doesn't yet exist
*/
-static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
+static struct nfs_client *
+nfs_get_client(const struct nfs_client_initdata *cl_init,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour,
+ int noresvport)
{
struct nfs_client *clp, *new = NULL;
int error;
@@ -512,6 +522,13 @@ install_client:
clp = new;
list_add(&clp->cl_share_link, &nfs_client_list);
spin_unlock(&nfs_client_lock);
+
+ error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
+ authflavour, noresvport);
+ if (error < 0) {
+ nfs_put_client(clp);
+ return ERR_PTR(error);
+ }
dprintk("--> nfs_get_client() = %p [new]\n", clp);
return clp;
@@ -767,9 +784,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
/*
* Initialise an NFS2 or NFS3 client
*/
-static int nfs_init_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
- const struct nfs_parsed_mount_data *data)
+int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
+ const char *ip_addr, rpc_authflavor_t authflavour,
+ int noresvport)
{
int error;
@@ -784,7 +801,7 @@ static int nfs_init_client(struct nfs_client *clp,
* - RFC 2623, sec 2.3.2
*/
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
- 0, data->flags & NFS_MOUNT_NORESVPORT);
+ 0, noresvport);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -820,19 +837,17 @@ static int nfs_init_server(struct nfs_server *server,
cl_init.rpc_ops = &nfs_v3_clientops;
#endif
+ nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+ data->timeo, data->retrans);
+
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init);
+ clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
+ data->flags & NFS_MOUNT_NORESVPORT);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
}
- nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
- data->timeo, data->retrans);
- error = nfs_init_client(clp, &timeparms, data);
- if (error < 0)
- goto error;
-
server->nfs_client = clp;
/* Initialise the client representation from the mount data */
@@ -1009,14 +1024,19 @@ static void nfs_server_insert_lists(struct nfs_server *server)
spin_lock(&nfs_client_lock);
list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
list_add_tail(&server->master_link, &nfs_volume_list);
+ clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
spin_unlock(&nfs_client_lock);
}
static void nfs_server_remove_lists(struct nfs_server *server)
{
+ struct nfs_client *clp = server->nfs_client;
+
spin_lock(&nfs_client_lock);
list_del_rcu(&server->client_link);
+ if (clp && list_empty(&clp->cl_superblocks))
+ set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
list_del(&server->master_link);
spin_unlock(&nfs_client_lock);
@@ -1307,11 +1327,11 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
/*
* Initialise an NFS4 client record
*/
-static int nfs4_init_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
- const char *ip_addr,
- rpc_authflavor_t authflavour,
- int flags)
+int nfs4_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour,
+ int noresvport)
{
int error;
@@ -1325,7 +1345,7 @@ static int nfs4_init_client(struct nfs_client *clp,
clp->rpc_ops = &nfs_v4_clientops;
error = nfs_create_rpc_client(clp, timeparms, authflavour,
- 1, flags & NFS_MOUNT_NORESVPORT);
+ 1, noresvport);
if (error < 0)
goto error;
strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1378,27 +1398,71 @@ static int nfs4_set_client(struct nfs_server *server,
dprintk("--> nfs4_set_client()\n");
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init);
+ clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
+ server->flags & NFS_MOUNT_NORESVPORT);
if (IS_ERR(clp)) {
error = PTR_ERR(clp);
goto error;
}
- error = nfs4_init_client(clp, timeparms, ip_addr, authflavour,
- server->flags);
- if (error < 0)
- goto error_put;
+
+ /*
+ * Query for the lease time on clientid setup or renewal
+ *
+ * Note that this will be set on nfs_clients that were created
+ * only for the DS role and did not set this bit, but now will
+ * serve a dual role.
+ */
+ set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
server->nfs_client = clp;
dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
return 0;
-
-error_put:
- nfs_put_client(clp);
error:
dprintk("<-- nfs4_set_client() = xerror %d\n", error);
return error;
}
+/*
+ * Set up a pNFS Data Server client.
+ *
+ * Return any existing nfs_client that matches server address,port,version
+ * and minorversion.
+ *
+ * For a new nfs_client, use a soft mount (default), a low retrans and a
+ * low timeout interval so that if a connection is lost, we retry through
+ * the MDS.
+ */
+struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
+ const struct sockaddr *ds_addr,
+ int ds_addrlen, int ds_proto)
+{
+ struct nfs_client_initdata cl_init = {
+ .addr = ds_addr,
+ .addrlen = ds_addrlen,
+ .rpc_ops = &nfs_v4_clientops,
+ .proto = ds_proto,
+ .minorversion = mds_clp->cl_minorversion,
+ };
+ struct rpc_timeout ds_timeout = {
+ .to_initval = 15 * HZ,
+ .to_maxval = 15 * HZ,
+ .to_retries = 1,
+ .to_exponential = 1,
+ };
+ struct nfs_client *clp;
+
+ /*
+ * Set an authflavor equual to the MDS value. Use the MDS nfs_client
+ * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
+ * (section 13.1 RFC 5661).
+ */
+ clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
+ mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
+
+ dprintk("<-- %s %p\n", __func__, clp);
+ return clp;
+}
+EXPORT_SYMBOL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.
@@ -1435,6 +1499,10 @@ static int nfs4_server_common_setup(struct nfs_server *server,
BUG_ON(!server->nfs_client->rpc_ops);
BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ /* data servers support only a subset of NFSv4.1 */
+ if (is_ds_only_client(server->nfs_client))
+ return -EPROTONOSUPPORT;
+
fattr = nfs_alloc_fattr();
if (fattr == NULL)
return -ENOMEM;
@@ -1504,6 +1572,13 @@ static int nfs4_init_server(struct nfs_server *server,
if (error < 0)
goto error;
+ /*
+ * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+ * authentication.
+ */
+ if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
+ server->caps |= NFS_CAP_UIDGID_NOMAP;
+
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
if (data->wsize)
@@ -1921,3 +1996,7 @@ void nfs_fs_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
+
+module_param(nfs4_disable_idmapping, bool, 0644);
+MODULE_PARM_DESC(nfs4_disable_idmapping,
+ "Turn off NFSv4 idmapping when using 'sec=sys'");
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c3eb33b904d..abdf38d5971d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1169,11 +1169,23 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
iput(inode);
}
+static void nfs_d_release(struct dentry *dentry)
+{
+ /* free cached devname value, if it survived that far */
+ if (unlikely(dentry->d_fsdata)) {
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ WARN_ON(1);
+ else
+ kfree(dentry->d_fsdata);
+ }
+}
+
const struct dentry_operations nfs_dentry_operations = {
.d_revalidate = nfs_lookup_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
+ .d_release = nfs_d_release,
};
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1248,6 +1260,7 @@ const struct dentry_operations nfs4_dentry_operations = {
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
+ .d_release = nfs_d_release,
};
/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9943a75bb6d1..8eea25366717 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -45,6 +45,7 @@
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/slab.h>
+#include <linux/task_io_accounting_ops.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
@@ -649,8 +650,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
- if (nfs_writeback_done(task, data) != 0)
- return;
+ nfs_writeback_done(task, data);
}
/*
@@ -938,6 +938,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (retval)
goto out;
+ task_io_account_read(count);
+
retval = nfs_direct_read(iocb, iov, nr_segs, pos);
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -999,6 +1001,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (retval)
goto out;
+ task_io_account_write(count);
+
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
if (retval > 0)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7bf029ef4084..d85a534b15cd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -387,10 +387,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos);
- pnfs_update_layout(mapping->host,
- nfs_file_open_context(file),
- IOMODE_RW);
-
start:
/*
* Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b5ffe8fa291f..1084792bc0fe 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -75,18 +75,25 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
/*
* get an NFS2/NFS3 root dentry from the root filehandle
*/
-struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
+ const char *devname)
{
struct nfs_server *server = NFS_SB(sb);
struct nfs_fsinfo fsinfo;
struct dentry *ret;
struct inode *inode;
+ void *name = kstrdup(devname, GFP_KERNEL);
int error;
+ if (!name)
+ return ERR_PTR(-ENOMEM);
+
/* get the actual root for this mount */
fsinfo.fattr = nfs_alloc_fattr();
- if (fsinfo.fattr == NULL)
+ if (fsinfo.fattr == NULL) {
+ kfree(name);
return ERR_PTR(-ENOMEM);
+ }
error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
if (error < 0) {
@@ -119,7 +126,15 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
}
security_d_instantiate(ret, inode);
+ spin_lock(&ret->d_lock);
+ if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+ ret->d_fsdata = name;
+ name = NULL;
+ }
+ spin_unlock(&ret->d_lock);
out:
+ if (name)
+ kfree(name);
nfs_free_fattr(fsinfo.fattr);
return ret;
}
@@ -169,27 +184,35 @@ out:
/*
* get an NFS4 root dentry from the root filehandle
*/
-struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
+ const char *devname)
{
struct nfs_server *server = NFS_SB(sb);
struct nfs_fattr *fattr = NULL;
struct dentry *ret;
struct inode *inode;
+ void *name = kstrdup(devname, GFP_KERNEL);
int error;
dprintk("--> nfs4_get_root()\n");
+ if (!name)
+ return ERR_PTR(-ENOMEM);
+
/* get the info about the server and filesystem */
error = nfs4_server_capabilities(server, mntfh);
if (error < 0) {
dprintk("nfs_get_root: getcaps error = %d\n",
-error);
+ kfree(name);
return ERR_PTR(error);
}
fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- return ERR_PTR(-ENOMEM);;
+ if (fattr == NULL) {
+ kfree(name);
+ return ERR_PTR(-ENOMEM);
+ }
/* get the actual root for this mount */
error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
@@ -223,8 +246,15 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
}
security_d_instantiate(ret, inode);
-
+ spin_lock(&ret->d_lock);
+ if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+ ret->d_fsdata = name;
+ name = NULL;
+ }
+ spin_unlock(&ret->d_lock);
out:
+ if (name)
+ kfree(name);
nfs_free_fattr(fattr);
dprintk("<-- nfs4_get_root()\n");
return ret;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 18696882f1c6..79664a1025af 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -33,16 +33,41 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
+{
+ unsigned long val;
+ char buf[16];
+
+ if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
+ return 0;
+ memcpy(buf, name, namelen);
+ buf[namelen] = '\0';
+ if (strict_strtoul(buf, 0, &val) != 0)
+ return 0;
+ *res = val;
+ return 1;
+}
+
+static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
+{
+ return snprintf(buf, buflen, "%u", id);
+}
#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
#include <linux/slab.h>
#include <linux/cred.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs_sb.h>
#include <linux/nfs_idmap.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <linux/rcupdate.h>
-#include <linux/kernel.h>
#include <linux/err.h>
#include <keys/user-type.h>
@@ -219,23 +244,39 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
return ret;
}
-int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
{
+ if (nfs_map_string_to_numeric(name, namelen, uid))
+ return 0;
return nfs_idmap_lookup_id(name, namelen, "uid", uid);
}
-int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid)
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
{
+ if (nfs_map_string_to_numeric(name, namelen, gid))
+ return 0;
return nfs_idmap_lookup_id(name, namelen, "gid", gid);
}
-int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
+int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
{
- return nfs_idmap_lookup_name(uid, "user", buf, buflen);
+ int ret = -EINVAL;
+
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+ ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
+ if (ret < 0)
+ ret = nfs_map_numeric_to_string(uid, buf, buflen);
+ return ret;
}
-int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen)
+int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
{
- return nfs_idmap_lookup_name(gid, "group", buf, buflen);
+ int ret = -EINVAL;
+
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+ ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
+ if (ret < 0)
+ ret = nfs_map_numeric_to_string(gid, buf, buflen);
+ return ret;
}
#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
@@ -243,7 +284,6 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/init.h>
-#include <linux/types.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/in.h>
@@ -695,31 +735,45 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
return hash;
}
-int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
{
- struct idmap *idmap = clp->cl_idmap;
+ struct idmap *idmap = server->nfs_client->cl_idmap;
+ if (nfs_map_string_to_numeric(name, namelen, uid))
+ return 0;
return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
}
-int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
{
- struct idmap *idmap = clp->cl_idmap;
+ struct idmap *idmap = server->nfs_client->cl_idmap;
+ if (nfs_map_string_to_numeric(name, namelen, uid))
+ return 0;
return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
}
-int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
+int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
{
- struct idmap *idmap = clp->cl_idmap;
+ struct idmap *idmap = server->nfs_client->cl_idmap;
+ int ret = -EINVAL;
- return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+ ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
+ if (ret < 0)
+ ret = nfs_map_numeric_to_string(uid, buf, buflen);
+ return ret;
}
-int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
+int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
{
- struct idmap *idmap = clp->cl_idmap;
+ struct idmap *idmap = server->nfs_client->cl_idmap;
+ int ret = -EINVAL;
- return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+ ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
+ if (ret < 0)
+ ret = nfs_map_numeric_to_string(uid, buf, buflen);
+ return ret;
}
#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..01768e5e2c9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
*/
u64 nfs_compat_user_ino64(u64 fileid)
{
- int ino;
+#ifdef CONFIG_COMPAT
+ compat_ulong_t ino;
+#else
+ unsigned long ino;
+#endif
if (enable_ino64)
return fileid;
@@ -1513,7 +1518,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index cf9fdbdabc67..72e0bddf7a2f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -148,6 +148,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fattr *);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern int nfs4_check_client_ready(struct nfs_client *clp);
+extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
+ const struct sockaddr *ds_addr,
+ int ds_addrlen, int ds_proto);
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
@@ -163,10 +166,10 @@ static inline void nfs_fs_proc_exit(void)
/* nfs4namespace.c */
#ifdef CONFIG_NFS_V4
-extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
+extern struct vfsmount *nfs_do_refmount(struct dentry *dentry);
#else
static inline
-struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
+struct vfsmount *nfs_do_refmount(struct dentry *dentry)
{
return ERR_PTR(-ENOENT);
}
@@ -213,8 +216,14 @@ extern const u32 nfs41_maxwrite_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
+extern int nfs4_init_ds_session(struct nfs_client *clp);
+
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+extern int nfs_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr, rpc_authflavor_t authflavour,
+ int noresvport);
/* dir.c */
extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -247,24 +256,30 @@ extern void nfs_sb_active(struct super_block *sb);
extern void nfs_sb_deactive(struct super_block *sb);
/* namespace.c */
-extern char *nfs_path(const char *base,
- const struct dentry *droot,
- const struct dentry *dentry,
+extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen);
extern struct vfsmount *nfs_d_automount(struct path *path);
/* getroot.c */
-extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
+extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
+ const char *);
#ifdef CONFIG_NFS_V4
-extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
+ const char *);
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
/* read.c */
+extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
+ const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
/* write.c */
+extern int nfs_initiate_write(struct nfs_write_data *data,
+ struct rpc_clnt *clnt,
+ const struct rpc_call_ops *call_ops,
+ int how);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
@@ -274,6 +289,13 @@ extern int nfs_migrate_page(struct address_space *,
#endif
/* nfs4proc.c */
+extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
+extern int nfs4_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour,
+ int noresvport);
+extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
extern int _nfs4_call_sync(struct nfs_server *server,
struct rpc_message *msg,
struct nfs4_sequence_args *args,
@@ -288,12 +310,11 @@ extern int _nfs4_call_sync_session(struct nfs_server *server,
/*
* Determine the device name as a string
*/
-static inline char *nfs_devname(const struct vfsmount *mnt_parent,
- const struct dentry *dentry,
+static inline char *nfs_devname(struct dentry *dentry,
char *buffer, ssize_t buflen)
{
- return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
- dentry, buffer, buflen);
+ char *dummy;
+ return nfs_path(&dummy, dentry, buffer, buflen);
}
/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f32b8603dca8..bf1c68009ffd 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -25,33 +25,30 @@ static LIST_HEAD(nfs_automount_list);
static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
int nfs_mountpoint_expiry_timeout = 500 * HZ;
-static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
- const struct dentry *dentry,
+static struct vfsmount *nfs_do_submount(struct dentry *dentry,
struct nfs_fh *fh,
struct nfs_fattr *fattr);
/*
* nfs_path - reconstruct the path given an arbitrary dentry
- * @base - arbitrary string to prepend to the path
- * @droot - pointer to root dentry for mountpoint
+ * @base - used to return pointer to the end of devname part of path
* @dentry - pointer to dentry
* @buffer - result buffer
* @buflen - length of buffer
*
- * Helper function for constructing the path from the
- * root dentry to an arbitrary hashed dentry.
+ * Helper function for constructing the server pathname
+ * by arbitrary hashed dentry.
*
* This is mainly for use in figuring out the path on the
- * server side when automounting on top of an existing partition.
+ * server side when automounting on top of an existing partition
+ * and in generating /proc/mounts and friends.
*/
-char *nfs_path(const char *base,
- const struct dentry *droot,
- const struct dentry *dentry,
- char *buffer, ssize_t buflen)
+char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
{
char *end;
int namelen;
unsigned seq;
+ const char *base;
rename_retry:
end = buffer+buflen;
@@ -60,7 +57,10 @@ rename_retry:
seq = read_seqbegin(&rename_lock);
rcu_read_lock();
- while (!IS_ROOT(dentry) && dentry != droot) {
+ while (1) {
+ spin_lock(&dentry->d_lock);
+ if (IS_ROOT(dentry))
+ break;
namelen = dentry->d_name.len;
buflen -= namelen + 1;
if (buflen < 0)
@@ -68,27 +68,47 @@ rename_retry:
end -= namelen;
memcpy(end, dentry->d_name.name, namelen);
*--end = '/';
+ spin_unlock(&dentry->d_lock);
dentry = dentry->d_parent;
}
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
+ if (read_seqretry(&rename_lock, seq)) {
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
goto rename_retry;
+ }
if (*end != '/') {
- if (--buflen < 0)
+ if (--buflen < 0) {
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
goto Elong;
+ }
*--end = '/';
}
+ *p = end;
+ base = dentry->d_fsdata;
+ if (!base) {
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
+ WARN_ON(1);
+ return end;
+ }
namelen = strlen(base);
/* Strip off excess slashes in base string */
while (namelen > 0 && base[namelen - 1] == '/')
namelen--;
buflen -= namelen;
- if (buflen < 0)
+ if (buflen < 0) {
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
goto Elong;
+ }
end -= namelen;
memcpy(end, base, namelen);
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
return end;
Elong_unlock:
+ spin_unlock(&dentry->d_lock);
rcu_read_unlock();
if (read_seqretry(&rename_lock, seq))
goto rename_retry;
@@ -143,9 +163,9 @@ struct vfsmount *nfs_d_automount(struct path *path)
}
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
- mnt = nfs_do_refmount(path->mnt, path->dentry);
+ mnt = nfs_do_refmount(path->dentry);
else
- mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr);
+ mnt = nfs_do_submount(path->dentry, fh, fattr);
if (IS_ERR(mnt))
goto out;
@@ -209,19 +229,17 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
/**
* nfs_do_submount - set up mountpoint when crossing a filesystem boundary
- * @mnt_parent - mountpoint of parent directory
* @dentry - parent directory
* @fh - filehandle for new root dentry
* @fattr - attributes for new root inode
*
*/
-static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
- const struct dentry *dentry,
+static struct vfsmount *nfs_do_submount(struct dentry *dentry,
struct nfs_fh *fh,
struct nfs_fattr *fattr)
{
struct nfs_clone_mount mountdata = {
- .sb = mnt_parent->mnt_sb,
+ .sb = dentry->d_sb,
.dentry = dentry,
.fh = fh,
.fattr = fattr,
@@ -237,11 +255,11 @@ static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
dentry->d_name.name);
if (page == NULL)
goto out;
- devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
+ devname = nfs_devname(dentry, page, PAGE_SIZE);
mnt = (struct vfsmount *)devname;
if (IS_ERR(devname))
goto free_page;
- mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata);
+ mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
free_page:
free_page((unsigned long)page);
out:
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ce939c062a52..d0c80d8b3f96 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,4 +885,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.lock = nfs3_proc_lock,
.clear_acl_cache = nfs3_forget_cached_acls,
.close_context = nfs_close_context,
+ .init_client = nfs_init_client,
};
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..c64be1cff080 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -252,6 +252,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
extern int nfs4_setup_sequence(const struct nfs_server *server,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
int cache_reply, struct rpc_task *task);
+extern int nfs41_setup_sequence(struct nfs4_session *session,
+ struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ int cache_reply, struct rpc_task *task);
extern void nfs4_destroy_session(struct nfs4_session *session);
extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
extern int nfs4_proc_create_session(struct nfs_client *);
@@ -259,6 +262,19 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
extern int nfs4_init_session(struct nfs_server *server);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
+
+static inline bool
+is_ds_only_client(struct nfs_client *clp)
+{
+ return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) ==
+ EXCHGID4_FLAG_USE_PNFS_DS;
+}
+
+static inline bool
+is_ds_client(struct nfs_client *clp)
+{
+ return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
+}
#else /* CONFIG_NFS_v4_1 */
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
{
@@ -276,6 +292,18 @@ static inline int nfs4_init_session(struct nfs_server *server)
{
return 0;
}
+
+static inline bool
+is_ds_only_client(struct nfs_client *clp)
+{
+ return false;
+}
+
+static inline bool
+is_ds_client(struct nfs_client *clp)
+{
+ return false;
+}
#endif /* CONFIG_NFS_V4_1 */
extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
@@ -298,6 +326,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+#else
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +340,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
-extern void nfs4_schedule_state_recovery(struct nfs_client *);
+extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
-extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
-extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
+extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 23f930caf1e2..428558464817 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -40,32 +40,309 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
MODULE_DESCRIPTION("The NFSv4 file layout driver");
-static int
-filelayout_set_layoutdriver(struct nfs_server *nfss)
+#define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
+
+static loff_t
+filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
+ loff_t offset)
{
- int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
- nfs4_fl_free_deviceid_callback);
- if (status) {
- printk(KERN_WARNING "%s: deviceid cache could not be "
- "initialized\n", __func__);
- return status;
+ u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
+ u64 tmp;
+
+ offset -= flseg->pattern_offset;
+ tmp = offset;
+ do_div(tmp, stripe_width);
+
+ return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit);
+}
+
+/* This function is used by the layout driver to calculate the
+ * offset of the file on the dserver based on whether the
+ * layout type is STRIPE_DENSE or STRIPE_SPARSE
+ */
+static loff_t
+filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
+{
+ struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
+
+ switch (flseg->stripe_type) {
+ case STRIPE_SPARSE:
+ return offset;
+
+ case STRIPE_DENSE:
+ return filelayout_get_dense_offset(flseg, offset);
}
- dprintk("%s: deviceid cache has been initialized successfully\n",
- __func__);
+
+ BUG();
+}
+
+/* For data server errors we don't recover from */
+static void
+filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
+{
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+ } else {
+ dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ }
+}
+
+static int filelayout_async_handle_error(struct rpc_task *task,
+ struct nfs4_state *state,
+ struct nfs_client *clp,
+ int *reset)
+{
+ if (task->tk_status >= 0)
+ return 0;
+
+ *reset = 0;
+
+ switch (task->tk_status) {
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_BADSLOT:
+ case -NFS4ERR_BAD_HIGH_SLOT:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case -NFS4ERR_SEQ_FALSE_RETRY:
+ case -NFS4ERR_SEQ_MISORDERED:
+ dprintk("%s ERROR %d, Reset session. Exchangeid "
+ "flags 0x%x\n", __func__, task->tk_status,
+ clp->cl_exchange_flags);
+ nfs4_schedule_session_recovery(clp->cl_session);
+ break;
+ case -NFS4ERR_DELAY:
+ case -NFS4ERR_GRACE:
+ case -EKEYEXPIRED:
+ rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
+ break;
+ default:
+ dprintk("%s DS error. Retry through MDS %d\n", __func__,
+ task->tk_status);
+ *reset = 1;
+ break;
+ }
+ task->tk_status = 0;
+ return -EAGAIN;
+}
+
+/* NFS_PROTO call done callback routines */
+
+static int filelayout_read_done_cb(struct rpc_task *task,
+ struct nfs_read_data *data)
+{
+ struct nfs_client *clp = data->ds_clp;
+ int reset = 0;
+
+ dprintk("%s DS read\n", __func__);
+
+ if (filelayout_async_handle_error(task, data->args.context->state,
+ data->ds_clp, &reset) == -EAGAIN) {
+ dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
+ __func__, data->ds_clp, data->ds_clp->cl_session);
+ if (reset) {
+ filelayout_set_lo_fail(data->lseg);
+ nfs4_reset_read(task, data);
+ clp = NFS_SERVER(data->inode)->nfs_client;
+ }
+ nfs_restart_rpc(task, clp);
+ return -EAGAIN;
+ }
+
return 0;
}
-/* Clear out the layout by destroying its device list */
-static int
-filelayout_clear_layoutdriver(struct nfs_server *nfss)
+/*
+ * Call ops for the async read/write cases
+ * In the case of dense layouts, the offset needs to be reset to its
+ * original value.
+ */
+static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- dprintk("--> %s\n", __func__);
+ struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+
+ rdata->read_done_cb = filelayout_read_done_cb;
+
+ if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
+ &rdata->args.seq_args, &rdata->res.seq_res,
+ 0, task))
+ return;
+
+ rpc_call_start(task);
+}
+
+static void filelayout_read_call_done(struct rpc_task *task, void *data)
+{
+ struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+
+ dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+
+ /* Note this may cause RPC to be resent */
+ rdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_read_release(void *data)
+{
+ struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+
+ rdata->mds_ops->rpc_release(data);
+}
+
+static int filelayout_write_done_cb(struct rpc_task *task,
+ struct nfs_write_data *data)
+{
+ int reset = 0;
+
+ if (filelayout_async_handle_error(task, data->args.context->state,
+ data->ds_clp, &reset) == -EAGAIN) {
+ struct nfs_client *clp;
+
+ dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
+ __func__, data->ds_clp, data->ds_clp->cl_session);
+ if (reset) {
+ filelayout_set_lo_fail(data->lseg);
+ nfs4_reset_write(task, data);
+ clp = NFS_SERVER(data->inode)->nfs_client;
+ } else
+ clp = data->ds_clp;
+ nfs_restart_rpc(task, clp);
+ return -EAGAIN;
+ }
- if (nfss->nfs_client->cl_devid_cache)
- pnfs_put_deviceid_cache(nfss->nfs_client);
return 0;
}
+static void filelayout_write_prepare(struct rpc_task *task, void *data)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+ if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+ &wdata->args.seq_args, &wdata->res.seq_res,
+ 0, task))
+ return;
+
+ rpc_call_start(task);
+}
+
+static void filelayout_write_call_done(struct rpc_task *task, void *data)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+ /* Note this may cause RPC to be resent */
+ wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_write_release(void *data)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+ wdata->mds_ops->rpc_release(data);
+}
+
+struct rpc_call_ops filelayout_read_call_ops = {
+ .rpc_call_prepare = filelayout_read_prepare,
+ .rpc_call_done = filelayout_read_call_done,
+ .rpc_release = filelayout_read_release,
+};
+
+struct rpc_call_ops filelayout_write_call_ops = {
+ .rpc_call_prepare = filelayout_write_prepare,
+ .rpc_call_done = filelayout_write_call_done,
+ .rpc_release = filelayout_write_release,
+};
+
+static enum pnfs_try_status
+filelayout_read_pagelist(struct nfs_read_data *data)
+{
+ struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs4_pnfs_ds *ds;
+ loff_t offset = data->args.offset;
+ u32 j, idx;
+ struct nfs_fh *fh;
+ int status;
+
+ dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
+ __func__, data->inode->i_ino,
+ data->args.pgbase, (size_t)data->args.count, offset);
+
+ /* Retrieve the correct rpc_client for the byte range */
+ j = nfs4_fl_calc_j_index(lseg, offset);
+ idx = nfs4_fl_calc_ds_index(lseg, j);
+ ds = nfs4_fl_prepare_ds(lseg, idx);
+ if (!ds) {
+ /* Either layout fh index faulty, or ds connect failed */
+ set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+ set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ return PNFS_NOT_ATTEMPTED;
+ }
+ dprintk("%s USE DS:ip %x %hu\n", __func__,
+ ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+
+ /* No multipath support. Use first DS */
+ data->ds_clp = ds->ds_clp;
+ fh = nfs4_fl_select_ds_fh(lseg, j);
+ if (fh)
+ data->args.fh = fh;
+
+ data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+ data->mds_offset = offset;
+
+ /* Perform an asynchronous read to ds */
+ status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
+ &filelayout_read_call_ops);
+ BUG_ON(status != 0);
+ return PNFS_ATTEMPTED;
+}
+
+/* Perform async writes. */
+static enum pnfs_try_status
+filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+{
+ struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs4_pnfs_ds *ds;
+ loff_t offset = data->args.offset;
+ u32 j, idx;
+ struct nfs_fh *fh;
+ int status;
+
+ /* Retrieve the correct rpc_client for the byte range */
+ j = nfs4_fl_calc_j_index(lseg, offset);
+ idx = nfs4_fl_calc_ds_index(lseg, j);
+ ds = nfs4_fl_prepare_ds(lseg, idx);
+ if (!ds) {
+ printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+ set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ return PNFS_NOT_ATTEMPTED;
+ }
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+ data->inode->i_ino, sync, (size_t) data->args.count, offset,
+ ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+
+ /* We can't handle commit to ds yet */
+ if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
+ data->args.stable = NFS_FILE_SYNC;
+
+ data->write_done_cb = filelayout_write_done_cb;
+ data->ds_clp = ds->ds_clp;
+ fh = nfs4_fl_select_ds_fh(lseg, j);
+ if (fh)
+ data->args.fh = fh;
+ /*
+ * Get the file offset on the dserver. Set the write offset to
+ * this offset and save the original offset.
+ */
+ data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+ data->mds_offset = offset;
+
+ /* Perform an asynchronous write */
+ status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
+ &filelayout_write_call_ops, sync);
+ BUG_ON(status != 0);
+ return PNFS_ATTEMPTED;
+}
+
/*
* filelayout_check_layout()
*
@@ -92,14 +369,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
}
- if (fl->stripe_unit % PAGE_SIZE) {
- dprintk("%s Stripe unit (%u) not page aligned\n",
+ if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
+ dprintk("%s Invalid stripe unit (%u)\n",
__func__, fl->stripe_unit);
goto out;
}
/* find and reference the deviceid */
- dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
+ dsaddr = nfs4_fl_find_get_deviceid(id);
if (dsaddr == NULL) {
dsaddr = get_device_info(lo->plh_inode, id);
if (dsaddr == NULL)
@@ -134,7 +411,7 @@ out:
dprintk("--> %s returns %d\n", __func__, status);
return status;
out_put:
- pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
+ nfs4_fl_put_deviceid(dsaddr);
goto out;
}
@@ -243,23 +520,47 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
static void
filelayout_free_lseg(struct pnfs_layout_segment *lseg)
{
- struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
dprintk("--> %s\n", __func__);
- pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
- &fl->dsaddr->deviceid);
+ nfs4_fl_put_deviceid(fl->dsaddr);
_filelayout_free_lseg(fl);
}
+/*
+ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
+ *
+ * return 1 : coalesce page
+ * return 0 : don't coalesce page
+ */
+int
+filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ u64 p_stripe, r_stripe;
+ u32 stripe_unit;
+
+ if (!pgio->pg_lseg)
+ return 1;
+ p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
+ r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
+ stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+
+ do_div(p_stripe, stripe_unit);
+ do_div(r_stripe, stripe_unit);
+
+ return (p_stripe == r_stripe);
+}
+
static struct pnfs_layoutdriver_type filelayout_type = {
- .id = LAYOUT_NFSV4_1_FILES,
- .name = "LAYOUT_NFSV4_1_FILES",
- .owner = THIS_MODULE,
- .set_layoutdriver = filelayout_set_layoutdriver,
- .clear_layoutdriver = filelayout_clear_layoutdriver,
- .alloc_lseg = filelayout_alloc_lseg,
- .free_lseg = filelayout_free_lseg,
+ .id = LAYOUT_NFSV4_1_FILES,
+ .name = "LAYOUT_NFSV4_1_FILES",
+ .owner = THIS_MODULE,
+ .alloc_lseg = filelayout_alloc_lseg,
+ .free_lseg = filelayout_free_lseg,
+ .pg_test = filelayout_pg_test,
+ .read_pagelist = filelayout_read_pagelist,
+ .write_pagelist = filelayout_write_pagelist,
};
static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index bbf60dd2ab9d..ee0c907742b5 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -55,8 +55,14 @@ struct nfs4_pnfs_ds {
atomic_t ds_count;
};
+/* nfs4_file_layout_dsaddr flags */
+#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
+
struct nfs4_file_layout_dsaddr {
- struct pnfs_deviceid_node deviceid;
+ struct hlist_node node;
+ struct nfs4_deviceid deviceid;
+ atomic_t ref;
+ unsigned long flags;
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
@@ -83,11 +89,18 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
generic_hdr);
}
-extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
+extern struct nfs_fh *
+nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
+
extern void print_ds(struct nfs4_pnfs_ds *ds);
extern void print_deviceid(struct nfs4_deviceid *dev_id);
+u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
+u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
+struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
+ u32 ds_idx);
extern struct nfs4_file_layout_dsaddr *
-nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
+nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
+extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..68143c162e3b 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -37,6 +37,30 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
+ * Device ID RCU cache. A device ID is unique per client ID and layout type.
+ */
+#define NFS4_FL_DEVICE_ID_HASH_BITS 5
+#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
+#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
+
+static inline u32
+nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
+{
+ unsigned char *cptr = (unsigned char *)id->data;
+ unsigned int nbytes = NFS4_DEVICEID4_SIZE;
+ u32 x = 0;
+
+ while (nbytes--) {
+ x *= 37;
+ x += *cptr++;
+ }
+ return x & NFS4_FL_DEVICE_ID_HASH_MASK;
+}
+
+static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
+static DEFINE_SPINLOCK(filelayout_deviceid_lock);
+
+/*
* Data server cache
*
* Data servers can be mapped to different device ids.
@@ -104,6 +128,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port)
return NULL;
}
+/*
+ * Create an rpc connection to the nfs4_pnfs_ds data server
+ * Currently only support IPv4
+ */
+static int
+nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
+{
+ struct nfs_client *clp;
+ struct sockaddr_in sin;
+ int status = 0;
+
+ dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
+ ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
+
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ds->ds_ip_addr;
+ sin.sin_port = ds->ds_port;
+
+ clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
+ sizeof(sin), IPPROTO_TCP);
+ if (IS_ERR(clp)) {
+ status = PTR_ERR(clp);
+ goto out;
+ }
+
+ if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
+ if (!is_ds_client(clp)) {
+ status = -ENODEV;
+ goto out_put;
+ }
+ ds->ds_clp = clp;
+ dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
+ ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ goto out;
+ }
+
+ /*
+ * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
+ * be equal to the MDS lease. Renewal is scheduled in create_session.
+ */
+ spin_lock(&mds_srv->nfs_client->cl_lock);
+ clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
+ spin_unlock(&mds_srv->nfs_client->cl_lock);
+ clp->cl_last_renewal = jiffies;
+
+ /* New nfs_client */
+ status = nfs4_init_ds_session(clp);
+ if (status)
+ goto out_put;
+
+ ds->ds_clp = clp;
+ dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
+ ntohs(ds->ds_port));
+out:
+ return status;
+out_put:
+ nfs_put_client(clp);
+ goto out;
+}
+
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
@@ -122,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
struct nfs4_pnfs_ds *ds;
int i;
- print_deviceid(&dsaddr->deviceid.de_id);
+ print_deviceid(&dsaddr->deviceid);
for (i = 0; i < dsaddr->ds_num; i++) {
ds = dsaddr->ds_list[i];
@@ -139,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
kfree(dsaddr);
}
-void
-nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
-{
- struct nfs4_file_layout_dsaddr *dsaddr =
- container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
-
- nfs4_fl_free_deviceid(dsaddr);
-}
-
static struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
{
@@ -219,6 +295,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
goto out_err;
}
buf = kmalloc(rlen + 1, GFP_KERNEL);
+ if (!buf) {
+ dprintk("%s: Not enough memory\n", __func__);
+ goto out_err;
+ }
buf[rlen] = '\0';
memcpy(buf, r_addr, rlen);
@@ -296,7 +376,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
dsaddr->stripe_count = cnt;
dsaddr->ds_num = num;
- memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
+ memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
/* Go back an read stripe indices */
p = indicesp;
@@ -346,28 +426,37 @@ out_err:
}
/*
- * Decode the opaque device specified in 'dev'
- * and add it to the list of available devices.
- * If the deviceid is already cached, nfs4_add_deviceid will return
- * a pointer to the cached struct and throw away the new.
+ * Decode the opaque device specified in 'dev' and add it to the cache of
+ * available devices.
*/
-static struct nfs4_file_layout_dsaddr*
+static struct nfs4_file_layout_dsaddr *
decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
{
- struct nfs4_file_layout_dsaddr *dsaddr;
- struct pnfs_deviceid_node *d;
+ struct nfs4_file_layout_dsaddr *d, *new;
+ long hash;
- dsaddr = decode_device(inode, dev);
- if (!dsaddr) {
+ new = decode_device(inode, dev);
+ if (!new) {
printk(KERN_WARNING "%s: Could not decode or add device\n",
__func__);
return NULL;
}
- d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
- &dsaddr->deviceid);
+ spin_lock(&filelayout_deviceid_lock);
+ d = nfs4_fl_find_get_deviceid(&new->deviceid);
+ if (d) {
+ spin_unlock(&filelayout_deviceid_lock);
+ nfs4_fl_free_deviceid(new);
+ return d;
+ }
+
+ INIT_HLIST_NODE(&new->node);
+ atomic_set(&new->ref, 1);
+ hash = nfs4_fl_deviceid_hash(&new->deviceid);
+ hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
+ spin_unlock(&filelayout_deviceid_lock);
- return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
+ return new;
}
/*
@@ -442,12 +531,123 @@ out_free:
return dsaddr;
}
+void
+nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
+{
+ if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
+ hlist_del_rcu(&dsaddr->node);
+ spin_unlock(&filelayout_deviceid_lock);
+
+ synchronize_rcu();
+ nfs4_fl_free_deviceid(dsaddr);
+ }
+}
+
struct nfs4_file_layout_dsaddr *
-nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
+nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
+{
+ struct nfs4_file_layout_dsaddr *d;
+ struct hlist_node *n;
+ long hash = nfs4_fl_deviceid_hash(id);
+
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
+ if (!memcmp(&d->deviceid, id, sizeof(*id))) {
+ if (!atomic_inc_not_zero(&d->ref))
+ goto fail;
+ rcu_read_unlock();
+ return d;
+ }
+ }
+fail:
+ rcu_read_unlock();
+ return NULL;
+}
+
+/*
+ * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
+ * Then: ((res + fsi) % dsaddr->stripe_count)
+ */
+u32
+nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
+{
+ struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
+ u64 tmp;
+
+ tmp = offset - flseg->pattern_offset;
+ do_div(tmp, flseg->stripe_unit);
+ tmp += flseg->first_stripe_index;
+ return do_div(tmp, flseg->dsaddr->stripe_count);
+}
+
+u32
+nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
{
- struct pnfs_deviceid_node *d;
+ return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
+}
- d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
- return (d == NULL) ? NULL :
- container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
+struct nfs_fh *
+nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
+{
+ struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
+ u32 i;
+
+ if (flseg->stripe_type == STRIPE_SPARSE) {
+ if (flseg->num_fh == 1)
+ i = 0;
+ else if (flseg->num_fh == 0)
+ /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
+ return NULL;
+ else
+ i = nfs4_fl_calc_ds_index(lseg, j);
+ } else
+ i = j;
+ return flseg->fh_array[i];
+}
+
+static void
+filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
+ int err, u32 ds_addr)
+{
+ u32 *p = (u32 *)&dsaddr->deviceid;
+
+ printk(KERN_ERR "NFS: data server %x connection error %d."
+ " Deviceid [%x%x%x%x] marked out of use.\n",
+ ds_addr, err, p[0], p[1], p[2], p[3]);
+
+ spin_lock(&filelayout_deviceid_lock);
+ dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
+ spin_unlock(&filelayout_deviceid_lock);
+}
+
+struct nfs4_pnfs_ds *
+nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
+{
+ struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
+ struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
+
+ if (ds == NULL) {
+ printk(KERN_ERR "%s: No data server for offset index %d\n",
+ __func__, ds_idx);
+ return NULL;
+ }
+
+ if (!ds->ds_clp) {
+ struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
+ int err;
+
+ if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
+ /* Already tried to connect, don't try again */
+ dprintk("%s Deviceid marked out of use\n", __func__);
+ return NULL;
+ }
+ err = nfs4_ds_connect(s, ds);
+ if (err) {
+ filelayout_mark_devid_negative(dsaddr, err,
+ ntohl(ds->ds_ip_addr));
+ return NULL;
+ }
+ }
+ return ds;
}
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3c2a1724fbd2..bb80c49b6533 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -54,33 +54,29 @@ Elong:
/*
* Determine the mount path as a string
*/
-static char *nfs4_path(const struct vfsmount *mnt_parent,
- const struct dentry *dentry,
- char *buffer, ssize_t buflen)
+static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
{
- const char *srvpath;
-
- srvpath = strchr(mnt_parent->mnt_devname, ':');
- if (srvpath)
- srvpath++;
- else
- srvpath = mnt_parent->mnt_devname;
-
- return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
+ char *limit;
+ char *path = nfs_path(&limit, dentry, buffer, buflen);
+ if (!IS_ERR(path)) {
+ char *colon = strchr(path, ':');
+ if (colon && colon < limit)
+ path = colon + 1;
+ }
+ return path;
}
/*
* Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
* believe to be the server path to this dentry
*/
-static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
- const struct dentry *dentry,
+static int nfs4_validate_fspath(struct dentry *dentry,
const struct nfs4_fs_locations *locations,
char *page, char *page2)
{
const char *path, *fs_path;
- path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
+ path = nfs4_path(dentry, page, PAGE_SIZE);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -165,20 +161,18 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
/**
* nfs_follow_referral - set up mountpoint when hitting a referral on moved error
- * @mnt_parent - mountpoint of parent directory
* @dentry - parent directory
* @locations - array of NFSv4 server location information
*
*/
-static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
- const struct dentry *dentry,
+static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
const struct nfs4_fs_locations *locations)
{
struct vfsmount *mnt = ERR_PTR(-ENOENT);
struct nfs_clone_mount mountdata = {
- .sb = mnt_parent->mnt_sb,
+ .sb = dentry->d_sb,
.dentry = dentry,
- .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
+ .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
};
char *page = NULL, *page2 = NULL;
int loc, error;
@@ -198,7 +192,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
goto out;
/* Ensure fs path is a prefix of current dentry path */
- error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
+ error = nfs4_validate_fspath(dentry, locations, page, page2);
if (error < 0) {
mnt = ERR_PTR(error);
goto out;
@@ -225,11 +219,10 @@ out:
/*
* nfs_do_refmount - handle crossing a referral on server
- * @mnt_parent - mountpoint of referral
* @dentry - dentry of referral
*
*/
-struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
+struct vfsmount *nfs_do_refmount(struct dentry *dentry)
{
struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct dentry *parent;
@@ -262,7 +255,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
fs_locations->fs_path.ncomponents <= 0)
goto out_free;
- mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations);
+ mnt = nfs_follow_referral(dentry, fs_locations);
out_free:
__free_page(page);
kfree(fs_locations);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..1d84e7088af9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -85,6 +85,9 @@ static int nfs4_map_errors(int err)
switch (err) {
case -NFS4ERR_RESOURCE:
return -EREMOTEIO;
+ case -NFS4ERR_BADOWNER:
+ case -NFS4ERR_BADNAME:
+ return -EINVAL;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -241,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
/* This is the error handling routine for processes that are allowed
* to sleep.
*/
-static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state *state = exception->state;
@@ -256,12 +259,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -272,7 +276,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -292,11 +296,23 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
break;
case -NFS4ERR_OLD_STATEID:
exception->retry = 1;
+ break;
+ case -NFS4ERR_BADOWNER:
+ /* The following works around a Linux server bug! */
+ case -NFS4ERR_BADNAME:
+ if (server->caps & NFS_CAP_UIDGID_NOMAP) {
+ server->caps &= ~NFS_CAP_UIDGID_NOMAP;
+ exception->retry = 1;
+ printk(KERN_WARNING "NFS: v4 server %s "
+ "does not accept raw "
+ "uid/gids. "
+ "Reenabling the idmapper.\n",
+ server->nfs_client->cl_hostname);
+ }
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
-do_state_recovery:
- nfs4_schedule_state_recovery(clp);
+wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
if (ret == 0)
exception->retry = 1;
@@ -435,8 +451,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
clp = res->sr_session->clp;
do_renew_lease(clp, timestamp);
/* Check sequence flags */
- if (atomic_read(&clp->cl_count) > 1)
- nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+ if (res->sr_status_flags != 0)
+ nfs4_schedule_lease_recovery(clp);
break;
case -NFS4ERR_DELAY:
/* The server detected a resend of the RPC call and
@@ -505,7 +521,7 @@ out:
return ret_id;
}
-static int nfs41_setup_sequence(struct nfs4_session *session,
+int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
int cache_reply,
@@ -571,6 +587,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
res->sr_status = 1;
return 0;
}
+EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
int nfs4_setup_sequence(const struct nfs_server *server,
struct nfs4_sequence_args *args,
@@ -1255,14 +1272,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(
- server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Don't recall a delegation if it was lost */
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_lease_recovery(server->nfs_client);
goto out;
case -ERESTARTSYS:
/*
@@ -1271,7 +1287,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
*/
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
* User RPCSEC_GSS context has expired.
@@ -1574,9 +1590,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
return 0;
}
-static int nfs4_recover_expired_lease(struct nfs_server *server)
+static int nfs4_client_recover_expired_lease(struct nfs_client *clp)
{
- struct nfs_client *clp = server->nfs_client;
unsigned int loop;
int ret;
@@ -1587,12 +1602,17 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
!test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
break;
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
ret = -EIO;
}
return ret;
}
+static int nfs4_recover_expired_lease(struct nfs_server *server)
+{
+ return nfs4_client_recover_expired_lease(server->nfs_client);
+}
+
/*
* OPEN_EXPIRED:
* reclaim state on the server after a network partition.
@@ -3070,15 +3090,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
{
struct nfs_server *server = NFS_SERVER(data->inode);
- dprintk("--> %s\n", __func__);
-
- if (!nfs4_sequence_done(task, &data->res.seq_res))
- return -EAGAIN;
-
if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
nfs_restart_rpc(task, server->nfs_client);
return -EAGAIN;
@@ -3090,19 +3105,44 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+{
+
+ dprintk("--> %s\n", __func__);
+
+ if (!nfs4_sequence_done(task, &data->res.seq_res))
+ return -EAGAIN;
+
+ return data->read_done_cb(task, data);
+}
+
static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
{
data->timestamp = jiffies;
+ data->read_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+/* Reset the the nfs_read_data to send the read to the MDS. */
+void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
+{
+ dprintk("%s Reset task for i/o through\n", __func__);
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ /* offsets will differ in the dense stripe case */
+ data->args.offset = data->mds_offset;
+ data->ds_clp = NULL;
+ data->args.fh = NFS_FH(data->inode);
+ data->read_done_cb = nfs4_read_done_cb;
+ task->tk_ops = data->mds_ops;
+ rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+}
+EXPORT_SYMBOL_GPL(nfs4_reset_read);
+
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
{
struct inode *inode = data->inode;
- if (!nfs4_sequence_done(task, &data->res.seq_res))
- return -EAGAIN;
-
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
return -EAGAIN;
@@ -3114,11 +3154,41 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+{
+ if (!nfs4_sequence_done(task, &data->res.seq_res))
+ return -EAGAIN;
+ return data->write_done_cb(task, data);
+}
+
+/* Reset the the nfs_write_data to send the write to the MDS. */
+void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
+{
+ dprintk("%s Reset task for i/o through\n", __func__);
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ data->ds_clp = NULL;
+ data->write_done_cb = nfs4_write_done_cb;
+ data->args.fh = NFS_FH(data->inode);
+ data->args.bitmask = data->res.server->cache_consistency_bitmask;
+ data->args.offset = data->mds_offset;
+ data->res.fattr = &data->fattr;
+ task->tk_ops = data->mds_ops;
+ rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+}
+EXPORT_SYMBOL_GPL(nfs4_reset_write);
+
static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->inode);
- data->args.bitmask = server->cache_consistency_bitmask;
+ if (data->lseg) {
+ data->args.bitmask = NULL;
+ data->res.fattr = NULL;
+ } else
+ data->args.bitmask = server->cache_consistency_bitmask;
+ if (!data->write_done_cb)
+ data->write_done_cb = nfs4_write_done_cb;
data->res.server = server;
data->timestamp = jiffies;
@@ -3178,7 +3248,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
return;
}
do_renew_lease(clp, timestamp);
@@ -3252,6 +3322,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
}
}
+static int buf_to_pages_noslab(const void *buf, size_t buflen,
+ struct page **pages, unsigned int *pgbase)
+{
+ struct page *newpage, **spages;
+ int rc = 0;
+ size_t len;
+ spages = pages;
+
+ do {
+ len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
+ newpage = alloc_page(GFP_KERNEL);
+
+ if (newpage == NULL)
+ goto unwind;
+ memcpy(page_address(newpage), buf, len);
+ buf += len;
+ buflen -= len;
+ *pages++ = newpage;
+ rc++;
+ } while (buflen != 0);
+
+ return rc;
+
+unwind:
+ for(; rc > 0; rc--)
+ __free_page(spages[rc-1]);
+ return -ENOMEM;
+}
+
struct nfs4_cached_acl {
int cached;
size_t len;
@@ -3420,13 +3519,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
.rpc_argp = &arg,
.rpc_resp = &res,
};
- int ret;
+ int ret, i;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
+ i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+ if (i < 0)
+ return i;
nfs_inode_return_delegation(inode);
- buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+
+ /*
+ * Free each page after tx, so the only ref left is
+ * held by the network stack
+ */
+ for (; i > 0; i--)
+ put_page(pages[i-1]);
+
/*
* Acl update can result in inode attribute update.
* so mark the attribute cache invalid.
@@ -3464,12 +3573,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3480,7 +3590,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3607,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-do_state_recovery:
+wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
- nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
task->tk_status = 0;
@@ -4110,7 +4219,7 @@ static void nfs4_lock_release(void *calldata)
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("%s: cancelling lock!\n", __func__);
} else
nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4243,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_state *state = lsp->ls_state;
-
switch (error) {
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_EXPIRED:
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
if (new_lock_owner != 0 ||
(lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_nograce(clp, state);
- lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ nfs4_schedule_stateid_recovery(server, lsp->ls_state);
break;
case -NFS4ERR_STALE_STATEID:
- if (new_lock_owner != 0 ||
- (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_reboot(clp, state);
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
};
}
@@ -4366,12 +4470,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_EXPIRED:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ goto out;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -ERESTARTSYS:
/*
@@ -4381,7 +4487,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
err = 0;
goto out;
case -EKEYEXPIRED:
@@ -4988,10 +5094,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
int status;
unsigned *ptr;
struct nfs4_session *session = clp->cl_session;
+ long timeout = 0;
+ int err;
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ do {
+ status = _nfs4_proc_create_session(clp);
+ if (status == -NFS4ERR_DELAY) {
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ if (err)
+ status = err;
+ }
+ } while (status == -NFS4ERR_DELAY);
+
if (status)
goto out;
@@ -5073,6 +5189,27 @@ int nfs4_init_session(struct nfs_server *server)
return ret;
}
+int nfs4_init_ds_session(struct nfs_client *clp)
+{
+ struct nfs4_session *session = clp->cl_session;
+ int ret;
+
+ if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
+ return 0;
+
+ ret = nfs4_client_recover_expired_lease(clp);
+ if (!ret)
+ /* Test for the DS role */
+ if (!is_ds_client(clp))
+ ret = -ENODEV;
+ if (!ret)
+ ret = nfs4_check_client_ready(clp);
+ return ret;
+
+}
+EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
+
+
/*
* Renew the cl_session lease.
*/
@@ -5100,7 +5237,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5187,7 +5324,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if (IS_ERR(task))
ret = PTR_ERR(task);
else
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
}
@@ -5203,8 +5340,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
}
ret = rpc_wait_for_completion_task(task);
- if (!ret)
+ if (!ret) {
+ struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
+
+ if (task->tk_status == 0)
+ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
ret = task->tk_status;
+ }
rpc_put_task(task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5383,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5309,6 +5451,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
status = PTR_ERR(task);
goto out;
}
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status == 0)
+ status = task->tk_status;
rpc_put_task(task);
return 0;
out:
@@ -5595,6 +5740,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.clear_acl_cache = nfs4_zap_acl_attr,
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
+ .init_client = nfs4_init_client,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 402143d75fc5..df8e7f3ca56d 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -64,12 +64,8 @@ nfs4_renew_state(struct work_struct *work)
ops = clp->cl_mvops->state_renewal_ops;
dprintk("%s: start\n", __func__);
- rcu_read_lock();
- if (list_empty(&clp->cl_superblocks)) {
- rcu_read_unlock();
+ if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
goto out;
- }
- rcu_read_unlock();
spin_lock(&clp->cl_lock);
lease = clp->cl_lease_time;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..ab1bf5bb021f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -153,6 +153,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
int status;
struct nfs_fsinfo fsinfo;
+ if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
+ nfs4_schedule_state_renewal(clp);
+ return 0;
+ }
+
status = nfs4_proc_get_lease_time(clp, &fsinfo);
if (status == 0) {
/* Update lease time and schedule renewal */
@@ -1007,9 +1012,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
}
/*
- * Schedule a state recovery attempt
+ * Schedule a lease recovery attempt
*/
-void nfs4_schedule_state_recovery(struct nfs_client *clp)
+void nfs4_schedule_lease_recovery(struct nfs_client *clp)
{
if (!clp)
return;
@@ -1018,7 +1023,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
-int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1037,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
return 1;
}
-int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1046,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
return 1;
}
+void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -1436,10 +1449,16 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
}
#ifdef CONFIG_NFS_V4_1
+void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+ nfs4_schedule_lease_recovery(session->clp);
+}
+EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
+
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1466,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
clp->cl_boot_time = CURRENT_TIME;
nfs4_state_start_reclaim_nograce(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1455,7 +1474,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1475,7 +1494,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..0cf560f77884 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -844,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
if (iap->ia_valid & ATTR_MODE)
len += 4;
if (iap->ia_valid & ATTR_UID) {
- owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ);
+ owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
if (owner_namelen < 0) {
dprintk("nfs: couldn't resolve uid %d to string\n",
iap->ia_uid);
@@ -856,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
}
if (iap->ia_valid & ATTR_GID) {
- owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ);
+ owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
if (owner_grouplen < 0) {
dprintk("nfs: couldn't resolve gid %d to string\n",
iap->ia_gid);
@@ -1384,7 +1384,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
hdr->replen += decode_putrootfh_maxsz;
}
-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
+static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
{
nfs4_stateid stateid;
__be32 *p;
@@ -1392,6 +1392,8 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
p = reserve_space(xdr, NFS4_STATEID_SIZE);
if (ctx->state != NULL) {
nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
+ if (zero_seqid)
+ stateid.stateid.seqid = 0;
xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
} else
xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1404,7 +1406,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
p = reserve_space(xdr, 4);
*p = cpu_to_be32(OP_READ);
- encode_stateid(xdr, args->context, args->lock_context);
+ encode_stateid(xdr, args->context, args->lock_context,
+ hdr->minorversion);
p = reserve_space(xdr, 12);
p = xdr_encode_hyper(p, args->offset);
@@ -1592,7 +1595,8 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
p = reserve_space(xdr, 4);
*p = cpu_to_be32(OP_WRITE);
- encode_stateid(xdr, args->context, args->lock_context);
+ encode_stateid(xdr, args->context, args->lock_context,
+ hdr->minorversion);
p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->offset);
@@ -1660,7 +1664,7 @@ static void encode_create_session(struct xdr_stream *xdr,
p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
*p++ = cpu_to_be32(OP_CREATE_SESSION);
- p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ p = xdr_encode_hyper(p, clp->cl_clientid);
*p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
*p++ = cpu_to_be32(args->flags); /*flags */
@@ -2271,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_putfh(xdr, args->fh, &hdr);
encode_write(xdr, args, &hdr);
req->rq_snd_buf.flags |= XDRBUF_WRITE;
- encode_getfattr(xdr, args->bitmask, &hdr);
+ if (args->bitmask)
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -3382,7 +3387,7 @@ out_overflow:
}
static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
- struct nfs_client *clp, uint32_t *uid, int may_sleep)
+ const struct nfs_server *server, uint32_t *uid, int may_sleep)
{
uint32_t len;
__be32 *p;
@@ -3402,7 +3407,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
if (!may_sleep) {
/* do nothing */
} else if (len < XDR_MAX_NETOBJ) {
- if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
+ if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
ret = NFS_ATTR_FATTR_OWNER;
else
dprintk("%s: nfs_map_name_to_uid failed!\n",
@@ -3420,7 +3425,7 @@ out_overflow:
}
static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
- struct nfs_client *clp, uint32_t *gid, int may_sleep)
+ const struct nfs_server *server, uint32_t *gid, int may_sleep)
{
uint32_t len;
__be32 *p;
@@ -3440,7 +3445,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
if (!may_sleep) {
/* do nothing */
} else if (len < XDR_MAX_NETOBJ) {
- if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
+ if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
ret = NFS_ATTR_FATTR_GROUP;
else
dprintk("%s: nfs_map_group_to_gid failed!\n",
@@ -3939,14 +3944,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_owner(xdr, bitmap, server->nfs_client,
- &fattr->uid, may_sleep);
+ status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_group(xdr, bitmap, server->nfs_client,
- &fattr->gid, may_sleep);
+ status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
@@ -4694,7 +4697,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_ex_clid);
+ xdr_decode_hyper(p, &clp->cl_clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
@@ -5690,8 +5693,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_write(xdr, res);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ if (res->fattr)
+ decode_getfattr(xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
if (!status)
status = res->count;
out:
@@ -6167,8 +6171,6 @@ static struct {
{ NFS4ERR_DQUOT, -EDQUOT },
{ NFS4ERR_STALE, -ESTALE },
{ NFS4ERR_BADHANDLE, -EBADHANDLE },
- { NFS4ERR_BADOWNER, -EINVAL },
- { NFS4ERR_BADNAME, -EINVAL },
{ NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
{ NFS4ERR_NOTSUPP, -ENOTSUPP },
{ NFS4ERR_TOOSMALL, -ETOOSMALL },
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
+/* Default NFSROOT mount options. */
+#define NFS_DEF_OPTIONS "udp"
+
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
/* Text-based mount options passed to super.c */
-static char nfs_root_options[256] __initdata = "";
+static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
/* Address of NFS server */
static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
}
static int __init root_nfs_cat(char *dest, const char *src,
- const size_t destlen)
+ const size_t destlen)
{
+ size_t len = strlen(dest);
+
+ if (len && dest[len - 1] != ',')
+ if (strlcat(dest, ",", destlen) > destlen)
+ return -1;
+
if (strlcat(dest, src, destlen) > destlen)
return -1;
return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
if (root_nfs_cat(nfs_root_options, incoming,
sizeof(nfs_root_options)))
return -1;
-
- /*
- * Possibly prepare for more options to be appended
- */
- if (nfs_root_options[0] != '\0' &&
- nfs_root_options[strlen(nfs_root_options)] != ',')
- if (root_nfs_cat(nfs_root_options, ",",
- sizeof(nfs_root_options)))
- return -1;
-
return 0;
}
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
*/
static int __init root_nfs_data(char *cmdline)
{
- char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
+ char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
int len, retval = -1;
char *tmp = NULL;
const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
* Append mandatory options for nfsroot so they override
* what has come before
*/
- snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
+ snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
&servaddr);
- if (root_nfs_cat(nfs_root_options, addr_option,
+ if (root_nfs_cat(nfs_root_options, mand_options,
sizeof(nfs_root_options)))
goto out_optionstoolong;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e1164e3f9e69..23e794410669 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,6 +20,7 @@
#include <linux/nfs_mount.h>
#include "internal.h"
+#include "pnfs.h"
static struct kmem_cache *nfs_page_cachep;
@@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
*/
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+ int (*doio)(struct nfs_pageio_descriptor *),
size_t bsize,
int io_flags)
{
@@ -226,6 +227,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_doio = doio;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
+ desc->pg_lseg = NULL;
}
/**
@@ -240,7 +242,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
* Return 'true' if this is the case, else return 'false'.
*/
static int nfs_can_coalesce_requests(struct nfs_page *prev,
- struct nfs_page *req)
+ struct nfs_page *req,
+ struct nfs_pageio_descriptor *pgio)
{
if (req->wb_context->cred != prev->wb_context->cred)
return 0;
@@ -254,6 +257,12 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
return 0;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return 0;
+ /*
+ * Non-whole file layouts need to check that req is inside of
+ * pgio->pg_lseg.
+ */
+ if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
+ return 0;
return 1;
}
@@ -286,7 +295,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
if (newlen > desc->pg_bsize)
return 0;
prev = nfs_list_entry(desc->pg_list.prev);
- if (!nfs_can_coalesce_requests(prev, req))
+ if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
} else
desc->pg_base = req->wb_pgbase;
@@ -302,12 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
- int error = desc->pg_doio(desc->pg_inode,
- &desc->pg_list,
- nfs_page_array_len(desc->pg_base,
- desc->pg_count),
- desc->pg_count,
- desc->pg_ioflags);
+ int error = desc->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
else
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1b1bc1a0fb0a..f38813a0a295 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -30,6 +30,7 @@
#include <linux/nfs_fs.h>
#include "internal.h"
#include "pnfs.h"
+#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
@@ -74,10 +75,8 @@ find_pnfs_driver(u32 id)
void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
- if (nfss->pnfs_curr_ld) {
- nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
+ if (nfss->pnfs_curr_ld)
module_put(nfss->pnfs_curr_ld->owner);
- }
nfss->pnfs_curr_ld = NULL;
}
@@ -115,13 +114,7 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
goto out_no_driver;
}
server->pnfs_curr_ld = ld_type;
- if (ld_type->set_layoutdriver(server)) {
- printk(KERN_ERR
- "%s: Error initializing mount point for layout driver %u.\n",
- __func__, id);
- module_put(ld_type->owner);
- goto out_no_driver;
- }
+
dprintk("%s: pNFS module for %u set\n", __func__, id);
return;
@@ -230,37 +223,41 @@ static void free_lseg(struct pnfs_layout_segment *lseg)
put_layout_hdr(NFS_I(ino)->layout);
}
-/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
- * could sleep, so must be called outside of the lock.
- * Returns 1 if object was removed, otherwise return 0.
- */
-static int
-put_lseg_locked(struct pnfs_layout_segment *lseg,
- struct list_head *tmp_list)
+static void
+put_lseg_common(struct pnfs_layout_segment *lseg)
+{
+ struct inode *inode = lseg->pls_layout->plh_inode;
+
+ BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+ list_del_init(&lseg->pls_list);
+ if (list_empty(&lseg->pls_layout->plh_segs)) {
+ set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
+ /* Matched by initial refcount set in alloc_init_layout_hdr */
+ put_layout_hdr_locked(lseg->pls_layout);
+ }
+ rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
+}
+
+void
+put_lseg(struct pnfs_layout_segment *lseg)
{
+ struct inode *inode;
+
+ if (!lseg)
+ return;
+
dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount),
test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
- if (atomic_dec_and_test(&lseg->pls_refcount)) {
- struct inode *ino = lseg->pls_layout->plh_inode;
+ inode = lseg->pls_layout->plh_inode;
+ if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
+ LIST_HEAD(free_me);
- BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
- list_del(&lseg->pls_list);
- if (list_empty(&lseg->pls_layout->plh_segs)) {
- struct nfs_client *clp;
-
- clp = NFS_SERVER(ino)->nfs_client;
- spin_lock(&clp->cl_lock);
- /* List does not take a reference, so no need for put here */
- list_del_init(&lseg->pls_layout->plh_layouts);
- spin_unlock(&clp->cl_lock);
- clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
- }
- rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
- list_add(&lseg->pls_list, tmp_list);
- return 1;
+ put_lseg_common(lseg);
+ list_add(&lseg->pls_list, &free_me);
+ spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&free_me);
}
- return 0;
}
static bool
@@ -281,7 +278,13 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
* list. It will now be removed when all
* outstanding io is finished.
*/
- rv = put_lseg_locked(lseg, tmp_list);
+ dprintk("%s: lseg %p ref %d\n", __func__, lseg,
+ atomic_read(&lseg->pls_refcount));
+ if (atomic_dec_and_test(&lseg->pls_refcount)) {
+ put_lseg_common(lseg);
+ list_add(&lseg->pls_list, tmp_list);
+ rv = 1;
+ }
}
return rv;
}
@@ -299,6 +302,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin lo %p\n", __func__, lo);
+ if (list_empty(&lo->plh_segs)) {
+ if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
+ put_layout_hdr_locked(lo);
+ return 0;
+ }
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
dprintk("%s: freeing lseg %p iomode %d "
@@ -312,11 +320,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return invalid - removed;
}
+/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
struct pnfs_layout_segment *lseg, *tmp;
+ struct pnfs_layout_hdr *lo;
+
+ if (list_empty(free_me))
+ return;
+ lo = list_first_entry(free_me, struct pnfs_layout_segment,
+ pls_list)->pls_layout;
+
+ if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
+ struct nfs_client *clp;
+
+ clp = NFS_SERVER(lo->plh_inode)->nfs_client;
+ spin_lock(&clp->cl_lock);
+ list_del_init(&lo->plh_layouts);
+ spin_unlock(&clp->cl_lock);
+ }
list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
list_del(&lseg->pls_list);
free_lseg(lseg);
@@ -332,10 +356,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_lock(&nfsi->vfs_inode.i_lock);
lo = nfsi->layout;
if (lo) {
- set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+ lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
- /* Matched by refcount set to 1 in alloc_init_layout_hdr */
- put_layout_hdr_locked(lo);
}
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
@@ -403,6 +425,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
(int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
return true;
return lo->plh_block_lgets ||
+ test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
(list_empty(&lo->plh_segs) &&
(atomic_read(&lo->plh_outstanding) > lget));
@@ -674,7 +697,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
is_matching_lseg(lseg, iomode)) {
- ret = lseg;
+ ret = get_lseg(lseg);
break;
}
if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
@@ -699,6 +722,7 @@ pnfs_update_layout(struct inode *ino,
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
+ bool first = false;
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
return NULL;
@@ -715,21 +739,25 @@ pnfs_update_layout(struct inode *ino,
dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
}
- /* Check to see if the layout for the given range already exists */
- lseg = pnfs_find_lseg(lo, iomode);
- if (lseg)
- goto out_unlock;
/* if LAYOUTGET already failed once we don't try again */
if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
goto out_unlock;
+ /* Check to see if the layout for the given range already exists */
+ lseg = pnfs_find_lseg(lo, iomode);
+ if (lseg)
+ goto out_unlock;
+
if (pnfs_layoutgets_blocked(lo, NULL, 0))
goto out_unlock;
atomic_inc(&lo->plh_outstanding);
get_layout_hdr(lo);
- if (list_empty(&lo->plh_segs)) {
+ if (list_empty(&lo->plh_segs))
+ first = true;
+ spin_unlock(&ino->i_lock);
+ if (first) {
/* The lo must be on the clp list if there is any
* chance of a CB_LAYOUTRECALL(FILE) coming in.
*/
@@ -738,24 +766,18 @@ pnfs_update_layout(struct inode *ino,
list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
spin_unlock(&clp->cl_lock);
}
- spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, iomode);
- if (!lseg) {
- spin_lock(&ino->i_lock);
- if (list_empty(&lo->plh_segs)) {
- spin_lock(&clp->cl_lock);
- list_del_init(&lo->plh_layouts);
- spin_unlock(&clp->cl_lock);
- clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
- }
- spin_unlock(&ino->i_lock);
+ if (!lseg && first) {
+ spin_lock(&clp->cl_lock);
+ list_del_init(&lo->plh_layouts);
+ spin_unlock(&clp->cl_lock);
}
atomic_dec(&lo->plh_outstanding);
put_layout_hdr(lo);
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
- nfsi->layout->plh_flags, lseg);
+ nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
return lseg;
out_unlock:
spin_unlock(&ino->i_lock);
@@ -808,7 +830,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
init_lseg(lo, lseg);
lseg->pls_range = res->range;
- *lgp->lsegpp = lseg;
+ *lgp->lsegpp = get_lseg(lseg);
pnfs_insert_layout(lo, lseg);
if (res->return_on_close) {
@@ -829,137 +851,97 @@ out_forget_reply:
goto out;
}
-/*
- * Device ID cache. Currently supports one layout type per struct nfs_client.
- * Add layout type to the lookup key to expand to support multiple types.
- */
-int
-pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
- void (*free_callback)(struct pnfs_deviceid_node *))
+static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *prev,
+ struct nfs_page *req)
{
- struct pnfs_deviceid_cache *c;
-
- c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
- spin_lock(&clp->cl_lock);
- if (clp->cl_devid_cache != NULL) {
- atomic_inc(&clp->cl_devid_cache->dc_ref);
- dprintk("%s [kref [%d]]\n", __func__,
- atomic_read(&clp->cl_devid_cache->dc_ref));
- kfree(c);
- } else {
- /* kzalloc initializes hlists */
- spin_lock_init(&c->dc_lock);
- atomic_set(&c->dc_ref, 1);
- c->dc_free_callback = free_callback;
- clp->cl_devid_cache = c;
- dprintk("%s [new]\n", __func__);
+ if (pgio->pg_count == prev->wb_bytes) {
+ /* This is first coelesce call for a series of nfs_pages */
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ prev->wb_context,
+ IOMODE_READ);
}
- spin_unlock(&clp->cl_lock);
- return 0;
+ return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}
-EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
-/*
- * Called from pnfs_layoutdriver_type->free_lseg
- * last layout segment reference frees deviceid
- */
void
-pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
- struct pnfs_deviceid_node *devid)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
- struct nfs4_deviceid *id = &devid->de_id;
- struct pnfs_deviceid_node *d;
- struct hlist_node *n;
- long h = nfs4_deviceid_hash(id);
+ struct pnfs_layoutdriver_type *ld;
- dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
- if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
- return;
+ ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
+}
- hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
- if (!memcmp(&d->de_id, id, sizeof(*id))) {
- hlist_del_rcu(&d->de_node);
- spin_unlock(&c->dc_lock);
- synchronize_rcu();
- c->dc_free_callback(devid);
- return;
- }
- spin_unlock(&c->dc_lock);
- /* Why wasn't it found in the list? */
- BUG();
-}
-EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
-
-/* Find and reference a deviceid */
-struct pnfs_deviceid_node *
-pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
-{
- struct pnfs_deviceid_node *d;
- struct hlist_node *n;
- long hash = nfs4_deviceid_hash(id);
-
- dprintk("--> %s hash %ld\n", __func__, hash);
- rcu_read_lock();
- hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
- if (!memcmp(&d->de_id, id, sizeof(*id))) {
- if (!atomic_inc_not_zero(&d->de_ref)) {
- goto fail;
- } else {
- rcu_read_unlock();
- return d;
- }
- }
+static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ if (pgio->pg_count == prev->wb_bytes) {
+ /* This is first coelesce call for a series of nfs_pages */
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ prev->wb_context,
+ IOMODE_RW);
}
-fail:
- rcu_read_unlock();
- return NULL;
+ return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+}
+
+void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+{
+ struct pnfs_layoutdriver_type *ld;
+
+ ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
+}
+
+enum pnfs_try_status
+pnfs_try_to_write_data(struct nfs_write_data *wdata,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ struct inode *inode = wdata->inode;
+ enum pnfs_try_status trypnfs;
+ struct nfs_server *nfss = NFS_SERVER(inode);
+
+ wdata->mds_ops = call_ops;
+
+ dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
+ inode->i_ino, wdata->args.count, wdata->args.offset, how);
+
+ trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED) {
+ put_lseg(wdata->lseg);
+ wdata->lseg = NULL;
+ } else
+ nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
+
+ dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
+ return trypnfs;
}
-EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
/*
- * Add a deviceid to the cache.
- * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
+ * Call the appropriate parallel I/O subsystem read function.
*/
-struct pnfs_deviceid_node *
-pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
-{
- struct pnfs_deviceid_node *d;
- long hash = nfs4_deviceid_hash(&new->de_id);
-
- dprintk("--> %s hash %ld\n", __func__, hash);
- spin_lock(&c->dc_lock);
- d = pnfs_find_get_deviceid(c, &new->de_id);
- if (d) {
- spin_unlock(&c->dc_lock);
- dprintk("%s [discard]\n", __func__);
- c->dc_free_callback(new);
- return d;
- }
- INIT_HLIST_NODE(&new->de_node);
- atomic_set(&new->de_ref, 1);
- hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
- spin_unlock(&c->dc_lock);
- dprintk("%s [new]\n", __func__);
- return new;
-}
-EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
-
-void
-pnfs_put_deviceid_cache(struct nfs_client *clp)
+enum pnfs_try_status
+pnfs_try_to_read_data(struct nfs_read_data *rdata,
+ const struct rpc_call_ops *call_ops)
{
- struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
+ struct inode *inode = rdata->inode;
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ enum pnfs_try_status trypnfs;
- dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
- if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
- int i;
- /* Verify cache is empty */
- for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
- BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
- clp->cl_devid_cache = NULL;
- spin_unlock(&clp->cl_lock);
- kfree(local);
+ rdata->mds_ops = call_ops;
+
+ dprintk("%s: Reading ino:%lu %u@%llu\n",
+ __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
+
+ trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
+ if (trypnfs == PNFS_NOT_ATTEMPTED) {
+ put_lseg(rdata->lseg);
+ rdata->lseg = NULL;
+ } else {
+ nfs_inc_stats(inode, NFSIOS_PNFS_READ);
}
+ dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
+ return trypnfs;
}
-EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e2612ea0cbed..6380b9405bcd 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,6 +30,8 @@
#ifndef FS_NFS_PNFS_H
#define FS_NFS_PNFS_H
+#include <linux/nfs_page.h>
+
enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
NFS_LSEG_ROC, /* roc bit received from server */
@@ -43,6 +45,11 @@ struct pnfs_layout_segment {
struct pnfs_layout_hdr *pls_layout;
};
+enum pnfs_try_status {
+ PNFS_ATTEMPTED = 0,
+ PNFS_NOT_ATTEMPTED = 1,
+};
+
#ifdef CONFIG_NFS_V4_1
#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -61,10 +68,18 @@ struct pnfs_layoutdriver_type {
const u32 id;
const char *name;
struct module *owner;
- int (*set_layoutdriver) (struct nfs_server *);
- int (*clear_layoutdriver) (struct nfs_server *);
struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
void (*free_lseg) (struct pnfs_layout_segment *lseg);
+
+ /* test for nfs page cache coalescing */
+ int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+
+ /*
+ * Return PNFS_ATTEMPTED to indicate the layout code has attempted
+ * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
+ */
+ enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
+ enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
};
struct pnfs_layout_hdr {
@@ -90,52 +105,6 @@ struct pnfs_device {
unsigned int pglen;
};
-/*
- * Device ID RCU cache. A device ID is unique per client ID and layout type.
- */
-#define NFS4_DEVICE_ID_HASH_BITS 5
-#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
-#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
-
-static inline u32
-nfs4_deviceid_hash(struct nfs4_deviceid *id)
-{
- unsigned char *cptr = (unsigned char *)id->data;
- unsigned int nbytes = NFS4_DEVICEID4_SIZE;
- u32 x = 0;
-
- while (nbytes--) {
- x *= 37;
- x += *cptr++;
- }
- return x & NFS4_DEVICE_ID_HASH_MASK;
-}
-
-struct pnfs_deviceid_node {
- struct hlist_node de_node;
- struct nfs4_deviceid de_id;
- atomic_t de_ref;
-};
-
-struct pnfs_deviceid_cache {
- spinlock_t dc_lock;
- atomic_t dc_ref;
- void (*dc_free_callback)(struct pnfs_deviceid_node *);
- struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
-};
-
-extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
- void (*free_callback)(struct pnfs_deviceid_node *));
-extern void pnfs_put_deviceid_cache(struct nfs_client *);
-extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
- struct pnfs_deviceid_cache *,
- struct nfs4_deviceid *);
-extern struct pnfs_deviceid_node *pnfs_add_deviceid(
- struct pnfs_deviceid_cache *,
- struct pnfs_deviceid_node *);
-extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
- struct pnfs_deviceid_node *devid);
-
extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
@@ -146,11 +115,18 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
/* pnfs.c */
void get_layout_hdr(struct pnfs_layout_hdr *lo);
+void put_lseg(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
+enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
+ const struct rpc_call_ops *, int);
+enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
+ const struct rpc_call_ops *);
+void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
@@ -177,6 +153,16 @@ static inline int lo_fail_bit(u32 iomode)
NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}
+static inline struct pnfs_layout_segment *
+get_lseg(struct pnfs_layout_segment *lseg)
+{
+ if (lseg) {
+ atomic_inc(&lseg->pls_refcount);
+ smp_mb__after_atomic_inc();
+ }
+ return lseg;
+}
+
/* Return true if a layout driver is being used for this mountpoint */
static inline int pnfs_enabled_sb(struct nfs_server *nfss)
{
@@ -194,12 +180,36 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
}
static inline struct pnfs_layout_segment *
+get_lseg(struct pnfs_layout_segment *lseg)
+{
+ return NULL;
+}
+
+static inline void put_lseg(struct pnfs_layout_segment *lseg)
+{
+}
+
+static inline struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type)
{
return NULL;
}
+static inline enum pnfs_try_status
+pnfs_try_to_read_data(struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ return PNFS_NOT_ATTEMPTED;
+}
+
+static inline enum pnfs_try_status
+pnfs_try_to_write_data(struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ return PNFS_NOT_ATTEMPTED;
+}
+
static inline bool
pnfs_roc(struct inode *ino)
{
@@ -230,6 +240,18 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
+static inline void
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
+{
+ pgio->pg_test = NULL;
+}
+
+static inline void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
+{
+ pgio->pg_test = NULL;
+}
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 77d5e21c4ad6..b8ec170f2a0f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -741,4 +741,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lock = nfs_proc_lock,
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
+ .init_client = nfs_init_client,
};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index aedcaa7f291f..7cded2b12a05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,19 +18,20 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
+#include <linux/module.h>
#include <asm/system.h>
+#include "pnfs.h"
#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
-#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
-static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
@@ -69,6 +70,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
static void nfs_readdata_release(struct nfs_read_data *rdata)
{
+ put_lseg(rdata->lseg);
put_nfs_open_context(rdata->args.context);
nfs_readdata_free(rdata);
}
@@ -114,14 +116,13 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
- LIST_HEAD(one_request);
struct nfs_page *new;
unsigned int len;
+ struct nfs_pageio_descriptor pgio;
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
- pnfs_update_layout(inode, ctx, IOMODE_READ);
new = nfs_create_request(ctx, inode, page, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
@@ -130,11 +131,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
- nfs_list_add_request(new, &one_request);
+ nfs_pageio_init(&pgio, inode, NULL, 0, 0);
+ nfs_list_add_request(new, &pgio.pg_list);
+ pgio.pg_count = len;
+
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
- nfs_pagein_multi(inode, &one_request, 1, len, 0);
+ nfs_pagein_multi(&pgio);
else
- nfs_pagein_one(inode, &one_request, 1, len, 0);
+ nfs_pagein_one(&pgio);
return 0;
}
@@ -155,24 +159,20 @@ static void nfs_readpage_release(struct nfs_page *req)
nfs_release_request(req);
}
-/*
- * Set up the NFS read request struct
- */
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops,
- unsigned int count, unsigned int offset)
+int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
+ const struct rpc_call_ops *call_ops)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = data->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = req->wb_context->cred,
+ .rpc_cred = data->cred,
};
struct rpc_task_setup task_setup_data = {
.task = &data->task,
- .rpc_client = NFS_CLIENT(inode),
+ .rpc_client = clnt,
.rpc_message = &msg,
.callback_ops = call_ops,
.callback_data = data,
@@ -180,9 +180,39 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
.flags = RPC_TASK_ASYNC | swap_flags,
};
+ /* Set up the initial task struct. */
+ NFS_PROTO(inode)->read_setup(data, &msg);
+
+ dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
+ "offset %llu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_read);
+
+/*
+ * Set up the NFS read request struct
+ */
+static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops,
+ unsigned int count, unsigned int offset,
+ struct pnfs_layout_segment *lseg)
+{
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+
data->req = req;
data->inode = inode;
- data->cred = msg.rpc_cred;
+ data->cred = req->wb_context->cred;
+ data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -197,21 +227,11 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->read_setup(data, &msg);
-
- dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- count,
- (unsigned long long)data->args.offset);
+ if (data->lseg &&
+ (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
+ return 0;
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
- rpc_put_task(task);
- return 0;
+ return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
static void
@@ -240,20 +260,21 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
-static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
{
- struct nfs_page *req = nfs_list_entry(head->next);
+ struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
- size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+ size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
+ struct pnfs_layout_segment *lseg;
LIST_HEAD(list);
nfs_list_remove_request(req);
- nbytes = count;
+ nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);
@@ -266,9 +287,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
+ BUG_ON(desc->pg_lseg != NULL);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
ClearPageError(page);
offset = 0;
- nbytes = count;
+ nbytes = desc->pg_count;
do {
int ret2;
@@ -280,12 +303,14 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
if (nbytes < rsize)
rsize = nbytes;
ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
- rsize, offset);
+ rsize, offset, lseg);
if (ret == 0)
ret = ret2;
offset += rsize;
nbytes -= rsize;
} while (nbytes != 0);
+ put_lseg(lseg);
+ desc->pg_lseg = NULL;
return ret;
@@ -300,16 +325,21 @@ out_bad:
return -ENOMEM;
}
-static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
+ struct list_head *head = &desc->pg_list;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret = -ENOMEM;
- data = nfs_readdata_alloc(npages);
- if (!data)
- goto out_bad;
+ data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!data) {
+ nfs_async_read_error(head);
+ goto out;
+ }
pages = data->pagevec;
while (!list_empty(head)) {
@@ -320,10 +350,14 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
+ if ((!lseg) && list_is_singular(&data->pages))
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
- return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
-out_bad:
- nfs_async_read_error(head);
+ ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
+ 0, lseg);
+out:
+ put_lseg(lseg);
+ desc->pg_lseg = NULL;
return ret;
}
@@ -366,6 +400,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
return;
/* Yes, so retry the read at the end of the data */
+ data->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
@@ -625,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
+ pnfs_pageio_init_read(&pgio, inode);
if (rsize < PAGE_CACHE_SIZE)
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b68c8607770f..2b8e9a5e366a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -263,8 +263,11 @@ static match_table_t nfs_local_lock_tokens = {
static void nfs_umount_begin(struct super_block *);
static int nfs_statfs(struct dentry *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
+static int nfs_show_devname(struct seq_file *, struct vfsmount *);
+static int nfs_show_path(struct seq_file *, struct vfsmount *);
static int nfs_show_stats(struct seq_file *, struct vfsmount *);
-static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
+static struct dentry *nfs_fs_mount(struct file_system_type *,
+ int, const char *, void *);
static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static void nfs_put_super(struct super_block *);
@@ -274,7 +277,7 @@ static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
static struct file_system_type nfs_fs_type = {
.owner = THIS_MODULE,
.name = "nfs",
- .get_sb = nfs_get_sb,
+ .mount = nfs_fs_mount,
.kill_sb = nfs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -296,6 +299,8 @@ static const struct super_operations nfs_sops = {
.evict_inode = nfs_evict_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_devname = nfs_show_devname,
+ .show_path = nfs_show_path,
.show_stats = nfs_show_stats,
.remount_fs = nfs_remount,
};
@@ -303,16 +308,16 @@ static const struct super_operations nfs_sops = {
#ifdef CONFIG_NFS_V4
static int nfs4_validate_text_mount_data(void *options,
struct nfs_parsed_mount_data *args, const char *dev_name);
-static int nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
-static int nfs4_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
+ struct nfs_parsed_mount_data *data);
+static struct dentry *nfs4_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
-static int nfs4_referral_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
static void nfs4_kill_super(struct super_block *sb);
@@ -320,7 +325,7 @@ static void nfs4_kill_super(struct super_block *sb);
static struct file_system_type nfs4_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_get_sb,
+ .mount = nfs4_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -352,7 +357,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
struct file_system_type nfs4_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_referral_get_sb,
+ .mount = nfs4_referral_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -366,6 +371,8 @@ static const struct super_operations nfs4_sops = {
.evict_inode = nfs4_evict_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
+ .show_devname = nfs_show_devname,
+ .show_path = nfs_show_path,
.show_stats = nfs_show_stats,
.remount_fs = nfs_remount,
};
@@ -726,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
+static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
+{
+ char *page = (char *) __get_free_page(GFP_KERNEL);
+ char *devname, *dummy;
+ int err = 0;
+ if (!page)
+ return -ENOMEM;
+ devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE);
+ if (IS_ERR(devname))
+ err = PTR_ERR(devname);
+ else
+ seq_escape(m, devname, " \t\n\\");
+ free_page((unsigned long)page);
+ return err;
+}
+
+static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
+{
+ seq_puts(m, "/");
+ return 0;
+}
+
/*
* Present statistical information for this VFS mountpoint
*/
@@ -979,6 +1008,27 @@ static int nfs_parse_security_flavors(char *value,
return 1;
}
+static int nfs_get_option_str(substring_t args[], char **option)
+{
+ kfree(*option);
+ *option = match_strdup(args);
+ return !option;
+}
+
+static int nfs_get_option_ul(substring_t args[], unsigned long *option)
+{
+ int rc;
+ char *string;
+
+ string = match_strdup(args);
+ if (string == NULL)
+ return -ENOMEM;
+ rc = strict_strtoul(string, 10, option);
+ kfree(string);
+
+ return rc;
+}
+
/*
* Error-check and convert a string of mount options from user space into
* a data structure. The whole mount string is processed; bad options are
@@ -1127,155 +1177,82 @@ static int nfs_parse_mount_options(char *raw,
* options that take numeric values
*/
case Opt_port:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0 || option > USHRT_MAX)
+ if (nfs_get_option_ul(args, &option) ||
+ option > USHRT_MAX)
goto out_invalid_value;
mnt->nfs_server.port = option;
break;
case Opt_rsize:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->rsize = option;
break;
case Opt_wsize:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->wsize = option;
break;
case Opt_bsize:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->bsize = option;
break;
case Opt_timeo:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0 || option == 0)
+ if (nfs_get_option_ul(args, &option) || option == 0)
goto out_invalid_value;
mnt->timeo = option;
break;
case Opt_retrans:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0 || option == 0)
+ if (nfs_get_option_ul(args, &option) || option == 0)
goto out_invalid_value;
mnt->retrans = option;
break;
case Opt_acregmin:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acregmin = option;
break;
case Opt_acregmax:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acregmax = option;
break;
case Opt_acdirmin:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acdirmin = option;
break;
case Opt_acdirmax:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acdirmax = option;
break;
case Opt_actimeo:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acregmin = mnt->acregmax =
mnt->acdirmin = mnt->acdirmax = option;
break;
case Opt_namelen:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->namlen = option;
break;
case Opt_mountport:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0 || option > USHRT_MAX)
+ if (nfs_get_option_ul(args, &option) ||
+ option > USHRT_MAX)
goto out_invalid_value;
mnt->mount_server.port = option;
break;
case Opt_mountvers:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0 ||
+ if (nfs_get_option_ul(args, &option) ||
option < NFS_MNT_VERSION ||
option > NFS_MNT3_VERSION)
goto out_invalid_value;
mnt->mount_server.version = option;
break;
case Opt_nfsvers:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
switch (option) {
case NFS2_VERSION:
@@ -1295,12 +1272,7 @@ static int nfs_parse_mount_options(char *raw,
}
break;
case Opt_minorversion:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = strict_strtoul(string, 10, &option);
- kfree(string);
- if (rc != 0)
+ if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
if (option > NFS4_MAX_MINOR_VERSION)
goto out_invalid_value;
@@ -1336,21 +1308,18 @@ static int nfs_parse_mount_options(char *raw,
case Opt_xprt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
- kfree(string);
break;
case Opt_xprt_tcp6:
protofamily = AF_INET6;
case Opt_xprt_tcp:
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- kfree(string);
break;
case Opt_xprt_rdma:
/* vector side protocols to TCP */
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
xprt_load_transport(string);
- kfree(string);
break;
default:
dfprintk(MOUNT, "NFS: unrecognized "
@@ -1358,6 +1327,7 @@ static int nfs_parse_mount_options(char *raw,
kfree(string);
return 0;
}
+ kfree(string);
break;
case Opt_mountproto:
string = match_strdup(args);
@@ -1400,18 +1370,13 @@ static int nfs_parse_mount_options(char *raw,
goto out_invalid_address;
break;
case Opt_clientaddr:
- string = match_strdup(args);
- if (string == NULL)
+ if (nfs_get_option_str(args, &mnt->client_address))
goto out_nomem;
- kfree(mnt->client_address);
- mnt->client_address = string;
break;
case Opt_mounthost:
- string = match_strdup(args);
- if (string == NULL)
+ if (nfs_get_option_str(args,
+ &mnt->mount_server.hostname))
goto out_nomem;
- kfree(mnt->mount_server.hostname);
- mnt->mount_server.hostname = string;
break;
case Opt_mountaddr:
string = match_strdup(args);
@@ -1451,11 +1416,8 @@ static int nfs_parse_mount_options(char *raw,
};
break;
case Opt_fscache_uniq:
- string = match_strdup(args);
- if (string == NULL)
+ if (nfs_get_option_str(args, &mnt->fscache_uniq))
goto out_nomem;
- kfree(mnt->fscache_uniq);
- mnt->fscache_uniq = string;
mnt->options |= NFS_OPTION_FSCACHE;
break;
case Opt_local_lock:
@@ -1665,99 +1627,59 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
return nfs_walk_authlist(args, &request);
}
-static int nfs_parse_simple_hostname(const char *dev_name,
- char **hostname, size_t maxnamlen,
- char **export_path, size_t maxpathlen)
+/*
+ * Split "dev_name" into "hostname:export_path".
+ *
+ * The leftmost colon demarks the split between the server's hostname
+ * and the export path. If the hostname starts with a left square
+ * bracket, then it may contain colons.
+ *
+ * Note: caller frees hostname and export path, even on error.
+ */
+static int nfs_parse_devname(const char *dev_name,
+ char **hostname, size_t maxnamlen,
+ char **export_path, size_t maxpathlen)
{
size_t len;
- char *colon, *comma;
-
- colon = strchr(dev_name, ':');
- if (colon == NULL)
- goto out_bad_devname;
-
- len = colon - dev_name;
- if (len > maxnamlen)
- goto out_hostname;
+ char *end;
- /* N.B. caller will free nfs_server.hostname in all cases */
- *hostname = kstrndup(dev_name, len, GFP_KERNEL);
- if (!*hostname)
- goto out_nomem;
-
- /* kill possible hostname list: not supported */
- comma = strchr(*hostname, ',');
- if (comma != NULL) {
- if (comma == *hostname)
+ /* Is the host name protected with square brakcets? */
+ if (*dev_name == '[') {
+ end = strchr(++dev_name, ']');
+ if (end == NULL || end[1] != ':')
goto out_bad_devname;
- *comma = '\0';
- }
- colon++;
- len = strlen(colon);
- if (len > maxpathlen)
- goto out_path;
- *export_path = kstrndup(colon, len, GFP_KERNEL);
- if (!*export_path)
- goto out_nomem;
-
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
- return 0;
-
-out_bad_devname:
- dfprintk(MOUNT, "NFS: device name not in host:path format\n");
- return -EINVAL;
-
-out_nomem:
- dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
- return -ENOMEM;
-
-out_hostname:
- dfprintk(MOUNT, "NFS: server hostname too long\n");
- return -ENAMETOOLONG;
-
-out_path:
- dfprintk(MOUNT, "NFS: export pathname too long\n");
- return -ENAMETOOLONG;
-}
-
-/*
- * Hostname has square brackets around it because it contains one or
- * more colons. We look for the first closing square bracket, and a
- * colon must follow it.
- */
-static int nfs_parse_protected_hostname(const char *dev_name,
- char **hostname, size_t maxnamlen,
- char **export_path, size_t maxpathlen)
-{
- size_t len;
- char *start, *end;
+ len = end - dev_name;
+ end++;
+ } else {
+ char *comma;
- start = (char *)(dev_name + 1);
+ end = strchr(dev_name, ':');
+ if (end == NULL)
+ goto out_bad_devname;
+ len = end - dev_name;
- end = strchr(start, ']');
- if (end == NULL)
- goto out_bad_devname;
- if (*(end + 1) != ':')
- goto out_bad_devname;
+ /* kill possible hostname list: not supported */
+ comma = strchr(dev_name, ',');
+ if (comma != NULL && comma < end)
+ *comma = 0;
+ }
- len = end - start;
if (len > maxnamlen)
goto out_hostname;
/* N.B. caller will free nfs_server.hostname in all cases */
- *hostname = kstrndup(start, len, GFP_KERNEL);
+ *hostname = kstrndup(dev_name, len, GFP_KERNEL);
if (*hostname == NULL)
goto out_nomem;
-
- end += 2;
- len = strlen(end);
+ len = strlen(++end);
if (len > maxpathlen)
goto out_path;
*export_path = kstrndup(end, len, GFP_KERNEL);
if (!*export_path)
goto out_nomem;
+ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
return 0;
out_bad_devname:
@@ -1778,29 +1700,6 @@ out_path:
}
/*
- * Split "dev_name" into "hostname:export_path".
- *
- * The leftmost colon demarks the split between the server's hostname
- * and the export path. If the hostname starts with a left square
- * bracket, then it may contain colons.
- *
- * Note: caller frees hostname and export path, even on error.
- */
-static int nfs_parse_devname(const char *dev_name,
- char **hostname, size_t maxnamlen,
- char **export_path, size_t maxpathlen)
-{
- if (*dev_name == '[')
- return nfs_parse_protected_hostname(dev_name,
- hostname, maxnamlen,
- export_path, maxpathlen);
-
- return nfs_parse_simple_hostname(dev_name,
- hostname, maxnamlen,
- export_path, maxpathlen);
-}
-
-/*
* Validate the NFS2/NFS3 mount data
* - fills in the mount root filehandle
*
@@ -2267,19 +2166,19 @@ static int nfs_bdi_register(struct nfs_server *server)
return bdi_register_dev(&server->backing_dev_info, server->s_dev);
}
-static int nfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
{
struct nfs_server *server = NULL;
struct super_block *s;
struct nfs_parsed_mount_data *data;
struct nfs_fh *mntfh;
- struct dentry *mntroot;
+ struct dentry *mntroot = ERR_PTR(-ENOMEM);
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
struct nfs_sb_mountdata sb_mntdata = {
.mntflags = flags,
};
- int error = -ENOMEM;
+ int error;
data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
mntfh = nfs_alloc_fhandle();
@@ -2290,12 +2189,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
/* Validate the mount data */
error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
- if (error < 0)
+ if (error < 0) {
+ mntroot = ERR_PTR(error);
goto out;
+ }
#ifdef CONFIG_NFS_V4
if (data->version == 4) {
- error = nfs4_try_mount(flags, dev_name, data, mnt);
+ mntroot = nfs4_try_mount(flags, dev_name, data);
kfree(data->client_address);
kfree(data->nfs_server.export_path);
goto out;
@@ -2305,7 +2206,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
/* Get a volume representation */
server = nfs_create_server(data, mntfh);
if (IS_ERR(server)) {
- error = PTR_ERR(server);
+ mntroot = ERR_CAST(server);
goto out;
}
sb_mntdata.server = server;
@@ -2316,7 +2217,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
- error = PTR_ERR(s);
+ mntroot = ERR_CAST(s);
goto out_err_nosb;
}
@@ -2325,8 +2226,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
server = NULL;
} else {
error = nfs_bdi_register(server);
- if (error)
+ if (error) {
+ mntroot = ERR_PTR(error);
goto error_splat_bdi;
+ }
}
if (!s->s_root) {
@@ -2336,20 +2239,15 @@ static int nfs_get_sb(struct file_system_type *fs_type,
s, data ? data->fscache_uniq : NULL, NULL);
}
- mntroot = nfs_get_root(s, mntfh);
- if (IS_ERR(mntroot)) {
- error = PTR_ERR(mntroot);
+ mntroot = nfs_get_root(s, mntfh, dev_name);
+ if (IS_ERR(mntroot))
goto error_splat_super;
- }
error = security_sb_set_mnt_opts(s, &data->lsm_opts);
if (error)
goto error_splat_root;
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
- error = 0;
out:
kfree(data->nfs_server.hostname);
@@ -2359,7 +2257,7 @@ out:
out_free_fh:
nfs_free_fhandle(mntfh);
kfree(data);
- return error;
+ return mntroot;
out_err_nosb:
nfs_free_server(server);
@@ -2367,6 +2265,7 @@ out_err_nosb:
error_splat_root:
dput(mntroot);
+ mntroot = ERR_PTR(error);
error_splat_super:
if (server && !s->s_root)
bdi_unregister(&server->backing_dev_info);
@@ -2450,7 +2349,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
nfs_fscache_get_super_cookie(s, NULL, data);
}
- mntroot = nfs_get_root(s, data->fh);
+ mntroot = nfs_get_root(s, data->fh, dev_name);
if (IS_ERR(mntroot)) {
error = PTR_ERR(mntroot);
goto error_splat_super;
@@ -2718,7 +2617,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
s, data ? data->fscache_uniq : NULL, NULL);
}
- mntroot = nfs4_get_root(s, mntfh);
+ mntroot = nfs4_get_root(s, mntfh, dev_name);
if (IS_ERR(mntroot)) {
error = PTR_ERR(mntroot);
goto error_splat_super;
@@ -2771,27 +2670,6 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
return root_mnt;
}
-static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
-{
- char *page = (char *) __get_free_page(GFP_KERNEL);
- char *devname, *tmp;
-
- if (page == NULL)
- return;
- devname = nfs_path(path->mnt->mnt_devname,
- path->mnt->mnt_root, path->dentry,
- page, PAGE_SIZE);
- if (IS_ERR(devname))
- goto out_freepage;
- tmp = kstrdup(devname, GFP_KERNEL);
- if (tmp == NULL)
- goto out_freepage;
- kfree(mnt->mnt_devname);
- mnt->mnt_devname = tmp;
-out_freepage:
- free_page((unsigned long)page);
-}
-
struct nfs_referral_count {
struct list_head list;
const struct task_struct *task;
@@ -2858,17 +2736,18 @@ static void nfs_referral_loop_unprotect(void)
kfree(p);
}
-static int nfs_follow_remote_path(struct vfsmount *root_mnt,
- const char *export_path, struct vfsmount *mnt_target)
+static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
+ const char *export_path)
{
struct nameidata *nd = NULL;
struct mnt_namespace *ns_private;
struct super_block *s;
+ struct dentry *dentry;
int ret;
nd = kmalloc(sizeof(*nd), GFP_KERNEL);
if (nd == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ns_private = create_mnt_ns(root_mnt);
ret = PTR_ERR(ns_private);
@@ -2890,32 +2769,27 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
s = nd->path.mnt->mnt_sb;
atomic_inc(&s->s_active);
- mnt_target->mnt_sb = s;
- mnt_target->mnt_root = dget(nd->path.dentry);
-
- /* Correct the device pathname */
- nfs_fix_devname(&nd->path, mnt_target);
+ dentry = dget(nd->path.dentry);
path_put(&nd->path);
kfree(nd);
down_write(&s->s_umount);
- return 0;
+ return dentry;
out_put_mnt_ns:
put_mnt_ns(ns_private);
out_mntput:
mntput(root_mnt);
out_err:
kfree(nd);
- return ret;
+ return ERR_PTR(ret);
}
-static int nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_parsed_mount_data *data,
- struct vfsmount *mnt)
+static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
+ struct nfs_parsed_mount_data *data)
{
char *export_path;
struct vfsmount *root_mnt;
- int error;
+ struct dentry *res;
dfprintk(MOUNT, "--> nfs4_try_mount()\n");
@@ -2925,26 +2799,25 @@ static int nfs4_try_mount(int flags, const char *dev_name,
data->nfs_server.hostname);
data->nfs_server.export_path = export_path;
- error = PTR_ERR(root_mnt);
- if (IS_ERR(root_mnt))
- goto out;
-
- error = nfs_follow_remote_path(root_mnt, export_path, mnt);
+ res = ERR_CAST(root_mnt);
+ if (!IS_ERR(root_mnt))
+ res = nfs_follow_remote_path(root_mnt, export_path);
-out:
- dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error,
- error != 0 ? " [error]" : "");
- return error;
+ dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
+ IS_ERR(res) ? PTR_ERR(res) : 0,
+ IS_ERR(res) ? " [error]" : "");
+ return res;
}
/*
* Get the superblock for an NFS4 mountpoint
*/
-static int nfs4_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+static struct dentry *nfs4_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
{
struct nfs_parsed_mount_data *data;
int error = -ENOMEM;
+ struct dentry *res = ERR_PTR(-ENOMEM);
data = nfs_alloc_parsed_mount_data(4);
if (data == NULL)
@@ -2952,10 +2825,14 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
/* Validate the mount data */
error = nfs4_validate_mount_data(raw_data, data, dev_name);
- if (error < 0)
+ if (error < 0) {
+ res = ERR_PTR(error);
goto out;
+ }
- error = nfs4_try_mount(flags, dev_name, data, mnt);
+ res = nfs4_try_mount(flags, dev_name, data);
+ if (IS_ERR(res))
+ error = PTR_ERR(res);
out:
kfree(data->client_address);
@@ -2964,9 +2841,9 @@ out:
kfree(data->fscache_uniq);
out_free_data:
kfree(data);
- dprintk("<-- nfs4_get_sb() = %d%s\n", error,
+ dprintk("<-- nfs4_mount() = %d%s\n", error,
error != 0 ? " [error]" : "");
- return error;
+ return res;
}
static void nfs4_kill_super(struct super_block *sb)
@@ -3033,7 +2910,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
nfs_fscache_get_super_cookie(s, NULL, data);
}
- mntroot = nfs4_get_root(s, data->fh);
+ mntroot = nfs4_get_root(s, data->fh, dev_name);
if (IS_ERR(mntroot)) {
error = PTR_ERR(mntroot);
goto error_splat_super;
@@ -3120,7 +2997,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
nfs_fscache_get_super_cookie(s, NULL, data);
}
- mntroot = nfs4_get_root(s, mntfh);
+ mntroot = nfs4_get_root(s, mntfh, dev_name);
if (IS_ERR(mntroot)) {
error = PTR_ERR(mntroot);
goto error_splat_super;
@@ -3160,16 +3037,15 @@ error_splat_bdi:
/*
* Create an NFS4 server record on referral traversal
*/
-static int nfs4_referral_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
char *export_path;
struct vfsmount *root_mnt;
- int error;
+ struct dentry *res;
- dprintk("--> nfs4_referral_get_sb()\n");
+ dprintk("--> nfs4_referral_mount()\n");
export_path = data->mnt_path;
data->mnt_path = "/";
@@ -3178,15 +3054,13 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type,
flags, data, data->hostname);
data->mnt_path = export_path;
- error = PTR_ERR(root_mnt);
- if (IS_ERR(root_mnt))
- goto out;
-
- error = nfs_follow_remote_path(root_mnt, export_path, mnt);
-out:
- dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error,
- error != 0 ? " [error]" : "");
- return error;
+ res = ERR_CAST(root_mnt);
+ if (!IS_ERR(root_mnt))
+ res = nfs_follow_remote_path(root_mnt, export_path);
+ dprintk("<-- nfs4_referral_mount() = %ld%s\n",
+ IS_ERR(res) ? PTR_ERR(res) : 0,
+ IS_ERR(res) ? " [error]" : "");
+ return res;
}
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..8d6864c2a5fa 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -148,6 +148,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
alias = d_lookup(parent, &data->args.name);
if (alias != NULL) {
int ret = 0;
+ void *devname_garbage = NULL;
/*
* Hey, we raced with lookup... See if we need to transfer
@@ -157,6 +158,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
spin_lock(&alias->d_lock);
if (alias->d_inode != NULL &&
!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+ devname_garbage = alias->d_fsdata;
alias->d_fsdata = data;
alias->d_flags |= DCACHE_NFSFS_RENAMED;
ret = 1;
@@ -164,6 +166,13 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
spin_unlock(&alias->d_lock);
nfs_dec_sillycount(dir);
dput(alias);
+ /*
+ * If we'd displaced old cached devname, free it. At that
+ * point dentry is definitely not a root, so we won't need
+ * that anymore.
+ */
+ if (devname_garbage)
+ kfree(devname_garbage);
return ret;
}
data->dir = igrab(dir);
@@ -180,7 +189,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
return 1;
}
@@ -252,6 +261,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
{
struct nfs_unlinkdata *data;
int status = -ENOMEM;
+ void *devname_garbage = NULL;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
@@ -269,8 +279,16 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto out_unlock;
dentry->d_flags |= DCACHE_NFSFS_RENAMED;
+ devname_garbage = dentry->d_fsdata;
dentry->d_fsdata = data;
spin_unlock(&dentry->d_lock);
+ /*
+ * If we'd displaced old cached devname, free it. At that
+ * point dentry is definitely not a root, so we won't need
+ * that anymore.
+ */
+ if (devname_garbage)
+ kfree(devname_garbage);
return 0;
out_unlock:
spin_unlock(&dentry->d_lock);
@@ -299,6 +317,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
data = dentry->d_fsdata;
+ dentry->d_fsdata = NULL;
}
spin_unlock(&dentry->d_lock);
@@ -315,6 +334,7 @@ nfs_cancel_async_unlink(struct dentry *dentry)
struct nfs_unlinkdata *data = dentry->d_fsdata;
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+ dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock);
nfs_free_unlinkdata(data);
return;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..47a3ad63e0d5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -28,6 +28,7 @@
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -96,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
static void nfs_writedata_release(struct nfs_write_data *wdata)
{
+ put_lseg(wdata->lseg);
put_nfs_open_context(wdata->args.context);
nfs_writedata_free(wdata);
}
@@ -781,25 +783,21 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static int nfs_write_rpcsetup(struct nfs_page *req,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- unsigned int count, unsigned int offset,
- int how)
+int nfs_initiate_write(struct nfs_write_data *data,
+ struct rpc_clnt *clnt,
+ const struct rpc_call_ops *call_ops,
+ int how)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = data->inode;
int priority = flush_task_priority(how);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = req->wb_context->cred,
+ .rpc_cred = data->cred,
};
struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
+ .rpc_client = clnt,
.task = &data->task,
.rpc_message = &msg,
.callback_ops = call_ops,
@@ -810,12 +808,52 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
};
int ret = 0;
+ /* Set up the initial task struct. */
+ NFS_PROTO(inode)->write_setup(data, &msg);
+
+ dprintk("NFS: %5u initiated write call "
+ "(req %s/%lld, %u bytes @ offset %llu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+ if (how & FLUSH_SYNC) {
+ ret = rpc_wait_for_completion_task(task);
+ if (ret == 0)
+ ret = task->tk_status;
+ }
+ rpc_put_task(task);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_write);
+
+/*
+ * Set up the argument/result storage required for the RPC call.
+ */
+static int nfs_write_rpcsetup(struct nfs_page *req,
+ struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
+ unsigned int count, unsigned int offset,
+ struct pnfs_layout_segment *lseg,
+ int how)
+{
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
- data->cred = msg.rpc_cred;
+ data->cred = req->wb_context->cred;
+ data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -836,30 +874,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- dprintk("NFS: %5u initiated write call "
- "(req %s/%lld, %u bytes @ offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- count,
- (unsigned long long)data->args.offset);
+ if (data->lseg &&
+ (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
+ return 0;
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- ret = PTR_ERR(task);
- goto out;
- }
- if (how & FLUSH_SYNC) {
- ret = rpc_wait_for_completion_task(task);
- if (ret == 0)
- ret = task->tk_status;
- }
- rpc_put_task(task);
-out:
- return ret;
+ return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -879,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
{
- struct nfs_page *req = nfs_list_entry(head->next);
+ struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
- size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
+ size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
+ struct pnfs_layout_segment *lseg;
LIST_HEAD(list);
nfs_list_remove_request(req);
- nbytes = count;
+ nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);
@@ -905,9 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
+ BUG_ON(desc->pg_lseg);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
ClearPageError(page);
offset = 0;
- nbytes = count;
+ nbytes = desc->pg_count;
do {
int ret2;
@@ -919,13 +941,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
if (nbytes < wsize)
wsize = nbytes;
ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
- wsize, offset, how);
+ wsize, offset, lseg, desc->pg_ioflags);
if (ret == 0)
ret = ret2;
offset += wsize;
nbytes -= wsize;
} while (nbytes != 0);
+ put_lseg(lseg);
+ desc->pg_lseg = NULL;
return ret;
out_bad:
@@ -946,16 +970,26 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
+ struct list_head *head = &desc->pg_list;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ int ret;
- data = nfs_writedata_alloc(npages);
- if (!data)
- goto out_bad;
-
+ data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!data) {
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_redirty_request(req);
+ }
+ ret = -ENOMEM;
+ goto out;
+ }
pages = data->pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
@@ -965,16 +999,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
+ if ((!lseg) && list_is_singular(&data->pages))
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
/* Set up the argument struct */
- return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
- out_bad:
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_redirty_request(req);
- }
- return -ENOMEM;
+ ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
+out:
+ put_lseg(lseg); /* Cleans any gotten in ->pg_test */
+ desc->pg_lseg = NULL;
+ return ret;
}
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -982,6 +1015,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
{
size_t wsize = NFS_SERVER(inode)->wsize;
+ pnfs_pageio_init_write(pgio, inode);
+
if (wsize < PAGE_CACHE_SIZE)
nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
else
@@ -1132,7 +1167,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
/*
* This function is called when the WRITE call is complete.
*/
-int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
@@ -1151,7 +1186,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
status = NFS_PROTO(data->inode)->write_done(task, data);
if (status != 0)
- return status;
+ return;
nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -1166,6 +1201,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
static unsigned long complain;
+ /* Note this will print the MDS for a DS write */
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
@@ -1186,6 +1222,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
/* Was this an NFSv2 write or an NFSv3 stable write? */
if (resp->verf->committed != NFS_UNSTABLE) {
/* Resend from where the server left off */
+ data->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
@@ -1196,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
argp->stable = NFS_FILE_SYNC;
}
nfs_restart_rpc(task, server->nfs_client);
- return -EAGAIN;
+ return;
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
@@ -1207,7 +1244,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
- return 0;
+ return;
}
@@ -1292,6 +1329,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (how & FLUSH_SYNC)
+ rpc_wait_for_completion_task(task);
rpc_put_task(task);
return 0;
}