summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-11-27 04:42:59 +0100
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-11-27 04:42:59 +0100
commit 423eaf8f00d89ca79bb2c9d4d22e92c9774e2d8a (patch)
tree e160521a6b059d50045ea47c3f730bf2c83c1f85 /fs
parent Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched (diff)
parent NFS: Clean up new multi-segment direct I/O changes (diff)
download linux-423eaf8f00d89ca79bb2c9d4d22e92c9774e2d8a.tar.xz
linux-423eaf8f00d89ca79bb2c9d4d22e92c9774e2d8a.zip
Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6:
  NFS: Clean up new multi-segment direct I/O changes
  NFS: Ensure we return zero if applications attempt to write zero bytes
  NFS: Support multiple segment iovecs in the NFS direct I/O path
  NFS: Introduce iovec I/O helpers to fs/nfs/direct.c
  SUNRPC: Add missing "space" to net/sunrpc/auth_gss.c
  SUNRPC: make sunrpc/xprtsock.c:xs_setup_{udp,tcp}() static
  NFS: fs/nfs/dir.c should #include "internal.h"
  NFS: make nfs_wb_page_priority() static
  NFS: mount failure causes bad page state
  SUNRPC: remove NFS/RDMA client's binary sysctls
  kernel BUG at fs/nfs/namespace.c:108! - can be triggered by bad server
  sunrpc: rpc_pipe_poll may miss available data in some cases
  sunrpc: return error if unsupported enctype or cksumtype is encountered
  sunrpc: gss_pipe_downcall(), don't assume all errors are transient
  NFS: Fix the ustat() regression
Diffstat (limited to 'fs')
-rw-r--r-- fs/nfs/dir.c 1
-rw-r--r-- fs/nfs/direct.c 142
-rw-r--r-- fs/nfs/getroot.c 81
-rw-r--r-- fs/nfs/super.c 11
-rw-r--r-- fs/nfs/write.c 3
5 files changed, 138 insertions, 100 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 35334539d947..f697b5c74b7c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -38,6 +38,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
#include "iostat.h"
+#include "internal.h"
/* #define NFS_DEBUG_VERBOSE 1 */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index afcab007a22b..5e8d82f6666b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -263,17 +263,19 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+ const struct iovec *iov,
+ loff_t pos)
{
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->path.dentry->d_inode;
+ unsigned long user_addr = (unsigned long)iov->iov_base;
+ size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
unsigned int pgbase;
int result;
ssize_t started = 0;
- get_dreq(dreq);
-
do {
struct nfs_read_data *data;
size_t bytes;
@@ -347,15 +349,46 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
count -= bytes;
} while (count != 0);
+ if (started)
+ return started;
+ return result < 0 ? (ssize_t) result : -EFAULT;
+}
+
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ loff_t pos)
+{
+ ssize_t result = -EINVAL;
+ size_t requested_bytes = 0;
+ unsigned long seg;
+
+ get_dreq(dreq);
+
+ for (seg = 0; seg < nr_segs; seg++) {
+ const struct iovec *vec = &iov[seg];
+ result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+ if (result < 0)
+ break;
+ requested_bytes += result;
+ if ((size_t)result < vec->iov_len)
+ break;
+ pos += vec->iov_len;
+ }
+
if (put_dreq(dreq))
nfs_direct_complete(dreq);
- if (started)
+ if (requested_bytes != 0)
return 0;
- return result < 0 ? (ssize_t) result : -EFAULT;
+
+ if (result < 0)
+ return result;
+ return -EIO;
}
-static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
+static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
ssize_t result = 0;
sigset_t oldset;
@@ -372,9 +405,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
rpc_clnt_sigmask(clnt, &oldset);
- result = nfs_direct_read_schedule(dreq, user_addr, count, pos);
+ result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
rpc_clnt_sigunmask(clnt, &oldset);
@@ -601,17 +633,19 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
* handled automatically by nfs_direct_write_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
+static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
+ const struct iovec *iov,
+ loff_t pos, int sync)
{
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->path.dentry->d_inode;
+ unsigned long user_addr = (unsigned long)iov->iov_base;
+ size_t count = iov->iov_len;
size_t wsize = NFS_SERVER(inode)->wsize;
unsigned int pgbase;
int result;
ssize_t started = 0;
- get_dreq(dreq);
-
do {
struct nfs_write_data *data;
size_t bytes;
@@ -689,15 +723,48 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
count -= bytes;
} while (count != 0);
+ if (started)
+ return started;
+ return result < 0 ? (ssize_t) result : -EFAULT;
+}
+
+static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ loff_t pos, int sync)
+{
+ ssize_t result = 0;
+ size_t requested_bytes = 0;
+ unsigned long seg;
+
+ get_dreq(dreq);
+
+ for (seg = 0; seg < nr_segs; seg++) {
+ const struct iovec *vec = &iov[seg];
+ result = nfs_direct_write_schedule_segment(dreq, vec,
+ pos, sync);
+ if (result < 0)
+ break;
+ requested_bytes += result;
+ if ((size_t)result < vec->iov_len)
+ break;
+ pos += vec->iov_len;
+ }
+
if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, inode);
+ nfs_direct_write_complete(dreq, dreq->inode);
- if (started)
+ if (requested_bytes != 0)
return 0;
- return result < 0 ? (ssize_t) result : -EFAULT;
+
+ if (result < 0)
+ return result;
+ return -EIO;
}
-static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
+static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos,
+ size_t count)
{
ssize_t result = 0;
sigset_t oldset;
@@ -720,10 +787,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
-
rpc_clnt_sigmask(clnt, &oldset);
- result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
+ result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
if (!result)
result = nfs_direct_wait(dreq);
rpc_clnt_sigunmask(clnt, &oldset);
@@ -759,21 +824,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- /* XXX: temporary */
- const char __user *buf = iov[0].iov_base;
- size_t count = iov[0].iov_len;
+ size_t count;
+
+ count = iov_length(iov, nr_segs);
+ nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
- dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
+ dprintk("nfs: direct read(%s/%s, %zd@%Ld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name,
- (unsigned long) count, (long long) pos);
-
- if (nr_segs != 1)
- goto out;
+ count, (long long) pos);
- retval = -EFAULT;
- if (!access_ok(VERIFY_WRITE, buf, count))
- goto out;
retval = 0;
if (!count)
goto out;
@@ -782,7 +842,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (retval)
goto out;
- retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos);
+ retval = nfs_direct_read(iocb, iov, nr_segs, pos);
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -821,21 +881,21 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- /* XXX: temporary */
- const char __user *buf = iov[0].iov_base;
- size_t count = iov[0].iov_len;
+ size_t count;
+
+ count = iov_length(iov, nr_segs);
+ nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
- dprintk("nfs: direct write(%s/%s, %lu@%Ld)\n",
+ dfprintk(VFS, "nfs: direct write(%s/%s, %zd@%Ld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name,
- (unsigned long) count, (long long) pos);
-
- if (nr_segs != 1)
- goto out;
+ count, (long long) pos);
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
+ if (!count)
+ goto out; /* return 0 */
retval = -EINVAL;
if ((ssize_t) count < 0)
@@ -844,15 +904,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (!count)
goto out;
- retval = -EFAULT;
- if (!access_ok(VERIFY_READ, buf, count))
- goto out;
-
retval = nfs_sync_mapping(mapping);
if (retval)
goto out;
- retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos);
+ retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
if (retval > 0)
iocb->ki_pos = pos + retval;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 522e5ad4d8ad..0ee43843f4ec 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -43,6 +43,25 @@
#define NFSDBG_FACILITY NFSDBG_CLIENT
/*
+ * Set the superblock root dentry.
+ * Note that this function frees the inode in case of error.
+ */
+static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *inode)
+{
+ /* The mntroot acts as the dummy root dentry for this superblock */
+ if (sb->s_root == NULL) {
+ sb->s_root = d_alloc_root(inode);
+ if (sb->s_root == NULL) {
+ iput(inode);
+ return -ENOMEM;
+ }
+ /* Circumvent igrab(): we know the inode is not being freed */
+ atomic_inc(&inode->i_count);
+ }
+ return 0;
+}
+
+/*
* get an NFS2/NFS3 root dentry from the root filehandle
*/
struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
@@ -54,33 +73,6 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
struct inode *inode;
int error;
- /* create a dummy root dentry with dummy inode for this superblock */
- if (!sb->s_root) {
- struct nfs_fh dummyfh;
- struct dentry *root;
- struct inode *iroot;
-
- memset(&dummyfh, 0, sizeof(dummyfh));
- memset(&fattr, 0, sizeof(fattr));
- nfs_fattr_init(&fattr);
- fattr.valid = NFS_ATTR_FATTR;
- fattr.type = NFDIR;
- fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
- fattr.nlink = 2;
-
- iroot = nfs_fhget(sb, &dummyfh, &fattr);
- if (IS_ERR(iroot))
- return ERR_PTR(PTR_ERR(iroot));
-
- root = d_alloc_root(iroot);
- if (!root) {
- iput(iroot);
- return ERR_PTR(-ENOMEM);
- }
-
- sb->s_root = root;
- }
-
/* get the actual root for this mount */
fsinfo.fattr = &fattr;
@@ -96,6 +88,10 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
return ERR_PTR(PTR_ERR(inode));
}
+ error = nfs_superblock_set_dummy_root(sb, inode);
+ if (error != 0)
+ return ERR_PTR(error);
+
/* root dentries normally start off anonymous and get spliced in later
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
@@ -241,33 +237,6 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
dprintk("--> nfs4_get_root()\n");
- /* create a dummy root dentry with dummy inode for this superblock */
- if (!sb->s_root) {
- struct nfs_fh dummyfh;
- struct dentry *root;
- struct inode *iroot;
-
- memset(&dummyfh, 0, sizeof(dummyfh));
- memset(&fattr, 0, sizeof(fattr));
- nfs_fattr_init(&fattr);
- fattr.valid = NFS_ATTR_FATTR;
- fattr.type = NFDIR;
- fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
- fattr.nlink = 2;
-
- iroot = nfs_fhget(sb, &dummyfh, &fattr);
- if (IS_ERR(iroot))
- return ERR_PTR(PTR_ERR(iroot));
-
- root = d_alloc_root(iroot);
- if (!root) {
- iput(iroot);
- return ERR_PTR(-ENOMEM);
- }
-
- sb->s_root = root;
- }
-
/* get the info about the server and filesystem */
error = nfs4_server_capabilities(server, mntfh);
if (error < 0) {
@@ -289,6 +258,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
return ERR_PTR(PTR_ERR(inode));
}
+ error = nfs_superblock_set_dummy_root(sb, inode);
+ if (error != 0)
+ return ERR_PTR(error);
+
/* root dentries normally start off anonymous and get spliced in later
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa517ae9207f..2426e713b77f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1054,10 +1054,11 @@ static int nfs_validate_mount_data(void *options,
{
struct nfs_mount_data *data = (struct nfs_mount_data *)options;
+ memset(args, 0, sizeof(*args));
+
if (data == NULL)
goto out_no_data;
- memset(args, 0, sizeof(*args));
args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
args->rsize = NFS_MAX_FILE_IO_SIZE;
args->wsize = NFS_MAX_FILE_IO_SIZE;
@@ -1474,6 +1475,11 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
error = PTR_ERR(mntroot);
goto error_splat_super;
}
+ if (mntroot->d_inode->i_op != &nfs_dir_inode_operations) {
+ dput(mntroot);
+ error = -ESTALE;
+ goto error_splat_super;
+ }
s->s_flags |= MS_ACTIVE;
mnt->mnt_sb = s;
@@ -1531,10 +1537,11 @@ static int nfs4_validate_mount_data(void *options,
struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
char *c;
+ memset(args, 0, sizeof(*args));
+
if (data == NULL)
goto out_no_data;
- memset(args, 0, sizeof(*args));
args->rsize = NFS_MAX_FILE_IO_SIZE;
args->wsize = NFS_MAX_FILE_IO_SIZE;
args->timeo = 600;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 89527a487ed7..51cc1bd6a116 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1436,7 +1436,8 @@ out:
return ret;
}
-int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
+static int nfs_wb_page_priority(struct inode *inode, struct page *page,
+ int how)
{
loff_t range_start = page_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);