summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-10 22:03:38 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-10 22:03:38 +0200
commit73ccb023a2f25b72c4b95499ca24760588014614 (patch)
treeb0fd9968af3e929ac496f159420a25dc3e1dcafb
parentMerge tag 'metag-for-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
parentpNFS/flexfiles: Always attempt to call layoutstats when flexfiles is enabled (diff)
downloadlinux-73ccb023a2f25b72c4b95499ca24760588014614.tar.xz
linux-73ccb023a2f25b72c4b95499ca24760588014614.zip
Merge tag 'nfs-for-4.12-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable bugfixes: - Fix use after free in write error path - Use GFP_NOIO for two allocations in writeback - Fix a hang in OPEN related to server reboot - Check the result of nfs4_pnfs_ds_connect - Fix an rcu lock leak Features: - Removal of the unmaintained and unused OSD pNFS layout - Cleanup and removal of lots of unnecessary dprintk()s - Cleanup and removal of some memory failure paths now that GFP_NOFS is guaranteed to never fail. - Remove the v3-only data server limitation on pNFS/flexfiles Bugfixes: - RPC/RDMA connection handling bugfixes - Copy offload: fixes to ensure the copied data is COMMITed to disk. - Readdir: switch back to using the ->iterate VFS interface - File locking fixes from Ben Coddington - Various use-after-free and deadlock issues in pNFS - Write path bugfixes" * tag 'nfs-for-4.12-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (89 commits) pNFS/flexfiles: Always attempt to call layoutstats when flexfiles is enabled NFSv4.1: Work around a Linux server bug... NFS append COMMIT after synchronous COPY NFSv4: Fix exclusive create attributes encoding NFSv4: Fix an rcu lock leak nfs: use kmap/kunmap directly NFS: always treat the invocation of nfs_getattr as cache hit when noac is on Fix nfs_client refcounting if kmalloc fails in nfs4_proc_exchange_id and nfs4_proc_async_renew NFSv4.1: RECLAIM_COMPLETE must handle NFS4ERR_CONN_NOT_BOUND_TO_SESSION pNFS: Fix NULL dereference in pnfs_generic_alloc_ds_commits pNFS: Fix a typo in pnfs_generic_alloc_ds_commits pNFS: Fix a deadlock when coalescing writes and returning the layout pNFS: Don't clear the layout return info if there are segments to return pNFS: Ensure we commit the layout if it has been invalidated pNFS: Don't send COMMITs to the DSes if the server invalidated our layout pNFS/flexfiles: Fix up the ff_layout_write_pagelist failure path pNFS: Ensure we check layout validity before marking it for return NFS4.1 handle interrupted slot reuse from ERR_DELAY NFSv4: check return value of xdr_inline_decode nfs/filelayout: fix NULL pointer dereference in fl_pnfs_update_layout() ...
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt6
-rw-r--r--Documentation/filesystems/nfs/pnfs.txt37
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/lockd/clntlock.c1
-rw-r--r--fs/lockd/clntproc.c26
-rw-r--r--fs/locks.c2
-rw-r--r--fs/nfs/Kconfig5
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/callback_proc.c47
-rw-r--r--fs/nfs/callback_xdr.c109
-rw-r--r--fs/nfs/client.c67
-rw-r--r--fs/nfs/dir.c104
-rw-r--r--fs/nfs/direct.c21
-rw-r--r--fs/nfs/file.c30
-rw-r--r--fs/nfs/filelayout/filelayout.c8
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c24
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c10
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/internal.h5
-rw-r--r--fs/nfs/namespace.c34
-rw-r--r--fs/nfs/nfs3proc.c54
-rw-r--r--fs/nfs/nfs42proc.c24
-rw-r--r--fs/nfs/nfs42xdr.c22
-rw-r--r--fs/nfs/nfs4client.c283
-rw-r--r--fs/nfs/nfs4getroot.c3
-rw-r--r--fs/nfs/nfs4namespace.c7
-rw-r--r--fs/nfs/nfs4proc.c99
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfs/nfs4xdr.c94
-rw-r--r--fs/nfs/objlayout/Kbuild5
-rw-r--r--fs/nfs/objlayout/objio_osd.c675
-rw-r--r--fs/nfs/objlayout/objlayout.c706
-rw-r--r--fs/nfs/objlayout/objlayout.h183
-rw-r--r--fs/nfs/objlayout/pnfs_osd_xdr_cli.c415
-rw-r--r--fs/nfs/pagelist.c77
-rw-r--r--fs/nfs/pnfs.c62
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfs/pnfs_nfs.c24
-rw-r--r--fs/nfs/proc.c2
-rw-r--r--fs/nfs/read.c9
-rw-r--r--fs/nfs/write.c121
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/lockd/bind.h24
-rw-r--r--include/linux/lockd/lockd.h2
-rw-r--r--include/linux/nfs_fs.h17
-rw-r--r--include/linux/nfs_fs_sb.h1
-rw-r--r--include/linux/nfs_page.h5
-rw-r--r--include/linux/nfs_xdr.h3
-rw-r--r--net/sunrpc/clnt.c8
-rw-r--r--net/sunrpc/sched.c5
-rw-r--r--net/sunrpc/xdr.c2
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c12
-rw-r--r--net/sunrpc/xprtrdma/transport.c57
-rw-r--r--net/sunrpc/xprtrdma/verbs.c323
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h22
56 files changed, 949 insertions, 2960 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4e0654b56aef..238bd211f365 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2434,12 +2434,6 @@
and gids from such clients. This is intended to ease
migration from NFSv2/v3.
- objlayoutdriver.osd_login_prog=
- [NFS] [OBJLAYOUT] sets the pathname to the program which
- is used to automatically discover and login into new
- osd-targets. Please see:
- Documentation/filesystems/pnfs.txt for more explanations
-
nmi_debug= [KNL,SH] Specify one or more actions to take
when a NMI is triggered.
Format: [state][,regs][,debounce][,die]
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
index 8de578a98222..80dc0bdc302a 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -64,46 +64,9 @@ table which are called by the nfs-client pnfs-core to implement the
different layout types.
Files-layout-driver code is in: fs/nfs/filelayout/.. directory
-Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
-objects-layout setup
---------------------
-
-As part of the full STD implementation the objlayoutdriver.ko needs, at times,
-to automatically login to yet undiscovered iscsi/osd devices. For this the
-driver makes up-calles to a user-mode script called *osd_login*
-
-The path_name of the script to use is by default:
- /sbin/osd_login.
-This name can be overridden by the Kernel module parameter:
- objlayoutdriver.osd_login_prog
-
-If Kernel does not find the osd_login_prog path it will zero it out
-and will not attempt farther logins. An admin can then write new value
-to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it.
-
-The /sbin/osd_login is part of the nfs-utils package, and should usually
-be installed on distributions that support this Kernel version.
-
-The API to the login script is as follows:
- Usage: $0 -u <URI> -o <OSDNAME> -s <SYSTEMID>
- Options:
- -u target uri e.g. iscsi://<ip>:<port>
- (always exists)
- (More protocols can be defined in the future.
- The client does not interpret this string it is
- passed unchanged as received from the Server)
- -o osdname of the requested target OSD
- (Might be empty)
- (A string which denotes the OSD name, there is a
- limit of 64 chars on this string)
- -s systemid of the requested target OSD
- (Might be empty)
- (This string, if not empty is always an hex
- representation of the 20 bytes osd_system_id)
-
blocks-layout setup
-------------------
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index aa93f09ae6e6..3ee4fdc3da9e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2177,7 +2177,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
}
/* Unlock on close is handled by the flush method */
- if (fl->fl_flags & FL_CLOSE)
+ if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
return 0;
if (pid && pid_nr == 0)
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 41e491b8e5d7..27d577dbe51a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -69,6 +69,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
goto out_nobind;
+ host->h_nlmclnt_ops = nlm_init->nlmclnt_ops;
return host;
out_nobind:
nlmclnt_release_host(host);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 112952037933..066ac313ae5c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -150,17 +150,22 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
* @host: address of a valid nlm_host context representing the NLM server
* @cmd: fcntl-style file lock operation to perform
* @fl: address of arguments for the lock operation
+ * @data: address of data to be sent to callback operations
*
*/
-int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
+int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data)
{
struct nlm_rqst *call;
int status;
+ const struct nlmclnt_operations *nlmclnt_ops = host->h_nlmclnt_ops;
call = nlm_alloc_call(host);
if (call == NULL)
return -ENOMEM;
+ if (nlmclnt_ops && nlmclnt_ops->nlmclnt_alloc_call)
+ nlmclnt_ops->nlmclnt_alloc_call(data);
+
nlmclnt_locks_init_private(fl, host);
if (!fl->fl_u.nfs_fl.owner) {
/* lockowner allocation has failed */
@@ -169,6 +174,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
}
/* Set up the argument struct */
nlmclnt_setlockargs(call, fl);
+ call->a_callback_data = data;
if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
if (fl->fl_type != F_UNLCK) {
@@ -214,8 +220,12 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
void nlmclnt_release_call(struct nlm_rqst *call)
{
+ const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops;
+
if (!atomic_dec_and_test(&call->a_count))
return;
+ if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call)
+ nlmclnt_ops->nlmclnt_release_call(call->a_callback_data);
nlmclnt_release_host(call->a_host);
nlmclnt_release_lockargs(call);
kfree(call);
@@ -687,6 +697,19 @@ out:
return status;
}
+static void nlmclnt_unlock_prepare(struct rpc_task *task, void *data)
+{
+ struct nlm_rqst *req = data;
+ const struct nlmclnt_operations *nlmclnt_ops = req->a_host->h_nlmclnt_ops;
+ bool defer_call = false;
+
+ if (nlmclnt_ops && nlmclnt_ops->nlmclnt_unlock_prepare)
+ defer_call = nlmclnt_ops->nlmclnt_unlock_prepare(task, req->a_callback_data);
+
+ if (!defer_call)
+ rpc_call_start(task);
+}
+
static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
{
struct nlm_rqst *req = data;
@@ -720,6 +743,7 @@ die:
}
static const struct rpc_call_ops nlmclnt_unlock_ops = {
+ .rpc_call_prepare = nlmclnt_unlock_prepare,
.rpc_call_done = nlmclnt_unlock_callback,
.rpc_release = nlmclnt_rpc_release,
};
diff --git a/fs/locks.c b/fs/locks.c
index 26811321d39b..af2031a1fcff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2504,7 +2504,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
.fl_owner = filp,
.fl_pid = current->tgid,
.fl_file = filp,
- .fl_flags = FL_FLOCK,
+ .fl_flags = FL_FLOCK | FL_CLOSE,
.fl_type = F_UNLCK,
.fl_end = OFFSET_MAX,
};
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f31fd0dd92c6..69d02cf8cf37 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -123,11 +123,6 @@ config PNFS_BLOCK
depends on NFS_V4_1 && BLK_DEV_DM
default NFS_V4
-config PNFS_OBJLAYOUT
- tristate
- depends on NFS_V4_1 && SCSI_OSD_ULD
- default NFS_V4
-
config PNFS_FLEXFILE_LAYOUT
tristate
depends on NFS_V4_1 && NFS_V3
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 6abdda209642..98f4e5728a67 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -31,6 +31,5 @@ nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o
nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o
obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
-obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f073a6d2c6a5..52479f180ea1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -131,10 +131,11 @@ restart:
if (!inode)
continue;
if (!nfs_sb_active(inode->i_sb)) {
- rcu_read_lock();
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
iput(inode);
spin_lock(&clp->cl_lock);
+ rcu_read_lock();
goto restart;
}
return inode;
@@ -170,10 +171,11 @@ restart:
if (!inode)
continue;
if (!nfs_sb_active(inode->i_sb)) {
- rcu_read_lock();
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
iput(inode);
spin_lock(&clp->cl_lock);
+ rcu_read_lock();
goto restart;
}
return inode;
@@ -317,31 +319,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
static u32 do_callback_layoutrecall(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
- u32 res;
-
- dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
if (args->cbl_recall_type == RETURN_FILE)
- res = initiate_file_draining(clp, args);
- else
- res = initiate_bulk_draining(clp, args);
- dprintk("%s returning %i\n", __func__, res);
- return res;
-
+ return initiate_file_draining(clp, args);
+ return initiate_bulk_draining(clp, args);
}
__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps)
{
- u32 res;
-
- dprintk("%s: -->\n", __func__);
+ u32 res = NFS4ERR_OP_NOT_IN_SESSION;
if (cps->clp)
res = do_callback_layoutrecall(cps->clp, args);
- else
- res = NFS4ERR_OP_NOT_IN_SESSION;
-
- dprintk("%s: exit with status = %d\n", __func__, res);
return cpu_to_be32(res);
}
@@ -364,8 +353,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
struct nfs_client *clp = cps->clp;
struct nfs_server *server = NULL;
- dprintk("%s: -->\n", __func__);
-
if (!clp) {
res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
goto out;
@@ -384,8 +371,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
goto found;
}
rcu_read_unlock();
- dprintk("%s: layout type %u not found\n",
- __func__, dev->cbd_layout_type);
continue;
}
@@ -395,8 +380,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
out:
kfree(args->devs);
- dprintk("%s: exit with status = %u\n",
- __func__, be32_to_cpu(res));
return res;
}
@@ -417,16 +400,11 @@ static __be32
validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
const struct cb_sequenceargs * args)
{
- dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
- __func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
-
if (args->csa_slotid > tbl->server_highest_slotid)
return htonl(NFS4ERR_BADSLOT);
/* Replay */
if (args->csa_sequenceid == slot->seq_nr) {
- dprintk("%s seqid %u is a replay\n",
- __func__, args->csa_sequenceid);
if (nfs4_test_locked_slot(tbl, slot->slot_nr))
return htonl(NFS4ERR_DELAY);
/* Signal process_op to set this error on next op */
@@ -480,15 +458,6 @@ static bool referring_call_exists(struct nfs_client *clp,
for (j = 0; j < rclist->rcl_nrefcalls; j++) {
ref = &rclist->rcl_refcalls[j];
-
- dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u "
- "slotid %u\n", __func__,
- ((u32 *)&rclist->rcl_sessionid.data)[0],
- ((u32 *)&rclist->rcl_sessionid.data)[1],
- ((u32 *)&rclist->rcl_sessionid.data)[2],
- ((u32 *)&rclist->rcl_sessionid.data)[3],
- ref->rc_sequenceid, ref->rc_slotid);
-
status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
ref->rc_sequenceid, HZ >> 1) < 0;
if (status)
@@ -593,8 +562,6 @@ out:
res->csr_status = status;
trace_nfs4_cb_sequence(args, res, status);
- dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
- ntohl(status), ntohl(res->csr_status));
return status;
}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d051fc3583a9..c14758e08d73 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -171,8 +171,6 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
}
hdr->nops = ntohl(*p);
- dprintk("%s: minorversion %d nops %d\n", __func__,
- hdr->minorversion, hdr->nops);
return 0;
}
@@ -192,11 +190,8 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
status = decode_fh(xdr, &args->fh);
if (unlikely(status != 0))
- goto out;
- status = decode_bitmap(xdr, args->bitmap);
-out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
- return status;
+ return status;
+ return decode_bitmap(xdr, args->bitmap);
}
static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
@@ -206,17 +201,12 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_delegation_stateid(xdr, &args->stateid);
if (unlikely(status != 0))
- goto out;
+ return status;
p = read_buf(xdr, 4);
- if (unlikely(p == NULL)) {
- status = htonl(NFS4ERR_RESOURCE);
- goto out;
- }
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_RESOURCE);
args->truncate = ntohl(*p);
- status = decode_fh(xdr, &args->fh);
-out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
- return status;
+ return decode_fh(xdr, &args->fh);
}
#if defined(CONFIG_NFS_V4_1)
@@ -235,10 +225,8 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
uint32_t iomode;
p = read_buf(xdr, 4 * sizeof(uint32_t));
- if (unlikely(p == NULL)) {
- status = htonl(NFS4ERR_BADXDR);
- goto out;
- }
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_BADXDR);
args->cbl_layout_type = ntohl(*p++);
/* Depite the spec's xdr, iomode really belongs in the FILE switch,
@@ -252,37 +240,23 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
args->cbl_range.iomode = iomode;
status = decode_fh(xdr, &args->cbl_fh);
if (unlikely(status != 0))
- goto out;
+ return status;
p = read_buf(xdr, 2 * sizeof(uint64_t));
- if (unlikely(p == NULL)) {
- status = htonl(NFS4ERR_BADXDR);
- goto out;
- }
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_range.offset);
p = xdr_decode_hyper(p, &args->cbl_range.length);
- status = decode_layout_stateid(xdr, &args->cbl_stateid);
- if (unlikely(status != 0))
- goto out;
+ return decode_layout_stateid(xdr, &args->cbl_stateid);
} else if (args->cbl_recall_type == RETURN_FSID) {
p = read_buf(xdr, 2 * sizeof(uint64_t));
- if (unlikely(p == NULL)) {
- status = htonl(NFS4ERR_BADXDR);
- goto out;
- }
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_fsid.major);
p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
- } else if (args->cbl_recall_type != RETURN_ALL) {
- status = htonl(NFS4ERR_BADXDR);
- goto out;
- }
- dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
- __func__,
- args->cbl_layout_type, iomode,
- args->cbl_layoutchanged, args->cbl_recall_type);
-out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
- return status;
+ } else if (args->cbl_recall_type != RETURN_ALL)
+ return htonl(NFS4ERR_BADXDR);
+ return 0;
}
static
@@ -437,12 +411,11 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
status = decode_sessionid(xdr, &args->csa_sessionid);
if (status)
- goto out;
+ return status;
- status = htonl(NFS4ERR_RESOURCE);
p = read_buf(xdr, 5 * sizeof(uint32_t));
if (unlikely(p == NULL))
- goto out;
+ return htonl(NFS4ERR_RESOURCE);
args->csa_addr = svc_addr(rqstp);
args->csa_sequenceid = ntohl(*p++);
@@ -456,7 +429,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
sizeof(*args->csa_rclists),
GFP_KERNEL);
if (unlikely(args->csa_rclists == NULL))
- goto out;
+ return htonl(NFS4ERR_RESOURCE);
for (i = 0; i < args->csa_nrclists; i++) {
status = decode_rc_list(xdr, &args->csa_rclists[i]);
@@ -466,27 +439,13 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
}
}
}
- status = 0;
-
- dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u "
- "highestslotid %u cachethis %d nrclists %u\n",
- __func__,
- ((u32 *)&args->csa_sessionid)[0],
- ((u32 *)&args->csa_sessionid)[1],
- ((u32 *)&args->csa_sessionid)[2],
- ((u32 *)&args->csa_sessionid)[3],
- args->csa_sequenceid, args->csa_slotid,
- args->csa_highestslotid, args->csa_cachethis,
- args->csa_nrclists);
-out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
- return status;
+ return 0;
out_free:
for (i = 0; i < args->csa_nrclists; i++)
kfree(args->csa_rclists[i].rcl_refcalls);
kfree(args->csa_rclists);
- goto out;
+ return status;
}
static __be32 decode_recallany_args(struct svc_rqst *rqstp,
@@ -557,11 +516,8 @@ static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream
status = decode_fh(xdr, &args->cbnl_fh);
if (unlikely(status != 0))
- goto out;
- status = decode_lockowner(xdr, args);
-out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
- return status;
+ return status;
+ return decode_lockowner(xdr, args);
}
#endif /* CONFIG_NFS_V4_1 */
@@ -707,7 +663,6 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
}
@@ -734,11 +689,11 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
__be32 status = res->csr_status;
if (unlikely(status != 0))
- goto out;
+ return status;
status = encode_sessionid(xdr, &res->csr_sessionid);
if (status)
- goto out;
+ return status;
p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t));
if (unlikely(p == NULL))
@@ -748,9 +703,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
*p++ = htonl(res->csr_slotid);
*p++ = htonl(res->csr_highestslotid);
*p++ = htonl(res->csr_target_highestslotid);
-out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
- return status;
+ return 0;
}
static __be32
@@ -871,14 +824,10 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
long maxlen;
__be32 res;
- dprintk("%s: start\n", __func__);
status = decode_op_hdr(xdr_in, &op_nr);
if (unlikely(status))
return status;
- dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
- __func__, cps->minorversion, nop, op_nr);
-
switch (cps->minorversion) {
case 0:
status = preprocess_nfs4_op(op_nr, &op);
@@ -917,7 +866,6 @@ encode_hdr:
return res;
if (op->encode_res != NULL && status == 0)
status = op->encode_res(rqstp, xdr_out, resp);
- dprintk("%s: done, status = %d\n", __func__, ntohl(status));
return status;
}
@@ -937,8 +885,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
};
unsigned int nops = 0;
- dprintk("%s: start\n", __func__);
-
xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
@@ -977,7 +923,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
*hdr_res.nops = htonl(nops);
nfs4_cb_free_slot(&cps);
nfs_put_client(cps.clp);
- dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
out_invalidcred:
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 04d15a0045e3..ee5ddbd36088 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -218,6 +218,7 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
static void pnfs_init_server(struct nfs_server *server)
{
rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
+ rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
}
#else
@@ -240,8 +241,6 @@ static void pnfs_init_server(struct nfs_server *server)
*/
void nfs_free_client(struct nfs_client *clp)
{
- dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
-
nfs_fscache_release_client_cookie(clp);
/* -EIO all pending I/O */
@@ -256,8 +255,6 @@ void nfs_free_client(struct nfs_client *clp)
kfree(clp->cl_hostname);
kfree(clp->cl_acceptor);
kfree(clp);
-
- dprintk("<-- nfs_free_client()\n");
}
EXPORT_SYMBOL_GPL(nfs_free_client);
@@ -271,7 +268,6 @@ void nfs_put_client(struct nfs_client *clp)
if (!clp)
return;
- dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
nn = net_generic(clp->cl_net, nfs_net_id);
if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
@@ -382,9 +378,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
}
smp_rmb();
-
- dprintk("<-- %s found nfs_client %p for %s\n",
- __func__, clp, cl_init->hostname ?: "");
return clp;
}
@@ -403,9 +396,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
return NULL;
}
- dprintk("--> nfs_get_client(%s,v%u)\n",
- cl_init->hostname, rpc_ops->version);
-
/* see if the client already exists */
do {
spin_lock(&nn->nfs_client_lock);
@@ -430,8 +420,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
new = rpc_ops->alloc_client(cl_init);
} while (!IS_ERR(new));
- dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
- cl_init->hostname, PTR_ERR(new));
return new;
}
EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -558,6 +546,7 @@ static int nfs_start_lockd(struct nfs_server *server)
.noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
1 : 0,
.net = clp->cl_net,
+ .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
};
if (nlm_init.nfs_version > 3)
@@ -624,27 +613,21 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
{
int error;
- if (clp->cl_cons_state == NFS_CS_READY) {
- /* the client is already initialised */
- dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+ /* the client is already initialised */
+ if (clp->cl_cons_state == NFS_CS_READY)
return clp;
- }
/*
* Create a client RPC handle for doing FSSTAT with UNIX auth only
* - RFC 2623, sec 2.3.2
*/
error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
- if (error < 0)
- goto error;
- nfs_mark_client_ready(clp, NFS_CS_READY);
+ nfs_mark_client_ready(clp, error == 0 ? NFS_CS_READY : error);
+ if (error < 0) {
+ nfs_put_client(clp);
+ clp = ERR_PTR(error);
+ }
return clp;
-
-error:
- nfs_mark_client_ready(clp, error);
- nfs_put_client(clp);
- dprintk("<-- nfs_init_client() = xerror %d\n", error);
- return ERR_PTR(error);
}
EXPORT_SYMBOL_GPL(nfs_init_client);
@@ -668,8 +651,6 @@ static int nfs_init_server(struct nfs_server *server,
struct nfs_client *clp;
int error;
- dprintk("--> nfs_init_server()\n");
-
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
data->timeo, data->retrans);
if (data->flags & NFS_MOUNT_NORESVPORT)
@@ -677,10 +658,8 @@ static int nfs_init_server(struct nfs_server *server,
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
- if (IS_ERR(clp)) {
- dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+ if (IS_ERR(clp))
return PTR_ERR(clp);
- }
server->nfs_client = clp;
@@ -725,13 +704,11 @@ static int nfs_init_server(struct nfs_server *server,
server->mountd_protocol = data->mount_server.protocol;
server->namelen = data->namlen;
- dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
return 0;
error:
server->nfs_client = NULL;
nfs_put_client(clp);
- dprintk("<-- nfs_init_server() = xerror %d\n", error);
return error;
}
@@ -798,12 +775,10 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
struct nfs_client *clp = server->nfs_client;
int error;
- dprintk("--> nfs_probe_fsinfo()\n");
-
if (clp->rpc_ops->set_capabilities != NULL) {
error = clp->rpc_ops->set_capabilities(server, mntfh);
if (error < 0)
- goto out_error;
+ return error;
}
fsinfo.fattr = fattr;
@@ -811,7 +786,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
if (error < 0)
- goto out_error;
+ return error;
nfs_server_set_fsinfo(server, &fsinfo);
@@ -826,12 +801,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
server->namelen = pathinfo.max_namelen;
}
- dprintk("<-- nfs_probe_fsinfo() = 0\n");
return 0;
-
-out_error:
- dprintk("nfs_probe_fsinfo: error = %d\n", -error);
- return error;
}
EXPORT_SYMBOL_GPL(nfs_probe_fsinfo);
@@ -927,8 +897,6 @@ EXPORT_SYMBOL_GPL(nfs_alloc_server);
*/
void nfs_free_server(struct nfs_server *server)
{
- dprintk("--> nfs_free_server()\n");
-
nfs_server_remove_lists(server);
if (server->destroy != NULL)
@@ -946,7 +914,6 @@ void nfs_free_server(struct nfs_server *server)
nfs_free_iostats(server->io_stats);
kfree(server);
nfs_release_automount_timer();
- dprintk("<-- nfs_free_server()\n");
}
EXPORT_SYMBOL_GPL(nfs_free_server);
@@ -1026,10 +993,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
struct nfs_fattr *fattr_fsinfo;
int error;
- dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
- (unsigned long long) fattr->fsid.major,
- (unsigned long long) fattr->fsid.minor);
-
server = nfs_alloc_server();
if (!server)
return ERR_PTR(-ENOMEM);
@@ -1061,10 +1024,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
- dprintk("Cloned FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
-
error = nfs_start_lockd(server);
if (error < 0)
goto out_free_server;
@@ -1073,13 +1032,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
server->mount_time = jiffies;
nfs_free_fattr(fattr_fsinfo);
- dprintk("<-- nfs_clone_server() = %p\n", server);
return server;
out_free_server:
nfs_free_fattr(fattr_fsinfo);
nfs_free_server(server);
- dprintk("<-- nfs_clone_server() = error %d\n", error);
return ERR_PTR(error);
}
EXPORT_SYMBOL_GPL(nfs_clone_server);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f92ba8d6c556..32ccd7754f8a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -57,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
.read = generic_read_dir,
- .iterate_shared = nfs_readdir,
+ .iterate = nfs_readdir,
.open = nfs_opendir,
.release = nfs_closedir,
.fsync = nfs_fsync_dir,
@@ -145,7 +145,6 @@ struct nfs_cache_array_entry {
};
struct nfs_cache_array {
- atomic_t refcount;
int size;
int eof_index;
u64 last_cookie;
@@ -171,27 +170,6 @@ typedef struct {
} nfs_readdir_descriptor_t;
/*
- * The caller is responsible for calling nfs_readdir_release_array(page)
- */
-static
-struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
-{
- void *ptr;
- if (page == NULL)
- return ERR_PTR(-EIO);
- ptr = kmap(page);
- if (ptr == NULL)
- return ERR_PTR(-ENOMEM);
- return ptr;
-}
-
-static
-void nfs_readdir_release_array(struct page *page)
-{
- kunmap(page);
-}
-
-/*
* we are freeing strings created by nfs_add_to_readdir_array()
*/
static
@@ -201,18 +179,9 @@ void nfs_readdir_clear_array(struct page *page)
int i;
array = kmap_atomic(page);
- if (atomic_dec_and_test(&array->refcount))
- for (i = 0; i < array->size; i++)
- kfree(array->array[i].string.name);
- kunmap_atomic(array);
-}
-
-static bool grab_page(struct page *page)
-{
- struct nfs_cache_array *array = kmap_atomic(page);
- bool res = atomic_inc_not_zero(&array->refcount);
+ for (i = 0; i < array->size; i++)
+ kfree(array->array[i].string.name);
kunmap_atomic(array);
- return res;
}
/*
@@ -239,13 +208,10 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
static
int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
{
- struct nfs_cache_array *array = nfs_readdir_get_array(page);
+ struct nfs_cache_array *array = kmap(page);
struct nfs_cache_array_entry *cache_entry;
int ret;
- if (IS_ERR(array))
- return PTR_ERR(array);
-
cache_entry = &array->array[array->size];
/* Check that this entry lies within the page bounds */
@@ -264,7 +230,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
if (entry->eof != 0)
array->eof_index = array->size;
out:
- nfs_readdir_release_array(page);
+ kunmap(page);
return ret;
}
@@ -353,11 +319,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
struct nfs_cache_array *array;
int status;
- array = nfs_readdir_get_array(desc->page);
- if (IS_ERR(array)) {
- status = PTR_ERR(array);
- goto out;
- }
+ array = kmap(desc->page);
if (*desc->dir_cookie == 0)
status = nfs_readdir_search_for_pos(array, desc);
@@ -369,8 +331,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
desc->current_index += array->size;
desc->page_index++;
}
- nfs_readdir_release_array(desc->page);
-out:
+ kunmap(desc->page);
return status;
}
@@ -606,13 +567,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
out_nopages:
if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
- array = nfs_readdir_get_array(page);
- if (!IS_ERR(array)) {
- array->eof_index = array->size;
- status = 0;
- nfs_readdir_release_array(page);
- } else
- status = PTR_ERR(array);
+ array = kmap(page);
+ array->eof_index = array->size;
+ status = 0;
+ kunmap(page);
}
put_page(scratch);
@@ -674,13 +632,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
goto out;
}
- array = nfs_readdir_get_array(page);
- if (IS_ERR(array)) {
- status = PTR_ERR(array);
- goto out_label_free;
- }
+ array = kmap(page);
memset(array, 0, sizeof(struct nfs_cache_array));
- atomic_set(&array->refcount, 1);
array->eof_index = -1;
status = nfs_readdir_alloc_pages(pages, array_size);
@@ -703,8 +656,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
nfs_readdir_free_pages(pages, array_size);
out_release_array:
- nfs_readdir_release_array(page);
-out_label_free:
+ kunmap(page);
nfs4_label_free(entry.label);
out:
nfs_free_fattr(entry.fattr);
@@ -743,7 +695,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
static
void cache_page_release(nfs_readdir_descriptor_t *desc)
{
- nfs_readdir_clear_array(desc->page);
+ if (!desc->page->mapping)
+ nfs_readdir_clear_array(desc->page);
put_page(desc->page);
desc->page = NULL;
}
@@ -751,16 +704,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
static
struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
{
- struct page *page;
-
- for (;;) {
- page = read_cache_page(desc->file->f_mapping,
+ return read_cache_page(desc->file->f_mapping,
desc->page_index, (filler_t *)nfs_readdir_filler, desc);
- if (IS_ERR(page) || grab_page(page))
- break;
- put_page(page);
- }
- return page;
}
/*
@@ -809,12 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
struct nfs_cache_array *array = NULL;
struct nfs_open_dir_context *ctx = file->private_data;
- array = nfs_readdir_get_array(desc->page);
- if (IS_ERR(array)) {
- res = PTR_ERR(array);
- goto out;
- }
-
+ array = kmap(desc->page);
for (i = desc->cache_entry_index; i < array->size; i++) {
struct nfs_cache_array_entry *ent;
@@ -835,8 +775,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
if (array->eof_index >= 0)
desc->eof = 1;
- nfs_readdir_release_array(desc->page);
-out:
+ kunmap(desc->page);
cache_page_release(desc);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
(unsigned long long)*desc->dir_cookie, res);
@@ -966,11 +905,13 @@ out:
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
{
+ struct inode *inode = file_inode(filp);
struct nfs_open_dir_context *dir_ctx = filp->private_data;
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
filp, offset, whence);
+ inode_lock(inode);
switch (whence) {
case 1:
offset += filp->f_pos;
@@ -978,13 +919,16 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
if (offset >= 0)
break;
default:
- return -EINVAL;
+ offset = -EINVAL;
+ goto out;
}
if (offset != filp->f_pos) {
filp->f_pos = offset;
dir_ctx->dir_cookie = 0;
dir_ctx->duped = 0;
}
+out:
+ inode_unlock(inode);
return offset;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c1b5fed7c863..6fb9fad2d1e6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -392,16 +392,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
nfs_direct_req_release(dreq);
}
-static void nfs_direct_readpage_release(struct nfs_page *req)
-{
- dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
- req->wb_context->dentry->d_sb->s_id,
- (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
- req->wb_bytes,
- (long long)req_offset(req));
- nfs_release_request(req);
-}
-
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;
@@ -426,7 +416,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
set_page_dirty(page);
bytes += req->wb_bytes;
nfs_list_remove_request(req);
- nfs_direct_readpage_release(req);
+ nfs_release_request(req);
}
out_put:
if (put_dreq(dreq))
@@ -700,16 +690,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
int status = data->task.tk_status;
nfs_init_cinfo_from_dreq(&cinfo, dreq);
- if (status < 0) {
- dprintk("NFS: %5u commit failed with error %d.\n",
- data->task.tk_pid, status);
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
- dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
+ if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data))
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- }
- dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 668213984d68..5713eb32a45e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -482,7 +482,7 @@ static int nfs_launder_page(struct page *page)
inode->i_ino, (long long)page_offset(page));
nfs_fscache_wait_on_page_write(nfsi, page);
- return nfs_wb_launder_page(inode, page);
+ return nfs_wb_page(inode, page);
}
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -697,14 +697,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
if (!IS_ERR(l_ctx)) {
status = nfs_iocounter_wait(l_ctx);
nfs_put_lock_context(l_ctx);
- if (status < 0)
+ /* NOTE: special case
+ * If we're signalled while cleaning up locks on process exit, we
+ * still need to complete the unlock.
+ */
+ if (status < 0 && !(fl->fl_flags & FL_CLOSE))
return status;
}
- /* NOTE: special case
- * If we're signalled while cleaning up locks on process exit, we
- * still need to complete the unlock.
- */
/*
* Use local locking if mounted with "-onolock" or with appropriate
* "-olocal_lock="
@@ -820,9 +820,23 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
- /* We're simulating flock() locks using posix locks on the server */
- if (fl->fl_type == F_UNLCK)
+ /*
+ * VFS doesn't require the open mode to match a flock() lock's type.
+ * NFS, however, may simulate flock() locking with posix locking which
+ * requires the open mode to match the lock type.
+ */
+ switch (fl->fl_type) {
+ case F_UNLCK:
return do_unlk(filp, cmd, fl, is_local);
+ case F_RDLCK:
+ if (!(filp->f_mode & FMODE_READ))
+ return -EBADF;
+ break;
+ case F_WRLCK:
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+ }
+
return do_setlk(filp, cmd, fl, is_local);
}
EXPORT_SYMBOL_GPL(nfs_flock);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index acd30baca461..1cf85d65b748 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -921,11 +921,11 @@ fl_pnfs_update_layout(struct inode *ino,
fl = FILELAYOUT_LSEG(lseg);
status = filelayout_check_deviceid(lo, fl, gfp_flags);
- if (status)
+ if (status) {
+ pnfs_put_lseg(lseg);
lseg = ERR_PTR(status);
+ }
out:
- if (IS_ERR(lseg))
- pnfs_put_lseg(lseg);
return lseg;
}
@@ -933,6 +933,7 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
+ pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -959,6 +960,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo;
int status;
+ pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
req->wb_context,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 42dedf2d625f..f5714ee01000 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -846,6 +846,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
int ds_idx;
retry:
+ pnfs_generic_pg_check_layout(pgio);
/* Use full layout for now */
if (!pgio->pg_lseg)
ff_layout_pg_get_read(pgio, req, false);
@@ -894,6 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
int status;
retry:
+ pnfs_generic_pg_check_layout(pgio);
if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -1800,16 +1802,16 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
if (!ds)
- return PNFS_NOT_ATTEMPTED;
+ goto out_failed;
ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
hdr->inode);
if (IS_ERR(ds_clnt))
- return PNFS_NOT_ATTEMPTED;
+ goto out_failed;
ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
if (!ds_cred)
- return PNFS_NOT_ATTEMPTED;
+ goto out_failed;
vers = nfs4_ff_layout_ds_version(lseg, idx);
@@ -1839,6 +1841,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
sync, RPC_TASK_SOFTCONN);
put_rpccred(ds_cred);
return PNFS_ATTEMPTED;
+
+out_failed:
+ if (ff_layout_avoid_mds_available_ds(lseg))
+ return PNFS_TRY_AGAIN;
+ return PNFS_NOT_ATTEMPTED;
}
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -2354,10 +2361,21 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
return 0;
}
+static int
+ff_layout_set_layoutdriver(struct nfs_server *server,
+ const struct nfs_fh *dummy)
+{
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+ server->caps |= NFS_CAP_LAYOUTSTATS;
+#endif
+ return 0;
+}
+
static struct pnfs_layoutdriver_type flexfilelayout_type = {
.id = LAYOUT_FLEX_FILES,
.name = "LAYOUT_FLEX_FILES",
.owner = THIS_MODULE,
+ .set_layoutdriver = ff_layout_set_layoutdriver,
.alloc_layout_hdr = ff_layout_alloc_layout_hdr,
.free_layout_hdr = ff_layout_free_layout_hdr,
.alloc_lseg = ff_layout_alloc_lseg,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 457cfeb1d5c1..6df7a0cf5660 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -119,7 +119,13 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE)
ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE;
- if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) {
+ /*
+ * check for valid major/minor combination.
+ * currently we support dataserver which talk:
+ * v3, v4.0, v4.1, v4.2
+ */
+ if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) ||
+ (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) {
dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__,
i, ds_versions[i].version,
ds_versions[i].minor_version);
@@ -415,7 +421,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror->mirror_ds->ds_versions[0].minor_version);
/* connect success, check rsize/wsize limit */
- if (ds->ds_clp) {
+ if (!status) {
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f489a5a71bd5..1de93ba78dc9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -734,7 +734,10 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
if (need_atime || nfs_need_revalidate_inode(inode)) {
struct nfs_server *server = NFS_SERVER(inode);
- nfs_readdirplus_parent_cache_miss(path->dentry);
+ if (!(server->flags & NFS_MOUNT_NOAC))
+ nfs_readdirplus_parent_cache_miss(path->dentry);
+ else
+ nfs_readdirplus_parent_cache_hit(path->dentry);
err = __nfs_revalidate_inode(server, inode);
} else
nfs_readdirplus_parent_cache_hit(path->dentry);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b38fedb7e03..e9b4c3320e37 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -495,7 +495,6 @@ void nfs_mark_request_commit(struct nfs_page *req,
u32 ds_commit_idx);
int nfs_write_need_commit(struct nfs_pgio_header *);
void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
-int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
@@ -756,9 +755,13 @@ static inline bool nfs_error_is_fatal(int err)
{
switch (err) {
case -ERESTARTSYS:
+ case -EACCES:
+ case -EDQUOT:
+ case -EFBIG:
case -EIO:
case -ENOSPC:
case -EROFS:
+ case -ESTALE:
case -E2BIG:
return true;
default:
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 786f17580582..1a224a33a6c2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -143,11 +143,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
struct nfs_fh *fh = NULL;
struct nfs_fattr *fattr = NULL;
- dprintk("--> nfs_d_automount()\n");
-
- mnt = ERR_PTR(-ESTALE);
if (IS_ROOT(path->dentry))
- goto out_nofree;
+ return ERR_PTR(-ESTALE);
mnt = ERR_PTR(-ENOMEM);
fh = nfs_alloc_fhandle();
@@ -155,13 +152,10 @@ struct vfsmount *nfs_d_automount(struct path *path)
if (fh == NULL || fattr == NULL)
goto out;
- dprintk("%s: enter\n", __func__);
-
mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
if (IS_ERR(mnt))
goto out;
- dprintk("%s: done, success\n", __func__);
mntget(mnt); /* prevent immediate expiration */
mnt_set_expiry(mnt, &nfs_automount_list);
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
@@ -169,11 +163,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
out:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
-out_nofree:
- if (IS_ERR(mnt))
- dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
- else
- dprintk("<-- %s() = %p\n", __func__, mnt);
return mnt;
}
@@ -248,27 +237,20 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
.fattr = fattr,
.authflavor = authflavor,
};
- struct vfsmount *mnt = ERR_PTR(-ENOMEM);
+ struct vfsmount *mnt;
char *page = (char *) __get_free_page(GFP_USER);
char *devname;
- dprintk("--> nfs_do_submount()\n");
-
- dprintk("%s: submounting on %pd2\n", __func__,
- dentry);
if (page == NULL)
- goto out;
+ return ERR_PTR(-ENOMEM);
+
devname = nfs_devname(dentry, page, PAGE_SIZE);
- mnt = (struct vfsmount *)devname;
if (IS_ERR(devname))
- goto free_page;
- mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
-free_page:
- free_page((unsigned long)page);
-out:
- dprintk("%s: done\n", __func__);
+ mnt = (struct vfsmount *)devname;
+ else
+ mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
- dprintk("<-- nfs_do_submount() = %p\n", mnt);
+ free_page((unsigned long)page);
return mnt;
}
EXPORT_SYMBOL_GPL(nfs_do_submount);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index dc925b531f32..0c07b567118d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -865,12 +865,63 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
}
+static void nfs3_nlm_alloc_call(void *data)
+{
+ struct nfs_lock_context *l_ctx = data;
+ if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
+ get_nfs_open_context(l_ctx->open_context);
+ nfs_get_lock_context(l_ctx->open_context);
+ }
+}
+
+static bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data)
+{
+ struct nfs_lock_context *l_ctx = data;
+ if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags))
+ return nfs_async_iocounter_wait(task, l_ctx);
+ return false;
+
+}
+
+static void nfs3_nlm_release_call(void *data)
+{
+ struct nfs_lock_context *l_ctx = data;
+ struct nfs_open_context *ctx;
+ if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
+ ctx = l_ctx->open_context;
+ nfs_put_lock_context(l_ctx);
+ put_nfs_open_context(ctx);
+ }
+}
+
+const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
+ .nlmclnt_alloc_call = nfs3_nlm_alloc_call,
+ .nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare,
+ .nlmclnt_release_call = nfs3_nlm_release_call,
+};
+
static int
nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = file_inode(filp);
+ struct nfs_lock_context *l_ctx = NULL;
+ struct nfs_open_context *ctx = nfs_file_open_context(filp);
+ int status;
- return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
+ if (fl->fl_flags & FL_CLOSE) {
+ l_ctx = nfs_get_lock_context(ctx);
+ if (IS_ERR(l_ctx))
+ l_ctx = NULL;
+ else
+ set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
+ }
+
+ status = nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, l_ctx);
+
+ if (l_ctx)
+ nfs_put_lock_context(l_ctx);
+
+ return status;
}
static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
@@ -921,6 +972,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.dir_inode_ops = &nfs3_dir_inode_operations,
.file_inode_ops = &nfs3_file_inode_operations,
.file_ops = &nfs_file_operations,
+ .nlmclnt_ops = &nlmclnt_fl_close_lock_ops,
.getroot = nfs3_proc_get_root,
.submount = nfs_submount,
.try_mount = nfs_try_mount,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 1e486c73ec94..929d09a5310a 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -167,23 +167,29 @@ static ssize_t _nfs42_proc_copy(struct file *src,
if (status)
return status;
+ res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+ if (!res->commit_res.verf)
+ return -ENOMEM;
status = nfs4_call_sync(server->client, server, &msg,
&args->seq_args, &res->seq_res, 0);
if (status == -ENOTSUPP)
server->caps &= ~NFS_CAP_COPY;
if (status)
- return status;
+ goto out;
- if (res->write_res.verifier.committed != NFS_FILE_SYNC) {
- status = nfs_commit_file(dst, &res->write_res.verifier.verifier);
- if (status)
- return status;
+ if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+ &res->commit_res.verf->verifier)) {
+ status = -EAGAIN;
+ goto out;
}
truncate_pagecache_range(dst_inode, pos_dst,
pos_dst + res->write_res.count);
- return res->write_res.count;
+ status = res->write_res.count;
+out:
+ kfree(res->commit_res.verf);
+ return status;
}
ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
@@ -240,6 +246,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
if (err == -ENOTSUPP) {
err = -EOPNOTSUPP;
break;
+ } if (err == -EAGAIN) {
+ dst_exception.retry = 1;
+ continue;
}
err2 = nfs4_handle_exception(server, err, &src_exception);
@@ -379,6 +388,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
pnfs_mark_layout_stateid_invalid(lo, &head);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
+ nfs_commit_inode(inode, 0);
} else
spin_unlock(&inode->i_lock);
break;
@@ -400,8 +410,6 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
case -EOPNOTSUPP:
NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
}
-
- dprintk("%s server returns %d\n", __func__, task->tk_status);
}
static void
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 6c7296454bbc..528362f69cc1 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -66,12 +66,14 @@
encode_putfh_maxsz + \
encode_savefh_maxsz + \
encode_putfh_maxsz + \
- encode_copy_maxsz)
+ encode_copy_maxsz + \
+ encode_commit_maxsz)
#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_savefh_maxsz + \
decode_putfh_maxsz + \
- decode_copy_maxsz)
+ decode_copy_maxsz + \
+ decode_commit_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_deallocate_maxsz + \
@@ -222,6 +224,18 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
encode_nops(&hdr);
}
+static void encode_copy_commit(struct xdr_stream *xdr,
+ struct nfs42_copy_args *args,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
+ p = reserve_space(xdr, 12);
+ p = xdr_encode_hyper(p, args->dst_pos);
+ *p = cpu_to_be32(args->count);
+}
+
/*
* Encode COPY request
*/
@@ -239,6 +253,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
encode_savefh(xdr, &hdr);
encode_putfh(xdr, args->dst_fh, &hdr);
encode_copy(xdr, args, &hdr);
+ encode_copy_commit(xdr, args, &hdr);
encode_nops(&hdr);
}
@@ -481,6 +496,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
if (status)
goto out;
status = decode_copy(xdr, res);
+ if (status)
+ goto out;
+ status = decode_commit(xdr, &res->commit_res);
out:
return status;
}
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8346ccbf2d52..692a7a8bfc7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -359,11 +359,9 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
struct nfs_client *old;
int error;
- if (clp->cl_cons_state == NFS_CS_READY) {
+ if (clp->cl_cons_state == NFS_CS_READY)
/* the client is initialised already */
- dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
return clp;
- }
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
@@ -421,7 +419,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error:
nfs_mark_client_ready(clp, error);
nfs_put_client(clp);
- dprintk("<-- nfs4_init_client() = xerror %d\n", error);
return ERR_PTR(error);
}
@@ -469,6 +466,50 @@ static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2)
return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0;
}
+static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new,
+ struct nfs_client **prev, struct nfs_net *nn)
+{
+ int status;
+
+ if (pos->rpc_ops != new->rpc_ops)
+ return 1;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ return 1;
+
+ /* If "pos" isn't marked ready, we can't trust the
+ * remaining fields in "pos", especially the client
+ * ID and serverowner fields. Wait for CREATE_SESSION
+ * to finish. */
+ if (pos->cl_cons_state > NFS_CS_READY) {
+ atomic_inc(&pos->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+
+ nfs_put_client(*prev);
+ *prev = pos;
+
+ status = nfs_wait_client_init_complete(pos);
+ spin_lock(&nn->nfs_client_lock);
+
+ if (status < 0)
+ return status;
+ }
+
+ if (pos->cl_cons_state != NFS_CS_READY)
+ return 1;
+
+ if (pos->cl_clientid != new->cl_clientid)
+ return 1;
+
+ /* NFSv4.1 always uses the uniform string, however someone
+ * might switch the uniquifier string on us.
+ */
+ if (!nfs4_match_client_owner_id(pos, new))
+ return 1;
+
+ return 0;
+}
+
/**
* nfs40_walk_client_list - Find server that recognizes a client ID
*
@@ -497,34 +538,10 @@ int nfs40_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
- /* If "pos" isn't marked ready, we can't trust the
- * remaining fields in "pos" */
- if (pos->cl_cons_state > NFS_CS_READY) {
- atomic_inc(&pos->cl_count);
- spin_unlock(&nn->nfs_client_lock);
-
- nfs_put_client(prev);
- prev = pos;
-
- status = nfs_wait_client_init_complete(pos);
- if (status < 0)
- goto out;
- status = -NFS4ERR_STALE_CLIENTID;
- spin_lock(&nn->nfs_client_lock);
- }
- if (pos->cl_cons_state != NFS_CS_READY)
- continue;
-
- if (pos->cl_clientid != new->cl_clientid)
- continue;
-
- if (!nfs4_match_client_owner_id(pos, new))
+ status = nfs4_match_client(pos, new, &prev, nn);
+ if (status < 0)
+ goto out_unlock;
+ if (status != 0)
continue;
/*
* We just sent a new SETCLIENTID, which should have
@@ -557,8 +574,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
prev = NULL;
*result = pos;
- dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
- __func__, pos, atomic_read(&pos->cl_count));
goto out;
case -ERESTARTSYS:
case -ETIMEDOUT:
@@ -567,37 +582,23 @@ int nfs40_walk_client_list(struct nfs_client *new,
*/
nfs4_schedule_path_down_recovery(pos);
default:
+ spin_lock(&nn->nfs_client_lock);
goto out;
}
spin_lock(&nn->nfs_client_lock);
}
+out_unlock:
spin_unlock(&nn->nfs_client_lock);
/* No match found. The server lost our clientid */
out:
nfs_put_client(prev);
- dprintk("NFS: <-- %s status = %d\n", __func__, status);
return status;
}
#ifdef CONFIG_NFS_V4_1
/*
- * Returns true if the client IDs match
- */
-static bool nfs4_match_clientids(u64 a, u64 b)
-{
- if (a != b) {
- dprintk("NFS: --> %s client ID %llx does not match %llx\n",
- __func__, a, b);
- return false;
- }
- dprintk("NFS: --> %s client ID %llx matches %llx\n",
- __func__, a, b);
- return true;
-}
-
-/*
* Returns true if the server major ids match
*/
static bool
@@ -605,36 +606,8 @@ nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
struct nfs41_server_owner *o2)
{
if (o1->major_id_sz != o2->major_id_sz)
- goto out_major_mismatch;
- if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0)
- goto out_major_mismatch;
-
- dprintk("NFS: --> %s server owner major IDs match\n", __func__);
- return true;
-
-out_major_mismatch:
- dprintk("NFS: --> %s server owner major IDs do not match\n",
- __func__);
- return false;
-}
-
-/*
- * Returns true if server minor ids match
- */
-static bool
-nfs4_check_serverowner_minor_id(struct nfs41_server_owner *o1,
- struct nfs41_server_owner *o2)
-{
- /* Check eir_server_owner so_minor_id */
- if (o1->minor_id != o2->minor_id)
- goto out_minor_mismatch;
-
- dprintk("NFS: --> %s server owner minor IDs match\n", __func__);
- return true;
-
-out_minor_mismatch:
- dprintk("NFS: --> %s server owner minor IDs do not match\n", __func__);
- return false;
+ return false;
+ return memcmp(o1->major_id, o2->major_id, o1->major_id_sz) == 0;
}
/*
@@ -645,18 +618,9 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1,
struct nfs41_server_scope *s2)
{
if (s1->server_scope_sz != s2->server_scope_sz)
- goto out_scope_mismatch;
- if (memcmp(s1->server_scope, s2->server_scope,
- s1->server_scope_sz) != 0)
- goto out_scope_mismatch;
-
- dprintk("NFS: --> %s server scopes match\n", __func__);
- return true;
-
-out_scope_mismatch:
- dprintk("NFS: --> %s server scopes do not match\n",
- __func__);
- return false;
+ return false;
+ return memcmp(s1->server_scope, s2->server_scope,
+ s1->server_scope_sz) == 0;
}
/**
@@ -680,7 +644,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
struct rpc_xprt *xprt)
{
/* Check eir_clientid */
- if (!nfs4_match_clientids(clp->cl_clientid, res->clientid))
+ if (clp->cl_clientid != res->clientid)
goto out_err;
/* Check eir_server_owner so_major_id */
@@ -689,8 +653,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
goto out_err;
/* Check eir_server_owner so_minor_id */
- if (!nfs4_check_serverowner_minor_id(clp->cl_serverowner,
- res->server_owner))
+ if (clp->cl_serverowner->minor_id != res->server_owner->minor_id)
goto out_err;
/* Check eir_server_scope */
@@ -739,33 +702,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
if (pos == new)
goto found;
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
- /* If "pos" isn't marked ready, we can't trust the
- * remaining fields in "pos", especially the client
- * ID and serverowner fields. Wait for CREATE_SESSION
- * to finish. */
- if (pos->cl_cons_state > NFS_CS_READY) {
- atomic_inc(&pos->cl_count);
- spin_unlock(&nn->nfs_client_lock);
-
- nfs_put_client(prev);
- prev = pos;
-
- status = nfs_wait_client_init_complete(pos);
- spin_lock(&nn->nfs_client_lock);
- if (status < 0)
- break;
- status = -NFS4ERR_STALE_CLIENTID;
- }
- if (pos->cl_cons_state != NFS_CS_READY)
- continue;
-
- if (!nfs4_match_clientids(pos->cl_clientid, new->cl_clientid))
+ status = nfs4_match_client(pos, new, &prev, nn);
+ if (status < 0)
+ goto out;
+ if (status != 0)
continue;
/*
@@ -777,23 +717,15 @@ int nfs41_walk_client_list(struct nfs_client *new,
new->cl_serverowner))
continue;
- /* Unlike NFSv4.0, we know that NFSv4.1 always uses the
- * uniform string, however someone might switch the
- * uniquifier string on us.
- */
- if (!nfs4_match_client_owner_id(pos, new))
- continue;
found:
atomic_inc(&pos->cl_count);
*result = pos;
status = 0;
- dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
- __func__, pos, atomic_read(&pos->cl_count));
break;
}
+out:
spin_unlock(&nn->nfs_client_lock);
- dprintk("NFS: <-- %s status = %d\n", __func__, status);
nfs_put_client(prev);
return status;
}
@@ -916,9 +848,6 @@ static int nfs4_set_client(struct nfs_server *server,
.timeparms = timeparms,
};
struct nfs_client *clp;
- int error;
-
- dprintk("--> nfs4_set_client()\n");
if (server->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -927,15 +856,11 @@ static int nfs4_set_client(struct nfs_server *server,
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
- if (IS_ERR(clp)) {
- error = PTR_ERR(clp);
- goto error;
- }
+ if (IS_ERR(clp))
+ return PTR_ERR(clp);
- if (server->nfs_client == clp) {
- error = -ELOOP;
- goto error;
- }
+ if (server->nfs_client == clp)
+ return -ELOOP;
/*
* Query for the lease time on clientid setup or renewal
@@ -947,11 +872,7 @@ static int nfs4_set_client(struct nfs_server *server,
set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
server->nfs_client = clp;
- dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
return 0;
-error:
- dprintk("<-- nfs4_set_client() = xerror %d\n", error);
- return error;
}
/*
@@ -982,7 +903,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
};
- struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
@@ -998,10 +918,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
* (section 13.1 RFC 5661).
*/
nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
- clp = nfs_get_client(&cl_init);
-
- dprintk("<-- %s %p\n", __func__, clp);
- return clp;
+ return nfs_get_client(&cl_init);
}
EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
@@ -1098,8 +1015,6 @@ static int nfs4_init_server(struct nfs_server *server,
struct rpc_timeout timeparms;
int error;
- dprintk("--> nfs4_init_server()\n");
-
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
data->timeo, data->retrans);
@@ -1127,7 +1042,7 @@ static int nfs4_init_server(struct nfs_server *server,
data->minorversion,
data->net);
if (error < 0)
- goto error;
+ return error;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1138,16 +1053,10 @@ static int nfs4_init_server(struct nfs_server *server,
server->acregmax = data->acregmax * HZ;
server->acdirmin = data->acdirmin * HZ;
server->acdirmax = data->acdirmax * HZ;
+ server->port = data->nfs_server.port;
- server->port = data->nfs_server.port;
-
- error = nfs_init_server_rpcclient(server, &timeparms,
- data->selected_flavor);
-
-error:
- /* Done */
- dprintk("<-- nfs4_init_server() = %d\n", error);
- return error;
+ return nfs_init_server_rpcclient(server, &timeparms,
+ data->selected_flavor);
}
/*
@@ -1163,8 +1072,6 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
bool auth_probe;
int error;
- dprintk("--> nfs4_create_server()\n");
-
server = nfs_alloc_server();
if (!server)
return ERR_PTR(-ENOMEM);
@@ -1180,12 +1087,10 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
if (error < 0)
goto error;
- dprintk("<-- nfs4_create_server() = %p\n", server);
return server;
error:
nfs_free_server(server);
- dprintk("<-- nfs4_create_server() = error %d\n", error);
return ERR_PTR(error);
}
@@ -1200,8 +1105,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
bool auth_probe;
int error;
- dprintk("--> nfs4_create_referral_server()\n");
-
server = nfs_alloc_server();
if (!server)
return ERR_PTR(-ENOMEM);
@@ -1235,12 +1138,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- dprintk("<-- nfs_create_referral_server() = %p\n", server);
return server;
error:
nfs_free_server(server);
- dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
}
@@ -1300,31 +1201,16 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
struct sockaddr *localaddr = (struct sockaddr *)&address;
int error;
- dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__,
- (unsigned long long)server->fsid.major,
- (unsigned long long)server->fsid.minor,
- hostname);
-
error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
- if (error != 0) {
- dprintk("<-- %s(): rpc_switch_client_transport returned %d\n",
- __func__, error);
- goto out;
- }
+ if (error != 0)
+ return error;
error = rpc_localaddr(clnt, localaddr, sizeof(address));
- if (error != 0) {
- dprintk("<-- %s(): rpc_localaddr returned %d\n",
- __func__, error);
- goto out;
- }
+ if (error != 0)
+ return error;
- error = -EAFNOSUPPORT;
- if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) {
- dprintk("<-- %s(): rpc_ntop returned %d\n",
- __func__, error);
- goto out;
- }
+ if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0)
+ return -EAFNOSUPPORT;
nfs_server_remove_lists(server);
error = nfs4_set_client(server, hostname, sap, salen, buf,
@@ -1333,21 +1219,12 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
nfs_put_client(clp);
if (error != 0) {
nfs_server_insert_lists(server);
- dprintk("<-- %s(): nfs4_set_client returned %d\n",
- __func__, error);
- goto out;
+ return error;
}
if (server->nfs_client->cl_hostname == NULL)
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
nfs_server_insert_lists(server);
- error = nfs_probe_destination(server);
- if (error < 0)
- goto out;
-
- dprintk("<-- %s() succeeded\n", __func__);
-
-out:
- return error;
+ return nfs_probe_destination(server);
}
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 039b3eb6d834..ac8406018962 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -14,8 +14,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
struct nfs_fsinfo fsinfo;
int ret = -ENOMEM;
- dprintk("--> nfs4_get_rootfh()\n");
-
fsinfo.fattr = nfs_alloc_fattr();
if (fsinfo.fattr == NULL)
goto out;
@@ -38,6 +36,5 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
out:
nfs_free_fattr(fsinfo.fattr);
- dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
return ret;
}
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index d8b040bd9814..7d531da1bae3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -340,7 +340,6 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
out:
free_page((unsigned long) page);
free_page((unsigned long) page2);
- dprintk("%s: done\n", __func__);
return mnt;
}
@@ -358,11 +357,9 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
int err;
/* BUG_ON(IS_ROOT(dentry)); */
- dprintk("%s: enter\n", __func__);
-
page = alloc_page(GFP_KERNEL);
if (page == NULL)
- goto out;
+ return mnt;
fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
if (fs_locations == NULL)
@@ -386,8 +383,6 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
out_free:
__free_page(page);
kfree(fs_locations);
-out:
- dprintk("%s: done\n", __func__);
return mnt;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 201ca3f2c4ba..c08c46a3b8cd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -698,7 +698,8 @@ static int nfs41_sequence_process(struct rpc_task *task,
session = slot->table->session;
if (slot->interrupted) {
- slot->interrupted = 0;
+ if (res->sr_status != -NFS4ERR_DELAY)
+ slot->interrupted = 0;
interrupted = true;
}
@@ -2300,8 +2301,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
if (status != 0)
return status;
}
- if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
+ if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
+ nfs4_sequence_free_slot(&o_res->seq_res);
nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
+ }
return 0;
}
@@ -3265,6 +3268,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
.rpc_resp = &res,
};
int status;
+ int i;
bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS |
FATTR4_WORD0_FH_EXPIRE_TYPE |
@@ -3330,8 +3334,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
server->cache_consistency_bitmask[2] = 0;
+
+ /* Avoid a regression due to buggy server */
+ for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++)
+ res.exclcreat_bitmask[i] &= res.attr_bitmask[i];
memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask,
sizeof(server->exclcreat_bitmask));
+
server->acl_bitmask = res.acl_bitmask;
server->fh_expire_type = res.fh_expire_type;
}
@@ -4610,7 +4619,7 @@ static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
return 0;
if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
hdr->args.lock_context,
- hdr->rw_ops->rw_mode) == -EIO)
+ hdr->rw_mode) == -EIO)
return -EIO;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
return -EIO;
@@ -4804,8 +4813,10 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
if (!atomic_inc_not_zero(&clp->cl_count))
return -EIO;
data = kmalloc(sizeof(*data), GFP_NOFS);
- if (data == NULL)
+ if (data == NULL) {
+ nfs_put_client(clp);
return -ENOMEM;
+ }
data->client = clp;
data->timestamp = jiffies;
return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT,
@@ -5782,6 +5793,7 @@ struct nfs4_unlockdata {
struct nfs_locku_res res;
struct nfs4_lock_state *lsp;
struct nfs_open_context *ctx;
+ struct nfs_lock_context *l_ctx;
struct file_lock fl;
struct nfs_server *server;
unsigned long timestamp;
@@ -5806,6 +5818,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
atomic_inc(&lsp->ls_count);
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
+ p->l_ctx = nfs_get_lock_context(ctx);
memcpy(&p->fl, fl, sizeof(p->fl));
p->server = NFS_SERVER(inode);
return p;
@@ -5816,6 +5829,7 @@ static void nfs4_locku_release_calldata(void *data)
struct nfs4_unlockdata *calldata = data;
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_lock_state(calldata->lsp);
+ nfs_put_lock_context(calldata->l_ctx);
put_nfs_open_context(calldata->ctx);
kfree(calldata);
}
@@ -5857,6 +5871,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
{
struct nfs4_unlockdata *calldata = data;
+ if (test_bit(NFS_CONTEXT_UNLOCK, &calldata->l_ctx->open_context->flags) &&
+ nfs_async_iocounter_wait(task, calldata->l_ctx))
+ return;
+
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
goto out_wait;
nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
@@ -5908,6 +5926,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
* canceled lock is passed in, and it won't be an unlock.
*/
fl->fl_type = F_UNLCK;
+ if (fl->fl_flags & FL_CLOSE)
+ set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
if (data == NULL) {
@@ -6445,9 +6465,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
ctx = nfs_file_open_context(filp);
state = ctx->state;
- if (request->fl_start < 0 || request->fl_end < 0)
- return -EINVAL;
-
if (IS_GETLK(cmd)) {
if (state != NULL)
return nfs4_proc_getlk(state, F_GETLK, request);
@@ -6470,20 +6487,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
return -ENOLCK;
- /*
- * Don't rely on the VFS having checked the file open mode,
- * since it won't do this for flock() locks.
- */
- switch (request->fl_type) {
- case F_RDLCK:
- if (!(filp->f_mode & FMODE_READ))
- return -EBADF;
- break;
- case F_WRLCK:
- if (!(filp->f_mode & FMODE_WRITE))
- return -EBADF;
- }
-
status = nfs4_set_lock_state(state, request);
if (status != 0)
return status;
@@ -7155,8 +7158,6 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
};
struct rpc_task *task;
- dprintk("--> %s\n", __func__);
-
nfs4_copy_sessionid(&args.sessionid, &clp->cl_session->sess_id);
if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
args.dir = NFS4_CDFC4_FORE;
@@ -7176,24 +7177,20 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
if (memcmp(res.sessionid.data,
clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
dprintk("NFS: %s: Session ID mismatch\n", __func__);
- status = -EIO;
- goto out;
+ return -EIO;
}
if ((res.dir & args.dir) != res.dir || res.dir == 0) {
dprintk("NFS: %s: Unexpected direction from server\n",
__func__);
- status = -EIO;
- goto out;
+ return -EIO;
}
if (res.use_conn_in_rdma_mode != args.use_conn_in_rdma_mode) {
dprintk("NFS: %s: Server returned RDMA mode = true\n",
__func__);
- status = -EIO;
- goto out;
+ return -EIO;
}
}
-out:
- dprintk("<-- %s status= %d\n", __func__, status);
+
return status;
}
@@ -7459,15 +7456,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
};
struct nfs41_exchange_id_data *calldata;
struct rpc_task *task;
- int status = -EIO;
+ int status;
if (!atomic_inc_not_zero(&clp->cl_count))
- goto out;
+ return -EIO;
- status = -ENOMEM;
calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
- if (!calldata)
- goto out;
+ if (!calldata) {
+ nfs_put_client(clp);
+ return -ENOMEM;
+ }
if (!xprt)
nfs4_init_boot_verifier(clp, &verifier);
@@ -7476,10 +7474,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
if (status)
goto out_calldata;
- dprintk("NFS call exchange_id auth=%s, '%s'\n",
- clp->cl_rpcclient->cl_auth->au_ops->au_name,
- clp->cl_owner_id);
-
calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
GFP_NOFS);
status = -ENOMEM;
@@ -7545,13 +7539,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
rpc_put_task(task);
out:
- if (clp->cl_implid != NULL)
- dprintk("NFS reply exchange_id: Server Implementation ID: "
- "domain: %s, name: %s, date: %llu,%u\n",
- clp->cl_implid->domain, clp->cl_implid->name,
- clp->cl_implid->date.seconds,
- clp->cl_implid->date.nseconds);
- dprintk("NFS reply exchange_id: %d\n", status);
return status;
out_impl_id:
@@ -7769,17 +7756,13 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
nfs4_set_sequence_privileged(&args.la_seq_args);
- dprintk("--> %s\n", __func__);
task = rpc_run_task(&task_setup);
if (IS_ERR(task))
- status = PTR_ERR(task);
- else {
- status = task->tk_status;
- rpc_put_task(task);
- }
- dprintk("<-- %s return %d\n", __func__, status);
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
return status;
}
@@ -8180,6 +8163,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
/* fall through */
case -NFS4ERR_RETRY_UNCACHED_REP:
return -EAGAIN;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ nfs4_schedule_session_recovery(clp->cl_session,
+ task->tk_status);
+ break;
default:
nfs4_schedule_lease_recovery(clp);
}
@@ -8258,7 +8247,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
if (status == 0)
status = task->tk_status;
rpc_put_task(task);
- return 0;
out:
dprintk("<-- %s status=%d\n", __func__, status);
return status;
@@ -8357,6 +8345,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
*/
pnfs_mark_layout_stateid_invalid(lo, &head);
spin_unlock(&inode->i_lock);
+ nfs_commit_inode(inode, 0);
pnfs_free_lseg_list(&head);
status = -EAGAIN;
goto out;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8156bad6b441..b34de036501b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1649,13 +1649,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
}
-static void nfs4_reclaim_complete(struct nfs_client *clp,
+static int nfs4_reclaim_complete(struct nfs_client *clp,
const struct nfs4_state_recovery_ops *ops,
struct rpc_cred *cred)
{
/* Notify the server we're done reclaiming our state */
if (ops->reclaim_complete)
- (void)ops->reclaim_complete(clp, cred);
+ return ops->reclaim_complete(clp, cred);
+ return 0;
}
static void nfs4_clear_reclaim_server(struct nfs_server *server)
@@ -1702,13 +1703,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
{
const struct nfs4_state_recovery_ops *ops;
struct rpc_cred *cred;
+ int err;
if (!nfs4_state_clear_reclaim_reboot(clp))
return;
ops = clp->cl_mvops->reboot_recovery_ops;
cred = nfs4_get_clid_cred(clp);
- nfs4_reclaim_complete(clp, ops, cred);
+ err = nfs4_reclaim_complete(clp, ops, cred);
put_rpccred(cred);
+ if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
+ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
}
static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 80ce289eea05..3aebfdc82b30 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1000,8 +1000,9 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
const struct nfs4_label *label,
+ const umode_t *umask,
const struct nfs_server *server,
- bool excl_check, const umode_t *umask)
+ const uint32_t attrmask[])
{
char owner_name[IDMAP_NAMESZ];
char owner_group[IDMAP_NAMESZ];
@@ -1016,22 +1017,20 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
/*
* We reserve enough space to write the entire attribute buffer at once.
*/
- if (iap->ia_valid & ATTR_SIZE) {
+ if ((iap->ia_valid & ATTR_SIZE) && (attrmask[0] & FATTR4_WORD0_SIZE)) {
bmval[0] |= FATTR4_WORD0_SIZE;
len += 8;
}
- if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
- umask = NULL;
if (iap->ia_valid & ATTR_MODE) {
- if (umask) {
+ if (umask && (attrmask[2] & FATTR4_WORD2_MODE_UMASK)) {
bmval[2] |= FATTR4_WORD2_MODE_UMASK;
len += 8;
- } else {
+ } else if (attrmask[1] & FATTR4_WORD1_MODE) {
bmval[1] |= FATTR4_WORD1_MODE;
len += 4;
}
}
- if (iap->ia_valid & ATTR_UID) {
+ if ((iap->ia_valid & ATTR_UID) && (attrmask[1] & FATTR4_WORD1_OWNER)) {
owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
if (owner_namelen < 0) {
dprintk("nfs: couldn't resolve uid %d to string\n",
@@ -1044,7 +1043,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
bmval[1] |= FATTR4_WORD1_OWNER;
len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
}
- if (iap->ia_valid & ATTR_GID) {
+ if ((iap->ia_valid & ATTR_GID) &&
+ (attrmask[1] & FATTR4_WORD1_OWNER_GROUP)) {
owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
if (owner_grouplen < 0) {
dprintk("nfs: couldn't resolve gid %d to string\n",
@@ -1056,32 +1056,26 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
bmval[1] |= FATTR4_WORD1_OWNER_GROUP;
len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
}
- if (iap->ia_valid & ATTR_ATIME_SET) {
- bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
- len += 16;
- } else if (iap->ia_valid & ATTR_ATIME) {
- bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
- len += 4;
- }
- if (iap->ia_valid & ATTR_MTIME_SET) {
- bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
- len += 16;
- } else if (iap->ia_valid & ATTR_MTIME) {
- bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
- len += 4;
+ if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+ if (iap->ia_valid & ATTR_ATIME_SET) {
+ bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+ len += 16;
+ } else if (iap->ia_valid & ATTR_ATIME) {
+ bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+ len += 4;
+ }
}
-
- if (excl_check) {
- const u32 *excl_bmval = server->exclcreat_bitmask;
- bmval[0] &= excl_bmval[0];
- bmval[1] &= excl_bmval[1];
- bmval[2] &= excl_bmval[2];
-
- if (!(excl_bmval[2] & FATTR4_WORD2_SECURITY_LABEL))
- label = NULL;
+ if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+ if (iap->ia_valid & ATTR_MTIME_SET) {
+ bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+ len += 16;
+ } else if (iap->ia_valid & ATTR_MTIME) {
+ bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+ len += 4;
+ }
}
- if (label) {
+ if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL)) {
len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
bmval[2] |= FATTR4_WORD2_SECURITY_LABEL;
}
@@ -1188,8 +1182,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
}
encode_string(xdr, create->name->len, create->name->name);
- encode_attrs(xdr, create->attrs, create->label, create->server, false,
- &create->umask);
+ encode_attrs(xdr, create->attrs, create->label, &create->umask,
+ create->server, create->server->attr_bitmask);
}
static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
@@ -1409,13 +1403,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
switch(arg->createmode) {
case NFS4_CREATE_UNCHECKED:
*p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
- encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
- &arg->umask);
+ encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+ arg->server, arg->server->attr_bitmask);
break;
case NFS4_CREATE_GUARDED:
*p = cpu_to_be32(NFS4_CREATE_GUARDED);
- encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false,
- &arg->umask);
+ encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+ arg->server, arg->server->attr_bitmask);
break;
case NFS4_CREATE_EXCLUSIVE:
*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
@@ -1424,8 +1418,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
case NFS4_CREATE_EXCLUSIVE4_1:
*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
encode_nfs4_verifier(xdr, &arg->u.verifier);
- encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true,
- &arg->umask);
+ encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
+ arg->server, arg->server->exclcreat_bitmask);
}
}
@@ -1681,7 +1675,8 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
{
encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
encode_nfs4_stateid(xdr, &arg->stateid);
- encode_attrs(xdr, arg->iap, arg->label, server, false, NULL);
+ encode_attrs(xdr, arg->iap, arg->label, NULL, server,
+ server->attr_bitmask);
}
static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
@@ -2005,16 +2000,10 @@ encode_layoutcommit(struct xdr_stream *xdr,
*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
- if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) {
- NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
- NFS_I(inode)->layout, xdr, args);
- } else {
- encode_uint32(xdr, args->layoutupdate_len);
- if (args->layoutupdate_pages) {
- xdr_write_pages(xdr, args->layoutupdate_pages, 0,
- args->layoutupdate_len);
- }
- }
+ encode_uint32(xdr, args->layoutupdate_len);
+ if (args->layoutupdate_pages)
+ xdr_write_pages(xdr, args->layoutupdate_pages, 0,
+ args->layoutupdate_len);
return 0;
}
@@ -2024,7 +2013,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
const struct nfs4_layoutreturn_args *args,
struct compound_hdr *hdr)
{
- const struct pnfs_layoutdriver_type *lr_ops = NFS_SERVER(args->inode)->pnfs_curr_ld;
__be32 *p;
encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
@@ -2041,8 +2029,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
spin_unlock(&args->inode->i_lock);
if (args->ld_private->ops && args->ld_private->ops->encode)
args->ld_private->ops->encode(xdr, args, args->ld_private);
- else if (lr_ops->encode_layoutreturn)
- lr_ops->encode_layoutreturn(xdr, args);
else
encode_uint32(xdr, 0);
}
@@ -5579,6 +5565,8 @@ static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
unsigned int i;
p = xdr_inline_decode(xdr, 4);
+ if (!p)
+ return -EIO;
bitmap_words = be32_to_cpup(p++);
if (bitmap_words > NFS4_OP_MAP_NUM_WORDS)
return -EIO;
diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild
deleted file mode 100644
index ed30ea072bb8..000000000000
--- a/fs/nfs/objlayout/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the pNFS Objects Layout Driver kernel module
-#
-objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
-obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
deleted file mode 100644
index 049c1b1f2932..000000000000
--- a/fs/nfs/objlayout/objio_osd.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * pNFS Objects layout implementation over open-osd initiator library
- *
- * Copyright (C) 2009 Panasas Inc. [year of first publication]
- * All rights reserved.
- *
- * Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <ooo@electrozaur.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * See the file COPYING included with this distribution for more details.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Panasas company nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <scsi/osd_ore.h>
-
-#include "objlayout.h"
-#include "../internal.h"
-
-#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-
-struct objio_dev_ent {
- struct nfs4_deviceid_node id_node;
- struct ore_dev od;
-};
-
-static void
-objio_free_deviceid_node(struct nfs4_deviceid_node *d)
-{
- struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
-
- dprintk("%s: free od=%p\n", __func__, de->od.od);
- osduld_put_device(de->od.od);
- kfree_rcu(d, rcu);
-}
-
-struct objio_segment {
- struct pnfs_layout_segment lseg;
-
- struct ore_layout layout;
- struct ore_components oc;
-};
-
-static inline struct objio_segment *
-OBJIO_LSEG(struct pnfs_layout_segment *lseg)
-{
- return container_of(lseg, struct objio_segment, lseg);
-}
-
-struct objio_state {
- /* Generic layer */
- struct objlayout_io_res oir;
-
- bool sync;
- /*FIXME: Support for extra_bytes at ore_get_rw_state() */
- struct ore_io_state *ios;
-};
-
-/* Send and wait for a get_device_info of devices in the layout,
- then look them up with the osd_initiator library */
-struct nfs4_deviceid_node *
-objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
- gfp_t gfp_flags)
-{
- struct pnfs_osd_deviceaddr *deviceaddr;
- struct objio_dev_ent *ode = NULL;
- struct osd_dev *od;
- struct osd_dev_info odi;
- bool retry_flag = true;
- __be32 *p;
- int err;
-
- deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
- if (!deviceaddr)
- return NULL;
-
- p = page_address(pdev->pages[0]);
- pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
-
- odi.systemid_len = deviceaddr->oda_systemid.len;
- if (odi.systemid_len > sizeof(odi.systemid)) {
- dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
- __func__, sizeof(odi.systemid));
- err = -EINVAL;
- goto out;
- } else if (odi.systemid_len)
- memcpy(odi.systemid, deviceaddr->oda_systemid.data,
- odi.systemid_len);
- odi.osdname_len = deviceaddr->oda_osdname.len;
- odi.osdname = (u8 *)deviceaddr->oda_osdname.data;
-
- if (!odi.osdname_len && !odi.systemid_len) {
- dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
- __func__);
- err = -ENODEV;
- goto out;
- }
-
-retry_lookup:
- od = osduld_info_lookup(&odi);
- if (IS_ERR(od)) {
- err = PTR_ERR(od);
- dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
- if (err == -ENODEV && retry_flag) {
- err = objlayout_autologin(deviceaddr);
- if (likely(!err)) {
- retry_flag = false;
- goto retry_lookup;
- }
- }
- goto out;
- }
-
- dprintk("Adding new dev_id(%llx:%llx)\n",
- _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
-
- ode = kzalloc(sizeof(*ode), gfp_flags);
- if (!ode) {
- dprintk("%s: -ENOMEM od=%p\n", __func__, od);
- goto out;
- }
-
- nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
- kfree(deviceaddr);
-
- ode->od.od = od;
- return &ode->id_node;
-
-out:
- kfree(deviceaddr);
- return NULL;
-}
-
-static void copy_single_comp(struct ore_components *oc, unsigned c,
- struct pnfs_osd_object_cred *src_comp)
-{
- struct ore_comp *ocomp = &oc->comps[c];
-
- WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
- WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
-
- ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
- ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
-
- memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
-}
-
-static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
- struct objio_segment **pseg)
-{
-/* This is the in memory structure of the objio_segment
- *
- * struct __alloc_objio_segment {
- * struct objio_segment olseg;
- * struct ore_dev *ods[numdevs];
- * struct ore_comp comps[numdevs];
- * } *aolseg;
- * NOTE: The code as above compiles and runs perfectly. It is elegant,
- * type safe and compact. At some Past time Linus has decided he does not
- * like variable length arrays, For the sake of this principal we uglify
- * the code as below.
- */
- struct objio_segment *lseg;
- size_t lseg_size = sizeof(*lseg) +
- numdevs * sizeof(lseg->oc.ods[0]) +
- numdevs * sizeof(*lseg->oc.comps);
-
- lseg = kzalloc(lseg_size, gfp_flags);
- if (unlikely(!lseg)) {
- dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
- numdevs, lseg_size);
- return -ENOMEM;
- }
-
- lseg->oc.numdevs = numdevs;
- lseg->oc.single_comp = EC_MULTPLE_COMPS;
- lseg->oc.ods = (void *)(lseg + 1);
- lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
-
- *pseg = lseg;
- return 0;
-}
-
-int objio_alloc_lseg(struct pnfs_layout_segment **outp,
- struct pnfs_layout_hdr *pnfslay,
- struct pnfs_layout_range *range,
- struct xdr_stream *xdr,
- gfp_t gfp_flags)
-{
- struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
- struct objio_segment *objio_seg;
- struct pnfs_osd_xdr_decode_layout_iter iter;
- struct pnfs_osd_layout layout;
- struct pnfs_osd_object_cred src_comp;
- unsigned cur_comp;
- int err;
-
- err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
- if (unlikely(err))
- return err;
-
- err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
- if (unlikely(err))
- return err;
-
- objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
- objio_seg->layout.group_width = layout.olo_map.odm_group_width;
- objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
- objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
- objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
-
- err = ore_verify_layout(layout.olo_map.odm_num_comps,
- &objio_seg->layout);
- if (unlikely(err))
- goto err;
-
- objio_seg->oc.first_dev = layout.olo_comps_index;
- cur_comp = 0;
- while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
- struct nfs4_deviceid_node *d;
- struct objio_dev_ent *ode;
-
- copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
-
- d = nfs4_find_get_deviceid(server,
- &src_comp.oc_object_id.oid_device_id,
- pnfslay->plh_lc_cred, gfp_flags);
- if (!d) {
- err = -ENXIO;
- goto err;
- }
-
- ode = container_of(d, struct objio_dev_ent, id_node);
- objio_seg->oc.ods[cur_comp++] = &ode->od;
- }
- /* pnfs_osd_xdr_decode_layout_comp returns false on error */
- if (unlikely(err))
- goto err;
-
- *outp = &objio_seg->lseg;
- return 0;
-
-err:
- kfree(objio_seg);
- dprintk("%s: Error: return %d\n", __func__, err);
- *outp = NULL;
- return err;
-}
-
-void objio_free_lseg(struct pnfs_layout_segment *lseg)
-{
- int i;
- struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
-
- for (i = 0; i < objio_seg->oc.numdevs; i++) {
- struct ore_dev *od = objio_seg->oc.ods[i];
- struct objio_dev_ent *ode;
-
- if (!od)
- break;
- ode = container_of(od, typeof(*ode), od);
- nfs4_put_deviceid_node(&ode->id_node);
- }
- kfree(objio_seg);
-}
-
-static int
-objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
- struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
- loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
- struct objio_state **outp)
-{
- struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
- struct ore_io_state *ios;
- int ret;
- struct __alloc_objio_state {
- struct objio_state objios;
- struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
- } *aos;
-
- aos = kzalloc(sizeof(*aos), gfp_flags);
- if (unlikely(!aos))
- return -ENOMEM;
-
- objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
- aos->ioerrs, rpcdata, pnfs_layout_type);
-
- ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
- offset, count, &ios);
- if (unlikely(ret)) {
- kfree(aos);
- return ret;
- }
-
- ios->pages = pages;
- ios->pgbase = pgbase;
- ios->private = aos;
- BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
-
- aos->objios.sync = 0;
- aos->objios.ios = ios;
- *outp = &aos->objios;
- return 0;
-}
-
-void objio_free_result(struct objlayout_io_res *oir)
-{
- struct objio_state *objios = container_of(oir, struct objio_state, oir);
-
- ore_put_io_state(objios->ios);
- kfree(objios);
-}
-
-static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
-{
- switch (oep) {
- case OSD_ERR_PRI_NO_ERROR:
- return (enum pnfs_osd_errno)0;
-
- case OSD_ERR_PRI_CLEAR_PAGES:
- BUG_ON(1);
- return 0;
-
- case OSD_ERR_PRI_RESOURCE:
- return PNFS_OSD_ERR_RESOURCE;
- case OSD_ERR_PRI_BAD_CRED:
- return PNFS_OSD_ERR_BAD_CRED;
- case OSD_ERR_PRI_NO_ACCESS:
- return PNFS_OSD_ERR_NO_ACCESS;
- case OSD_ERR_PRI_UNREACHABLE:
- return PNFS_OSD_ERR_UNREACHABLE;
- case OSD_ERR_PRI_NOT_FOUND:
- return PNFS_OSD_ERR_NOT_FOUND;
- case OSD_ERR_PRI_NO_SPACE:
- return PNFS_OSD_ERR_NO_SPACE;
- default:
- WARN_ON(1);
- /* fallthrough */
- case OSD_ERR_PRI_EIO:
- return PNFS_OSD_ERR_EIO;
- }
-}
-
-static void __on_dev_error(struct ore_io_state *ios,
- struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
- u64 dev_offset, u64 dev_len)
-{
- struct objio_state *objios = ios->private;
- struct pnfs_osd_objid pooid;
- struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
- /* FIXME: what to do with more-then-one-group layouts. We need to
- * translate from ore_io_state index to oc->comps index
- */
- unsigned comp = dev_index;
-
- pooid.oid_device_id = ode->id_node.deviceid;
- pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
- pooid.oid_object_id = ios->oc->comps[comp].obj.id;
-
- objlayout_io_set_result(&objios->oir, comp,
- &pooid, osd_pri_2_pnfs_err(oep),
- dev_offset, dev_len, !ios->reading);
-}
-
-/*
- * read
- */
-static void _read_done(struct ore_io_state *ios, void *private)
-{
- struct objio_state *objios = private;
- ssize_t status;
- int ret = ore_check_io(ios, &__on_dev_error);
-
- /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
-
- if (likely(!ret))
- status = ios->length;
- else
- status = ret;
-
- objlayout_read_done(&objios->oir, status, objios->sync);
-}
-
-int objio_read_pagelist(struct nfs_pgio_header *hdr)
-{
- struct objio_state *objios;
- int ret;
-
- ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
- hdr->lseg, hdr->args.pages, hdr->args.pgbase,
- hdr->args.offset, hdr->args.count, hdr,
- GFP_KERNEL, &objios);
- if (unlikely(ret))
- return ret;
-
- objios->ios->done = _read_done;
- dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
- hdr->args.offset, hdr->args.count);
- ret = ore_read(objios->ios);
- if (unlikely(ret))
- objio_free_result(&objios->oir);
- return ret;
-}
-
-/*
- * write
- */
-static void _write_done(struct ore_io_state *ios, void *private)
-{
- struct objio_state *objios = private;
- ssize_t status;
- int ret = ore_check_io(ios, &__on_dev_error);
-
- /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
-
- if (likely(!ret)) {
- /* FIXME: should be based on the OSD's persistence model
- * See OSD2r05 Section 4.13 Data persistence model */
- objios->oir.committed = NFS_FILE_SYNC;
- status = ios->length;
- } else {
- status = ret;
- }
-
- objlayout_write_done(&objios->oir, status, objios->sync);
-}
-
-static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
-{
- struct objio_state *objios = priv;
- struct nfs_pgio_header *hdr = objios->oir.rpcdata;
- struct address_space *mapping = hdr->inode->i_mapping;
- pgoff_t index = offset / PAGE_SIZE;
- struct page *page;
- loff_t i_size = i_size_read(hdr->inode);
-
- if (offset >= i_size) {
- *uptodate = true;
- dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
- return ZERO_PAGE(0);
- }
-
- page = find_get_page(mapping, index);
- if (!page) {
- page = find_or_create_page(mapping, index, GFP_NOFS);
- if (unlikely(!page)) {
- dprintk("%s: grab_cache_page Failed index=0x%lx\n",
- __func__, index);
- return NULL;
- }
- unlock_page(page);
- }
- *uptodate = PageUptodate(page);
- dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
- return page;
-}
-
-static void __r4w_put_page(void *priv, struct page *page)
-{
- dprintk("%s: index=0x%lx\n", __func__,
- (page == ZERO_PAGE(0)) ? -1UL : page->index);
- if (ZERO_PAGE(0) != page)
- put_page(page);
- return;
-}
-
-static const struct _ore_r4w_op _r4w_op = {
- .get_page = &__r4w_get_page,
- .put_page = &__r4w_put_page,
-};
-
-int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
-{
- struct objio_state *objios;
- int ret;
-
- ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
- hdr->lseg, hdr->args.pages, hdr->args.pgbase,
- hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
- &objios);
- if (unlikely(ret))
- return ret;
-
- objios->sync = 0 != (how & FLUSH_SYNC);
- objios->ios->r4w = &_r4w_op;
-
- if (!objios->sync)
- objios->ios->done = _write_done;
-
- dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
- hdr->args.offset, hdr->args.count);
- ret = ore_write(objios->ios);
- if (unlikely(ret)) {
- objio_free_result(&objios->oir);
- return ret;
- }
-
- if (objios->sync)
- _write_done(objios->ios, objios);
-
- return 0;
-}
-
-/*
- * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
- * of bytes (maximum @req->wb_bytes) that can be coalesced.
- */
-static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *prev, struct nfs_page *req)
-{
- struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio);
- unsigned int size;
-
- size = pnfs_generic_pg_test(pgio, prev, req);
-
- if (!size || mirror->pg_count + req->wb_bytes >
- (unsigned long)pgio->pg_layout_private)
- return 0;
-
- return min(size, req->wb_bytes);
-}
-
-static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
- pnfs_generic_pg_init_read(pgio, req);
- if (unlikely(pgio->pg_lseg == NULL))
- return; /* Not pNFS */
-
- pgio->pg_layout_private = (void *)
- OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
-}
-
-static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
- unsigned long *stripe_end)
-{
- u32 stripe_off;
- unsigned stripe_size;
-
- if (layout->raid_algorithm == PNFS_OSD_RAID_0)
- return true;
-
- stripe_size = layout->stripe_unit *
- (layout->group_width - layout->parity);
-
- div_u64_rem(offset, stripe_size, &stripe_off);
- if (!stripe_off)
- return true;
-
- *stripe_end = stripe_size - stripe_off;
- return false;
-}
-
-static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
- unsigned long stripe_end = 0;
- u64 wb_size;
-
- if (pgio->pg_dreq == NULL)
- wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
- else
- wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
-
- pnfs_generic_pg_init_write(pgio, req, wb_size);
- if (unlikely(pgio->pg_lseg == NULL))
- return; /* Not pNFS */
-
- if (req->wb_offset ||
- !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
- &OBJIO_LSEG(pgio->pg_lseg)->layout,
- &stripe_end)) {
- pgio->pg_layout_private = (void *)stripe_end;
- } else {
- pgio->pg_layout_private = (void *)
- OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
- }
-}
-
-static const struct nfs_pageio_ops objio_pg_read_ops = {
- .pg_init = objio_init_read,
- .pg_test = objio_pg_test,
- .pg_doio = pnfs_generic_pg_readpages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
-};
-
-static const struct nfs_pageio_ops objio_pg_write_ops = {
- .pg_init = objio_init_write,
- .pg_test = objio_pg_test,
- .pg_doio = pnfs_generic_pg_writepages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
-};
-
-static struct pnfs_layoutdriver_type objlayout_type = {
- .id = LAYOUT_OSD2_OBJECTS,
- .name = "LAYOUT_OSD2_OBJECTS",
- .flags = PNFS_LAYOUTRET_ON_SETATTR |
- PNFS_LAYOUTRET_ON_ERROR,
-
- .max_deviceinfo_size = PAGE_SIZE,
- .owner = THIS_MODULE,
- .alloc_layout_hdr = objlayout_alloc_layout_hdr,
- .free_layout_hdr = objlayout_free_layout_hdr,
-
- .alloc_lseg = objlayout_alloc_lseg,
- .free_lseg = objlayout_free_lseg,
-
- .read_pagelist = objlayout_read_pagelist,
- .write_pagelist = objlayout_write_pagelist,
- .pg_read_ops = &objio_pg_read_ops,
- .pg_write_ops = &objio_pg_write_ops,
-
- .sync = pnfs_generic_sync,
-
- .free_deviceid_node = objio_free_deviceid_node,
-
- .encode_layoutcommit = objlayout_encode_layoutcommit,
- .encode_layoutreturn = objlayout_encode_layoutreturn,
-};
-
-MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
-MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
-MODULE_LICENSE("GPL");
-
-static int __init
-objlayout_init(void)
-{
- int ret = pnfs_register_layoutdriver(&objlayout_type);
-
- if (ret)
- printk(KERN_INFO
- "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
- __func__, ret);
- else
- printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
- __func__);
- return ret;
-}
-
-static void __exit
-objlayout_exit(void)
-{
- pnfs_unregister_layoutdriver(&objlayout_type);
- printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
- __func__);
-}
-
-MODULE_ALIAS("nfs-layouttype4-2");
-
-module_init(objlayout_init);
-module_exit(objlayout_exit);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
deleted file mode 100644
index 8f3d2acb81c3..000000000000
--- a/fs/nfs/objlayout/objlayout.c
+++ /dev/null
@@ -1,706 +0,0 @@
-/*
- * pNFS Objects layout driver high level definitions
- *
- * Copyright (C) 2007 Panasas Inc. [year of first publication]
- * All rights reserved.
- *
- * Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <ooo@electrozaur.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * See the file COPYING included with this distribution for more details.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Panasas company nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/kmod.h>
-#include <linux/moduleparam.h>
-#include <linux/ratelimit.h>
-#include <scsi/osd_initiator.h>
-#include "objlayout.h"
-
-#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-/*
- * Create a objlayout layout structure for the given inode and return it.
- */
-struct pnfs_layout_hdr *
-objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
-{
- struct objlayout *objlay;
-
- objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
- if (!objlay)
- return NULL;
- spin_lock_init(&objlay->lock);
- INIT_LIST_HEAD(&objlay->err_list);
- dprintk("%s: Return %p\n", __func__, objlay);
- return &objlay->pnfs_layout;
-}
-
-/*
- * Free an objlayout layout structure
- */
-void
-objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
-{
- struct objlayout *objlay = OBJLAYOUT(lo);
-
- dprintk("%s: objlay %p\n", __func__, objlay);
-
- WARN_ON(!list_empty(&objlay->err_list));
- kfree(objlay);
-}
-
-/*
- * Unmarshall layout and store it in pnfslay.
- */
-struct pnfs_layout_segment *
-objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
- struct nfs4_layoutget_res *lgr,
- gfp_t gfp_flags)
-{
- int status = -ENOMEM;
- struct xdr_stream stream;
- struct xdr_buf buf = {
- .pages = lgr->layoutp->pages,
- .page_len = lgr->layoutp->len,
- .buflen = lgr->layoutp->len,
- .len = lgr->layoutp->len,
- };
- struct page *scratch;
- struct pnfs_layout_segment *lseg;
-
- dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
-
- scratch = alloc_page(gfp_flags);
- if (!scratch)
- goto err_nofree;
-
- xdr_init_decode(&stream, &buf, NULL);
- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
-
- status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
- if (unlikely(status)) {
- dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
- status);
- goto err;
- }
-
- __free_page(scratch);
-
- dprintk("%s: Return %p\n", __func__, lseg);
- return lseg;
-
-err:
- __free_page(scratch);
-err_nofree:
- dprintk("%s: Err Return=>%d\n", __func__, status);
- return ERR_PTR(status);
-}
-
-/*
- * Free a layout segement
- */
-void
-objlayout_free_lseg(struct pnfs_layout_segment *lseg)
-{
- dprintk("%s: freeing layout segment %p\n", __func__, lseg);
-
- if (unlikely(!lseg))
- return;
-
- objio_free_lseg(lseg);
-}
-
-/*
- * I/O Operations
- */
-static inline u64
-end_offset(u64 start, u64 len)
-{
- u64 end;
-
- end = start + len;
- return end >= start ? end : NFS4_MAX_UINT64;
-}
-
-static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
- struct page ***p_pages, unsigned *p_pgbase,
- u64 offset, unsigned long count)
-{
- u64 lseg_end_offset;
-
- BUG_ON(offset < lseg->pls_range.offset);
- lseg_end_offset = end_offset(lseg->pls_range.offset,
- lseg->pls_range.length);
- BUG_ON(offset >= lseg_end_offset);
- WARN_ON(offset + count > lseg_end_offset);
-
- if (*p_pgbase > PAGE_SIZE) {
- dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
- *p_pages += *p_pgbase >> PAGE_SHIFT;
- *p_pgbase &= ~PAGE_MASK;
- }
-}
-
-/*
- * I/O done common code
- */
-static void
-objlayout_iodone(struct objlayout_io_res *oir)
-{
- if (likely(oir->status >= 0)) {
- objio_free_result(oir);
- } else {
- struct objlayout *objlay = oir->objlay;
-
- spin_lock(&objlay->lock);
- objlay->delta_space_valid = OBJ_DSU_INVALID;
- list_add(&objlay->err_list, &oir->err_list);
- spin_unlock(&objlay->lock);
- }
-}
-
-/*
- * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
- *
- * The @index component IO failed (error returned from target). Register
- * the error for later reporting at layout-return.
- */
-void
-objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
- struct pnfs_osd_objid *pooid, int osd_error,
- u64 offset, u64 length, bool is_write)
-{
- struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
-
- BUG_ON(index >= oir->num_comps);
- if (osd_error) {
- ioerr->oer_component = *pooid;
- ioerr->oer_comp_offset = offset;
- ioerr->oer_comp_length = length;
- ioerr->oer_iswrite = is_write;
- ioerr->oer_errno = osd_error;
-
- dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
- "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
- __func__, index, ioerr->oer_errno,
- ioerr->oer_iswrite,
- _DEVID_LO(&ioerr->oer_component.oid_device_id),
- _DEVID_HI(&ioerr->oer_component.oid_device_id),
- ioerr->oer_component.oid_partition_id,
- ioerr->oer_component.oid_object_id,
- ioerr->oer_comp_offset,
- ioerr->oer_comp_length);
- } else {
- /* User need not call if no error is reported */
- ioerr->oer_errno = 0;
- }
-}
-
-/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
-static void _rpc_read_complete(struct work_struct *work)
-{
- struct rpc_task *task;
- struct nfs_pgio_header *hdr;
-
- dprintk("%s enter\n", __func__);
- task = container_of(work, struct rpc_task, u.tk_work);
- hdr = container_of(task, struct nfs_pgio_header, task);
-
- pnfs_ld_read_done(hdr);
-}
-
-void
-objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
-{
- struct nfs_pgio_header *hdr = oir->rpcdata;
-
- oir->status = hdr->task.tk_status = status;
- if (status >= 0)
- hdr->res.count = status;
- else
- hdr->pnfs_error = status;
- objlayout_iodone(oir);
- /* must not use oir after this point */
-
- dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
- status, hdr->res.eof, sync);
-
- if (sync)
- pnfs_ld_read_done(hdr);
- else {
- INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
- schedule_work(&hdr->task.u.tk_work);
- }
-}
-
-/*
- * Perform sync or async reads.
- */
-enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_pgio_header *hdr)
-{
- struct inode *inode = hdr->inode;
- loff_t offset = hdr->args.offset;
- size_t count = hdr->args.count;
- int err;
- loff_t eof;
-
- eof = i_size_read(inode);
- if (unlikely(offset + count > eof)) {
- if (offset >= eof) {
- err = 0;
- hdr->res.count = 0;
- hdr->res.eof = 1;
- /*FIXME: do we need to call pnfs_ld_read_done() */
- goto out;
- }
- count = eof - offset;
- }
-
- hdr->res.eof = (offset + count) >= eof;
- _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
- &hdr->args.pgbase,
- hdr->args.offset, hdr->args.count);
-
- dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
- __func__, inode->i_ino, offset, count, hdr->res.eof);
-
- err = objio_read_pagelist(hdr);
- out:
- if (unlikely(err)) {
- hdr->pnfs_error = err;
- dprintk("%s: Returned Error %d\n", __func__, err);
- return PNFS_NOT_ATTEMPTED;
- }
- return PNFS_ATTEMPTED;
-}
-
-/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
- * This is because the osd completion is called with ints-off from
- * the block layer
- */
-static void _rpc_write_complete(struct work_struct *work)
-{
- struct rpc_task *task;
- struct nfs_pgio_header *hdr;
-
- dprintk("%s enter\n", __func__);
- task = container_of(work, struct rpc_task, u.tk_work);
- hdr = container_of(task, struct nfs_pgio_header, task);
-
- pnfs_ld_write_done(hdr);
-}
-
-void
-objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
-{
- struct nfs_pgio_header *hdr = oir->rpcdata;
-
- oir->status = hdr->task.tk_status = status;
- if (status >= 0) {
- hdr->res.count = status;
- hdr->verf.committed = oir->committed;
- } else {
- hdr->pnfs_error = status;
- }
- objlayout_iodone(oir);
- /* must not use oir after this point */
-
- dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
- status, hdr->verf.committed, sync);
-
- if (sync)
- pnfs_ld_write_done(hdr);
- else {
- INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
- schedule_work(&hdr->task.u.tk_work);
- }
-}
-
-/*
- * Perform sync or async writes.
- */
-enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
-{
- int err;
-
- _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
- &hdr->args.pgbase,
- hdr->args.offset, hdr->args.count);
-
- err = objio_write_pagelist(hdr, how);
- if (unlikely(err)) {
- hdr->pnfs_error = err;
- dprintk("%s: Returned Error %d\n", __func__, err);
- return PNFS_NOT_ATTEMPTED;
- }
- return PNFS_ATTEMPTED;
-}
-
-void
-objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
- struct xdr_stream *xdr,
- const struct nfs4_layoutcommit_args *args)
-{
- struct objlayout *objlay = OBJLAYOUT(pnfslay);
- struct pnfs_osd_layoutupdate lou;
- __be32 *start;
-
- dprintk("%s: Begin\n", __func__);
-
- spin_lock(&objlay->lock);
- lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
- lou.dsu_delta = objlay->delta_space_used;
- objlay->delta_space_used = 0;
- objlay->delta_space_valid = OBJ_DSU_INIT;
- lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
- spin_unlock(&objlay->lock);
-
- start = xdr_reserve_space(xdr, 4);
-
- BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
-
- *start = cpu_to_be32((xdr->p - start - 1) * 4);
-
- dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
- lou.dsu_delta, lou.olu_ioerr_flag);
-}
-
-static int
-err_prio(u32 oer_errno)
-{
- switch (oer_errno) {
- case 0:
- return 0;
-
- case PNFS_OSD_ERR_RESOURCE:
- return OSD_ERR_PRI_RESOURCE;
- case PNFS_OSD_ERR_BAD_CRED:
- return OSD_ERR_PRI_BAD_CRED;
- case PNFS_OSD_ERR_NO_ACCESS:
- return OSD_ERR_PRI_NO_ACCESS;
- case PNFS_OSD_ERR_UNREACHABLE:
- return OSD_ERR_PRI_UNREACHABLE;
- case PNFS_OSD_ERR_NOT_FOUND:
- return OSD_ERR_PRI_NOT_FOUND;
- case PNFS_OSD_ERR_NO_SPACE:
- return OSD_ERR_PRI_NO_SPACE;
- default:
- WARN_ON(1);
- /* fallthrough */
- case PNFS_OSD_ERR_EIO:
- return OSD_ERR_PRI_EIO;
- }
-}
-
-static void
-merge_ioerr(struct pnfs_osd_ioerr *dest_err,
- const struct pnfs_osd_ioerr *src_err)
-{
- u64 dest_end, src_end;
-
- if (!dest_err->oer_errno) {
- *dest_err = *src_err;
- /* accumulated device must be blank */
- memset(&dest_err->oer_component.oid_device_id, 0,
- sizeof(dest_err->oer_component.oid_device_id));
-
- return;
- }
-
- if (dest_err->oer_component.oid_partition_id !=
- src_err->oer_component.oid_partition_id)
- dest_err->oer_component.oid_partition_id = 0;
-
- if (dest_err->oer_component.oid_object_id !=
- src_err->oer_component.oid_object_id)
- dest_err->oer_component.oid_object_id = 0;
-
- if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
- dest_err->oer_comp_offset = src_err->oer_comp_offset;
-
- dest_end = end_offset(dest_err->oer_comp_offset,
- dest_err->oer_comp_length);
- src_end = end_offset(src_err->oer_comp_offset,
- src_err->oer_comp_length);
- if (dest_end < src_end)
- dest_end = src_end;
-
- dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
-
- if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
- (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
- dest_err->oer_errno = src_err->oer_errno;
- } else if (src_err->oer_iswrite) {
- dest_err->oer_iswrite = true;
- dest_err->oer_errno = src_err->oer_errno;
- }
-}
-
-static void
-encode_accumulated_error(struct objlayout *objlay, __be32 *p)
-{
- struct objlayout_io_res *oir, *tmp;
- struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
-
- list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
- unsigned i;
-
- for (i = 0; i < oir->num_comps; i++) {
- struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
-
- if (!ioerr->oer_errno)
- continue;
-
- printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
- "is_write=%d dev(%llx:%llx) par=0x%llx "
- "obj=0x%llx offset=0x%llx length=0x%llx\n",
- __func__, i, ioerr->oer_errno,
- ioerr->oer_iswrite,
- _DEVID_LO(&ioerr->oer_component.oid_device_id),
- _DEVID_HI(&ioerr->oer_component.oid_device_id),
- ioerr->oer_component.oid_partition_id,
- ioerr->oer_component.oid_object_id,
- ioerr->oer_comp_offset,
- ioerr->oer_comp_length);
-
- merge_ioerr(&accumulated_err, ioerr);
- }
- list_del(&oir->err_list);
- objio_free_result(oir);
- }
-
- pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
-}
-
-void
-objlayout_encode_layoutreturn(struct xdr_stream *xdr,
- const struct nfs4_layoutreturn_args *args)
-{
- struct pnfs_layout_hdr *pnfslay = args->layout;
- struct objlayout *objlay = OBJLAYOUT(pnfslay);
- struct objlayout_io_res *oir, *tmp;
- __be32 *start;
-
- dprintk("%s: Begin\n", __func__);
- start = xdr_reserve_space(xdr, 4);
- BUG_ON(!start);
-
- spin_lock(&objlay->lock);
-
- list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
- __be32 *last_xdr = NULL, *p;
- unsigned i;
- int res = 0;
-
- for (i = 0; i < oir->num_comps; i++) {
- struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
-
- if (!ioerr->oer_errno)
- continue;
-
- dprintk("%s: err[%d]: errno=%d is_write=%d "
- "dev(%llx:%llx) par=0x%llx obj=0x%llx "
- "offset=0x%llx length=0x%llx\n",
- __func__, i, ioerr->oer_errno,
- ioerr->oer_iswrite,
- _DEVID_LO(&ioerr->oer_component.oid_device_id),
- _DEVID_HI(&ioerr->oer_component.oid_device_id),
- ioerr->oer_component.oid_partition_id,
- ioerr->oer_component.oid_object_id,
- ioerr->oer_comp_offset,
- ioerr->oer_comp_length);
-
- p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
- if (unlikely(!p)) {
- res = -E2BIG;
- break; /* accumulated_error */
- }
-
- last_xdr = p;
- pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
- }
-
- /* TODO: use xdr_write_pages */
- if (unlikely(res)) {
- /* no space for even one error descriptor */
- BUG_ON(!last_xdr);
-
- /* we've encountered a situation with lots and lots of
- * errors and no space to encode them all. Use the last
- * available slot to report the union of all the
- * remaining errors.
- */
- encode_accumulated_error(objlay, last_xdr);
- goto loop_done;
- }
- list_del(&oir->err_list);
- objio_free_result(oir);
- }
-loop_done:
- spin_unlock(&objlay->lock);
-
- *start = cpu_to_be32((xdr->p - start - 1) * 4);
- dprintk("%s: Return\n", __func__);
-}
-
-enum {
- OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
- OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
- OSD_LOGIN_UPCALL_PATHLEN = 256
-};
-
-static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
-
-module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
- 0600);
-MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
-
-struct __auto_login {
- char uri[OBJLAYOUT_MAX_URI_LEN];
- char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
- char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
-};
-
-static int __objlayout_upcall(struct __auto_login *login)
-{
- static char *envp[] = { "HOME=/",
- "TERM=linux",
- "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
- NULL
- };
- char *argv[8];
- int ret;
-
- if (unlikely(!osd_login_prog[0])) {
- dprintk("%s: osd_login_prog is disabled\n", __func__);
- return -EACCES;
- }
-
- dprintk("%s uri: %s\n", __func__, login->uri);
- dprintk("%s osdname %s\n", __func__, login->osdname);
- dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
-
- argv[0] = (char *)osd_login_prog;
- argv[1] = "-u";
- argv[2] = login->uri;
- argv[3] = "-o";
- argv[4] = login->osdname;
- argv[5] = "-s";
- argv[6] = login->systemid_hex;
- argv[7] = NULL;
-
- ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
- /*
- * Disable the upcall mechanism if we're getting an ENOENT or
- * EACCES error. The admin can re-enable it on the fly by using
- * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
- * the problem has been fixed.
- */
- if (ret == -ENOENT || ret == -EACCES) {
- printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
- "objlayoutdriver.osd_login_prog kernel parameter!\n",
- osd_login_prog);
- osd_login_prog[0] = '\0';
- }
- dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
-
- return ret;
-}
-
-/* Assume dest is all zeros */
-static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
- char *dest, int max_len,
- const char *var_name)
-{
- if (!s.len)
- return;
-
- if (s.len >= max_len) {
- pr_warn_ratelimited(
- "objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
- var_name, s.len, max_len);
- s.len = max_len - 1; /* space for null terminator */
- }
-
- memcpy(dest, s.data, s.len);
-}
-
-/* Assume sysid is all zeros */
-static void _sysid_2_hex(struct nfs4_string s,
- char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
-{
- int i;
- char *cur;
-
- if (!s.len)
- return;
-
- if (s.len != OSD_SYSTEMID_LEN) {
- pr_warn_ratelimited(
- "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
- s.len);
- if (s.len > OSD_SYSTEMID_LEN)
- s.len = OSD_SYSTEMID_LEN;
- }
-
- cur = sysid;
- for (i = 0; i < s.len; i++)
- cur = hex_byte_pack(cur, s.data[i]);
-}
-
-int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
-{
- int rc;
- struct __auto_login login;
-
- if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
- return -ENODEV;
-
- memset(&login, 0, sizeof(login));
- __copy_nfsS_and_zero_terminate(
- deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
- login.uri, sizeof(login.uri), "URI");
-
- __copy_nfsS_and_zero_terminate(
- deviceaddr->oda_osdname,
- login.osdname, sizeof(login.osdname), "OSDNAME");
-
- _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
-
- rc = __objlayout_upcall(&login);
- if (rc > 0) /* script returns positive values */
- rc = -ENODEV;
-
- return rc;
-}
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
deleted file mode 100644
index fc94a5872ed4..000000000000
--- a/fs/nfs/objlayout/objlayout.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Data types and function declerations for interfacing with the
- * pNFS standard object layout driver.
- *
- * Copyright (C) 2007 Panasas Inc. [year of first publication]
- * All rights reserved.
- *
- * Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <ooo@electrozaur.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * See the file COPYING included with this distribution for more details.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Panasas company nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _OBJLAYOUT_H
-#define _OBJLAYOUT_H
-
-#include <linux/nfs_fs.h>
-#include <linux/pnfs_osd_xdr.h>
-#include "../pnfs.h"
-
-/*
- * per-inode layout
- */
-struct objlayout {
- struct pnfs_layout_hdr pnfs_layout;
-
- /* for layout_commit */
- enum osd_delta_space_valid_enum {
- OBJ_DSU_INIT = 0,
- OBJ_DSU_VALID,
- OBJ_DSU_INVALID,
- } delta_space_valid;
- s64 delta_space_used; /* consumed by write ops */
-
- /* for layout_return */
- spinlock_t lock;
- struct list_head err_list;
-};
-
-static inline struct objlayout *
-OBJLAYOUT(struct pnfs_layout_hdr *lo)
-{
- return container_of(lo, struct objlayout, pnfs_layout);
-}
-
-/*
- * per-I/O operation state
- * embedded in objects provider io_state data structure
- */
-struct objlayout_io_res {
- struct objlayout *objlay;
-
- void *rpcdata;
- int status; /* res */
- int committed; /* res */
-
- /* Error reporting (layout_return) */
- struct list_head err_list;
- unsigned num_comps;
- /* Pointer to array of error descriptors of size num_comps.
- * It should contain as many entries as devices in the osd_layout
- * that participate in the I/O. It is up to the io_engine to allocate
- * needed space and set num_comps.
- */
- struct pnfs_osd_ioerr *ioerrs;
-};
-
-static inline
-void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
- struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
- struct pnfs_layout_hdr *pnfs_layout_type)
-{
- oir->objlay = OBJLAYOUT(pnfs_layout_type);
- oir->rpcdata = rpcdata;
- INIT_LIST_HEAD(&oir->err_list);
- oir->num_comps = num_comps;
- oir->ioerrs = ioerrs;
-}
-
-/*
- * Raid engine I/O API
- */
-extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
- struct pnfs_layout_hdr *pnfslay,
- struct pnfs_layout_range *range,
- struct xdr_stream *xdr,
- gfp_t gfp_flags);
-extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
-
-/* objio_free_result will free these @oir structs received from
- * objlayout_{read,write}_done
- */
-extern void objio_free_result(struct objlayout_io_res *oir);
-
-extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
-extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
-
-/*
- * callback API
- */
-extern void objlayout_io_set_result(struct objlayout_io_res *oir,
- unsigned index, struct pnfs_osd_objid *pooid,
- int osd_error, u64 offset, u64 length, bool is_write);
-
-static inline void
-objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
-{
- /* If one of the I/Os errored out and the delta_space_used was
- * invalid we render the complete report as invalid. Protocol mandate
- * the DSU be accurate or not reported.
- */
- spin_lock(&objlay->lock);
- if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
- objlay->delta_space_valid = OBJ_DSU_VALID;
- objlay->delta_space_used += space_used;
- }
- spin_unlock(&objlay->lock);
-}
-
-extern void objlayout_read_done(struct objlayout_io_res *oir,
- ssize_t status, bool sync);
-extern void objlayout_write_done(struct objlayout_io_res *oir,
- ssize_t status, bool sync);
-
-/*
- * exported generic objects function vectors
- */
-
-extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
-extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
-
-extern struct pnfs_layout_segment *objlayout_alloc_lseg(
- struct pnfs_layout_hdr *,
- struct nfs4_layoutget_res *,
- gfp_t gfp_flags);
-extern void objlayout_free_lseg(struct pnfs_layout_segment *);
-
-extern enum pnfs_try_status objlayout_read_pagelist(
- struct nfs_pgio_header *);
-
-extern enum pnfs_try_status objlayout_write_pagelist(
- struct nfs_pgio_header *,
- int how);
-
-extern void objlayout_encode_layoutcommit(
- struct pnfs_layout_hdr *,
- struct xdr_stream *,
- const struct nfs4_layoutcommit_args *);
-
-extern void objlayout_encode_layoutreturn(
- struct xdr_stream *,
- const struct nfs4_layoutreturn_args *);
-
-extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
-
-#endif /* _OBJLAYOUT_H */
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
deleted file mode 100644
index f093c7ec983b..000000000000
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Object-Based pNFS Layout XDR layer
- *
- * Copyright (C) 2007 Panasas Inc. [year of first publication]
- * All rights reserved.
- *
- * Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <ooo@electrozaur.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * See the file COPYING included with this distribution for more details.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the Panasas company nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/pnfs_osd_xdr.h>
-
-#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-
-/*
- * The following implementation is based on RFC5664
- */
-
-/*
- * struct pnfs_osd_objid {
- * struct nfs4_deviceid oid_device_id;
- * u64 oid_partition_id;
- * u64 oid_object_id;
- * }; // xdr size 32 bytes
- */
-static __be32 *
-_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
-{
- p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
- sizeof(objid->oid_device_id.data));
-
- p = xdr_decode_hyper(p, &objid->oid_partition_id);
- p = xdr_decode_hyper(p, &objid->oid_object_id);
- return p;
-}
-/*
- * struct pnfs_osd_opaque_cred {
- * u32 cred_len;
- * void *cred;
- * }; // xdr size [variable]
- * The return pointers are from the xdr buffer
- */
-static int
-_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
- struct xdr_stream *xdr)
-{
- __be32 *p = xdr_inline_decode(xdr, 1);
-
- if (!p)
- return -EINVAL;
-
- opaque_cred->cred_len = be32_to_cpu(*p++);
-
- p = xdr_inline_decode(xdr, opaque_cred->cred_len);
- if (!p)
- return -EINVAL;
-
- opaque_cred->cred = p;
- return 0;
-}
-
-/*
- * struct pnfs_osd_object_cred {
- * struct pnfs_osd_objid oc_object_id;
- * u32 oc_osd_version;
- * u32 oc_cap_key_sec;
- * struct pnfs_osd_opaque_cred oc_cap_key
- * struct pnfs_osd_opaque_cred oc_cap;
- * }; // xdr size 32 + 4 + 4 + [variable] + [variable]
- */
-static int
-_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
- struct xdr_stream *xdr)
-{
- __be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
- int ret;
-
- if (!p)
- return -EIO;
-
- p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
- comp->oc_osd_version = be32_to_cpup(p++);
- comp->oc_cap_key_sec = be32_to_cpup(p);
-
- ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
- if (unlikely(ret))
- return ret;
-
- ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
- return ret;
-}
-
-/*
- * struct pnfs_osd_data_map {
- * u32 odm_num_comps;
- * u64 odm_stripe_unit;
- * u32 odm_group_width;
- * u32 odm_group_depth;
- * u32 odm_mirror_cnt;
- * u32 odm_raid_algorithm;
- * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
- */
-static inline int
-_osd_data_map_xdr_sz(void)
-{
- return 4 + 8 + 4 + 4 + 4 + 4;
-}
-
-static __be32 *
-_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
-{
- data_map->odm_num_comps = be32_to_cpup(p++);
- p = xdr_decode_hyper(p, &data_map->odm_stripe_unit);
- data_map->odm_group_width = be32_to_cpup(p++);
- data_map->odm_group_depth = be32_to_cpup(p++);
- data_map->odm_mirror_cnt = be32_to_cpup(p++);
- data_map->odm_raid_algorithm = be32_to_cpup(p++);
- dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
- "odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
- __func__,
- data_map->odm_num_comps,
- (unsigned long long)data_map->odm_stripe_unit,
- data_map->odm_group_width,
- data_map->odm_group_depth,
- data_map->odm_mirror_cnt,
- data_map->odm_raid_algorithm);
- return p;
-}
-
-int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
- struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
-{
- __be32 *p;
-
- memset(iter, 0, sizeof(*iter));
-
- p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
- if (unlikely(!p))
- return -EINVAL;
-
- p = _osd_xdr_decode_data_map(p, &layout->olo_map);
- layout->olo_comps_index = be32_to_cpup(p++);
- layout->olo_num_comps = be32_to_cpup(p++);
- dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
- layout->olo_comps_index, layout->olo_num_comps);
-
- iter->total_comps = layout->olo_num_comps;
- return 0;
-}
-
-bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
- struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
- int *err)
-{
- BUG_ON(iter->decoded_comps > iter->total_comps);
- if (iter->decoded_comps == iter->total_comps)
- return false;
-
- *err = _osd_xdr_decode_object_cred(comp, xdr);
- if (unlikely(*err)) {
- dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
- "total_comps=%d\n", __func__, *err,
- iter->decoded_comps, iter->total_comps);
- return false; /* stop the loop */
- }
- dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
- "key_len=%u cap_len=%u\n",
- __func__,
- _DEVID_LO(&comp->oc_object_id.oid_device_id),
- _DEVID_HI(&comp->oc_object_id.oid_device_id),
- comp->oc_object_id.oid_partition_id,
- comp->oc_object_id.oid_object_id,
- comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
-
- iter->decoded_comps++;
- return true;
-}
-
-/*
- * Get Device Information Decoding
- *
- * Note: since Device Information is currently done synchronously, all
- * variable strings fields are left inside the rpc buffer and are only
- * pointed to by the pnfs_osd_deviceaddr members. So the read buffer
- * should not be freed while the returned information is in use.
- */
-/*
- *struct nfs4_string {
- * unsigned int len;
- * char *data;
- *}; // size [variable]
- * NOTE: Returned string points to inside the XDR buffer
- */
-static __be32 *
-__read_u8_opaque(__be32 *p, struct nfs4_string *str)
-{
- str->len = be32_to_cpup(p++);
- str->data = (char *)p;
-
- p += XDR_QUADLEN(str->len);
- return p;
-}
-
-/*
- * struct pnfs_osd_targetid {
- * u32 oti_type;
- * struct nfs4_string oti_scsi_device_id;
- * };// size 4 + [variable]
- */
-static __be32 *
-__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid)
-{
- u32 oti_type;
-
- oti_type = be32_to_cpup(p++);
- targetid->oti_type = oti_type;
-
- switch (oti_type) {
- case OBJ_TARGET_SCSI_NAME:
- case OBJ_TARGET_SCSI_DEVICE_ID:
- p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
- }
-
- return p;
-}
-
-/*
- * struct pnfs_osd_net_addr {
- * struct nfs4_string r_netid;
- * struct nfs4_string r_addr;
- * };
- */
-static __be32 *
-__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr)
-{
- p = __read_u8_opaque(p, &netaddr->r_netid);
- p = __read_u8_opaque(p, &netaddr->r_addr);
-
- return p;
-}
-
-/*
- * struct pnfs_osd_targetaddr {
- * u32 ota_available;
- * struct pnfs_osd_net_addr ota_netaddr;
- * };
- */
-static __be32 *
-__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
-{
- u32 ota_available;
-
- ota_available = be32_to_cpup(p++);
- targetaddr->ota_available = ota_available;
-
- if (ota_available)
- p = __read_net_addr(p, &targetaddr->ota_netaddr);
-
-
- return p;
-}
-
-/*
- * struct pnfs_osd_deviceaddr {
- * struct pnfs_osd_targetid oda_targetid;
- * struct pnfs_osd_targetaddr oda_targetaddr;
- * u8 oda_lun[8];
- * struct nfs4_string oda_systemid;
- * struct pnfs_osd_object_cred oda_root_obj_cred;
- * struct nfs4_string oda_osdname;
- * };
- */
-
-/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
- * not have an xdr_stream
- */
-static __be32 *
-__read_opaque_cred(__be32 *p,
- struct pnfs_osd_opaque_cred *opaque_cred)
-{
- opaque_cred->cred_len = be32_to_cpu(*p++);
- opaque_cred->cred = p;
- return p + XDR_QUADLEN(opaque_cred->cred_len);
-}
-
-static __be32 *
-__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
-{
- p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
- comp->oc_osd_version = be32_to_cpup(p++);
- comp->oc_cap_key_sec = be32_to_cpup(p++);
-
- p = __read_opaque_cred(p, &comp->oc_cap_key);
- p = __read_opaque_cred(p, &comp->oc_cap);
- return p;
-}
-
-void pnfs_osd_xdr_decode_deviceaddr(
- struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
-{
- p = __read_targetid(p, &deviceaddr->oda_targetid);
-
- p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);
-
- p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
- sizeof(deviceaddr->oda_lun));
-
- p = __read_u8_opaque(p, &deviceaddr->oda_systemid);
-
- p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);
-
- p = __read_u8_opaque(p, &deviceaddr->oda_osdname);
-
- /* libosd likes this terminated in dbg. It's last, so no problems */
- deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0;
-}
-
-/*
- * struct pnfs_osd_layoutupdate {
- * u32 dsu_valid;
- * s64 dsu_delta;
- * u32 olu_ioerr_flag;
- * }; xdr size 4 + 8 + 4
- */
-int
-pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
- struct pnfs_osd_layoutupdate *lou)
-{
- __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4);
-
- if (!p)
- return -E2BIG;
-
- *p++ = cpu_to_be32(lou->dsu_valid);
- if (lou->dsu_valid)
- p = xdr_encode_hyper(p, lou->dsu_delta);
- *p++ = cpu_to_be32(lou->olu_ioerr_flag);
- return 0;
-}
-
-/*
- * struct pnfs_osd_objid {
- * struct nfs4_deviceid oid_device_id;
- * u64 oid_partition_id;
- * u64 oid_object_id;
- * }; // xdr size 32 bytes
- */
-static inline __be32 *
-pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
-{
- p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
- sizeof(object_id->oid_device_id.data));
- p = xdr_encode_hyper(p, object_id->oid_partition_id);
- p = xdr_encode_hyper(p, object_id->oid_object_id);
-
- return p;
-}
-
-/*
- * struct pnfs_osd_ioerr {
- * struct pnfs_osd_objid oer_component;
- * u64 oer_comp_offset;
- * u64 oer_comp_length;
- * u32 oer_iswrite;
- * u32 oer_errno;
- * }; // xdr size 32 + 24 bytes
- */
-void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
-{
- p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
- p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
- p = xdr_encode_hyper(p, ioerr->oer_comp_length);
- *p++ = cpu_to_be32(ioerr->oer_iswrite);
- *p = cpu_to_be32(ioerr->oer_errno);
-}
-
-__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
-{
- __be32 *p;
-
- p = xdr_reserve_space(xdr, 32 + 24);
- if (unlikely(!p))
- dprintk("%s: out of xdr space\n", __func__);
-
- return p;
-}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6e629b856a00..ad92b401326c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,19 +29,6 @@
static struct kmem_cache *nfs_page_cachep;
static const struct rpc_call_ops nfs_pgio_common_ops;
-static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
-{
- p->npages = pagecount;
- if (pagecount <= ARRAY_SIZE(p->page_array))
- p->pagevec = p->page_array;
- else {
- p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
- if (!p->pagevec)
- p->npages = 0;
- }
- return p->pagevec != NULL;
-}
-
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc)
{
@@ -115,6 +102,35 @@ nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
TASK_KILLABLE);
}
+/**
+ * nfs_async_iocounter_wait - wait on a rpc_waitqueue for I/O
+ * to complete
+ * @task: the rpc_task that should wait
+ * @l_ctx: nfs_lock_context with io_counter to check
+ *
+ * Returns true if there is outstanding I/O to wait on and the
+ * task has been put to sleep.
+ */
+bool
+nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
+{
+ struct inode *inode = d_inode(l_ctx->open_context->dentry);
+ bool ret = false;
+
+ if (atomic_read(&l_ctx->io_count) > 0) {
+ rpc_sleep_on(&NFS_SERVER(inode)->uoc_rpcwaitq, task, NULL);
+ ret = true;
+ }
+
+ if (atomic_read(&l_ctx->io_count) == 0) {
+ rpc_wake_up_queued_task(&NFS_SERVER(inode)->uoc_rpcwaitq, task);
+ ret = false;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
+
/*
* nfs_page_group_lock - lock the head of the page group
* @req - request in group that is to be locked
@@ -398,8 +414,11 @@ static void nfs_clear_request(struct nfs_page *req)
req->wb_page = NULL;
}
if (l_ctx != NULL) {
- if (atomic_dec_and_test(&l_ctx->io_count))
+ if (atomic_dec_and_test(&l_ctx->io_count)) {
wake_up_atomic_t(&l_ctx->io_count);
+ if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
+ rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
+ }
nfs_put_lock_context(l_ctx);
req->wb_lock_context = NULL;
}
@@ -677,7 +696,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
const struct nfs_pgio_completion_ops *compl_ops,
const struct nfs_rw_ops *rw_ops,
size_t bsize,
- int io_flags)
+ int io_flags,
+ gfp_t gfp_flags)
{
struct nfs_pgio_mirror *new;
int i;
@@ -701,7 +721,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
/* until we have a request, we don't have an lseg and no
* idea how many mirrors there will be */
new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX,
- sizeof(struct nfs_pgio_mirror), GFP_KERNEL);
+ sizeof(struct nfs_pgio_mirror), gfp_flags);
desc->pg_mirrors_dynamic = new;
desc->pg_mirrors = new;
@@ -754,13 +774,24 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
*last_page;
struct list_head *head = &mirror->pg_list;
struct nfs_commit_info cinfo;
+ struct nfs_page_array *pg_array = &hdr->page_array;
unsigned int pagecount, pageused;
+ gfp_t gfp_flags = GFP_KERNEL;
pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
- if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
- nfs_pgio_error(hdr);
- desc->pg_error = -ENOMEM;
- return desc->pg_error;
+
+ if (pagecount <= ARRAY_SIZE(pg_array->page_array))
+ pg_array->pagevec = pg_array->page_array;
+ else {
+ if (hdr->rw_mode == FMODE_WRITE)
+ gfp_flags = GFP_NOIO;
+ pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags);
+ if (!pg_array->pagevec) {
+ pg_array->npages = 0;
+ nfs_pgio_error(hdr);
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
+ }
}
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
@@ -1256,8 +1287,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
mirror = &desc->pg_mirrors[midx];
if (!list_empty(&mirror->pg_list)) {
prev = nfs_list_entry(mirror->pg_list.prev);
- if (index != prev->wb_index + 1)
- nfs_pageio_complete_mirror(desc, midx);
+ if (index != prev->wb_index + 1) {
+ nfs_pageio_complete(desc);
+ break;
+ }
}
}
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index dd042498ce7c..adc6ec28d4b5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -322,9 +322,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
static void
pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
{
+ struct pnfs_layout_segment *lseg;
lo->plh_return_iomode = 0;
lo->plh_return_seq = 0;
clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+ list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+ if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+ continue;
+ pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+ }
}
static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
@@ -367,9 +373,9 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
struct pnfs_layout_segment *lseg, *next;
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
- pnfs_clear_layoutreturn_info(lo);
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
pnfs_clear_lseg_state(lseg, lseg_list);
+ pnfs_clear_layoutreturn_info(lo);
pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
!test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
@@ -563,7 +569,6 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
}
}
}
-EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked);
/*
* is l2 fully contained in l1?
@@ -728,6 +733,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
+ nfs_commit_inode(&nfsi->vfs_inode, 0);
pnfs_put_layout_hdr(lo);
} else
spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -1209,7 +1215,6 @@ out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
-EXPORT_SYMBOL_GPL(_pnfs_return_layout);
int
pnfs_commit_and_return_layout(struct inode *inode)
@@ -1991,6 +1996,8 @@ out_forget:
spin_unlock(&ino->i_lock);
lseg->pls_layout = lo;
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ if (!pnfs_layout_is_valid(lo))
+ nfs_commit_inode(ino, 0);
return ERR_PTR(-EAGAIN);
}
@@ -2051,9 +2058,11 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
bool return_now = false;
spin_lock(&inode->i_lock);
+ if (!pnfs_layout_is_valid(lo)) {
+ spin_unlock(&inode->i_lock);
+ return;
+ }
pnfs_set_plh_return_info(lo, range.iomode, 0);
- /* Block LAYOUTGET */
- set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
/*
* mark all matching lsegs so that we are sure to have no live
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
@@ -2075,10 +2084,22 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
void
+pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
+{
+ if (pgio->pg_lseg == NULL ||
+ test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
+ return;
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
+
+void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 rd_size = req->wb_bytes;
+ pnfs_generic_pg_check_layout(pgio);
if (pgio->pg_lseg == NULL) {
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2109,6 +2130,7 @@ void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
+ pnfs_generic_pg_check_layout(pgio);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -2277,8 +2299,20 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
enum pnfs_try_status trypnfs;
trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
- if (trypnfs == PNFS_NOT_ATTEMPTED)
+ switch (trypnfs) {
+ case PNFS_NOT_ATTEMPTED:
pnfs_write_through_mds(desc, hdr);
+ case PNFS_ATTEMPTED:
+ break;
+ case PNFS_TRY_AGAIN:
+ /* cleanup hdr and prepare to redo pnfs */
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+ list_splice_init(&hdr->pages, &mirror->pg_list);
+ mirror->pg_recoalesce = 1;
+ }
+ hdr->mds_ops->rpc_release(hdr);
+ }
}
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
@@ -2408,10 +2442,20 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
enum pnfs_try_status trypnfs;
trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
- if (trypnfs == PNFS_TRY_AGAIN)
- pnfs_read_resend_pnfs(hdr);
- if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
+ switch (trypnfs) {
+ case PNFS_NOT_ATTEMPTED:
pnfs_read_through_mds(desc, hdr);
+ case PNFS_ATTEMPTED:
+ break;
+ case PNFS_TRY_AGAIN:
+ /* cleanup hdr and prepare to redo pnfs */
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
+ list_splice_init(&hdr->pages, &mirror->pg_list);
+ mirror->pg_recoalesce = 1;
+ }
+ hdr->mds_ops->rpc_release(hdr);
+ }
}
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 590e1e35781f..2d05b756a8d6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -173,14 +173,9 @@ struct pnfs_layoutdriver_type {
gfp_t gfp_flags);
int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *);
- void (*encode_layoutreturn) (struct xdr_stream *xdr,
- const struct nfs4_layoutreturn_args *args);
void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
- void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
- struct xdr_stream *xdr,
- const struct nfs4_layoutcommit_args *args);
int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
};
@@ -239,6 +234,7 @@ void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
void unset_pnfs_layoutdriver(struct nfs_server *);
+void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7250b95549ec..d40755a0984b 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -217,7 +217,14 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
if (list_empty(&bucket->committing))
continue;
- data = nfs_commitdata_alloc();
+ /*
+ * If the layout segment is invalid, then let
+ * pnfs_generic_retry_commit() clean up the bucket.
+ */
+ if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) &&
+ !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
+ break;
+ data = nfs_commitdata_alloc(false);
if (!data)
break;
data->ds_commit_index = i;
@@ -283,16 +290,10 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
unsigned int nreq = 0;
if (!list_empty(mds_pages)) {
- data = nfs_commitdata_alloc();
- if (data != NULL) {
- data->ds_commit_index = -1;
- list_add(&data->pages, &list);
- nreq++;
- } else {
- nfs_retry_commit(mds_pages, NULL, cinfo, 0);
- pnfs_generic_retry_commit(cinfo, 0);
- return -ENOMEM;
- }
+ data = nfs_commitdata_alloc(true);
+ data->ds_commit_index = -1;
+ list_add(&data->pages, &list);
+ nreq++;
}
nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
@@ -619,7 +620,6 @@ void nfs4_pnfs_v3_ds_connect_unload(void)
get_v3_ds_connect = NULL;
}
}
-EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload);
static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
struct nfs4_pnfs_ds *ds,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b7bca8303989..9872cf676a50 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -638,7 +638,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = file_inode(filp);
- return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
+ return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, NULL);
}
/* Helper functions for NFS lock bounds checking */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index defc9233e985..a8421d9dab6a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,7 +35,11 @@ static struct kmem_cache *nfs_rdata_cachep;
static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{
- return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+ struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+
+ if (p)
+ p->rw_mode = FMODE_READ;
+ return p;
}
static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
@@ -64,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
pg_ops = server->pnfs_curr_ld->pg_read_ops;
#endif
nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
- server->rsize, 0);
+ server->rsize, 0, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
@@ -451,7 +455,6 @@ void nfs_destroy_readpagecache(void)
}
static const struct nfs_rw_ops nfs_rw_read_ops = {
- .rw_mode = FMODE_READ,
.rw_alloc_header = nfs_readhdr_alloc,
.rw_free_header = nfs_readhdr_free,
.rw_done = nfs_readpage_done,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cc341fc7fd44..db7ba542559e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -60,14 +60,28 @@ static mempool_t *nfs_wdata_mempool;
static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;
-struct nfs_commit_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
{
- struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
+ struct nfs_commit_data *p;
- if (p) {
- memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&p->pages);
+ if (never_fail)
+ p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
+ else {
+ /* It is OK to do some reclaim, not no safe to wait
+ * for anything to be returned to the pool.
+ * mempool_alloc() cannot handle that particular combination,
+ * so we need two separate attempts.
+ */
+ p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
+ if (!p)
+ p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
+ __GFP_NOWARN | __GFP_NORETRY);
+ if (!p)
+ return NULL;
}
+
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
return p;
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
@@ -82,8 +96,10 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
- if (p)
+ if (p) {
memset(p, 0, sizeof(*p));
+ p->rw_mode = FMODE_WRITE;
+ }
return p;
}
@@ -547,9 +563,21 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
{
nfs_unlock_request(req);
nfs_end_page_writeback(req);
- nfs_release_request(req);
generic_error_remove_page(page_file_mapping(req->wb_page),
req->wb_page);
+ nfs_release_request(req);
+}
+
+static bool
+nfs_error_is_fatal_on_server(int err)
+{
+ switch (err) {
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ return false;
+ }
+ return nfs_error_is_fatal(err);
}
/*
@@ -557,8 +585,7 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
* May return an error if the user signalled nfs_wait_on_request().
*/
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
- struct page *page, bool nonblock,
- bool launder)
+ struct page *page, bool nonblock)
{
struct nfs_page *req;
int ret = 0;
@@ -574,19 +601,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
ret = 0;
+ /* If there is a fatal error that covers this write, just exit */
+ if (nfs_error_is_fatal_on_server(req->wb_context->error))
+ goto out_launder;
+
if (!nfs_pageio_add_request(pgio, req)) {
ret = pgio->pg_error;
/*
- * Remove the problematic req upon fatal errors
- * in launder case, while other dirty pages can
- * still be around until they get flushed.
+ * Remove the problematic req upon fatal errors on the server
*/
if (nfs_error_is_fatal(ret)) {
nfs_context_set_write_error(req->wb_context, ret);
- if (launder) {
- nfs_write_error_remove_page(req);
- goto out;
- }
+ if (nfs_error_is_fatal_on_server(ret))
+ goto out_launder;
}
nfs_redirty_request(req);
ret = -EAGAIN;
@@ -595,16 +622,18 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
NFSIOS_WRITEPAGES, 1);
out:
return ret;
+out_launder:
+ nfs_write_error_remove_page(req);
+ return ret;
}
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
- struct nfs_pageio_descriptor *pgio, bool launder)
+ struct nfs_pageio_descriptor *pgio)
{
int ret;
nfs_pageio_cond_complete(pgio, page_index(page));
- ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
- launder);
+ ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
if (ret == -EAGAIN) {
redirty_page_for_writepage(wbc, page);
ret = 0;
@@ -616,8 +645,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
* Write an mmapped page to the server.
*/
static int nfs_writepage_locked(struct page *page,
- struct writeback_control *wbc,
- bool launder)
+ struct writeback_control *wbc)
{
struct nfs_pageio_descriptor pgio;
struct inode *inode = page_file_mapping(page)->host;
@@ -626,7 +654,7 @@ static int nfs_writepage_locked(struct page *page,
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0,
false, &nfs_async_write_completion_ops);
- err = nfs_do_writepage(page, wbc, &pgio, launder);
+ err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
return err;
@@ -639,7 +667,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
int ret;
- ret = nfs_writepage_locked(page, wbc, false);
+ ret = nfs_writepage_locked(page, wbc);
unlock_page(page);
return ret;
}
@@ -648,7 +676,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
{
int ret;
- ret = nfs_do_writepage(page, wbc, data, false);
+ ret = nfs_do_writepage(page, wbc, data);
unlock_page(page);
return ret;
}
@@ -1367,7 +1395,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
pg_ops = server->pnfs_curr_ld->pg_write_ops;
#endif
nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
- server->wsize, ioflags);
+ server->wsize, ioflags, GFP_NOIO);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
@@ -1704,50 +1732,14 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
if (list_empty(head))
return 0;
- data = nfs_commitdata_alloc();
-
- if (!data)
- goto out_bad;
+ data = nfs_commitdata_alloc(true);
/* Set up the argument struct */
nfs_init_commit(data, head, NULL, cinfo);
atomic_inc(&cinfo->mds->rpcs_out);
return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
data->mds_ops, how, 0);
- out_bad:
- nfs_retry_commit(head, NULL, cinfo, 0);
- return -ENOMEM;
-}
-
-int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
-{
- struct inode *inode = file_inode(file);
- struct nfs_open_context *open;
- struct nfs_commit_info cinfo;
- struct nfs_page *req;
- int ret;
-
- open = get_nfs_open_context(nfs_file_open_context(file));
- req = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode));
- if (IS_ERR(req)) {
- ret = PTR_ERR(req);
- goto out_put;
- }
-
- nfs_init_cinfo_from_inode(&cinfo, inode);
-
- memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier));
- nfs_request_add_commit_list(req, &cinfo);
- ret = nfs_commit_inode(inode, FLUSH_SYNC);
- if (ret > 0)
- ret = 0;
-
- nfs_free_request(req);
-out_put:
- put_nfs_open_context(open);
- return ret;
}
-EXPORT_SYMBOL_GPL(nfs_commit_file);
/*
* COMMIT call returned
@@ -1985,7 +1977,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
/*
* Write back all requests on one page - we do this before reading it.
*/
-int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
+int nfs_wb_page(struct inode *inode, struct page *page)
{
loff_t range_start = page_file_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
@@ -2002,7 +1994,7 @@ int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
for (;;) {
wait_on_page_writeback(page);
if (clear_page_dirty_for_io(page)) {
- ret = nfs_writepage_locked(page, &wbc, launder);
+ ret = nfs_writepage_locked(page, &wbc);
if (ret < 0)
goto out_error;
continue;
@@ -2107,7 +2099,6 @@ void nfs_destroy_writepagecache(void)
}
static const struct nfs_rw_ops nfs_rw_write_ops = {
- .rw_mode = FMODE_WRITE,
.rw_alloc_header = nfs_writehdr_alloc,
.rw_free_header = nfs_writehdr_free,
.rw_done = nfs_writeback_done,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 26488b419965..0ad325ed71e8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -909,6 +909,8 @@ static inline struct file *get_file(struct file *f)
#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
#define FL_LAYOUT 2048 /* outstanding pNFS layout */
+#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
+
/*
* Special return value from posix_lock_file() and vfs_lock_file() for
* asynchronous locking.
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 140edab64446..05728396a1a1 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -18,6 +18,7 @@
/* Dummy declarations */
struct svc_rqst;
+struct rpc_task;
/*
* This is the set of functions for lockd->nfsd communication
@@ -43,6 +44,7 @@ struct nlmclnt_initdata {
u32 nfs_version;
int noresvport;
struct net *net;
+ const struct nlmclnt_operations *nlmclnt_ops;
};
/*
@@ -52,8 +54,26 @@ struct nlmclnt_initdata {
extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
extern void nlmclnt_done(struct nlm_host *host);
-extern int nlmclnt_proc(struct nlm_host *host, int cmd,
- struct file_lock *fl);
+/*
+ * NLM client operations provide a means to modify RPC processing of NLM
+ * requests. Callbacks receive a pointer to data passed into the call to
+ * nlmclnt_proc().
+ */
+struct nlmclnt_operations {
+ /* Called on successful allocation of nlm_rqst, use for allocation or
+ * reference counting. */
+ void (*nlmclnt_alloc_call)(void *);
+
+ /* Called in rpc_task_prepare for unlock. A return value of true
+ * indicates the callback has put the task to sleep on a waitqueue
+ * and NLM should not call rpc_call_start(). */
+ bool (*nlmclnt_unlock_prepare)(struct rpc_task*, void *);
+
+ /* Called when the nlm_rqst is freed, callbacks should clean up here */
+ void (*nlmclnt_release_call)(void *);
+};
+
+extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data);
extern int lockd_up(struct net *net);
extern void lockd_down(struct net *net);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b37dee3acaba..41f7b6a04d69 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -69,6 +69,7 @@ struct nlm_host {
char *h_addrbuf; /* address eyecatcher */
struct net *net; /* host net */
char nodename[UNX_MAXNODENAME + 1];
+ const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */
};
/*
@@ -142,6 +143,7 @@ struct nlm_rqst {
struct nlm_block * a_block;
unsigned int a_retries; /* Retry count */
u8 a_owner[NLMCLNT_OHSIZE];
+ void * a_callback_data; /* sent to nlmclnt_operations callbacks */
};
/*
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 287f34161086..bb0eb2c9acca 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -76,6 +76,7 @@ struct nfs_open_context {
#define NFS_CONTEXT_ERROR_WRITE (0)
#define NFS_CONTEXT_RESEND_WRITES (1)
#define NFS_CONTEXT_BAD (2)
+#define NFS_CONTEXT_UNLOCK (3)
int error;
struct list_head list;
@@ -499,25 +500,13 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
*/
extern int nfs_sync_inode(struct inode *inode);
extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
+extern int nfs_wb_page(struct inode *inode, struct page *page);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
extern int nfs_commit_inode(struct inode *, int);
-extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
extern void nfs_commit_free(struct nfs_commit_data *data);
static inline int
-nfs_wb_launder_page(struct inode *inode, struct page *page)
-{
- return nfs_wb_single_page(inode, page, true);
-}
-
-static inline int
-nfs_wb_page(struct inode *inode, struct page *page)
-{
- return nfs_wb_single_page(inode, page, false);
-}
-
-static inline int
nfs_have_writebacks(struct inode *inode)
{
return NFS_I(inode)->nrequests != 0;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e1502c55741e..e418a1096662 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -221,6 +221,7 @@ struct nfs_server {
u32 mountd_version;
unsigned short mountd_port;
unsigned short mountd_protocol;
+ struct rpc_wait_queue uoc_rpcwaitq;
};
/* Server capabilities */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 957049f72290..247cc3d3498f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -64,7 +64,6 @@ struct nfs_pageio_ops {
};
struct nfs_rw_ops {
- const fmode_t rw_mode;
struct nfs_pgio_header *(*rw_alloc_header)(void);
void (*rw_free_header)(struct nfs_pgio_header *);
int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
@@ -124,7 +123,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
const struct nfs_pgio_completion_ops *compl_ops,
const struct nfs_rw_ops *rw_ops,
size_t bsize,
- int how);
+ int how,
+ gfp_t gfp_flags);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
struct nfs_page *);
extern int nfs_pageio_resend(struct nfs_pageio_descriptor *,
@@ -141,6 +141,7 @@ extern int nfs_page_group_lock(struct nfs_page *, bool);
extern void nfs_page_group_lock_wait(struct nfs_page *);
extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
/*
* Lock the page of an asynchronous request
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 348f7c158084..b28c83475ee8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1383,6 +1383,7 @@ struct nfs42_copy_res {
struct nfs42_write_res write_res;
bool consecutive;
bool synchronous;
+ struct nfs_commitres commit_res;
};
struct nfs42_seek_args {
@@ -1427,6 +1428,7 @@ struct nfs_pgio_header {
struct list_head pages;
struct nfs_page *req;
struct nfs_writeverf verf; /* Used for writes */
+ fmode_t rw_mode;
struct pnfs_layout_segment *lseg;
loff_t io_start;
const struct rpc_call_ops *mds_ops;
@@ -1550,6 +1552,7 @@ struct nfs_rpc_ops {
const struct inode_operations *dir_inode_ops;
const struct inode_operations *file_inode_ops;
const struct file_operations *file_ops;
+ const struct nlmclnt_operations *nlmclnt_ops;
int (*getroot) (struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52da3ce54bb5..b5cb921775a0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1042,8 +1042,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
struct rpc_task *task;
task = rpc_new_task(task_setup_data);
- if (IS_ERR(task))
- goto out;
rpc_task_set_client(task, task_setup_data->rpc_client);
rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
@@ -1053,7 +1051,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
atomic_inc(&task->tk_count);
rpc_execute(task);
-out:
return task;
}
EXPORT_SYMBOL_GPL(rpc_run_task);
@@ -1140,10 +1137,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
- if (IS_ERR(task)) {
- xprt_free_bc_request(req);
- goto out;
- }
task->tk_rqstp = req;
/*
@@ -1158,7 +1151,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
rpc_execute(task);
-out:
dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
return task;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5db68b371db2..0cc83839c13c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -965,11 +965,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
if (task == NULL) {
task = rpc_alloc_task();
- if (task == NULL) {
- rpc_release_calldata(setup_data->callback_ops,
- setup_data->callback_data);
- return ERR_PTR(-ENOMEM);
- }
flags = RPC_TASK_DYNAMIC;
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 1f7082144e01..e34f4ee7f2b6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_decode);
/**
- * xdr_init_decode - Initialize an xdr_stream for decoding data.
+ * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer from which to decode data
* @pages: list of pages to decode into
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b530a2852ba8..3e63c5e97ebe 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
+EXPORT_SYMBOL_GPL(xprt_force_disconnect);
/**
* xprt_conditional_disconnect - force a transport to disconnect
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a044be2d6ad7..694e9b13ecf0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
}
sge->length = len;
- ib_dma_sync_single_for_device(ia->ri_device, sge->addr,
+ ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE);
req->rl_send_wr.num_sge++;
return true;
@@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len;
sge[sge_no].lkey = rdmab_lkey(rb);
- ib_dma_sync_single_for_device(device, sge[sge_no].addr,
+ ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
sge[sge_no].length, DMA_TO_DEVICE);
/* If there is a Read chunk, the page list is being handled
@@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
return 0;
out_err:
- pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
- PTR_ERR(iptr));
- r_xprt->rx_stats.failed_marshal_count++;
+ if (PTR_ERR(iptr) != -ENOBUFS) {
+ pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
+ PTR_ERR(iptr));
+ r_xprt->rx_stats.failed_marshal_count++;
+ }
return PTR_ERR(iptr);
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c717f5410776..62ecbccd9748 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
- int xprt_rdma_pad_optimize = 0;
+unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt);
- rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy);
+ rc = rpcrdma_ia_open(new_xprt, sap);
if (rc)
goto out1;
@@ -457,19 +457,33 @@ out1:
return ERR_PTR(rc);
}
-/*
- * Close a connection, during shutdown or timeout/reconnect
+/**
+ * xprt_rdma_close - Close down RDMA connection
+ * @xprt: generic transport to be closed
+ *
+ * Called during transport shutdown reconnect, or device
+ * removal. Caller holds the transport's write lock.
*/
static void
xprt_rdma_close(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+ dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
- dprintk("RPC: %s: closing\n", __func__);
- if (r_xprt->rx_ep.rep_connected > 0)
+ if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
+ xprt_clear_connected(xprt);
+ rpcrdma_ia_remove(ia);
+ return;
+ }
+ if (ep->rep_connected == -ENODEV)
+ return;
+ if (ep->rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
- rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ rpcrdma_ep_disconnect(ep, ia);
}
static void
@@ -484,6 +498,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
dprintk("RPC: %s: %u\n", __func__, port);
}
+/**
+ * xprt_rdma_timer - invoked when an RPC times out
+ * @xprt: controlling RPC transport
+ * @task: RPC task that timed out
+ *
+ * Invoked when the transport is still connected, but an RPC
+ * retransmit timeout occurs.
+ *
+ * Since RDMA connections don't have a keep-alive, forcibly
+ * disconnect and retry to connect. This drives full
+ * detection of the network path, and retransmissions of
+ * all pending RPCs.
+ */
+static void
+xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
+
+ xprt_force_disconnect(xprt);
+}
+
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -659,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
* xprt_rdma_send_request - marshal and send an RPC request
* @task: RPC task with an RPC message in rq_snd_buf
*
+ * Caller holds the transport's write lock.
+ *
* Return values:
* 0: The request has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again
@@ -685,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
+ if (!xprt_connected(xprt))
+ goto drop_connection;
+
/* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered)))
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
@@ -776,6 +816,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
.alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
+ .timer = xprt_rdma_timer,
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
.set_port = xprt_rdma_set_port,
.connect = xprt_rdma_connect,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b332b395045..3dbce9ac4327 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -53,7 +53,7 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <asm/bitops.h>
-#include <linux/module.h> /* try_module_get()/module_put() */
+
#include <rdma/ib_cm.h>
#include "xprt_rdma.h"
@@ -69,8 +69,11 @@
/*
* internal functions
*/
+static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
-static struct workqueue_struct *rpcrdma_receive_wq;
+static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
int
rpcrdma_alloc_wq(void)
@@ -180,7 +183,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
- ib_dma_sync_single_for_cpu(rep->rr_device,
+ ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
rdmab_addr(rep->rr_rdmabuf),
rep->rr_len, DMA_FROM_DEVICE);
@@ -262,6 +265,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
__func__, ep);
complete(&ia->ri_done);
break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ pr_info("rpcrdma: removing device for %pIS:%u\n",
+ sap, rpc_get_port(sap));
+#endif
+ set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
+ ep->rep_connected = -ENODEV;
+ xprt_force_disconnect(&xprt->rx_xprt);
+ wait_for_completion(&ia->ri_remove_done);
+
+ ia->ri_id = NULL;
+ ia->ri_pd = NULL;
+ ia->ri_device = NULL;
+ /* Return 1 to ensure the core destroys the id. */
+ return 1;
case RDMA_CM_EVENT_ESTABLISHED:
connstate = 1;
ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +309,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
goto connected;
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
- goto connected;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- connstate = -ENODEV;
connected:
dprintk("RPC: %s: %sconnected\n",
__func__, connstate > 0 ? "" : "dis");
@@ -329,14 +344,6 @@ connected:
return 0;
}
-static void rpcrdma_destroy_id(struct rdma_cm_id *id)
-{
- if (id) {
- module_put(id->device->owner);
- rdma_destroy_id(id);
- }
-}
-
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
struct rpcrdma_ia *ia, struct sockaddr *addr)
@@ -346,6 +353,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
int rc;
init_completion(&ia->ri_done);
+ init_completion(&ia->ri_remove_done);
id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
IB_QPT_RC);
@@ -370,16 +378,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
goto out;
}
- /* FIXME:
- * Until xprtrdma supports DEVICE_REMOVAL, the provider must
- * be pinned while there are active NFS/RDMA mounts to prevent
- * hangs and crashes at umount time.
- */
- if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
- dprintk("RPC: %s: Failed to get device module\n",
- __func__);
- ia->ri_async_rc = -ENODEV;
- }
rc = ia->ri_async_rc;
if (rc)
goto out;
@@ -389,21 +387,20 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
if (rc) {
dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
__func__, rc);
- goto put;
+ goto out;
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
dprintk("RPC: %s: wait() exited: %i\n",
__func__, rc);
- goto put;
+ goto out;
}
rc = ia->ri_async_rc;
if (rc)
- goto put;
+ goto out;
return id;
-put:
- module_put(id->device->owner);
+
out:
rdma_destroy_id(id);
return ERR_PTR(rc);
@@ -413,13 +410,16 @@ out:
* Exported functions.
*/
-/*
- * Open and initialize an Interface Adapter.
- * o initializes fields of struct rpcrdma_ia, including
- * interface and provider attributes and protection zone.
+/**
+ * rpcrdma_ia_open - Open and initialize an Interface Adapter.
+ * @xprt: controlling transport
+ * @addr: IP address of remote peer
+ *
+ * Returns 0 on success, negative errno if an appropriate
+ * Interface Adapter could not be found and opened.
*/
int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
int rc;
@@ -427,7 +427,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
- goto out1;
+ goto out_err;
}
ia->ri_device = ia->ri_id->device;
@@ -435,10 +435,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
if (IS_ERR(ia->ri_pd)) {
rc = PTR_ERR(ia->ri_pd);
pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
- goto out2;
+ goto out_err;
}
- switch (memreg) {
+ switch (xprt_rdma_memreg_strategy) {
case RPCRDMA_FRMR:
if (frwr_is_supported(ia)) {
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
@@ -452,28 +452,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
}
/*FALLTHROUGH*/
default:
- pr_err("rpcrdma: Unsupported memory registration mode: %d\n",
- memreg);
+ pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
+ ia->ri_device->name, xprt_rdma_memreg_strategy);
rc = -EINVAL;
- goto out3;
+ goto out_err;
}
return 0;
-out3:
- ib_dealloc_pd(ia->ri_pd);
- ia->ri_pd = NULL;
-out2:
- rpcrdma_destroy_id(ia->ri_id);
- ia->ri_id = NULL;
-out1:
+out_err:
+ rpcrdma_ia_close(ia);
return rc;
}
-/*
- * Clean up/close an IA.
- * o if event handles and PD have been initialized, free them.
- * o close the IA
+/**
+ * rpcrdma_ia_remove - Handle device driver unload
+ * @ia: interface adapter being removed
+ *
+ * Divest transport H/W resources associated with this adapter,
+ * but allow it to be restored later.
+ */
+void
+rpcrdma_ia_remove(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+ rx_ia);
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_req *req;
+ struct rpcrdma_rep *rep;
+
+ cancel_delayed_work_sync(&buf->rb_refresh_worker);
+
+ /* This is similar to rpcrdma_ep_destroy, but:
+ * - Don't cancel the connect worker.
+ * - Don't call rpcrdma_ep_disconnect, which waits
+ * for another conn upcall, which will deadlock.
+ * - rdma_disconnect is unneeded, the underlying
+ * connection is already gone.
+ */
+ if (ia->ri_id->qp) {
+ ib_drain_qp(ia->ri_id->qp);
+ rdma_destroy_qp(ia->ri_id);
+ ia->ri_id->qp = NULL;
+ }
+ ib_free_cq(ep->rep_attr.recv_cq);
+ ib_free_cq(ep->rep_attr.send_cq);
+
+ /* The ULP is responsible for ensuring all DMA
+ * mappings and MRs are gone.
+ */
+ list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
+ rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+ list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
+ rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
+ rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
+ rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+ }
+ rpcrdma_destroy_mrs(buf);
+
+ /* Allow waiters to continue */
+ complete(&ia->ri_remove_done);
+}
+
+/**
+ * rpcrdma_ia_close - Clean up/close an IA.
+ * @ia: interface adapter to close
+ *
*/
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
@@ -482,13 +527,15 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id);
- rpcrdma_destroy_id(ia->ri_id);
- ia->ri_id = NULL;
+ rdma_destroy_id(ia->ri_id);
}
+ ia->ri_id = NULL;
+ ia->ri_device = NULL;
/* If the pd is still busy, xprtrdma missed freeing a resource */
if (ia->ri_pd && !IS_ERR(ia->ri_pd))
ib_dealloc_pd(ia->ri_pd);
+ ia->ri_pd = NULL;
}
/*
@@ -646,6 +693,99 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ib_free_cq(ep->rep_attr.send_cq);
}
+/* Re-establish a connection after a device removal event.
+ * Unlike a normal reconnection, a fresh PD and a new set
+ * of MRs and buffers is needed.
+ */
+static int
+rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+ int rc, err;
+
+ pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
+
+ rc = -EHOSTUNREACH;
+ if (rpcrdma_ia_open(r_xprt, sap))
+ goto out1;
+
+ rc = -ENOMEM;
+ err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
+ if (err) {
+ pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
+ goto out2;
+ }
+
+ rc = -ENETUNREACH;
+ err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+ if (err) {
+ pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
+ goto out3;
+ }
+
+ rpcrdma_create_mrs(r_xprt);
+ return 0;
+
+out3:
+ rpcrdma_ep_destroy(ep, ia);
+out2:
+ rpcrdma_ia_close(ia);
+out1:
+ return rc;
+}
+
+static int
+rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
+ struct rpcrdma_ia *ia)
+{
+ struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+ struct rdma_cm_id *id, *old;
+ int err, rc;
+
+ dprintk("RPC: %s: reconnecting...\n", __func__);
+
+ rpcrdma_ep_disconnect(ep, ia);
+
+ rc = -EHOSTUNREACH;
+ id = rpcrdma_create_id(r_xprt, ia, sap);
+ if (IS_ERR(id))
+ goto out;
+
+ /* As long as the new ID points to the same device as the
+ * old ID, we can reuse the transport's existing PD and all
+ * previously allocated MRs. Also, the same device means
+ * the transport's previous DMA mappings are still valid.
+ *
+ * This is a sanity check only. There should be no way these
+ * point to two different devices here.
+ */
+ old = id;
+ rc = -ENETUNREACH;
+ if (ia->ri_device != id->device) {
+ pr_err("rpcrdma: can't reconnect on different device!\n");
+ goto out_destroy;
+ }
+
+ err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+ if (err) {
+ dprintk("RPC: %s: rdma_create_qp returned %d\n",
+ __func__, err);
+ goto out_destroy;
+ }
+
+ /* Atomically replace the transport's ID and QP. */
+ rc = 0;
+ old = ia->ri_id;
+ ia->ri_id = id;
+ rdma_destroy_qp(old);
+
+out_destroy:
+ rdma_destroy_id(old);
+out:
+ return rc;
+}
+
/*
* Connect unconnected endpoint.
*/
@@ -654,61 +794,30 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
rx_ia);
- struct rdma_cm_id *id, *old;
- struct sockaddr *sap;
unsigned int extras;
- int rc = 0;
+ int rc;
- if (ep->rep_connected != 0) {
retry:
- dprintk("RPC: %s: reconnecting...\n", __func__);
-
- rpcrdma_ep_disconnect(ep, ia);
-
- sap = (struct sockaddr *)&r_xprt->rx_data.addr;
- id = rpcrdma_create_id(r_xprt, ia, sap);
- if (IS_ERR(id)) {
- rc = -EHOSTUNREACH;
- goto out;
- }
- /* TEMP TEMP TEMP - fail if new device:
- * Deregister/remarshal *all* requests!
- * Close and recreate adapter, pd, etc!
- * Re-determine all attributes still sane!
- * More stuff I haven't thought of!
- * Rrrgh!
- */
- if (ia->ri_device != id->device) {
- printk("RPC: %s: can't reconnect on "
- "different device!\n", __func__);
- rpcrdma_destroy_id(id);
- rc = -ENETUNREACH;
- goto out;
- }
- /* END TEMP */
- rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
- if (rc) {
- dprintk("RPC: %s: rdma_create_qp failed %i\n",
- __func__, rc);
- rpcrdma_destroy_id(id);
- rc = -ENETUNREACH;
- goto out;
- }
-
- old = ia->ri_id;
- ia->ri_id = id;
-
- rdma_destroy_qp(old);
- rpcrdma_destroy_id(old);
- } else {
+ switch (ep->rep_connected) {
+ case 0:
dprintk("RPC: %s: connecting...\n", __func__);
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
if (rc) {
dprintk("RPC: %s: rdma_create_qp failed %i\n",
__func__, rc);
- /* do not update ep->rep_connected */
- return -ENETUNREACH;
+ rc = -ENETUNREACH;
+ goto out_noupdate;
}
+ break;
+ case -ENODEV:
+ rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
+ if (rc)
+ goto out_noupdate;
+ break;
+ default:
+ rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
+ if (rc)
+ goto out;
}
ep->rep_connected = 0;
@@ -736,6 +845,8 @@ retry:
out:
if (rc)
ep->rep_connected = rc;
+
+out_noupdate:
return rc;
}
@@ -878,7 +989,6 @@ struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_rep *rep;
int rc;
@@ -894,7 +1004,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
goto out_free;
}
- rep->rr_device = ia->ri_device;
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
@@ -1037,6 +1146,7 @@ void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
cancel_delayed_work_sync(&buf->rb_recovery_worker);
+ cancel_delayed_work_sync(&buf->rb_refresh_worker);
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
@@ -1081,7 +1191,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
out_nomws:
dprintk("RPC: %s: no MWs available\n", __func__);
- schedule_delayed_work(&buf->rb_refresh_worker, 0);
+ if (r_xprt->rx_ep.rep_connected != -ENODEV)
+ schedule_delayed_work(&buf->rb_refresh_worker, 0);
/* Allow the reply handler and refresh worker to run */
cond_resched();
@@ -1231,17 +1342,19 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
bool
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
+ struct ib_device *device = ia->ri_device;
+
if (rb->rg_direction == DMA_NONE)
return false;
- rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
+ rb->rg_iov.addr = ib_dma_map_single(device,
(void *)rb->rg_base,
rdmab_length(rb),
rb->rg_direction);
- if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
+ if (ib_dma_mapping_error(device, rdmab_addr(rb)))
return false;
- rb->rg_device = ia->ri_device;
+ rb->rg_device = device;
rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
return true;
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 171a35116de9..1d66acf1a723 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -69,6 +69,7 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct completion ri_done;
+ struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth;
@@ -78,10 +79,15 @@ struct rpcrdma_ia {
bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
+ unsigned long ri_flags;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
+enum {
+ RPCRDMA_IAF_REMOVING = 0,
+};
+
/*
* RDMA Endpoint -- one per transport instance
*/
@@ -164,6 +170,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
return (struct rpcrdma_msg *)rb->rg_base;
}
+static inline struct ib_device *
+rdmab_device(struct rpcrdma_regbuf *rb)
+{
+ return rb->rg_device;
+}
+
#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
/* To ensure a transport can always make forward progress,
@@ -209,7 +221,6 @@ struct rpcrdma_rep {
unsigned int rr_len;
int rr_wc_flags;
u32 rr_inv_rkey;
- struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
struct list_head rr_list;
@@ -380,7 +391,6 @@ struct rpcrdma_buffer {
spinlock_t rb_mwlock; /* protect rb_mws list */
struct list_head rb_mws;
struct list_head rb_all;
- char *rb_pool;
spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count;
@@ -497,10 +507,16 @@ struct rpcrdma_xprt {
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize;
+/* This setting controls the hunt for a supported memory
+ * registration strategy.
+ */
+extern unsigned int xprt_rdma_memreg_strategy;
+
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
-int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *);