summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 20:32:23 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 20:32:23 +0200
commit8688d9540cc6e17df4cba71615e27f04e0378fe6 (patch)
tree45ab333822188966217f6a3ec7e8289ca7eced72 /fs
parentMerge branch 'overlayfs-next' of git://git.kernel.org/pub/scm/linux/kernel/gi... (diff)
parentnfs: Remove invalid tk_pid from debug message (diff)
downloadlinux-8688d9540cc6e17df4cba71615e27f04e0378fe6.tar.xz
linux-8688d9540cc6e17df4cba71615e27f04e0378fe6.zip
Merge tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable patches: - Fix a crash in the NFSv4 file locking code. - Fix an fsync() regression, where we were failing to retry I/O in some circumstances. - Fix an infinite loop in NFSv4.0 OPEN stateid recovery - Fix a memory leak when an attempted pnfs fails. - Fix a memory leak in the backchannel code - Large hostnames were not supported correctly in NFSv4.1 - Fix a pNFS/flexfiles bug that was impeding error reporting on I/O. - Fix a couple of credential issues in pNFS/flexfiles Bugfixes + cleanups: - Open flag sanity checks in the NFSv4 atomic open codepath - More NFSv4 delegation related bugfixes - Various NFSv4.1 backchannel bugfixes and cleanups - Fix the NFS swap socket code - Various cleanups of the NFSv4 SETCLIENTID and EXCHANGE_ID code - Fix a UDP transport deadlock issue Features: - More RDMA client transport improvements - NFSv4.2 LAYOUTSTATS functionality for pnfs flexfiles" * tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (87 commits) nfs: Remove invalid tk_pid from debug message nfs: Remove invalid NFS_ATTR_FATTR_V4_REFERRAL checking in nfs4_get_rootfh nfs: Drop bad comment in nfs41_walk_client_list() nfs: Remove unneeded micro checking of CONFIG_PROC_FS nfs: Don't setting FILE_CREATED flags always nfs: Use remove_proc_subtree() instead remove_proc_entry() nfs: Remove unused argument in nfs_server_set_fsinfo() nfs: Fix a memory leak when meeting an unsupported state protect nfs: take extra reference to fl->fl_file when running a LOCKU operation NFSv4: When returning a delegation, don't reclaim an incompatible open mode. NFSv4.2: LAYOUTSTATS is optional to implement NFSv4.2: Fix up a decoding error in layoutstats pNFS/flexfiles: Fix the reset of struct pgio_header when resending pNFS/flexfiles: Turn off layoutcommit for servers that don't need it pnfs/flexfiles: protect ktime manipulation with mirror lock nfs: provide pnfs_report_layoutstat when NFS42 is disabled nfs: verify open flags before allowing open nfs: always update creds in mirror, even when we have an already connected ds nfs: fix potential credential leak in ff_layout_update_mirror_cred pnfs/flexfiles: report layoutstat regularly ...
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/callback.c6
-rw-r--r--fs/nfs/callback_proc.c38
-rw-r--r--fs/nfs/callback_xdr.c2
-rw-r--r--fs/nfs/client.c40
-rw-r--r--fs/nfs/dir.c3
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c480
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h33
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c7
-rw-r--r--fs/nfs/inode.c12
-rw-r--r--fs/nfs/nfs3xdr.c2
-rw-r--r--fs/nfs/nfs42.h9
-rw-r--r--fs/nfs/nfs42proc.c87
-rw-r--r--fs/nfs/nfs42xdr.c106
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4getroot.c7
-rw-r--r--fs/nfs/nfs4idmap.c7
-rw-r--r--fs/nfs/nfs4proc.c221
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/nfs4xdr.c15
-rw-r--r--fs/nfs/pagelist.c10
-rw-r--r--fs/nfs/pnfs.c64
-rw-r--r--fs/nfs/pnfs.h13
-rw-r--r--fs/nfs/write.c9
26 files changed, 1004 insertions, 192 deletions
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 8d129bb7355a..682529c00996 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
* pg_authenticate method for nfsv4 callback threads.
*
* The authflavor has been negotiated, so an incorrect flavor is a server
- * bug. Drop packets with incorrect authflavor.
+ * bug. Deny packets with incorrect authflavor.
*
* All other checking done after NFS decoding where the nfs_client can be
* found in nfs4_callback_compound
@@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
switch (rqstp->rq_authop->flavour) {
case RPC_AUTH_NULL:
if (rqstp->rq_proc != CB_NULL)
- return SVC_DROP;
+ return SVC_DENIED;
break;
case RPC_AUTH_GSS:
/* No RPC_AUTH_GSS support yet in NFSv4.1 */
if (svc_is_backchannel(rqstp))
- return SVC_DROP;
+ return SVC_DENIED;
}
return SVC_OK;
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 197806fb87ff..29e3c1b011b7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
/* Normal */
- if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
- slot->seq_nr++;
+ if (likely(args->csa_sequenceid == slot->seq_nr + 1))
goto out_ok;
- }
/* Replay */
if (args->csa_sequenceid == slot->seq_nr) {
@@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_process_state *cps)
{
struct nfs4_slot_table *tbl;
+ struct nfs4_slot *slot;
struct nfs_client *clp;
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
@@ -429,25 +428,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
goto out;
+
tbl = &clp->cl_session->bc_slot_table;
+ slot = tbl->slots + args->csa_slotid;
spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
- spin_unlock(&tbl->slot_tbl_lock);
status = htonl(NFS4ERR_DELAY);
/* Return NFS4ERR_BADSESSION if we're draining the session
* in order to reset it.
*/
if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
status = htonl(NFS4ERR_BADSESSION);
- goto out;
+ goto out_unlock;
}
- status = validate_seqid(&clp->cl_session->bc_slot_table, args);
- spin_unlock(&tbl->slot_tbl_lock);
+ memcpy(&res->csr_sessionid, &args->csa_sessionid,
+ sizeof(res->csr_sessionid));
+ res->csr_sequenceid = args->csa_sequenceid;
+ res->csr_slotid = args->csa_slotid;
+ res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+
+ status = validate_seqid(tbl, args);
if (status)
- goto out;
+ goto out_unlock;
cps->slotid = args->csa_slotid;
@@ -458,15 +464,17 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
*/
if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
status = htonl(NFS4ERR_DELAY);
- goto out;
+ goto out_unlock;
}
- memcpy(&res->csr_sessionid, &args->csa_sessionid,
- sizeof(res->csr_sessionid));
- res->csr_sequenceid = args->csa_sequenceid;
- res->csr_slotid = args->csa_slotid;
- res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
- res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ /*
+ * RFC5661 20.9.3
+ * If CB_SEQUENCE returns an error, then the state of the slot
+ * (sequence ID, cached reply) MUST NOT change.
+ */
+ slot->seq_nr++;
+out_unlock:
+ spin_unlock(&tbl->slot_tbl_lock);
out:
cps->clp = clp; /* put in nfs4_callback_compound */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 19ca95cdfd9b..6b1697a01dde 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
- if (status == __constant_htonl(NFS4ERR_RESOURCE))
+ if (status == htonl(NFS4ERR_RESOURCE))
return rpc_garbage_args;
if (hdr_arg.minorversion == 0) {
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 892aefff3630..ecebb406cc1a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -825,7 +825,6 @@ error:
* Load up the server record from information gained in an fsinfo record
*/
static void nfs_server_set_fsinfo(struct nfs_server *server,
- struct nfs_fh *mntfh,
struct nfs_fsinfo *fsinfo)
{
unsigned long max_rpc_payload;
@@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
if (error < 0)
goto out_error;
- nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ nfs_server_set_fsinfo(server, &fsinfo);
/* Get some general file system info */
if (server->namelen == 0) {
@@ -1193,8 +1192,6 @@ void nfs_clients_init(struct net *net)
}
#ifdef CONFIG_PROC_FS
-static struct proc_dir_entry *proc_fs_nfs;
-
static int nfs_server_list_open(struct inode *inode, struct file *file);
static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
@@ -1364,27 +1361,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
{
struct nfs_server *server;
struct nfs_client *clp;
- char dev[8], fsid[17];
+ char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0'
+ char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0'
struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* display header on line 1 */
if (v == &nn->nfs_volume_list) {
- seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
+ seq_puts(m, "NV SERVER PORT DEV FSID"
+ " FSC\n");
return 0;
}
/* display one transport per line on subsequent lines */
server = list_entry(v, struct nfs_server, master_link);
clp = server->nfs_client;
- snprintf(dev, 8, "%u:%u",
+ snprintf(dev, sizeof(dev), "%u:%u",
MAJOR(server->s_dev), MINOR(server->s_dev));
- snprintf(fsid, 17, "%llx:%llx",
+ snprintf(fsid, sizeof(fsid), "%llx:%llx",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
rcu_read_lock();
- seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
+ seq_printf(m, "v%u %s %s %-12s %-33s %s\n",
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
@@ -1434,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net)
*/
int __init nfs_fs_proc_init(void)
{
- struct proc_dir_entry *p;
-
- proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
- if (!proc_fs_nfs)
+ if (!proc_mkdir("fs/nfsfs", NULL))
goto error_0;
/* a file of servers with which we're dealing */
- p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
- if (!p)
+ if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers"))
goto error_1;
/* a file of volumes that we have mounted */
- p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
- if (!p)
- goto error_2;
- return 0;
+ if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes"))
+ goto error_1;
-error_2:
- remove_proc_entry("servers", proc_fs_nfs);
+ return 0;
error_1:
- remove_proc_entry("fs/nfsfs", NULL);
+ remove_proc_subtree("fs/nfsfs", NULL);
error_0:
return -ENOMEM;
}
@@ -1464,9 +1456,7 @@ error_0:
*/
void nfs_fs_proc_exit(void)
{
- remove_proc_entry("volumes", proc_fs_nfs);
- remove_proc_entry("servers", proc_fs_nfs);
- remove_proc_entry("fs/nfsfs", NULL);
+ remove_proc_subtree("fs/nfsfs", NULL);
}
#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b2c8b31b2be7..21457bb0edd6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
{
int err;
- if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
- *opened |= FILE_CREATED;
-
err = finish_open(file, dentry, do_open, opened);
if (err)
goto out;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8b8d83a526ce..cc4fa1ed61fc 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page)
return nfs_wb_page(inode, page);
}
-#ifdef CONFIG_NFS_SWAP
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
- int ret;
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
*span = sis->pages;
- rcu_read_lock();
- ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1);
- rcu_read_unlock();
-
- return ret;
+ return rpc_clnt_swap_activate(clnt);
}
static void nfs_swap_deactivate(struct file *file)
{
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
- rcu_read_lock();
- xs_swapper(rcu_dereference(clnt->cl_xprt), 0);
- rcu_read_unlock();
+ rpc_clnt_swap_deactivate(clnt);
}
-#endif
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
@@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = {
.launder_page = nfs_launder_page,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
-#ifdef CONFIG_NFS_SWAP
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
-#endif
};
/*
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 7d05089e52d6..c12951b9551e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -20,6 +20,7 @@
#include "../nfs4trace.h"
#include "../iostat.h"
#include "../nfs.h"
+#include "../nfs42.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -182,17 +183,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
- struct nfs4_ff_layout_mirror *tmp;
int i, j;
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
for (j = i + 1; j < fls->mirror_array_cnt; j++)
if (fls->mirror_array[i]->efficiency <
- fls->mirror_array[j]->efficiency) {
- tmp = fls->mirror_array[i];
- fls->mirror_array[i] = fls->mirror_array[j];
- fls->mirror_array[j] = tmp;
- }
+ fls->mirror_array[j]->efficiency)
+ swap(fls->mirror_array[i],
+ fls->mirror_array[j]);
}
}
@@ -274,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
spin_lock_init(&fls->mirror_array[i]->lock);
fls->mirror_array[i]->ds_count = ds_count;
+ fls->mirror_array[i]->lseg = &fls->generic_hdr;
/* deviceid */
rc = decode_deviceid(&stream, &devid);
@@ -344,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
fls->mirror_array[i]->gid);
}
+ p = xdr_inline_decode(&stream, 4);
+ if (p)
+ fls->flags = be32_to_cpup(p);
+
ff_layout_sort_mirrors(fls);
rc = ff_layout_check_layout(lgr);
if (rc)
@@ -415,6 +418,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
return 1;
}
+static void
+nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+ /* first IO request? */
+ if (atomic_inc_return(&timer->n_ops) == 1) {
+ timer->start_time = ktime_get();
+ }
+}
+
+static ktime_t
+nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+ ktime_t start, now;
+
+ if (atomic_dec_return(&timer->n_ops) < 0)
+ WARN_ON_ONCE(1);
+
+ now = ktime_get();
+ start = timer->start_time;
+ timer->start_time = now;
+ return ktime_sub(now, start);
+}
+
+static ktime_t
+nfs4_ff_layout_calc_completion_time(struct rpc_task *task)
+{
+ return ktime_sub(ktime_get(), task->tk_start);
+}
+
+static bool
+nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
+ struct nfs4_ff_layoutstat *layoutstat)
+{
+ static const ktime_t notime = {0};
+ ktime_t now = ktime_get();
+
+ nfs4_ff_start_busy_timer(&layoutstat->busy_timer);
+ if (ktime_equal(mirror->start_time, notime))
+ mirror->start_time = now;
+ if (ktime_equal(mirror->last_report_time, notime))
+ mirror->last_report_time = now;
+ if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+ FF_LAYOUTSTATS_REPORT_INTERVAL) {
+ mirror->last_report_time = now;
+ return true;
+ }
+
+ return false;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
+ __u64 requested)
+{
+ struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+
+ iostat->ops_requested++;
+ iostat->bytes_requested += requested;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
+ __u64 requested,
+ __u64 completed,
+ ktime_t time_completed)
+{
+ struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+ ktime_t timer;
+
+ iostat->ops_completed++;
+ iostat->bytes_completed += completed;
+ iostat->bytes_not_delivered += requested - completed;
+
+ timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer);
+ iostat->total_busy_time =
+ ktime_add(iostat->total_busy_time, timer);
+ iostat->aggregate_completion_time =
+ ktime_add(iostat->aggregate_completion_time, time_completed);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested)
+{
+ bool report;
+
+ spin_lock(&mirror->lock);
+ report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat);
+ nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
+ spin_unlock(&mirror->lock);
+
+ if (report)
+ pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
+ struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested,
+ __u64 completed)
+{
+ spin_lock(&mirror->lock);
+ nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
+ requested, completed,
+ nfs4_ff_layout_calc_completion_time(task));
+ spin_unlock(&mirror->lock);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested)
+{
+ bool report;
+
+ spin_lock(&mirror->lock);
+ report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat);
+ nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
+ spin_unlock(&mirror->lock);
+
+ if (report)
+ pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
+ struct nfs4_ff_layout_mirror *mirror,
+ __u64 requested,
+ __u64 completed,
+ enum nfs3_stable_how committed)
+{
+ if (committed == NFS_UNSTABLE)
+ requested = completed = 0;
+
+ spin_lock(&mirror->lock);
+ nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
+ requested, completed,
+ nfs4_ff_layout_calc_completion_time(task));
+ spin_unlock(&mirror->lock);
+}
+
static int
ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo,
@@ -631,7 +774,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
nfs_direct_set_resched_writes(hdr->dreq);
/* fake unstable write to let common nfs resend pages */
hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = 0;
+ hdr->good_bytes = hdr->args.count;
}
return;
}
@@ -879,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
return 0;
}
+static bool
+ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)
+{
+ return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT);
+}
+
/*
* We reference the rpc_cred of the first WRITE that triggers the need for
* a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
@@ -891,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
static void
ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
+ if (!ff_layout_need_layoutcommit(hdr->lseg))
+ return;
+
pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
hdr->mds_offset + hdr->res.count);
dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
@@ -909,6 +1061,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
static int ff_layout_read_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ nfs4_ff_layout_stat_io_start_read(
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count);
+
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -962,15 +1118,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
- if (ff_layout_read_prepare_common(task, hdr))
- return;
-
if (ff_layout_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
return;
+ if (ff_layout_read_prepare_common(task, hdr))
+ return;
+
if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
hdr->args.lock_context, FMODE_READ) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -982,6 +1138,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+ nfs4_ff_layout_stat_io_end_read(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count, hdr->res.count);
+
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1074,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
return -EAGAIN;
}
- if (data->verf.committed == NFS_UNSTABLE)
+ if (data->verf.committed == NFS_UNSTABLE
+ && ff_layout_need_layoutcommit(data->lseg))
pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
return 0;
@@ -1083,6 +1244,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
static int ff_layout_write_prepare_common(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ nfs4_ff_layout_stat_io_start_write(
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count);
+
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -1116,15 +1281,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
- if (ff_layout_write_prepare_common(task, hdr))
- return;
-
if (ff_layout_setup_sequence(hdr->ds_clp,
&hdr->args.seq_args,
&hdr->res.seq_res,
task))
return;
+ if (ff_layout_write_prepare_common(task, hdr))
+ return;
+
if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
hdr->args.lock_context, FMODE_WRITE) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -1134,6 +1299,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count, hdr->res.count,
+ hdr->res.verf->committed);
+
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1152,8 +1322,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}
+static void ff_layout_commit_prepare_common(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
+{
+ nfs4_ff_layout_stat_io_start_write(
+ FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ 0);
+}
+
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
+ ff_layout_commit_prepare_common(task, data);
rpc_call_start(task);
}
@@ -1161,10 +1340,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
{
struct nfs_commit_data *wdata = data;
- ff_layout_setup_sequence(wdata->ds_clp,
+ if (ff_layout_setup_sequence(wdata->ds_clp,
&wdata->args.seq_args,
&wdata->res.seq_res,
- task);
+ task))
+ return;
+ ff_layout_commit_prepare_common(task, data);
+}
+
+static void ff_layout_commit_done(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *cdata = data;
+ struct nfs_page *req;
+ __u64 count = 0;
+
+ if (task->tk_status == 0) {
+ list_for_each_entry(req, &cdata->pages, wb_list)
+ count += req->wb_bytes;
+ }
+
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ count, count, NFS_FILE_SYNC);
+
+ pnfs_generic_write_commit_done(task, data);
}
static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
@@ -1205,14 +1404,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
.rpc_call_prepare = ff_layout_commit_prepare_v3,
- .rpc_call_done = pnfs_generic_write_commit_done,
+ .rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
.rpc_release = pnfs_generic_commit_release,
};
static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
.rpc_call_prepare = ff_layout_commit_prepare_v4,
- .rpc_call_done = pnfs_generic_write_commit_done,
+ .rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
.rpc_release = pnfs_generic_commit_release,
};
@@ -1256,7 +1455,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
if (fh)
hdr->args.fh = fh;
-
/*
* Note that if we ever decide to split across DSes,
* then we may need to handle dense-like offsets.
@@ -1385,6 +1583,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
data->args.fh = fh;
+
return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
@@ -1488,6 +1687,247 @@ out:
dprintk("%s: Return\n", __func__);
}
+static int
+ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
+{
+ const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+
+ return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
+}
+
+static size_t
+ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
+ const int buflen)
+{
+ const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+ const struct in6_addr *addr = &sin6->sin6_addr;
+
+ /*
+ * RFC 4291, Section 2.2.2
+ *
+ * Shorthanded ANY address
+ */
+ if (ipv6_addr_any(addr))
+ return snprintf(buf, buflen, "::");
+
+ /*
+ * RFC 4291, Section 2.2.2
+ *
+ * Shorthanded loopback address
+ */
+ if (ipv6_addr_loopback(addr))
+ return snprintf(buf, buflen, "::1");
+
+ /*
+ * RFC 4291, Section 2.2.3
+ *
+ * Special presentation address format for mapped v4
+ * addresses.
+ */
+ if (ipv6_addr_v4mapped(addr))
+ return snprintf(buf, buflen, "::ffff:%pI4",
+ &addr->s6_addr32[3]);
+
+ /*
+ * RFC 4291, Section 2.2.1
+ */
+ return snprintf(buf, buflen, "%pI6c", addr);
+}
+
+/* Derived from rpc_sockaddr2uaddr */
+static void
+ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
+{
+ struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
+ char portbuf[RPCBIND_MAXUADDRPLEN];
+ char addrbuf[RPCBIND_MAXUADDRLEN];
+ char *netid;
+ unsigned short port;
+ int len, netid_len;
+ __be32 *p;
+
+ switch (sap->sa_family) {
+ case AF_INET:
+ if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
+ return;
+ port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+ netid = "tcp";
+ netid_len = 3;
+ break;
+ case AF_INET6:
+ if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
+ return;
+ port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
+ netid = "tcp6";
+ netid_len = 4;
+ break;
+ default:
+ /* we only support tcp and tcp6 */
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
+ len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
+
+ p = xdr_reserve_space(xdr, 4 + netid_len);
+ xdr_encode_opaque(p, netid, netid_len);
+
+ p = xdr_reserve_space(xdr, 4 + len);
+ xdr_encode_opaque(p, addrbuf, len);
+}
+
+static void
+ff_layout_encode_nfstime(struct xdr_stream *xdr,
+ ktime_t t)
+{
+ struct timespec64 ts;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 12);
+ ts = ktime_to_timespec64(t);
+ p = xdr_encode_hyper(p, ts.tv_sec);
+ *p++ = cpu_to_be32(ts.tv_nsec);
+}
+
+static void
+ff_layout_encode_io_latency(struct xdr_stream *xdr,
+ struct nfs4_ff_io_stat *stat)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 5 * 8);
+ p = xdr_encode_hyper(p, stat->ops_requested);
+ p = xdr_encode_hyper(p, stat->bytes_requested);
+ p = xdr_encode_hyper(p, stat->ops_completed);
+ p = xdr_encode_hyper(p, stat->bytes_completed);
+ p = xdr_encode_hyper(p, stat->bytes_not_delivered);
+ ff_layout_encode_nfstime(xdr, stat->total_busy_time);
+ ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
+}
+
+static void
+ff_layout_encode_layoutstats(struct xdr_stream *xdr,
+ struct nfs42_layoutstat_args *args,
+ struct nfs42_layoutstat_devinfo *devinfo)
+{
+ struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;
+ struct nfs4_pnfs_ds_addr *da;
+ struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
+ struct nfs_fh *fh = &mirror->fh_versions[0];
+ __be32 *p, *start;
+
+ da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
+ dprintk("%s: DS %s: encoding address %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);
+ /* layoutupdate length */
+ start = xdr_reserve_space(xdr, 4);
+ /* netaddr4 */
+ ff_layout_encode_netaddr(xdr, da);
+ /* nfs_fh4 */
+ p = xdr_reserve_space(xdr, 4 + fh->size);
+ xdr_encode_opaque(p, fh->data, fh->size);
+ /* ff_io_latency4 read */
+ spin_lock(&mirror->lock);
+ ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
+ /* ff_io_latency4 write */
+ ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
+ spin_unlock(&mirror->lock);
+ /* nfstime4 */
+ ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
+ /* bool */
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(false);
+
+ *start = cpu_to_be32((xdr->p - start - 1) * 4);
+}
+
+static bool
+ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
+ struct pnfs_layout_segment *pls,
+ int *dev_count, int dev_limit)
+{
+ struct nfs4_ff_layout_mirror *mirror;
+ struct nfs4_deviceid_node *dev;
+ struct nfs42_layoutstat_devinfo *devinfo;
+ int i;
+
+ for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) {
+ if (*dev_count >= dev_limit)
+ break;
+ mirror = FF_LAYOUT_COMP(pls, i);
+ if (!mirror || !mirror->mirror_ds)
+ continue;
+ dev = FF_LAYOUT_DEVID_NODE(pls, i);
+ devinfo = &args->devinfo[*dev_count];
+ memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
+ devinfo->offset = pls->pls_range.offset;
+ devinfo->length = pls->pls_range.length;
+ /* well, we don't really know if IO is continuous or not! */
+ devinfo->read_count = mirror->read_stat.io_stat.bytes_completed;
+ devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
+ devinfo->write_count = mirror->write_stat.io_stat.bytes_completed;
+ devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
+ devinfo->layout_type = LAYOUT_FLEX_FILES;
+ devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
+ devinfo->layout_private = mirror;
+ /* lseg refcount put in cleanup_layoutstats */
+ pnfs_get_lseg(pls);
+
+ ++(*dev_count);
+ }
+
+ return *dev_count < dev_limit;
+}
+
+static int
+ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
+{
+ struct pnfs_layout_segment *pls;
+ int dev_count = 0;
+
+ spin_lock(&args->inode->i_lock);
+ list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+ dev_count += FF_LAYOUT_MIRROR_COUNT(pls);
+ }
+ spin_unlock(&args->inode->i_lock);
+ /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
+ if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) {
+ dprintk("%s: truncating devinfo to limit (%d:%d)\n",
+ __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV);
+ dev_count = PNFS_LAYOUTSTATS_MAXDEV;
+ }
+ args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL);
+ if (!args->devinfo)
+ return -ENOMEM;
+
+ dev_count = 0;
+ spin_lock(&args->inode->i_lock);
+ list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+ if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count,
+ PNFS_LAYOUTSTATS_MAXDEV)) {
+ break;
+ }
+ }
+ spin_unlock(&args->inode->i_lock);
+ args->num_dev = dev_count;
+
+ return 0;
+}
+
+static void
+ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
+{
+ struct nfs4_ff_layout_mirror *mirror;
+ int i;
+
+ for (i = 0; i < data->args.num_dev; i++) {
+ mirror = data->args.devinfo[i].layout_private;
+ data->args.devinfo[i].layout_private = NULL;
+ pnfs_put_lseg(mirror->lseg);
+ }
+}
+
static struct pnfs_layoutdriver_type flexfilelayout_type = {
.id = LAYOUT_FLEX_FILES,
.name = "LAYOUT_FLEX_FILES",
@@ -1510,6 +1950,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
.alloc_deviceid_node = ff_layout_alloc_deviceid_node,
.encode_layoutreturn = ff_layout_encode_layoutreturn,
.sync = pnfs_nfs_generic_sync,
+ .prepare_layoutstats = ff_layout_prepare_layoutstats,
+ .cleanup_layoutstats = ff_layout_cleanup_layoutstats,
};
static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 070f20445b2d..f92f9a0a856b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -9,12 +9,17 @@
#ifndef FS_NFS_NFS4FLEXFILELAYOUT_H
#define FS_NFS_NFS4FLEXFILELAYOUT_H
+#define FF_FLAGS_NO_LAYOUTCOMMIT 1
+
#include "../pnfs.h"
/* XXX: Let's filter out insanely large mirror count for now to avoid oom
* due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
+/* LAYOUTSTATS report interval in ms */
+#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
+
struct nfs4_ff_ds_version {
u32 version;
u32 minor_version;
@@ -41,24 +46,48 @@ struct nfs4_ff_layout_ds_err {
struct nfs4_deviceid deviceid;
};
+struct nfs4_ff_io_stat {
+ __u64 ops_requested;
+ __u64 bytes_requested;
+ __u64 ops_completed;
+ __u64 bytes_completed;
+ __u64 bytes_not_delivered;
+ ktime_t total_busy_time;
+ ktime_t aggregate_completion_time;
+};
+
+struct nfs4_ff_busy_timer {
+ ktime_t start_time;
+ atomic_t n_ops;
+};
+
+struct nfs4_ff_layoutstat {
+ struct nfs4_ff_io_stat io_stat;
+ struct nfs4_ff_busy_timer busy_timer;
+};
+
struct nfs4_ff_layout_mirror {
+ struct pnfs_layout_segment *lseg; /* back pointer */
u32 ds_count;
u32 efficiency;
struct nfs4_ff_layout_ds *mirror_ds;
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
- struct nfs4_string user_name;
- struct nfs4_string group_name;
u32 uid;
u32 gid;
struct rpc_cred *cred;
spinlock_t lock;
+ struct nfs4_ff_layoutstat read_stat;
+ struct nfs4_ff_layoutstat write_stat;
+ ktime_t start_time;
+ ktime_t last_report_time;
};
struct nfs4_ff_layout_segment {
struct pnfs_layout_segment generic_hdr;
u64 stripe_unit;
+ u32 flags;
u32 mirror_array_cnt;
struct nfs4_ff_layout_mirror **mirror_array;
};
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 77a2d026aa12..f13e1969eedd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
__func__, PTR_ERR(cred));
return PTR_ERR(cred);
} else {
- mirror->cred = cred;
+ if (cmpxchg(&mirror->cred, NULL, cred))
+ put_rpccred(cred);
}
}
return 0;
@@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
smp_rmb();
if (ds->ds_clp)
- goto out;
+ goto out_update_creds;
flavor = nfs4_ff_layout_choose_authflavor(mirror);
@@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
}
}
}
-
+out_update_creds:
if (ff_layout_update_mirror_cred(mirror, ds))
ds = NULL;
out:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f734562c6d24..b77b328a06d7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
if (!err) {
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+ if (S_ISDIR(inode->i_mode))
+ stat->blksize = NFS_SERVER(inode)->dtsize;
}
out:
trace_nfs_getattr_exit(inode, err);
@@ -2008,17 +2010,15 @@ static int __init init_nfs_fs(void)
if (err)
goto out1;
-#ifdef CONFIG_PROC_FS
rpc_proc_register(&init_net, &nfs_rpcstat);
-#endif
- if ((err = register_nfs_fs()) != 0)
+
+ err = register_nfs_fs();
+ if (err)
goto out0;
return 0;
out0:
-#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
-#endif
nfs_destroy_directcache();
out1:
nfs_destroy_writepagecache();
@@ -2049,9 +2049,7 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_nfspagecache();
nfs_fscache_unregister();
unregister_pernet_subsys(&nfs_net_ops);
-#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
-#endif
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 53852a4bd88b..9b04c2e6fffc 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
if (args->npages != 0)
xdr_write_pages(xdr, args->pages, 0, args->len);
else
- xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
+ xdr_reserve_space(xdr, args->len);
error = nfsacl_encode(xdr->buf, base, args->inode,
(args->mask & NFS_ACL) ?
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 7afb8947dfdf..ff66ae700b89 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -5,11 +5,18 @@
#ifndef __LINUX_FS_NFS_NFS4_2_H
#define __LINUX_FS_NFS_NFS4_2_H
+/*
+ * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support
+ * more? Need to consider not to pre-alloc too much for a compound.
+ */
+#define PNFS_LAYOUTSTATS_MAXDEV (4)
+
/* nfs4.2proc.c */
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
-
+int nfs42_proc_layoutstats_generic(struct nfs_server *,
+ struct nfs42_layoutstat_data *);
/* nfs4.2xdr.h */
extern struct rpc_procinfo nfs4_2_procedures[];
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 3a9e75235f30..f486b80f927a 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -10,6 +10,11 @@
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "nfs42.h"
+#include "iostat.h"
+#include "pnfs.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY NFSDBG_PNFS
static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
fmode_t fmode)
@@ -165,3 +170,85 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
+
+static void
+nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_layoutstat_data *data = calldata;
+ struct nfs_server *server = NFS_SERVER(data->args.inode);
+
+ nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
+ &data->res.seq_res, task);
+}
+
+static void
+nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_layoutstat_data *data = calldata;
+
+ if (!nfs4_sequence_done(task, &data->res.seq_res))
+ return;
+
+ switch (task->tk_status) {
+ case 0:
+ break;
+ case -ENOTSUPP:
+ case -EOPNOTSUPP:
+ NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
+ default:
+ dprintk("%s server returns %d\n", __func__, task->tk_status);
+ }
+}
+
+static void
+nfs42_layoutstat_release(void *calldata)
+{
+ struct nfs42_layoutstat_data *data = calldata;
+ struct nfs_server *nfss = NFS_SERVER(data->args.inode);
+
+ if (nfss->pnfs_curr_ld->cleanup_layoutstats)
+ nfss->pnfs_curr_ld->cleanup_layoutstats(data);
+
+ pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout);
+ smp_mb__before_atomic();
+ clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags);
+ smp_mb__after_atomic();
+ nfs_iput_and_deactive(data->inode);
+ kfree(data->args.devinfo);
+ kfree(data);
+}
+
+static const struct rpc_call_ops nfs42_layoutstat_ops = {
+ .rpc_call_prepare = nfs42_layoutstat_prepare,
+ .rpc_call_done = nfs42_layoutstat_done,
+ .rpc_release = nfs42_layoutstat_release,
+};
+
+int nfs42_proc_layoutstats_generic(struct nfs_server *server,
+ struct nfs42_layoutstat_data *data)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS],
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ };
+ struct rpc_task_setup task_setup = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs42_layoutstat_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
+ struct rpc_task *task;
+
+ data->inode = nfs_igrab_and_active(data->args.inode);
+ if (!data->inode) {
+ nfs42_layoutstat_release(data);
+ return -EAGAIN;
+ }
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+ task = rpc_run_task(&task_setup);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ return 0;
+}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 1a25b27248f2..a6bd27da6286 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -4,6 +4,8 @@
#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
#define __LINUX_FS_NFS_NFS4_2XDR_H
+#include "nfs42.h"
+
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
@@ -22,6 +24,16 @@
1 /* whence */ + \
2 /* offset */ + \
2 /* length */)
+#define encode_io_info_maxsz 4
+#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \
+ 2 /* offset */ + \
+ 2 /* length */ + \
+ encode_stateid_maxsz + \
+ encode_io_info_maxsz + \
+ encode_io_info_maxsz + \
+ 1 /* opaque devaddr4 length */ + \
+ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
+#define decode_layoutstats_maxsz (op_decode_hdr_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
@@ -45,6 +57,14 @@
#define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
decode_seek_maxsz)
+#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz)
+#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
static void encode_fallocate(struct xdr_stream *xdr,
@@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr,
encode_uint32(xdr, args->sa_what);
}
+static void encode_layoutstats(struct xdr_stream *xdr,
+ struct nfs42_layoutstat_args *args,
+ struct nfs42_layoutstat_devinfo *devinfo,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr);
+ p = reserve_space(xdr, 8 + 8);
+ p = xdr_encode_hyper(p, devinfo->offset);
+ p = xdr_encode_hyper(p, devinfo->length);
+ encode_nfs4_stateid(xdr, &args->stateid);
+ p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4);
+ p = xdr_encode_hyper(p, devinfo->read_count);
+ p = xdr_encode_hyper(p, devinfo->read_bytes);
+ p = xdr_encode_hyper(p, devinfo->write_count);
+ p = xdr_encode_hyper(p, devinfo->write_bytes);
+ p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data,
+ NFS4_DEVICEID4_SIZE);
+ /* Encode layoutupdate4 */
+ *p++ = cpu_to_be32(devinfo->layout_type);
+ if (devinfo->layoutstats_encode != NULL)
+ devinfo->layoutstats_encode(xdr, args, devinfo);
+ else
+ encode_uint32(xdr, 0);
+}
+
/*
* Encode ALLOCATE request
*/
@@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode LAYOUTSTATS request
+ */
+static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs42_layoutstat_args *args)
+{
+ int i;
+
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
+ for (i = 0; i < args->num_dev; i++)
+ encode_layoutstats(xdr, args, &args->devinfo[i], &hdr);
+ encode_nops(&hdr);
+}
+
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -169,6 +238,12 @@ out_overflow:
return -EIO;
}
+static int decode_layoutstats(struct xdr_stream *xdr,
+ struct nfs42_layoutstat_res *res)
+{
+ return decode_op_hdr(xdr, OP_LAYOUTSTATS);
+}
+
/*
* Decode ALLOCATE request
*/
@@ -246,4 +321,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
out:
return status;
}
+
+/*
+ * Decode LAYOUTSTATS request
+ */
+static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs42_layoutstat_res *res)
+{
+ struct compound_hdr hdr;
+ int status, i;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV);
+ for (i = 0; i < res->num_dev; i++) {
+ status = decode_layoutstats(xdr, res);
+ if (status)
+ goto out;
+ }
+out:
+ res->rpc_status = status;
+ return status;
+}
+
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index fdef424b0cd3..ea3bee919a76 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
+extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index e42be52a8c18..3aa6a9ba5113 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -676,7 +676,6 @@ found:
break;
}
- /* No matching nfs_client found. */
spin_unlock(&nn->nfs_client_lock);
dprintk("NFS: <-- %s status = %d\n", __func__, status);
nfs_put_client(prev);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index f58c17b3b480..dcd39d4e2efe 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp)
dprintk("NFS: open file(%pd2)\n", dentry);
+ err = nfs_check_flags(openflags);
+ if (err)
+ return err;
+
if ((openflags & O_ACCMODE) == 3)
openflags--;
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index c0b3a16b4a00..039b3eb6d834 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
goto out;
}
- if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
- printk(KERN_ERR "nfs4_get_rootfh:"
- " getroot obtained referral\n");
- ret = -EREMOTE;
- goto out;
- }
-
memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
out:
nfs_free_fattr(fsinfo.fattr);
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 2e1737c40a29..535dfc69c628 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp)
int nfs_idmap_init(void)
{
- int ret;
- ret = nfs_idmap_init_keyring();
- if (ret != 0)
- goto out;
-out:
- return ret;
+ return nfs_idmap_init_keyring();
}
void nfs_idmap_quit(void)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 55e1e3af23a3..6f228b5af819 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
case 0:
return 0;
case -NFS4ERR_OPENMODE:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
if (inode && nfs4_have_delegation(inode, FMODE_READ)) {
nfs4_inode_return_delegation(inode);
exception->retry = 1;
@@ -367,15 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
if (ret < 0)
break;
goto wait_on_recovery;
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- if (state == NULL)
- break;
- ret = nfs4_schedule_stateid_recovery(server, state);
- if (ret < 0)
- break;
- goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
ret = nfs4_schedule_stateid_recovery(server, state);
@@ -482,8 +476,8 @@ struct nfs4_call_sync_data {
struct nfs4_sequence_res *seq_res;
};
-static void nfs4_init_sequence(struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res, int cache_reply)
+void nfs4_init_sequence(struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res, int cache_reply)
{
args->sa_slot = NULL;
args->sa_cache_this = cache_reply;
@@ -1553,6 +1547,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
struct nfs4_state *newstate;
int ret;
+ if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
+ opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) &&
+ (opendata->o_arg.u.delegation_type & fmode) != fmode)
+ /* This mode can't have been delegated, so we must have
+ * a valid open_stateid to cover it - not need to reclaim.
+ */
+ return 0;
opendata->o_arg.open_flags = 0;
opendata->o_arg.fmode = fmode;
opendata->o_arg.share_access = nfs4_map_atomic_open_share(
@@ -1684,6 +1685,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
"%d.\n", __func__, err);
case 0:
case -ENOENT:
+ case -EAGAIN:
case -ESTALE:
break;
case -NFS4ERR_BADSESSION:
@@ -3355,6 +3357,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
goto out;
case -NFS4ERR_MOVED:
err = nfs4_get_referral(client, dir, name, fattr, fhandle);
+ if (err == -NFS4ERR_MOVED)
+ err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception);
goto out;
case -NFS4ERR_WRONGSEC:
err = -EPERM;
@@ -4955,49 +4959,128 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
-static unsigned int
-nfs4_init_nonuniform_client_string(struct nfs_client *clp,
- char *buf, size_t len)
+static int
+nfs4_init_nonuniform_client_string(struct nfs_client *clp)
{
- unsigned int result;
+ int result;
+ size_t len;
+ char *str;
+ bool retried = false;
if (clp->cl_owner_id != NULL)
- return strlcpy(buf, clp->cl_owner_id, len);
+ return 0;
+retry:
+ rcu_read_lock();
+ len = 10 + strlen(clp->cl_ipaddr) + 1 +
+ strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) +
+ 1 +
+ strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) +
+ 1;
+ rcu_read_unlock();
+
+ if (len > NFS4_OPAQUE_LIMIT + 1)
+ return -EINVAL;
+
+ /*
+ * Since this string is allocated at mount time, and held until the
+ * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+ * about a memory-reclaim deadlock.
+ */
+ str = kmalloc(len, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
rcu_read_lock();
- result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s",
- clp->cl_ipaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR),
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_PROTO));
+ result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s",
+ clp->cl_ipaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO));
rcu_read_unlock();
- clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
- return result;
+
+ /* Did something change? */
+ if (result >= len) {
+ kfree(str);
+ if (retried)
+ return -EINVAL;
+ retried = true;
+ goto retry;
+ }
+ clp->cl_owner_id = str;
+ return 0;
}
-static unsigned int
-nfs4_init_uniform_client_string(struct nfs_client *clp,
- char *buf, size_t len)
+static int
+nfs4_init_uniquifier_client_string(struct nfs_client *clp)
+{
+ int result;
+ size_t len;
+ char *str;
+
+ len = 10 + 10 + 1 + 10 + 1 +
+ strlen(nfs4_client_id_uniquifier) + 1 +
+ strlen(clp->cl_rpcclient->cl_nodename) + 1;
+
+ if (len > NFS4_OPAQUE_LIMIT + 1)
+ return -EINVAL;
+
+ /*
+ * Since this string is allocated at mount time, and held until the
+ * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+ * about a memory-reclaim deadlock.
+ */
+ str = kmalloc(len, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
+
+ result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ nfs4_client_id_uniquifier,
+ clp->cl_rpcclient->cl_nodename);
+ if (result >= len) {
+ kfree(str);
+ return -EINVAL;
+ }
+ clp->cl_owner_id = str;
+ return 0;
+}
+
+static int
+nfs4_init_uniform_client_string(struct nfs_client *clp)
{
- const char *nodename = clp->cl_rpcclient->cl_nodename;
- unsigned int result;
+ int result;
+ size_t len;
+ char *str;
if (clp->cl_owner_id != NULL)
- return strlcpy(buf, clp->cl_owner_id, len);
+ return 0;
if (nfs4_client_id_uniquifier[0] != '\0')
- result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
- clp->rpc_ops->version,
- clp->cl_minorversion,
- nfs4_client_id_uniquifier,
- nodename);
- else
- result = scnprintf(buf, len, "Linux NFSv%u.%u %s",
- clp->rpc_ops->version, clp->cl_minorversion,
- nodename);
- clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
- return result;
+ return nfs4_init_uniquifier_client_string(clp);
+
+ len = 10 + 10 + 1 + 10 + 1 +
+ strlen(clp->cl_rpcclient->cl_nodename) + 1;
+
+ if (len > NFS4_OPAQUE_LIMIT + 1)
+ return -EINVAL;
+
+ /*
+ * Since this string is allocated at mount time, and held until the
+ * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying
+ * about a memory-reclaim deadlock.
+ */
+ str = kmalloc(len, GFP_KERNEL);
+ if (!str)
+ return -ENOMEM;
+
+ result = scnprintf(str, len, "Linux NFSv%u.%u %s",
+ clp->rpc_ops->version, clp->cl_minorversion,
+ clp->cl_rpcclient->cl_nodename);
+ if (result >= len) {
+ kfree(str);
+ return -EINVAL;
+ }
+ clp->cl_owner_id = str;
+ return 0;
}
/*
@@ -5044,7 +5127,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
struct nfs4_setclientid setclientid = {
.sc_verifier = &sc_verifier,
.sc_prog = program,
- .sc_cb_ident = clp->cl_cb_ident,
+ .sc_clnt = clp,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
@@ -5064,16 +5147,15 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
/* nfs_client_id4 */
nfs4_init_boot_verifier(clp, &sc_verifier);
+
if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags))
- setclientid.sc_name_len =
- nfs4_init_uniform_client_string(clp,
- setclientid.sc_name,
- sizeof(setclientid.sc_name));
+ status = nfs4_init_uniform_client_string(clp);
else
- setclientid.sc_name_len =
- nfs4_init_nonuniform_client_string(clp,
- setclientid.sc_name,
- sizeof(setclientid.sc_name));
+ status = nfs4_init_nonuniform_client_string(clp);
+
+ if (status)
+ goto out;
+
/* cb_client4 */
setclientid.sc_netid_len =
nfs4_init_callback_netid(clp,
@@ -5083,9 +5165,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
- dprintk("NFS call setclientid auth=%s, '%.*s'\n",
+ dprintk("NFS call setclientid auth=%s, '%s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
- setclientid.sc_name_len, setclientid.sc_name);
+ clp->cl_owner_id);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
status = PTR_ERR(task);
@@ -5402,6 +5484,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
atomic_inc(&lsp->ls_count);
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
+ get_file(fl->fl_file);
memcpy(&p->fl, fl, sizeof(p->fl));
p->server = NFS_SERVER(inode);
return p;
@@ -5413,6 +5496,7 @@ static void nfs4_locku_release_calldata(void *data)
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_lock_state(calldata->lsp);
put_nfs_open_context(calldata->ctx);
+ fput(calldata->fl.fl_file);
kfree(calldata);
}
@@ -6846,11 +6930,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
};
nfs4_init_boot_verifier(clp, &verifier);
- args.id_len = nfs4_init_uniform_client_string(clp, args.id,
- sizeof(args.id));
- dprintk("NFS call exchange_id auth=%s, '%.*s'\n",
+
+ status = nfs4_init_uniform_client_string(clp);
+ if (status)
+ goto out;
+
+ dprintk("NFS call exchange_id auth=%s, '%s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
- args.id_len, args.id);
+ clp->cl_owner_id);
res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
GFP_NOFS);
@@ -6885,7 +6972,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
/* unsupported! */
WARN_ON_ONCE(1);
status = -EINVAL;
- goto out_server_scope;
+ goto out_impl_id;
}
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
@@ -6913,6 +7000,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
/* use the most recent implementation id */
kfree(clp->cl_implid);
clp->cl_implid = res.impl_id;
+ res.impl_id = NULL;
if (clp->cl_serverscope != NULL &&
!nfs41_same_server_scope(clp->cl_serverscope,
@@ -6926,15 +7014,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
if (clp->cl_serverscope == NULL) {
clp->cl_serverscope = res.server_scope;
- goto out;
+ res.server_scope = NULL;
}
- } else
- kfree(res.impl_id);
+ }
-out_server_owner:
- kfree(res.server_owner);
+out_impl_id:
+ kfree(res.impl_id);
out_server_scope:
kfree(res.server_scope);
+out_server_owner:
+ kfree(res.server_owner);
out:
if (clp->cl_implid != NULL)
dprintk("NFS reply exchange_id: Server Implementation ID: "
@@ -8061,9 +8150,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
struct rpc_task *task;
int status = 0;
- dprintk("NFS: %4d initiating layoutcommit call. sync %d "
- "lbw: %llu inode %lu\n",
- data->task.tk_pid, sync,
+ dprintk("NFS: initiating layoutcommit call. sync %d "
+ "lbw: %llu inode %lu\n", sync,
data->args.lastbytewritten,
data->args.inode->i_ino);
@@ -8557,7 +8645,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_ATOMIC_OPEN_V1
| NFS_CAP_ALLOCATE
| NFS_CAP_DEALLOCATE
- | NFS_CAP_SEEK,
+ | NFS_CAP_SEEK
+ | NFS_CAP_LAYOUTSTATS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2782cfca2265..605840dc89cf 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
goto do_confirm;
- nfs4_begin_drain_session(clp);
status = nfs4_proc_exchange_id(clp, cred);
if (status != 0)
goto out;
@@ -1482,6 +1481,8 @@ restart:
spin_unlock(&state->state_lock);
}
nfs4_put_open_state(state);
+ clear_bit(NFS4CLNT_RECLAIM_NOGRACE,
+ &state->flags);
spin_lock(&sp->so_lock);
goto restart;
}
@@ -1830,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp)
clp->cl_mvops->reboot_recovery_ops;
int status;
+ nfs4_begin_drain_session(clp);
cred = nfs4_get_clid_cred(clp);
if (cred == NULL)
return -ENOENT;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0aea97841d30..558cd65dbdb7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int);
#define encode_setclientid_maxsz \
(op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
- XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \
+ /* client name */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
1 /* sc_prog */ + \
1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
@@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int);
#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
encode_verifier_maxsz + \
1 /* co_ownerid.len */ + \
- XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
+ /* eia_clientowner */ \
+ 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
1 /* flags */ + \
1 /* spa_how */ + \
/* max is SP4_MACH_CRED (for now) */ + \
@@ -1667,13 +1669,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
encode_nfs4_verifier(xdr, setclientid->sc_verifier);
- encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
+ encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id),
+ setclientid->sc_clnt->cl_owner_id);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(setclientid->sc_prog);
encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
p = reserve_space(xdr, 4);
- *p = cpu_to_be32(setclientid->sc_cb_ident);
+ *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident);
}
static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
@@ -1747,7 +1750,8 @@ static void encode_exchange_id(struct xdr_stream *xdr,
encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
encode_nfs4_verifier(xdr, args->verifier);
- encode_string(xdr, args->id_len, args->id);
+ encode_string(xdr, strlen(args->client->cl_owner_id),
+ args->client->cl_owner_id);
encode_uint32(xdr, args->flags);
encode_uint32(xdr, args->state_protect.how);
@@ -7427,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(SEEK, enc_seek, dec_seek),
PROC(ALLOCATE, enc_allocate, dec_allocate),
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
+ PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
#endif /* CONFIG_NFS_V4_2 */
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 282b39369510..1da68d3b1eda 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how);
- dprintk("NFS: %5u initiated pgio call "
+ dprintk("NFS: initiated pgio call "
"(req %s/%llu, %u bytes @ offset %llu)\n",
- hdr->task.tk_pid,
hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode),
hdr->args.count,
@@ -690,8 +689,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
static void nfs_pgio_release(void *calldata)
{
struct nfs_pgio_header *hdr = calldata;
- if (hdr->rw_ops->rw_release)
- hdr->rw_ops->rw_release(hdr);
nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
}
@@ -711,7 +708,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror,
* nfs_pageio_init - initialise a page io descriptor
* @desc: pointer to descriptor
* @inode: pointer to inode
- * @doio: pointer to io function
+ * @pg_ops: pointer to pageio operations
+ * @compl_ops: pointer to pageio completion operations
+ * @rw_ops: pointer to nfs read/write operations
* @bsize: io block size
* @io_flags: extra parameters for the io function
*/
@@ -1186,6 +1185,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
* nfs_pageio_complete_mirror - Complete I/O on the current mirror of an
* nfs_pageio_descriptor
* @desc: pointer to io descriptor
+ * @mirror_idx: pointer to mirror index
*/
static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
u32 mirror_idx)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 230606243be6..0ba9a02c9566 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -35,6 +35,7 @@
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
+#include "nfs42.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@@ -1821,6 +1822,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
/* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
hdr->completion_ops);
+ set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
@@ -1865,6 +1867,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
mirror->pg_recoalesce = 1;
}
nfs_pgio_data_destroy(hdr);
+ hdr->release(hdr);
}
static enum pnfs_try_status
@@ -1979,6 +1982,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
mirror->pg_recoalesce = 1;
}
nfs_pgio_data_destroy(hdr);
+ hdr->release(hdr);
}
/*
@@ -2247,3 +2251,63 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
}
return thp;
}
+
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+int
+pnfs_report_layoutstat(struct inode *inode)
+{
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs42_layoutstat_data *data;
+ struct pnfs_layout_hdr *hdr;
+ int status = 0;
+
+ if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
+ goto out;
+
+ if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
+ goto out;
+
+ if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
+ goto out;
+
+ spin_lock(&inode->i_lock);
+ if (!NFS_I(inode)->layout) {
+ spin_unlock(&inode->i_lock);
+ goto out;
+ }
+ hdr = NFS_I(inode)->layout;
+ pnfs_get_layout_hdr(hdr);
+ spin_unlock(&inode->i_lock);
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ status = -ENOMEM;
+ goto out_put;
+ }
+
+ data->args.fh = NFS_FH(inode);
+ data->args.inode = inode;
+ nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
+ status = ld->prepare_layoutstats(&data->args);
+ if (status)
+ goto out_free;
+
+ status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
+
+out:
+ dprintk("%s returns %d\n", __func__, status);
+ return status;
+
+out_free:
+ kfree(data);
+out_put:
+ pnfs_put_layout_hdr(hdr);
+ smp_mb__before_atomic();
+ clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
+ smp_mb__after_atomic();
+ goto out;
+}
+EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
+#endif
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1e6308f82fc3..3e6ab7bfbabd 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type {
void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args);
+ int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
+ void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data);
};
struct pnfs_layout_hdr {
@@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
-
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
@@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
#endif /* CONFIG_NFS_V4_1 */
+#if IS_ENABLED(CONFIG_NFS_V4_2)
+int pnfs_report_layoutstat(struct inode *inode);
+#else
+static inline int
+pnfs_report_layoutstat(struct inode *inode)
+{
+ return 0;
+}
+#endif
+
#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e6c262555e08..65869ca9c851 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1290,6 +1290,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
+ set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
nfs_unlock_request(req);
nfs_end_page_writeback(req);
nfs_release_request(req);
@@ -1348,11 +1349,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
-{
- /* do nothing! */
-}
-
/*
* Special version of should_remove_suid() that ignores capabilities.
*/
@@ -1556,7 +1552,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
/* Set up the initial task struct. */
nfs_ops->commit_setup(data, &msg);
- dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+ dprintk("NFS: initiated commit call\n");
nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
@@ -2013,7 +2009,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
.rw_mode = FMODE_WRITE,
.rw_alloc_header = nfs_writehdr_alloc,
.rw_free_header = nfs_writehdr_free,
- .rw_release = nfs_writeback_release_common,
.rw_done = nfs_writeback_done,
.rw_result = nfs_writeback_result,
.rw_initiate = nfs_initiate_write,