summaryrefslogtreecommitdiffstats
path: root/fs/afs/fs_probe.c
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2020-04-24 16:10:00 +0200
committerDavid Howells <dhowells@redhat.com>2020-05-31 16:19:51 +0200
commitf6cbb368bcb0bc4fa7c11554d5293658bb4b26a2 (patch)
tree49b9ad0705135c3e985ce6560b793ab038a2b111 /fs/afs/fs_probe.c
parentafs: Split the usage count on struct afs_server (diff)
downloadlinux-f6cbb368bcb0bc4fa7c11554d5293658bb4b26a2.tar.xz
linux-f6cbb368bcb0bc4fa7c11554d5293658bb4b26a2.zip
afs: Actively poll fileservers to maintain NAT or firewall openings
When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length. If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease. If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce. The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal. To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent. Fix this by adding a mechanism to emit regular probes. Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue. Further notes: (1) Probing is now no longer driven from the fileserver rotation algorithm. (2) Probes are dispatched to all interfaces on a fileserver when that an afs_server object is set up to record it. (3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - ie. it's undergoing probing. (4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration. (5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Dave Botsch <botsch@cnf.cornell.edu> Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/afs/fs_probe.c')
-rw-r--r--fs/afs/fs_probe.c277
1 files changed, 209 insertions, 68 deletions
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index d37d78eb84bd..442b5e7944ff 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS fileserver probing
*
- * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
@@ -11,14 +11,83 @@
#include "internal.h"
#include "protocol_yfs.h"
-static bool afs_fs_probe_done(struct afs_server *server)
+static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
+static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
+
+/*
+ * Start the probe polling timer. We have to supply it with an inc on the
+ * outstanding server count.
+ */
+static void afs_schedule_fs_probe(struct afs_net *net,
+ struct afs_server *server, bool fast)
+{
+ unsigned long atj;
+
+ if (!net->live)
+ return;
+
+ atj = server->probed_at;
+ atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
+
+ afs_inc_servers_outstanding(net);
+ if (timer_reduce(&net->fs_probe_timer, atj))
+ afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Handle the completion of a set of probes.
+ */
+static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
+{
+ bool responded = server->probe.responded;
+
+ write_seqlock(&net->fs_lock);
+ if (responded)
+ list_add_tail(&server->probe_link, &net->fs_probe_slow);
+ else
+ list_add_tail(&server->probe_link, &net->fs_probe_fast);
+ write_sequnlock(&net->fs_lock);
+
+ afs_schedule_fs_probe(net, server, !responded);
+}
+
+/*
+ * Handle the completion of a probe.
+ */
+static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
+{
+ _enter("");
+
+ if (atomic_dec_and_test(&server->probe_outstanding))
+ afs_finished_fs_probe(net, server);
+
+ wake_up_all(&server->probe_wq);
+}
+
+/*
+ * Handle inability to send a probe due to ENOMEM when trying to allocate a
+ * call struct.
+ */
+static void afs_fs_probe_not_done(struct afs_net *net,
+ struct afs_server *server,
+ struct afs_addr_cursor *ac)
{
- if (!atomic_dec_and_test(&server->probe_outstanding))
- return false;
+ struct afs_addr_list *alist = ac->alist;
+ unsigned int index = ac->index;
+
+ _enter("");
- clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
- wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
- return true;
+ trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
+ spin_lock(&server->probe_lock);
+
+ server->probe.local_failure = true;
+ if (server->probe.error == 0)
+ server->probe.error = -ENOMEM;
+
+ set_bit(index, &alist->failed);
+
+ spin_unlock(&server->probe_lock);
+ return afs_done_one_fs_probe(net, server);
}
/*
@@ -29,10 +98,8 @@ void afs_fileserver_probe_result(struct afs_call *call)
{
struct afs_addr_list *alist = call->alist;
struct afs_server *server = call->server;
- unsigned int server_index = call->server_index;
unsigned int index = call->addr_ix;
unsigned int rtt_us = 0;
- bool have_result = false;
int ret = call->error;
_enter("%pU,%u", &server->uuid, index);
@@ -51,8 +118,9 @@ void afs_fileserver_probe_result(struct afs_call *call)
goto responded;
case -ENOMEM:
case -ENONET:
+ clear_bit(index, &alist->responded);
server->probe.local_failure = true;
- afs_io_error(call, afs_io_error_fs_probe_fail);
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
goto out;
case -ECONNRESET: /* Responded, but call expired. */
case -ERFKILL:
@@ -71,12 +139,11 @@ void afs_fileserver_probe_result(struct afs_call *call)
server->probe.error == -ETIMEDOUT ||
server->probe.error == -ETIME))
server->probe.error = ret;
- afs_io_error(call, afs_io_error_fs_probe_fail);
+ trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
goto out;
}
responded:
- set_bit(index, &alist->responded);
clear_bit(index, &alist->failed);
if (call->service_id == YFS_FS_SERVICE) {
@@ -95,38 +162,31 @@ responded:
if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
alist->preferred = index;
- have_result = true;
}
smp_wmb(); /* Set rtt before responded. */
server->probe.responded = true;
- set_bit(AFS_SERVER_FL_PROBED, &server->flags);
+ set_bit(index, &alist->responded);
out:
spin_unlock(&server->probe_lock);
- _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
- server_index, index, &alist->addrs[index].transport, rtt_us, ret);
+ _debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
+ &server->uuid, index, &alist->addrs[index].transport,
+ rtt_us, ret);
- have_result |= afs_fs_probe_done(server);
- if (have_result)
- wake_up_all(&server->probe_wq);
+ return afs_done_one_fs_probe(call->net, server);
}
/*
- * Probe all of a fileserver's addresses to find out the best route and to
- * query its capabilities.
+ * Probe one or all of a fileserver's addresses to find out the best route and
+ * to query its capabilities.
*/
-static int afs_do_probe_fileserver(struct afs_net *net,
- struct afs_server *server,
- struct key *key,
- unsigned int server_index,
- struct afs_error *_e)
+void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
+ struct key *key, bool all)
{
struct afs_addr_cursor ac = {
.index = 0,
};
- struct afs_call *call;
- bool in_progress = false;
_enter("%pU", &server->uuid);
@@ -136,50 +196,25 @@ static int afs_do_probe_fileserver(struct afs_net *net,
afs_get_addrlist(ac.alist);
read_unlock(&server->fs_lock);
- atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+ server->probed_at = jiffies;
+ atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
memset(&server->probe, 0, sizeof(server->probe));
server->probe.rtt = UINT_MAX;
- for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
- call = afs_fs_get_capabilities(net, server, &ac, key, server_index);
- if (!IS_ERR(call)) {
- afs_put_call(call);
- in_progress = true;
- } else {
- afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code);
- }
- }
-
- if (!in_progress)
- afs_fs_probe_done(server);
- afs_put_addrlist(ac.alist);
- return in_progress;
-}
+ ac.index = ac.alist->preferred;
+ if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
+ all = true;
-/*
- * Send off probes to all unprobed servers.
- */
-int afs_probe_fileservers(struct afs_net *net, struct key *key,
- struct afs_server_list *list)
-{
- struct afs_server *server;
- struct afs_error e;
- bool in_progress = false;
- int i;
-
- e.error = 0;
- e.responded = false;
- for (i = 0; i < list->nr_servers; i++) {
- server = list->servers[i].server;
- if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
- continue;
-
- if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) &&
- afs_do_probe_fileserver(net, server, key, i, &e))
- in_progress = true;
+ if (all) {
+ for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
+ if (!afs_fs_get_capabilities(net, server, &ac, key))
+ afs_fs_probe_not_done(net, server, &ac);
+ } else {
+ if (!afs_fs_get_capabilities(net, server, &ac, key))
+ afs_fs_probe_not_done(net, server, &ac);
}
- return in_progress ? 0 : e.error;
+ afs_put_addrlist(ac.alist);
}
/*
@@ -199,7 +234,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
for (i = 0; i < slist->nr_servers; i++) {
if (test_bit(i, &untried)) {
server = slist->servers[i].server;
- if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+ if (!atomic_read(&server->probe_outstanding))
__clear_bit(i, &untried);
if (server->probe.responded)
have_responders = true;
@@ -229,7 +264,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
server = slist->servers[i].server;
if (server->probe.responded)
goto stop;
- if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+ if (atomic_read(&server->probe_outstanding))
still_probing = true;
}
}
@@ -264,3 +299,109 @@ stop:
slist->preferred = pref;
return 0;
}
+
+/*
+ * Probe timer. We have an increment on fs_outstanding that we need to pass
+ * along to the work item.
+ */
+void afs_fs_probe_timer(struct timer_list *timer)
+{
+ struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
+
+ if (!queue_work(afs_wq, &net->fs_prober))
+ afs_dec_servers_outstanding(net);
+}
+
+/*
+ * Dispatch a probe to a server.
+ */
+static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
+ __releases(&net->fs_lock)
+{
+ struct key *key = NULL;
+
+ /* We remove it from the queues here - it will be added back to
+ * one of the queues on the completion of the probe.
+ */
+ list_del_init(&server->probe_link);
+
+ afs_get_server(server, afs_server_trace_get_probe);
+ write_sequnlock(&net->fs_lock);
+
+ afs_fs_probe_fileserver(net, server, key, all);
+ afs_put_server(net, server, afs_server_trace_put_probe);
+}
+
+/*
+ * Probe dispatcher to regularly dispatch probes to keep NAT alive.
+ */
+void afs_fs_probe_dispatcher(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, fs_prober);
+ struct afs_server *fast, *slow, *server;
+ unsigned long nowj, timer_at, poll_at;
+ bool first_pass = true, set_timer = false;
+
+ if (!net->live)
+ return;
+
+ _enter("");
+
+ if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
+ _leave(" [none]");
+ return;
+ }
+
+again:
+ write_seqlock(&net->fs_lock);
+
+ fast = slow = server = NULL;
+ nowj = jiffies;
+ timer_at = nowj + MAX_JIFFY_OFFSET;
+
+ if (!list_empty(&net->fs_probe_fast)) {
+ fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
+ poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
+ if (time_before(nowj, poll_at)) {
+ timer_at = poll_at;
+ set_timer = true;
+ fast = NULL;
+ }
+ }
+
+ if (!list_empty(&net->fs_probe_slow)) {
+ slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
+ poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
+ if (time_before(nowj, poll_at)) {
+ if (time_before(poll_at, timer_at))
+ timer_at = poll_at;
+ set_timer = true;
+ slow = NULL;
+ }
+ }
+
+ server = fast ?: slow;
+ if (server)
+ _debug("probe %pU", &server->uuid);
+
+ if (server && (first_pass || !need_resched())) {
+ afs_dispatch_fs_probe(net, server, server == fast);
+ first_pass = false;
+ goto again;
+ }
+
+ write_sequnlock(&net->fs_lock);
+
+ if (server) {
+ if (!queue_work(afs_wq, &net->fs_prober))
+ afs_dec_servers_outstanding(net);
+ _leave(" [requeue]");
+ } else if (set_timer) {
+ if (timer_reduce(&net->fs_probe_timer, timer_at))
+ afs_dec_servers_outstanding(net);
+ _leave(" [timer]");
+ } else {
+ afs_dec_servers_outstanding(net);
+ _leave(" [quiesce]");
+ }
+}