diff options
author | David Howells <dhowells@redhat.com> | 2023-10-18 10:24:01 +0200 |
---|---|---|
committer | David Howells <dhowells@redhat.com> | 2024-01-01 17:37:27 +0100 |
commit | 495f2ae9e3552c30f7b83be3d142a932885d506e (patch) | |
tree | 3b92948d42863f697e4b62f7ef3f56387ff8e80d /fs/afs/fs_probe.c | |
parent | afs: Overhaul invalidation handling to better support RO volumes (diff) | |
download | linux-495f2ae9e3552c30f7b83be3d142a932885d506e.tar.xz linux-495f2ae9e3552c30f7b83be3d142a932885d506e.zip |
afs: Fix fileserver rotation
Fix the fileserver rotation so that it doesn't use RTT as the basis for
deciding which server and address to use as this doesn't necessarily give a
good indication of the best path. Instead, use the configurable preference
list in conjunction with whatever probes have succeeded at the time of
looking.
To this end, make the following changes:
(1) Keep an array of "server states" to track what addresses we've tried
on each server and move the waitqueue entries there that we'll need
for probing.
(2) Each afs_server_state struct is made to pin the corresponding server's
endpoint state rather than the afs_operation struct carrying a pin on
the server we're currently looking at.
(3) Drop the server list preference; we now always rescan the server list.
(4) afs_wait_for_probes() now uses the server state list to guide it in
what it waits for (and to provide the waitqueue entries) and returns
an indication of whether we'd got a response, run out of responsive
addresses or the endpoint state had been superseded and we need to
restart the iteration.
(5) Call afs_get_address_preferences*() occasionally to refresh the
preference values.
(6) When picking a server, scan the addresses of the servers for which we
have as-yet untested communications, looking for the highest priority
one and use that instead of trying all the addresses for a particular
server in ascending-RTT order.
(7) When a Busy or Offline state is seen across all available servers, do
a short sleep.
(8) If we detect that we accessed a future RO volume version whilst it is
undergoing replication, reissue the op against the older version until
at least half of the servers are replicated.
(9) Whilst RO replication is ongoing, increase the frequency of Volume
Location server checks for that volume to every ten minutes instead of
hourly.
Also add a tracepoint to track progress through the rotation algorithm.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Diffstat (limited to 'fs/afs/fs_probe.c')
-rw-r--r-- | fs/afs/fs_probe.c | 103 |
1 files changed, 40 insertions, 63 deletions
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c00d38b98a67..580de4adaaf6 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -296,58 +296,48 @@ void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, } /* - * Wait for the first as-yet untried fileserver to respond. + * Wait for the first as-yet untried fileserver to respond, for the probe state + * to be superseded or for all probes to finish. */ -int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) +int afs_wait_for_fs_probes(struct afs_operation *op, struct afs_server_state *states, bool intr) { struct afs_endpoint_state *estate; - struct wait_queue_entry *waits; - struct afs_server *server; - unsigned int rtt = UINT_MAX, rtt_s; - bool have_responders = false; - int pref = -1, i; + struct afs_server_list *slist = op->server_list; + bool still_probing = true; + int ret = 0, i; - _enter("%u,%lx", slist->nr_servers, untried); + _enter("%u", slist->nr_servers); - /* Only wait for servers that have a probe outstanding. */ - rcu_read_lock(); for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - estate = rcu_dereference(server->endpoint_state); - if (!atomic_read(&estate->nr_probing)) - __clear_bit(i, &untried); - if (test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) - have_responders = true; - } + estate = states[i].endpoint_state; + if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) + return 2; + if (atomic_read(&estate->nr_probing)) + still_probing = true; + if (estate->responsive_set & states[i].untried_addrs) + return 1; } - rcu_read_unlock(); - if (have_responders || !untried) + if (!still_probing) return 0; - waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); - if (!waits) - return -ENOMEM; - - for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - init_waitqueue_entry(&waits[i], current); - add_wait_queue(&server->probe_wq, &waits[i]); - } - } + for (i = 0; i < slist->nr_servers; i++) + add_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter); for (;;) { - bool still_probing = false; + still_probing = false; - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - if (test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) - goto stop; - if (atomic_read(&estate->nr_probing)) - still_probing = true; + estate = states[i].endpoint_state; + if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) { + ret = 2; + goto stop; + } + if (atomic_read(&estate->nr_probing)) + still_probing = true; + if (estate->responsive_set & states[i].untried_addrs) { + ret = 1; + goto stop; } } @@ -359,28 +349,12 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) stop: set_current_state(TASK_RUNNING); - for (i = 0; i < slist->nr_servers; i++) { - if (test_bit(i, &untried)) { - server = slist->servers[i].server; - rtt_s = READ_ONCE(server->rtt); - if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && - rtt_s < rtt) { - pref = i; - rtt = rtt_s; - } - - remove_wait_queue(&server->probe_wq, &waits[i]); - } - } - - kfree(waits); - - if (pref == -1 && signal_pending(current)) - return -ERESTARTSYS; + for (i = 0; i < slist->nr_servers; i++) + remove_wait_queue(&slist->servers[i].server->probe_wq, &states[i].probe_waiter); - if (pref >= 0) - slist->preferred = pref; - return 0; + if (!ret && signal_pending(current)) + ret = -ERESTARTSYS; + return ret; } /* @@ -508,7 +482,7 @@ again: * Wait for a probe on a particular fileserver to complete for 2s. */ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_state *estate, - bool is_intr) + unsigned long exclude, bool is_intr) { struct wait_queue_entry wait; unsigned long timo = 2 * HZ; @@ -521,7 +495,8 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta prepare_to_wait_event(&server->probe_wq, &wait, is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (timo == 0 || - test_bit(AFS_ESTATE_RESPONDED, &estate->flags) || + test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags) || + (estate->responsive_set & ~exclude) || atomic_read(&estate->nr_probing) == 0 || (is_intr && signal_pending(current))) break; @@ -531,7 +506,9 @@ int afs_wait_for_one_fs_probe(struct afs_server *server, struct afs_endpoint_sta finish_wait(&server->probe_wq, &wait); dont_wait: - if (test_bit(AFS_ESTATE_RESPONDED, &estate->flags)) + if (estate->responsive_set & ~exclude) + return 1; + if (test_bit(AFS_ESTATE_SUPERSEDED, &estate->flags)) return 0; if (is_intr && signal_pending(current)) return -ERESTARTSYS; |