diff options
author | Hao Liu <44379968+TheRealHaoLiu@users.noreply.github.com> | 2024-02-21 22:08:43 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-21 22:08:43 +0100 |
commit | 3fb3125bc37dad6ca38ba369c55b1972ae8611c3 (patch) | |
tree | b54c7afdf00fce8acf89b33b97bcb68c54bb6f4a | |
parent | Reset another to test-playbooks (diff) | |
download | awx-3fb3125bc37dad6ca38ba369c55b1972ae8611c3.tar.xz awx-3fb3125bc37dad6ca38ba369c55b1972ae8611c3.zip |
Send QUIT to worker before dying (#14913)
Fix a deadlock scenario where a dispatcher child process gets stuck in its read-from-queue loop after the dispatcher parent process has decided to quit.
Co-authored-by: Alan Rominger <arominge@redhat.com>
-rw-r--r-- | awx/main/dispatch/worker/base.py | 12 |
1 file changed, 12 insertions, 0 deletions
diff --git a/awx/main/dispatch/worker/base.py b/awx/main/dispatch/worker/base.py index 8a8f8dc02a..264205a8ed 100644 --- a/awx/main/dispatch/worker/base.py +++ b/awx/main/dispatch/worker/base.py @@ -259,6 +259,12 @@ class AWXConsumerPG(AWXConsumerBase): current_downtime = time.time() - self.pg_down_time if current_downtime > self.pg_max_wait: logger.exception(f"Postgres event consumer has not recovered in {current_downtime} s, exiting") + # Sending QUIT to multiprocess queue to signal workers to exit + for worker in self.pool.workers: + try: + worker.quit() + except Exception: + logger.exception(f"Error sending QUIT to worker {worker}") raise # Wait for a second before next attempt, but still listen for any shutdown signals for i in range(10): @@ -270,6 +276,12 @@ class AWXConsumerPG(AWXConsumerBase): except Exception: # Log unanticipated exception in addition to writing to stderr to get timestamps and other metadata logger.exception('Encountered unhandled error in dispatcher main loop') + # Sending QUIT to multiprocess queue to signal workers to exit + for worker in self.pool.workers: + try: + worker.quit() + except Exception: + logger.exception(f"Error sending QUIT to worker {worker}") raise |