diff options
author | Brian Coca <bcoca@users.noreply.github.com> | 2024-09-10 18:15:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-10 18:15:29 +0200 |
commit | d58988d8ffe04b0c8e4434162b7e2a4e09d8ea94 (patch) | |
tree | f48fe482dd5da815b34918848ba98570208dda9c | |
parent | Replace binary_modules Makefile with Python script (#83925) (diff) | |
download | ansible-d58988d8ffe04b0c8e4434162b7e2a4e09d8ea94.tar.xz ansible-d58988d8ffe04b0c8e4434162b7e2a4e09d8ea94.zip |
fact gathering, mounts, fixes for single proc code and tests (#83866)
* Fact gathering fix 'no shm' branhc
* Use concurrent.futures instead of multiprocessing
This entirely avoids the need for fallback logic since the concurrent.futures thread pool does not depend on `/dev/shm`.
Co-authored-by: Matt Clay <matt@mystile.com>
5 files changed, 40 insertions, 105 deletions
diff --git a/lib/ansible/module_utils/facts/hardware/linux.py b/lib/ansible/module_utils/facts/hardware/linux.py index a0772eff2d..293c75a250 100644 --- a/lib/ansible/module_utils/facts/hardware/linux.py +++ b/lib/ansible/module_utils/facts/hardware/linux.py @@ -21,15 +21,12 @@ import glob import json import os import re -import signal import sys import time -from multiprocessing import cpu_count -from multiprocessing.pool import ThreadPool - -from ansible.module_utils.common.text.converters import to_text +from ansible.module_utils._internal._concurrent import _futures from ansible.module_utils.common.locale import get_best_parsable_locale +from ansible.module_utils.common.text.converters import to_text from ansible.module_utils.common.text.formatters import bytes_to_human from ansible.module_utils.facts.hardware.base import Hardware, HardwareCollector from ansible.module_utils.facts.utils import get_file_content, get_file_lines, get_mount_size @@ -39,10 +36,6 @@ from ansible.module_utils.six import iteritems from ansible.module_utils.facts import timeout -def _timeout_handler(signum, frame): - raise TimeoutError(f"Timeout reached in:{frame}") - - def get_partition_uuid(partname): try: uuids = os.listdir("/dev/disk/by-uuid") @@ -582,12 +575,7 @@ class LinuxHardware(Hardware): # start threads to query each mount results = {} - pool = None - try: - pool = ThreadPool(processes=min(len(mtab_entries), cpu_count())) - except (IOError, OSError) as e: - self.module.warn(f"Cannot use multiprocessing, falling back on serial execution: {e}") - + executor = _futures.DaemonThreadPoolExecutor() maxtime = timeout.GATHER_TIMEOUT or timeout.DEFAULT_GATHER_TIMEOUT for fields in mtab_entries: # Transform octal escape sequences @@ -611,67 +599,46 @@ class LinuxHardware(Hardware): if not self.MTAB_BIND_MOUNT_RE.match(options): mount_info['options'] += ",bind" - results[mount] = {'info': mount_info, 'timelimit': time.time() + maxtime} - if pool is None: - old_handler = signal.signal(signal.SIGALRM, _timeout_handler) - signal.alarm(maxtime) - try: - size, uuid = self.get_mount_info(mount, device, uuids) - except TimeoutError as e: - results[mount]['info']['note'] = 'Could not get extra information due to timeout' - self.module.log(f"Timeout while gathering mount {mount} data: {e}") - self.module.warn(f"Timeout exceeded when getting mount info for {mount}") - finally: - signal.alarm(0) - signal.signal(signal.SIGALRM, old_handler) - - if size: - results[mount]['info'].update(size) - results[mount]['info']['uuid'] = uuid or 'N/A' - else: - # use multiproc pool, handle results below - results[mount]['extra'] = pool.apply_async(self.get_mount_info, (mount, device, uuids)) + results[mount] = {'info': mount_info, 'timelimit': time.monotonic() + maxtime} + results[mount]['extra'] = executor.submit(self.get_mount_info, mount, device, uuids) - if pool is None: - # serial processing, just assing results - mounts.append(results[mount]['info']) - else: - pool.close() # done with spawing new workers, start gc + # done with spawning new workers, start gc + executor.shutdown() - while results: # wait for workers and get results - for mount in list(results): - done = False - res = results[mount]['extra'] - try: - if res.ready(): - done = True - if res.successful(): - mount_size, uuid = res.get() - if mount_size: - results[mount]['info'].update(mount_size) - results[mount]['info']['uuid'] = uuid or 'N/A' - else: - # failed, try to find out why, if 'res.successful' we know there are no exceptions - results[mount]['info']['note'] = 'Could not get extra information: %s.' % (to_text(res.get())) - - elif time.time() > results[mount]['timelimit']: - done = True - self.module.warn("Timeout exceeded when getting mount info for %s" % mount) - results[mount]['info']['note'] = 'Could not get extra information due to timeout' - except Exception as e: - import traceback + while results: # wait for workers and get results + for mount in list(results): + done = False + res = results[mount]['extra'] + try: + if res.done(): done = True - results[mount]['info'] = 'N/A' - self.module.warn("Error prevented getting extra info for mount %s: [%s] %s." % (mount, type(e), to_text(e))) - self.module.debug(traceback.format_exc()) - - if done: - # move results outside and make loop only handle pending - mounts.append(results[mount]['info']) - del results[mount] - - # avoid cpu churn, sleep between retrying for loop with remaining mounts - time.sleep(0.1) + if res.exception() is None: + mount_size, uuid = res.result() + if mount_size: + results[mount]['info'].update(mount_size) + results[mount]['info']['uuid'] = uuid or 'N/A' + else: + # failed, try to find out why, if 'res.successful' we know there are no exceptions + results[mount]['info']['note'] = f'Could not get extra information: {res.exception()}' + + elif time.monotonic() > results[mount]['timelimit']: + done = True + self.module.warn("Timeout exceeded when getting mount info for %s" % mount) + results[mount]['info']['note'] = 'Could not get extra information due to timeout' + except Exception as e: + import traceback + done = True + results[mount]['info'] = 'N/A' + self.module.warn("Error prevented getting extra info for mount %s: [%s] %s." % (mount, type(e), to_text(e))) + self.module.debug(traceback.format_exc()) + + if done: + # move results outside and make loop only handle pending + mounts.append(results[mount]['info']) + del results[mount] + + # avoid cpu churn, sleep between retrying for loop with remaining mounts + time.sleep(0.1) return {'mounts': mounts} diff --git a/test/integration/targets/gathering_facts/lib/multriprocessing/__init__.py b/test/integration/targets/gathering_facts/lib/multriprocessing/__init__.py deleted file mode 100644 index 9d48db4f9f..0000000000 --- a/test/integration/targets/gathering_facts/lib/multriprocessing/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/test/integration/targets/gathering_facts/lib/multriprocessing/pool/__init__.py b/test/integration/targets/gathering_facts/lib/multriprocessing/pool/__init__.py deleted file mode 100644 index 9c5a5d26a6..0000000000 --- a/test/integration/targets/gathering_facts/lib/multriprocessing/pool/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from __future__ import annotations - - -class ThreadPool: - - def __init__(self, *args, **kwargs): - raise PermissionError("To test single proc ansible") diff --git a/test/integration/targets/gathering_facts/no_threads.yml b/test/integration/targets/gathering_facts/no_threads.yml deleted file mode 100644 index f8e21cd807..0000000000 --- a/test/integration/targets/gathering_facts/no_threads.yml +++ /dev/null @@ -1,21 +0,0 @@ -- hosts: localhost - tasks: - - block: - - set_fact: - normal_devices: "{{ ansible_facts['devices'].keys() }}" - - - name: facts already gathered normally, but now we do mounts again w/o multithreading - gather_facts: - gather_subset: mounts - register: no_multi - environment: - PYTHONPATH: "${PWD}/lib" - - - set_fact: - single_devices: "{{no_multi['ansible_facts']['ansible_devices'].keys()}}" - - - assert: - that: - - normal_devices == single_devices - when: - - ansible_facts['os_family'] not in ['FreeBSD', 'Darwin'] diff --git a/test/integration/targets/gathering_facts/runme.sh b/test/integration/targets/gathering_facts/runme.sh index 39824a4a52..ace83aa2ef 100755 --- a/test/integration/targets/gathering_facts/runme.sh +++ b/test/integration/targets/gathering_facts/runme.sh @@ -39,7 +39,4 @@ ANSIBLE_FACTS_MODULES='ansible.legacy.slow' ansible -m gather_facts localhost -- # test parallelism ANSIBLE_FACTS_MODULES='dummy1,dummy2,dummy3' ansible -m gather_facts localhost --playbook-dir ./ -a 'gather_timeout=30 parallel=true' "$@" 2>&1 -# test lack of threads -ansible-playbook no_threads.yml "$@" 2>&1 - rm "${OUTPUT_DIR}/canary.txt" |