Move queuing tasks to a background thread

2026-05-06 21:32:49 +00:00 · 2016-08-26 14:55:56 -05:00
parent 9ecec6c28e
commit b71957d6e6
8 changed files with 226 additions and 156 deletions
--- a/lib/ansible/executor/action_write_locks.py
+++ b/lib/ansible/executor/action_write_locks.py
@@ -0,0 +1,43 @@
+# (c) 2016 - Red Hat, Inc. <support@ansible.com>
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+from multiprocessing import Lock
+from ansible.module_utils.facts import Facts
+
+if 'action_write_locks' not in globals():
+    # Only build the table once: initializing it again seems to bash the
+    # existing one (multiprocessing must be reloading the module when it
+    # forks?).
+    #
+    # The None entry is the Lock used when we weren't expecting a named
+    # module, i.e. when an action plugin directly invokes a module instead
+    # of going through the strategies.  Slightly less efficient, as all
+    # processes with unexpected module names will wait on this one lock.
+    action_write_locks = {None: Lock()}
+
+    # These plugins are called directly by action plugins (not going through
+    # a strategy), so their locks are precreated here as an optimization:
+    # the fixed set of helper modules plus every known package manager.
+    mods = set(('copy', 'file', 'setup', 'slurp', 'stat'))
+    mods.update(p['name'] for p in Facts.PKG_MGRS)
+    for mod_name in mods:
+        action_write_locks[mod_name] = Lock()
+
--- a/lib/ansible/executor/module_common.py
+++ b/lib/ansible/executor/module_common.py
@@ -37,7 +37,7 @@ from ansible.utils.unicode import to_bytes, to_unicode
 # Must import strategy and use write_locks from there
 # If we import write_locks directly then we end up binding a
 # variable to the object and then it never gets updated.
-from ansible.plugins import strategy
+from ansible.executor import action_write_locks

 try:
    from __main__ import display
@@ -596,16 +596,16 @@ def _find_snippet_imports(module_name, module_data, module_path, module_args, ta
            display.debug('ANSIBALLZ: using cached module: %s' % cached_module_filename)
            zipdata = open(cached_module_filename, 'rb').read()
        else:
-            if module_name in strategy.action_write_locks:
+            if module_name in action_write_locks.action_write_locks:
                display.debug('ANSIBALLZ: Using lock for %s' % module_name)
-                lock = strategy.action_write_locks[module_name]
+                lock = action_write_locks.action_write_locks[module_name]
            else:
                # If the action plugin directly invokes the module (instead of
                # going through a strategy) then we don't have a cross-process
                # Lock specifically for this module.  Use the "unexpected
                # module" lock instead
                display.debug('ANSIBALLZ: Using generic lock for %s' % module_name)
-                lock = strategy.action_write_locks[None]
+                lock = action_write_locks.action_write_locks[None]

            display.debug('ANSIBALLZ: Acquiring lock')
            with lock:
--- a/lib/ansible/executor/process/worker.py
+++ b/lib/ansible/executor/process/worker.py
@@ -64,12 +64,12 @@ class WorkerProcess(multiprocessing.Process):
    for reading later.
    '''

-    def __init__(self, rslt_q, task_vars, host, task, play_context, loader, variable_manager, shared_loader_obj):
+    def __init__(self, rslt_q, play, host, task, task_vars, play_context, loader, variable_manager, shared_loader_obj):

        super(WorkerProcess, self).__init__()
        # takes a task queue manager as the sole param:
        self._rslt_q            = rslt_q
-        self._task_vars         = task_vars
+        self._play              = play
        self._host              = host
        self._task              = task
        self._play_context      = play_context
@@ -77,6 +77,8 @@ class WorkerProcess(multiprocessing.Process):
        self._variable_manager  = variable_manager
        self._shared_loader_obj = shared_loader_obj

+        self._task_vars = task_vars
+
        # dupe stdin, if we have one
        self._new_stdin = sys.stdin
        try:
@@ -158,3 +160,4 @@ class WorkerProcess(multiprocessing.Process):
        #with open('worker_%06d.stats' % os.getpid(), 'w') as f:
        #    f.write(s.getvalue())

+        sys.exit(0)
--- a/lib/ansible/executor/task_queue_manager.py
+++ b/lib/ansible/executor/task_queue_manager.py
@@ -22,14 +22,21 @@ __metaclass__ = type
 import multiprocessing
 import os
 import tempfile
+import threading
+import time
+
+from collections import deque

 from ansible import constants as C
 from ansible.errors import AnsibleError
+from ansible.executor import action_write_locks
 from ansible.executor.play_iterator import PlayIterator
+from ansible.executor.process.worker import WorkerProcess
 from ansible.executor.stats import AggregateStats
+from ansible.module_utils.facts import Facts
 from ansible.playbook.block import Block
 from ansible.playbook.play_context import PlayContext
-from ansible.plugins import callback_loader, strategy_loader, module_loader
+from ansible.plugins import action_loader, callback_loader, connection_loader, filter_loader, lookup_loader, module_loader, strategy_loader, test_loader
 from ansible.template import Templar
 from ansible.vars.hostvars import HostVars
 from ansible.plugins.callback import CallbackBase
@@ -46,6 +53,23 @@ except ImportError:
 __all__ = ['TaskQueueManager']


+# TODO: this should probably be in the plugins/__init__.py, with
+#       a smarter mechanism to set all of the attributes based on
+#       the loaders created there
+class SharedPluginLoaderObj:
+    '''
+    A simple object that makes it easier to pass the various plugin
+    loaders to the forked processes over the queue
+    '''
+    def __init__(self):
+        self.module_loader = module_loader
+        self.lookup_loader = lookup_loader
+        self.test_loader = test_loader
+        self.filter_loader = filter_loader
+        self.connection_loader = connection_loader
+        self.action_loader = action_loader
+
+
 class TaskQueueManager:

    '''
@@ -77,6 +101,8 @@ class TaskQueueManager:
        self._run_additional_callbacks = run_additional_callbacks
        self._run_tree         = run_tree

+        self._iterator         = None
+
        self._callbacks_loaded = False
        self._callback_plugins = []
        self._start_at_done    = False
@@ -98,12 +124,86 @@ class TaskQueueManager:
        self._failed_hosts      = dict()
        self._unreachable_hosts = dict()

+        # the "queue" for the background thread to use
+        self._queued_tasks = deque()
+        self._queued_tasks_lock = threading.Lock()
+
+        # the background queuing thread
+        self._queue_thread = None
+
+        self._workers = []
        self._final_q = multiprocessing.Queue()

        # A temporary file (opened pre-fork) used by connection
        # plugins for inter-process locking.
        self._connection_lockfile = tempfile.TemporaryFile()

+    def _queue_thread_main(self):
+        '''
+        Main loop of the background queuing thread.  Until self._terminated
+        is set, hands queued tasks (oldest first) to idle worker slots and
+        starts a WorkerProcess for each one.
+        '''
+        # create a dummy object with plugin loaders set as an easier
+        # way to share them with the forked processes
+        shared_loader_obj = SharedPluginLoaderObj()
+
+        display.debug("queuing thread starting")
+        while not self._terminated:
+            # a slot is free if it holds no process or its process has exited
+            available_workers = [
+                idx for idx, (worker_prc, _) in enumerate(self._workers)
+                if worker_prc is None or not worker_prc.is_alive()
+            ]
+
+            started = 0
+            for worker_idx in available_workers:
+                with self._queued_tasks_lock:
+                    if not self._queued_tasks:
+                        break
+                    # popleft() pairs with append() in queue_task: FIFO order
+                    (host, task, task_vars, play_context) = self._queued_tasks.popleft()
+
+                # make sure a lock exists for this action before the worker starts
+                if task.action not in action_write_locks.action_write_locks:
+                    display.debug('Creating lock for %s' % task.action)
+                    action_write_locks.action_write_locks[task.action] = multiprocessing.Lock()
+
+                try:
+                    worker_prc = WorkerProcess(
+                        self._final_q,
+                        self._iterator._play,
+                        host, task, task_vars, play_context,
+                        self._loader, self._variable_manager,
+                        shared_loader_obj,
+                    )
+                    self._workers[worker_idx][0] = worker_prc
+                    worker_prc.start()
+                    started += 1
+                    display.debug("worker is %d (out of %d available)" % (worker_idx+1, len(self._workers)))
+                except (EOFError, IOError, AssertionError) as e:
+                    # most likely an abort
+                    display.debug("got an error while queuing: %s" % e)
+                    break
+
+            if not started:
+                # no idle worker or nothing was started: back off, don't spin
+                time.sleep(0.01)
+
+        display.debug("queuing thread exiting")
+
+    def queue_task(self, host, task, task_vars, play_context):
+        # append one task for the background queuing thread to pick up
+        with self._queued_tasks_lock:
+            self._queued_tasks.append((host, task, task_vars, play_context))
+
+    def queue_multiple_tasks(self, items, play_context):
+        # queue each (host, task, task_vars) item with the shared play_context;
+        # the lock is taken and released once per item
+        for (host, task, task_vars) in items:
+            with self._queued_tasks_lock:
+                self._queued_tasks.append((host, task, task_vars, play_context))
+
    def _initialize_processes(self, num):
        self._workers = []

@@ -207,6 +307,10 @@ class TaskQueueManager:
        if not self._callbacks_loaded:
            self.load_callbacks()

+        if self._queue_thread is None:
+            self._queue_thread = threading.Thread(target=self._queue_thread_main)
+            self._queue_thread.start()
+
        all_vars = self._variable_manager.get_vars(loader=self._loader, play=play)
        templar = Templar(loader=self._loader, variables=all_vars)

@@ -252,7 +356,7 @@ class TaskQueueManager:
            raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds)

        # build the iterator
-        iterator = PlayIterator(
+        self._iterator = PlayIterator(
            inventory=self._inventory,
            play=new_play,
            play_context=play_context,
@@ -267,7 +371,7 @@ class TaskQueueManager:
        # hosts so we know what failed this round.
        for host_name in self._failed_hosts.keys():
            host = self._inventory.get_host(host_name)
-            iterator.mark_host_failed(host)
+            self._iterator.mark_host_failed(host)

        self.clear_failed_hosts()

@@ -278,10 +382,10 @@ class TaskQueueManager:
            self._start_at_done = True

        # and run the play using the strategy and cleanup on way out
-        play_return = strategy.run(iterator, play_context)
+        play_return = strategy.run(self._iterator, play_context)

        # now re-save the hosts that failed from the iterator to our internal list
-        for host_name in iterator.get_failed_hosts():
+        for host_name in self._iterator.get_failed_hosts():
            self._failed_hosts[host_name] = True

        self._cleanup_processes()
@@ -294,14 +398,13 @@ class TaskQueueManager:
        self._cleanup_processes()

    def _cleanup_processes(self):
-        if hasattr(self, '_workers'):
-            for (worker_prc, rslt_q) in self._workers:
-                rslt_q.close()
-                if worker_prc and worker_prc.is_alive():
-                    try:
-                        worker_prc.terminate()
-                    except AttributeError:
-                        pass
+        for (worker_prc, rslt_q) in self._workers:
+            rslt_q.close()
+            if worker_prc and worker_prc.is_alive():  # skip empty slots and exited workers
+                try:
+                    worker_prc.terminate()
+                except AttributeError:
+                    pass  # NOTE(review): presumably guards a half-torn-down process object - confirm

    def clear_failed_hosts(self):
        self._failed_hosts = dict()