Move queuing tasks to a background thread

This commit is contained in:
James Cammarata
2016-08-26 14:55:56 -05:00
parent 9ecec6c28e
commit b71957d6e6
8 changed files with 226 additions and 156 deletions

View File

@@ -0,0 +1,43 @@
# (c) 2016 - Red Hat, Inc. <support@ansible.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
from multiprocessing import Lock
from ansible.module_utils.facts import Facts
if 'action_write_locks' not in globals():
# Do not initialize this more than once because it seems to bash
# the existing one. multiprocessing must be reloading the module
# when it forks?
action_write_locks = dict()
# Below is a Lock for use when we weren't expecting a named module.
# It gets used when an action plugin directly invokes a module instead
# of going through the strategies. Slightly less efficient as all
# processes with unexpected module names will wait on this lock
action_write_locks[None] = Lock()
# These plugins are called directly by action plugins (not going through
# a strategy). We precreate them here as an optimization
mods = set(p['name'] for p in Facts.PKG_MGRS)
mods.update(('copy', 'file', 'setup', 'slurp', 'stat'))
for mod_name in mods:
action_write_locks[mod_name] = Lock()

View File

@@ -37,7 +37,7 @@ from ansible.utils.unicode import to_bytes, to_unicode
# Must import strategy and use write_locks from there
# If we import write_locks directly then we end up binding a
# variable to the object and then it never gets updated.
from ansible.plugins import strategy
from ansible.executor import action_write_locks
try:
from __main__ import display
@@ -596,16 +596,16 @@ def _find_snippet_imports(module_name, module_data, module_path, module_args, ta
display.debug('ANSIBALLZ: using cached module: %s' % cached_module_filename)
zipdata = open(cached_module_filename, 'rb').read()
else:
if module_name in strategy.action_write_locks:
if module_name in action_write_locks.action_write_locks:
display.debug('ANSIBALLZ: Using lock for %s' % module_name)
lock = strategy.action_write_locks[module_name]
lock = action_write_locks.action_write_locks[module_name]
else:
# If the action plugin directly invokes the module (instead of
# going through a strategy) then we don't have a cross-process
# Lock specifically for this module. Use the "unexpected
# module" lock instead
display.debug('ANSIBALLZ: Using generic lock for %s' % module_name)
lock = strategy.action_write_locks[None]
lock = action_write_locks.action_write_locks[None]
display.debug('ANSIBALLZ: Acquiring lock')
with lock:

View File

@@ -64,12 +64,12 @@ class WorkerProcess(multiprocessing.Process):
for reading later.
'''
def __init__(self, rslt_q, task_vars, host, task, play_context, loader, variable_manager, shared_loader_obj):
def __init__(self, rslt_q, play, host, task, task_vars, play_context, loader, variable_manager, shared_loader_obj):
super(WorkerProcess, self).__init__()
# takes a task queue manager as the sole param:
self._rslt_q = rslt_q
self._task_vars = task_vars
self._play = play
self._host = host
self._task = task
self._play_context = play_context
@@ -77,6 +77,8 @@ class WorkerProcess(multiprocessing.Process):
self._variable_manager = variable_manager
self._shared_loader_obj = shared_loader_obj
self._task_vars = task_vars
# dupe stdin, if we have one
self._new_stdin = sys.stdin
try:
@@ -158,3 +160,4 @@ class WorkerProcess(multiprocessing.Process):
#with open('worker_%06d.stats' % os.getpid(), 'w') as f:
# f.write(s.getvalue())
sys.exit(0)

View File

@@ -22,14 +22,21 @@ __metaclass__ = type
import multiprocessing
import os
import tempfile
import threading
import time
from collections import deque
from ansible import constants as C
from ansible.errors import AnsibleError
from ansible.executor import action_write_locks
from ansible.executor.play_iterator import PlayIterator
from ansible.executor.process.worker import WorkerProcess
from ansible.executor.stats import AggregateStats
from ansible.module_utils.facts import Facts
from ansible.playbook.block import Block
from ansible.playbook.play_context import PlayContext
from ansible.plugins import callback_loader, strategy_loader, module_loader
from ansible.plugins import action_loader, callback_loader, connection_loader, filter_loader, lookup_loader, module_loader, strategy_loader, test_loader
from ansible.template import Templar
from ansible.vars.hostvars import HostVars
from ansible.plugins.callback import CallbackBase
@@ -46,6 +53,23 @@ except ImportError:
__all__ = ['TaskQueueManager']
# TODO: this should probably be in the plugins/__init__.py, with
# a smarter mechanism to set all of the attributes based on
# the loaders created there
class SharedPluginLoaderObj:
'''
A simple object to make pass the various plugin loaders to
the forked processes over the queue easier
'''
def __init__(self):
self.action_loader = action_loader
self.connection_loader = connection_loader
self.filter_loader = filter_loader
self.test_loader = test_loader
self.lookup_loader = lookup_loader
self.module_loader = module_loader
class TaskQueueManager:
'''
@@ -77,6 +101,8 @@ class TaskQueueManager:
self._run_additional_callbacks = run_additional_callbacks
self._run_tree = run_tree
self._iterator = None
self._callbacks_loaded = False
self._callback_plugins = []
self._start_at_done = False
@@ -98,12 +124,86 @@ class TaskQueueManager:
self._failed_hosts = dict()
self._unreachable_hosts = dict()
# the "queue" for the background thread to use
self._queued_tasks = deque()
self._queued_tasks_lock = threading.Lock()
# the background queuing thread
self._queue_thread = None
self._workers = []
self._final_q = multiprocessing.Queue()
# A temporary file (opened pre-fork) used by connection
# plugins for inter-process locking.
self._connection_lockfile = tempfile.TemporaryFile()
def _queue_thread_main(self):
# create a dummy object with plugin loaders set as an easier
# way to share them with the forked processes
shared_loader_obj = SharedPluginLoaderObj()
display.debug("queuing thread starting")
while not self._terminated:
available_workers = []
for idx, entry in enumerate(self._workers):
(worker_prc, _) = entry
if worker_prc is None or not worker_prc.is_alive():
available_workers.append(idx)
if len(available_workers) == 0:
time.sleep(0.01)
continue
for worker_idx in available_workers:
try:
self._queued_tasks_lock.acquire()
(host, task, task_vars, play_context) = self._queued_tasks.pop()
except IndexError:
break
finally:
self._queued_tasks_lock.release()
if task.action not in action_write_locks.action_write_locks:
display.debug('Creating lock for %s' % task.action)
action_write_locks.action_write_locks[task.action] = multiprocessing.Lock()
try:
worker_prc = WorkerProcess(
self._final_q,
self._iterator._play,
host,
task,
task_vars,
play_context,
self._loader,
self._variable_manager,
shared_loader_obj,
)
self._workers[worker_idx][0] = worker_prc
worker_prc.start()
display.debug("worker is %d (out of %d available)" % (worker_idx+1, len(self._workers)))
except (EOFError, IOError, AssertionError) as e:
# most likely an abort
display.debug("got an error while queuing: %s" % e)
break
display.debug("queuing thread exiting")
def queue_task(self, host, task, task_vars, play_context):
self._queued_tasks_lock.acquire()
self._queued_tasks.append((host, task, task_vars, play_context))
self._queued_tasks_lock.release()
def queue_multiple_tasks(self, items, play_context):
for item in items:
(host, task, task_vars) = item
self._queued_tasks_lock.acquire()
self._queued_tasks.append((host, task, task_vars, play_context))
self._queued_tasks_lock.release()
def _initialize_processes(self, num):
self._workers = []
@@ -207,6 +307,10 @@ class TaskQueueManager:
if not self._callbacks_loaded:
self.load_callbacks()
if self._queue_thread is None:
self._queue_thread = threading.Thread(target=self._queue_thread_main)
self._queue_thread.start()
all_vars = self._variable_manager.get_vars(loader=self._loader, play=play)
templar = Templar(loader=self._loader, variables=all_vars)
@@ -252,7 +356,7 @@ class TaskQueueManager:
raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds)
# build the iterator
iterator = PlayIterator(
self._iterator = PlayIterator(
inventory=self._inventory,
play=new_play,
play_context=play_context,
@@ -267,7 +371,7 @@ class TaskQueueManager:
# hosts so we know what failed this round.
for host_name in self._failed_hosts.keys():
host = self._inventory.get_host(host_name)
iterator.mark_host_failed(host)
self._iterator.mark_host_failed(host)
self.clear_failed_hosts()
@@ -278,10 +382,10 @@ class TaskQueueManager:
self._start_at_done = True
# and run the play using the strategy and cleanup on way out
play_return = strategy.run(iterator, play_context)
play_return = strategy.run(self._iterator, play_context)
# now re-save the hosts that failed from the iterator to our internal list
for host_name in iterator.get_failed_hosts():
for host_name in self._iterator.get_failed_hosts():
self._failed_hosts[host_name] = True
self._cleanup_processes()
@@ -294,14 +398,13 @@ class TaskQueueManager:
self._cleanup_processes()
def _cleanup_processes(self):
if hasattr(self, '_workers'):
for (worker_prc, rslt_q) in self._workers:
rslt_q.close()
if worker_prc and worker_prc.is_alive():
try:
worker_prc.terminate()
except AttributeError:
pass
for (worker_prc, rslt_q) in self._workers:
rslt_q.close()
if worker_prc and worker_prc.is_alive():
try:
worker_prc.terminate()
except AttributeError:
pass
def clear_failed_hosts(self):
self._failed_hosts = dict()