Fix scale wait and add tests

Move wait logic out of raw and into common and use that
logic in scale

Fix a few broken wait condition cases highlighted by scaling
up and down

Move scale-related tests into dedicated test task file

Additional service related tests
This commit is contained in:
Will Thames
2020-05-22 12:39:43 +10:00
parent 3bdfb4745a
commit beebe98ce6
7 changed files with 486 additions and 170 deletions

View File

@@ -19,7 +19,8 @@ from __future__ import absolute_import, division, print_function
__metaclass__ = type
import copy
import json
from datetime import datetime
import time
import os
import traceback
@@ -34,7 +35,7 @@ try:
import kubernetes
import openshift
from openshift.dynamic import DynamicClient
from openshift.dynamic.exceptions import ResourceNotFoundError, ResourceNotUniqueError
from openshift.dynamic.exceptions import ResourceNotFoundError, ResourceNotUniqueError, NotFoundError
HAS_K8S_MODULE_HELPER = True
k8s_import_exception = None
except ImportError as e:
@@ -291,3 +292,90 @@ class KubernetesAnsibleModule(AnsibleModule, K8sAnsibleMixin):
def fail(self, msg=None):
self.fail_json(msg=msg)
def _wait_for(self, resource, name, namespace, predicate, sleep, timeout, state):
start = datetime.now()
def _wait_for_elapsed():
return (datetime.now() - start).seconds
response = None
while _wait_for_elapsed() < timeout:
try:
response = resource.get(name=name, namespace=namespace)
if predicate(response):
if response:
return True, response.to_dict(), _wait_for_elapsed()
else:
return True, {}, _wait_for_elapsed()
time.sleep(sleep)
except NotFoundError:
if state == 'absent':
return True, {}, _wait_for_elapsed()
if response:
response = response.to_dict()
return False, response, _wait_for_elapsed()
def wait(self, resource, definition, sleep, timeout, state='present', condition=None):
def _deployment_ready(deployment):
# FIXME: frustratingly bool(deployment.status) is True even if status is empty
# Furthermore deployment.status.availableReplicas == deployment.status.replicas == None if status is empty
# deployment.status.replicas is None is perfectly ok if desired replicas == 0
# Scaling up means that we also need to check that we're not in a
# situation where status.replicas == status.availableReplicas
# but spec.replicas != status.replicas
return (deployment.status and
deployment.spec.replicas == (deployment.status.replicas or 0) and
deployment.status.availableReplicas == deployment.status.replicas and
deployment.status.observedGeneration == deployment.metadata.generation and
not deployment.status.unavailableReplicas)
def _pod_ready(pod):
return (pod.status and pod.status.containerStatuses is not None and
all([container.ready for container in pod.status.containerStatuses]))
def _daemonset_ready(daemonset):
return (daemonset.status and daemonset.status.desiredNumberScheduled is not None and
daemonset.status.numberReady == daemonset.status.desiredNumberScheduled and
daemonset.status.observedGeneration == daemonset.metadata.generation and
not daemonset.status.unavailableReplicas)
def _custom_condition(resource):
if not resource.status or not resource.status.conditions:
return False
match = [x for x in resource.status.conditions if x.type == condition['type']]
if not match:
return False
# There should never be more than one condition of a specific type
match = match[0]
if match.status == 'Unknown':
if match.status == condition['status']:
if 'reason' not in condition:
return True
if condition['reason']:
return match.reason == condition['reason']
return False
status = True if match.status == 'True' else False
if status == condition['status']:
if condition.get('reason'):
return match.reason == condition['reason']
return True
return False
def _resource_absent(resource):
return not resource
waiter = dict(
Deployment=_deployment_ready,
DaemonSet=_daemonset_ready,
Pod=_pod_ready
)
kind = definition['kind']
if state == 'present' and not condition:
predicate = waiter.get(kind, lambda x: x)
elif state == 'present' and condition:
predicate = _custom_condition
else:
predicate = _resource_absent
return self._wait_for(resource, definition['metadata']['name'], definition['metadata'].get('namespace'), predicate, sleep, timeout, state)

View File

@@ -20,9 +20,7 @@ from __future__ import absolute_import, division, print_function
__metaclass__ = type
import copy
from datetime import datetime
from distutils.version import LooseVersion
import time
import sys
import traceback
@@ -442,85 +440,3 @@ class KubernetesRawModule(KubernetesAnsibleModule):
result['changed'] = True
result['method'] = 'create'
return result
def _wait_for(self, resource, name, namespace, predicate, sleep, timeout, state):
start = datetime.now()
def _wait_for_elapsed():
return (datetime.now() - start).seconds
response = None
while _wait_for_elapsed() < timeout:
try:
response = resource.get(name=name, namespace=namespace)
if predicate(response):
if response:
return True, response.to_dict(), _wait_for_elapsed()
else:
return True, {}, _wait_for_elapsed()
time.sleep(sleep)
except NotFoundError:
if state == 'absent':
return True, {}, _wait_for_elapsed()
if response:
response = response.to_dict()
return False, response, _wait_for_elapsed()
def wait(self, resource, definition, sleep, timeout, state='present', condition=None):
def _deployment_ready(deployment):
# FIXME: frustratingly bool(deployment.status) is True even if status is empty
# Furthermore deployment.status.availableReplicas == deployment.status.replicas == None if status is empty
return (deployment.status and deployment.status.replicas is not None and
deployment.status.availableReplicas == deployment.status.replicas and
deployment.status.observedGeneration == deployment.metadata.generation and
not deployment.status.unavailableReplicas)
def _pod_ready(pod):
return (pod.status and pod.status.containerStatuses is not None and
all([container.ready for container in pod.status.containerStatuses]))
def _daemonset_ready(daemonset):
return (daemonset.status and daemonset.status.desiredNumberScheduled is not None and
daemonset.status.numberReady == daemonset.status.desiredNumberScheduled and
daemonset.status.observedGeneration == daemonset.metadata.generation and
not daemonset.status.unavailableReplicas)
def _custom_condition(resource):
if not resource.status or not resource.status.conditions:
return False
match = [x for x in resource.status.conditions if x.type == condition['type']]
if not match:
return False
# There should never be more than one condition of a specific type
match = match[0]
if match.status == 'Unknown':
if match.status == condition['status']:
if 'reason' not in condition:
return True
if condition['reason']:
return match.reason == condition['reason']
return False
status = True if match.status == 'True' else False
if status == condition['status']:
if condition.get('reason'):
return match.reason == condition['reason']
return True
return False
def _resource_absent(resource):
return not resource
waiter = dict(
Deployment=_deployment_ready,
DaemonSet=_daemonset_ready,
Pod=_pod_ready
)
kind = definition['kind']
if state == 'present' and not condition:
predicate = waiter.get(kind, lambda x: x)
elif state == 'present' and condition:
predicate = _custom_condition
else:
predicate = _resource_absent
return self._wait_for(resource, definition['metadata']['name'], definition['metadata'].get('namespace'), predicate, sleep, timeout, state)

View File

@@ -20,8 +20,6 @@ from __future__ import absolute_import, division, print_function
__metaclass__ = type
import copy
import math
import time
from ansible_collections.community.kubernetes.plugins.module_utils.common import AUTH_ARG_SPEC, COMMON_ARG_SPEC
from ansible_collections.community.kubernetes.plugins.module_utils.common import KubernetesAnsibleModule
@@ -29,17 +27,9 @@ from ansible.module_utils.six import string_types
try:
import yaml
from openshift.dynamic.client import ResourceInstance
from openshift.dynamic.exceptions import NotFoundError
except ImportError:
pass
try:
from openshift import watch
except ImportError:
try:
from openshift.dynamic.client import watch
except ImportError:
pass
SCALE_ARG_SPEC = {
@@ -112,7 +102,9 @@ class KubernetesAnsibleScaleModule(KubernetesAnsibleModule):
wait_time = self.params.get('wait_timeout')
existing = None
existing_count = None
return_attributes = dict(changed=False, result=dict())
return_attributes = dict(changed=False, result=dict(), diff=dict())
if wait:
return_attributes['duration'] = 0
resource = self.find_resource(kind, api_version, fail=True)
@@ -142,10 +134,9 @@ class KubernetesAnsibleScaleModule(KubernetesAnsibleModule):
if not self.check_mode:
if self.kind == 'job':
existing.spec.parallelism = replicas
k8s_obj = resource.patch(existing.to_dict())
return_attributes['result'] = resource.patch(existing.to_dict()).to_dict()
else:
k8s_obj = self.scale(resource, existing, replicas, wait, wait_time)
return_attributes['result'] = k8s_obj.to_dict()
return_attributes = self.scale(resource, existing, replicas, wait, wait_time)
self.exit_json(**return_attributes)
@@ -161,86 +152,31 @@ class KubernetesAnsibleScaleModule(KubernetesAnsibleModule):
def scale(self, resource, existing_object, replicas, wait, wait_time):
name = existing_object.metadata.name
namespace = existing_object.metadata.namespace
kind = existing_object.kind
if not hasattr(resource, 'scale'):
self.fail_json(
msg="Cannot perform scale on resource of kind {0}".format(resource.kind)
)
scale_obj = {'metadata': {'name': name, 'namespace': namespace}, 'spec': {'replicas': replicas}}
scale_obj = {'kind': kind, 'metadata': {'name': name, 'namespace': namespace}, 'spec': {'replicas': replicas}}
return_obj = None
stream = None
if wait:
w, stream = self._create_stream(resource, namespace, wait_time)
existing = resource.get(name=name, namespace=namespace)
try:
resource.scale.patch(body=scale_obj)
except Exception as exc:
self.fail_json(
msg="Scale request failed: {0}".format(exc)
)
self.fail_json(msg="Scale request failed: {0}".format(exc))
if wait and stream is not None:
return_obj = self._read_stream(resource, w, stream, name, replicas)
k8s_obj = resource.get(name=name, namespace=namespace).to_dict()
match, diffs = self.diff_objects(existing.to_dict(), k8s_obj)
result = dict()
result['result'] = k8s_obj
result['changed'] = not match
result['diff'] = diffs
if not return_obj:
return_obj = self._wait_for_response(resource, name, namespace)
return return_obj
def _create_stream(self, resource, namespace, wait_time):
""" Create a stream of events for the object """
w = None
stream = None
w = watch.Watch()
w._api_client = self.client.client
if namespace:
stream = w.stream(resource.get, serialize=False, namespace=namespace, timeout_seconds=wait_time)
else:
stream = w.stream(resource.get, serialize=False, namespace=namespace, timeout_seconds=wait_time)
return w, stream
def _read_stream(self, resource, watcher, stream, name, replicas):
""" Wait for ready_replicas to equal the requested number of replicas. """
return_obj = None
try:
for event in stream:
if event.get('object'):
obj = ResourceInstance(resource, event['object'])
if obj.metadata.name == name and hasattr(obj, 'status'):
if replicas == 0:
if not hasattr(obj.status, 'readyReplicas') or not obj.status.readyReplicas:
return_obj = obj
watcher.stop()
break
if hasattr(obj.status, 'readyReplicas') and obj.status.readyReplicas == replicas:
return_obj = obj
watcher.stop()
break
except Exception as exc:
self.fail_json(msg="Exception reading event stream: {0}".format(exc))
if not return_obj:
self.fail_json(msg="Error fetching the patched object. Try a higher wait_timeout value.")
if replicas and return_obj.status.readyReplicas is None:
self.fail_json(msg="Failed to fetch the number of ready replicas. Try a higher wait_timeout value.")
if replicas and return_obj.status.readyReplicas != replicas:
self.fail_json(msg="Number of ready replicas is {0}. Failed to reach {1} ready replicas within "
"the wait_timeout period.".format(return_obj.status.ready_replicas, replicas))
return return_obj
def _wait_for_response(self, resource, name, namespace):
""" Wait for an API response """
tries = 0
half = math.ceil(20 / 2)
obj = None
while tries <= half:
obj = resource.get(name=name, namespace=namespace)
if obj:
break
tries += 2
time.sleep(2)
return obj
if wait:
success, result['result'], result['duration'] = self.wait(resource, scale_obj, 5, wait_time)
if not success:
self.fail_json(msg="Resource scaling timed out", **result)
return result