Merge pull request #1193 from stanislav-zaprudskiy/add_termination_grace_period_seconds

AWX: Add `termination_grace_period_seconds`
This commit is contained in:
Hao Liu
2023-02-28 15:37:51 -05:00
committed by GitHub
13 changed files with 376 additions and 58 deletions

View File

@@ -0,0 +1,66 @@
# Shared settings for the preStop hook scripts. Every value below falls back
# to a default when the corresponding PRE_STOP_* environment variable is unset
# or empty.
#
# file, which when exists, indicates that `master` script has successfully
# completed pre-stop script execution
marker_file="${PRE_STOP_MARKER_FILE:-/var/lib/pre-stop/.termination_marker}"
# file which the running `master` script continuously updates (mtime) to
# indicate it's still running. this file is then read by `watcher`s to
# understand if they still have to wait for `marker_file`
heartbeat_file="${PRE_STOP_HEARTBEAT_FILE:-/var/lib/pre-stop/.heartbeat}"
# file which:
# * `watcher`s create when they bail out because they didn't see the
#   `heartbeat_file` to be updated within `$heartbeat_threshold`;
# * `master` creates when its handler command fails;
# when scripts see such file, they also give up
bailout_file="${PRE_STOP_BAILOUT_FILE:-/var/lib/pre-stop/.bailout}"
# maximum age of `heartbeat_file`, in seconds, that `check_heartbeat` tolerates
# before it creates `bailout_file` and fails the hook
heartbeat_threshold="${PRE_STOP_HEARTBEAT_THRESHOLD:-60}"
# where the scripts' stdout/stderr are streamed
stdout="${PRE_STOP_STDOUT:-/proc/1/fd/1}"
stderr="${PRE_STOP_STDERR:-/proc/1/fd/2}"
# command the `master` script executes, which when successfully finishes,
# causes the script to create the `marker_file`
handler="${PRE_STOP_HANDLER:-bash -c \"PYTHONUNBUFFERED=x awx-manage disable_instance --wait --retry=inf\"}"
# prefix that `log` prints in square brackets in front of every message
log_prefix="${PRE_STOP_LOG_PREFIX:-preStop.exec}"
# when a role is set, the prefix is extended so that messages start with
# "[<prefix>] [<role>]"
[[ -n ${PRE_STOP_LOG_ROLE} ]] && log_prefix="${log_prefix}] [$PRE_STOP_LOG_ROLE"
# interval at which `watcher`s check for `marker_file` presence (passed to
# `sleep` as is)
recheck_sleep="${PRE_STOP_RECHECK_SLEEP:-1}"
# number of checks after which `watcher`s report into $stdout that they are
# still watching (i.e. one report per `report_every` iterations of the
# watching loop, each of which sleeps for `recheck_sleep`)
report_every="${PRE_STOP_REPORT_EVERY:-30}"
# Prints a message prefixed with "[$log_prefix]".
#
# Arguments:
#   $1   - printf format string of the message (without trailing newline)
#   $2.. - values substituted into the format string
log() {
  local template="[%s] $1\n"
  printf "$template" "$log_prefix" "${@:2}"
}
# Renders the named shell variables as a single line of `name="value"` pairs
# separated by spaces.
#
# Arguments:
#   $@ - names of the variables to render (values are read via indirection)
parameters_string() {
  for param; do
    printf '%s="%s"\n' "$param" "${!param}"
  done \
    | paste -s -d ' '
}
# Terminates the calling script with exit code 1 when the bail out file
# exists; does nothing (and returns 0) otherwise.
check_bailout() {
  # nothing to react to - carry on
  [[ -f $bailout_file ]] || return 0

  log "\"%s\" file has been detected, accepting bail out signal and failing the hook script" "$bailout_file"
  exit 1
}
# Fails the hook when the `master` script does not appear to be alive.
#
# The age of the heartbeat is the time elapsed since `heartbeat_file` was last
# modified or, while that file does not exist yet, since the moment passed in
# by the caller. When the age exceeds `heartbeat_threshold` seconds, the
# function logs the fact, creates `bailout_file` so that the other hook
# scripts give up as well, and terminates the calling script with exit code 1.
#
# Arguments:
#   $1 - epoch seconds at which the caller started checking; only used while
#        `heartbeat_file` is absent (defaults to the current time if omitted)
function check_heartbeat {
  local now reference delta
  now=$(date +%s)
  if [[ -f $heartbeat_file ]]; then
    reference=$(stat -c %Y "$heartbeat_file")
  else
    reference=${1:-$now}
  fi
  delta=$(( now - reference ))
  if [[ $delta -gt $heartbeat_threshold ]]; then
    # both values must be passed to the same `log` call: every argument line
    # except the last one needs a trailing backslash
    log "The heartbeat file hasn't been updated since %ss, which is above the threshold of %ds, assuming the master is not operating and failing the hook script" \
      "$delta" \
      "$heartbeat_threshold"
    touch "$bailout_file"
    exit 1
  fi
}

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env bash
# preStop hook for the `master` role: runs the handler command and, on
# success, creates the marker file the `waiter` hooks are waiting for.
PRE_STOP_LOG_ROLE="${PRE_STOP_LOG_ROLE:-master}"
# load the shared settings and helper functions from the file located next to
# this script; the path is quoted so directories with spaces keep working
source "$(dirname "$0")/termination-env"
{
  # report the effective settings first so they end up in the container log
  log "The hook has started: %s" "$(parameters_string "marker_file" "heartbeat_file" "bailout_file" "handler")"

  touch "$heartbeat_file"

  # with `pipefail` the pipeline status reflects a failure of the handler
  # itself rather than only the status of the reading loop
  set -o pipefail
  eval "$handler" 2>&1 | while IFS= read -r handler_line; do
    # the loop runs in a subshell, hence an `exit` here would not terminate
    # the script; we only stop reading early when either file shows up and
    # leave it to the code after the pipeline to tell apart files detection
    # from a handler failure
    if [[ -f $bailout_file || -f $marker_file ]]; then
      break
    fi
    log "[handler] %s" "$handler_line"
    # every line of handler output counts as a sign of life
    touch "$heartbeat_file"
  done
  handler_status=$?
  set +o pipefail

  # the order of the checks below matters: bail out signal first, then a
  # marker created by someone else, and only then the handler's own status
  check_bailout

  if [[ -f $marker_file ]]; then
    log "Done! The marker file has been detected, assuming some other instance of the script has run to completion"
    exit 0
  fi

  if [[ $handler_status -ne 0 ]]; then
    log "The handler has failed with \"%d\" exit code, failing the hook script too" $handler_status
    # signal others to bail out
    touch "$bailout_file"
    exit $handler_status
  fi

  log "Done! Generating the marker file allowing to proceed to termination"
  touch "$marker_file"
} > "$stdout" 2> "$stderr"

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# preStop hook for the `waiter` role: blocks until the `master` hook creates
# the marker file, or gives up when a bail out condition is detected.
PRE_STOP_LOG_ROLE="${PRE_STOP_LOG_ROLE:-waiter}"
# load the shared settings and helper functions from the file located next to
# this script; the path is quoted so directories with spaces keep working
source "$(dirname "$0")/termination-env"
{
  # report the effective settings first so they end up in the container log
  log "The hook has started: %s" "$(parameters_string "marker_file" "heartbeat_file" "bailout_file" "recheck_sleep" "report_every")"

  # reference time for the heartbeat check while no heartbeat file exists yet
  checks_started=$(date +%s)
  checks_done=0

  until [[ -f $marker_file ]]; do
    # either of these terminates the script when it is time to give up
    check_bailout
    check_heartbeat $checks_started

    # report once per `report_every` iterations, starting with the first one
    if [[ $(( $checks_done % $report_every )) -eq 0 ]]; then
      log "Waiting for the marker file to be accessible..."
    fi
    checks_done=$(( $checks_done + 1 ))

    sleep $recheck_sleep
  done

  log "The marker file found, exiting to proceed to termination"
} > "$stdout" 2> "$stderr"

View File

@@ -39,17 +39,17 @@
- name: Load LDAP CAcert certificate
include_tasks: load_ldap_cacert_secret.yml
when:
- ldap_cacert_secret != ''
- ldap_cacert_secret != ''
- name: Load ldap bind password
include_tasks: load_ldap_password_secret.yml
when:
- ldap_password_secret != ''
- ldap_password_secret != ''
- name: Load bundle certificate authority certificate
include_tasks: load_bundle_cacert_secret.yml
when:
- bundle_cacert_secret != ''
- bundle_cacert_secret != ''
- name: Include admin password configuration tasks
include_tasks: admin_password_configuration.yml
@@ -66,8 +66,8 @@
- name: Load Route TLS certificate
include_tasks: load_route_tls_secret.yml
when:
- ingress_type | lower == 'route'
- route_tls_secret != ''
- ingress_type | lower == 'route'
- route_tls_secret != ''
- name: Include resources configuration tasks
include_tasks: resources_configuration.yml
@@ -91,8 +91,8 @@
bash -c "awx-manage migrate --noinput"
register: migrate_result
when:
- database_check is defined
- (database_check.stdout|trim) != '0'
- database_check is defined
- (database_check.stdout|trim) != '0'
- name: Initialize Django
include_tasks: initialize_django.yml

View File

@@ -13,9 +13,17 @@
- status.phase=Running
register: tower_pod
- name: Set the resource pod as a variable.
set_fact:
tower_pod: >-
{{ tower_pod['resources']
| rejectattr('metadata.deletionTimestamp', 'defined')
| sort(attribute='metadata.creationTimestamp')
| first | default({}) }}
- name: Set the resource pod name as a variable.
set_fact:
tower_pod_name: "{{ tower_pod['resources'][0]['metadata']['name'] | default('') }}"
tower_pod_name: "{{ tower_pod['metadata']['name'] | default('') }}"
- name: Set user provided control plane ee image
set_fact:
@@ -32,13 +40,13 @@
kind: Secret
namespace: '{{ ansible_operator_meta.namespace }}'
name: '{{ ansible_operator_meta.name }}-receptor-ca'
register: _receptor_ca
register: receptor_ca
no_log: "{{ no_log }}"
- name: Migrate Receptor CA Secret
when:
- _receptor_ca['resources'] | default([]) | length
- _receptor_ca['resources'][0]['type'] != "kubernetes.io/tls"
- receptor_ca['resources'] | default([]) | length
- receptor_ca['resources'][0]['type'] != "kubernetes.io/tls"
block:
- name: Delete old Receptor CA Secret
k8s:
@@ -53,7 +61,7 @@
register: _receptor_ca_key_file
- name: Copy Receptor CA key from old secret to tempfile
copy:
content: "{{ _receptor_ca['resources'][0]['data']['receptor-ca.key'] | b64decode }}"
content: "{{ receptor_ca['resources'][0]['data']['receptor-ca.key'] | b64decode }}"
dest: "{{ _receptor_ca_key_file.path }}"
no_log: "{{ no_log }}"
- name: Create tempfile for receptor-ca.crt
@@ -63,7 +71,7 @@
register: _receptor_ca_crt_file
- name: Copy Receptor CA cert from old secret to tempfile
copy:
content: "{{ _receptor_ca['resources'][0]['data']['receptor-ca.crt'] | b64decode }}"
content: "{{ receptor_ca['resources'][0]['data']['receptor-ca.crt'] | b64decode }}"
dest: "{{ _receptor_ca_crt_file.path }}"
no_log: "{{ no_log }}"
- name: Create New Receptor CA secret
@@ -71,6 +79,17 @@
apply: true
definition: "{{ lookup('template', 'secrets/receptor_ca_secret.yaml.j2') }}"
no_log: "{{ no_log }}"
- name: Read New Receptor CA Secret
k8s_info:
kind: Secret
namespace: '{{ ansible_operator_meta.namespace }}'
name: '{{ ansible_operator_meta.name }}-receptor-ca'
register: _receptor_ca
no_log: "{{ no_log }}"
- name: Set receptor_ca variable
set_fact:
receptor_ca: '{{ _receptor_ca }}'
no_log: "{{ no_log }}"
- name: Remove tempfiles
file:
path: "{{ item }}"
@@ -106,6 +125,17 @@
apply: true
definition: "{{ lookup('template', 'secrets/receptor_ca_secret.yaml.j2') }}"
no_log: "{{ no_log }}"
- name: Read Receptor CA secret
k8s_info:
kind: Secret
namespace: '{{ ansible_operator_meta.namespace }}'
name: '{{ ansible_operator_meta.name }}-receptor-ca'
register: _receptor_ca
no_log: "{{ no_log }}"
- name: Set receptor_ca variable
set_fact:
receptor_ca: '{{ _receptor_ca }}'
no_log: "{{ no_log }}"
- name: Remove tempfiles
file:
path: "{{ item }}"
@@ -113,14 +143,14 @@
loop:
- "{{ _receptor_ca_key_file.path }}"
- "{{ _receptor_ca_crt_file.path }}"
when: not _receptor_ca['resources'] | default([]) | length
when: not receptor_ca['resources'] | default([]) | length
- name: Check for Receptor work signing Secret
k8s_info:
kind: Secret
namespace: '{{ ansible_operator_meta.namespace }}'
name: '{{ ansible_operator_meta.name }}-receptor-work-signing'
register: _receptor_work_signing
register: receptor_work_signing
no_log: "{{ no_log }}"
- name: Generate Receptor work signing RSA key pair
@@ -151,6 +181,17 @@
apply: true
definition: "{{ lookup('template', 'secrets/receptor_work_signing_secret.yaml.j2') }}"
no_log: "{{ no_log }}"
- name: Read Receptor work signing Secret
k8s_info:
kind: Secret
namespace: '{{ ansible_operator_meta.namespace }}'
name: '{{ ansible_operator_meta.name }}-receptor-work-signing'
register: _receptor_work_signing
no_log: "{{ no_log }}"
- name: Set receptor_work_signing variable
set_fact:
receptor_work_signing: '{{ _receptor_work_signing }}'
no_log: "{{ no_log }}"
- name: Remove tempfiles
file:
path: "{{ item }}"
@@ -158,16 +199,16 @@
loop:
- "{{ _receptor_work_signing_private_key_file.path }}"
- "{{ _receptor_work_signing_public_key_file.path }}"
when: not _receptor_work_signing['resources'] | default([]) | length
when: not receptor_work_signing['resources'] | default([]) | length
- name: Apply Resources
k8s:
apply: yes
definition: "{{ lookup('template', item + '.yaml.j2') }}"
wait: yes
register: tower_resources_result
loop:
- 'configmaps/config'
- 'configmaps/pre_stop_scripts'
- 'secrets/app_credentials'
- 'rbac/service_account'
- 'storage/persistent'
@@ -210,21 +251,10 @@
apply: yes
definition: "{{ lookup('template', 'deployments/deployment.yaml.j2') }}"
wait: yes
wait_timeout: "{{ 120 * replicas or 120 }}"
register: this_deployment_result
- block:
- name: Delete pod to reload a resource configuration
k8s:
api_version: v1
state: absent
kind: Pod
namespace: '{{ ansible_operator_meta.namespace }}'
name: '{{ tower_pod_name }}'
wait: yes
when:
- tower_resources_result.changed
- tower_pod_name | length
- name: Get the new resource pod information after updating resource.
k8s_info:
kind: Pod
@@ -236,17 +266,20 @@
field_selectors:
- status.phase=Running
register: _new_pod
until:
- _new_pod['resources'] | length
- _new_pod['resources'][0]['metadata']['name'] != tower_pod_name
delay: 5
retries: 60
- name: Update new resource pod as a variable.
set_fact:
tower_pod: >-
{{ _new_pod['resources']
| rejectattr('metadata.deletionTimestamp', 'defined')
| sort(attribute='metadata.creationTimestamp')
| last | default({}) }}
- name: Update new resource pod name as a variable.
set_fact:
tower_pod_name: '{{ _new_pod["resources"][0]["metadata"]["name"] }}'
tower_pod_name: '{{ tower_pod["metadata"]["name"] | default("")}}'
when:
- tower_resources_result.changed or this_deployment_result.changed
- this_deployment_result.changed
- name: Verify the resource pod name is populated.
assert:

View File

@@ -40,10 +40,10 @@
- name: Set secret key secret
set_fact:
__secret_key_secret: '{{ _generated_secret_key["resources"] | default([]) | length | ternary(_generated_secret_key, _secret_key_secret) }}'
secret_key: '{{ _generated_secret_key["resources"] | default([]) | length | ternary(_generated_secret_key, _secret_key_secret) }}'
no_log: "{{ no_log }}"
- name: Store secret key secret name
set_fact:
secret_key_secret_name: "{{ __secret_key_secret['resources'][0]['metadata']['name'] }}"
secret_key_secret_name: "{{ secret_key['resources'][0]['metadata']['name'] }}"
no_log: "{{ no_log }}"

View File

@@ -0,0 +1,16 @@
{% if termination_grace_period_seconds is defined %}
# ConfigMap carrying the three pre-stop hook scripts (termination-master,
# termination-waiter and the shared termination-env). It is rendered only when
# `termination_grace_period_seconds` is defined; otherwise this template
# produces no resource at all.
apiVersion: v1
kind: ConfigMap
metadata:
name: '{{ ansible_operator_meta.name }}-{{ deployment_type }}-pre-stop-scripts'
namespace: '{{ ansible_operator_meta.namespace }}'
labels:
{{ lookup("template", "../common/templates/labels/common.yaml.j2") | indent(width=4) | trim }}
data:
termination-master: |
{{ lookup("file", "files/pre-stop/termination-master") | indent(width=4) | trim }}
termination-waiter: |
{{ lookup("file", "files/pre-stop/termination-waiter") | indent(width=4) | trim }}
termination-env: |
{{ lookup("file", "files/pre-stop/termination-env") | indent(width=4) | trim }}
{% endif %}

View File

@@ -20,8 +20,26 @@ spec:
labels:
{{ lookup("template", "../common/templates/labels/common.yaml.j2") | indent(width=8) | trim }}
{{ lookup("template", "../common/templates/labels/version.yaml.j2") | indent(width=8) | trim }}
{% if annotations %}
annotations:
{% for template in [
"configmaps/config",
"configmaps/pre_stop_scripts",
"secrets/app_credentials",
"storage/persistent",
] %}
checksum-{{ template | replace('/', '-') }}: "{{ lookup('template', template + '.yaml.j2') | md5 }}"
{% endfor %}
{% for secret in [
"bundle_cacert",
"route_tls",
"ldap_cacert",
"secret_key",
"receptor_ca",
"receptor_work_signing",
] %}
checksum-secret-{{ secret }}: "{{ lookup('ansible.builtin.vars', secret, default='')["resources"][0]["data"] | default('') | md5 }}"
{% endfor %}
{% if annotations %}
{{ annotations | indent(width=8) }}
{% endif %}
spec:
@@ -122,6 +140,23 @@ spec:
mountPath: "/var/run/redis"
- name: "{{ ansible_operator_meta.name }}-redis-data"
mountPath: "/data"
{% if termination_grace_period_seconds is defined %}
- name: pre-stop-data
mountPath: /var/lib/pre-stop
- name: pre-stop-scripts
mountPath: /var/lib/pre-stop/scripts
lifecycle:
preStop:
exec:
command:
- bash
- -c
# redis image doesn't support writing to `/proc/1/fd/*`
- >
PRE_STOP_STDOUT=/dev/stdout
PRE_STOP_STDERR=/dev/stderr
/var/lib/pre-stop/scripts/termination-waiter
{% endif %}
resources: {{ redis_resource_requirements }}
- image: '{{ _image }}'
name: '{{ ansible_operator_meta.name }}-web'
@@ -291,6 +326,18 @@ spec:
{% endif %}
{% if task_extra_volume_mounts -%}
{{ task_extra_volume_mounts | indent(width=12, first=True) }}
{% endif %}
{% if termination_grace_period_seconds is defined %}
- name: pre-stop-data
mountPath: /var/lib/pre-stop
- name: pre-stop-scripts
mountPath: /var/lib/pre-stop/scripts
lifecycle:
preStop:
exec:
command:
- bash
- /var/lib/pre-stop/scripts/termination-master
{% endif %}
env:
- name: SUPERVISOR_WEB_CONFIG_PATH
@@ -360,6 +407,18 @@ spec:
mountPath: "/var/lib/awx/projects"
{% if ee_extra_volume_mounts -%}
{{ ee_extra_volume_mounts | indent(width=12, first=True) }}
{% endif %}
{% if termination_grace_period_seconds is defined %}
- name: pre-stop-data
mountPath: /var/lib/pre-stop
- name: pre-stop-scripts
mountPath: /var/lib/pre-stop/scripts
lifecycle:
preStop:
exec:
command:
- bash
- /var/lib/pre-stop/scripts/termination-waiter
{% endif %}
env:
{% if development_mode | bool %}
@@ -395,6 +454,9 @@ spec:
{% if security_context_settings|length %}
{{ security_context_settings | to_nice_yaml | indent(8) }}
{% endif %}
{% endif %}
{% if termination_grace_period_seconds is defined %}
terminationGracePeriodSeconds: {{ termination_grace_period_seconds }}
{% endif %}
volumes:
{% if bundle_ca_crt %}
@@ -424,6 +486,14 @@ spec:
items:
- key: ldap-ca.crt
path: 'ldap-ca.crt'
{% endif %}
{% if termination_grace_period_seconds is defined %}
- name: pre-stop-data
emptyDir: {}
- name: pre-stop-scripts
configMap:
name: '{{ ansible_operator_meta.name }}-{{ deployment_type }}-pre-stop-scripts'
defaultMode: 0775
{% endif %}
- name: "{{ ansible_operator_meta.name }}-application-credentials"
secret: