From e7d17ad1acc9254052524eca5cc672e095f7c288 Mon Sep 17 00:00:00 2001 From: andryyy Date: Wed, 27 Mar 2019 23:15:04 +0100 Subject: [PATCH] [Watchdog] Check for ACME failures --- data/Dockerfiles/watchdog/watchdog.sh | 49 ++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/data/Dockerfiles/watchdog/watchdog.sh b/data/Dockerfiles/watchdog/watchdog.sh index a9db9321f..cd9920a1b 100755 --- a/data/Dockerfiles/watchdog/watchdog.sh +++ b/data/Dockerfiles/watchdog/watchdog.sh @@ -5,6 +5,8 @@ trap "kill 0" EXIT # Prepare BACKGROUND_TASKS=() +echo "Waiting for containers to settle..." +sleep 10 if [[ "${USE_WATCHDOG}" =~ ^([nN][oO]|[nN])+$ ]]; then echo -e "$(date) - USE_WATCHDOG=n, skipping watchdog..." @@ -350,6 +352,38 @@ ratelimit_checks() { return 1 } +acme_checks() { + err_count=0 + diff_c=0 + THRESHOLD=1 + ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME) + if [[ -z "${ACME_LOG_STATUS}" ]]; then + redis-cli -h redis SET ACME_FAIL_TIME 0 + ACME_LOG_STATUS=0 + fi + # Reduce error count by 2 after restarting an unhealthy container + trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1 + while [ ${err_count} -lt ${THRESHOLD} ]; do + err_c_cur=${err_count} + ACME_LOG_STATUS_PREV=${ACME_LOG_STATUS} + ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME) + if [[ ${ACME_LOG_STATUS_PREV} != ${ACME_LOG_STATUS} ]]; then + err_count=$(( ${err_count} + 1 )) + fi + [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 + [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) + progress "ACME" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} + if [[ $? == 10 ]]; then + diff_c=0 + sleep 1 + else + diff_c=0 + sleep $(( ( RANDOM % 30 ) + 10 )) + fi + done + return 1 +} + ipv6nat_checks() { err_count=0 diff_c=0 @@ -518,6 +552,16 @@ done ) & BACKGROUND_TASKS+=($!) +( +while true; do + if ! acme_checks; then + log_msg "ACME client hit error limit" + echo acme-tiny > /tmp/com_pipe + fi +done +) & +BACKGROUND_TASKS+=($!) + ( while true; do if ! ipv6nat_checks; then @@ -567,7 +611,10 @@ while true; do fi if [[ ${com_pipe_answer} == "ratelimit" ]]; then log_msg "At least one ratelimit was applied" - [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "No further information available." + [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please see mailcow UI logs for further information." + elif [[ ${com_pipe_answer} == "acme-tiny" ]]; then + log_msg "acme-tiny client returned non-zero exit code" + [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check acme-mailcow for ruther information." elif [[ ${com_pipe_answer} =~ .+-mailcow ]] || [[ ${com_pipe_answer} == "ipv6nat-mailcow" ]]; then kill -STOP ${BACKGROUND_TASKS[*]} sleep 3