#!/bin/bash
set -euo pipefail

# Available env vars:
#   $TMP_DIR
#   $CLUSTER_NAME
#   $KUBECONFIG
#   $NODE_TERMINATION_HANDLER_DOCKER_REPO
#   $NODE_TERMINATION_HANDLER_DOCKER_TAG
#   $WEBHOOK_DOCKER_REPO
#   $WEBHOOK_DOCKER_TAG
#   $AEMM_URL
#   $AEMM_VERSION


function fail_and_exit {
    echo "❌ EC2 State Change SQS Test failed $CLUSTER_NAME ❌"
    exit "${1:-1}"
}

echo "Starting EC2 State Change SQS Test for Node Termination Handler"
START_TIME=$(date -u +"%Y-%m-%dT%TZ")

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"

common_helm_args=()

localstack_helm_args=(
    upgrade
    --install
    "$CLUSTER_NAME-localstack"
    "$SCRIPTPATH/../../config/helm/localstack/"
    --wait
    --namespace default
    --set nodeSelector."$NTH_CONTROL_LABEL"
    --set defaultRegion="${AWS_REGION}"
)

set -x
helm "${localstack_helm_args[@]}"
set +x

sleep 10

RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION}"
set -x
localstack_pod=$(kubectl get pods --selector app=localstack --field-selector="status.phase=Running" \
                                  -o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \
                                  | awk '$2 >= "'"${START_TIME//+0000/Z}"'" { print $1 }')
echo "🥑 Using localstack pod $localstack_pod"
run_instances_resp=$(kubectl exec -i "${localstack_pod}" -- bash -c "$RUN_INSTANCE_CMD")
private_dns_name=$(echo "${run_instances_resp}" | jq -r '.Instances[] .PrivateDnsName')
instance_id=$(echo "${run_instances_resp}" | jq -r '.Instances[] .InstanceId')
echo "🥑 Started mock EC2 instance ($instance_id) w/ private DNS name: ${private_dns_name}"

CREATE_SQS_CMD="awslocal sqs create-queue --queue-name "${CLUSTER_NAME}-queue" --attributes MessageRetentionPeriod=300 --region ${AWS_REGION}"
queue_url=$(kubectl exec -i "${localstack_pod}" -- bash -c "$CREATE_SQS_CMD" | jq -r .QueueUrl)

echo "🥑 Created SQS Queue ${queue_url}"

anth_helm_args=(
  upgrade
  --install
  "$CLUSTER_NAME-acth"
  "$SCRIPTPATH/../../config/helm/aws-node-termination-handler/"
  --namespace kube-system
  --set instanceMetadataURL="${INSTANCE_METADATA_URL:-"http://$AEMM_URL:$IMDS_PORT"}"
  --set image.repository="$NODE_TERMINATION_HANDLER_DOCKER_REPO"
  --set image.tag="$NODE_TERMINATION_HANDLER_DOCKER_TAG"
  --set awsAccessKeyID=foo
  --set awsSecretAccessKey=bar
  --set awsRegion="${AWS_REGION}"
  --set awsEndpoint="http://localstack.default"
  --set checkASGTagBeforeDraining=false
  --set enableSqsTerminationDraining=true
  --set enableScheduledEventDraining=false
  --set enableSpotInterruptionDraining=false
  --set nodeSelector."$NTH_CONTROL_LABEL"
  --set "queueURL=${queue_url}"
)
[[ -n "${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY-}" ]] &&
    anth_helm_args+=(--set image.pullPolicy="$NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY")
[[ ${#common_helm_args[@]} -gt 0 ]] &&
    anth_helm_args+=("${common_helm_args[@]}")

set -x
helm "${anth_helm_args[@]}"
set +x

emtp_helm_args=(
  upgrade
  --install
  "$CLUSTER_NAME-emtp"
  "$SCRIPTPATH/../../config/helm/webhook-test-proxy/"
  --namespace default
  --set webhookTestProxy.image.repository="$WEBHOOK_DOCKER_REPO"
  --set webhookTestProxy.image.tag="$WEBHOOK_DOCKER_TAG"
)
[[ -n "${WEBHOOK_DOCKER_PULL_POLICY-}" ]] &&
    emtp_helm_args+=(--set webhookTestProxy.image.pullPolicy="$WEBHOOK_DOCKER_PULL_POLICY")
[[ ${#common_helm_args[@]} -gt 0 ]] &&
    emtp_helm_args+=("${common_helm_args[@]}")

set -x
helm "${emtp_helm_args[@]}"
set +x

TAINT_CHECK_CYCLES=15
TAINT_CHECK_SLEEP=15

DEPLOYED=0

for i in $(seq 1 $TAINT_CHECK_CYCLES); do
    if [[ $(kubectl get deployments regular-pod-test -o jsonpath='{.status.unavailableReplicas}') -eq 0 ]]; then
        echo "✅ Verified regular-pod-test pod was scheduled and started!"
        DEPLOYED=1
        break
    fi
    echo "Setup Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
    sleep $TAINT_CHECK_SLEEP
done

if [[ $DEPLOYED -eq 0 ]]; then
    echo "❌ regular-pod-test pod deployment failed"
    fail_and_exit 2
fi

EC2_STATE_CHANGE_EVENT=$(cat <<EOF
{
  "version": "0",
  "id": "7bf73129-1428-4cd3-a780-95db273d1602",
  "detail-type": "EC2 Instance State-change Notification",
  "source": "aws.ec2",
  "account": "123456789012",
  "time": "$(date -u +"%Y-%m-%dT%TZ")",
  "region": "us-east-1",
  "resources": [
    "arn:aws:ec2:us-east-1:123456789012:instance/${instance_id}"
  ],
  "detail": {
    "instance-id": "${instance_id}",
    "state": "stopping"
  }
}
EOF
)

EC2_STATE_CHANGE_EVENT_ONE_LINE=$(echo "${EC2_STATE_CHANGE_EVENT}" | tr -d '\n' |sed 's/\"/\\"/g')
SEND_SQS_CMD="awslocal sqs send-message --queue-url ${queue_url} --message-body \"${EC2_STATE_CHANGE_EVENT_ONE_LINE}\" --region ${AWS_REGION}"
kubectl exec -i "${localstack_pod}" -- bash -c "$SEND_SQS_CMD"
echo "✅ Sent EC2 State Change Event to SQS queue: ${queue_url}"

GET_ATTRS_SQS_CMD="awslocal sqs get-queue-attributes --queue-url ${queue_url} --attribute-names All --region ${AWS_REGION}"

cordoned=0
evicted=0
test_node="${TEST_NODE:-$CLUSTER_NAME-worker}"
for i in $(seq 1 $TAINT_CHECK_CYCLES); do
    if [[ $cordoned -eq 0 ]] && kubectl get nodes "${test_node}" | grep SchedulingDisabled > /dev/null; then
        echo "✅ Verified the worker node was cordoned!"
        cordoned=1
    fi

    if [[ $cordoned -eq 1 && $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then
        echo "✅ Verified the regular-pod-test pod was evicted!"
        evicted=1
    fi

    if [[ ${evicted} -eq 1 && $(kubectl exec -i "${localstack_pod}" -- bash -c "$GET_ATTRS_SQS_CMD" | jq '(.Attributes.ApproximateNumberOfMessagesNotVisible|tonumber) + (.Attributes.ApproximateNumberOfMessages|tonumber)' ) -eq 0 ]]; then
        kubectl exec -i "${localstack_pod}" -- bash -c "$GET_ATTRS_SQS_CMD"
        echo "✅ Verified the message was deleted from the queue after processing!"
        echo "✅ EC2 State Change SQS Test Passed $CLUSTER_NAME! ✅"
        exit 0
    fi

    echo "Assertion Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
    sleep $TAINT_CHECK_SLEEP
done

if [[ $cordoned -eq 0 ]]; then
    echo "❌ Worker node was not cordoned"
else
    echo "❌ regular-pod-test was not evicted"
fi

echo "❌ EC2 State Change SQS Test Failed $CLUSTER_NAME ❌"
fail_and_exit 1
