9905abd9b4
Co-authored-by: Copilot <copilot@github.com>
496 lines
16 KiB
Bash
Executable File
496 lines
16 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Migrates the Authentik PostgreSQL data onto a Longhorn-backed PVC and
# switches the Helm release over to that claim.
set -euo pipefail

# --- Targets; each can be overridden by a command-line option ---------------
NAMESPACE='authentik'
RELEASE='authentik'
CHART='authentik/authentik'
# values.yaml is looked up next to this script, independent of the caller's cwd.
VALUES_FILE="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)/values.yaml"
TARGET_PVC='authentik-postgresql-longhorn-pvc'
LONGHORN_STORAGE_CLASS='longhorn'
JOB_NAME='authentik-postgres-migrate-to-longhorn'

# --- Timeouts, in seconds ----------------------------------------------------
PVC_BIND_TIMEOUT_SECONDS=900
POD_STOP_TIMEOUT_SECONDS=300
JOB_TIMEOUT_SECONDS=3600
POSTGRES_READY_TIMEOUT_SECONDS=300

# --- Behaviour flags ("true"/"false" strings) --------------------------------
DRY_RUN='false'
AUTO_CONFIRM='false'

# --- Runtime state discovered from the cluster -------------------------------
ORIGINAL_POSTGRES_REPLICAS='1'
SOURCE_NODE_NAME=''
SOURCE_PVC=''
SOURCE_STORAGE=''
POSTGRES_STS=''
POSTGRES_POD=''

# --- Bookkeeping used to restore workloads -----------------------------------
# Parallel arrays: deployment names and their replica counts before scale-down.
declare -a DEPLOYMENTS_TO_RESTORE=()
declare -a DEPLOYMENT_REPLICAS=()
AUTHENTIK_SCALED_DOWN='false'
POSTGRES_SCALED_DOWN='false'
|
|
|
|
# Write an informational message to stdout, prefixed with "[INFO]".
# Arguments: the words of the message (joined with spaces).
info() {
  printf '%s\n' "[INFO] $*"
}
|
|
|
|
# Write a warning to stderr, prefixed with "[WARN]".
# Arguments: the words of the message (joined with spaces).
warn() {
  printf '%s\n' "[WARN] $*" 1>&2
}
|
|
|
|
# Write an error to stderr, prefixed with "[ERROR]", and terminate the shell
# with exit status 1. Never returns to the caller.
# Arguments: the words of the message (joined with spaces).
err() {
  printf '%s\n' "[ERROR] $*" 1>&2
  exit 1
}
|
|
|
|
# Print the command-line help text to stdout.
usage() {
  printf '%s\n' \
    'Usage: migrate-postgres-to-longhorn.sh [options]' \
    '' \
    'Options:' \
    '--namespace <name> Kubernetes namespace (default: authentik)' \
    '--release <name> Helm release name (default: authentik)' \
    '--chart <ref> Helm chart ref/path (default: authentik/authentik)' \
    '--values <path> Helm values file to use (default: ./values.yaml)' \
    '--target-pvc <name> Target Longhorn PVC name (default: authentik-postgresql-longhorn-pvc)' \
    '--storage-class <name> StorageClass for target PVC (default: longhorn)' \
    '--dry-run Print plan without changing cluster state' \
    '--yes Skip interactive confirmation' \
    '--help Show this help'
}
|
|
|
|
# Parse command-line options into the script's global settings.
# Globals written: NAMESPACE, RELEASE, CHART, VALUES_FILE, TARGET_PVC,
#                  LONGHORN_STORAGE_CLASS, DRY_RUN, AUTO_CONFIRM
# Arguments: the script's command-line arguments.
# Exits: 0 after printing usage for --help/-h; 1 (via err) on an unknown
#        argument or when a value-taking option is missing its value.
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --namespace | --release | --chart | --values | --target-pvc | --storage-class)
        # Without this check a trailing option reads an unset "$2", which
        # under `set -u` dies with a cryptic "unbound variable" message.
        (( $# >= 2 )) || err "Option $1 requires a value."
        case "$1" in
          --namespace) NAMESPACE="$2" ;;
          --release) RELEASE="$2" ;;
          --chart) CHART="$2" ;;
          --values) VALUES_FILE="$2" ;;
          --target-pvc) TARGET_PVC="$2" ;;
          --storage-class) LONGHORN_STORAGE_CLASS="$2" ;;
        esac
        # Drop the option name here; the shared shift below drops its value.
        shift
        ;;
      --dry-run)
        DRY_RUN="true"
        ;;
      --yes)
        AUTO_CONFIRM="true"
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        err "Unknown argument: $1"
        ;;
    esac
    shift
  done
}
|
|
|
|
# Abort (via err) unless the given command can be resolved by the shell.
# Arguments: $1 - command name to look up.
require_tool() {
  local tool="$1"
  if ! command -v "${tool}" >/dev/null 2>&1; then
    err "Required tool not found: ${tool}"
  fi
}
|
|
|
|
# Abort (via err) unless the given path exists and is a regular file.
# Arguments: $1 - path to check.
require_file() {
  local path="$1"
  if [[ ! -f "${path}" ]]; then
    err "Required file not found: ${path}"
  fi
}
|
|
|
|
# Poll a PVC in ${NAMESPACE} every 5 seconds until its phase is "Bound".
# Globals read: NAMESPACE
# Arguments: $1 - PVC name
#            $2 - timeout in seconds
# Returns: 0 once the PVC is Bound; aborts via err when the timeout elapses.
wait_for_pvc_bound() {
  local pvc_name="$1"
  local timeout="$2"
  local -r poll_seconds=5
  local waited=0
  local phase

  info "Waiting for PVC ${pvc_name} to become Bound..."
  until
    # A failing kubectl call yields an empty phase and is simply polled again.
    phase="$(kubectl -n "${NAMESPACE}" get pvc "${pvc_name}" \
      -o jsonpath='{.status.phase}' 2>/dev/null || true)"
    [[ "${phase}" == "Bound" ]]
  do
    (( waited < timeout )) || err "Timeout while waiting for PVC ${pvc_name} to bind."
    sleep "${poll_seconds}"
    waited=$(( waited + poll_seconds ))
  done

  info "PVC ${pvc_name} is Bound."
  return 0
}
|
|
|
|
# Poll a StatefulSet in ${NAMESPACE} every 5 seconds until its
# .status.readyReplicas equals the expected value.
# Globals read: NAMESPACE
# Arguments: $1 - StatefulSet name
#            $2 - expected number of ready replicas
#            $3 - timeout in seconds
# Returns: 0 once the count matches; aborts via err when the timeout elapses.
wait_for_statefulset_replicas() {
  local sts_name="$1"
  local expected="$2"
  local timeout="$3"
  local -r poll_seconds=5
  local waited=0
  local ready_now

  info "Waiting for StatefulSet ${sts_name} to reach ${expected} ready replicas..."
  while :; do
    ready_now="$(kubectl -n "${NAMESPACE}" get sts "${sts_name}" \
      -o jsonpath='{.status.readyReplicas}' 2>/dev/null || true)"
    # An empty result (field absent or kubectl failed) is counted as 0.
    [[ -n "${ready_now}" ]] || ready_now=0

    if [[ "${ready_now}" == "${expected}" ]]; then
      info "StatefulSet ${sts_name} has ${expected} ready replicas."
      return 0
    fi

    (( waited < timeout )) \
      || err "Timeout while waiting for StatefulSet ${sts_name} ready replicas ${expected}."

    sleep "${poll_seconds}"
    waited=$(( waited + poll_seconds ))
  done
}
|
|
|
|
# Poll every 5 seconds until no active pods match the label selector.
# Pods whose STATUS column is "Completed" or "Succeeded" are not counted.
# A failed pod listing is never interpreted as "zero pods": it is reported
# with a warning and retried until the timeout, like the other wait helpers.
# Globals read: NAMESPACE
# Arguments: $1 - label selector
#            $2 - timeout in seconds
# Returns: 0 once no active pods remain; aborts via err when the timeout elapses.
wait_for_no_pods_for_release() {
  local selector="$1"
  local timeout="$2"
  local elapsed=0
  local interval=5
  local pod_lines
  local count

  info "Waiting for pods with selector ${selector} to stop..."
  while true; do
    if pod_lines="$(kubectl -n "${NAMESPACE}" get pods -l "${selector}" --no-headers 2>/dev/null)"; then
      # NF skips the single empty line a here-string produces for empty output.
      count="$(awk 'NF && $3 != "Completed" && $3 != "Succeeded" {count++} END {print count+0}' <<< "${pod_lines}")"
      if [[ "${count}" == "0" ]]; then
        info "No active pods left for selector ${selector}."
        return 0
      fi
    else
      warn "Could not list pods for selector ${selector}; retrying."
    fi

    if (( elapsed >= timeout )); then
      err "Timeout while waiting for pods with selector ${selector} to stop."
    fi

    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}
|
|
|
|
# Discover the PostgreSQL StatefulSet of the release and derive from it the
# pod name, source PVC, source node, requested storage size and replica count.
# Globals read:    NAMESPACE, RELEASE
# Globals written: POSTGRES_STS, POSTGRES_POD, SOURCE_PVC, SOURCE_NODE_NAME,
#                  SOURCE_STORAGE, ORIGINAL_POSTGRES_REPLICAS
# Aborts via err when the StatefulSet, its volumeClaimTemplate name, the
# source PVC, or the PVC's requested storage size cannot be determined.
discover_postgres_runtime() {
  local claim_template

  POSTGRES_STS="$(kubectl -n "${NAMESPACE}" get sts \
    -l "app.kubernetes.io/name=postgresql,app.kubernetes.io/instance=${RELEASE}" \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
  if [[ -z "${POSTGRES_STS}" ]]; then
    err "Could not find PostgreSQL StatefulSet for release ${RELEASE}."
  fi

  # Only ordinal 0 of the StatefulSet is considered.
  POSTGRES_POD="${POSTGRES_STS}-0"

  claim_template="$(kubectl -n "${NAMESPACE}" get sts "${POSTGRES_STS}" \
    -o jsonpath='{.spec.volumeClaimTemplates[0].metadata.name}' 2>/dev/null || true)"
  if [[ -z "${claim_template}" ]]; then
    err "Could not detect PostgreSQL volumeClaimTemplate name from StatefulSet ${POSTGRES_STS}."
  fi

  # PVC name is composed as <template name>-<StatefulSet name>-<ordinal>.
  SOURCE_PVC="${claim_template}-${POSTGRES_STS}-0"
  if ! kubectl -n "${NAMESPACE}" get pvc "${SOURCE_PVC}" >/dev/null 2>&1; then
    err "Source PVC not found: ${SOURCE_PVC}"
  fi

  # Empty when the pod cannot be queried; reported below, not treated as fatal.
  SOURCE_NODE_NAME="$(kubectl -n "${NAMESPACE}" get pod "${POSTGRES_POD}" \
    -o jsonpath='{.spec.nodeName}' 2>/dev/null || true)"

  SOURCE_STORAGE="$(kubectl -n "${NAMESPACE}" get pvc "${SOURCE_PVC}" \
    -o jsonpath='{.spec.resources.requests.storage}' 2>/dev/null || true)"
  if [[ -z "${SOURCE_STORAGE}" ]]; then
    err "Could not read requested storage size from source PVC ${SOURCE_PVC}."
  fi

  ORIGINAL_POSTGRES_REPLICAS="$(kubectl -n "${NAMESPACE}" get sts "${POSTGRES_STS}" \
    -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
  # Fall back to 1 when the replica count could not be read.
  [[ -n "${ORIGINAL_POSTGRES_REPLICAS}" ]] || ORIGINAL_POSTGRES_REPLICAS=1

  info "Detected PostgreSQL StatefulSet: ${POSTGRES_STS}"
  info "Detected source PVC: ${SOURCE_PVC}"
  if [[ -z "${SOURCE_NODE_NAME}" ]]; then
    info "Source PostgreSQL pod currently not running, proceeding without node pinning"
  else
    info "Detected source node: ${SOURCE_NODE_NAME}"
  fi
  info "Detected source requested storage: ${SOURCE_STORAGE}"
}
|
|
|
|
# Apply a ReadWriteOnce PVC manifest for the migration target, sized like the
# source PVC, then wait for it to become Bound.
# Globals read: TARGET_PVC, NAMESPACE, LONGHORN_STORAGE_CLASS, SOURCE_STORAGE,
#               PVC_BIND_TIMEOUT_SECONDS
ensure_target_pvc() {
  info "Ensuring Longhorn target PVC ${TARGET_PVC} exists."

  # The manifest is fed to kubectl on stdin; variables expand inside the heredoc.
  kubectl apply -f - >/dev/null <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ${TARGET_PVC}
  namespace: ${NAMESPACE}
spec:
  storageClassName: ${LONGHORN_STORAGE_CLASS}
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: ${SOURCE_STORAGE}
EOF

  wait_for_pvc_bound "${TARGET_PVC}" "${PVC_BIND_TIMEOUT_SECONDS}"
}
|
|
|
|
# Record every deployment of the release together with its replica count,
# scale the running ones to 0, then wait for the Authentik pods to stop.
# Globals read:    NAMESPACE, RELEASE, POD_STOP_TIMEOUT_SECONDS
# Globals written: DEPLOYMENTS_TO_RESTORE, DEPLOYMENT_REPLICAS,
#                  AUTHENTIK_SCALED_DOWN
# Returns: 0 (after a warning) when the release has no deployments.
scale_down_authentik() {
  local deployment_lines
  # Loop variables are local so they do not leak into the global scope.
  local name
  local replicas

  deployment_lines="$(kubectl -n "${NAMESPACE}" get deploy \
    -l "app.kubernetes.io/instance=${RELEASE}" \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' \
    2>/dev/null || true)"

  if [[ -z "${deployment_lines}" ]]; then
    warn "No Authentik deployments found for release ${RELEASE}."
    return 0
  fi

  # Raise the flag BEFORE the first scale call: restore_authentik_deployments
  # does nothing unless it is "true", so setting it only after the loop would
  # leave already-stopped deployments down if a later scale call aborts.
  AUTHENTIK_SCALED_DOWN="true"

  while IFS=$'\t' read -r name replicas; do
    [[ -n "${name}" ]] || continue
    replicas="${replicas:-1}"
    # Record first, so a deployment whose scale call fails is still restored.
    DEPLOYMENTS_TO_RESTORE+=("${name}")
    DEPLOYMENT_REPLICAS+=("${replicas}")
    if [[ "${replicas}" != "0" ]]; then
      info "Scaling deployment ${name} to 0."
      kubectl -n "${NAMESPACE}" scale deploy "${name}" --replicas=0 >/dev/null
    fi
  done <<< "${deployment_lines}"

  wait_for_no_pods_for_release \
    "app.kubernetes.io/instance=${RELEASE},app.kubernetes.io/name=authentik" \
    "${POD_STOP_TIMEOUT_SECONDS}"
}
|
|
|
|
# Scale the PostgreSQL StatefulSet to 0 replicas and wait until it reports
# 0 ready replicas.
# Globals read:    NAMESPACE, POSTGRES_STS, POD_STOP_TIMEOUT_SECONDS
# Globals written: POSTGRES_SCALED_DOWN
scale_down_postgres() {
  local -r sts="${POSTGRES_STS}"

  info "Scaling StatefulSet ${sts} to 0 for consistent copy."
  kubectl -n "${NAMESPACE}" scale sts "${sts}" --replicas=0 >/dev/null

  # Set only after the scale request went through; restore_on_error keys off it.
  POSTGRES_SCALED_DOWN="true"

  wait_for_statefulset_replicas "${sts}" "0" "${POD_STOP_TIMEOUT_SECONDS}"
}
|
|
|
|
# Recreate and run the one-shot Kubernetes Job that rsyncs the source PVC
# onto the target PVC, stream its logs, and wait for the outcome.
# The Job's completion is polled on both the "Complete" and the "Failed"
# condition, so a failed Job is reported right away instead of blocking for
# the whole timeout while waiting for a condition that will never appear.
# Globals read: NAMESPACE, JOB_NAME, SOURCE_PVC, TARGET_PVC,
#               JOB_TIMEOUT_SECONDS
# Returns: 0 when the Job completed; aborts via err when it failed or did not
#          finish within JOB_TIMEOUT_SECONDS.
run_migration_job() {
  local elapsed=0
  local interval=5
  local complete
  local failed

  info "Recreating migration job ${JOB_NAME}."
  kubectl -n "${NAMESPACE}" delete job "${JOB_NAME}" --ignore-not-found >/dev/null

  # Unquoted heredoc: ${...} expands here, \$ is passed through so that it is
  # expanded later by the shell inside the container.
  cat <<EOF | kubectl apply -f - >/dev/null
apiVersion: batch/v1
kind: Job
metadata:
  name: ${JOB_NAME}
  namespace: ${NAMESPACE}
spec:
  backoffLimit: 2
  template:
    metadata:
      name: ${JOB_NAME}
    spec:
      restartPolicy: Never
      containers:
        - name: migrate
          image: alpine:3.20
          command:
            - sh
            - -c
            - |
              set -euo pipefail
              apk add --no-cache rsync findutils coreutils

              src_count="\$(find /source -mindepth 1 | wc -l | tr -d ' ')"
              echo "Source entries before copy: \${src_count}"

              rsync -aHAX --numeric-ids --delete /source/ /target/

              target_count="\$(find /target -mindepth 1 | wc -l | tr -d ' ')"
              echo "Target entries after copy: \${target_count}"

              if [ -f /target/PG_VERSION ] && [ -d /target/base ]; then
                pg_root="/target"
              elif [ -f /target/data/PG_VERSION ] && [ -d /target/data/base ]; then
                pg_root="/target/data"
              else
                echo "Could not find PostgreSQL data root on target" >&2
                echo "Top-level target contents:" >&2
                ls -la /target >&2 || true
                echo "Nested target/data contents:" >&2
                ls -la /target/data >&2 || true
                exit 1
              fi

              test "\${target_count}" -gt 0
              echo "Detected PostgreSQL data root: \${pg_root}"

              echo "Top-level target contents:"
              ls -la /target
              echo "Migration verification successful"
          volumeMounts:
            - name: source
              mountPath: /source
              readOnly: true
            - name: target
              mountPath: /target
      volumes:
        - name: source
          persistentVolumeClaim:
            claimName: ${SOURCE_PVC}
        - name: target
          persistentVolumeClaim:
            claimName: ${TARGET_PVC}
EOF

  info "Streaming migration job logs."
  # Best effort only: a log-streaming error must not abort the migration.
  kubectl -n "${NAMESPACE}" logs -f "job/${JOB_NAME}" || true

  info "Waiting for migration job completion."
  while true; do
    complete="$(kubectl -n "${NAMESPACE}" get job "${JOB_NAME}" \
      -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null || true)"
    if [[ "${complete}" == "True" ]]; then
      break
    fi

    failed="$(kubectl -n "${NAMESPACE}" get job "${JOB_NAME}" \
      -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null || true)"
    if [[ "${failed}" == "True" ]] || (( elapsed >= JOB_TIMEOUT_SECONDS )); then
      # Dump diagnostics before aborting; both commands are best effort.
      kubectl -n "${NAMESPACE}" describe job "${JOB_NAME}" || true
      kubectl -n "${NAMESPACE}" logs "job/${JOB_NAME}" --tail=-1 || true
      err "Migration job failed."
    fi

    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done

  info "Migration job completed."
}
|
|
|
|
# Start a throwaway alpine pod that mounts the target PVC at /target and
# checks for a PostgreSQL layout (PG_VERSION file plus base/ directory)
# either at /target or at /target/data. The pod's output is discarded.
# Globals read: NAMESPACE, TARGET_PVC
verify_target_pvc_has_data() {
  local check_script
  local overrides

  # Single-quoted so $root stays literal and is expanded inside the container.
  check_script='set -e; if [ -f /target/PG_VERSION ] && [ -d /target/base ]; then root=/target; elif [ -f /target/data/PG_VERSION ] && [ -d /target/data/base ]; then root=/target/data; else echo missing-postgres-layout >&2; ls -la /target >&2 || true; ls -la /target/data >&2 || true; exit 1; fi; echo detected-root=$root; find /target -mindepth 1 | wc -l; ls -la /target | head -n 20'

  # Pod spec override as JSON: first %s is the check script, second the claim.
  printf -v overrides \
    '{"apiVersion":"v1","spec":{"containers":[{"name":"verify","image":"alpine:3.20","command":["sh","-c","%s"],"volumeMounts":[{"name":"target","mountPath":"/target"}]}],"volumes":[{"name":"target","persistentVolumeClaim":{"claimName":"%s"}}]}}' \
    "${check_script}" "${TARGET_PVC}"

  info "Verifying copied PostgreSQL markers on target PVC ${TARGET_PVC}."
  kubectl -n "${NAMESPACE}" run authentik-postgres-verify \
    --rm --restart=Never --image=alpine:3.20 \
    --overrides="${overrides}" \
    --attach=true >/dev/null
}
|
|
|
|
# Run `helm upgrade --install` for the release with the values file and
# --reuse-values, overriding the PostgreSQL persistence settings so that the
# chart uses the existing target claim. Helm's stdout is discarded.
# Globals read: NAMESPACE, RELEASE, CHART, VALUES_FILE, TARGET_PVC
helm_switch_postgres_to_target_pvc() {
  local -a overrides=(
    --set "postgresql.primary.persistence.enabled=true"
    --set "postgresql.primary.persistence.existingClaim=${TARGET_PVC}"
    # NOTE(review): sets authentik.existingSecret.secretName to an empty
    # value; the reason is not visible in this script - confirm intent.
    --set "authentik.existingSecret.secretName="
  )

  info "Upgrading Helm release ${RELEASE} to use existing PostgreSQL claim ${TARGET_PVC}."
  helm -n "${NAMESPACE}" upgrade --install "${RELEASE}" "${CHART}" \
    -f "${VALUES_FILE}" \
    --reuse-values \
    "${overrides[@]}" >/dev/null
}
|
|
|
|
# Wait for the PostgreSQL StatefulSet to report 1 ready replica, then confirm
# that pod <StatefulSet>-0 mounts the target PVC.
# Globals read: NAMESPACE, POSTGRES_STS, POSTGRES_READY_TIMEOUT_SECONDS,
#               TARGET_PVC
# Aborts via err when no claim can be detected or a different claim is mounted.
verify_postgres_uses_target_claim() {
  local -r pod="${POSTGRES_STS}-0"
  local mounted_claim

  info "Waiting for PostgreSQL StatefulSet to become ready after Helm upgrade."
  wait_for_statefulset_replicas "${POSTGRES_STS}" "1" "${POSTGRES_READY_TIMEOUT_SECONDS}"

  # Preferred lookup: the claim behind the volume named "data".
  mounted_claim="$(kubectl -n "${NAMESPACE}" get pod "${pod}" \
    -o jsonpath='{.spec.volumes[?(@.name=="data")].persistentVolumeClaim.claimName}' \
    2>/dev/null || true)"

  # Fallback: the first claim of any PVC-backed volume of the pod.
  if [[ -z "${mounted_claim}" ]]; then
    mounted_claim="$(kubectl -n "${NAMESPACE}" get pod "${pod}" \
      -o jsonpath='{.spec.volumes[?(@.persistentVolumeClaim)].persistentVolumeClaim.claimName}' \
      2>/dev/null | awk '{print $1}')"
  fi

  if [[ -z "${mounted_claim}" ]]; then
    err "Could not detect mounted PostgreSQL claim after upgrade."
  fi
  if [[ "${mounted_claim}" != "${TARGET_PVC}" ]]; then
    err "PostgreSQL pod mounts ${mounted_claim}, expected ${TARGET_PVC}."
  fi

  info "PostgreSQL now mounts target claim ${mounted_claim}."
}
|
|
|
|
# Scale every recorded deployment back to the replica count it had before
# scale-down; deployments recorded with 0 replicas are skipped.
# Does nothing unless AUTHENTIK_SCALED_DOWN is "true".
# Globals read:    NAMESPACE, DEPLOYMENTS_TO_RESTORE, DEPLOYMENT_REPLICAS
# Globals written: AUTHENTIK_SCALED_DOWN (reset to "false" afterwards)
restore_authentik_deployments() {
  [[ "${AUTHENTIK_SCALED_DOWN}" == "true" ]] || return 0

  local idx
  local name
  local replicas
  # The two arrays are parallel: the same index holds name and replica count.
  for (( idx = 0; idx < ${#DEPLOYMENTS_TO_RESTORE[@]}; idx++ )); do
    name="${DEPLOYMENTS_TO_RESTORE[idx]}"
    replicas="${DEPLOYMENT_REPLICAS[idx]}"

    if [[ "${replicas}" != "0" ]]; then
      info "Restoring deployment ${name} replicas to ${replicas}."
      kubectl -n "${NAMESPACE}" scale deploy "${name}" --replicas="${replicas}" >/dev/null
    else
      info "Skipping restore of deployment ${name} to 0 replicas."
    fi
  done

  AUTHENTIK_SCALED_DOWN="false"
}
|
|
|
|
# Best-effort rollback: scale the PostgreSQL StatefulSet back to its original
# replica count if it was scaled down, then restore the Authentik deployments.
# Failures of either step are ignored.
# Globals read:    NAMESPACE, POSTGRES_STS, ORIGINAL_POSTGRES_REPLICAS
# Globals written: POSTGRES_SCALED_DOWN
restore_on_error() {
  case "${POSTGRES_SCALED_DOWN}" in
    true)
      warn "Restoring StatefulSet ${POSTGRES_STS} replicas to ${ORIGINAL_POSTGRES_REPLICAS}."
      kubectl -n "${NAMESPACE}" scale sts "${POSTGRES_STS}" \
        --replicas="${ORIGINAL_POSTGRES_REPLICAS}" >/dev/null || true
      POSTGRES_SCALED_DOWN="false"
      ;;
  esac

  restore_authentik_deployments || true
}
|
|
|
|
# EXIT trap handler: when the script is ending with a non-zero status, roll
# back the scale-downs via restore_on_error and emit a warning with the code.
on_exit() {
  # Must be the first statement so $? still holds the script's exit status.
  local status=$?

  if (( status == 0 )); then
    return 0
  fi

  restore_on_error
  warn "Script failed with exit code ${status}."
}

# Run the handler on every exit path of the script.
trap on_exit EXIT
|
|
|
|
# Check everything the migration needs before touching the cluster: the
# kubectl and helm binaries, the values file, the namespace and the release.
# Globals read: VALUES_FILE, NAMESPACE, RELEASE
# Aborts via err (directly or through the require_* helpers) on the first
# missing prerequisite.
validate_prerequisites() {
  require_tool kubectl
  require_tool helm
  require_file "${VALUES_FILE}"

  if ! kubectl get namespace "${NAMESPACE}" >/dev/null 2>&1; then
    err "Namespace does not exist: ${NAMESPACE}"
  fi

  if ! helm -n "${NAMESPACE}" status "${RELEASE}" >/dev/null 2>&1; then
    err "Helm release not found: ${RELEASE} in namespace ${NAMESPACE}"
  fi
}
|
|
|
|
# Ask the operator to type MIGRATE before any workload is scaled down.
# Skipped entirely when AUTO_CONFIRM is "true" (the --yes option).
# Globals read: AUTO_CONFIRM, NAMESPACE
# Returns: 0 when confirmed or skipped; aborts via err on any other input,
#          including a closed or empty stdin.
confirm_migration() {
  # Local, so the answer does not leak into the global scope.
  local confirmation=""

  if [[ "${AUTO_CONFIRM}" == "true" ]]; then
    return 0
  fi

  echo
  warn "Authentik and PostgreSQL workloads in namespace ${NAMESPACE} will be scaled down during migration."

  # read returns non-zero at end of input; under `set -e` that would end the
  # script without any message. Tolerate it here and let the comparison below
  # decide, so a missing answer produces the regular error.
  read -r -p "Type MIGRATE to continue: " confirmation || true

  if [[ "${confirmation}" != "MIGRATE" ]]; then
    err "Confirmation failed. Aborted by user."
  fi
}
|
|
|
|
# Log the list of steps a real run would perform, without executing them.
# Globals read: TARGET_PVC, JOB_NAME, RELEASE
print_dry_run_plan() {
  local line
  local -a plan=(
    "Dry-run mode active. No cluster changes will be made."
    "Planned steps:"
    "1) Discover PostgreSQL StatefulSet, pod, source PVC, and size"
    "2) Create Longhorn target PVC ${TARGET_PVC}"
    "3) Scale down Authentik deployments and PostgreSQL StatefulSet"
    "4) Run rsync migration job ${JOB_NAME}"
    "5) Verify copied DB files on target PVC"
    "6) Helm upgrade release ${RELEASE} with postgresql.primary.persistence.existingClaim=${TARGET_PVC}"
    "7) Verify new PostgreSQL pod uses target PVC"
    "8) Restore Authentik deployments"
  )

  for line in "${plan[@]}"; do
    info "${line}"
  done
}
|
|
|
|
# Orchestrate the migration: discover the current state, then either print the
# dry-run plan or execute every migration step in order.
# Globals read:    DRY_RUN, TARGET_PVC
# Globals written: POSTGRES_SCALED_DOWN
run_migration() {
  local step

  discover_postgres_runtime

  case "${DRY_RUN}" in
    true)
      print_dry_run_plan
      return 0
      ;;
  esac

  # Strictly ordered; under `set -e` the first failing step ends the script.
  for step in \
    confirm_migration \
    ensure_target_pvc \
    scale_down_authentik \
    scale_down_postgres \
    run_migration_job \
    verify_target_pvc_has_data \
    helm_switch_postgres_to_target_pvc; do
    "${step}"
  done

  # From here on restore_on_error no longer rescales the StatefulSet.
  # NOTE(review): presumably because the Helm upgrade now owns its replica
  # count - confirm.
  POSTGRES_SCALED_DOWN="false"

  verify_postgres_uses_target_claim
  restore_authentik_deployments

  info "Migration finished successfully."
  info "PostgreSQL is now running on Longhorn PVC ${TARGET_PVC}."
}
|
|
|
|
# Top-level flow of the script: parse options, check prerequisites, migrate.
# Arguments: the script's command-line arguments, passed on to parse_args.
main() {
  parse_args "$@"

  info "Starting Authentik PostgreSQL migration to Longhorn PVC."

  validate_prerequisites
  run_migration
}
|
|
|
|
# Script entry point: forward all command-line arguments to main.
main "$@"
|