#!/usr/bin/env bash
#
# Copies the PostgreSQL data of an Authentik Helm release onto a new PVC
# (StorageClass "longhorn" by default) and switches the release to that claim.
# Run with --help for the list of options.

set -euo pipefail

# --- Settings that can be overridden on the command line (see usage) ---------
NAMESPACE="authentik"
RELEASE="authentik"
CHART="authentik/authentik"
# Default values file: values.yaml in the directory that contains this script.
VALUES_FILE="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)/values.yaml"
TARGET_PVC="authentik-postgresql-longhorn-pvc"
LONGHORN_STORAGE_CLASS="longhorn"
DRY_RUN="false"
AUTO_CONFIRM="false"

# --- Fixed settings ----------------------------------------------------------
JOB_NAME="authentik-postgres-migrate-to-longhorn"
PVC_BIND_TIMEOUT_SECONDS=900
POD_STOP_TIMEOUT_SECONDS=300
JOB_TIMEOUT_SECONDS=3600
POSTGRES_READY_TIMEOUT_SECONDS=300

# --- Runtime state, filled in while the script runs --------------------------
ORIGINAL_POSTGRES_REPLICAS="1"
SOURCE_NODE_NAME=""
SOURCE_PVC=""
SOURCE_STORAGE=""
POSTGRES_STS=""
POSTGRES_POD=""
# Parallel arrays: deployment name and the replica count it had before scaling.
DEPLOYMENTS_TO_RESTORE=()
DEPLOYMENT_REPLICAS=()
AUTHENTIK_SCALED_DOWN="false"
POSTGRES_SCALED_DOWN="false"

# Prints an informational message to stdout.
info() {
  echo "[INFO] $*"
}

# Prints a warning to stderr.
warn() {
  echo "[WARN] $*" >&2
}

# Prints an error to stderr and terminates the script with exit code 1.
err() {
  echo "[ERROR] $*" >&2
  exit 1
}

# Prints the command-line help to stdout.
usage() {
  cat <<'EOF'
Usage: migrate-postgres-to-longhorn.sh [options]

Options:
  --namespace NAME       Kubernetes namespace (default: authentik)
  --release NAME         Helm release name (default: authentik)
  --chart REF            Helm chart ref/path (default: authentik/authentik)
  --values FILE          Helm values file to use (default: ./values.yaml)
  --target-pvc NAME      Target Longhorn PVC name (default: authentik-postgresql-longhorn-pvc)
  --storage-class NAME   StorageClass for target PVC (default: longhorn)
  --dry-run              Print plan without changing cluster state
  --yes                  Skip interactive confirmation
  --help                 Show this help
EOF
}

#######################################
# Parses command-line options into the global settings.
# Globals:   NAMESPACE, RELEASE, CHART, VALUES_FILE, TARGET_PVC,
#            LONGHORN_STORAGE_CLASS, DRY_RUN, AUTO_CONFIRM (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing usage for --help/-h; 1 (via err) on an unknown
#            argument or when a value-taking option has no value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --namespace | --release | --chart | --values | --target-pvc | --storage-class)
        # Without this check a trailing option would trip `set -u` on "$2"
        # and die with an unhelpful "unbound variable" message.
        [[ $# -ge 2 ]] || err "Option $1 requires a value"
        case "$1" in
          --namespace) NAMESPACE="$2" ;;
          --release) RELEASE="$2" ;;
          --chart) CHART="$2" ;;
          --values) VALUES_FILE="$2" ;;
          --target-pvc) TARGET_PVC="$2" ;;
          --storage-class) LONGHORN_STORAGE_CLASS="$2" ;;
        esac
        shift # consume the value; the option itself is shifted below
        ;;
      --dry-run)
        DRY_RUN="true"
        ;;
      --yes)
        AUTO_CONFIRM="true"
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        err "Unknown argument: $1"
        ;;
    esac
    shift
  done
}

# Aborts via err unless the given command ($1) is available on PATH.
require_tool() {
  local tool="$1"
  command -v "${tool}" >/dev/null 2>&1 || err "Required tool not found: ${tool}"
}

# Aborts via err unless the given path ($1) is an existing regular file.
require_file() {
  local path="$1"
  [[ -f "${path}" ]] || err "Required file not found: ${path}"
}
#######################################
# Polls until the named PVC reports phase "Bound".
# Globals:   NAMESPACE (read)
# Arguments: $1 - PVC name
#            $2 - timeout in seconds
# Exits:     via err when the timeout elapses first.
#######################################
wait_for_pvc_bound() {
  local pvc_name="$1"
  local timeout="$2"
  local elapsed=0
  local interval=5
  local phase

  info "Waiting for PVC ${pvc_name} to become Bound..."
  while true; do
    # A failed lookup yields an empty phase and is retried until the timeout.
    phase="$(kubectl -n "${NAMESPACE}" get pvc "${pvc_name}" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
    if [[ "${phase}" == "Bound" ]]; then
      info "PVC ${pvc_name} is Bound."
      return 0
    fi
    if (( elapsed >= timeout )); then
      err "Timeout while waiting for PVC ${pvc_name} to bind."
    fi
    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}

#######################################
# Polls until a StatefulSet reports the expected number of ready replicas.
# Globals:   NAMESPACE (read)
# Arguments: $1 - StatefulSet name
#            $2 - expected .status.readyReplicas (a missing field counts as 0)
#            $3 - timeout in seconds
# Exits:     via err when the timeout elapses first.
#######################################
wait_for_statefulset_replicas() {
  local sts_name="$1"
  local expected="$2"
  local timeout="$3"
  local elapsed=0
  local interval=5
  local ready

  info "Waiting for StatefulSet ${sts_name} to reach ${expected} ready replicas..."
  while true; do
    ready="$(kubectl -n "${NAMESPACE}" get sts "${sts_name}" -o jsonpath='{.status.readyReplicas}' 2>/dev/null || true)"
    ready="${ready:-0}"
    if [[ "${ready}" == "${expected}" ]]; then
      info "StatefulSet ${sts_name} has ${expected} ready replicas."
      return 0
    fi
    if (( elapsed >= timeout )); then
      err "Timeout while waiting for StatefulSet ${sts_name} ready replicas ${expected}."
    fi
    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}

#######################################
# Polls until no pod matching the selector remains, ignoring pods whose
# STATUS column reads "Completed" or "Succeeded".
# Globals:   NAMESPACE (read)
# Arguments: $1 - label selector
#            $2 - timeout in seconds
# Exits:     via err when the timeout elapses first.
#######################################
wait_for_no_pods_for_release() {
  local selector="$1"
  local timeout="$2"
  local elapsed=0
  local interval=5
  local pod_lines
  local count

  info "Waiting for pods with selector ${selector} to stop..."
  while true; do
    # A failed listing must neither count as "no pods" nor abort the whole
    # migration; warn and retry until the timeout instead.
    if pod_lines="$(kubectl -n "${NAMESPACE}" get pods -l "${selector}" --no-headers 2>/dev/null)"; then
      # NF skips the blank line a here-string produces for empty output.
      count="$(awk 'NF && $3 != "Completed" && $3 != "Succeeded" {count++} END {print count+0}' <<< "${pod_lines}")"
      if [[ "${count}" == "0" ]]; then
        info "No active pods left for selector ${selector}."
        return 0
      fi
    else
      warn "Could not list pods for selector ${selector}; retrying."
    fi
    if (( elapsed >= timeout )); then
      err "Timeout while waiting for pods with selector ${selector} to stop."
    fi
    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}

#######################################
# Polls until the named pod object no longer exists.
# Globals:   NAMESPACE (read)
# Arguments: $1 - pod name
#            $2 - timeout in seconds
# Exits:     via err when the timeout elapses first.
#######################################
wait_for_pod_gone() {
  local pod_name="$1"
  local timeout="$2"
  local elapsed=0
  local interval=5
  local found

  info "Waiting for pod ${pod_name} to terminate..."
  while true; do
    # --ignore-not-found gives exit 0 with empty output once the pod is gone;
    # a failed request is treated as "unknown" and retried.
    if found="$(kubectl -n "${NAMESPACE}" get pod "${pod_name}" --ignore-not-found -o name 2>/dev/null)" \
      && [[ -z "${found}" ]]; then
      info "Pod ${pod_name} is gone."
      return 0
    fi
    if (( elapsed >= timeout )); then
      err "Timeout while waiting for pod ${pod_name} to terminate."
    fi
    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}

#######################################
# Polls a Job until it reports condition Complete or Failed.
# Globals:   NAMESPACE (read)
# Arguments: $1 - Job name
#            $2 - timeout in seconds
# Returns:   0 when Complete; 1 when Failed or when the timeout elapses.
#            Returns instead of exiting so the caller can print diagnostics.
#######################################
wait_for_job_finished() {
  local job_name="$1"
  local timeout="$2"
  local elapsed=0
  local interval=5
  local complete
  local failed

  while true; do
    complete="$(kubectl -n "${NAMESPACE}" get job "${job_name}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null || true)"
    failed="$(kubectl -n "${NAMESPACE}" get job "${job_name}" -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null || true)"
    if [[ "${complete}" == "True" ]]; then
      return 0
    fi
    if [[ "${failed}" == "True" ]]; then
      return 1
    fi
    if (( elapsed >= timeout )); then
      return 1
    fi
    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}

#######################################
# Discovers the PostgreSQL StatefulSet of the release and derives the pod
# name, source PVC, requested size, node and replica count from it.
# Globals:   NAMESPACE, RELEASE (read); POSTGRES_STS, POSTGRES_POD,
#            SOURCE_PVC, SOURCE_NODE_NAME, SOURCE_STORAGE,
#            ORIGINAL_POSTGRES_REPLICAS (written)
# Exits:     via err when the StatefulSet, its volumeClaimTemplate, the source
#            PVC or its requested size cannot be determined.
#######################################
discover_postgres_runtime() {
  POSTGRES_STS="$(kubectl -n "${NAMESPACE}" get sts -l "app.kubernetes.io/name=postgresql,app.kubernetes.io/instance=${RELEASE}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
  [[ -n "${POSTGRES_STS}" ]] || err "Could not find PostgreSQL StatefulSet for release ${RELEASE}."
  POSTGRES_POD="${POSTGRES_STS}-0"

  SOURCE_PVC="$(kubectl -n "${NAMESPACE}" get sts "${POSTGRES_STS}" -o jsonpath='{.spec.volumeClaimTemplates[0].metadata.name}' 2>/dev/null || true)"
  [[ -n "${SOURCE_PVC}" ]] || err "Could not detect PostgreSQL volumeClaimTemplate name from StatefulSet ${POSTGRES_STS}."
  # PVC name for ordinal 0: <template>-<statefulset>-0.
  SOURCE_PVC="${SOURCE_PVC}-${POSTGRES_STS}-0"
  kubectl -n "${NAMESPACE}" get pvc "${SOURCE_PVC}" >/dev/null 2>&1 || err "Source PVC not found: ${SOURCE_PVC}"

  # Empty when the pod is not running; only reported below.
  # NOTE(review): SOURCE_NODE_NAME is not consumed anywhere in this script,
  # although the log message suggests node pinning was intended - confirm.
  SOURCE_NODE_NAME="$(kubectl -n "${NAMESPACE}" get pod "${POSTGRES_POD}" -o jsonpath='{.spec.nodeName}' 2>/dev/null || true)"

  SOURCE_STORAGE="$(kubectl -n "${NAMESPACE}" get pvc "${SOURCE_PVC}" -o jsonpath='{.spec.resources.requests.storage}' 2>/dev/null || true)"
  [[ -n "${SOURCE_STORAGE}" ]] || err "Could not read requested storage size from source PVC ${SOURCE_PVC}."

  ORIGINAL_POSTGRES_REPLICAS="$(kubectl -n "${NAMESPACE}" get sts "${POSTGRES_STS}" -o jsonpath='{.spec.replicas}' 2>/dev/null || true)"
  ORIGINAL_POSTGRES_REPLICAS="${ORIGINAL_POSTGRES_REPLICAS:-1}"

  info "Detected PostgreSQL StatefulSet: ${POSTGRES_STS}"
  info "Detected source PVC: ${SOURCE_PVC}"
  if [[ -n "${SOURCE_NODE_NAME}" ]]; then
    info "Detected source node: ${SOURCE_NODE_NAME}"
  else
    info "Source PostgreSQL pod currently not running, proceeding without node pinning"
  fi
  info "Detected source requested storage: ${SOURCE_STORAGE}"
}

#######################################
# Applies the target PVC manifest (same requested size as the source) and
# waits for the claim to bind.
# Globals:   NAMESPACE, TARGET_PVC, LONGHORN_STORAGE_CLASS, SOURCE_STORAGE,
#            PVC_BIND_TIMEOUT_SECONDS (read)
#######################################
ensure_target_pvc() {
  info "Ensuring Longhorn target PVC ${TARGET_PVC} exists."
  kubectl -n "${NAMESPACE}" apply -f - >/dev/null <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ${TARGET_PVC}
  namespace: ${NAMESPACE}
spec:
  storageClassName: ${LONGHORN_STORAGE_CLASS}
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: ${SOURCE_STORAGE}
EOF
  wait_for_pvc_bound "${TARGET_PVC}" "${PVC_BIND_TIMEOUT_SECONDS}"
}

#######################################
# Records the replica count of every deployment of the release, scales them
# to 0 and waits for the Authentik pods to stop.
# Globals:   NAMESPACE, RELEASE, POD_STOP_TIMEOUT_SECONDS (read);
#            DEPLOYMENTS_TO_RESTORE, DEPLOYMENT_REPLICAS,
#            AUTHENTIK_SCALED_DOWN (written)
#######################################
scale_down_authentik() {
  local deployment_lines
  local name
  local replicas

  deployment_lines="$(kubectl -n "${NAMESPACE}" get deploy -l "app.kubernetes.io/instance=${RELEASE}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.replicas}{"\n"}{end}' 2>/dev/null || true)"
  if [[ -z "${deployment_lines}" ]]; then
    warn "No Authentik deployments found for release ${RELEASE}."
    return 0
  fi

  while IFS=$'\t' read -r name replicas; do
    [[ -n "${name}" ]] || continue
    replicas="${replicas:-1}"
    DEPLOYMENTS_TO_RESTORE+=("${name}")
    DEPLOYMENT_REPLICAS+=("${replicas}")
    if [[ "${replicas}" != "0" ]]; then
      info "Scaling deployment ${name} to 0."
      # Set the flag before scaling so a failure part-way through the loop
      # still lets the exit handler restore what was already scaled down.
      AUTHENTIK_SCALED_DOWN="true"
      kubectl -n "${NAMESPACE}" scale deploy "${name}" --replicas=0 >/dev/null
    fi
  done <<< "${deployment_lines}"

  AUTHENTIK_SCALED_DOWN="true"
  wait_for_no_pods_for_release "app.kubernetes.io/instance=${RELEASE},app.kubernetes.io/name=authentik" "${POD_STOP_TIMEOUT_SECONDS}"
}

#######################################
# Scales the PostgreSQL StatefulSet to 0 and waits until its pod is gone.
# Globals:   NAMESPACE, POSTGRES_STS, POSTGRES_POD,
#            POD_STOP_TIMEOUT_SECONDS (read); POSTGRES_SCALED_DOWN (written)
#######################################
scale_down_postgres() {
  info "Scaling StatefulSet ${POSTGRES_STS} to 0 for consistent copy."
  # Flag first: if the scale request fails ambiguously the exit handler
  # should still try to restore the replica count.
  POSTGRES_SCALED_DOWN="true"
  kubectl -n "${NAMESPACE}" scale sts "${POSTGRES_STS}" --replicas=0 >/dev/null
  wait_for_statefulset_replicas "${POSTGRES_STS}" "0" "${POD_STOP_TIMEOUT_SECONDS}"
  # Zero ready replicas only means no pod is Ready; wait for the pod object
  # to disappear so nothing can still be writing to the source volume.
  wait_for_pod_gone "${POSTGRES_POD}" "${POD_STOP_TIMEOUT_SECONDS}"
}

#######################################
# (Re)creates the copy Job that rsyncs the source PVC onto the target PVC,
# streams its logs and waits for it to finish.
# Globals:   NAMESPACE, JOB_NAME, SOURCE_PVC, TARGET_PVC,
#            JOB_TIMEOUT_SECONDS (read)
# Exits:     via err when the Job fails or does not finish in time.
#######################################
run_migration_job() {
  info "Recreating migration job ${JOB_NAME}."
  kubectl -n "${NAMESPACE}" delete job "${JOB_NAME}" --ignore-not-found >/dev/null

  # Unquoted heredoc: ${...} is expanded here, \$ is left for the container.
  kubectl -n "${NAMESPACE}" apply -f - >/dev/null <<EOF
apiVersion: batch/v1
kind: Job
metadata:
  name: ${JOB_NAME}
  namespace: ${NAMESPACE}
spec:
  backoffLimit: 2
  template:
    metadata:
      name: ${JOB_NAME}
    spec:
      restartPolicy: Never
      containers:
        - name: migrate
          image: alpine:3.20
          command:
            - sh
            - -c
            - |
              set -euo pipefail
              apk add --no-cache rsync findutils coreutils
              src_count="\$(find /source -mindepth 1 | wc -l | tr -d ' ')"
              echo "Source entries before copy: \${src_count}"
              rsync -aHAX --numeric-ids --delete /source/ /target/
              target_count="\$(find /target -mindepth 1 | wc -l | tr -d ' ')"
              echo "Target entries after copy: \${target_count}"
              if [ -f /target/PG_VERSION ] && [ -d /target/base ]; then
                pg_root="/target"
              elif [ -f /target/data/PG_VERSION ] && [ -d /target/data/base ]; then
                pg_root="/target/data"
              else
                echo "Could not find PostgreSQL data root on target" >&2
                echo "Top-level target contents:" >&2
                ls -la /target >&2 || true
                echo "Nested target/data contents:" >&2
                ls -la /target/data >&2 || true
                exit 1
              fi
              test "\${target_count}" -gt 0
              echo "Detected PostgreSQL data root: \${pg_root}"
              echo "Top-level target contents:"
              ls -la /target
              echo "Migration verification successful"
          volumeMounts:
            - name: source
              mountPath: /source
              readOnly: true
            - name: target
              mountPath: /target
      volumes:
        - name: source
          persistentVolumeClaim:
            claimName: ${SOURCE_PVC}
        - name: target
          persistentVolumeClaim:
            claimName: ${TARGET_PVC}
EOF

  info "Streaming migration job logs."
  # Best effort: log streaming may fail while the pod is still starting.
  kubectl -n "${NAMESPACE}" logs -f "job/${JOB_NAME}" || true

  info "Waiting for migration job completion."
  # Polls for Complete *or* Failed, so a failed Job is reported right away
  # instead of only after the full timeout.
  if ! wait_for_job_finished "${JOB_NAME}" "${JOB_TIMEOUT_SECONDS}"; then
    kubectl -n "${NAMESPACE}" describe job "${JOB_NAME}" || true
    kubectl -n "${NAMESPACE}" logs "job/${JOB_NAME}" --tail=-1 || true
    err "Migration job failed."
  fi
  info "Migration job completed."
}

#######################################
# Runs a throw-away pod that mounts the target PVC and checks for a
# PostgreSQL data layout (PG_VERSION plus base/, at the root or in data/).
# Globals:   NAMESPACE, TARGET_PVC (read)
#######################################
verify_target_pvc_has_data() {
  info "Verifying copied PostgreSQL markers on target PVC ${TARGET_PVC}."
  # A pod left behind by an interrupted earlier run would make `kubectl run`
  # fail on the name clash, so remove it first.
  kubectl -n "${NAMESPACE}" delete pod authentik-postgres-verify --ignore-not-found >/dev/null
  kubectl -n "${NAMESPACE}" run authentik-postgres-verify \
    --rm --restart=Never --image=alpine:3.20 \
    --overrides="{\"apiVersion\":\"v1\",\"spec\":{\"containers\":[{\"name\":\"verify\",\"image\":\"alpine:3.20\",\"command\":[\"sh\",\"-c\",\"set -e; if [ -f /target/PG_VERSION ] && [ -d /target/base ]; then root=/target; elif [ -f /target/data/PG_VERSION ] && [ -d /target/data/base ]; then root=/target/data; else echo missing-postgres-layout >&2; ls -la /target >&2 || true; ls -la /target/data >&2 || true; exit 1; fi; echo detected-root=\$root; find /target -mindepth 1 | wc -l; ls -la /target | head -n 20\"],\"volumeMounts\":[{\"name\":\"target\",\"mountPath\":\"/target\"}]}],\"volumes\":[{\"name\":\"target\",\"persistentVolumeClaim\":{\"claimName\":\"${TARGET_PVC}\"}}]}}" \
    --attach=true >/dev/null
}

#######################################
# Upgrades the Helm release so PostgreSQL uses the target PVC as an
# existing claim.
# Globals:   NAMESPACE, RELEASE, CHART, VALUES_FILE, TARGET_PVC (read)
#######################################
helm_switch_postgres_to_target_pvc() {
  info "Upgrading Helm release ${RELEASE} to use existing PostgreSQL claim ${TARGET_PVC}."
  # NOTE(review): moving from a volumeClaimTemplate to an existing claim
  # changes StatefulSet fields that Kubernetes may reject as immutable;
  # confirm this upgrade succeeds against the chart version in use.
  helm -n "${NAMESPACE}" upgrade --install "${RELEASE}" "${CHART}" \
    -f "${VALUES_FILE}" \
    --reuse-values \
    --set "postgresql.primary.persistence.enabled=true" \
    --set "postgresql.primary.persistence.existingClaim=${TARGET_PVC}" \
    --set "authentik.existingSecret.secretName=" >/dev/null
}

#######################################
# Waits for PostgreSQL to become ready again and checks that pod 0 mounts
# the target PVC.
# Globals:   NAMESPACE, POSTGRES_STS, TARGET_PVC,
#            POSTGRES_READY_TIMEOUT_SECONDS (read)
# Exits:     via err when the claim cannot be detected or differs.
#######################################
verify_postgres_uses_target_claim() {
  local mounted_claim

  info "Waiting for PostgreSQL StatefulSet to become ready after Helm upgrade."
  wait_for_statefulset_replicas "${POSTGRES_STS}" "1" "${POSTGRES_READY_TIMEOUT_SECONDS}"

  # Preferred lookup: the volume named "data".
  mounted_claim="$(kubectl -n "${NAMESPACE}" get pod "${POSTGRES_STS}-0" -o jsonpath='{.spec.volumes[?(@.name=="data")].persistentVolumeClaim.claimName}' 2>/dev/null || true)"
  if [[ -z "${mounted_claim}" ]]; then
    # Fallback: first PVC-backed volume of the pod. `|| true` keeps pipefail
    # from aborting here so the explicit error below is what gets reported.
    mounted_claim="$(kubectl -n "${NAMESPACE}" get pod "${POSTGRES_STS}-0" -o jsonpath='{.spec.volumes[?(@.persistentVolumeClaim)].persistentVolumeClaim.claimName}' 2>/dev/null | awk '{print $1}' || true)"
  fi

  [[ -n "${mounted_claim}" ]] || err "Could not detect mounted PostgreSQL claim after upgrade."
  [[ "${mounted_claim}" == "${TARGET_PVC}" ]] || err "PostgreSQL pod mounts ${mounted_claim}, expected ${TARGET_PVC}."
  info "PostgreSQL now mounts target claim ${mounted_claim}."
}

#######################################
# Scales every recorded deployment back to its original replica count.
# Does nothing unless deployments were scaled down by this script.
# Globals:   NAMESPACE, DEPLOYMENTS_TO_RESTORE, DEPLOYMENT_REPLICAS (read);
#            AUTHENTIK_SCALED_DOWN (read and reset)
#######################################
restore_authentik_deployments() {
  if [[ "${AUTHENTIK_SCALED_DOWN}" != "true" ]]; then
    return 0
  fi

  local index
  local name
  local replicas
  for index in "${!DEPLOYMENTS_TO_RESTORE[@]}"; do
    name="${DEPLOYMENTS_TO_RESTORE[${index}]}"
    replicas="${DEPLOYMENT_REPLICAS[${index}]}"
    if [[ "${replicas}" == "0" ]]; then
      info "Skipping restore of deployment ${name} to 0 replicas."
      continue
    fi
    info "Restoring deployment ${name} replicas to ${replicas}."
    kubectl -n "${NAMESPACE}" scale deploy "${name}" --replicas="${replicas}" >/dev/null
  done
  AUTHENTIK_SCALED_DOWN="false"
}

#######################################
# Best-effort rollback: restores the PostgreSQL replica count (when it was
# scaled down) and the Authentik deployments. Failures are ignored.
# Globals:   NAMESPACE, POSTGRES_STS, ORIGINAL_POSTGRES_REPLICAS (read);
#            POSTGRES_SCALED_DOWN (read and reset)
#######################################
restore_on_error() {
  if [[ "${POSTGRES_SCALED_DOWN}" == "true" ]]; then
    warn "Restoring StatefulSet ${POSTGRES_STS} replicas to ${ORIGINAL_POSTGRES_REPLICAS}."
    kubectl -n "${NAMESPACE}" scale sts "${POSTGRES_STS}" --replicas="${ORIGINAL_POSTGRES_REPLICAS}" >/dev/null || true
    POSTGRES_SCALED_DOWN="false"
  fi
  restore_authentik_deployments || true
}

# EXIT trap handler: on a non-zero exit code, roll back and report it.
on_exit() {
  local exit_code=$?
  if [[ ${exit_code} -ne 0 ]]; then
    restore_on_error
    warn "Script failed with exit code ${exit_code}."
  fi
}

trap on_exit EXIT

#######################################
# Checks that kubectl, helm and the values file are available and that the
# namespace and the Helm release exist.
# Globals:   VALUES_FILE, NAMESPACE, RELEASE (read)
# Exits:     via err on the first failed check.
#######################################
validate_prerequisites() {
  require_tool kubectl
  require_tool helm
  require_file "${VALUES_FILE}"
  kubectl get namespace "${NAMESPACE}" >/dev/null 2>&1 || err "Namespace does not exist: ${NAMESPACE}"
  helm -n "${NAMESPACE}" status "${RELEASE}" >/dev/null 2>&1 || err "Helm release not found: ${RELEASE} in namespace ${NAMESPACE}"
}

#######################################
# Asks the operator to type MIGRATE unless --yes was given.
# Globals:   AUTO_CONFIRM, NAMESPACE (read)
# Exits:     via err when the typed text is anything else (including EOF).
#######################################
confirm_migration() {
  if [[ "${AUTO_CONFIRM}" == "true" ]]; then
    return 0
  fi

  local confirmation=""
  echo
  warn "Authentik and PostgreSQL workloads in namespace ${NAMESPACE} will be scaled down during migration."
  # On EOF `read` fails; fall through to the explicit error below instead of
  # letting `set -e` end the script without an explanation.
  read -r -p "Type MIGRATE to continue: " confirmation || confirmation=""
  if [[ "${confirmation}" != "MIGRATE" ]]; then
    err "Confirmation failed. Aborted by user."
  fi
}

# Prints the steps a real run would perform; changes nothing in the cluster.
print_dry_run_plan() {
  info "Dry-run mode active. No cluster changes will be made."
  info "Planned steps:"
  info "1) Discover PostgreSQL StatefulSet, pod, source PVC, and size"
  info "2) Create Longhorn target PVC ${TARGET_PVC}"
  info "3) Scale down Authentik deployments and PostgreSQL StatefulSet"
  info "4) Run rsync migration job ${JOB_NAME}"
  info "5) Verify copied DB files on target PVC"
  info "6) Helm upgrade release ${RELEASE} with postgresql.primary.persistence.existingClaim=${TARGET_PVC}"
  info "7) Verify new PostgreSQL pod uses target PVC"
  info "8) Restore Authentik deployments"
}

#######################################
# Runs the migration steps in order, or only prints the plan in dry-run mode
# (discovery is read-only and runs in both modes).
# Globals:   DRY_RUN, TARGET_PVC (read); POSTGRES_SCALED_DOWN (written)
#######################################
run_migration() {
  discover_postgres_runtime

  if [[ "${DRY_RUN}" == "true" ]]; then
    print_dry_run_plan
    return 0
  fi

  confirm_migration
  ensure_target_pvc
  scale_down_authentik
  scale_down_postgres
  run_migration_job
  verify_target_pvc_has_data
  helm_switch_postgres_to_target_pvc
  # From here on the exit handler no longer scales the StatefulSet itself.
  POSTGRES_SCALED_DOWN="false"
  verify_postgres_uses_target_claim
  restore_authentik_deployments

  info "Migration finished successfully."
  info "PostgreSQL is now running on Longhorn PVC ${TARGET_PVC}."
}

# Entry point: parse options, validate the environment, run the migration.
main() {
  parse_args "$@"
  info "Starting Authentik PostgreSQL migration to Longhorn PVC."
  validate_prerequisites
  run_migration
}

main "$@"