#!/usr/bin/env bash
#
# Moves the Gitea PostgreSQL data from the existing PVC to a Longhorn-backed
# PVC using a one-shot Kubernetes Job, then repoints the PostgreSQL deployment
# at the new claim.
#
# Flow: apply target PVC -> scale Gitea and PostgreSQL to 0 -> run copy job ->
# verify target content -> patch deployment -> start PostgreSQL -> start Gitea.
# On any failure the EXIT trap restores the previous claim and replica counts.
#
# Usage: migrate-postgres-to-longhorn.sh [--dry-run] [--yes] [--help]
#
# Requires: kubectl on PATH, plus postgres-longhorn-pvc.yaml and
# migrate-postgres-to-longhorn.yaml next to this script.
set -euo pipefail

# ---------------------------------------------------------------------------
# Fixed configuration
# ---------------------------------------------------------------------------
readonly NAMESPACE="gitea"
readonly GITEA_DEPLOYMENT="gitea"
readonly POSTGRES_DEPLOYMENT="postgres"
readonly GITEA_POD_SELECTOR="app=gitea"
readonly POSTGRES_POD_SELECTOR="app=postgres"
readonly POSTGRES_VOLUME_NAME="postgres-storage"
readonly JOB_NAME="gitea-postgres-migrate-to-longhorn"
readonly VERIFY_POD_NAME="gitea-postgres-verify"
readonly VERIFY_IMAGE="alpine:3.20"

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly TARGET_PVC_MANIFEST="${SCRIPT_DIR}/postgres-longhorn-pvc.yaml"
readonly JOB_MANIFEST="${SCRIPT_DIR}/migrate-postgres-to-longhorn.yaml"

readonly SOURCE_PVC="postgres-pvc"
readonly TARGET_PVC="postgres-longhorn-pvc"

readonly PVC_BIND_TIMEOUT_SECONDS=900
readonly POD_STOP_TIMEOUT_SECONDS=300
readonly POSTGRES_READY_TIMEOUT_SECONDS=300
readonly GITEA_READY_TIMEOUT_SECONDS=300
readonly JOB_TIMEOUT_SECONDS=3600

# ---------------------------------------------------------------------------
# Mutable run state (read by restore_on_error to decide what to roll back)
# ---------------------------------------------------------------------------
ORIGINAL_GITEA_REPLICAS="1"
ORIGINAL_POSTGRES_REPLICAS="1"
SOURCE_NODE_NAME=""
SCALED_DOWN_GITEA="false"
SCALED_DOWN_POSTGRES="false"
POSTGRES_CLAIM_SWITCHED="false"
DRY_RUN="false"
AUTO_CONFIRM="false"

# Prints an informational message to stdout.
info() {
  printf '%s\n' "[INFO] $*"
}

# Prints a warning to stderr.
warn() {
  printf '%s\n' "[WARN] $*" >&2
}

# Prints an error to stderr and terminates the script with status 1.
# The EXIT trap (on_exit) performs the rollback.
err() {
  printf '%s\n' "[ERROR] $*" >&2
  exit 1
}

# Prints the command-line help text to stdout.
usage() {
  cat <<'EOF'
Usage: migrate-postgres-to-longhorn.sh [--dry-run] [--yes] [--help]

Options:
  --dry-run   Show planned actions without changing cluster state.
  --yes       Skip interactive confirmation before scaling down Gitea.
  --help      Show this help.
EOF
}

#######################################
# Parses command-line flags.
# Globals:   DRY_RUN, AUTO_CONFIRM (written)
# Arguments: the script's positional parameters
# Returns:   exits 0 after printing help; exits 1 on an unknown argument
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN="true"
        ;;
      --yes)
        AUTO_CONFIRM="true"
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        err "Unknown argument: $1"
        ;;
    esac
    shift
  done
}

# Aborts unless the given executable ($1) is available on PATH.
require_tool() {
  local tool="$1"
  command -v "${tool}" >/dev/null 2>&1 || err "Required tool not found: ${tool}"
}

# Aborts unless the given path ($1) is an existing regular file.
require_file() {
  local path="$1"
  [[ -f "${path}" ]] || err "Required file not found: ${path}"
}

#######################################
# Polls a PVC every 5 seconds until its phase is "Bound".
# Arguments: $1 - PVC name, $2 - timeout in seconds
# Returns:   0 once bound; exits via err on timeout
#######################################
wait_for_pvc_bound() {
  local pvc_name="$1"
  local timeout="$2"
  local elapsed=0
  local interval=5
  local phase

  info "Waiting for PVC ${pvc_name} to become Bound..."
  while true; do
    # Lookup errors are treated as "not bound yet" and retried.
    phase="$(kubectl -n "${NAMESPACE}" get pvc "${pvc_name}" \
      -o jsonpath='{.status.phase}' 2>/dev/null || true)"
    if [[ "${phase}" == "Bound" ]]; then
      info "PVC ${pvc_name} is Bound."
      return 0
    fi
    if ((elapsed >= timeout)); then
      err "Timeout while waiting for PVC ${pvc_name} to bind."
    fi
    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}

#######################################
# Polls every 5 seconds until no pods matching the selector remain, ignoring
# pods whose STATUS column reads Completed or Succeeded.
# Arguments: $1 - label selector, $2 - timeout in seconds
# Returns:   0 once no pods are left; exits via err on timeout
#######################################
wait_for_no_pods() {
  local selector="$1"
  local timeout="$2"
  local elapsed=0
  local interval=5
  local count

  info "Waiting for pods with selector ${selector} to stop..."
  while true; do
    # Column 3 of the default `kubectl get pods` output is STATUS.
    count="$(kubectl -n "${NAMESPACE}" get pods -l "${selector}" --no-headers 2>/dev/null \
      | awk '$3 != "Completed" && $3 != "Succeeded" {count++} END {print count+0}')"
    if [[ "${count}" == "0" ]]; then
      info "No pods left for selector ${selector}."
      return 0
    fi
    if ((elapsed >= timeout)); then
      err "Timeout while waiting for pods with selector ${selector} to stop."
    fi
    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done
}

#######################################
# Records the node of the first PostgreSQL pod so the migration job can be
# scheduled onto the same node.
# Globals:   SOURCE_NODE_NAME (written)
# Returns:   exits via err when no node name can be determined
#######################################
discover_source_node() {
  SOURCE_NODE_NAME="$(kubectl -n "${NAMESPACE}" get pod -l "${POSTGRES_POD_SELECTOR}" \
    -o jsonpath='{.items[0].spec.nodeName}' 2>/dev/null || true)"
  [[ -n "${SOURCE_NODE_NAME}" ]] \
    || err "Could not determine the current PostgreSQL node before shutdown."
  info "Using source node ${SOURCE_NODE_NAME} for the migration job."
}

#######################################
# Applies the job manifest with a `nodeName:` line added directly after every
# `restartPolicy: Never` line. The new line copies the leading whitespace of
# the line it follows, so it is a sibling key regardless of how deeply the
# manifest is indented.
# Globals:   JOB_MANIFEST, SOURCE_NODE_NAME (read)
#######################################
apply_migration_job() {
  if ! grep -q 'restartPolicy: Never' -- "${JOB_MANIFEST}"; then
    warn "No 'restartPolicy: Never' line in ${JOB_MANIFEST}; job will not be pinned to ${SOURCE_NODE_NAME}."
  fi

  awk -v node="${SOURCE_NODE_NAME}" '
    { print }
    /restartPolicy: Never/ {
      match($0, /^[ \t]*/)
      printf "%snodeName: %s\n", substr($0, 1, RLENGTH), node
    }
  ' "${JOB_MANIFEST}" | kubectl apply -f - >/dev/null
}

#######################################
# Blocks until the deployment's rollout has finished.
# Arguments: $1 - deployment name, $2 - timeout in seconds
# Returns:   non-zero (aborting under `set -e`) if the rollout does not finish
#######################################
wait_for_deployment_ready() {
  local deployment_name="$1"
  local timeout="$2"

  info "Waiting for deployment ${deployment_name} rollout to finish..."
  kubectl -n "${NAMESPACE}" rollout status "deployment/${deployment_name}" \
    --timeout="${timeout}s" >/dev/null
}

#######################################
# Polls the migration job every 5 seconds until it reports the Complete
# condition. A Failed condition ends the wait immediately instead of running
# out the whole timeout while the database is down.
# Arguments: $1 - timeout in seconds
# Returns:   0 on completion; otherwise dumps job description and logs and
#            exits via err
#######################################
wait_for_job_complete() {
  local timeout="$1"
  local elapsed=0
  local interval=5
  local complete
  local failed

  info "Waiting for migration job ${JOB_NAME} to complete..."
  while true; do
    complete="$(kubectl -n "${NAMESPACE}" get job "${JOB_NAME}" \
      -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null || true)"
    if [[ "${complete}" == "True" ]]; then
      info "Migration job completed successfully."
      return 0
    fi

    failed="$(kubectl -n "${NAMESPACE}" get job "${JOB_NAME}" \
      -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null || true)"
    if [[ "${failed}" == "True" ]] || ((elapsed >= timeout)); then
      break
    fi

    sleep "${interval}"
    elapsed=$((elapsed + interval))
  done

  warn "Migration job did not complete in time or failed."
  # Diagnostics are best-effort; their failure must not mask the real error.
  kubectl -n "${NAMESPACE}" describe job "${JOB_NAME}" || true
  kubectl -n "${NAMESPACE}" logs "job/${JOB_NAME}" --tail=-1 || true
  err "Migration job failed."
}

#######################################
# Confirms the job reported one successful pod, then runs a throwaway pod that
# mounts the target PVC and checks that PG_VERSION and base/ exist.
# Globals:   JOB_NAME, TARGET_PVC, VERIFY_POD_NAME, VERIFY_IMAGE (read)
# Returns:   exits via err / `set -e` when either check fails
#######################################
verify_target_data() {
  local verification
  local overrides

  verification="$(kubectl -n "${NAMESPACE}" get job "${JOB_NAME}" \
    -o jsonpath='{.status.succeeded}' 2>/dev/null || true)"
  [[ "${verification}" == "1" ]] || err "Migration job did not report success."

  info "Verifying target PVC content markers before starting Gitea."

  # A pod left behind by an interrupted earlier run would make `kubectl run`
  # fail because the name is already taken.
  kubectl -n "${NAMESPACE}" delete pod "${VERIFY_POD_NAME}" --ignore-not-found >/dev/null

  overrides="$(
    cat <<EOF
{
  "apiVersion": "v1",
  "spec": {
    "containers": [{
      "name": "verify",
      "image": "${VERIFY_IMAGE}",
      "command": ["sh", "-c", "set -e; test -f /target/PG_VERSION; test -d /target/base; find /target -mindepth 1 | wc -l; ls -la /target | head -n 20"],
      "volumeMounts": [{
        "name": "target",
        "mountPath": "/target"
      }]
    }],
    "volumes": [{
      "name": "target",
      "persistentVolumeClaim": {
        "claimName": "${TARGET_PVC}"
      }
    }]
  }
}
EOF
  )"

  kubectl -n "${NAMESPACE}" run "${VERIFY_POD_NAME}" \
    --rm --restart=Never --image="${VERIFY_IMAGE}" \
    --overrides="${overrides}" \
    --attach=true >/dev/null
}

#######################################
# Points the PostgreSQL deployment's data volume at the given PVC.
# Arguments: $1 - claim name to use
#######################################
patch_postgres_claim() {
  local claim_name="$1"

  kubectl -n "${NAMESPACE}" patch deployment "${POSTGRES_DEPLOYMENT}" \
    --type=strategic \
    -p "{\"spec\":{\"template\":{\"spec\":{\"volumes\":[{\"name\":\"${POSTGRES_VOLUME_NAME}\",\"persistentVolumeClaim\":{\"claimName\":\"${claim_name}\"}}]}}}}" >/dev/null
}

#######################################
# Best-effort rollback: reverts the claim switch and restores the replica
# counts of whatever was scaled down. Every step tolerates failure so that one
# failed step does not prevent the others.
# Globals:   POSTGRES_CLAIM_SWITCHED (read/written); SCALED_DOWN_POSTGRES,
#            SCALED_DOWN_GITEA, ORIGINAL_*_REPLICAS (read)
#######################################
restore_on_error() {
  if [[ "${POSTGRES_CLAIM_SWITCHED}" == "true" ]]; then
    warn "Migration failed after deployment patch. Switching PostgreSQL back to ${SOURCE_PVC}."
    patch_postgres_claim "${SOURCE_PVC}" || true
    POSTGRES_CLAIM_SWITCHED="false"
  fi

  if [[ "${SCALED_DOWN_POSTGRES}" == "true" ]]; then
    warn "Restoring PostgreSQL replicas to ${ORIGINAL_POSTGRES_REPLICAS}."
    kubectl -n "${NAMESPACE}" scale deployment "${POSTGRES_DEPLOYMENT}" \
      --replicas="${ORIGINAL_POSTGRES_REPLICAS}" >/dev/null || true
  fi

  if [[ "${SCALED_DOWN_GITEA}" == "true" ]]; then
    warn "Restoring Gitea replicas to ${ORIGINAL_GITEA_REPLICAS}."
    kubectl -n "${NAMESPACE}" scale deployment "${GITEA_DEPLOYMENT}" \
      --replicas="${ORIGINAL_GITEA_REPLICAS}" >/dev/null || true
  fi
}

# EXIT trap handler: rolls back and reports when the script ends non-zero.
on_exit() {
  local exit_code=$?
  if [[ ${exit_code} -ne 0 ]]; then
    restore_on_error
    warn "Script failed with exit code ${exit_code}."
  fi
}

trap on_exit EXIT

#######################################
# Checks that kubectl, both manifests, the namespace, both deployments and the
# source PVC exist before anything is changed.
# Returns:   exits via err on the first missing prerequisite
#######################################
validate_prerequisites() {
  require_tool kubectl
  require_file "${TARGET_PVC_MANIFEST}"
  require_file "${JOB_MANIFEST}"

  kubectl get namespace "${NAMESPACE}" >/dev/null 2>&1 \
    || err "Namespace does not exist: ${NAMESPACE}"
  kubectl -n "${NAMESPACE}" get deployment "${GITEA_DEPLOYMENT}" >/dev/null 2>&1 \
    || err "Deployment not found: ${GITEA_DEPLOYMENT}"
  kubectl -n "${NAMESPACE}" get deployment "${POSTGRES_DEPLOYMENT}" >/dev/null 2>&1 \
    || err "Deployment not found: ${POSTGRES_DEPLOYMENT}"
  kubectl -n "${NAMESPACE}" get pvc "${SOURCE_PVC}" >/dev/null 2>&1 \
    || err "Source PVC not found: ${SOURCE_PVC}"
}

#######################################
# Asks the operator to type MIGRATE before any downtime is caused.
# Skipped when --yes was given.
# Globals:   AUTO_CONFIRM (read)
# Returns:   0 when confirmed; exits via err otherwise
#######################################
confirm_scale_down() {
  local confirmation=""

  if [[ "${AUTO_CONFIRM}" == "true" ]]; then
    return 0
  fi

  echo
  warn "Gitea and PostgreSQL in namespace ${NAMESPACE} will be stopped for the migration."
  # `read` returns non-zero at end of input (for example a closed stdin).
  # Tolerate that here so the comparison below produces the proper error
  # message instead of `set -e` aborting silently.
  read -r -p "Type MIGRATE to continue: " confirmation || true
  if [[ "${confirmation}" != "MIGRATE" ]]; then
    err "Confirmation failed. Aborted by user."
  fi
}

# Lists the steps a real run would perform; makes no cluster changes.
print_dry_run_plan() {
  info "Dry-run mode active. No cluster changes will be made."
  info "Planned steps:"
  info "1) Apply ${TARGET_PVC_MANIFEST}"
  info "2) Wait for PVC ${TARGET_PVC} to be Bound"
  info "3) Scale deployments ${GITEA_DEPLOYMENT} and ${POSTGRES_DEPLOYMENT} to 0"
  info "4) Run job ${JOB_NAME} from ${JOB_MANIFEST}"
  info "5) Verify PG_VERSION and base/ exist on ${TARGET_PVC}"
  info "6) Patch ${POSTGRES_DEPLOYMENT} to use ${TARGET_PVC}"
  info "7) Start PostgreSQL and wait until ready"
  info "8) Start Gitea after PostgreSQL is ready"
}

#######################################
# Executes the migration end to end (or prints the plan in dry-run mode).
# The SCALED_DOWN_* / POSTGRES_CLAIM_SWITCHED flags are flipped right after
# each mutating step so the EXIT trap knows exactly what to undo.
# Globals:   ORIGINAL_*_REPLICAS, SCALED_DOWN_*, POSTGRES_CLAIM_SWITCHED,
#            SOURCE_NODE_NAME (written); DRY_RUN (read)
#######################################
run_migration() {
  ORIGINAL_GITEA_REPLICAS="$(kubectl -n "${NAMESPACE}" get deployment "${GITEA_DEPLOYMENT}" \
    -o jsonpath='{.spec.replicas}')"
  ORIGINAL_POSTGRES_REPLICAS="$(kubectl -n "${NAMESPACE}" get deployment "${POSTGRES_DEPLOYMENT}" \
    -o jsonpath='{.spec.replicas}')"
  ORIGINAL_GITEA_REPLICAS="${ORIGINAL_GITEA_REPLICAS:-1}"
  ORIGINAL_POSTGRES_REPLICAS="${ORIGINAL_POSTGRES_REPLICAS:-1}"

  # Must happen while the PostgreSQL pod still exists.
  discover_source_node

  if [[ "${DRY_RUN}" == "true" ]]; then
    print_dry_run_plan
    return 0
  fi

  confirm_scale_down

  info "Applying Longhorn target PVC manifest."
  kubectl apply -f "${TARGET_PVC_MANIFEST}" >/dev/null
  wait_for_pvc_bound "${TARGET_PVC}" "${PVC_BIND_TIMEOUT_SECONDS}"

  info "Scaling Gitea down first to stop writes."
  kubectl -n "${NAMESPACE}" scale deployment "${GITEA_DEPLOYMENT}" --replicas=0 >/dev/null
  SCALED_DOWN_GITEA="true"
  wait_for_no_pods "${GITEA_POD_SELECTOR}" "${POD_STOP_TIMEOUT_SECONDS}"

  info "Scaling PostgreSQL down for a consistent filesystem copy."
  kubectl -n "${NAMESPACE}" scale deployment "${POSTGRES_DEPLOYMENT}" --replicas=0 >/dev/null
  SCALED_DOWN_POSTGRES="true"
  wait_for_no_pods "${POSTGRES_POD_SELECTOR}" "${POD_STOP_TIMEOUT_SECONDS}"

  info "Recreating migration job ${JOB_NAME}."
  kubectl -n "${NAMESPACE}" delete job "${JOB_NAME}" --ignore-not-found >/dev/null
  apply_migration_job

  info "Streaming migration job logs."
  # Best-effort: the pod may not be running yet; the job outcome is decided by
  # wait_for_job_complete, not by the log stream.
  kubectl -n "${NAMESPACE}" logs -f "job/${JOB_NAME}" || true

  wait_for_job_complete "${JOB_TIMEOUT_SECONDS}"
  verify_target_data

  info "Switching PostgreSQL deployment to Longhorn PVC ${TARGET_PVC}."
  patch_postgres_claim "${TARGET_PVC}"
  POSTGRES_CLAIM_SWITCHED="true"

  info "Starting PostgreSQL on Longhorn."
  kubectl -n "${NAMESPACE}" scale deployment "${POSTGRES_DEPLOYMENT}" \
    --replicas="${ORIGINAL_POSTGRES_REPLICAS}" >/dev/null
  SCALED_DOWN_POSTGRES="false"
  wait_for_deployment_ready "${POSTGRES_DEPLOYMENT}" "${POSTGRES_READY_TIMEOUT_SECONDS}"

  info "Starting Gitea after PostgreSQL verification succeeded."
  kubectl -n "${NAMESPACE}" scale deployment "${GITEA_DEPLOYMENT}" \
    --replicas="${ORIGINAL_GITEA_REPLICAS}" >/dev/null
  SCALED_DOWN_GITEA="false"
  wait_for_deployment_ready "${GITEA_DEPLOYMENT}" "${GITEA_READY_TIMEOUT_SECONDS}"

  info "Migration finished successfully."
  info "Source PVC ${SOURCE_PVC} remains untouched as rollback source."
}

# Entry point: parse flags, validate the environment, run the migration.
main() {
  parse_args "$@"
  info "Starting Gitea PostgreSQL migration from NFS to Longhorn."
  validate_prerequisites
  run_migration
}

main "$@"