#!/bin/bash

# a script to manage snapshots for Nextcloud and MariaDB

# Print the absolute, physical directory that contains this script file.
# When the script is reached through one or more symlinks, the links are
# followed first so the directory of the real file is reported.
# Outputs: the directory path on stdout (an empty line when the final
#          directory cannot be entered)
get_script_dir()
{
    local script_path="${BASH_SOURCE[0]}"
    local link_dir
    local real_dir
    # keep dereferencing for as long as the path is still a symlink
    while [[ -L "$script_path" ]]; do
        # physical directory that holds the current link
        link_dir="$(cd -P "$(dirname "$script_path")" >/dev/null 2>&1 && pwd)"
        # one level of link resolution; the target may be relative or absolute
        script_path="$(readlink "$script_path")"
        case "$script_path" in
            /*)
                # absolute target: use it as it is
                ;;
            *)
                # relative target: it is relative to the link's directory
                script_path="${link_dir}/${script_path}"
                ;;
        esac
    done
    # directory of the fully resolved file
    real_dir="$(cd -P "$(dirname "$script_path")" >/dev/null 2>&1 && pwd)"
    printf '%s\n' "$real_dir"
}

# ---- configuration ----
# Every setting is exported, so child processes started by this script
# inherit it.

# the email recipients for error reports (first argument of notify)
export NOTIFICATION_EMAIL_RECIPIENTS='someone@example.com'
# the location of standalone-tn-send-email; notify runs it with python3
export SENDEMAIL="/mnt/tank/bin/sendemail.py"
# the dataset to be snapshotted (used by take_snapshots)
export DATASET="tank/nextcloud"

# the directory of this script (symlinks resolved by get_script_dir)
export MY_DIR
MY_DIR="$(get_script_dir)"

# the synchronization directory as seen from inside the MariaDB container
export SYNC_DIRECTORY_IN_CONTAINER='/snapshot_sync'
# the MariaDB client configuration file (passed as --defaults-file), in-container path
export BACKUP_STAGE_CLIENT_CONF="${SYNC_DIRECTORY_IN_CONTAINER}/backup_stage.cnf"
# the backup stage SQL fed to the mariadb client, in-container path
export BACKUP_STAGE_SQL="${SYNC_DIRECTORY_IN_CONTAINER}/backup_stage.sql"

# the directory on the host where the backup stage output is logged
export BACKUP_STAGE_LOG_DIR='/mnt/tank/logs/nc/mariadb'
# the directory inside the container where the backup stage output is logged
# (presumably the same directory as BACKUP_STAGE_LOG_DIR through a mount - TODO confirm)
export BACKUP_STAGE_LOG_DIR_IN_CONTAINER='/mariadb/log'
# the leading part of the backup stage log file name; the run timestamp is appended to it
export BACKUP_STAGE_COMMAND_FILENAME_BASE="backup_stage_"
# the file name pattern used by find when purging old backup stage logs
export BACKUP_STAGE_COMMAND_FILENAME_PATTERN="backup_stage_*.log"
# the name of the MariaDB container that "docker exec" runs the backup stage in
export CONTAINER_MARIADB_NAME='nextcloud-mariadb'

# the host directory used for synchronization between the backup stage
# subscript and this script
export SYNC_DIRECTORY="${MY_DIR}/mariadb${SYNC_DIRECTORY_IN_CONTAINER}"
# a tmp file that signals the database lock.
# the existence of the file means the database is in backup stage.
# NOTE(review): "bakcup" is part of the real file name; it presumably has to
# match the name used by the backup stage SQL - verify before renaming.
export MARIADB_BACKUP_STAGE_SIGNAL_FILE="${SYNC_DIRECTORY}/nc_mariadb_bakcup_stage_started.txt"
# a tmp file that signals that the snapshot step is over.
# this script creates it after the snapshot step (also when no snapshot was
# taken) so that the backup stage subscript can continue.
export SNAPSHOT_TAKEN_SIGNAL_FILE="${SYNC_DIRECTORY}/nc_snapshot_taken.txt"
# the timeout in seconds for waiting on the backup stage to start, and again to end
export BACKUP_STAGE_TIMEOUT_IN_SECONDS=60
# the hour of the day at which a daily snapshot is taken as well
export DAILY_HOUR=00
# the number of days to keep hourly snapshots
export HOURLY_SNAPSHOT_EXIPRE_IN_DAYS='3'
# the number of days to keep daily snapshots
export DAILY_SNAPSHOT_EXIPRE_IN_DAYS='14'
# the directory for this script's logs on the host
export LOG_DIR='/mnt/tank/logs/nc/snapshots'
# the number of days the logs should be kept
export LOGS_DATES_TO_KEEP='14'
# the prefix for log files
export LOG_FILE_PREFIX='nc_sanpshot_'
# the extension for log files (without the dot)
export LOG_EXT='log'
# the file name pattern used by find to remove expired logs
export LOG_PATTERN="${LOG_FILE_PREFIX}*.${LOG_EXT}"


# Convert plain text to an HTML fragment.
# arguments:
#	$1 the text; backslash escapes such as "\n" are expanded once, because
#	   callers build their messages with literal "\n" line separators
# outputs:
#	the text on stdout with "&", "<" and ">" replaced by HTML entities and
#	every line break replaced by "<br />" followed by a line break
convert_text_to_html()
{
	local text escaped
	local nl=$'\n'
	# expand backslash escapes the same way "echo -e" does
	text="$(printf '%b' "$1")"
	# "&" must be escaped first, otherwise the entities produced for "<" and
	# ">" would be escaped a second time.
	# In a sed replacement a bare "&" stands for the matched text, so the
	# literal ampersand of each entity is written as "\&".
	escaped="$(printf '%s\n' "$text" \
		| sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g')"
	# replace "\n" with "<br />\n"
	printf '%s\n' "${escaped//$nl/<br />$nl}"
}

# send notification.
# current implementation is email
# arguments:
#	$1 recipient email addresses
#	$2 subject (the host name is prepended in square brackets)
#	$3 content; backslash escapes such as "\n" are expanded before the text
#	   is written to the mail body file
# globals:
#	SENDEMAIL (read) the python script that sends the mail
# returns:
#	the exit status of the mailer, or 1 when the body file cannot be created
notify()
{
	local content_path recipients subject content host t rc
	recipients="$1"
	subject="$2"
	content="$3"
	host="$(hostname)"
	# a private, unpredictable temporary file instead of a fixed name in /tmp,
	# so that concurrent runs cannot overwrite each other's mail body
	content_path="$(mktemp "${TMPDIR:-/tmp}/nc_mariadb_backup_email_content.XXXXXX")" || {
		echo "notify: cannot create a temporary file for the email body." >&2
		return 1
	}
	# mail is removed from TrueNAS 24.10.0.2
	# sending the mail_body_html via file content may avoid some passing issue.
	printf '%b\n' "$content" > "$content_path"
	echo "----"
	t="$(date +'%H:%M:%S')"
	echo "${t} Email sending..."
	# send email
	python3 "$SENDEMAIL" \
		--subject "[${host}] $subject" \
		--to_address "$recipients" \
		--mail_body_html "$content_path"
	rc=$?
	# refresh the timestamp: sending may have taken a while
	t="$(date +'%H:%M:%S')"
	echo "${t} sendemail.py returned."
	if [ 0 -ne "$rc" ]
	then
		echo "${t} sendemail.py failed with status ${rc}."
	fi
	echo "----"
	# assumes sendemail.py has finished reading the body file by the time it
	# returns - TODO confirm
	rm -f -- "$content_path"
	return "$rc"
}


# Take the hourly snapshot of $DATASET and, when the run falls into the
# daily hour, a second snapshot with the "-daily" suffix.
# globals:
#	ts (read)          run timestamp, format YYYY-MM-DD_HH-MM-SS followed by the UTC offset
#	DATASET (read)     the dataset to snapshot recursively
#	DAILY_HOUR (read)  the hour of the day that also gets a daily snapshot
# outputs:
#	progress messages on stdout
# returns:
#	0 when every snapshot was taken; the status of the first failing
#	"zfs snapshot" otherwise; 1 when $ts is not in the expected format
take_snapshots() {
	local snapshot_name tm hour rc drc hourly daily t
	# group 1: YYYY-MM-DD_HH-MM (used in the snapshot name), group 4: the hour
	local ts_pattern='^([0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])_([01][0-9]|2[0-3])-[0-5][0-9])'

	if [[ ! "$ts" =~ $ts_pattern ]]
	then
		# without this guard a malformed timestamp would create "@auto-"
		t="$(date +'%H:%M:%S')"
		echo "${t} timestamp '${ts}' is not in the expected format. No snapshot is taken."
		return 1
	fi
	tm="${BASH_REMATCH[1]}"
	hour="${BASH_REMATCH[4]}"

	# take hourly snapshots
	snapshot_name="auto-${tm}"
	hourly="${DATASET}@${snapshot_name}"

	t="$(date +'%H:%M:%S')"
	echo "${t} snapshot '${hourly}' taking..."
	zfs snapshot -r "${hourly}"
	rc=$?
	t="$(date +'%H:%M:%S')"
	if [ 0 -eq "$rc" ]
	then
		echo "${t} snapshot '${hourly}' taken."
	else
		echo "${t} snapshot '${hourly}' failed."
	fi

	# "[" compares the numbers as decimal, so hours like "08" are fine
	if [ "$hour" -eq "$DAILY_HOUR" ]
	then
		# take a daily snapshot as well
		daily="${hourly}-daily"
		t="$(date +'%H:%M:%S')"
		echo "${t} snapshot '${daily}' taking..."
		zfs snapshot -r "${daily}"
		drc=$?
		# keep the status of the first failure
		if [ 0 -eq "$rc" ]
		then
			rc="$drc"
		fi
		t="$(date +'%H:%M:%S')"
		if [ 0 -eq "$drc" ]
		then
			echo "${t} snapshot '${daily}' taken."
		else
			echo "${t} snapshot '${daily}' failed."
		fi
	fi

	return "$rc"
}

# Destroy every snapshot of a list whose name ends in a timestamp that is
# older than the given age.
# arguments:
#	$1 newline separated snapshot names
#	$2 the text following the timestamp at the end of the name ('' or '-daily')
#	$3 maximum age in seconds
#	$4 the current time in seconds since the epoch
_purge_expired_snapshots() {
	local names="$1" suffix="$2" max_age="$3" now="$4"
	local name_pattern snapshot stamp taken_at t
	# groups: 1 = YYYY-MM-DD, 2 = HH, 3 = MM
	name_pattern="([0-9]{4}-[0-9]{2}-[0-9]{2})_([0-9]{2})-([0-9]{2})${suffix}\$"

	while IFS= read -r snapshot
	do
		# skips blank lines (an empty list still yields one empty line) and
		# snapshots of the other kind
		[[ "$snapshot" =~ $name_pattern ]] || continue
		stamp="${BASH_REMATCH[1]} ${BASH_REMATCH[2]}:${BASH_REMATCH[3]}"
		# never purge a snapshot whose age cannot be determined
		if ! taken_at="$(date --date "$stamp" +'%s' 2>/dev/null)" || [ -z "$taken_at" ]
		then
			t="$(date +'%H:%M:%S')"
			echo "${t} '$snapshot' skipped: cannot parse timestamp '${stamp}'."
			continue
		fi
		if [ "$((now - taken_at))" -gt "$max_age" ]
		then
			t="$(date +'%H:%M:%S')"
			echo "${t} '$snapshot' purging..."
			if zfs destroy "$snapshot"
			then
				t="$(date +'%H:%M:%S')"
				echo "${t} '$snapshot' purged."
			else
				t="$(date +'%H:%M:%S')"
				echo "${t} '$snapshot' purge failed."
			fi
		fi
	done <<< "$names"
}

# Purge expired hourly and daily snapshots of $DATASET and its descendants.
# Hourly snapshots end in YYYY-MM-DD_HH-MM, daily ones in
# YYYY-MM-DD_HH-MM-daily; the age is computed from that timestamp.
# globals:
#	DATASET (read; falls back to tank/nextcloud when unset or empty)
#	HOURLY_SNAPSHOT_EXIPRE_IN_DAYS (read)
#	DAILY_SNAPSHOT_EXIPRE_IN_DAYS (read)
# outputs:
#	progress messages on stdout
# returns:
#	always 0
purge_old_snapshots() {

	local t snapshots hourly_max_duration daily_max_duration now
	hourly_max_duration="$((HOURLY_SNAPSHOT_EXIPRE_IN_DAYS * 24 * 3600))"
	daily_max_duration="$((DAILY_SNAPSHOT_EXIPRE_IN_DAYS * 24 * 3600))"

	snapshots="$(zfs list -H -o name -t snapshot -r "${DATASET:-tank/nextcloud}")"
	now="$(date +'%s')"

	t="$(date +'%H:%M:%S')"
	echo "${t} old hourly snapshots purging..."
	_purge_expired_snapshots "$snapshots" '' "$hourly_max_duration" "$now"
	t="$(date +'%H:%M:%S')"
	echo "${t} old hourly snapshots purged."

	echo "${t} old daily snapshots purging..."
	_purge_expired_snapshots "$snapshots" '-daily' "$daily_max_duration" "$now"
	t="$(date +'%H:%M:%S')"
	echo "${t} old daily snapshots purged."

	return 0
}


# Delete expired log files.
# Snapshot logs are searched in $LOG_DIR by $LOG_PATTERN, backup stage logs
# in $BACKUP_STAGE_LOG_DIR by $BACKUP_STAGE_COMMAND_FILENAME_PATTERN; a file
# is removed when find's -mtime "+$LOGS_DATES_TO_KEEP" test selects it.
# outputs:
#	progress messages and the names of removed files (rm -v) on stdout
purge_old_logs() {
	local stamp
	local expiry="+${LOGS_DATES_TO_KEEP}"

	# snapshot logs
	stamp="$(date +'%H:%M:%S')"
	printf '%s\n' "${stamp} old snapshot logs purging..."
	find "$LOG_DIR" -type f -name "$LOG_PATTERN" -mtime "$expiry" \
		-exec rm -vf {} \;
	stamp="$(date +'%H:%M:%S')"
	printf '%s\n' \
		"${stamp} old snapshot logs purged." \
		"${stamp} old backup stage logs purging..."

	# backup stage logs
	find "$BACKUP_STAGE_LOG_DIR" -type f \
		-name "$BACKUP_STAGE_COMMAND_FILENAME_PATTERN" -mtime "$expiry" \
		-exec rm -vf {} \;
	stamp="$(date +'%H:%M:%S')"
	printf '%s\n' "${stamp} old backup stage logs purged."
}


# ---- main flow ----
# Sequence: refuse to run next to another copy, turn Nextcloud maintenance
# mode on, start the MariaDB backup stage in the background, wait for its
# signal file, take the snapshots, create the snapshot-taken signal file so
# the subscript can continue, turn maintenance mode off, purge expired
# snapshots and logs, and mail a report when anything failed.
# Exit status: 0 on success, 130 when another copy is running, 128 when the
# backup stage did not start in time, 129 when the snapshots were taken but
# the backup stage did not end, otherwise the status of take_snapshots.

# the timestamp of this run; exported, and read by take_snapshots
export ts
ts="$(date +'%Y-%m-%d_%H-%M-%S%z')"
# the YYYY-MM-DD part of $ts; it selects the per-day log file
date="$(expr "$ts" : '\([0-9][0-9][0-9][0-9]-\(0[1-9]\|1[0-2]\)-\(0[1-9]\|[12][0-9]\|3[01]\)\)')"
log="${LOG_FILE_PREFIX}${date}.${LOG_EXT}"
mkdir -p "$LOG_DIR"

{	# scope for logging with tee
	echo "===="
	# the HH-MM-SS part of $ts with the UTC offset; "[+-]" also accepts
	# negative offsets
	t=$(expr "$ts" : '.*\(\([01][0-9]\|2[0-9]\)-[0-5][0-9]-[0-5][0-9][+-][0-9][0-9][0-9][0-9]\)')
	echo "${t} '$0' started."
	
	# Is there another copy running?
	# NOTE(review): this counts the "ps aux" lines that mention $0; why the
	# threshold is 6 is not explained in this file - TODO confirm, and
	# consider a lock file instead.
# 	ps_count=$(pgrep --count "$me")
	ps_count="$(ps aux | grep "$0" | wc -l)"
	if [ 6 -lt "$ps_count" ]
	then
		# Another copy is running
		echo "Another '$0' is running. Now exit."
		subject='Nextcloud ZFS snapshot NOT taken'
		content="Another copy of '$0' is running. No snapshot is taken.\nPlease check '${LOG_DIR}/${log}'"
		content_encoded=$(convert_text_to_html "$content")
		notify "$NOTIFICATION_EMAIL_RECIPIENTS" "$subject" "$content_encoded"
		exit 130
	fi

	# start from a clean state: leftovers of an earlier run must not be
	# mistaken for signals of this run
	echo "${t} synchronization files removing..."
	rm -fv "$MARIADB_BACKUP_STAGE_SIGNAL_FILE" "$SNAPSHOT_TAKEN_SIGNAL_FILE"
	t="$(date +'%H:%M:%S')"
	echo "${t} synchronization files removed."
	
	echo "${t} maintenance mode turning on..."
	docker exec nextcloud occ maintenance:mode --on

	t="$(date +'%H:%M:%S')"
	echo "${t} database backup stage starting..."

	# the subscript runs independently and has its own log file.
	# the "." before the extension makes the name match
	# BACKUP_STAGE_COMMAND_FILENAME_PATTERN, so purge_old_logs can find it.
	backup_stage_log_name="${BACKUP_STAGE_COMMAND_FILENAME_BASE}${ts}.${LOG_EXT}"
	backup_stage_log="${BACKUP_STAGE_LOG_DIR}/${backup_stage_log_name}"
	# the finalized backup stage command
	backup_stage_command="docker exec '${CONTAINER_MARIADB_NAME}' bash -c 'mariadb --defaults-file=\"${BACKUP_STAGE_CLIENT_CONF}\" < \"${BACKUP_STAGE_SQL}\" 2>&1 | tee -a \"${BACKUP_STAGE_LOG_DIR_IN_CONTAINER}/${backup_stage_log_name}\"'"
	
	backup_stage_started=false
	start="$(date +'%s')"
	end="$((start + BACKUP_STAGE_TIMEOUT_IN_SECONDS))"

	# time stamp into the subscript log
	echo "${t} mariadb backup stage starting." | tee "$backup_stage_log"
	bash -c "$backup_stage_command" &
	# poll once per second until the signal file appears or the timeout expires
	while true
	do
		if [ -e "$MARIADB_BACKUP_STAGE_SIGNAL_FILE" ]
		then
			# backup_stage_started
			backup_stage_started=true
			break
		fi
		t="$(date +'%s')"
		if [ "$end" -lt "$t" ]
		then
			# timed out
			break;
		fi
		sleep 1;
	done
	
	# defaults for the failure case; overwritten when the backup stage started
	subject="Nextcloud ZFS snapshots NOT taken"
	content="Backup stage failed to start in time.\nNo snapshot is taken."
	rc=128
	t="$(date +'%H:%M:%S')"
	if [ "$backup_stage_started" = true ]
	then
		# lock acquisition succeeded.
		echo "${t} backup stage started succeeded."

		subject="Nextcloud ZFS snapshot taken"
		# take snapshots; their output is logged and becomes the mail content
		content="$({ take_snapshots; } 2>&1 )"
		rc=$?
		echo -E "$content"
		if [ 0 -ne "$rc" ]
		then
			subject="Nextcloud ZFS snapshots failed"
		fi
	else
		# lock acquisition failed.
		echo "${t} backup stage failed to start in time. No snapshot is taken."
	fi
	
	# let the subscript continue even if MariaDB backup stage failed to start in time
	touch "$SNAPSHOT_TAKEN_SIGNAL_FILE"
	
	# wait until subscript backup stage ended
	backup_stage_ended=false
	start="$(date +'%s')"
	end="$((start + BACKUP_STAGE_TIMEOUT_IN_SECONDS))"
	while true
	do
		if [ ! -e "$MARIADB_BACKUP_STAGE_SIGNAL_FILE" ]
		then
			backup_stage_ended=true
			break
		fi
		t="$(date +'%s')"
		if [ "$end" -lt "$t" ]
		then
			# timed out
			break;
		fi
		sleep 1
	done

	# time stamp into the subscript log
	t="$(date +'%H:%M:%S')"
	if [ "$backup_stage_ended" = true ]
	then
		echo "${t} mariadb backup stage ended." | tee -a "$backup_stage_log"
		# remove the synchronization file after the subscript unlocked
		rm "$SNAPSHOT_TAKEN_SIGNAL_FILE"
	else
		echo "${t} mariadb backup stage end was not called."
		content="${content}\nMariaDB backup stage did not end."
		# only a so far successful run becomes "taken but ..."; an earlier
		# failure keeps its own subject and status
		if [ 0 -eq "$rc" ]
		then
			rc=129
			subject="Nextcloud ZFS snapshots taken but MariaDB backup stage did not end"
		fi
	fi

	t="$(date +'%H:%M:%S')"
	echo "${t} maintenance mode turning off..."
	docker exec nextcloud occ maintenance:mode --off

	# old snapshots are only purged after a fully successful run
	if [ 0 -eq "$rc" ]
	then
		purge_old_snapshots
	else
		t="$(date +'%H:%M:%S')"
		echo "${t} skip old snapshot purge."
	fi
	purge_old_logs
	
	# a mail is sent on failure only
	if [ 0 -ne "$rc" ]
	then
		content="${content}\n\nMariaDB backup stage file='${backup_stage_log}'"
		content_encoded=$(convert_text_to_html "$content")
		notify "$NOTIFICATION_EMAIL_RECIPIENTS" "$subject" "$content_encoded"
	fi
	
	t="$(date +'%H:%M:%S')"
	echo "${t} '$0' ended."
	exit "$rc"

} 2>&1 | tee -a "${LOG_DIR}/${log}"

# The group above is one side of a pipeline and therefore runs in a subshell:
# its "exit" only ends that subshell, and the pipeline reports tee's status.
# Hand the group's status on to the caller of this script.
exit "${PIPESTATUS[0]}"
