#!/bin/bash

## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

## Strict mode: abort on unhandled command failures (errexit) and on expansion
## of unset variables (nounset), let functions, command substitutions and
## subshells inherit the ERR trap (errtrace), and make a pipeline fail when
## any of its stages fails (pipefail).
set -o errexit -o nounset -o errtrace -o pipefail

# shellcheck disable=SC1091
{
  source /usr/libexec/helper-scripts/log_run_die.sh
  source /usr/libexec/helper-scripts/has.sh
  source /usr/libexec/helper-scripts/not_as_root.sh
  source /usr/libexec/helper-scripts/strings.bsh
}

## Verify that every command named in the arguments is available.
##
## Arguments: $@ - command names, each one probed with 'has'
## Returns:   0 when nothing is missing; otherwise calls die with exit code 1
##            and a message listing all unavailable commands, separated by
##            single spaces.
collect_reqs() {
  local missing="" tool

  for tool; do
    has "$tool" && continue
    ## Join with a single space, but never start the list with one.
    missing+="${missing:+ }${tool}"
  done

  if [[ -z "$missing" ]]; then
    return 0
  fi

  die 1 "${FUNCNAME[0]}: command(s) unavailable: '$missing'" >&2
}
## Fail early: collect_reqs calls die when any of the listed commands is
## unavailable.
collect_reqs retry curl jq safe-rm str_replace stprint realpath sponge

## Ensure that every named variable is set.
##
## Arguments: $@ - variable NAMES (not values); each name is first validated
##            with check_variable_name.
## Returns:   0 when all variables are set. Exits with 1 when a name is
##            rejected by check_variable_name; calls die when a variable is
##            unset.
## NOTE: despite the wording of the die message, a variable that is set to
## the empty string passes this check; only unset variables are rejected.
check_vars_exist() {
  local name

  for name; do
    if ! check_variable_name "$name"; then
      exit 1
    fi

    ## "${!name+x}" expands to 'x' for any set variable, including empty ones.
    [[ -n "${!name+x}" ]] && continue

    die 1 "Variable '$name' is not set or is empty."
    exit 1
  done
}

## Log an error about a missing variable and terminate with exit code 1.
##
## Arguments: $* - free-form description, appended to the log message
missing_variable() {
  local details="$*"

  log error "MISSING VARIABLE: ${details}"
  exit 1
}

## Print a snapshot of the script state to stderr via stecho.
##
## Arguments: $1 - optional error message; shown as 'none' when missing or
##            empty.
## Globals:   WIKI_INDEX, TITLE, TMPFOLDER, counter_chunk, counter_currently
##            (read) - each is shown as 'unset' when the variable is not set.
error_output() {
  local report

  ## One argument per output line; the last line carries no trailing newline.
  printf -v report '%s\n%s\n%s\n%s\n%s\n%s\n%s' \
    "error_msg: '${1:-none}'" \
    "WIKI_INDEX: '${WIKI_INDEX-unset}'" \
    "TITLE: '${TITLE-unset}'" \
    "TMPFOLDER: '${TMPFOLDER-unset}'" \
    "counter_chunk: '${counter_chunk-unset}'" \
    "counter_currently: '${counter_currently-unset}'" \
    "###"

  stecho "$report" >&2
}

## ERR trap handler: report the failing command and its exit status, dump
## the script state through error_output, then terminate with exit code 1.
error_handler() {
  ## Must stay the very first statement: any earlier command would
  ## overwrite "$?".
  local failed_status="$?"
  local report

  printf -v report '\n%s\n%s' \
    "BASH_COMMAND: '$BASH_COMMAND'" \
    "failed with exit code '$failed_status'."

  log error "$report" >&2
  error_output "called-by-error_handler"
  exit 1
}

## Run error_handler whenever a command failure triggers bash's ERR trap.
## 'set -o errtrace' near the top of this file makes functions, command
## substitutions and subshells inherit this trap.
trap error_handler ERR

## Final handler for the INT, HUP, ABRT, QUIT and EXIT traps.
##
## Arguments: $1 - exit code to report and to exit with
## Clears those traps first so that the 'exit' below does not call this
## handler again, logs whether the run ended OK (code 0) or with an error,
## then exits with the given code.
exit_handler() {
  trap - INT HUP ABRT QUIT EXIT

  local status="${1:-}"

  case "$status" in
    0)
      log info "END: with OK exit code: '$status'"
      ;;
    *)
      log error "END: with ERROR exit code: '$status'"
      ;;
  esac

  exit "$status"
}

## Choose the default curl binary.
##
## Globals: curl (written) - 'scurl' when 'has scurl' succeeds, otherwise
##          'curl'.
set_curl_binary_default() {
  if ! has scurl; then
    curl=curl
    return 0
  fi

  curl=scurl
}

## Convert a MediaWiki page title into a filesystem-safe filename component.
##
## Arguments: $1 - page title; must be non-empty
## Outputs:   the encoded name on stdout; all diagnostics go to stderr
## Returns:   0 on success, 1 when validate_safe_filename rejects the encoded
##            name. Calls die when $1 is empty.
set_backup_page_item() {
  local backup_page_item

  backup_page_item="$(stecho "$1")"
  if [ -z "$backup_page_item" ]; then
    die 1 "${FUNCNAME[0]}: 1 is empty!"
  fi

  ## Encode page name into a filesystem-safe filename component using
  ## standard percent-encoding via Python's urllib.parse.quote.
  ## - Spaces become underscores (MediaWiki convention, same as git-mediawiki).
  ## - '/' becomes '%2F' (preserves subpage structure for round-trip).
  ## - '%', '&', '#', '?' etc. are percent-encoded.
  ## - '_', '.', '-', '~', ':' are left as-is (safe in filenames).
  ## Round-trips with decode_backup_page_item / mw-urlencode --decode-filename-to-page.
  backup_page_item="$(mw-urlencode --encode-page-to-filename "$backup_page_item")"

  check_is_not_empty_and_only_one_line backup_page_item
  if ! validate_safe_filename backup_page_item; then
    log error "${FUNCNAME[0]}: validate_safe_filename backup_page_item failed. Checking $backup_page_item for unicode using unicode-show for debugging purposes."
    ## stdout is this function's result channel. The debugging dump therefore
    ## has to go to stderr; a caller capturing stdout would otherwise swallow
    ## it and the operator would never see it.
    printf '%s\n' "$backup_page_item" | unicode-show >&2
    return 1
  fi

  stecho "$backup_page_item"
}

## Build the backup filename for an already encoded page item by appending
## the '.mw' extension.
##
## Arguments: $1 - encoded page item; must be non-empty
## Outputs:   the filename on stdout; all diagnostics go to stderr
## Returns:   0 on success, 1 when validate_safe_filename rejects the
##            filename. Calls die when $1 is empty.
set_backup_filename_item() {
  local backup_filename_item

  backup_filename_item="$1"

  if [ -z "$backup_filename_item" ]; then
    die 1 "${FUNCNAME[0]}: 1 is empty!"
  fi

  ## Same format as git-mediawiki: add '.mw' file extension.
  ## '/' is already encoded as '%2F' by set_backup_page_item /
  ## mw-urlencode --encode-page-to-filename, so no extra replacement needed.
  backup_filename_item="${backup_filename_item}.mw"

  check_is_not_empty_and_only_one_line backup_filename_item
  if ! validate_safe_filename backup_filename_item; then
    log error "${FUNCNAME[0]}: validate_safe_filename backup_filename_item failed."
    ## stdout is this function's result channel. The debugging dump therefore
    ## has to go to stderr; a caller capturing stdout would otherwise swallow
    ## it and the operator would never see it.
    printf '%s\n' "$backup_filename_item" | unicode-show >&2
    return 1
  fi

  stecho "$backup_filename_item"
}

## WARNING: decoded output restores '/' characters. The result must
## only be used as a MediaWiki API page title parameter, NEVER to
## construct local file paths (directory traversal risk).
## Arguments: $1 - encoded page item as produced by set_backup_page_item;
##            must be non-empty (die is called otherwise)
## Outputs:   whatever 'mw-urlencode --decode-filename-to-page' prints for
##            the item
decode_backup_page_item() {
  local encoded_item

  encoded_item="$(stecho "$1")"
  [ -n "$encoded_item" ] || die 1 "${FUNCNAME[0]}: 1 is empty!"

  ## Counterpart of the --encode-page-to-filename call made by
  ## set_backup_page_item.
  mw-urlencode --decode-filename-to-page "$encoded_item"
}

## Turn a backup filename back into a page title.
##
## Arguments: $1 - backup filename; must be non-empty (die is called
##            otherwise). A trailing '.mw' extension is removed when present.
## Outputs:   the output of decode_backup_page_item for the remaining name
decode_backup_filename_item() {
  local stored_name

  stored_name="$(stecho "$1")"
  if [[ -z "$stored_name" ]]; then
    die 1 "${FUNCNAME[0]}: 1 is empty!"
  fi

  ## Drop the '.mw' suffix, then hand over to the page-item decoder.
  decode_backup_page_item "${stored_name%.mw}"
}

## Defense-in-depth against directory traversal.
## Verify that a target file path is contained within the expected
## base directory. Catches path traversal via '..', symlinks, or any
## encoding bypass.
##
## Uses the conventional canonicalize-then-check-prefix approach:
## realpath resolves '..', symlinks, and redundant slashes into an
## absolute path, then a prefix match ensures containment.
## '-m' (--canonicalize-missing) is required because the target file
## typically does not exist yet at the time of the check.
##
## Usage: assert_path_within_dir BASE_DIR TARGET_PATH
## Arguments: $1 - BASE_DIR; resolved with 'realpath', so it has to exist
##            $2 - TARGET_PATH; resolved with 'realpath -m', may be missing
## Returns:   0 when TARGET_PATH resolves to a location strictly below
##            BASE_DIR. Calls die when an argument is empty, or when the
##            target resolves to BASE_DIR itself or to anywhere outside it.
assert_path_within_dir() {
  local base_dir target_path resolved_base resolved_target base_prefix

  base_dir="$1"
  target_path="$2"

  if [ -z "$base_dir" ]; then
    die 1 "${FUNCNAME[0]}: base_dir is empty!"
  fi

  if [ -z "$target_path" ]; then
    die 1 "${FUNCNAME[0]}: target_path is empty!"
  fi

  resolved_base="$(realpath -- "$base_dir")"
  resolved_target="$(realpath -m -- "$target_path")"

  ## The filesystem root resolves to '/'. Appending '/*' to that would give
  ## the pattern '//*', which no canonical path matches, so every target
  ## would be rejected. Strip the trailing slash first; any other canonical
  ## path has none, so this only affects the root.
  base_prefix="${resolved_base%/}"

  case "$resolved_target" in
    ## '?*' requires at least one character after the separator, so the base
    ## directory itself never counts as being inside the base directory.
    "$base_prefix"/?*)
      return 0
      ;;
    *)
      die 1 "${FUNCNAME[0]}: path traversal blocked! target: '$resolved_target' is outside base: '$resolved_base'"
      ;;
  esac
}

## Scan curl-style arguments for the response output target.
##
## Globals:   output_file (written) - the value given via '--output FILE',
##            '--output=FILE', '-o FILE' or '-oFILE' (the last occurrence
##            wins); empty when none was given. The variable is deliberately
##            not declared local here, so it is set in the caller's scope
##            (curl_run declares it local).
## Arguments: $@ - the curl arguments; scanning stops at a '--' argument.
## Returns:   0 normally. Calls die when '--output' or '-o' is not followed
##            by a non-empty value.
curl_get_output_parameter() {
  output_file=""

  ## Loop on the argument count rather than on the argument's content: an
  ## empty-string argument (for example the value in '--data ""') must not
  ## end the scan before a later output option has been seen. Every
  ## iteration either consumes at least one argument or leaves the loop, so
  ## the scan always terminates and needs no artificial iteration limit.
  while (( $# > 0 )); do
    case "$1" in
      --)
        break
        ;;
      --output | -o)
        if [ -z "${2-}" ]; then
          die 1 "${FUNCNAME[0]}: $1 given but missing value"
          ## Only reached if die returns instead of exiting.
          return 1
        fi
        output_file="$2"
        shift 2
        ;;
      --output=*)
        output_file="${1#--output=}"
        shift
        ;;
      -o*)
        ## supports: -oFILE
        output_file="${1#-o}"
        shift
        ;;
      *)
        shift
        ;;
    esac
  done
}

## Run "$curl" with the given arguments, passing the final argument (the URL)
## through mw-urlencode first.
##
## Globals:   curl (read) - command to execute, started through log_run
##            curl_run_no_encode (read) - when "true", the URL is handed to
##            curl unchanged.
##            NOTE(review): the variable is declared local without a value
##            below, so a plain global assignment is shadowed here;
##            presumably callers set it as a per-call prefix
##            ('curl_run_no_encode=true curl_run ...') - confirm against
##            callers.
## Arguments: $@ - curl options, followed by the URL as the LAST argument
## Outputs:   when the arguments name no output file, the response is
##            captured in a temporary file, printed to stdout and the
##            temporary file is removed again.
## Returns:   0 on success. Calls die when no argument is given, when mktemp
##            fails or when curl exits non-zero.
curl_run() {
  local curl_run_no_encode args url curl_exit_code encoded_url output_file
  local created_temp_output output_is_stdout cat_exit_code

  if (( $# == 0 )); then
    die 1 "${FUNCNAME[0]}: missing URL argument"
  fi

  ## Sets the local 'output_file' declared above.
  curl_get_output_parameter "$@"

  created_temp_output="false"
  output_is_stdout="false"

  if [ -z "${output_file-}" ]; then
    ## If caller didn't provide an output file, capture to a temp file.
    ## mktemp has just created it empty, so it is neither removed nor
    ## re-created here; that would only open a window for a file-name race.
    created_temp_output="true"
    output_file="$(mktemp -t curl_run.XXXXXXXX)" || die 1 "${FUNCNAME[0]}: mktemp failed"
  else
    case "$output_file" in
      "-"|/dev/stdout|/dev/fd/1|/proc/self/fd/1)
        ## If caller explicitly targets stdout, don't try to rm/touch/stcat it
        output_is_stdout="true"
        ;;
      *)
        ## Start from an empty output file so stale content cannot be
        ## mistaken for the new response.
        safe-rm -f -- "$output_file"
        touch -- "$output_file"
        ;;
    esac
  fi

  ## The URL is the last positional parameter.
  url="${!#}"

  [[ -v curl_run_no_encode ]] || curl_run_no_encode=""

  if [ "$curl_run_no_encode" = "true" ]; then
    encoded_url="$url"
  else
    encoded_url="$(mw-urlencode "$url")"
  fi

  curl_exit_code=0

  ## Rebuild args: exclude last param (url), then append encoded url.
  ## If we created a temp output, inject --output <temp> so we can capture it.
  if [ "$created_temp_output" = "true" ]; then
    args=("${@:1:$#-1}" "--output" "$output_file" "$encoded_url")
  else
    args=("${@:1:$#-1}" "$encoded_url")
  fi

  log_run debug "$curl" "${args[@]}" || curl_exit_code="$?"

  if [ "$curl_exit_code" != "0" ]; then
    log error "curl failed with exit code '$curl_exit_code': url: '$encoded_url' output_file: '${output_file-}'"
    if [ "$output_is_stdout" != "true" ] && [ -n "${output_file-}" ]; then
      ## Best effort: show whatever was written before the failure.
      stcat "$output_file" >&2 || true
    fi
    if [ "$created_temp_output" = "true" ]; then
      ## Best-effort cleanup; a failure to remove the temp file must not
      ## hide the curl error reported below.
      safe-rm -f -- "$output_file" || true
    fi
    die 1 'curl failure, cannot continue.'
  fi

  ## If caller didn't specify --output, behave like curl and print to stdout,
  ## then remove the temp file so response bodies do not pile up on disk.
  if [ "$created_temp_output" = "true" ]; then
    cat_exit_code=0
    cat -- "$output_file" || cat_exit_code="$?"
    ## Best-effort cleanup; the status of 'cat' decides the result.
    safe-rm -f -- "$output_file" || true
    return "$cat_exit_code"
  fi
}

## From here on, hand the current exit status to exit_handler on script exit
## and on the listed signals.
trap 'exit_handler $?' INT HUP ABRT QUIT EXIT

## Defaults. Each setting is assigned only when the variable is not already
## set; a variable that is set to the empty string is left untouched.
if ! [[ -v TMPFOLDER ]]; then
  TMPFOLDER="$HOME/mediawiki-shell-temp"
fi

if ! [[ -v USERDOMAIN ]]; then
  USERDOMAIN=""
fi

## Depends on TMPFOLDER, which therefore has to be settled first.
if ! [[ -v cookie_jar ]]; then
  cookie_jar="$TMPFOLDER/wiki-cookiejar"
fi

if ! [[ -v curl ]]; then
  set_curl_binary_default
fi

if ! [[ -v curl_opts ]]; then
  curl_opts=(
    --fail
    --no-progress-meter
    --show-error
    --retry-connrefused
    --retry 10
    --retry-delay 5
    --cookie "nocache=true"
    --user-agent "mediawiki-shell"
  )
fi

## Files and directories created from now on are accessible to the owner only.
umask 077

## Create the temp folder if needed. For a folder that already exists, strip
## the group/other read and write bits (their execute bits are left as is).
mkdir -p -- "$TMPFOLDER"
chmod go-rw -- "$TMPFOLDER"
