#!/usr/bin/env bash
# Copyright (C) 2013-2017, 2024 Luke T. Shumaker <lukeshu@parabola.nu>
#
# If you don't see the string "EMBEDLIB.SH" below, but see function
# definitions for panic() et al., then this is a generated file, and
# contains some code from messages.sh/common.sh.  See the source
# distribution for full copyright information.
#
# License: GNU GPLv2+
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# NB: During normal operation (i.e. when not printing the `usage()`
# text), we don't use librelib.  This is important as we need to be
# able to run this program statically in-place when building libretools.

# Default gettext text domain for this program's messages.  It is
# exported so that child processes (such as `gettext`) inherit it
# through the environment.
export TEXTDOMAIN='librelib'

# Begin embedlib.sh ############################################################

# Define `_`, the translation primitive used by every message helper.
# Without gettext(1) installed, fall back to echoing the arguments
# back untranslated (with no trailing newline); otherwise hand all
# arguments to gettext.
if ! type gettext &>/dev/null; then
	_() { echo -n "$@"; }
else
	_() { gettext "$@"; }
fi

. /usr/share/makepkg/util.sh

# Exit statuses used by this program; declared as read-only integers.
declare -r -i EXIT_SUCCESS=0
declare -r -i EXIT_FAILURE=1
declare -r -i EXIT_INVALIDARGUMENT=2
# Usage: _l COMMAND [ARGS...]
# Run COMMAND with TEXTDOMAIN set to 'librelib' in its environment.
# The caller's own TEXTDOMAIN is left as it was once COMMAND returns.
_l() { TEXTDOMAIN=librelib "$@"; }
# Usage: _p COMMAND [ARGS...]
# Run COMMAND with TEXTDOMAIN set to 'pacman-scripts' in its
# environment.  The caller's own TEXTDOMAIN is left as it was once
# COMMAND returns.
_p() { TEXTDOMAIN=pacman-scripts "$@"; }
# Usage: panic [FORMAT [ARGS...]]
# Report an internal error and terminate with $EXIT_FAILURE.
#
# The message (FORMAT/ARGS rendered through `print`, or a translated
# 'General error' when no arguments are given) is written to stderr,
# followed by one line per entry of the shell call stack.
panic() {
    local text="$(_l _ 'General error')"
    if (( $# > 0 )); then
        text="$(print "$@")"
    fi
    {
        _l print '%s: BUG: please report this to Parabola: %s' "${0##*/}" "$text"
        # Walk the call stack from this function outwards.  The
        # outermost frame has no BASH_SOURCE entry of its own, so fall
        # back to $0 there.
        local depth
        for depth in "${!FUNCNAME[@]}"; do
            _l print '\t%s()\tcalled at %s:%s' \
                "${FUNCNAME[$depth]}" \
                "${BASH_SOURCE[$((depth + 1))]:-$0}" \
                "${BASH_LINENO[$depth]}"
        done
    } >&2
    exit $EXIT_FAILURE
}
print () 
{ 
    [[ $# -ge 1 ]] || panic;
    local mesg;
    mesg="$(_ "$1")";
    shift;
    printf -- "$mesg\n" "$@"
}
# Usage: gnuerror FORMAT [ARGS...]
# Print an error message to stderr in the form `PROGRAM: MESSAGE`,
# where PROGRAM is the basename of $0 and MESSAGE is the translated
# FORMAT rendered with ARGS.  Calling it without a FORMAT panics.
gnuerror() {
    (( $# >= 1 )) || panic
    local template text
    template="$(_ "$1")"
    # Render into a variable first so that the program-name prefix and
    # the message go out in a single printf.
    printf -v text -- "$template" "${@:2}"
    printf -- '%s: %s\n' "${0##*/}" "$text" >&2
}
# Usage: whitespace_collapse <<< TEXT
# Filter stdin to stdout, turning the text into a single line of
# normalised prose.  Takes no arguments; passing any panics.
whitespace_collapse() {
    if [[ $# != 0 ]]; then
        panic
    fi
    local -a sed_script=(
        # Line breaks (already turned into \r by tr) become two spaces.
        -e 's/\r/  /g'
        # Tabs become single spaces.
        -e 's/\t/ /g'
        # Runs of spaces collapse to one space...
        -e 's/(^|[^.!? ]) +/\1 /g'
        # ...except after sentence-ending punctuation, where two are kept.
        -e 's/([.!?])  +/\1  /g'
        # Drop trailing whitespace.
        -e 's/\s+$//'
    )
    tr '\n' '\r' | sed -E "${sed_script[@]}"
}
# Usage: prose FORMAT [ARGS...]
# Print a paragraph: FORMAT has its whitespace collapsed, is
# translated with `_`, rendered with ARGS by printf, and finally
# re-wrapped by `fmt -u`.  Calling it without a FORMAT panics.
prose() {
    (( $# >= 1 )) || panic
    local template
    # The msgid is looked up in its whitespace-collapsed form.
    template="$(_ "$(whitespace_collapse <<< "$1")")"
    printf -- "$template" "${@:2}" | fmt -u
}
# Usage: bullet FORMAT [ARGS...]
# Like `prose`, but formatted as a list item: the text is wrapped by
# `fmt -u -w 71`, the first line is prefixed with "  - " and every
# following line with four spaces.  Calling it without a FORMAT panics.
bullet() {
    (( $# >= 1 )) || panic
    local template
    # The msgid is looked up in its whitespace-collapsed form.
    template="$(_ "$(whitespace_collapse <<< "$1")")"
    printf -- "$template" "${@:2}" \
        | fmt -u -w 71 \
        | sed -e '1s/^/  - /' -e '2,$s/^/    /'
}
# Usage: flag [HEADING:|FLAG DESCRIPTION]...
# Public entry point for printing an option table; simply forwards
# all arguments to `_flag`.
flag() { _flag "$@"; }
# Usage: _flag [HEADING:|FLAG DESCRIPTION]...
# Print a table of command-line flags and their descriptions.
#
# Arguments are consumed left to right:
#  - an argument ending in a colon is a HEADING; it is translated and
#    printed on a line of its own, indented by one space;
#  - any other argument is a FLAG, and the argument after it is its
#    DESCRIPTION.  The description has its whitespace collapsed, is
#    translated, and is wrapped by `fmt` to (71 - indent) columns.
#    FLAG is printed verbatim in a left-hand column that is `indent`
#    characters wide.
#
# The column width starts at 12 and grows in steps of 8 until it is at
# least as wide as the longest FLAG.
#
# A FLAG with no DESCRIPTION after it is a programming error and
# panics before anything is printed.
_flag() {
    local args=("$@")

    # Pass 1: measure the widest FLAG.
    declare -i flaglen=0
    while [[ $# -gt 0 ]]; do
        if [[ $1 == *: ]]; then
            shift 1
        else
            # This has to be checked here, not only when printing:
            # `shift 2` with a single argument left fails *without*
            # shifting, so the loop would otherwise never terminate
            # (or the shell would die silently under `set -e`).
            [[ $# -gt 1 ]] || panic
            if [[ ${#1} -gt $flaglen ]]; then
                flaglen=${#1}
            fi
            shift 2
        fi
    done
    set -- "${args[@]}"

    declare -i indent=12
    while [[ $indent -lt $flaglen ]]; do
        indent+=8
    done

    # fmt2 prints "FLAG  first-line"; fmt1 prints a continuation line
    # with the FLAG column left blank (the padding is baked in here).
    local fmt2 fmt1
    fmt2="  %-${indent}s  %s\n"
    printf -v fmt1 "  %-${indent}s  %%s\n" ''

    # Pass 2: print.
    local flag desc wrapped
    local -a lines
    while [[ $# -gt 0 ]]; do
        if [[ $1 == *: ]]; then
            printf -- ' %s\n' "$(_ "$1")"
            shift
        else
            flag=$1
            desc="$(_ "$(whitespace_collapse <<< "$2")")"
            shift 2
            # Split the wrapped text into lines with readarray instead
            # of an unquoted expansion: that neither changes the
            # caller's IFS nor glob-expands words such as `*`.
            wrapped="$(fmt -u -w "$((71 - indent))" <<< "$desc")"
            readarray -t lines <<< "$wrapped"
            printf -- "$fmt2" "$flag" "${lines[0]-}"
            [[ ${#lines[@]} -lt 2 ]] || printf -- "$fmt1" "${lines[@]:1}"
        fi
    done
}
# Re-wrap the message functions named in `fns` (presumably the ones
# provided by the makepkg util.sh sourced above -- confirm) so that
# each of them:
#  - translates its first argument with `_` before passing it on,
#  - defaults QUIET to 0 when it is unset or empty,
#  - runs the original implementation through `_p`, i.e. with
#    TEXTDOMAIN='pacman-scripts',
#  - has the original implementation's stdout redirected to stderr.
#
# The code is generated inside a command substitution and then
# eval'ed, so `fns` itself never becomes a variable of this shell.
eval "$(
	fns=(
		plain
		msg
		msg2
		ask

		warning
		error
		plainerr
	)

	# declare _makepkg_${fn} as a copy of ${fn}
	declare -f "${fns[@]}" | sed 's/^[a-z]/_makepkg_&/'

	# re-declare ${fn} as a wrapper around _makepkg_${fn}
	printf '%s() { local mesg; mesg="$(_ "$1")"; local QUIET=${QUIET:-0}; _p _makepkg_"${FUNCNAME[0]}" "$mesg" "${@:2}" >&2; }\n' \
	       "${fns[@]}"
)"

# End embedlib.sh ##############################################################

################################################################################

# Default "simple" keywords, grouped by the project that defines the
# corresponding function.  They are listed bare here and turned into
# xgettext `--keyword=` options just below.
default_simple=(
	# xgettext
	eval_gettext 'eval_ngettext:1,2' gettext 'ngettext:1,2'
	# libmakepkg/util/message.sh
	plain plainerr msg msg2 ask warning error
	# devtools/lib/common.sh
	stat_busy die lock:3 slock:3
	# devtools-par/lib/common.sh
	_
	# libretools/src/lib/messages.sh
	panic print gnuerror term_title
)
# Prefix every element with `--keyword=`.
default_simple=("${default_simple[@]/#/--keyword=}")

# Default "prose" keywords, already in `--keyword=` form.
default_prose=(--keyword=prose --keyword=bullet)

readonly default_simple default_prose

# Usage: usage
# Print the help text to stdout, using the print/prose/bullet/flag
# message helpers.
#
# The two "default ... keywords" paragraphs list the contents of
# default_simple and default_prose with the `--keyword=` prefix
# stripped from every element.
#
# NOTE(review): the string literals below are msgids, and the `flag`
# call is written with literal arguments -- the form this program's
# own `flag` extraction scans for; keep that in mind before
# restructuring.
usage() {
	print 'Usage: %s [OPTIONS] FILES...' "${0##*/}"
	print 'Generates .pot files for programs using libremessages.'
	echo
	prose 'In librexgettext, there are 2 types of keywords:'
	bullet 'simple: Simple keywords are just like normal xgettext'
	bullet 'prose: Prose keywords are similar, but the text is
	        word-wrapped'
	prose 'The keyword format is the same as in GNU xgettext.'
	echo
	prose 'The libremessages `flag` command is also handled
	       specially, and is not configurable as a keyword.'
	echo
	prose 'The default simple keywords are: %s' "${default_simple[*]#--keyword=}"
	echo
	prose 'The default prose keywords are: %s' "${default_prose[*]#--keyword=}"
	echo
	print 'Options:'
	flag \
		'--simple=KEYWORD' 'Look for KEYWORD as an additional simple keyword' \
		'--prose=KEYWORD' 'Look for KEYWORD as an additional prose keyword' \
		'-k' 'Disable using the default keywords' \
		'-h, --help' 'Show this text'
}

# Usage: xgettext-sh [XGETTEXT_ARGS...]
# Run xgettext on shell source with the options every call in this
# program shares, followed by the caller's own arguments (keywords,
# `--`, input files).
xgettext-sh() {
	local -a base_opts=(
		--omit-header
		--from-code=UTF-8
		-L shell
		# A bare -k: callers supply every keyword explicitly.
		-k
		# Write the result to stdout.
		-o -
	)
	xgettext "${base_opts[@]}" "$@"
}

# Usage: xgettext-flag FILE
# Extract the translatable arguments of `flag` invocations in FILE and
# print them to stdout as PO entries.
#
# `flag` takes headings (arguments ending in a colon) and
# (flag, description) pairs.  Headings and descriptions are printed;
# the flag names themselves are dropped.
#
# The work is done by a three-part pipeline: stage 1 extracts every
# argument position separately (tagging each entry with a "#. N"
# comment holding the argument number), the output is then
# whitespace-collapsed and stripped of "#, sh-format" lines, and
# stages 2-4 regroup the entries per invocation and filter them.
#
# If the sequence of argument numbers is inconsistent, an error naming
# FILE and the line(s) involved is written to stderr and the final
# pipeline stage exits with $EXIT_FAILURE.
xgettext-flag() {
	local file="$1"
	{
		# Stage 1: Generate
		#
		# Get all of the arguments to `flag`.  Because `flag`
		# takes an arbitrary number of arguments, just iterate
		# through arg1, arg2, ... argN; until we've come up
		# empty 3 times.  Why 3?  Because each flag takes 2
		# arguments, and because we don't keep track of which
		# one of those we're on, waiting for 3 empties ensures
		# us that we've had a complete "round" with nothing.
		#
		# Why can't I just do i+=2, and not have to keep track
		# of empties?  Because, we also allow for arguments
		# ending in a colon to be headings, which changes the
		# offsets.
		declare -i empties=0
		declare -i i
		for ((i = 1; empties < 3; i++)); do
			local out
			# The keyword spec `flag:N,"N"` extracts argument
			# N and attaches the comment "N" to the entry.
			out="$(xgettext-sh --keyword="flag:$i,\"$i\"" -- "$file")"
			if [[ -n $out ]]; then
				printf -- '%s\n' "$out"
				empties=0
			else
				empties+=1
			fi
		done
	} | xgettext-whitespace-collapse | sed '/^\#, sh-format/d' | {
		# Stage 2: Parse
		#
		# Read in the lines, and group them into an array of
		# (multi-line) msgs.  This just makes working with
		# them easier.
		local msgs=()
		declare -i i=-1
		# A "#. N" comment line marks the start of a new msg.
		local re='^#\. ([0-9]+)$'
		IFS=''
		local line
		while read -r line; do
			if [[ $line =~ $re ]]; then
				i+=1
			fi
			msgs[$i]+="$line"$'\n'
		done
		# Stage 3: Sort
		#
		# Now, we have the `msgs` array, and it is
		# sorted such that it is all of the arg1's to `flag`,
		# then all of the arg2's, then all of the arg3's, and
		# so on.  We want to re-order them such that it's all
		# of the args for the first invocation then all of the
		# args for the second; and so on.
		#
		# We do this by simply sorting them by the location
		# that they appear in the file.  Then, when we see the
		# argument number go back down, we know that a new
		# invocation has started!
		local locations=()
		readarray -t locations < <(
			local i
			for i in "${!msgs[@]}"; do
				local lines=()
				readarray -t lines < <(printf '%s' "${msgs[$i]}")

				# Line 0 is the "#. N" comment; line 1 is
				# taken to end in ":ROW".
				declare -i arg row
				arg=${lines[0]#'#. '}
				row=${lines[1]##*:}

				# Emit "ROW ARG INDEX", sorted by row and
				# then by argument number.
				printf '%d %d %d\n' "$row" "$arg" "$i"
			done | sort -k 1n -k 2n
		)
		# Stage 4: Output
		#
		# Now, we prune out the arguments that aren't
		# localizable.  Also, remove the "#." comment lines.
		# As explained above (in stage 3), when we see $arg go
		# down, that's the beginning of a new invocation.
		local expectflag=true
		local prev_arg=0
		local prev_row=0 # for better error messages only; no real logic
		local location
		for location in "${locations[@]}"; do
			IFS=' '
			local row arg i
			read -r row arg i <<<"$location"
			# Drop the first line (the "#. N" comment).
			local msg="${msgs[$i]#*$'\n'}"

			# See if we need to fiddle with $expectflag
			# (and do some sanity checking).
			if [[ $arg != "$((prev_arg + 1))" ]]; then
				if ! $expectflag; then
					local pos
					if [[ $row != "$prev_row" ]]; then
						printf -v pos "%s:%d-%d" "$file" "$prev_row" "$row"
					else
						printf -v pos "%s:%d" "$file" "$prev_row"
					fi
					>&2 printf "%s: $(_ "flag error: Missing expected flag meaning at argument %d")\n" \
						"$pos" "$((prev_arg + 1))"
					exit $EXIT_FAILURE
				elif [[ $arg == "$((prev_arg + 2))" ]]; then
					# skipped flag argument
					expectflag=false
				elif [[ $arg == 1 ]]; then
					# started new invocation
					expectflag=true
				elif [[ $arg == 2 ]]; then
					# started new invocation and skipped flag argument
					expectflag=false
				else
					local pos
					if [[ $row != "$prev_row" ]]; then
						printf -v pos "%s:%d-%d" "$file" "$prev_row" "$row"
					else
						printf -v pos "%s:%d" "$file" "$prev_row"
					fi
					>&2 printf "%s: $(_ "flag error: Jumped from argument %d to %d")\n" \
						"$pos" "$prev_arg" "$arg"
					exit $EXIT_FAILURE
				fi
			fi
			prev_arg=$arg
			prev_row=$row

			# Now we operate based on $row, $arg, $msg,
			# and $expectflag.
			if $expectflag; then
				IFS=$'\n'
				local lines=(${msg})
				# A second line ending in `:"` means the
				# string itself ends in a colon: a heading.
				if [[ ${lines[1]} == *':"' ]]; then
					# We expected a flag, but got
					# a heading
					printf -- '%s\n' "$msg"
				else
					# We expected a flag, and got
					# one!
					expectflag=false
				fi
			else
				printf -- '%s\n' "$msg"
				expectflag=true
			fi
		done
		# Input ended right after a flag, with no description.
		if ! $expectflag; then
			>&2 printf "%s:%d: $(_ "flag error: Missing expected flag meaning at argument %d")\n" \
				"$file" "$prev_row" \
				"$((prev_arg + 1))"
			exit $EXIT_FAILURE
		fi
	}
}

# Usage: xgettext-whitespace-collapse < PO_TEXT
# Filter xgettext output so that message strings are whitespace
# normalised:
#  1. adjacent string literals that xgettext split across lines are
#     joined back into one literal;
#  2. escaped newlines/tabs (`\n`, `\t`) and real tabs become spaces;
#  3. runs of spaces collapse to one space, or to two after `.`, `!`
#     or `?`.
xgettext-whitespace-collapse() {
	# Newlines are swapped for \r while joining so that sed sees the
	# whole input as a single line, and are restored afterwards.
	tr '\n' '\r' \
		| sed 's/"\r\s*"//g' \
		| tr '\r' '\n' \
		| sed -E \
			-e 's/(\\n|\\t|\t)/ /g' \
			-e 's/(^|[^.!? ]) +/\1 /g' \
			-e 's/([.!?])  +/\1  /g'
}

# Usage: main [OPTIONS] FILES...
# Program entry point: parse the command line, then write the merged
# message catalogue for FILES to stdout.
#
# Options:
#   --simple=KEYWORD  additional simple keyword
#   --prose=KEYWORD   additional prose keyword
#   -k                do not add the default keywords
#   -h, --help        print the usage text and return
#
# Returns $EXIT_INVALIDARGUMENT (after a hint on stderr) when the
# options cannot be parsed or no input file is given, and
# $EXIT_SUCCESS after printing the usage text.
main() {
	set -euE -o pipefail
	local simple=() prose=() files=()
	local use_defaults=true
	local mode=run
	local parsed

	if parsed="$(getopt -n "${0##*/}" -o 'kh' -l 'simple:,prose:,help' -- "$@")"; then
		# Replace the positional parameters with getopt's
		# normalised form: options, `--`, then the operands.
		eval "set -- $parsed"
		while :; do
			case "$1" in
				--simple)
					simple+=(--keyword="$2")
					shift 2
					;;
				--prose)
					prose+=(--keyword="$2")
					shift 2
					;;
				-k)
					use_defaults=false
					shift
					;;
				--help | -h)
					mode=usage
					shift
					;;
				--)
					shift
					break
					;;
				*)
					panic 'unhandled flag: %q' "$1"
					;;
			esac
		done
		files+=("$@")
		if [[ $mode == run && ${#files[@]} -lt 1 ]]; then
			gnuerror 'no input file given'
			mode=errusage
		fi
	else
		mode=errusage
	fi

	if [[ $mode == errusage ]]; then
		print "Try '%s --help' for more information." "${0##*/}" >&2
		return $EXIT_INVALIDARGUMENT
	elif [[ $mode == usage ]]; then
		usage
		return $EXIT_SUCCESS
	elif [[ $mode != run ]]; then
		panic 'invalid mode: %q' "$mode"
	fi

	# User-supplied keywords come first, the defaults after them.
	if [[ $use_defaults == true ]]; then
		simple+=("${default_simple[@]}")
		prose+=("${default_prose[@]}")
	fi

	# Main code
	#
	# Three extraction passes feed one stream: simple keywords,
	# prose keywords (whitespace-collapsed), and `flag` arguments
	# file by file.  "#, sh-format" lines are dropped and the
	# result goes through msguniq.
	{
		xgettext-sh "${simple[@]}" -- "${files[@]}"
		xgettext-sh "${prose[@]}" -- "${files[@]}" | xgettext-whitespace-collapse
		for src in "${files[@]}"; do
			xgettext-flag "$src"
		done
	} | sed '/^\#, sh-format/d' | msguniq -Fi --to-code=UTF-8
}

# Run the program with the script's command-line arguments.
main "$@"
