#!/usr/bin/perl

# Copyright 2012-2022, Alexander Shibakov
# This file is part of SPLinT
#
# SPLinT is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SPLinT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.

use Getopt::Long;
use Pod::Usage;
use File::Basename;
use English;

( $my_name, $my_path, $my_suffix ) = fileparse( $PROGRAM_NAME );
$invocation_line = "\% ".$my_name." ".(join ' ', @ARGV)."\n";

my $man = 0;
my $help = 0;
my $fine_index = 0;
my $fine_headercs = "\\FI"; # index command sequence for the fine format
my $crude_headercs = "\\GI"; # index command sequence for the standard format
my $headercs = ""; # the default is the standard format

#Getopt::Long::Configure ("bundling"); # to allow -abc to set a, b, and c

GetOptions ("help|?" => \$help, 
            man => \$man,
            "fine" => \$fine_index,
            "cs=s" => \$headercs
    ) or pod2usage(2);

pod2usage(-exitval => 0, -verbose => 1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;

open FILE, "$ARGV[0]" or die "Cannot open input file $ARGV[0]\n";
open FILE_OUT, ">$ARGV[1]" or die "Cannot open input file $ARGV[1]\n";

if ( $headercs eq "" ) {
    if ( $fine_index ) {
        $headercs = $fine_headercs;
    } else {
        $headercs = $crude_headercs;
    }
}

print FILE_OUT $invocation_line;

sub lex_order (\@\@) { # lexicographic ordering

    my (@string1) = @{shift @_};
    my (@string2) = @{shift @_};

    my ($i);

    $i = 0;

    while ( $string1[$i] == $string2[$i] && $i <= $#string1 && $i <= $#string2 ) { $i++ }

    if ( $i > $#string1 || $i > $#string2 ) { 
   
	return $#string1 <=> $#string2;

    }

    return ( $string1[$i] <=> $string2[$i] );

}

sub numerically { $b <=> $a; }

sub alphabetically { # lexicographic ordering based on the ASCII order defined in @main_order

    my (@chars1) = map { $main_order{$_} } split //, $a;
    my (@chars2) = map { $main_order{$_} } split //, $b;

    return lex_order @chars1, @chars2;

}

sub lexicographically { # lexicographic ordering for numeric sequences separated by spaces

    my (@chars1) = split / /, $a;
    my (@chars2) = split / /, $b;

    return lex_order @chars1, @chars2;
}

$alphabet = " /\#\$\%^&*<>[]{}()+-=_|\\,:;~`.?!\'\"\@0123456789AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
@main_set = split //, $alphabet;
map { $main_order{$_} = index $alphabet, $_ } @main_set; # inefficient ...

$ldelim[0] = "\\("; $rdelim[0] = ")";
$ldelim[1] = "\\["; $rdelim[1] = "]";
$ldelim[2] = ""; $rdelim[2] = "";
$ldelim[3] = "\\g"; $rdelim[3] = "g";
$ldelim[4] = "\\f"; $rdelim[4] = "f";
$ldelim[5] = "\\e"; $rdelim[5] = "e";

sub alpha_jump { # have we switched to the next letter?

    my $a = substr shift @_, 0, 1;
    my $b = substr shift @_, 0, 1;

    $a =~ tr/a-z/A-Z/; $a =~ tr/a-zA-Z/0/cs;

    return (ord $a) <=> (ord $b);
}

while (<FILE>) {

    $input = $_;

    if ( $input =~ /\\i\@\@\@e\s*  \{([0-9]+)\}   # section number
                                   \{([0-9]+)\}   # page number
                                   \{((\{[^\{\}]*\})+)\} # host namespace, context, etc.
                                   \{([^\{\}]+)\} # domain
                                   \{([0-9]+)\}   # rank
                                   \{([^\{\}]*)\} # type1
                                   \{([^\{\}]+)\s*\} # type2
                                   \{((\{[^\{\}]+\})+|\\vend.*\\vend\s*)\} # key
                                   \{((\{[^\{\}]+\})*)\} # visual key
                                   (\%.*)*\n/x ) {
        # ordinary index entry
        
	($section, $pageno, $nspace, $junk0, $domain, $rank, $type1, $type2, $key, $junk1, $vkey) = 
        ($1,       $2,      $3,      $4,     $5,      $6,    $7,     $8,     $9,   $10,    $11  );
        
        $term = $key;
        $key =~ s/\{([0-9]+)\}/pack "c1", $1/eg;

        if ( $term =~ /\\vend/ ) {

            # only process the key if it is not raw

        } else {

            $term = $key;

        }

        $vkey =~ s/\{([0-9]+)\}/pack "c1", $1/eg;

        if ( $vkey ne "" ) {

            if (exists $index{$domain}{$vkey}{type} && $index{$domain}{$vkey}{type} ne $type2) {
                #warn "Differing output types for term <$term> with key <$vkey>.\n", "$index{$domain}{$vkey}{type} vs. $type2\n";
                $key = "$vkey $key";
                
                if (exists $index{$domain}{$key}{type} && $index{$domain}{$key}{type} ne $type2) {
                    warn "Differing output types for term <$term> with key <$vkey>.\n", "$index{$domain}{$vkey}{type} vs. $type2\n";
                }
            } else {
                $key = $vkey;
            }
        }
        
        #        print "KEY: ", $key, "  ", $vkey, "\n";
        
        push @{$index{$domain}{$key}{refs}}, "$section $rank $pageno";
        $index{$domain}{$key}{nspace} = $nspace;        
        $index{$domain}{$key}{type} = $type2;
        $index{$domain}{$key}{term} = $term;

    }
}

$i = 0;
$last_alpha = "0"; # the last index section

foreach $domain (sort keys %index ) {

    if ( $i > 0) {

        $last_alpha = "0";
	print FILE_OUT "\\indexseparator{$domain}{$i}\n";

    }

    $i++;

    foreach $key ( sort alphabetically keys %{$index{$domain}} ) {

        if (exists $index{$domain}{$key}{refs}) {

            %ref_hash = ();
            map { @r = split / /, $_; exists $ref_hash{$r[0]}{$r[1]}{$r[2]} ? 
                      $ref_hash{$r[0]}{$r[1]}{$r[2]}++ : ($ref_hash{$r[0]}{$r[1]}{$r[2]} = 0) } @{$index{$domain}{$key}{refs}};

            my @fine_ref_list = ();
            my @crude_ref_list = ();

            foreach $key ( reverse sort numerically keys %ref_hash ) {

                foreach $rkey ( reverse sort numerically keys %{$ref_hash{$key}} ) {

                    $ref_string = "$ldelim[$rkey]$key$rdelim[$rkey]";
                    push @crude_ref_list, $ref_string;
                    $ref_string = $ref_string."\{".(join ', ', (reverse sort numerically keys %{$ref_hash{$key}{$rkey}}))."\}";
                    push @fine_ref_list, $ref_string;

                }

            }

            $ref_string = join ', ', @fine_ref_list;
            $cref_string = join ', ', @crude_ref_list;
                    
            $term = $index{$domain}{$key}{term};
            $term_printable = $term;

            if ( $term =~ /\\vend/ ) {
              
                $term =~ s/\\vend(.*)\\vend/$1/eg; # unwrap the entry

            } else {

                $term =~ s/(.)/"\{".(unpack "c1", $1)."\}"/eg;

            }
            
            if ( alpha_jump( $key, $last_alpha ) > 0 ) {
                $last_alpha = substr $key, 0, 1;
                $last_alpha =~ tr/a-z/A-Z/;
                print FILE_OUT "\\indexsection{$last_alpha}\n",  
            }

            print FILE_OUT $headercs."{$index{$domain}{$key}{nspace}}{$index{$domain}{$key}{type}}{$term}, ". 
                ($fine_index ? $ref_string : $cref_string).".\% $term_printable, ($key)\n",
                "\% sec nos. ".$ref_string."\n";

        }	
    }
}

__END__

=head1 BINDX

bindx.pl - Postprocess an index (.gdx) in <input_file> to produce a set of index entries in
           the <output_file> (.gdy)

=head1 SYNOPSIS

bindx.pl [options] input_file output_file


 Options:
   --help|-h|-?      brief help message
   --man|-m          full documentation
   --fine|-f         add page references to each index entry
   --cs=<string>     specify the index control sequence name

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the manual page and exit.

=item B<--fine>

Create index entries in the form  B<r>I<nnn>B<l>{n1, n2, ...} where B<r> and B<l>
are the left and ring delimeters, I<nnn> is the section number and the list of page 
numbers appears inside the braced group.  

=item B<--cs>

The name of the index control sequence. The default is B<\GI> for the standard
index format and B<\FI> for the 'fine' format.

=back

=head1 DESCRIPTION

B<bindx.pl> will read the given <input_file>, and output an index 
in the <output_file>.

=cut