#!/usr/bin/perl

# Copyright 2012-2024, Alexander Shibakov
# This file is part of SPLinT
#
# SPLinT is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SPLinT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.

# a simple script to replace all @G ... @`other' regions with 
# @= ... @>@; lines
# comments are allowed at the end of the lines.
# only one style of comments is accepted: /* ... */. note that these are not
# output

use strict;
use Getopt::Long;
use Pod::Usage;

my $man = 0;
my $help = 0;
my $replace_only = '';
my $binterwork = '';
my $elang_start = "\@t}\\lsectionbegin{\%s}\\vb{\@>\n";  
my $elang_finish = "\@t}\\vb{\\yyendgame}\\vb{}\\endparse\\postparse{\@>\n"; 

#Getopt::Long::Configure ("bundling"); # to allow -abc to set a, b, and c

GetOptions ("help|?" => \$help, 
             man => \$man,
            "startol=s" => \$elang_start,    # the string that starts an `other language' region
            "finishol=s"  => \$elang_finish,  # the string that ends an `other language' region
            "bison-link=s" => \$binterwork, # whether to produce .z files to allow `$?' notation in \TeX]
            "replace-only" => \$replace_only, # make reverse substitutions only
    ) or pod2usage(2);

pod2usage(-exitval => 0, -verbose => 1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;

my %tex_replacements = ();

if ( $replace_only ) {
    open FILE, "$ARGV[0]" or die "Cannot open input file $ARGV[0]\n";
    open FILEOUT, ">$ARGV[1]" or die "Cannot open output file $ARGV[1]\n";
    open BLINK_IW, "$binterwork" or die "Cannot open link file $binterwork\n";

    while ( <BLINK_IW> ) {
        $_ =~ m/^(BZ\d+ZB_\d+)=(.*)$/;
        my ( $key, $value ) = ( $1, $2 );
        $tex_replacements{$key} = $value;
    }

    close BLINK_IW;

    while ( <FILE> ) {
        $_ =~ s/(BZ\d+ZB_\d+)/$tex_replacements{$1}/g;
        print FILEOUT $_;
    }

    close FILE;
    close FILEOUT;

    exit 0;
}

open FILE, "$ARGV[0]" or die "Cannot open input file $ARGV[0]\n";
open FILEOUT, ">$ARGV[1]" or die "Cannot open output file $ARGV[1]\n";

if ( $binterwork ) {
    open BLINK_IW, ">$binterwork" or die "Cannot open link file $binterwork\n";
    open BLINK, ">$ARGV[2]" or die "Cannot open diff file $ARGV[2]\n";
}

sub output_all {
    my $wline = shift;
    my $bline = shift || [];
    my @bline = @{$bline};

    if ( not @bline ) {
        @bline = @{$wline};
    }
    
    printf FILEOUT @{$wline};
    
    if ( $binterwork ) {
        printf BLINK @bline;
    }
}

my $state = 0;
my $paused_state = 0;

sub replace_tex {
    my $prefix = shift;
    my $tex_string = shift;
    my $suffix = shift;
    $prefix =~ s/TeX(a|b|ao|f|fo)_/TeX$1/;
    $tex_string =~ s{\$\[?(\d+)\]?}{/yy($1)}xg;
    $tex_string =~ s/\$\[?([a-zA-Z_.][a-zA-Z0-9_.]*)\]?/\/yy\]$1\[/xg;

    return $prefix.$tex_string.$suffix;
}

sub replace_c {
    my $prefix = shift;
    my $c_string = shift;
    my $suffix = shift;
    my $marker = shift;
    my $counter = shift;
    my @matches = ();
    my $match_count = 0;

    $$counter++;

    $marker .= "_".$$counter;
    $prefix =~ s/TeX(a|b|ao|f|fo)_/TeX$1/;
    $match_count = 
        ($c_string =~ s/
         \$(
         \d+|
         [a-zA-Z0-9_.]+|
         \[[a-zA-Z0-9_.]+\]
         )
         /replace_yy($1, \@matches)/xeg);

    if ( $match_count ) {
        $tex_replacements{$marker} =
            "TeX__(\"".$c_string."\",".(join ',', @matches).");";
        return $marker; # replacement for .c file
    } else {
        return $prefix.$c_string.$suffix;
    }
}

sub replace_yy{
    my $match = shift;
    my $matches = shift;
    $match =~ s/[\[\]\(\)]//g;
    $match = "[$match]" if $match !~ m/^\d/;
    push @{$matches}, "BZ(\$$match,\$1), BZ((yyvsp[0]), \$$match), BZZ((yyvsp[0]),\$1)";
    return "/yg{\%ld}{\%ld}{\%ld}";
}

my $line_count = 0;

while (<FILE>) {

    my $inline = $_;
    my $inline_z = $inline;
    my ( $string, $string_z, $comment );
    $line_count++;
    
    if ( $binterwork && $inline =~ m/TeX(_|a_|b_|ao_|f_|fo_)\b/ ) {
        my $marker = "BZ$line_count"."ZB";
        my $counter = 0;
        $inline_z =~ s/(TeX(_|a_|b_|ao_|f_|fo_)?\b\s*\([^"]*")(([^"]|\\")*)("[^"\)]*\);)/replace_c($1, $3, $5, $marker, \$counter)/eg;
        $inline   =~ s/(TeX(_|a_|b_|ao_|f_|fo_)?\b\s*\([^"]*")(([^"]|\\")*)("[^"\)]*\);)/replace_tex($1, $3, $5)/eg;
    }

    if ( $inline =~ m/^\@G(.*)$/ ) { # @G detected, this line is part of the `other language' region

	$inline = $1; $state = 1;
        
        if ( $inline =~ m/^\(([^)]*)\).*/ ) { # language specifier present
            $inline = $1;
        } else {
            $inline = "";
        }
        
        output_all( ["\@q Start generic language section\@>\n" . $elang_start, "$inline"] ); # a parser switcher

    } elsif ( $inline =~ m/^\@[\scp\*0-9].*$/ ) { # @`other' detected, so `other language' region is over

        if ($state == 1) {
	    output_all( ["\@q%s\@>\n" . $elang_finish, "End of generic language section"] ); # a parser switcher
        }

	$state = 0;
	output_all( ["%s", "$inline"] ); 

    } elsif ( $inline =~ m/^\s*\@[=t].*$/ ) { # @= detected, just copy the line

	output_all( ["%s", "$inline"] ); 

    } elsif ( $inline =~ m/^\@g(.*)$/ ) { # explicit end of other languge region detected

        $inline = $1;

        if ($state == 1) {
	    output_all( ["\@q%s\@>\n" . $elang_finish, "End of generic language section"] ); # a parser switcher
        }

	$state = 0; 

    } elsif ( $inline =~ m/^\@O(.*)$/ ) { # @O detected, so `other language' region is paused

	$paused_state = $state; $state = 0; 

	$inline = $1 || "End generic language section";
	output_all( ["\@q%s\@>\n", "$inline"] );

    } elsif ( $inline =~ m/^\@o(.*)$/ ) { # @o detected, so `other language' region is resumed

	$state = $paused_state; $paused_state = 0;

	$inline = $1 || "End generic language section";
	output_all( ["\@q%s\@>\n", "$inline"] );

    } elsif ( $state != 0 ) {

	if ( $inline =~ m/\/\*.*\*\/\s*$/ ) { # the line contains a comment at the end
	    
	    $inline =~ m/^(.*\S|)\s*(\/\*.*\*\/)\s*$/; # this is not very robust ...
	    $string = $1; $comment = $2;
	    $inline_z =~ m/^(.*\S|)\s*(\/\*.*\*\/)\s*$/; # this is not very robust ...
	    $string_z = $1;
	    
	} else {

	    $string = $inline; $comment = "";
	    $string_z = $inline_z;
	    
	}

        $string =~ s/\n//;
        $string_z =~ s/\n//;
        output_all( ["\@=%s\@>\@t}\\vb{\\n}{\@>\@;", ( $string || " " )],
            ["\@=%s\@>\@t}\\vb{\\n}{\@>\@;", ( $string_z || " " )] );
        output_all( ["%s", "$comment"] ) if $comment;
	output_all( ["%s", "\n"] );

    } else {
	
	output_all( ["%s", "$inline"],  ["%s", "$inline_z"] );
	
    }
    
}

foreach my $key ( keys %tex_replacements ) {
    print BLINK_IW "$key=$tex_replacements{$key}\n";
}

__END__

=head1 BRACK

brack.pl - Postprocess a CWEB file to allow language extensions

=head1 SYNOPSIS

brack.pl [options] --bison-link=<subst file> <input file> <B<CWEAVE>output file> <B<CTANGLE>output file>
    or
brack.pl --replace-only --bison-link=<subst file> <B<CTANGLE>output file> <output file>

 Options:
   --help|-h|-?      brief help message
   --man|-m          full documentation
   --startol|-s      string to begin a language region
   --finishol|-f     string to end a language region
   --bison-link|-b   file to write future substitutions to
   --replace-only|-r replace the result of postprocessing using the subst file

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the manual page and exit.

=item B<--startol>=I<CWEB string>

The string to print at the beginning of an other language region

=item B<--finishol>=I<CWEB string>

The string to print at the end of an other language region

=item B<--bison-link>=I<file name>

The file to write future substitutions to

=item B<--replace-only>

Carry out the replacements using the substitution file and quit

=back

=head1 DESCRIPTION

B<brack.pl> will read the given <input_file>, and the format @G(...) sections 
appropriately to be read by CWEAVE and output the result in the <output_file>.

The processing mechanism is very primitive and makes use of some assumptions
on the appearance of the  B<CWEB> file. Unlike the 'standard' B<CWEB> input, the
new 'generic language' section markers (the @G(...) construct) are 
I<case sensitive> and I<must> appear at the beginning of the line. The 
'other language' markers (the @O... sections) follow the same restrictions as
the @G sections above, and I<do not nest>.

A comment at the very end of the line is moved to the B<C> portion of the input.
To put the comment inside the verbatim blocks, one may surround it by [@>@=] and
[@>@= ] (the square brackets are not part of the input and are here to draw attention
to the spacing, see next). Note the space at the end of the closing construct: this
is necessary to pacify B<CWEAVE>.

In order to use the `native' bison term references inside TeX code, the
      brack.pl --bison-link=<subst file> <input file> <B<CWEAVE>output file> <B<CTANGLE>output file>
form must be used first to create the substitutions file as well as two B<CWEB> files.
Then B<CTANGLE> is run on the <B<CTANGLE>output file> and finally 
      brack.pl --replace-only --bison-link=<subst file> <B<CTANGLE>output file> <output file>
to finish the creation of a file suitable for producing a parsing automaton.
=cut
