#!/usr/bin/perl
#
# indentada.pl - Reindenter for Ada files
#
# Requires:     UniLexer.pm
#
# Usage:        $ perl indentada.pl InputFile
#                  indents the InputFile with 3 spaces as one indentation
#               $ perl indentada.pl -4 InputFile
#                  indents the InputFile with 4 spaces as one indentation
#               Output goes to stdout.
#
# Copyright (C) 2008, Oliver Kellogg <okellogg@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# $Log: indentada.pl,v $
# Revision 1.2  2020/10/18 21:25:35  okellogg
# - In @reswd add 'protected'.
# - Declare sub handle_trailing_comment before first call.
#
# Revision 1.1  2012/07/11 05:36:16  kellogg
# Handle "is separate"
#
# Revision 1.0  2008/08/16 19:20:52  kellogg
# various fixes mostly related to comment handling
#
# Revision 0.9  2008/07/06 19:12:56  kellogg
# Align with UniLexer.pm version 0.8.
# Improve formatting of subprogram parameters.
#
# Revision 0.8  2008/06/17 20:34:27  kellogg
# New sub skip_comment fixes handling of comments during searches.
#
# Revision 0.7  2008/06/17 06:21:28  kellogg
# Implement handling of declarative regions (parse_block, @stack)
#
# Revision 0.6  2008/06/15 21:28:01  kellogg
# Fixed indentation of "elsif". Fixed criterion for adding space after closing parenthesis.
# Improved the hack bridging the missing block stack.
#
#
# Version History
#--------+------------------------------------------------------------------
#   0.5   New command line switch "-c" applies Ada95_Style to identifiers.
#         Fixed handling of task declarations.
#         TODO: Make a block stack - the program currently cannot handle any
#         kind of nested blocks.
#
#   0.4   Fixed the formatting in case statements and exception handlers.
#         New subroutines (prettytoken, parenth_term, prettyprint) prepare
#         for work on expression wrapping. But it's not there yet.
#
#   0.3   Fixed excess indentation after "and then" / "or else".
#         First very primitive implementation of RHS expression wrapping.
#
#   0.2   Added command line switch for user specified indentation amount.
#         Added extra newline after 'end' of unit.
#         Do not print whitespace around '.' and '(' ')' ',' ';'
#

use UniLexer;

$one_indentation = 3;  # one indentation measured in spaces
$right_border   = 80;  # column after which to wrap lines
$change_casing = 0;    # boolean for Ada95_Identifier_Style

@lex = ();
my $lndx = 0;

my $indentlevel = 0;
my $prindentlevel = 0;  # "pretty indent level", used for long RHS expressions
                        # that span multiple lines
my $column = 0;         # output column of next character that will be printed
my @reswd = qw(
               abort abs accept access all and array at
               begin body
               case constant
               declare delay delta digits do
               else elsif end entry exception exit
               for function
               generic goto
               if in interface is
               limited loop
               mod
               new not null
               of or others out
               package pragma private procedure protected
               raise range record rem renames return reverse
               select separate subtype
               tagged task terminate then type
               until use
               when while with
               xor);

sub emit {
    my $txt = shift;
    print $txt;
    if ($txt =~ /\n$/) {
        $column = 0;
    } else {
        $column += length($txt);
    }
}

sub amount {
   return $one_indentation * ($indentlevel + $prindentlevel);
}

sub spaces {
    return ' ' x amount;
}

sub dent {
    emit(spaces . shift);
}

sub indent {
    dent shift;
    $indentlevel++;
}

sub decr_indentlevel {
    my $where = shift;
    if ($indentlevel == 0) {
        warn "$where (tokenindex $lndx): internal error - negative indentlevel\n";
    } else {
        $indentlevel--;
    }
}

sub dedent {
    my $text = shift;
    decr_indentlevel("dedent $text");
    dent $text;
}

sub ada95_identifier_style {
    my $tok = shift;
    if ($tok =~ /^ ?--/) {
        return $tok;
    }
    my $identifier = ucfirst(lc($tok));
    $identifier =~ s/_([a-z])/_\u$1/g;
    return $identifier;
}

sub is_reserved_word {
    my $lx = shift;
    my $exclude_mode = 0;
    if (@_) {
        $exclude_mode = shift;
    }
    $lx = lc($lx);
    if ($exclude_mode &&
        ($lx eq 'in' || $lx eq 'out' || $lx eq 'access')) {
        return 0;
    }
    my $is_reswd = 0;
    foreach (@reswd) {
        if ($lx eq $_) {
            $is_reswd = 1;
            last;
        }
    }
    return $is_reswd;
}

sub prettytoken {
    my $lexindex = shift;
    my $lx = $lex[$lexindex];
    my $nx = $lex[$lexindex + 1];

    if ($lx =~ /^ ?--/) {
        $lx .= "\n" . spaces;
        return $lx;
    }
    if (is_reserved_word($lx)) {
        $lx = lc($lx);
    } elsif ($change_casing) {
        $lx = ada95_identifier_style($lx);
    }
    if ($lx eq ')' && $nx ne ')' && $nx ne ',' && $nx ne ';' && $nx ne '.') {
        $lx .= ' ';
    } elsif ($lx ne "." && $lx ne "'" && $lx ne "(" && $lx ne ")" &&
             $nx ne "." && $nx ne "'" && $nx ne ")" && $nx ne "," &&
             $nx ne ";") {
        $lx .= ' ';
    }
    return $lx;
}

sub skip_comment {
    my $lexindex = shift;
    while ($lexindex < scalar(@lex)) {
        last if ($lex[$lexindex] !~ /^ ?--/);
        $lexindex++;
    }
    return $lexindex;
}

sub find_closing {
    my $i = shift;
    my $stop_on_comment = 0;
    if (@_) {
        $stop_on_comment = shift;
    }
    my $stop_on_semi = 0;
    if (@_) {
        $stop_on_semi = shift;
    }
    my $parenth_level = 0;
    while ($i < scalar(@lex)) {
        my $lx = $lex[++$i];
        if ($lx =~ /^ ?--/) {
            last if $stop_on_comment;
            $lx = skip_comment($i);
        }
        last if (!defined($lx) || ($stop_on_semi && $lx eq ';'));
        if ($lx eq ')') {
            last if ($parenth_level == 0);
            $parenth_level--;
        } elsif ($lx eq '(') {
            $parenth_level++;
        }
    }
    return $i;
}

sub parenth_term {
    my $lexindex = shift;
    my $text = $lex[$lexindex];
    if ($text ne '(') {
        return "";
    }
    my $endpos = find_closing($lexindex, 0, 0);
    for (my $i = $lexindex; $i <= $endpos; $i++) {
        $text .= prettytoken($i);
    }
    return $text;
}

my $at_new_line = 1;

sub handle_trailing_comment;

sub prettyprint {
    # Leaves $lndx pointing to the last token printed.
    my $param_decl = 0;
    if (@_) {
        $param_decl = shift;
    }
    while ($lex[$lndx] =~ /^ ?--/) {
        dent($lex[$lndx] . "\n");
        $lndx++;
    }
    if ($column >= $right_border) {
        warn("prettyprint: inserting newline at $lndx ($lex[$lndx]) "
             . "because column exceeds $right_border\n");
        emit "\n";
        emit spaces;
    }
    $at_new_line = 0;
    my $pterm = parenth_term($lndx);
    my $plen  = length($pterm);
    unless ($plen) {
       my $text = &prettytoken($lndx);
       emit $text;
       $lndx = handle_trailing_comment($lndx);
       return;
    }
    my $endindex = find_closing($lndx);
    if ($plen > $right_border - $column) {
        emit "\n";
        $prindentlevel++;
        my $col = &amount - 1;
        emit(' ' x $col);
        while (1) {
            my $ptok = prettytoken($lndx);
            $col += length($ptok);
            if ($col >= $right_border) {
                emit "\n";
                emit spaces;
                $col = amount;
            }
            emit $ptok;
            last if ($lndx >= $endindex);
            my $nx = $lex[$lndx + 1];
            if ($nx eq ';') {
                if ($param_decl) {
                    emit ";";
                    my $newindex = handle_trailing_comment(++$lndx);
                    if ($newindex == $lndx) {
                        emit "\n";
                    } else {
                        $lndx = $newindex;
                    }
                    emit spaces;
                    $col = amount;
                } else {
                    $prindentlevel = 0;
                    return;
                }
            } elsif (!$param_decl && is_reserved_word($nx)) {
                $prindentlevel = 0;
                return;
            }
            $lndx++;
        }
        $prindentlevel--;
    } else {
        $prindentlevel++;
        if ($param_decl && $lex[$lndx] eq '(') {
            my $col = &amount - 1;
            emit(' ' x $col);
        }
        while (1) {
            emit(prettytoken($lndx));
            last if ($lndx >= $endindex);
            $lndx++;
        }
        $lndx = handle_trailing_comment($lndx);
        while ($lex[$lndx + 1] =~ /^ ?--/) {
            dent($lex[++$lndx] . "\n");
        }
        $prindentlevel--;
    }
}

# Values pushed onto @stack in sub parse_block:
sub K_BEGIN   () { 0 }  # only set on 'begin' without preceding 'declare'
sub K_WHEN    () { 1 }

sub handle_trailing_comment {
    my $lexindex = shift;
    if ($lex[$lexindex + 1] =~ /^ --/) {
        emit $lex[++$lexindex];
        emit "\n";
        $at_new_line = 1;
    }
    $prindentlevel++;
    while ($lex[$lexindex + 1] =~ /^ ?--/) {
        dent($lex[++$lexindex] . "\n");
        $at_new_line = 1;
    }
    $prindentlevel--;
    return $lexindex;
}

sub handle_eol {
    my $lexindex = shift;
    my $newindex = handle_trailing_comment($lexindex);
    if ($newindex == $lexindex) {
        emit "\n";
        $at_new_line = 1;
    } else {
        $lexindex = $newindex;
    }
    return $lexindex;
}


sub parse_block;

sub parse_block {
    my $seen_declare = 1;
    my $seen_unit = 0;
    my $seen_case = 0;
    my $seen_end = 0;
    my $seen_assignment = 0;
    my $seen_exception = 0;
    my @stack = ();
    $at_new_line = 1;

    for (; $lndx < scalar(@lex); $lndx++) {
        my $lx = $lex[$lndx];
        if ($lx eq ';') {
            emit ';';
            $lndx = handle_eol($lndx);
            if ($seen_end) {
                emit "\n";
                $seen_end = 0;
            }
            $seen_assignment = 0;
            next;
        }
        if ($lx =~ /^--/) {
            dent "$lx\n";
            $at_new_line = 1;
            next;
        }
        $lx = lc($lx);
        if ($lx eq 'end') {
            my $nx = lc($lex[$lndx + 1]);
            if (@stack && $stack[$#stack] == K_WHEN and $nx !~ /^(if|loop|select)$/) {
                decr_indentlevel("end case");
                pop @stack;
            }
            dedent "$lx ";
            unless ($nx =~ /^(record|if|case|loop|select)$/) {
                if (@stack && $stack[$#stack] == K_BEGIN) {
                    pop @stack;
                } else {
                    return 0;
                }
            }
            $at_new_line = 0;
            if ($seen_exception) {
                # @todo This is far too primitive and does not work in the
                #       presence of blocks nested in the exception handler.
                $seen_exception = 0;
            }
        } elsif ($lx eq 'record' || $lx eq 'loop' || $lx eq 'select') {
            if ($at_new_line) {
                emit spaces;
                $at_new_line = 0;
            }
            emit "$lx";
            my $newindex = handle_trailing_comment($lndx);
            if (lc($lex[$lndx - 1]) eq 'end') {
                if ($lx eq 'record') {
                    $seen_end = 1;
                }
            } else {
                emit "\n";
                $indentlevel++;
                $at_new_line = 1;
            }
            $lndx = $newindex;
        } elsif (($lx eq 'then' && lc($lex[$lndx - 1]) ne 'and') || $lx eq 'do') {
            if ($at_new_line) {
                emit spaces;
            }
            emit "$lx";
            $lndx = handle_eol($lndx);
            $indentlevel++;
        } elsif ($lx eq 'declare') {
            indent "$lx";
            $lndx = handle_eol($lndx);
            # $at_new_line = 1;
            # $seen_declare = 1;
            $lndx++;
            $at_new_line = parse_block;
            $seen_end = 1;
        } elsif ($lx eq 'case') {
            if (lc($lex[$lndx - 1]) eq 'end') {
                emit "$lx";
                $seen_case = 0;
            } else {
                emit spaces;
                $seen_case = 1;
                $at_new_line = 0;
                emit "$lx ";
            }
        } elsif ($lx eq 'exception' && $lex[$lndx + 1] ne ';') {
            dedent "$lx";
            $lndx = handle_eol($lndx);
            $indentlevel++;
            $seen_exception = 1;
        } elsif ($lx eq 'when' && ($seen_case || $seen_exception)) {
            if (@stack && $stack[$#stack] == K_WHEN) {
                decr_indentlevel("repeated \"when\"");
            } else {
                push @stack, K_WHEN;
            }
            emit spaces;
            $indentlevel++;
            while ($lndx < scalar(@lex)) {
                emit prettytoken($lndx);
                $lndx = handle_trailing_comment($lndx);
                last if ($lex[$lndx] eq '=>');
                $lndx++;
            }
            emit "\n";
            $at_new_line = 1;
        } elsif ($lx eq 'is') {
            if ($seen_unit || $seen_case) {
                if ($at_new_line) {
                    emit spaces;
                }
                emit "is";
                if ($lex[$lndx + 1] eq 'separate') {
                    emit " separate;";
                    $lndx = handle_eol($lndx + 2);
                } else {
                    $lndx = handle_eol($lndx);
                    $indentlevel++;
                    if ($seen_unit) {
                        $lndx++;
                        $at_new_line = parse_block;
                        $seen_end = 1;
                        $seen_unit = 0;
                    } else {
                        $at_new_line = 1;
                    }
                }
            } else {
                emit "$lx ";
            }
        } elsif ($lx eq 'begin') {
            if ($seen_declare) {
                $seen_declare = 0;
                decr_indentlevel("begin after declare");
            } else {
                push @stack, K_BEGIN;
            }
            indent "$lx\n";
            $lndx = handle_eol($lndx);
        } elsif ($lx eq 'elsif') {
            dedent "$lx ";
            $at_new_line = 0;
        } elsif ($lx eq 'else' && lc($lex[$lndx - 1]) ne 'or') {
            dedent "$lx";
            $lndx = handle_eol($lndx);
            $indentlevel++;
        } else {
            if ($lx eq 'body') {
                $seen_unit = 1;
                $seen_declare = 1;
            } elsif ($lx eq 'package' || $lx eq 'procedure' || $lx eq 'function') {
                unless (lc($lex[$lndx + 3]) eq 'new') {
                    emit "\n";
                    $at_new_line = 0;
                    dent "$lx ";
                    emit(prettytoken(++$lndx));
                    if ($lex[$lndx] eq "body") {
                        emit(prettytoken(++$lndx));
                    }
                    $lndx = handle_trailing_comment($lndx);
                    if ($lex[$lndx + 1] eq '(') {
                        $lndx++;
                        prettyprint(1);
                        $lndx = handle_trailing_comment($lndx);
                    }
                    # Determine whether to set $is_unit -
                    # which shall not be set on specifications.
                    if ($lex[$lndx + 1] eq 'return') {
                        while (++$lndx < scalar(@lex)) {
                            emit(prettytoken($lndx));
                            $lndx = handle_trailing_comment($lndx);
                            my $txt = lc($lex[$lndx + 1]);
                            last if ($txt eq 'is' || $txt eq ';');
                        }
                    }
                    if ($lex[$lndx + 1] eq 'is') {
                        $seen_unit = 1;
                    }
                    next;
                }
            } elsif ($lx eq 'task') {
                emit "\n";
                $at_new_line = 1;
                # Determine whether to set $is_unit -
                # which shall not be set on specifications.
                my $ndx = $lndx + 1;
                if ($lex[$ndx] eq 'type') {
                    $ndx++;
                }
                $ndx = skip_comment(++$ndx);
                if ($lex[$ndx] eq 'is') {
                    $seen_unit = 1;
                }
            } elsif ($lx eq ':=') {
                $seen_assignment = 1;
            }
            if ($at_new_line) {
                emit spaces;
                $at_new_line = 0;
                $prindentlevel = 0;
            }
            prettyprint;
        }
    }
}

#### Main program ####

my $filename = "";

@ARGV or die "supply input file name\n";

foreach (@ARGV) {
    if (/^-(\d+)$/) {
        $one_indentation = $1;
    } elsif (/^-c$/) {
        $change_casing = 1;
    } else {
        $filename = $_;
    }
}

@lex = UniLexer::lex($filename);
@lex or die "an error happened in UniLexer::lex, cannot continue\n";

# @lindex = @UniLexer::lindex;
# for (my $i = 0; $i < scalar(@lindex); $i++) {
#     if (defined $lindex[$i]) {
#        print("lindex $i = $lindex[$i]\n");
#     } else {
#        print("lindex $i = undef\n");
#     }
# }
# print "\n";

parse_block;
while ($lndx + 1 < scalar(@lex)) {
    emit(prettytoken(++$lndx));
}
emit "\n\n";

1;