diff options
Diffstat (limited to 'runtime/Perl5')
76 files changed, 5103 insertions, 0 deletions
diff --git a/runtime/Perl5/.p4ignore b/runtime/Perl5/.p4ignore new file mode 100644 index 0000000..afa10e0 --- /dev/null +++ b/runtime/Perl5/.p4ignore @@ -0,0 +1,8 @@ +Build +Build.bat +Makefile +Makefile.old +_build +blib +pm_to_blib +tags diff --git a/runtime/Perl5/Build.PL b/runtime/Perl5/Build.PL new file mode 100644 index 0000000..8d9e4f6 --- /dev/null +++ b/runtime/Perl5/Build.PL @@ -0,0 +1,20 @@ +use Module::Build; + +my $build = Module::Build->new( + module_name => 'ANTLR::Runtime', + license => 'perl', + requires => { + 'Carp' => '1.04', + 'Moose' => '0.82', + 'Readonly' => '1.03', + 'List::MoreUtils' => '0.22', + 'Want' => '0.18', + 'Error' => '0.17014', + }, + build_requires => { + 'Test::Class' => '0.28', + 'Java::JVM::Classfile' => '0.20', + }, + test_files => 't/*.t t/examples/*.t', +); +$build->create_build_script; diff --git a/runtime/Perl5/Changes b/runtime/Perl5/Changes new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/runtime/Perl5/Changes diff --git a/runtime/Perl5/INSTALL b/runtime/Perl5/INSTALL new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/runtime/Perl5/INSTALL diff --git a/runtime/Perl5/MANIFEST b/runtime/Perl5/MANIFEST new file mode 100644 index 0000000..a7db480 --- /dev/null +++ b/runtime/Perl5/MANIFEST @@ -0,0 +1,59 @@ +Build.PL +Changes +INSTALL +MANIFEST This list of files +MANIFEST.SKIP +Makefile.PL +README +docs/design.pod +examples/README +examples/expr/Expr.g +examples/expr/expr.pl +examples/id/IDLexer.g +examples/id/id.pl +examples/mexpr/MExpr.g +examples/mexpr/mexpr.pl +examples/simplecalc/SimpleCalc.g +examples/simplecalc/simplecalc.pl +examples/zero-one/T.g +examples/zero-one/t-error.pl +examples/zero-one/t.pl +lib/ANTLR/Runtime.pm +lib/ANTLR/Runtime/ANTLRFileStream.pm +lib/ANTLR/Runtime/ANTLRStringStream.pm +lib/ANTLR/Runtime/BaseRecognizer.pm +lib/ANTLR/Runtime/BitSet.pm +lib/ANTLR/Runtime/CharStream.pm +lib/ANTLR/Runtime/CharStreamState.pm +lib/ANTLR/Runtime/CommonToken.pm +lib/ANTLR/Runtime/CommonTokenStream.pm +lib/ANTLR/Runtime/DFA.pm +lib/ANTLR/Runtime/EarlyExitException.pm +lib/ANTLR/Runtime/Exception.pm +lib/ANTLR/Runtime/IntStream.pm +lib/ANTLR/Runtime/Lexer.pm +lib/ANTLR/Runtime/MismatchedSetException.pm +lib/ANTLR/Runtime/MismatchedTokenException.pm +lib/ANTLR/Runtime/MissingTokenException.pm +lib/ANTLR/Runtime/NoViableAltException.pm +lib/ANTLR/Runtime/Parser.pm +lib/ANTLR/Runtime/RecognitionException.pm +lib/ANTLR/Runtime/Stream.pm +lib/ANTLR/Runtime/Token.pm +lib/ANTLR/Runtime/TokenSource.pm +lib/ANTLR/Runtime/TokenStream.pm +lib/ANTLR/Runtime/UnwantedTokenException.pm +port.yml +t/author/api.t +t/author/perlcritic.t +t/author/perlcriticrc +t/author/pod-coverage.t +t/author/pod.t +t/examples/expr.t +t/examples/simplecalc.t +t/lexer.t +t/lib/ANTLR/Runtime/Test.pm +t/version.t +tools/antlr.bat +tools/antlr.sh +tools/port.pl diff --git a/runtime/Perl5/MANIFEST.SKIP b/runtime/Perl5/MANIFEST.SKIP new file mode 100644 index 0000000..407a77e --- /dev/null +++ b/runtime/Perl5/MANIFEST.SKIP @@ -0,0 +1,10 @@ +# project files +\.kpf$ + +^Makefile$ +^pm_to_blib$ +^blib/ + +# temporary and backup files +~$ +\.bak$ diff --git a/runtime/Perl5/Makefile.PL b/runtime/Perl5/Makefile.PL new file mode 100644 index 0000000..7f7268a --- /dev/null +++ b/runtime/Perl5/Makefile.PL @@ -0,0 +1,28 @@ +use ExtUtils::MakeMaker; + +sub MY::postamble { + my $postamble = <<'MAKE_FRAG'; +.PHONY: tags + +tags: + ctags -f tags --recurse --totals \ + --exclude=blib \ + --exclude='*~' \ + --languages=Perl --langmap=Perl:+.t \ +MAKE_FRAG +} + +WriteMakefile( + NAME => 'ANTLR::Runtime', + VERSION_FROM => 'lib/ANTLR/Runtime.pm', + LICENSE => 'perl', + ABSTRACT_FROM => 'lib/ANTLR/Runtime.pm', + AUTHOR => 'Ronald Blaschke <ron@rblasch.org>', + PREREQ_PM => { + 'Carp' => '1.04', + 'Moose' => '0.82', + 'Readonly' => '1.03', + 'Test::Class' => '0.28', + }, + test => { TESTS => 't/*.t t/examples/*.t' }, +); diff --git a/runtime/Perl5/README b/runtime/Perl5/README new file mode 100644 index 0000000..9bb3fd0 --- /dev/null +++ b/runtime/Perl5/README @@ -0,0 +1,28 @@ +ANTLR::Runtime + +This is the Perl 5 runtime for ANTLR. It is currently under early +development. Most parts are not working yet, and some of those that are need +to catch up with the current development version. The good news is that some +parts are working, as illustrated by the examples. You'd probably want to +take a look at examples/expr/. + + +KNOWN ISSUES +============ + +* Perl's syntax clashes with ANTLR's StringTemplate ('%') and variable ('$') +syntax. Expect error messages like the following. + +error(146): Expr.g:10:10: invalid StringTemplate % shorthand syntax: '%memory' +error(114): Expr.g:18:9: attribute is not a token, parameter, or return value: memory +error(114): Expr.g:36:9: attribute is not a token, parameter, or return value: v + +Currently, it's best to check the syntax of the generated code, like so: + +$ perl -c ExprLexer.pm +ExprLexer.pm syntax OK + +$ perl -c ExprParser.pm +ExprParser.pm syntax OK + +Or use the lexer/parser in an example. diff --git a/runtime/Perl5/docs/design.pod b/runtime/Perl5/docs/design.pod new file mode 100644 index 0000000..587a64f --- /dev/null +++ b/runtime/Perl5/docs/design.pod @@ -0,0 +1,66 @@ +=head1 NAME + +design.pod - ANTLR::Runtime Design Document + +=head1 ABSTRACT + +This document describes the design of the C<ANTLR::Runtime> Perl 5 port. + +=head1 OVERVIEW + +C<ANTLR::Runtime> is the port of the runtime part for ANTLR to Perl 5. ANTLR's +primary target language is Java. This port tries to mimic this implementation +very closely, which hopefully makes it easier to port. The parts visible to +Perl users should be perlish if possible - at least as a secondary interface. + +=head1 Basic Porting Considerations + +Java is built on two basic concepts: Objects and Exceptions. If there's a +good match for those features most other things should fall into place. + +=head2 OO + +C<ANTLR::Runtime> uses C<Object::InsideOut> for OO modelling. + +=head3 Packages + +The Java package C<org.antlr.runtime> maps to the Perl 5 package +C<ANTLR::Runtime>. + +=head3 Classes + +Each Java class maps to a Perl 5 class with the same name. + +=head2 Exceptions + +C<Exception::Class> should meet our needs and is used by C<Object::InsideOut> +too. + +=head2 Types + +Here's a list of Java types and how they should map to Perl. + +=over + +=item byte, short, int, long, float, double, String + +Use a Perl scalar. + +=item C<java.util.List> + +Use a reference to a Perl array. + +=item C<java.util.Map> + +Use a reference to a Perl hash. + +=back + +=head1 SEE ALSO + +L<http://www.antlr.org/> + + +=head1 AUTHOR + +Ronald Blaschke (ron@rblasch.org) diff --git a/runtime/Perl5/examples/README b/runtime/Perl5/examples/README new file mode 100644 index 0000000..ebd4150 --- /dev/null +++ b/runtime/Perl5/examples/README @@ -0,0 +1,16 @@ +This directory contains examples for the Perl 5 code generation target. + +First you need to compile ANTLR, running "ant" in the top level directory. + +Then you need to build the Perl module. + + perl Makefile.PL + make + +After that you need to compile the grammar using ANTLR, for example by using +the antlr.bat I've added. + + cd examples\expr + ..\..\antlr Expr.g + +Finally you can run the example programs. diff --git a/runtime/Perl5/examples/expr/Expr.g b/runtime/Perl5/examples/expr/Expr.g new file mode 100644 index 0000000..030098b --- /dev/null +++ b/runtime/Perl5/examples/expr/Expr.g @@ -0,0 +1,50 @@ +grammar Expr; + +options { + language = Perl5; +} + +@header { +} + +@members { + my %memory; +} + +prog: stat+ ; + +stat: expr NEWLINE { print "$expr.value\n"; } + | ID '=' expr NEWLINE + { $memory{$ID.text} = $expr.value; } + | NEWLINE + ; + +expr returns [value] + : e=multExpr { $value = $e.value; } + ( '+' e=multExpr { $value += $e.value; } + | '-' e=multExpr { $value -= $e.value; } + )* + ; + +multExpr returns [value] + : e=atom { $value = $e.value; } ('*' e=atom { $value *= $e.value; })* + ; + +atom returns [value] + : INT { $value = $INT.text; } + | ID + { + my $v = $memory{$ID.text}; + if (defined $v) { + $value = $v; + } else { + print STDERR "undefined variable $ID.text\n"; + } + } + | '(' expr ')' { $value = $expr.value; } + ; + +ID : ('a'..'z'|'A'..'Z')+ ; +INT : '0'..'9'+ ; +NEWLINE:'\r'? '\n' ; +WS : (' '|'\t')+ { $self->skip(); } ; diff --git a/runtime/Perl5/examples/expr/expr.pl b/runtime/Perl5/examples/expr/expr.pl new file mode 100644 index 0000000..7c849cd --- /dev/null +++ b/runtime/Perl5/examples/expr/expr.pl @@ -0,0 +1,24 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use blib; + +use ANTLR::Runtime::ANTLRStringStream; +use ANTLR::Runtime::CommonTokenStream; +use ExprLexer; +use ExprParser; + +my $in; +{ + undef $/; + $in = <>; +} + +my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => $in }); +my $lexer = ExprLexer->new({ input => $input }); + +my $tokens = ANTLR::Runtime::CommonTokenStream->new({ token_source => $lexer }); +my $parser = ExprParser->new({ input => $tokens }); +$parser->prog(); diff --git a/runtime/Perl5/examples/id/IDLexer.g b/runtime/Perl5/examples/id/IDLexer.g new file mode 100644 index 0000000..a92efd7 --- /dev/null +++ b/runtime/Perl5/examples/id/IDLexer.g @@ -0,0 +1,7 @@ +lexer grammar IDLexer; +options { language = Perl5; } + +ID : ('a'..'z'|'A'..'Z')+ ; +INT : '0'..'9'+ ; +NEWLINE:'\r'? '\n' { $self->skip(); } ; +WS : (' '|'\t')+ { $channel = HIDDEN; } ; diff --git a/runtime/Perl5/examples/id/id.pl b/runtime/Perl5/examples/id/id.pl new file mode 100644 index 0000000..a31deb4 --- /dev/null +++ b/runtime/Perl5/examples/id/id.pl @@ -0,0 +1,25 @@ +#!/usr/bin/perl + +use blib; + +use English qw( -no_match_vars ); +use ANTLR::Runtime::ANTLRStringStream; +use IDLexer; + +use strict; +use warnings; + +my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => "Hello World!\n42\n" }); +my $lexer = IDLexer->new({ input => $input }); + +while (1) { + my $token = $lexer->next_token(); + last if $token->get_type() == IDLexer->EOF; + + print "text: ", $token->get_text(), "\n"; + print "type: ", $token->get_type(), "\n"; + print "pos: ", $token->get_line(), ':', $token->get_char_position_in_line(), "\n"; + print "channel: ", $token->get_channel(), "\n"; + print "token index: ", $token->get_token_index(), "\n"; + print "\n"; +} diff --git a/runtime/Perl5/examples/mexpr/MExpr.g b/runtime/Perl5/examples/mexpr/MExpr.g new file mode 100644 index 0000000..b2e045f --- /dev/null +++ b/runtime/Perl5/examples/mexpr/MExpr.g @@ -0,0 +1,28 @@ +grammar MExpr; + +options { + language = Perl5; +} + +prog: stat+ ; + +stat: expr NEWLINE { print "$expr.value\n"; } + | NEWLINE + ; + +expr returns [value] + : e=atom { $value = $e.value; } + ( '+' e=atom { $value += $e.value; } + | '-' e=atom { $value -= $e.value; } + )* + ; + +atom returns [value] + : INT { $value = $INT.text; } + | '(' expr ')' { $value = $expr.value; } + ; + +ID : ('a'..'z'|'A'..'Z')+ ; +INT : '0'..'9'+ ; +NEWLINE:'\r'? '\n' ; +WS : (' '|'\t')+ { $self->skip(); } ; diff --git a/runtime/Perl5/examples/mexpr/mexpr.pl b/runtime/Perl5/examples/mexpr/mexpr.pl new file mode 100644 index 0000000..6c3ef93 --- /dev/null +++ b/runtime/Perl5/examples/mexpr/mexpr.pl @@ -0,0 +1,20 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use blib; + +use ANTLR::Runtime::ANTLRStringStream; +use ANTLR::Runtime::CommonTokenStream; +use MExprLexer; +use MExprParser; + +while (<>) { + my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => $_ }); + my $lexer = MExprLexer->new($input); + + my $tokens = ANTLR::Runtime::CommonTokenStream->new({ token_source => $lexer }); + my $parser = MExprParser->new($tokens); + $parser->prog(); +} diff --git a/runtime/Perl5/examples/simplecalc/SimpleCalc.g b/runtime/Perl5/examples/simplecalc/SimpleCalc.g new file mode 100644 index 0000000..a819e41 --- /dev/null +++ b/runtime/Perl5/examples/simplecalc/SimpleCalc.g @@ -0,0 +1,29 @@ +grammar SimpleCalc; +options { language = Perl5; } + +tokens { + PLUS = '+' ; + MINUS = '-' ; + MULT = '*' ; + DIV = '/' ; +} + +/*------------------------------------------------------------------ + * PARSER RULES + *------------------------------------------------------------------*/ + +expr : term ( ( PLUS | MINUS ) term )* ; + +term : factor ( ( MULT | DIV ) factor )* ; + +factor : NUMBER ; + +/*------------------------------------------------------------------ + * LEXER RULES + *------------------------------------------------------------------*/ + +NUMBER : (DIGIT)+ ; + +WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = $self->HIDDEN; } ; + +fragment DIGIT : '0'..'9' ; diff --git a/runtime/Perl5/examples/simplecalc/simplecalc.pl b/runtime/Perl5/examples/simplecalc/simplecalc.pl new file mode 100644 index 0000000..e6176ef --- /dev/null +++ b/runtime/Perl5/examples/simplecalc/simplecalc.pl @@ -0,0 +1,27 @@ +#!perl + +use strict; +use warnings; + +use ANTLR::Runtime::ANTLRFileStream; +use ANTLR::Runtime::CommonTokenStream; +use ANTLR::Runtime::RecognitionException; +use SimpleCalcLexer; +use SimpleCalcParser; + +my $input = ANTLR::Runtime::ANTLRFileStream->new({ file_name => $ARGV[0] }); +my $lexer = SimpleCalcLexer->new({ input => $input }); +my $tokens = ANTLR::Runtime::CommonTokenStream->new({ token_source => $lexer }); +my $parser = SimpleCalcParser->new({ input => $tokens }); +eval { + $parser->expr(); + print "ok\n"; + print "errors: ", $parser->get_number_of_syntax_errors(), "\n"; + print "failed: ", $parser->failed(), "\n"; +}; +if (my $ex = ANTLR::Runtime::RecognitionException->caught()) { + print $ex->trace, "\n"; +} +elsif ($ex = $@) { + die $ex; +} diff --git a/runtime/Perl5/examples/tweak/T.g b/runtime/Perl5/examples/tweak/T.g new file mode 100644 index 0000000..0ad1b83 --- /dev/null +++ b/runtime/Perl5/examples/tweak/T.g @@ -0,0 +1,65 @@ +/** Convert the simple input to be java code; wrap in a class, + * convert method with "public void", add decls. This shows how to insert + * extra text into a stream of tokens and how to replace a token + * with some text. Calling toString() on the TokenRewriteStream + * in Main will print out the original input stream. + * + * Note that you can do the instructions in any order as the + * rewrite instructions just get queued up and executed upon toString(). + */ +grammar T; +options { language = Perl5; } + +program + : method+ + { + $input->insert_before($input->LT(1), "public class Wrapper {\n"); + // note the reference to the last token matched for method: + $input->insert_after($method.stop, "\n}\n"); + } + ; + +method + : m='method' ID '(' ')' body + { $input->replace($m, "public void"); } + ; + +body +scope { + // decls is on body's local variable stack but is visible to + // any rule that body calls such as stat. From other rules + // it is referenced as $body::decls + // From within rule body, you can use $decls shorthand + decls; +} +@init { + $body::decls = []; +} + : lcurly='{' stat* '}' + { + // dump declarations for all identifiers seen in statement list + foreach my $id ($body::decls) { + $tokens->insert_after($lcurly, "\nint $id;"); + } + } + ; + +stat: ID '=' expr ';' { $body::decls->add($ID.text); } // track left-hand-sides + ; + +expr: mul ('+' mul)* + ; + +mul : atom ('*' atom)* + ; + +atom: ID + | INT + ; + +ID : ('a'..'z'|'A'..'Z')+ ; + +INT : ('0'..'9')+ ; + +WS : (' '|'\t'|'\n')+ { $channel = $self->HIDDEN; } + ; diff --git a/runtime/Perl5/examples/tweak/input b/runtime/Perl5/examples/tweak/input new file mode 100644 index 0000000..c8ef1b1 --- /dev/null +++ b/runtime/Perl5/examples/tweak/input @@ -0,0 +1,9 @@ +method foo() { + i = 3; + k = i; + i = k*4; +} + +method bar() { + j = i*2; +} diff --git a/runtime/Perl5/examples/tweak/output b/runtime/Perl5/examples/tweak/output new file mode 100644 index 0000000..6f8e938 --- /dev/null +++ b/runtime/Perl5/examples/tweak/output @@ -0,0 +1,16 @@ +public class Wrapper { +public void foo() { +int k; +int i; + i = 3; + k = i; + i = k*4; +} + +public void bar() { +int j; + j = i*2; +} +} + + diff --git a/runtime/Perl5/examples/tweak/tweak.pl b/runtime/Perl5/examples/tweak/tweak.pl new file mode 100644 index 0000000..22fb743 --- /dev/null +++ b/runtime/Perl5/examples/tweak/tweak.pl @@ -0,0 +1,16 @@ +#!perl + +use strict; +use warnings; + +use ANTLR::Runtime::ANTLRFileStream; +use ANTLR::Runtime::TokenRewriteStream; +use TLexer; +use TParser; + +my $input = ANTLR::Runtime::ANTLRFileStream->new({ file_name => $ARGV[0] }); +my $lexer = TLexer->new({ input => $input }); +my $tokens = ANTLR::Runtime::TokenRewriteStream({ token_source => $lexer }); +my $parser = TParser->new({ input => $tokens }); +$parser->program(); +print "$tokens\n"; diff --git a/runtime/Perl5/examples/zero-one/T.g b/runtime/Perl5/examples/zero-one/T.g new file mode 100644 index 0000000..6f7a1f7 --- /dev/null +++ b/runtime/Perl5/examples/zero-one/T.g @@ -0,0 +1,4 @@ +lexer grammar T; +options { language = Perl5; } +ZERO: '0'; +ONE: '1'; diff --git a/runtime/Perl5/examples/zero-one/t-error.pl b/runtime/Perl5/examples/zero-one/t-error.pl new file mode 100644 index 0000000..818b3c3 --- /dev/null +++ b/runtime/Perl5/examples/zero-one/t-error.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl + +use blib; + +use English qw( -no_match_vars ); +use ANTLR::Runtime::ANTLRStringStream; +use TLexer; + +use strict; +use warnings; + +my $input = ANTLR::Runtime::ANTLRStringStream->new({ '01X0' }); +my $lexer = TLexer->new($input); + +while (1) { + my $token = eval { $lexer->next_token(); }; + if ($EVAL_ERROR) { + my $exception = $EVAL_ERROR; + print $exception; + next; + } + last if $token->get_type() == $TLexer::EOF; + + print "type: ", $token->get_type(), "\n"; + print "text: ", $token->get_text(), "\n"; + print "\n"; +} diff --git a/runtime/Perl5/examples/zero-one/t.pl b/runtime/Perl5/examples/zero-one/t.pl new file mode 100644 index 0000000..ed69cc2 --- /dev/null +++ b/runtime/Perl5/examples/zero-one/t.pl @@ -0,0 +1,22 @@ +#!/usr/bin/perl + +use blib; + +use English qw( -no_match_vars ); +use ANTLR::Runtime::ANTLRStringStream; +use TLexer; + +use strict; +use warnings; + +my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => '010' }); +my $lexer = TLexer->new($input); + +while (1) { + my $token = $lexer->next_token(); + last if $token->get_type() == $TLexer::EOF; + + print "type: ", $token->get_type(), "\n"; + print "text: ", $token->get_text(), "\n"; + print "\n"; +} diff --git a/runtime/Perl5/lib/ANTLR/Runtime.pm b/runtime/Perl5/lib/ANTLR/Runtime.pm new file mode 100644 index 0000000..57bbc05 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime.pm @@ -0,0 +1,116 @@ +package ANTLR::Runtime; + +use strict; +use warnings; + +use version; our $VERSION = qv('0.0.1'); + +1; + +__END__ + +=head1 NAME + +ANTLR::Runtime - ANTLR Runtime for Perl 5 + + +=head1 VERSION + +This documentation refers to ANTLR::Runtime version 0.0.1 + + +=head1 SYNOPSIS + + use <Module::Name>; + # Brief but working code example(s) here showing the most common usage(s) + + # This section will be as far as many users bother reading + # so make it as educational and exemplary as possible. + + +=head1 DESCRIPTION + +A full description of the module and its features. +May include numerous subsections (i.e. =head2, =head3, etc.) + + +=head1 SUBROUTINES/METHODS + +A separate section listing the public components of the module's interface. +These normally consist of either subroutines that may be exported, or methods +that may be called on objects belonging to the classes that the module provides. +Name the section accordingly. + +In an object-oriented module, this section should begin with a sentence of the +form "An object of this class represents...", to give the reader a high-level +context to help them understand the methods that are subsequently described. + + +=head1 DIAGNOSTICS + +A list of every error and warning message that the module can generate +(even the ones that will "never happen"), with a full explanation of each +problem, one or more likely causes, and any suggested remedies. +(See also "Documenting Errors" in Chapter 13.) + + +=head1 CONFIGURATION AND ENVIRONMENT + +A full explanation of any configuration system(s) used by the module, +including the names and locations of any configuration files, and the +meaning of any environment variables or properties that can be set. These +descriptions must also include details of any configuration language used. +(See also "Configuration Files" in Chapter 19.) + + +=head1 DEPENDENCIES + +A list of all the other modules that this module relies upon, including any +restrictions on versions, and an indication whether these required modules are +part of the standard Perl distribution, part of the module's distribution, +or must be installed separately. + + +=head1 INCOMPATIBILITIES + +A list of any modules that this module cannot be used in conjunction with. +This may be due to name conflicts in the interface, or competition for +system or program resources, or due to internal limitations of Perl +(for example, many modules that use source code filters are mutually +incompatible). + + +=head1 BUGS AND LIMITATIONS + +A list of known problems with the module, together with some indication +whether they are likely to be fixed in an upcoming release. + +Also a list of restrictions on the features the module does provide: +data types that cannot be handled, performance issues and the circumstances +in which they may arise, practical limitations on the size of data sets, +special cases that are not (yet) handled, etc. + +The initial template usually just has: + +There are no known bugs in this module. +Please report problems to <Maintainer name(s)> (<contact address>) +Patches are welcome. + +=head1 AUTHOR + +Ronald Blaschke (ron@rblasch.org) + + +=head1 LICENCE AND COPYRIGHT + +Copyright (c) 2008 Ronald Blaschke (ron@rblasch.org). All rights reserved. + +Based on ANTLR Java Runtime by Terence Parr. + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl (see L<perlartistic>) +or ANTLR (see L<http://www.antlr.org/license.html>) itself. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. diff --git a/runtime/Perl5/lib/ANTLR/Runtime/ANTLRFileStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRFileStream.pm new file mode 100644 index 0000000..b6fe3de --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRFileStream.pm @@ -0,0 +1,84 @@ +package ANTLR::Runtime::ANTLRFileStream; + +use Carp; +use Readonly; + +use Moose; + +extends 'ANTLR::Runtime::ANTLRStringStream'; + +has 'file_name' => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +sub BUILDARGS { + my ($class, @args) = @_; + my $args = $class->SUPER::BUILDARGS(@args); + + my $file_name = $args->{file_name}; + if (!defined $file_name) { + return; + } + + my $fh; + my $encoding = $args->{encoding}; + if (defined $encoding) { + open $fh, "<:encoding($encoding)", $file_name + or croak "Can't open $file_name: $!"; + } + else { + open $fh, '<', $file_name + or croak "Can't open $file_name: $!"; + } + + my $content; + { + local $/; + $content = <$fh>; + } + close $fh or carp "Can't close $fh: $!"; + + $args->{input} = $content; + + return $args; +} + +sub load { + my ($self, $file_name, $encoding) = @_; + + if (!defined $file_name) { + return; + } + + my $fh; + if (defined $encoding) { + open $fh, "<:encoding($encoding)", $file_name + or croak "Can't open $file_name: $!"; + } + else { + open $fh, '<', $file_name + or croak "Can't open $file_name: $!"; + } + + my $content; + { + local $/; + $content = <$fh>; + } + close $fh or carp "Can't close $fh: $!"; + + $self->input($content); + return; +} + +sub get_source_name { + my ($self) = @_; + return $self->file_name; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/ANTLRStringStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRStringStream.pm new file mode 100644 index 0000000..2d55edc --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRStringStream.pm @@ -0,0 +1,224 @@ +package ANTLR::Runtime::ANTLRStringStream; + +use Carp; +use Readonly; + +use ANTLR::Runtime::CharStreamState; + +use Moose; + +with 'ANTLR::Runtime::IntStream', 'ANTLR::Runtime::CharStream'; + +has 'input' => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +has 'p' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'line' => ( + is => 'rw', + isa => 'Int', + default => 1, +); + +has 'char_position_in_line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'mark_depth' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'markers' => ( + is => 'rw', + isa => 'ArrayRef[Maybe[ANTLR::Runtime::CharStreamState]]', + default => sub { [ undef ] }, +); + +has 'last_marker' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'name' => ( + is => 'rw', + isa => 'Str', + default => q{}, +); + +sub get_line { + my ($self) = @_; + return $self->line; +} + +sub set_line { + my ($self, $value) = @_; + $self->line($value); + return; +} + +sub get_char_position_in_line { + my ($self) = @_; + return $self->char_position_in_line; +} + +sub set_char_position_in_line { + my ($self, $value) = @_; + $self->char_position_in_line($value); + return; +} + +sub reset { + my ($self) = @_; + + $self->p(0); + $self->line(1); + $self->char_position_in_line(0); + $self->mark_depth(0); + return; +} + +sub consume { + my ($self) = @_; + + if ($self->p < length $self->input) { + $self->char_position_in_line($self->char_position_in_line + 1); + if (substr($self->input, $self->p, 1) eq "\n") { + $self->line($self->line + 1); + $self->char_position_in_line(0); + } + $self->p($self->p + 1); + } + return; +} + +sub LA { + my ($self, $i) = @_; + + if ($i == 0) { + return undef; + } + + if ($i < 0) { + ++$i; # e.g., translate LA(-1) to use offset i=0; then input[p+0-1] + if ($self->p + $i - 1 < 0) { + return $self->EOF; + } + } + + if ($self->p + $i - 1 >= length $self->input) { + return $self->EOF; + } + + return substr $self->input, $self->p + $i - 1, 1; +} + +sub LT { + my ($self, $i) = @_; + + return $self->LA($i); +} + +sub index { + my ($self) = @_; + + return $self->p; +} + +sub size { + my ($self) = @_; + + return length $self->input; +} + +sub mark { + my ($self) = @_; + + $self->mark_depth($self->mark_depth + 1); + my $state; + if ($self->mark_depth >= @{$self->markers}) { + $state = ANTLR::Runtime::CharStreamState->new(); + push @{$self->markers}, $state; + } else { + $state = $self->markers->[$self->mark_depth]; + } + + $state->set_p($self->p); + $state->set_line($self->line); + $state->set_char_position_in_line($self->char_position_in_line); + $self->last_marker($self->mark_depth); + + return $self->mark_depth; +} + +sub rewind { + my $self = shift; + my $m; + if (@_ == 0) { + $m = $self->last_marker; + } else { + $m = shift; + } + + my $state = $self->markers->[$m]; + # restore stream state + $self->seek($state->get_p); + $self->line($state->get_line); + $self->char_position_in_line($state->get_char_position_in_line); + $self->release($m); + return; +} + +sub release { + my ($self, $marker) = @_; + + # unwind any other markers made after m and release m + $self->mark_depth($marker); + # release this marker + $self->mark_depth($self->mark_depth - 1); + return; +} + +# consume() ahead unit p == index; can't just set p = index as we must update +# line and char_position_in_line +sub seek { + my ($self, $index) = @_; + + if ($index <= $self->p) { + # just jump; don't update stream state (line, ...) + $self->p($index); + return; + } + + # seek forward, consume until p hits index + while ($self->p < $index) { + $self->consume(); + } + return; +} + +sub substring { + my ($self, $start, $stop) = @_; + + return substr $self->input, $start, $stop - $start + 1; +} + +sub get_source_name { + my ($self) = @_; + return $self->name; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/BaseRecognizer.pm b/runtime/Perl5/lib/ANTLR/Runtime/BaseRecognizer.pm new file mode 100644 index 0000000..4f46ea7 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/BaseRecognizer.pm @@ -0,0 +1,617 @@ +package ANTLR::Runtime::BaseRecognizer; + +use Readonly; +use Carp; + +use ANTLR::Runtime::RecognizerSharedState; +use ANTLR::Runtime::Token; +use ANTLR::Runtime::UnwantedTokenException; +use ANTLR::Runtime::MissingTokenException; +use ANTLR::Runtime::MismatchedTokenException; + +use Moose; + +Readonly my $MEMO_RULE_FAILED => -2; +sub MEMO_RULE_FAILED { $MEMO_RULE_FAILED } + +Readonly my $MEMO_RULE_UNKNOWN => -1; +sub MEMO_RULE_UNKNOWN { $MEMO_RULE_UNKNOWN } + +Readonly my $INITIAL_FOLLOW_STACK_SIZE => 100; +sub INITIAL_FOLLOW_STACK_SIZE { $INITIAL_FOLLOW_STACK_SIZE } + +# copies from Token object for convenience in actions +Readonly my $DEFAULT_TOKEN_CHANNEL => ANTLR::Runtime::Token->DEFAULT_CHANNEL; +sub DEFAULT_TOKEN_CHANNEL { $DEFAULT_TOKEN_CHANNEL } + +Readonly my $HIDDEN => ANTLR::Runtime::Token->HIDDEN_CHANNEL; +sub HIDDEN { $HIDDEN } + +Readonly my $NEXT_TOKEN_RULE_NAME => 'next_token'; +sub NEXT_TOKEN_RULE_NAME { $NEXT_TOKEN_RULE_NAME } + +# State of a lexer, parser, or tree parser are collected into a state +# object so the state can be shared. This sharing is needed to +# have one grammar import others and share same error variables +# and other state variables. It's a kind of explicit multiple +# inheritance via delegation of methods and shared state. +has 'state' => ( + is => 'rw', + isa => 'ANTLR::Runtime::RecognizerSharedState', + default => sub { ANTLR::Runtime::RecognizerSharedState->new() }, +); + +sub reset { + my ($self) = @_; + + if (!defined $self->state) { + return; + } + + my $state = $self->state; + $state->_fsp(-1); + $state->error_recovery(0); + $state->last_error_index(-1); + $state->failed(0); + $state->syntax_errors(0); + + # wack everything related to backtracking and memoization + $state->backtracking(0); + # wipe cache + $state->rule_memo([]); +} + +sub match { + Readonly my $usage => 'void match(IntStream input, int ttype, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $ttype, $follow) = @_; + + my $matched_symbol = $self->get_current_input_symbol($input); + if ($input->LA(1) eq $ttype) { + $input->consume(); + $self->state->error_recovery(0); + $self->state->failed(0); + return $matched_symbol; + } + + if ($self->state->backtracking > 0) { + $self->state->failed(1); + return $matched_symbol; + } + + return $self->recover_from_mismatched_token($input, $ttype, $follow); +} + +sub match_any { + Readonly my $usage => 'void match_any(IntStream input)'; + croak $usage if @_ != 2; + my ($self, $input) = @_; + + $self->state->error_recovery(0); + $self->state->failed(0); + $input->consume(); +} + +sub mismatch_is_unwanted_token { + my ($self, $input, $ttype) = @_; + return $input->LA(2) == $ttype; +} + +sub mismatch_is_missing_token { + my ($self, $input, $follow) = @_; + + if (!defined $follow) { + return 0; + } + + if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); + $follow = $follow->or($viable_tokens_following_this_rule); + if ($self->state->_fsp >= 0) { + $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); + } + } + + if ($follow->member($input->LA(1)) || $follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + return 1; + } + return 0; +} + +sub mismatch { + Readonly my $usage => 'void mismatch(IntStream input, int ttype, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $ttype, $follow) = @_; + + if ($self->mismatch_is_unwanted_token($input, $ttype)) { + ANTLR::Runtime::UnwantedTokenException->new({ + expecting => $ttype, + input => $input + })->throw(); + } + elsif ($self->mismatch_is_missing_token($input, $follow)) { + ANTLR::Runtime::MissingTokenException->new({ + expecting => $ttype, + input => $input + })->throw(); + } + else { + ANTLR::Runtime::MismatchedTokenException->new({ + expecting => $ttype, + input => $input + })->throw(); + } +} + +sub report_error { + Readonly my $usage => 'void report_error(RecognitionException e)'; + croak $usage if @_ != 2; + my ($self, $e) = @_; + + if ($self->state->error_recovery) { + return; + } + $self->state->syntax_errors($self->state->syntax_errors + 1); + $self->state->error_recovery(1); + + $self->display_recognition_error($self->get_token_names(), $e); + return; +} + +sub display_recognition_error { + Readonly my $usage => 'void display_recognition_error(String[] token_names, RecognitionException e)'; + croak $usage if @_ != 3; + my ($self, $token_names, $e) = @_; + + my $hdr = $self->get_error_header($e); + my $msg = $self->get_error_message($e, $token_names); + $self->emit_error_message("$hdr $msg"); +} + +sub get_error_message { + Readonly my $usage => 'String get_error_message(RecognitionException e, String[] token_names)'; + croak $usage if @_ != 3; + my ($self, $e, $token_names) = @_; + + if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { + my $token_name; + if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { + $token_name = 'EOF'; + } else { + $token_name = $token_names->[$e->get_expecting]; + } + + return 'mismatched input ' . $self->get_token_error_display($e->get_token) + . ' expecting ' . $token_name; + } elsif ($e->isa('ANTLR::Runtime::MismatchedTreeNodeException')) { + my $token_name; + if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { + $token_name = 'EOF'; + } else { + $token_name = $token_names->[$e->get_expecting]; + } + + return 'mismatched tree node: ' . $e->node + . ' expecting ' . $token_name; + } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { + return 'no viable alternative at input ' . $self->get_token_error_display($e->get_token); + } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { + return 'required (...)+ loop did not match anything at input ' + . get_token_error_display($e->get_token); + } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { + return 'mismatched input ' . $self->get_token_error_display($e->get_token) + . ' expecting set ' . $e->get_expecting; + } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { + return 'mismatched input ' . $self->get_token_error_display($e->get_token) + . ' expecting set ' . $e->get_expecting; + } elsif ($e->isa('ANTLR::Runtime::FailedPredicateException')) { + return 'rule ' . $e->rule_name . ' failed predicate: {' + . $e->predicate_text . '}?'; + } else { + return undef; + } +} + +sub get_number_of_syntax_errors { + my ($self) = @_; + return $self->state->syntax_errors; +} + +sub get_error_header { + Readonly my $usage => 'String get_error_header(RecognitionException e)'; + croak $usage if @_ != 2; + my ($self, $e) = @_; + + my $line = $e->get_line(); + my $col = $e->get_char_position_in_line(); + + return "line $line:$col"; +} + +sub get_token_error_display { + Readonly my $usage => 'String get_token_error_display(Token t)'; + croak $usage if @_ != 2; + my ($self, $t) = @_; + + my $s = $t->get_text(); + if (!defined $s) { + if ($t->get_type() == ANTLR::Runtime::Token->EOF) { + $s = '<EOF>'; + } else { + my $ttype = $t->get_type(); + $s = "<$ttype>"; + } + } + + $s =~ s/\n/\\\\n/g; + $s =~ s/\r/\\\\r/g; + $s =~ s/\t/\\\\t/g; + + return "'$s'"; +} + +sub emit_error_message { + Readonly my $usage => 'void emit_error_message(String msg)'; + croak $usage if @_ != 2; + my ($self, $msg) = @_; + + print STDERR $msg, "\n"; +} + +sub recover { + Readonly my $usage => 'void recover(IntStream input, RecognitionException re)'; + croak $usage if @_ != 3; + my ($self, $input, $re) = @_; + + if ($self->state->last_error_index == $input->index()) { + # uh oh, another error at same token index; must be a case + # where LT(1) is in the recovery token set so nothing is + # consumed; consume a single token so at least to prevent + # an infinite loop; this is a failsafe. + $input->consume(); + } + + $self->state->last_error_index($input->index()); + my $follow_set = $self->compute_error_recovery_set(); + $self->begin_resync(); + $self->consume_until($input, $follow_set); + $self->end_resync(); +} + +sub begin_resync { +} + +sub end_resync { +} + +sub compute_error_recovery_set { + Readonly my $usage => 'void compute_error_recovery_set()'; + croak $usage if @_ != 1; + my ($self) = @_; + + $self->combine_follows(0); +} + +sub compute_context_sensitive_rule_FOLLOW { + Readonly my $usage => 'void compute_context_sensitive_rule_FOLLOW()'; + croak $usage if @_ != 1; + my ($self) = @_; + + $self->combine_follows(1); +} + +sub combine_follows { + Readonly my $usage => 'BitSet combine_follows(boolean exact)'; + croak $usage if @_ != 2; + my ($self, $exact) = @_; + + my $top = $self->state->_fsp; + my $follow_set = ANTLR::Runtime::BitSet->new(); + + foreach my $local_follow_set (reverse @{$self->state->following}) { + $follow_set |= $local_follow_set; + if ($exact && $local_follow_set->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + last; + } + } + #$follow_set->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); + return $follow_set; +} + +sub recover_from_mismatched_token { + Readonly my $usage => 'void recover_from_mismatched_token(IntStream input, int ttype, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $ttype, $follow) = @_; + + if ($self->mismatch_is_unwanted_token($input, $ttype)) { + my $ex = ANTLR::Runtime::UnwantedTokenException->new({ + expecting => $ttype, + input => $input + }); + + $self->begin_resync(); + $input->consume(); + $self->end_resync(); + $self->report_error($ex); + + my $matched_symbol = $self->get_current_input_symbol($input); + $input->consume(); + return $matched_symbol; + } + + if ($self->mismatch_is_missing_token($input, $follow)) { + my $inserted = $self->get_missing_symbol({ + input => $input, + expected_token_type => $ttype, + follow => $follow, + }); + my $ex = ANTLR::Runtime::MissingTokenException({ + expecting => $ttype, + input => $input, + inserted => $inserted, + }); + $self->report_error($ex); + return $inserted; + } + + ANTLR::Runtime::MismatchedTokenException->new({ + expecting => $ttype, + input => $input, + })->throw(); +} + +sub recover_from_mismatched_set { + Readonly my $usage => 'void recover_from_mismatched_set(IntStream input, RecognitionException e, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $e, $follow) = @_; + + if ($self->mismatch_is_missing_token($input, $follow)) { + $self->report_error($e); + return $self->get_missing_symbol({ + input => $input, + exception => $e, + expected_token_type => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE, + follow => $follow, + }); + } + + $e->throw(); +} + +sub recover_from_mismatched_element { + Readonly my $usage => 'boolean recover_from_mismatched_element(IntStream input, RecognitionException e, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $e, $follow) = @_; + + return 0 if (!defined $follow); + + if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); + $follow |= $viable_tokens_following_this_rule; + $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); + } + + if ($follow->member($input->LA(1))) { + $self->report_error($e); + return 1; + } + + return 0; +} + +sub get_current_input_symbol { + my ($self, $input) = @_; + return undef; +} + +sub get_missing_symbol { + my ($self, $arg_ref) = @_; + my $input = $arg_ref->{input}; + my $exception = $arg_ref->{exception}; + my $expected_token_type = $arg_ref->{expected_token_type}; + my $follow = $arg_ref->{follow}; + + return undef; +} + +sub consume_until { + Readonly my $usage => 'void consume_until(IntStream input, (int token_type | BitSet set))'; + croak $usage if @_ != 3; + + if ($_[2]->isa('ANTLR::Runtime::BitSet')) { + my ($self, $input, $set) = @_; + + my $ttype = $input->LA(1); + while ($ttype != ANTLR::Runtime::Token->EOF && !$set->member($ttype)) { + $input->consume(); + $ttype = $input->LA(1); + } + } else { + my ($self, $input, $token_type) = @_; + + my $ttype = $input->LA(1); + while ($ttype != ANTLR::Runtime::Token->EOF && $ttype != $token_type) { + $input->consume(); + $ttype = $input->LA(1); + } + } +} + +sub push_follow { + Readonly my $usage => 'void push_follow(BitSet fset)'; + croak $usage if @_ != 2; + my ($self, $fset) = @_; + + push @{$self->state->following}, $fset; + $self->state->_fsp($self->state->_fsp + 1); +} + +sub get_rule_invocation_stack { + Readonly my $usage => 'List get_rule_invocation_stack()'; + croak $usage if @_ != 1; + my ($self) = @_; + + my $rules = []; + for (my $i = 0; ; ++$i) { + my @frame = caller $i; + last if !@frame; + + my ($package, $filename, $line, $subroutine) = @frame; + + if ($package =~ /^ANTLR::Runtime::/) { + next; + } + + if ($subroutine eq NEXT_TOKEN_RULE_NAME) { + next; + } + + if ($package ne ref $self) { + next; + } + + push @{$rules}, $subroutine; + } +} + +sub get_backtracking_level { + Readonly my $usage => 'int get_backtracking_level()'; + croak $usage if @_ != 1; + my ($self) = @_; + + return $self->state->backtracking; +} + +sub set_backtracking_level { + my ($self, $n) = @_; + $self->state->backtracking($n); +} + +sub failed { + my ($self) = @_; + return $self->state->failed; +} + +sub get_token_names { + return undef; +} + +sub get_grammar_file_name { + return undef; +} + +sub to_strings { + Readonly my $usage => 'List to_strings(List tokens)'; + croak $usage if @_ != 2; + my ($self, $tokens) = @_; + + if (!defined $tokens) { + return undef; + } + + return map { $_->get_text() } @{$tokens}; +} + +sub get_rule_memoization { + Readonly my $usage => 'int get_rule_memoization(int rule_index, int rule_start_index)'; + croak $usage if @_ != 3; + my ($self, $rule_index, $rule_start_index) = @_; + + if (!defined $self->rule_memo->[$rule_index]) { + $self->rule_memo->[$rule_index] = {}; + } + + my $stop_index = $self->state->rule_memo->[$rule_index]->{$rule_start_index}; + if (!defined $stop_index) { + return $self->MEMO_RULE_UNKNOWN; + } + return $stop_index; +} + +sub alredy_parsed_rule { + Readonly my $usage => 'boolean alredy_parsed_rule(IntStream input, int rule_index)'; + croak $usage if @_ != 3; + my ($self, $input, $rule_index) = @_; + + my $stop_index = $self->get_rule_memoization($rule_index, $input->index()); + if ($stop_index == $self->MEMO_RULE_UNKNOWN) { + return 0; + } + + if ($stop_index == $self->MEMO_RULE_FAILED) { + $self->state->failed(1); + } else { + $input->seek($stop_index + 1); + } + return 1; +} + +sub memoize { + Readonly my $usage => 'void memoize(IntStream input, int rule_index, int rule_start_index)'; + croak $usage if @_ != 4; + my ($self, $input, $rule_index, $rule_start_index) = @_; + + my $stop_token_index = $self->state->failed ? $self->MEMO_RULE_FAILED : $input->index() - 1; + if (defined $self->state->rule_memo->[$rule_index]) { + $self->state->rule_memo->[$rule_index]->{$rule_start_index} = $stop_token_index; + } +} + +sub get_rule_memoization_cache_size { + Readonly my $usage => 'int get_rule_memoization_cache_size()'; + croak $usage if @_ != 1; + my ($self) = @_; + + my $n = 0; + foreach my $m (@{$self->state->rule_memo}) { + $n += keys %{$m} if defined $m; + } + + return $n; +} + +sub trace_in { + Readonly my $usage => 'void trace_in(String rule_name, int rule_index, input_symbol)'; + croak $usage if @_ != 4; + my ($self, $rule_name, $rule_index, $input_symbol) = @_; + + print "enter $rule_name $input_symbol"; + if ($self->state->failed) { + print ' failed=', $self->state->failed; + } + if ($self->state->backtracking > 0) { + print ' backtracking=', $self->state->backtracking; + } + print "\n"; +} + +sub trace_out { + Readonly my $usage => 'void trace_out(String rule_name, int rule_index, input_symbol)'; + croak $usage if @_ != 4; + my ($self, $rule_name, $rule_index, $input_symbol) = @_; + + print "exit $rule_name $input_symbol"; + if ($self->state->failed) { + print ' failed=', $self->state->failed; + } + if ($self->state->backtracking > 0) { + print ' backtracking=', $self->state->backtracking; + } + print "\n"; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ + +=head1 NAME + +ANTLR::Runtime::BaseRecognizer + +=head1 DESCRIPTION + +A generic recognizer that can handle recognizers generated from +lexer, parser, and tree grammars. This is all the parsing +support code essentially; most of it is error recovery stuff and +backtracking. diff --git a/runtime/Perl5/lib/ANTLR/Runtime/BitSet.pm b/runtime/Perl5/lib/ANTLR/Runtime/BitSet.pm new file mode 100644 index 0000000..b066f34 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/BitSet.pm @@ -0,0 +1,346 @@ +package ANTLR::Runtime::BitSet; + +use Carp; +use Readonly; +use List::Util qw( max ); + +use Moose; +use Moose::Util::TypeConstraints; + +use overload + '|=' => \&or_in_place, + '""' => \&str; + +# number of bits / long +Readonly my $BITS => 64; +sub BITS { return $BITS } + +# 2^6 == 64 +Readonly my $LOG_BITS => 6; +sub LOG_BITS { return $LOG_BITS } + +# We will often need to do a mod operator (i mod nbits). Its +# turns out that, for powers of two, this mod operation is +# same as (i & (nbits-1)). Since mod is slow, we use a +# precomputed mod mask to do the mod instead. +Readonly my $MOD_MASK => BITS - 1; +sub MOD_MASK { return $MOD_MASK } + +# The actual data bit +has 'bits' => ( + is => 'rw', + isa => subtype 'Str' => where { /^(?:0|1)*$/xms }, +); + +sub trim_hex { + my ($number) = @_; + + $number =~ s/^0x//xms; + + return $number; +} + +sub BUILD { + my ($self, $args) = @_; + + my $bits; + if (!%$args) { ## no critic (ControlStructures::ProhibitCascadingIfElse) + # Construct a bitset of size one word (64 bits) + $bits = '0' x BITS; + } + elsif (exists $args->{bits}) { + $bits = $args->{bits}; + } + elsif (exists $args->{number}) { + $bits = reverse unpack('B*', pack('N', $args->{number})); + } + elsif (exists $args->{words64}) { + # Construction from a static array of longs + my $words64 = $args->{words64}; + + # $number is in hex format + my $number = join '', + map { trim_hex($_) } + reverse @$words64; + + $bits = ''; + foreach my $h (split //xms, reverse $number) { + $bits .= reverse substr(unpack('B*', pack('h', hex $h)), 4); + } + } + elsif (exists $args->{''}) { + # Construction from a list of integers + } + elsif (exists $args->{size}) { + # Construct a bitset given the size + $bits = '0' x $args->{size}; + } + else { + croak 'Invalid argument'; + } + + $self->bits($bits); + return; +} + +sub of { + my ($class, $el) = @_; + + my $bs = ANTLR::Runtime::BitSet->new({ size => $el + 1 }); + $bs->add($el); + + return $bs; +} + +sub or : method { ## no critic (Subroutines::ProhibitBuiltinHomonyms) + my ($self, $a) = @_; + + if (!defined $a) { + return $self; + } + + my $s = $self->clone(); + $s->or_in_place($a); + return $s; +} + +sub add : method { + my ($self, $el) = @_; + + $self->grow_to_include($el); + my $bits = $self->bits; + substr($bits, $el, 1, '1'); + $self->bits($bits); + + return; +} + +sub grow_to_include : method { + my ($self, $bit) = @_; + + if ($bit > length $self->bits) { + $self->bits .= '0' x ($bit - (length $self->bits) + 1); + } + + return; +} + +sub or_in_place : method { + my ($self, $a) = @_; + + my $i = 0; + foreach my $b (split //xms, $a->bits) { + if ($b) { + $self->add($i); + } + } continue { + ++$i; + } + + return $self; +} + +sub clone : method { + my ($self) = @_; + + return ANTLR::Runtime::BitSet->new(bits => $self->bits); +} + +sub size : method { + my ($self) = @_; + + return scalar $self->bits =~ /1/xms; +} + +sub equals : method { + my ($self, $other) = @_; + + return $self->bits eq $other->bits; +} + +sub member : method { + my ($self, $el) = @_; + + return (substr $self->bits, $el, 1) eq '1'; +} + +sub remove : method { + my ($self, $el) = @_; + + my $bits = $self->bits; + substr($bits, $el, 1, '0'); + $self->bits($bits); + + return; +} + +sub is_nil : method { + my ($self) = @_; + + return $self->bits =~ /1/xms ? 1 : 0; +} + +sub num_bits : method { + my ($self) = @_; + return length $self->bits; +} + +sub length_in_long_words : method { + my ($self) = @_; + return $self->num_bits() / $self->BITS; +} + +sub to_array : method { + my ($self) = @_; + + my $elems = []; + + while ($self->bits =~ /1/gxms) { + push @$elems, $-[0]; + } + + return $elems; +} + +sub to_packed_array : method { + my ($self) = @_; + + return [ + $self->bits =~ /.{BITS}/gxms + ]; +} + +sub str : method { + my ($self) = @_; + + return $self->to_string(); +} + +sub to_string : method { + my ($self, $args) = @_; + + my $token_names; + if (defined $args && exists $args->{token_names}) { + $token_names = $args->{token_names}; + } + + my @str; + my $i = 0; + foreach my $b (split //xms, $self->bits) { + if ($b) { + if (defined $token_names) { + push @str, $token_names->[$i]; + } else { + push @str, $i; + } + } + } continue { + ++$i; + } + + return '{' . (join ',', @str) . '}'; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ + + +=head1 NAME + +ANTLR::Runtime::BitSet - A bit set + + +=head1 SYNOPSIS + + use <Module::Name>; + # Brief but working code example(s) here showing the most common usage(s) + + # This section will be as far as many users bother reading + # so make it as educational and exemplary as possible. + + +=head1 DESCRIPTION + +A stripped-down version of org.antlr.misc.BitSet that is just good enough to +handle runtime requirements such as FOLLOW sets for automatic error recovery. + + +=head1 SUBROUTINES/METHODS + +=over + +=item C<of> + +... + +=item C<or> + +Return this | a in a new set. + +=item C<add> + +Or this element into this set (grow as necessary to accommodate). + +=item C<grow_to_include> + +Grows the set to a larger number of bits. + +=item C<set_size> + +Sets the size of a set. + +=item C<remove> + +Remove this element from this set. + +=item C<length_in_long_words> + +Return how much space is being used by the bits array not how many actually +have member bits on. + +=back + +A separate section listing the public components of the module's interface. +These normally consist of either subroutines that may be exported, or methods +that may be called on objects belonging to the classes that the module provides. +Name the section accordingly. + +In an object-oriented module, this section should begin with a sentence of the +form "An object of this class represents...", to give the reader a high-level +context to help them understand the methods that are subsequently described. + + +=head1 DIAGNOSTICS + +A list of every error and warning message that the module can generate +(even the ones that will "never happen"), with a full explanation of each +problem, one or more likely causes, and any suggested remedies. +(See also "Documenting Errors" in Chapter 13.) + + +=head1 CONFIGURATION AND ENVIRONMENT + +A full explanation of any configuration system(s) used by the module, +including the names and locations of any configuration files, and the +meaning of any environment variables or properties that can be set. These +descriptions must also include details of any configuration language used. +(See also "Configuration Files" in Chapter 19.) + + +=head1 DEPENDENCIES + +A list of all the other modules that this module relies upon, including any +restrictions on versions, and an indication whether these required modules are +part of the standard Perl distribution, part of the module's distribution, +or must be installed separately. + + +=head1 INCOMPATIBILITIES + +A list of any modules that this module cannot be used in conjunction with. +This may be due to name conflicts in the interface, or competition for +system or program resources, or due to internal limitations of Perl +(for example, many modules that use source code filters are mutually +incompatible). diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CharStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/CharStream.pm new file mode 100644 index 0000000..6936eaa --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CharStream.pm @@ -0,0 +1,21 @@ +package ANTLR::Runtime::CharStream; + +use Carp; +use Readonly; + +use Moose::Role; +#extends 'ANTLR::Runtime::IntStream'; + +Readonly my $EOF => -1; +sub EOF { return $EOF } + +requires 'substring'; + +requires 'LT'; + +requires 'get_line', 'set_line'; + +requires 'get_char_position_in_line', 'set_char_position_in_line'; + +no Moose::Role; +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CharStreamState.pm b/runtime/Perl5/lib/ANTLR/Runtime/CharStreamState.pm new file mode 100644 index 0000000..d33821b --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CharStreamState.pm @@ -0,0 +1,28 @@ +package ANTLR::Runtime::CharStreamState; + +use Moose; + +# Index into the char stream of next lookahead char +has 'p' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# What line number is the scanner at before processing buffer[p]? +has 'line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# What char position 0..n-1 in line is scanner before processing buffer[p]? +has 'char_position_in_line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CommonToken.pm b/runtime/Perl5/lib/ANTLR/Runtime/CommonToken.pm new file mode 100644 index 0000000..3c64b97 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CommonToken.pm @@ -0,0 +1,224 @@ +package ANTLR::Runtime::CommonToken; + +use Readonly; + +use Moose; + +use overload + 'bool' => \¬_eof, + fallback => 1, + ; + +with 'ANTLR::Runtime::Token'; + +has 'type' => ( + is => 'rw', + isa => 'Int', + required => 1, +); + +has 'line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'char_position_in_line' => ( + is => 'rw', + isa => 'Int', + # set to invalid position + default => -1, +); + +has 'channel' => ( + is => 'rw', + isa => 'Int', + default => sub { ANTLR::Runtime::Token->DEFAULT_CHANNEL } +); + +has 'input' => ( + is => 'rw', + isa => 'Maybe[ANTLR::Runtime::CharStream]', +); + +# We need to be able to change the text once in a while. If +# this is non-null, then getText should return this. Note that +# start/stop are not affected by changing this. +has 'text' => ( + is => 'rw', + isa => 'Maybe[Str]', +); + +# What token number is this from 0..n-1 tokens; < 0 implies invalid index +has 'index' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# The char position into the input buffer where this token starts +has 'start' => ( + is => 'rw', + isa => 'Int', +); + +# The char position into the input buffer where this token stops +has 'stop' => ( + is => 'rw', + isa => 'Int', +); + +sub BUILD { + my ($self, $arg_ref) = @_; + + if (exists $arg_ref->{token}) { + my $token = $arg_ref->{token}; + $self->text($token->get_text()); + $self->type($token->get_type()); + $self->line($token->get_line()); + $self->index($token->get_token_index()); + $self->char_position_in_line($token->get_char_position_in_line()); + $self->channel($token->get_channel()); + } + return; +} + +sub get_type { + my ($self) = @_; + return $self->type; +} + +sub set_type { + my ($self, $value) = @_; + $self->value($value); + return; +} + + +sub get_line { + my ($self) = @_; + return $self->line; +} + +sub set_line { + my ($self, $line) = @_; + $self->line($line); + return; +} + +sub get_text { + my ($self) = @_; + + if (defined $self->text) { + return $self->text; + } + if (!defined $self->input) { + return undef; + } + $self->text($self->input->substring($self->start, $self->stop)); + return $self->text; +} + +sub set_text { + my ($self, $text) = @_; + $self->text($text); + return; +} + +sub get_char_position_in_line { + my ($self) = @_; + return $self->char_position_in_line; +} + +sub set_char_position_in_line { + my ($self, $pos) = @_; + $self->char_position_in_line($pos); + return; +} + +sub get_channel { + my ($self) = @_; + return $self->channel; +} + +sub set_channel { + my ($self, $channel) = @_; + $self->channel($channel); + return; +} + +sub get_start_index { + my ($self) = @_; + return $self->start; +} + +sub set_start_index { + my ($self, $start) = @_; + $self->start($start); + return; +} + +sub get_stop_index { + my ($self) = @_; + return $self->stop; +} + +sub set_stop_index { + my ($self, $stop) = @_; + $self->stop($stop); + return; +} + +sub get_token_index { + my ($self) = @_; + return $self->index; +} + +sub set_token_index { + my ($self, $index) = @_; + $self->index($index); + return; +} + +sub get_input_stream { + my ($self) = @_; + return $self->input; +} + +sub set_input_stream { + my ($self, $input) = @_; + $self->input($input); + return; +} + +sub not_eof { + my ($self) = @_; + return $self->type != ANTLR::Runtime::Token->EOF; +} + +=begin later + + public String toString() { + String channelStr = ""; + if ( channel>0 ) { + channelStr=",channel="+channel; + } + String txt = getText(); + if ( txt!=null ) { + txt = txt.replaceAll("\n","\\\\n"); + txt = txt.replaceAll("\r","\\\\r"); + txt = txt.replaceAll("\t","\\\\t"); + } + else { + txt = "<no text>"; + } + return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; + } + +=end later + +=cut + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CommonTokenStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/CommonTokenStream.pm new file mode 100644 index 0000000..cf298ec --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CommonTokenStream.pm @@ -0,0 +1,392 @@ +package ANTLR::Runtime::CommonTokenStream; + +use Carp; +use Readonly; +use UNIVERSAL qw( isa ); + +use ANTLR::Runtime::CharStream; +use ANTLR::Runtime::Token; +use ANTLR::Runtime::TokenSource; + +use Moose; + +use overload + '""' => \&str + ; + +with 'ANTLR::Runtime::IntStream', + 'ANTLR::Runtime::TokenStream'; + +has 'token_source' => ( + is => 'rw', + does => 'ANTLR::Runtime::TokenSource', +); + +has 'tokens' => ( + is => 'rw', + isa => 'ArrayRef[ANTLR::Runtime::Token]', + default => sub { [] }, +); + +has 'channel_override_map' => ( + is => 'rw', + isa => 'HashRef[Int]', +); + +has 'discard_set' => ( + is => 'rw', + isa => 'HashRef[Int]', +); + +has 'channel' => ( + is => 'rw', + isa => 'Int', + default => ANTLR::Runtime::Token->DEFAULT_CHANNEL, +); + +has 'discard_off_channel_tokens' => ( + is => 'rw', + isa => 'Bool', + default => 0, +); + +has 'last_marker' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'p' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +sub set_token_source { + my ($self, $token_source) = @_; + + $self->token_source($token_source); + $self->tokens([]); + $self->p(-1); + $self->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); +} + +sub fill_buffer { + my ($self) = @_; + + my $index = 0; + my $t = $self->token_source->next_token(); + while (defined $t && $t->get_type() != ANTLR::Runtime::CharStream->EOF) { + my $discard = 0; + # is there a channel override for token type? + if (defined $self->channel_override_map) { + my $channel = $self->channel_override_map->{$t->get_type()}; + if (defined $channel) { + $t->set_channel($channel); + } + } + + if (defined $self->discard_set && $self->discard_set->contains($t->get_type())) { + $discard = 1; + } elsif ($self->discard_off_channel_tokens && $t->get_channel() != $self->channel) { + $discard = 1; + } + + if (!$discard) { + $t->set_token_index($index); + push @{$self->tokens}, $t; + ++$index; + } + } continue { + $t = $self->token_source->next_token(); + } + + # leave p pointing at first token on channel + $self->p(0); + $self->skip_off_token_channels($self->p); +} + +sub consume { + my ($self) = @_; + + if ($self->p < @{$self->tokens}) { + $self->p($self->p + 1); + $self->p($self->skip_off_token_channels($self->p)); # leave p on valid token + } +} + +sub skip_off_token_channels { + my ($self, $i) = @_; + + my $n = @{$self->tokens}; + while ($i < $n && $self->tokens->[$i]->get_channel() != $self->channel) { + ++$i; + } + + return $i; +} + +sub skip_off_token_channels_reverse { + my ($self, $i) = @_; + + while ($i >= 0 && $self->tokens->[$i]->get_channel() != $self->channel) { + --$i; + } + + return $i; +} + +sub set_token_type_channel { + my ($self, $ttype, $channel) = @_; + + if (!defined $self->channel_override_map) { + $self->channel_override_map({}); + } + + $self->channel_override_map->{$ttype} = $channel; +} + +sub discard_token_type { + my ($self, $ttype) = @_; + + if (!defined $self->discard_set) { + $self->discard_set({}); + } + + $self->discard_set->{$ttype} = 1; +} + +sub get_tokens { + my ($self, $args) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + if (!defined $args) { + return $self->tokens; + } + + my $start = $args->{start}; + my $stop = $args->{stop}; + + my $types; + if (exists $args->{types}) { + if (ref $args->{types} eq 'ARRAY') { + $types = ANTLR::Runtime::BitSet->new($args->{types}); + } else { + $types = $args->{types}; + } + } else { + my $ttype = $args->{ttype}; + $types = ANTLR::Runtime::BitSet->of($ttype); + } + + + if ($stop >= @{$self->tokens}) { + $stop = $#{$self->tokens}; + } + if ($start < 0) { + $start = 0; + } + + if ($start > $stop) { + return undef; + } + + my $filtered_tokens = []; + foreach my $t (@{$self->tokens}[$start..$stop]) { + if (!defined $types || $types->member($t->get_type())) { + push @$filtered_tokens, $t; + } + } + + if (!@{$filtered_tokens}) { + $filtered_tokens = undef; + } + + return $filtered_tokens; +} + +sub LT { + my ($self, $k) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + if ($k == 0) { + return undef; + } + if ($k < 0) { + return $self->LB(-$k); + } + + if ($self->p + $k - 1 >= @{$self->tokens}) { + return ANTLR::Runtime::Token->EOF_TOKEN; + } + + my $i = $self->p; + my $n = 1; + + while ($n < $k) { + $i = $self->skip_off_token_channels($i+1); + ++$n; + } + + if ($i >= @{$self->tokens}) { + return ANTLR::Runtime::Token->EOF_TOKEN; + } + + return $self->tokens->[$i]; +} + +sub LB { + my ($self, $k) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + if ($k == 0) { + return undef; + } + if ($self->p - $k < 0) { + return undef; + } + + my $i = $self->p; + my $n = 1; + while ($n <= $k) { + $k = $self->skip_off_token_channels_reverse($i - 1); + ++$n; + } + + if ($i < 0) { + return undef; + } + + return $self->tokens->[$i]; +} + +sub get { + my ($self, $i) = @_; + + return $self->tokens->[$i]; +} + +sub LA { + my ($self, $i) = @_; + + return $self->LT($i)->get_type(); +} + +sub mark { + my ($self) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + $self->last_marker($self->index()); + return $self->last_marker; +} + +sub release { + my ($self, $marker) = @_; + + # no resources to release +} + +sub size { + my ($self) = @_; + + return scalar @{$self->tokens}; +} + +sub index { + my ($self) = @_; + + return $self->p; +} + +sub rewind { + Readonly my $usage => 'void rewind(int marker) | void rewind()'; + croak $usage if @_ != 1 && @_ != 2; + + if (@_ == 1) { + my ($self) = @_; + $self->seek($self->last_marker); + } else { + my ($self, $marker) = @_; + $self->seek($marker); + } +} + +sub seek { + my ($self, $index) = @_; + + $self->p($index); +} + +sub get_token_source { + my ($self) = @_; + + return $self->token_source; +} + +sub get_source_name { + my ($self) = @_; + return $self->get_token_source()->get_source_name(); +} + +sub str { + my ($self) = @_; + return $self->to_string(); +} + +sub to_string { + Readonly my $usage => 'String to_string() | String to_string(int start, int stop | String to_string(Token start, Token stop)'; + croak $usage if @_ != 1 && @_ != 3; + + if (@_ == 1) { + my ($self) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + return $self->to_string(0, $#{$self->tokens}); + } else { + my ($self, $start, $stop) = @_; + + if (defined $start && defined $stop) { + if (ref($start) && $start->isa('ANTLR::Runtime::Token')) { + $start = $start->get_token_index(); + } + + if (ref($start) && $stop->isa('ANTLR::Runtime::Token')) { + $stop = $stop->get_token_index(); + } + + if ($start < 0 || $stop < 0) { + return undef; + } + if ($self->p == -1) { + $self->fill_buffer(); + } + if ($stop >= @{$self->tokens}) { + $stop = $#{$self->tokens}; + } + + my $buf = ''; + foreach my $t (@{$self->tokens}[$start..$stop]) { + $buf .= $t->get_text(); + } + + return $buf; + } else { + return undef; + } + } +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/DFA.pm b/runtime/Perl5/lib/ANTLR/Runtime/DFA.pm new file mode 100644 index 0000000..d1f0ef1 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/DFA.pm @@ -0,0 +1,192 @@ +package ANTLR::Runtime::DFA; + +use Params::Validate qw( :types ); +use Error qw( try finally ); + +use Moose; + +has 'eot' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'eof' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'min' => ( + is => 'rw', + isa => 'ArrayRef[Str]', +); + +has 'max' => ( + is => 'rw', + isa => 'ArrayRef[Str]', +); + +has 'accept' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'special' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'transition' => ( + is => 'rw', + isa => 'ArrayRef[ArrayRef[Int]]', +); + +has 'decision_number' => ( + is => 'rw', + isa => 'Int', +); + + +# Which recognizer encloses this DFA? Needed to check backtracking +has 'recognizer' => ( + is => 'rw', + isa => 'ANTLR::Runtime::BaseRecognizer', +); + + +sub get_description { + return "n/a"; +} + +# From the input stream, predict what alternative will succeed +# using this DFA (representing the covering regular approximation +# to the underlying CFL). Return an alternative number 1..n. Throw +# an exception upon error. +sub predict { + my ($self, $input) = @_; + + my $mark = $input->mark(); # remember where decision started in input + my $s = 0; # we always start at s0 + + try { + while (1) { + my $special_state = $self->special->[$s]; + if ($special_state >= 0) { + $s = $self->special_state_transition($special_state, $input); + if ($s == -1) { + $self->no_viable_alt($s, $input); + return 0; + } + $input->consume(); + next; + } + + if ($self->accept->[$s] >= 1) { + return $self->accept->[$s]; + } + + # look for a normal char transition + my $c = $input->LA(1); # -1 == \uFFFF, all tokens fit in 65000 space + + if ($c >= $self->min->[$s] && $c <= $self->max->[$s]) { + my $next_s = $self->transition->[$s][$c - $self->min->[$s]]; # move to next state + + if ($next_s < 0) { + # was in range but not a normal transition + # must check EOT, which is like the else clause. + # eot[s]>=0 indicates that an EOT edge goes to another + # state. + if ($self->eot->[$s] >= 0) { # EOT Transition to accept state? + $s = $self->eot->[$s]; + $input->consume(); + # TODO: I had this as return accept[eot[s]] + # which assumed here that the EOT edge always + #went to an accept...faster to do this, but + # what about predicated edges coming from EOT + # target? + next; + } + + $self->no_viable_alt($s, $input); + return 0; + } + + $s = $next_s; + $input->consume(); + next; + } + + if ($self->eot->[$s] >= 0) { # EOT Transition? + $s = $self->eot->[$s]; + $input->consume(); + next; + } + + if ($c == ANTLR::Runtime::Token->EOF && $self->eof->[$s] >= 0) { # EOF Transition to accept state? + return $self->accept->[$self->eof->[$s]]; + } + + # not in range and not EOF/EOT, must be invalid symbol + $self->no_viable_alt($s, $input); + return 0; + } + } + finally { + $input->rewind(); + }; +} + +sub no_viable_alt { + my ($self, $s, $input) = @_; + + if ($self->recognizer->state->backtracking > 0) { + $self->recognizer->state->failed = 1; + return; + } + my $nvae = ANTLR::Runtime::NoViableAltException({ + grammar_decision_description => $self->get_description(), + decision_number => $self->decision_number, + state_number => $self->state_number, + input => $input + }); + $self->error($nvae); + $nvae->throw(); +} + +# A hook for debugging interface +sub error { + my ($self, $nvae) = @_; +} + +sub special_state_transition { + my ($self, $s, $input) = @_; + + return -1; +} + +# Given a String that has a run-length-encoding of some unsigned shorts +# like "\1\2\3\9", convert to short[] {2,9,9,9}. We do this to avoid +# static short[] which generates so much init code that the class won't +# compile. :( +sub unpack_encoded_string { + my ($self, $encoded_string) = @_; + + my $data = []; + while ($encoded_string =~ /(.)(.)/gxms) { + my ($n, $v) = ($1, $2); + + push @$data, $v x $n; + } + + return $data; +} + +sub unpack_encoded_string_to_unsigned_chars { + my ($self, $encoded_string) = @_; + + return $self->unpack_encoded_string($encoded_string); +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/EarlyExitException.pm b/runtime/Perl5/lib/ANTLR/Runtime/EarlyExitException.pm new file mode 100644 index 0000000..17ccd44 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/EarlyExitException.pm @@ -0,0 +1,8 @@ +package ANTLR::Runtime::EarlyExitException; + +use strict; +use warnings; + +use base qw( ANTLR::Runtime::Exception ); + +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Exception.pm b/runtime/Perl5/lib/ANTLR/Runtime/Exception.pm new file mode 100644 index 0000000..1909cd4 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Exception.pm @@ -0,0 +1,23 @@ +package ANTLR::Runtime::Exception; + +use Exception::Class; + +use Moose; + +extends 'Moose::Object', 'Exception::Class::Base'; + +sub BUILD { + my ($self, $args) = @_; + + my %exception_args; + if (exists $args->{message}) { + $exception_args{message} = $args->{message}; + } + + $self->_initialize(%exception_args); + return; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/IntStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/IntStream.pm new file mode 100644 index 0000000..8b459fa --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/IntStream.pm @@ -0,0 +1,25 @@ +package ANTLR::Runtime::IntStream; + +use Moose::Role; + +requires 'consume'; + +requires 'LA'; + +requires 'mark'; + +requires 'index'; + +requires 'rewind'; + +requires 'release'; + +requires 'seek'; + +requires 'size'; + +requires 'get_source_name'; + +no Moose::Role; +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Lexer.pm b/runtime/Perl5/lib/ANTLR/Runtime/Lexer.pm new file mode 100644 index 0000000..5acbaec --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Lexer.pm @@ -0,0 +1,325 @@ +package ANTLR::Runtime::Lexer; + +use English qw( -no_match_vars ); +use Readonly; +use Carp; +use Switch; + +use ANTLR::Runtime::Token; +use ANTLR::Runtime::CommonToken; +use ANTLR::Runtime::CharStream; +use ANTLR::Runtime::MismatchedTokenException; + +use Moose; + +extends 'ANTLR::Runtime::BaseRecognizer'; +with 'ANTLR::Runtime::TokenSource'; + +has 'input' => ( + is => 'rw', + does => 'ANTLR::Runtime::CharStream', +); + +sub reset { + my ($self) = @_; + + # reset all recognizer state variables + $self->SUPER::reset(); + + # wack Lexer state variables + if (defined $self->input) { + # rewind the input + $self->input->seek(0); + } + + if (defined $self->state) { + $self->state->token(undef); + $self->state->type(ANTLR::Runtime::Token->INVALID_TOKEN_TYPE); + $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); + $self->state->token_start_char_index(-1); + $self->state->token_start_char_position_in_line(-1); + $self->state->start_line(-1); + $self->state->text(undef); + } +} + +# Return a token from this source; i.e., match a token on the char +# stream. +sub next_token { + my ($self) = @_; + + while (1) { + $self->state->token(undef); + $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); + $self->state->token_start_char_index($self->input->index()); + $self->state->token_start_char_position_in_line($self->input->get_char_position_in_line()); + $self->state->token_start_line($self->input->get_line()); + $self->state->text(undef); + + if ($self->input->LA(1) eq ANTLR::Runtime::CharStream->EOF) { + return ANTLR::Runtime::Token->EOF_TOKEN; + } + + my $rv; + my $op = ''; + eval { + $self->m_tokens(); + if (!defined $self->state->token) { + $self->emit(); + } + elsif ($self->state->token == ANTLR::Runtime::Token->SKIP_TOKEN) { + $op = 'next'; + return; + } + $op = 'return'; + $rv = $self->state->token; + }; + return $rv if $op eq 'return'; + next if $op eq 'next'; + + if ($EVAL_ERROR) { + my $exception = $EVAL_ERROR; + if ($exception->isa('ANTLR::Runtime::RecognitionException')) { + $self->report_error($exception); + $self->recover($exception); + } else { + croak $exception; + } + } + } +} + +# Instruct the lexer to skip creating a token for current lexer rule +# and look for another token. nextToken() knows to keep looking when +# a lexer rule finishes with token set to SKIP_TOKEN. Recall that +# if token==null at end of any token rule, it creates one for you +# and emits it. +sub skip { + my ($self) = @_; + + $self->state->token(ANTLR::Runtime::Token->SKIP_TOKEN); + return; +} + +# This is the lexer entry point that sets instance var 'token' +sub m_tokens { + croak "Unimplemented"; +} + +# Set the char stream and reset the lexer +sub set_char_stream { + my ($self, $input) = @_; + + $self->input(undef); + $self->reset(); + $self->input($input); +} + +sub get_char_stream { + my ($self) = @_; + return $self->input; +} + +sub get_source_name { + my ($self) = @_; + return $self->input->get_source_name(); +} + +sub emit { + if (@_ == 1) { + my ($self) = @_; + # The standard method called to automatically emit a token at the + # outermost lexical rule. The token object should point into the + # char buffer start..stop. If there is a text override in 'text', + # use that to set the token's text. Override this method to emit + # custom Token objects. + my $t = ANTLR::Runtime::CommonToken->new({ + input => $self->input, + type => $self->state->type, + channel => $self->state->channel, + start => $self->state->token_start_char_index, + stop => $self->get_char_index() - 1 + }); + + $t->set_line($self->state->token_start_line); + $t->set_text($self->state->text); + $t->set_char_position_in_line($self->state->token_start_char_position_in_line); + $self->emit($t); + return $t; + } elsif (@_ == 2) { + my ($self, $token) = @_; + # Currently does not support multiple emits per nextToken invocation + # for efficiency reasons. Subclass and override this method and + # nextToken (to push tokens into a list and pull from that list rather + # than a single variable as this implementation does). + $self->state->token($token); + } +} + +sub match { + my ($self, $s) = @_; + + foreach my $c (split //, $s) { + if ($self->input->LA(1) ne $c) { + if ($self->state->backtracking > 0) { + $self->state->failed(1); + return; + } + my $mte = ANTLR::Runtime::MismatchedTokenException->new({ + expecting => $c, + input => $self->input + }); + $self->recover($mte); + croak $mte; + } + $self->input->consume(); + $self->state->failed(0); + } +} + +sub match_any { + my ($self) = @_; + + $self->input->consume(); +} + +sub match_range { + my ($self, $a, $b) = @_; + + if ($self->input->LA(1) lt $a || $self->input->LA(1) gt $b) { + if ($self->state->backtracking > 0) { + $self->state->failed(1); + return; + } + + my $mre = ANTLR::Runtime::MismatchedRangeException($a, $b, $self->input); + $self->recover($mre); + croak $mre; + } + + $self->input->consume(); + $self->state->failed(0); +} + +sub get_line { + my ($self) = @_; + + return $self->input->get_line(); +} + +sub get_char_position_in_line { + my ($self) = @_; + + return $self->input->get_char_position_in_line(); +} + +# What is the index of the current character of lookahead? +sub get_char_index { + my ($self) = @_; + + return $self->input->index(); +} + +# Return the text matched so far for the current token or any +# text override. +sub get_text { + my ($self) = @_; + + if (defined $self->state->text) { + return $self->state->text; + } + return $self->input->substring($self->state->token_start_char_index, $self->get_char_index() - 1); +} + +# Set the complete text of this token; it wipes any previous +# changes to the text. +sub set_text { + my ($self, $text) = @_; + + $self->state->text($text); +} + +sub report_error { + Readonly my $usage => 'void report_error(RecognitionException e)'; + croak $usage if @_ != 2; + my ($self, $e) = @_; + + $self->display_recognition_error($self->get_token_names(), $e); +} + +sub get_error_message { + my ($self, $e, $token_names) = @_; + + my $msg; + if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { + $msg = 'mismatched character ' + . $self->get_char_error_display($e->get_c()) + . ' expecting ' + . $self->get_char_error_display($e->expecting); + } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { + $msg = 'no viable alternative at character ' . $self->get_char_error_display($e->get_c()); + } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { + $msg = 'required (...)+ loop did not match anything at character ' + . $self->get_char_error_display($e->get_c()); + } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { + $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) + . ' expecting set ' . $e->expecting; + } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { + $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) + . ' expecting set ' . $e->expecting; + } elsif ($e->isa('ANTLR::Runtime::MismatchedRangeException')) { + $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) + . ' expecting set ' . $self->get_char_error_display($e->a) + . '..' . $self->get_char_error_display($e->b); + } else { + $msg = $self->SUPER::get_error_message($e, $token_names); + } + return $msg; +} + +sub get_char_error_display { + my ($self, $c) = @_; + + my $s; + if ($c eq ANTLR::Runtime::Token->EOF) { + $s = '<EOF>'; + } elsif ($c eq "\n") { + $s = '\n'; + } elsif ($c eq "\t") { + $s = '\t'; + } elsif ($c eq "\r") { + $s = '\r'; + } else { + $s = $c; + } + + return "'$s'"; +} + +# Lexers can normally match any char in it's vocabulary after matching +# a token, so do the easy thing and just kill a character and hope +# it all works out. You can instead use the rule invocation stack +# to do sophisticated error recovery if you are in a fragment rule. +sub recover { + my ($self, $re) = @_; + + $self->input->consume(); +} + +sub trace_in { + my ($self, $rule_name, $rule_index) = @_; + + my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); + $self->SUPER::trace_in($rule_name, $rule_index, $input_symbol); +} + +sub trace_out { + my ($self, $rule_name, $rule_index) = @_; + + my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); + $self->SUPER::trace_out($rule_name, $rule_index, $input_symbol); +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/MismatchedSetException.pm b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedSetException.pm new file mode 100644 index 0000000..d87b0e6 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedSetException.pm @@ -0,0 +1,9 @@ +package ANTLR::Runtime::MismatchedSetException; + +use Moose; + +extends 'ANTLR::Runtime::Exception'; + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/MismatchedTokenException.pm b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedTokenException.pm new file mode 100644 index 0000000..b141e99 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedTokenException.pm @@ -0,0 +1,33 @@ +package ANTLR::Runtime::MismatchedTokenException; + +use ANTLR::Runtime::Token; + +use Moose; + +use overload + '""' => \&to_string, + 'bool' => sub { 1 }, + fallback => 1 + ; + +extends 'ANTLR::Runtime::RecognitionException'; + +has 'expecting' => ( + is => 'ro', + isa => 'Int', + default => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE, +); + +sub get_expecting { + my ($self) = @_; + return $self->expecting; +} + +sub to_string { + my ($self) = @_; + return "MismatchedTokenException(" . $self->get_unexpected_type() . "!=" . $self->expecting . ")"; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/MissingTokenException.pm b/runtime/Perl5/lib/ANTLR/Runtime/MissingTokenException.pm new file mode 100644 index 0000000..5f55a87 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/MissingTokenException.pm @@ -0,0 +1,36 @@ +package ANTLR::Runtime::MissingTokenException; + +use Moose; + +use overload + '""' => \&to_string; + +extends 'ANTLR::Runtime::MismatchedTokenException'; + +has 'inserted' => ( + is => 'ro', + isa => 'Any', +); + +sub get_missing_type { + my ($self) = @_; + return $self->expecting; +} + +sub to_string { + my ($self) = @_; + + if (defined (my $inserted = $self->inserted) && defined (my $token = $self->token)) { + return "MissingTokenException(inserted $inserted at " . $token->get_text() . ")"; + } + if (defined $self->token) { + return "MissingTokenException(at " . $self->token->get_text() . ")"; + } + + return "MissingTokenException"; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/NoViableAltException.pm b/runtime/Perl5/lib/ANTLR/Runtime/NoViableAltException.pm new file mode 100644 index 0000000..88a78ad --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/NoViableAltException.pm @@ -0,0 +1,27 @@ +package ANTLR::Runtime::NoViableAltException; + +use Moose; + +extends 'ANTLR::Runtime::RecognitionException'; + +has 'grammar_decision_description' => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +has 'decision_number' => ( + is => 'ro', + isa => 'Int', + required => 1, +); + +has 'state_number' => ( + is => 'ro', + isa => 'Int', + required => 1, +); + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Parser.pm b/runtime/Perl5/lib/ANTLR/Runtime/Parser.pm new file mode 100644 index 0000000..a9708ae --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Parser.pm @@ -0,0 +1,93 @@ +package ANTLR::Runtime::Parser; + +use Readonly; +use Carp; + +use Moose; + +extends 'ANTLR::Runtime::BaseRecognizer'; + +has 'input' => ( + is => 'rw', + does => 'ANTLR::Runtime::TokenStream' +); + +sub reset { + my ($self) = @_; + + $self->SUPER::reset(); # reset all recognizer state variables + if (defined $self->input) { + $self->input->seek(0); # rewind the input + } +} + +sub get_current_input_symbol { + my ($self, $input) = @_; + return $self->input->LT(1); +} + +sub get_missing_symbol { + my ($self, $arg_ref) = @_; + my $input = $arg_ref->{input}; + my $exception = $arg_ref->{exception}; + my $expected_token_type = $arg_ref->{expected_token_type}; + my $follow = $arg_ref->{follow}; + + my $token_text; + if ($expected_token_type == ANTLR::Runtime::Token->EOF) { + $token_text = '<missing EOF>'; + } + else { + $token_text = '<missing ' . $self->get_token_names()->[$expected_token_type] . '>'; + } + + my $t = ANTLR::Runtime::CommonToken->new({ + type => $expected_token_type, + text => $token_text + }); + my $current = $input->LT(1); + if ($current->get_type() == ANTLR::Runtime::Token->EOF) { + $current = $input->LT(-1); + } + $t->set_line($current->get_line()); + $t->set_char_position_in_line($current->get_char_position_in_line()); + $t->set_channel($self->DEFAULT_TOKEN_CHANNEL); + + return $t; +} + +sub set_token_stream { + my ($self, $input) = @_; + + $self->input(undef); + $self->reset(); + $self->input($input); +} + +sub get_token_stream { + my ($self) = @_; + + return $self->input; +} + +sub get_source_name { + my ($self) = @_; + return $self->input->get_source_name(); +} + +sub trace_in { + my ($self, $rule_name, $rule_index) = @_; + + $self->SUPER::trace_in($rule_name, $rule_index, $self->input->LT(1)); +} + +sub trace_out { + my ($self, $rule_name, $rule_index) = @_; + + $self->SUPER::trace_out($rule_name, $rule_index, $self->input->LT(1)); +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/ParserRuleReturnScope.pm b/runtime/Perl5/lib/ANTLR/Runtime/ParserRuleReturnScope.pm new file mode 100644 index 0000000..07adcf1 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/ParserRuleReturnScope.pm @@ -0,0 +1,30 @@ +package ANTLR::Runtime::ParserRuleReturnScope; + +use Moose; + +extends 'ANTLR::Runtime::RuleReturnScope'; + +has 'start' => ( + is => 'rw', + does => 'ANTLR::Runtime::Token', +); + +has 'stop' => ( + is => 'rw', + does => 'ANTLR::Runtime::Token', +); + +sub get_start { + my ($self) = @_; + return $self->start; +} + +sub get_stop { + my ($self) = @_; + return $self->stop; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/RecognitionException.pm b/runtime/Perl5/lib/ANTLR/Runtime/RecognitionException.pm new file mode 100644 index 0000000..db8d0ba --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/RecognitionException.pm @@ -0,0 +1,122 @@ +package ANTLR::Runtime::RecognitionException; + +use Carp; +use Readonly; + +use Moose; +use Moose::Util::TypeConstraints; + +extends 'ANTLR::Runtime::Exception'; + +has 'input' => ( + is => 'ro', + does => 'ANTLR::Runtime::IntStream', + required => 1, +); + +has 'index' => ( + is => 'ro', + isa => 'Int', + default => 0, +); + +has 'token' => ( + is => 'ro', + does => 'ANTLR::Runtime::Token', +); + +has 'node' => ( + is => 'ro', + isa => 'Any', +); + +subtype 'Char' + => as 'Str' + => where { $_ eq '-1' || length == 1 }; + +has 'c' => ( + is => 'ro', + isa => 'Maybe[Char]', +); + +has 'line' => ( + is => 'ro', + isa => 'Int', + default => 0, +); + +has 'char_position_in_line' => ( + is => 'ro', + isa => 'Int', + default => 0, +); + +has 'approximate_line_info' => ( + is => 'rw', + isa => 'Bool', +); + +sub BUILDARGS { + my ($class, @args) = @_; + my $args = $class->SUPER::BUILDARGS(@args); + + my $new_args = { %$args }; + my $input = $args->{input}; + $new_args->{input} = $input; + $new_args->{index} = $input->index(); + + if ($input->does('ANTLR::Runtime::TokenStream')) { + my $token = $input->LT(1); + $new_args->{token} = $token; + $new_args->{line} = $token->get_line(); + $new_args->{char_position_in_line} = $token->get_char_position_in_line(); + } + + if ($input->does('ANTLR::Runtime::TreeNodeStream')) { + # extract_information_from_tree_node_stream($input); + } + elsif ($input->does('ANTLR::Runtime::CharStream')) { + $new_args->{c} = $input->LA(1); + $new_args->{line} = $input->get_line(); + $new_args->{char_position_in_line} = $input->get_char_position_in_line(); + } + else { + $new_args->{c} = $input->LA(1); + } + + return $new_args; +} + +sub get_unexpected_type { + my ($self) = @_; + + if ($self->input->isa('ANTLR::Runtime::TokenStream')) { + return $self->token->get_type(); + } else { + return $self->c; + } +} + +sub get_c { + my ($self) = @_; + return $self->c; +} + +sub get_line { + my ($self) = @_; + return $self->line; +} + +sub get_char_position_in_line { + my ($self) = @_; + return $self->char_position_in_line; +} + +sub get_token { + my ($self) = @_; + return $self->token; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/RecognizerSharedState.pm b/runtime/Perl5/lib/ANTLR/Runtime/RecognizerSharedState.pm new file mode 100644 index 0000000..96c51f7 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/RecognizerSharedState.pm @@ -0,0 +1,130 @@ +package ANTLR::Runtime::RecognizerSharedState; + +use ANTLR::Runtime::Token; + +use Moose; + +# Track the set of token types that can follow any rule invocation. +# Stack grows upwards. When it hits the max, it grows 2x in size +# and keeps going. +has 'following' => ( + is => 'rw', + isa => 'ArrayRef[ANTLR::Runtime::BitSet]', + default => sub { [] }, +); + +has '_fsp' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# This is true when we see an error and before having successfully +# matched a token. Prevents generation of more than one error message +# per error. +has 'error_recovery' => ( + is => 'rw', + isa => 'Bool', + default => 0, +); + +# The index into the input stream where the last error occurred. +# This is used to prevent infinite loops where an error is found +# but no token is consumed during recovery...another error is found, +# ad naseum. This is a failsafe mechanism to guarantee that at least +# one token/tree node is consumed for two errors. +has 'last_error_index' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# In lieu of a return value, this indicates that a rule or token +# has failed to match. Reset to false upon valid token match. +has 'failed' => ( + is => 'rw', + isa => 'Bool', + default => 0, +); + +# Did the recognizer encounter a syntax error? Track how many. +has 'syntax_errors' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# If 0, no backtracking is going on. Safe to exec actions etc... +# If >0 then it's the level of backtracking. +has 'backtracking' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# An array[size num rules] of Map<Integer,Integer> that tracks +# the stop token index for each rule. ruleMemo[ruleIndex] is +# the memoization table for ruleIndex. For key ruleStartIndex, you +# get back the stop token for associated rule or MEMO_RULE_FAILED. +# This is only used if rule memoization is on (which it is by default). +has 'rule_memo' => ( + is => 'rw', + isa => 'Maybe[ArrayRef[HashRef[Int]]]', +); + +# The goal of all lexer rules/methods is to create a token object. +# This is an instance variable as multiple rules may collaborate to +# create a single token. nextToken will return this object after +# matching lexer rule(s). If you subclass to allow multiple token +# emissions, then set this to the last token to be matched or +# something nonnull so that the auto token emit mechanism will not +# emit another token. +has 'token' => ( + is => 'rw', + isa => 'Maybe[ANTLR::Runtime::Token]', +); + +# What character index in the stream did the current token start at? +# Needed, for example, to get the text for current token. Set at +# the start of nextToken. +has 'token_start_char_index' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# The line on which the first character of the token resides +has 'token_start_line' => ( + is => 'rw', + isa => 'Int', +); + +# The character position of first character within the line +has 'token_start_char_position_in_line' => ( + is => 'rw', + isa => 'Int', +); + +# The channel number for the current token +has 'channel' => ( + is => 'rw', + isa => 'Int', +); + +# The token type for the current token +has 'type' => ( + is => 'rw', + isa => 'Int', +); + +# You can set the text for the current token to override what is in +# the input char buffer. Use setText() or can set this instance var. +has 'text' => ( + is => 'rw', + isa => 'Maybe[Str]', +); + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/RuleReturnScope.pm b/runtime/Perl5/lib/ANTLR/Runtime/RuleReturnScope.pm new file mode 100644 index 0000000..eb08dbc --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/RuleReturnScope.pm @@ -0,0 +1,23 @@ +package ANTLR::Runtime::RuleReturnScope; + +use Moose; + +sub get_start { + return; +} + +sub get_stop { + return; +} + +sub get_tree { + return; +} + +sub get_template { + return; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Stream.pm b/runtime/Perl5/lib/ANTLR/Runtime/Stream.pm new file mode 100644 index 0000000..3dd0ba7 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Stream.pm @@ -0,0 +1,23 @@ +package ANTLR::Runtime::Stream; + +use Moose::Role; + +requires 'consume'; + +requires 'LA'; + +requires 'mark'; + +requires 'index'; + +requires 'rewind'; + +requires 'release'; + +requires 'seek'; + +requires 'size'; + +no Moose::Role; +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Token.pm b/runtime/Perl5/lib/ANTLR/Runtime/Token.pm new file mode 100644 index 0000000..7cd8f21 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Token.pm @@ -0,0 +1,80 @@ +package ANTLR::Runtime::Token; + +use Readonly; + +use feature qw( state ); + +use ANTLR::Runtime::CharStream; +#use ANTLR::Runtime::CommonToken; + +use Moose::Role; + +Readonly my $EOR_TOKEN_TYPE => 1; +sub EOR_TOKEN_TYPE { $EOR_TOKEN_TYPE } + +# imaginary tree navigation type; traverse "get child" link +Readonly my $DOWN => 2; +sub DOWN { $DOWN } + +# imaginary tree navigation type; finish with a child list +Readonly my $UP => 3; +sub UP { $UP } + +Readonly my $MIN_TOKEN_TYPE => $UP + 1; +sub MIN_TOKEN_TYPE { $MIN_TOKEN_TYPE } + +# All tokens go to the parser (unless skip() is called in that rule) +# on a particular "channel". The parser tunes to a particular channel +# so that whitespace etc... can go to the parser on a "hidden" channel. +Readonly my $DEFAULT_CHANNEL => 0; +sub DEFAULT_CHANNEL { $DEFAULT_CHANNEL } + +# Anything on different channel than DEFAULT_CHANNEL is not parsed +# by parser. +Readonly my $HIDDEN_CHANNEL => 99; +sub HIDDEN_CHANNEL { $HIDDEN_CHANNEL } + +sub EOF { ANTLR::Runtime::CharStream->EOF } + +#Readonly my $EOF_TOKEN => ANTLR::Runtime::CommonToken->new({ type => EOF }); +sub EOF_TOKEN { + require ANTLR::Runtime::CommonToken; + state $EOF_TOKEN = ANTLR::Runtime::CommonToken->new({ type => EOF }); + return $EOF_TOKEN; +} + +Readonly my $INVALID_TOKEN_TYPE => 0; +sub INVALID_TOKEN_TYPE { $INVALID_TOKEN_TYPE } + +#Readonly my $INVALID_TOKEN => ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); +sub INVALID_TOKEN { + require ANTLR::Runtime::CommonToken; + state $INVALID_TOKEN = ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); + return $INVALID_TOKEN; +} + +# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR +# will avoid creating a token for this symbol and try to fetch another. +#Readonly my $SKIP_TOKEN => ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); +sub SKIP_TOKEN { + require ANTLR::Runtime::CommonToken; + state $SKIP_TOKEN = ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); + return $SKIP_TOKEN; +} + +requires 'get_text', 'set_text'; + +requires 'get_type', 'set_type'; + +requires 'get_line', 'set_line'; + +requires 'get_char_position_in_line', 'set_char_position_in_line'; + +requires 'get_channel', 'set_channel'; + +requires 'get_token_index', 'set_token_index'; + +requires 'get_input_stream', 'set_input_stream'; + +no Moose::Role; +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/TokenSource.pm b/runtime/Perl5/lib/ANTLR/Runtime/TokenSource.pm new file mode 100644 index 0000000..f9200ef --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/TokenSource.pm @@ -0,0 +1,35 @@ +package ANTLR::Runtime::TokenSource; + +use Moose::Role; + +# Return a Token object from your input stream (usually a CharStream). +# Do not fail/return upon lexing error; keep chewing on the characters +# until you get a good one; errors are not passed through to the parser. +requires 'next_token'; + +# Where are you getting tokens from? normally the implication will simply +# ask lexers input stream. +requires 'get_source_name'; + +no Moose::Role; +1; +__END__ + +=head1 NAME + +ANTLR::Runtime::TokenSource + +=head1 DESCRIPTION + +A source of tokens must provide a sequence of tokens via nextToken() +and also must reveal it's source of characters; CommonToken's text is +computed from a CharStream; it only store indices into the char stream. + +Errors from the lexer are never passed to the parser. Either you want +to keep going or you do not upon token recognition error. If you do not +want to continue lexing then you do not want to continue parsing. Just +throw an exception not under RecognitionException and Java will naturally +toss you all the way out of the recognizers. If you want to continue +lexing then you should not throw an exception to the parser--it has already +requested a token. Keep lexing until you get a valid one. Just report +errors and keep going, looking for a valid token. diff --git a/runtime/Perl5/lib/ANTLR/Runtime/TokenStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/TokenStream.pm new file mode 100644 index 0000000..ecab89a --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/TokenStream.pm @@ -0,0 +1,16 @@ +package ANTLR::Runtime::TokenStream; + +use Moose::Role; +#extends 'ANTLR::Runtime::IntStream'; + +requires 'LT'; + +requires 'get'; + +requires 'get_token_source'; + +requires 'to_string'; + +no Moose::Role; +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/UnwantedTokenException.pm b/runtime/Perl5/lib/ANTLR/Runtime/UnwantedTokenException.pm new file mode 100644 index 0000000..60c1202 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/UnwantedTokenException.pm @@ -0,0 +1,37 @@ +package ANTLR::Runtime::UnwantedTokenException; + +use Moose; + +use overload + '""' => \&to_string; + +extends 'ANTLR::Runtime::MismatchedTokenException'; + +sub get_unexpected_token { + my ($self) = @_; + return $self->token; +} + +sub to_string { + my ($self) = @_; + + my $exp; + if ($self->expecting == ANTLR::Runtime::Token->INVALID_TOKEN_TYPE) { + $exp = ''; + } + else { + $exp = ", expected " . $self->expecting; + } + + if (defined $self->token) { + return "UnwantedTokenException(found=" . $self->token->get_text() . "$exp)"; + } + else { + return "UnwantedTokenException(found=undef$exp)"; + } +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/port.yml b/runtime/Perl5/port.yml new file mode 100644 index 0000000..56897f5 --- /dev/null +++ b/runtime/Perl5/port.yml @@ -0,0 +1,4 @@ +--- +status: + runtime/Java/src/org/antlr/runtime/BitSet.java: + sha1: 0000000000000000000000000000000000000000 diff --git a/runtime/Perl5/t/author/api.t b/runtime/Perl5/t/author/api.t new file mode 100644 index 0000000..420fe83 --- /dev/null +++ b/runtime/Perl5/t/author/api.t @@ -0,0 +1,95 @@ +use strict; +use warnings; + +use File::Spec; +use Java::JVM::Classfile; + +use Test::More tests => 29; + +sub class_name_to_java { + my ($name) = @_; + + my $tmp = $name; + $tmp =~ s/ANTLR::Runtime/org.antlr.runtime/; + $tmp =~ s/::/./g; + + return $tmp; +} + +sub java_class_name_to_perl { + my ($name) = @_; + + my $tmp = $name; + $tmp =~ s/org\.antlr\.runtime/ANTLR::Runtime/; + $tmp =~ s/\./::/g; + + return $tmp; +} + +sub resolve_java_class_file { + my ($name, $basedir) = @_; + + my $tmp = $name; + $tmp =~ s!\.!/!g; + $tmp .= '.class'; + + return File::Spec->catfile($basedir, $tmp); +} + +sub java_constant_name_to_perl { +} + +sub java_method_name_to_perl { + my ($name) = @_; + + if ($name eq '<init>') { + return 'new'; + } + # add special cases here + else { + my $tmp = $name; + $tmp =~ s/([a-z])([A-Z])/$1_\L$2\E/g; + + return $tmp; + } +} + +my @java_class_names = qw( + org.antlr.runtime.BitSet +); + +foreach my $java_class_name (@java_class_names) { + my $java_class_file = resolve_java_class_file($java_class_name, + '../../build/rtclasses'); + + my $java_class; + { + local $SIG{'__WARN__'} = sub {}; + $java_class = Java::JVM::Classfile->new($java_class_file); + } + + my $class_name = java_class_name_to_perl($java_class_name); + use_ok($class_name); + print map { "$_\n" } ANTLR::Runtime::BitSet->can(); + print "---\n"; + + eval { $class_name->new() }; + print join "\n", ANTLR::Runtime::BitSet->can(); + print "\n"; + + my $java_fields = $java_class->fields; + foreach my $java_field (@$java_fields) { + next if grep { $_ eq 'private' } @{$java_field->access_flags}; + + my $field_name = $java_field->name; + ok($class_name->can($field_name), $field_name); + } + + my $java_methods = $java_class->methods; + foreach my $java_method (@$java_methods) { + next if grep { $_ eq 'private' } @{$java_method->access_flags}; + + my $method_name = java_method_name_to_perl($java_method->name); + ok($class_name->can($method_name), $method_name); + } +} diff --git a/runtime/Perl5/t/author/perlcritic.t b/runtime/Perl5/t/author/perlcritic.t new file mode 100644 index 0000000..08b4545 --- /dev/null +++ b/runtime/Perl5/t/author/perlcritic.t @@ -0,0 +1,19 @@ +use strict; +use warnings; + +use File::Spec; +use English qw(-no_match_vars); + +use Test::More; + +eval { + require Test::Perl::Critic; +}; +if ( $EVAL_ERROR ) { + my $msg = 'Test::Perl::Critic required to criticise code'; + plan( skip_all => $msg ); +} + +my $rcfile = File::Spec->catfile( 't', 'author', 'perlcriticrc' ); +Test::Perl::Critic->import( -profile => $rcfile ); +all_critic_ok(); diff --git a/runtime/Perl5/t/author/perlcriticrc b/runtime/Perl5/t/author/perlcriticrc new file mode 100644 index 0000000..8acc451 --- /dev/null +++ b/runtime/Perl5/t/author/perlcriticrc @@ -0,0 +1,5 @@ +severity = 5 + +#[-CodeLayout::RequireTidyCode] + +[-Subroutines::ProhibitExplicitReturnUndef] diff --git a/runtime/Perl5/t/author/pod-coverage.t b/runtime/Perl5/t/author/pod-coverage.t new file mode 100644 index 0000000..54ae158 --- /dev/null +++ b/runtime/Perl5/t/author/pod-coverage.t @@ -0,0 +1,10 @@ +use strict; +use warnings; + +use Test::More; + +eval "use Test::Pod::Coverage"; +if ($@) { + plan skip_all => "Test::Pod::Coverage required for testing POD coverage: $@"; +} +all_pod_coverage_ok(); diff --git a/runtime/Perl5/t/author/pod.t b/runtime/Perl5/t/author/pod.t new file mode 100644 index 0000000..e6908b8 --- /dev/null +++ b/runtime/Perl5/t/author/pod.t @@ -0,0 +1,10 @@ +use strict; +use warnings; + +use Test::More; + +eval "use Test::Pod"; +if ($@) { + plan skip_all => "Test::Pod required for testing POD: $@"; +} +all_pod_files_ok(); diff --git a/runtime/Perl5/t/classes.t b/runtime/Perl5/t/classes.t new file mode 100644 index 0000000..654cdb9 --- /dev/null +++ b/runtime/Perl5/t/classes.t @@ -0,0 +1,9 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use lib 't/lib'; +use My::Test::Class::Load 't/classes'; + +Test::Class->runtests(); diff --git a/runtime/Perl5/t/classes/Test/ANTLR/Runtime/ANTLRStringStream.pm b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/ANTLRStringStream.pm new file mode 100644 index 0000000..ab4a98b --- /dev/null +++ b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/ANTLRStringStream.pm @@ -0,0 +1,32 @@ +package Test::ANTLR::Runtime::ANTLRStringStream; + +use ANTLR::Runtime::ANTLRStringStream; +use Test::More; + +use Moose; + +BEGIN { extends 'My::Test::Class' } + +sub consume : Test(2) { + my ($self) = @_; + + my $s = $self->class->new({ input => 'ABC' }); + is $s->LA(1), 'A'; + $s->consume(); + is $s->LA(1), 'B'; +} + +sub LA : Test(5) { + my ($self) = @_; + + my $s = $self->class->new({ input => 'ABC' }); + is $s->LA(0), undef; + is $s->LA(1), 'A'; + is $s->LA(2), 'B'; + is $s->LA(3), 'C'; + is $s->LA(4), ANTLR::Runtime::ANTLRStringStream->EOF; +} + +no Moose; +__PACKAGE__->meta->make_immutable(inline_constructor => 0); +1; diff --git a/runtime/Perl5/t/classes/Test/ANTLR/Runtime/BitSet.pm b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/BitSet.pm new file mode 100644 index 0000000..d1679e3 --- /dev/null +++ b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/BitSet.pm @@ -0,0 +1,82 @@ +package Test::ANTLR::Runtime::BitSet; + +use Test::More; + +use Moose; + +BEGIN { extends 'My::Test::Class' } + +sub constructor : Tests(3) { + my ($self) = @_; + my $class = $self->class; + + can_ok $class, 'new'; + ok my $bs = $class->new(); + isa_ok $bs, $class; +} + +sub constructor_bits : Tests(5) { + my ($self) = @_; + my $bs = $self->class->new({ bits => '001' }); + ok !$bs->member(0); + ok !$bs->member(1); + ok $bs->member(2); + ok !$bs->member(3); + is "$bs", '{2}'; +} + +sub constructor_number : Tests(2) { + my ($self) = @_; + my $bs = $self->class->new({ number => 0x10 }); + ok $bs->member(4); + is "$bs", '{4}'; +} + +sub constructor_words64 : Tests(2) { + my ($self) = @_; + my $bs = $self->class->new( + { words64 => [ '0x0000004000000001', '0x1000000000800000' ] }); + is "$bs", '{0,38,87,124}'; +} + +sub of : Tests(2) { + my ($self) = @_; + my $bs = $self->class->of(0x10); + ok $bs->member(16) ; + is "$bs", '{16}' ; +} + +sub operator_to_string : Tests(1) { + my ($self) = @_; + my $bs = $self->class->new(); + is "$bs", '{}'; +} + +sub add : Tests(1) { + my ($self) = @_; + my $bs = $self->class->new(); + $bs->add(2); + $bs->add(7); + is "$bs", '{2,7}'; +} + +sub remove : Tests(2) { + my ($self) = @_; + my $bs = $self->class->new(); + $bs->add(3); + $bs->add(12); + is "$bs", '{3,12}'; + $bs->remove(3); + is "$bs", '{12}'; +} + +sub operator_or : Tests(1) { + my ($self) = @_; + my $bs = $self->class->of(4); + $bs |= $self->class->of(5); + is "$bs", '{4,5}'; +} + +no Moose; +__PACKAGE__->meta->make_immutable(inline_constructor => 0); +1; diff --git a/runtime/Perl5/t/classes/Test/ANTLR/Runtime/CommonToken.pm b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/CommonToken.pm new file mode 100644 index 0000000..97ff91d --- /dev/null +++ b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/CommonToken.pm @@ -0,0 +1,38 @@ +package Test::ANTLR::Runtime::CommonToken; + +use Test::More; + +use ANTLR::Runtime::Token; + +use Moose; + +BEGIN { extends 'My::Test::Class' } + +sub constructor : Test(1) { + my $token = ANTLR::Runtime::CommonToken->new({ + input => undef, + type => 0, + channel => 0, + start => 0, + stop => 1, + }); + is $token->get_start_index(), 0; +} + +sub same : Test(2) { + ok(ANTLR::Runtime::Token->EOF_TOKEN == ANTLR::Runtime::Token->EOF_TOKEN); + ok(ANTLR::Runtime::Token->SKIP_TOKEN == ANTLR::Runtime::Token->SKIP_TOKEN); +} + +sub not_same : Test(2) { + ok !(ANTLR::Runtime::Token->EOF_TOKEN != ANTLR::Runtime::Token->EOF_TOKEN); + ok !(ANTLR::Runtime::Token->SKIP_TOKEN != ANTLR::Runtime::Token->SKIP_TOKEN); +} + +sub bool_eof : Test(1) { + ok !ANTLR::Runtime::Token->EOF_TOKEN; +} + +no Moose; +__PACKAGE__->meta->make_immutable(inline_constructor => 0); +1; diff --git a/runtime/Perl5/t/classes/Test/ANTLR/Runtime/Exception.pm b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/Exception.pm new file mode 100644 index 0000000..e6d0450 --- /dev/null +++ b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/Exception.pm @@ -0,0 +1,32 @@ +package Test::ANTLR::Runtime::Exception; + +use Test::More; + +use Moose; + +BEGIN { extends 'My::Test::Class' } + +sub constructor : Test(1) { + my ($self) = @_; + my $ex = $self->class->new(); + is $ex->message, ''; +} + +sub constructor_message : Test(1) { + my ($self) = @_; + my $ex = $self->class->new({ message => 'test error message' }); + is $ex->message, 'test error message'; +} + +sub throw : Test(1) { + my ($self) = @_; + eval { + $self->class->throw(message => 'test error message'); + }; + my $ex = $self->class->caught(); + is $ex->message, 'test error message'; +} + +no Moose; +__PACKAGE__->meta->make_immutable(inline_constructor => 0); +1; diff --git a/runtime/Perl5/t/classes/Test/ANTLR/Runtime/Lexer.pm b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/Lexer.pm new file mode 100644 index 0000000..848a1d3 --- /dev/null +++ b/runtime/Perl5/t/classes/Test/ANTLR/Runtime/Lexer.pm @@ -0,0 +1,20 @@ +package Test::ANTLR::Runtime::Lexer; + +use Test::More; + +use ANTLR::Runtime::ANTLRStringStream; +use ANTLR::Runtime::Lexer; + +use Moose; + +BEGIN { extends 'My::Test::Class' } + +sub constructor : Test(1) { + my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => 'ABC' }); + my $lexer = ANTLR::Runtime::Lexer->new({ input => $input }); + ok defined $lexer; +} + +no Moose; +__PACKAGE__->meta->make_immutable(inline_constructor => 0); +1; diff --git a/runtime/Perl5/t/examples/expr.t b/runtime/Perl5/t/examples/expr.t new file mode 100644 index 0000000..d1096da --- /dev/null +++ b/runtime/Perl5/t/examples/expr.t @@ -0,0 +1,85 @@ +use strict; +use warnings; + +use lib qw( t/lib ); + +use Test::More; +use ANTLR::Runtime::Test; + +plan tests => 1; + +# The SimpleCalc grammar from the five minutes tutorial. +g_test_output_is({ grammar => <<'GRAMMAR', test_program => <<'CODE', expected => <<'OUTPUT' }); +grammar Expr; +options { language = Perl5; } +@header {} + +@members { + my %memory; +} + +prog: stat+ ; + +stat: expr NEWLINE { print "$expr.value\n"; } + | ID '=' expr NEWLINE + { $memory{$ID.text} = $expr.value; } + | NEWLINE + ; + +expr returns [value] + : e=multExpr { $value = $e.value; } + ( '+' e=multExpr { $value += $e.value; } + | '-' e=multExpr { $value -= $e.value; } + )* + ; + +multExpr returns [value] + : e=atom { $value = $e.value; } ('*' e=atom { $value *= $e.value; })* + ; + +atom returns [value] + : INT { $value = $INT.text; } + | ID + { + my $v = $memory{$ID.text}; + if (defined $v) { + $value = $v; + } else { + print STDERR "undefined variable $ID.text\n"; + } + } + | '(' expr ')' { $value = $expr.value; } + ; + +ID : ('a'..'z'|'A'..'Z')+ ; +INT : '0'..'9'+ ; +NEWLINE:'\r'? '\n' ; +WS : (' '|'\t')+ { $self->skip(); } ; +GRAMMAR +use strict; +use warnings; + +use ANTLR::Runtime::ANTLRStringStream; +use ANTLR::Runtime::CommonTokenStream; +use ExprLexer; +use ExprParser; + +my $in = << 'EOT'; +1 + 1 +8 - 1 +a = 10 +b = 13 +2 * a + b + 1 +EOT + +my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => $in }); +my $lexer = ExprLexer->new({ input => $input }); + +my $tokens = ANTLR::Runtime::CommonTokenStream->new({ token_source => $lexer }); +my $parser = ExprParser->new({ input => $tokens }); +$parser->prog(); +CODE +2 +7 +34 +OUTPUT diff --git a/runtime/Perl5/t/examples/fig.t b/runtime/Perl5/t/examples/fig.t new file mode 100644 index 0000000..79bf8a0 --- /dev/null +++ b/runtime/Perl5/t/examples/fig.t @@ -0,0 +1,73 @@ +use strict; +use warnings; + +use lib qw( t/lib ); + +use Test::More; +use ANTLR::Runtime::Test; + +plan tests => 1; + +TODO: { +local $TODO = 'Not implemented yet'; +# The SimpleCalc grammar from the five minutes tutorial. +g_test_output_is({ grammar => <<'GRAMMAR', test_program => <<'CODE', expected => <<'OUTPUT' }); +grammar Fig; +options { language = Perl5; } + +@header { +use RunFig; +} + +@members { +has 'instances' => ( + default => sub { {} } +); +} + +file returns [objects] + : { $objects = []; } + (object { push @$objects, $object.o; })+ + ; + +object returns [o] + : qid v=ID? + { + $o = RunFig.newInstance($qid.text); + if (defined $v) { + $self->instances->{$v.text, $o); + } + } + '{' assign[$o]* '}' + ; + +assign[o] + : ID '=' expr ';' {RunFig.setObjectProperty(o,$ID.text,$expr.value);} + ; + +expr returns [value] + : STRING { $value = $STRING.text; } + | INT { $value = Integer.valueOf($INT.text); } + | '$' ID { $value = instances.get($ID.text); } + | '[' ']' { $value = new ArrayList(); } + | {ArrayList elements = new ArrayList(); } + '[' e=expr { elements.add($e.value); } + (',' e=expr { elements.add($e.value); })* + ']' + { $value = elements; } + ; + +qid : ID ('.' ID)* + ; + +STRING : '"' .* '"' { setText(getText().substring(1, getText().length()-1)); } ; +INT : '0'..'9'+ ; +ID : ('_'|'a'..'z'|'A'..'Z') ('_'|'a'..'z'|'A'..'Z'|'0'..'9')* ; +WS : (' '|'\n'|'\t')+ { $channel = $self->HIDDEN; } ; +CMT : '/*' .* '*/' { $channel = $self->HIDDEN; } ; +GRAMMAR + +CODE + +OUTPUT +} diff --git a/runtime/Perl5/t/examples/simplecalc.t b/runtime/Perl5/t/examples/simplecalc.t new file mode 100644 index 0000000..2f88381 --- /dev/null +++ b/runtime/Perl5/t/examples/simplecalc.t @@ -0,0 +1,87 @@ +use strict; +use warnings; + +use lib qw( t/lib ); + +use Test::More; +use ANTLR::Runtime::Test; + +plan tests => 1; + +# The SimpleCalc grammar from the five minutes tutorial. +g_test_output_is({ grammar => <<'GRAMMAR', test_program => <<'CODE', expected => <<'OUTPUT' }); +grammar SimpleCalc; +options { language = Perl5; } + +tokens { + PLUS = '+' ; + MINUS = '-' ; + MULT = '*' ; + DIV = '/' ; +} + +/*------------------------------------------------------------------ + * PARSER RULES + *------------------------------------------------------------------*/ + +expr : term ( ( PLUS | MINUS ) term )* ; + +term : factor ( ( MULT | DIV ) factor )* ; + +factor : NUMBER ; + +/*------------------------------------------------------------------ + * LEXER RULES + *------------------------------------------------------------------*/ + +NUMBER : (DIGIT)+ ; + +WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ; + +fragment DIGIT : '0'..'9' ; +GRAMMAR +use strict; +use warnings; + +use ANTLR::Runtime::ANTLRStringStream; +use ANTLR::Runtime::CommonTokenStream; +use ANTLR::Runtime::RecognitionException; +use SimpleCalcLexer; +use SimpleCalcParser; + +my @examples = ( + '1', + '1 + 1', + '1 +', + '1 * 2 + 3', +); + +foreach my $example (@examples) { + my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => $example }); + my $lexer = SimpleCalcLexer->new({ input => $input }); + my $tokens = ANTLR::Runtime::CommonTokenStream->new({ token_source => $lexer }); + my $parser = SimpleCalcParser->new({ input => $tokens }); + eval { + $parser->expr(); + if ($parser->get_number_of_syntax_errors() == 0) { + print "$example: good\n"; + } + else { + print "$example: bad\n"; + } + }; + if (my $ex = ANTLR::Runtime::RecognitionException->caught()) { + print "$example: error\n"; + } elsif ($ex = Exception::Class->caught()) { + print "$example: error: $ex\n"; + ref $ex ? $ex->rethrow() : die $ex; + } +} +CODE +1: good +1 + 1: good +1 +: bad +1 * 2 + 3: good +OUTPUT + +__END__ diff --git a/runtime/Perl5/t/lexer.t b/runtime/Perl5/t/lexer.t new file mode 100644 index 0000000..bf56b76 --- /dev/null +++ b/runtime/Perl5/t/lexer.t @@ -0,0 +1,126 @@ +use strict; +use warnings; + +use FindBin; +use lib qw( t/lib ); + +use File::Slurp; + +use Test::More; +use ANTLR::Runtime::Test; + +plan tests => 2; + +sub grammar_file { + my ($file) = @_; + return read_file("t/$file"); +} + +# A simple test: try to lex one possible token. +g_test_output_is({ grammar => <<'GRAMMAR', test_program => <<'CODE', expected => <<'OUTPUT' }); +/* This is a comment. Note that we're in the ANTLR grammar here, so it's not + a Perl '#' comment, and may be multi line... */ +// ... or a single line comment +lexer grammar INTLexer; +/* Set target language to Perl5. */ +options { language = Perl5; } + +/* Lexer rule for an integer. */ +INT : '0'..'9'+; +GRAMMAR +use strict; +use warnings; + +use ANTLR::Runtime::ANTLRStringStream; +use INTLexer; + +my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => '123' }); +my $lexer = INTLexer->new({ input => $input }); +while ((my $_ = $lexer->next_token())) { + print $_->get_text(), "\n"; +} +CODE +123 +OUTPUT + +# Multiple choice, including 'skip' and 'hide' actions. +g_test_output_is({ grammar => <<'GRAMMAR', test_program => <<'CODE', expected => <<'OUTPUT' }); +lexer grammar IDLexer; +options { language = Perl5; } + +ID : ('a'..'z'|'A'..'Z')+ ; +INT : '0'..'9'+ ; +NEWLINE : '\r'? '\n' { $self->skip() } ; +WS : (' '|'\t')+ { $channel = HIDDEN } ; +GRAMMAR +use strict; +use warnings; + +use ANTLR::Runtime::ANTLRStringStream; +use IDLexer; + +my $input = ANTLR::Runtime::ANTLRStringStream->new({ input => "Hello World!\n42\n" }); +my $lexer = IDLexer->new({ input => $input }); + +while (1) { + my $token = $lexer->next_token(); + last if $token->get_type() == IDLexer->EOF; + + print "text: '", $token->get_text(), "'\n"; + print "type: ", $token->get_type(), "\n"; + print "pos: ", $token->get_line(), ':', $token->get_char_position_in_line(), "\n"; + print "channel: ", $token->get_channel(), "\n"; + print "token index: ", $token->get_token_index(), "\n"; + print "\n"; +} +CODE +text: 'Hello' +type: 4 +pos: 1:0 +channel: 0 +token index: -1 + +text: ' ' +type: 7 +pos: 1:5 +channel: 99 +token index: -1 + +text: 'World' +type: 4 +pos: 1:6 +channel: 0 +token index: -1 + +text: '42' +type: 5 +pos: 2:0 +channel: 0 +token index: -1 + +OUTPUT + +=begin SKIP doesn't compile yet + +g_test_output_is({ grammar => scalar grammar_file('XMLLexer.g'), test_program => <<'CODE', expected => <<'OUTPUT' }); +use English qw( -no_match_vars ); +use ANTLR::Runtime::ANTLRStringStream; +use XMLLexer; + +use strict; +use warnings; + +my $input = ANTLR::Runtime::ANTLRStringStream->new(<< 'XML'); +<?xml version='1.0'?> +<test>foo</test> +XML +my $lexer = IDLexer->new($input); +while ((my $_ = $lexer->next_token())) { +} +CODE +XML declaration +PCDATA: "foo" +OUTPUT +} + +=end SKIP diff --git a/runtime/Perl5/t/lib/ANTLR/Runtime/Test.pm b/runtime/Perl5/t/lib/ANTLR/Runtime/Test.pm new file mode 100644 index 0000000..cde05b1 --- /dev/null +++ b/runtime/Perl5/t/lib/ANTLR/Runtime/Test.pm @@ -0,0 +1,155 @@ +package ANTLR::Runtime::Test; + +use strict; +use warnings; + +use base 'Test::Builder::Module'; + +my $CLASS = __PACKAGE__; + +our @EXPORT = qw( g_test_output_is ); + +use Carp; +use Cwd; +use File::Spec; +use File::Temp qw( tempdir ); + +sub read_file { + my ($filename) = @_; + + local $/; + open my $in, '<', $filename or die "Can't open $filename: $!"; + my $content = <$in>; + close $in or warn "Can't close $filename: $!"; + + return $content; +} + +sub write_file { + my ($filename, $content) = @_; + + open my $out, '>', $filename or die "Can't open $filename: $!"; + print $out $content; + close $out or warn "Can't close $filename: $!"; + + return; +} + +sub get_perl { + if (defined $ENV{HARNESS_PERL}) { + return $ENV{HARNESS_PERL}; + } + + if ($^O =~ /^(MS)?Win32$/) { + return Win32::GetShortPathName($^X); + } + + return $^X; +} + +sub g_test_output_is { + my ($args) = @_; + my $grammar = $args->{grammar}; + my $test_program = $args->{test_program}; + my $expected = $args->{expected}; + my $name = $args->{name} || undef; + my $tb = $CLASS->builder; + + my $tmpdir = tempdir( CLEANUP => 1 ); + + my $grammar_name; + if ($grammar =~ /^(?:(?:lexer|parser|tree)\s+)? grammar \s+ (\w+)/xms) { + $grammar_name = $1; + } else { + croak "Can't determine grammar name"; + } + + # write grammar file + my $grammar_file = File::Spec->catfile($tmpdir, "$grammar_name.g"); + write_file($grammar_file, $grammar); + + # write test program file + my $test_program_file = File::Spec->catfile($tmpdir, 'test.pl'); + write_file($test_program_file, $test_program); + + my $cwd = cwd; + my $test_result; + eval { + # compile grammar + my $antlr; + if ($^O =~ /linux/) { + $antlr = 'antlr.sh'; + } + elsif ($^O =~ /MSWin32/) { + $antlr = 'antlr.bat'; + } + else { + $antlr = 'antlr'; + } + my $g_result = run_program([ File::Spec->catfile($cwd, 'tools', $antlr), '-o', $tmpdir, $grammar_file ]); + if ($g_result->{exit_code} >> 8 != 0) { + croak $g_result->{err}; + } + + # run test program + { + #local $ENV{PERLCOV_DB} = File::Spec->catfile($tmpdir, 'perlcov.db'); + #local $ENV{NYTPROF} = 'file=' . File::Spec->catfile($tmpdir, 'nytprof.out'); + $test_result = run_program([ get_perl(), '-Mblib', "-I$tmpdir", $test_program_file ]); + if ($test_result->{exit_code} >> 8 != 0) { + croak $test_result->{err}; + } + } + }; + die $@ if $@; + + my $actual = $test_result->{out}; + + # compare with $expected + return $tb->is_eq($actual, $expected, $name); +} + +sub run_program { + my ($command) = @_; + + open my $old_out, '>&STDOUT' or die "Can't capture stdout: $!"; + close STDOUT or die "Can't close stdout: $!"; + open STDOUT, '>', 'out.tmp' or die "Can't redirect stdout: $!"; + + open my $old_err, '>&STDERR' or die "Can't capture stderr: $!"; + close STDERR or die "Can't close stderr: $!"; + open STDERR, '>', 'err.tmp' or die "Can't redirect stderr: $!"; + + system @$command; + my $exit_code = $?; + + # restore stderr + my $err = read_file('err.tmp'); + close STDERR or die "Can't close stderr: $!"; + open STDERR, '>&', $old_err or die "Can't restore stderr: $!"; + unlink 'err.tmp' or warn "Can't remove err.tmp: $!"; + + # restore stdout + my $out = read_file('out.tmp'); + close STDOUT or die "Can't close stdout: $!"; + open STDOUT, '>&', $old_out or die "Can't restore stdout: $!"; + unlink 'out.tmp' or warn "Can't remove out.tmp: $!"; + + my $exit_value; + if ($exit_code < 0) { + $exit_value = $exit_code; + } elsif ($exit_code && 0xff) { + $exit_value = "[SIGNAL $exit_code]"; + } else { + $exit_value = $exit_code >> 8; + } + + return { + exit_code => $exit_code, + exit_value => $exit_value, + out => $out, + err => $err, + }; +} + +1; diff --git a/runtime/Perl5/t/lib/My/Test/Class.pm b/runtime/Perl5/t/lib/My/Test/Class.pm new file mode 100644 index 0000000..cb91470 --- /dev/null +++ b/runtime/Perl5/t/lib/My/Test/Class.pm @@ -0,0 +1,32 @@ +package My::Test::Class; + +use Test::More; + +use Moose; + +BEGIN { extends 'Test::Class' } + +has 'class' => ( + is => 'rw', + isa => 'Str', +); + +sub new { + my ($class, @args) = @_; + my $self = $class->SUPER::new(@args); + return $class->meta->new_object( + __INSTANCE__ => $self, @args + ); +} + +sub startup : Tests(startup => 1) { + my ($test) = @_; + (my $class = ref $test) =~ s/^Test:://xms; + use_ok $class or die; + $test->class($class); + return; +} + +no Moose; +__PACKAGE__->meta->make_immutable(inline_constructor => 0); +1; diff --git a/runtime/Perl5/t/lib/My/Test/Class/Load.pm b/runtime/Perl5/t/lib/My/Test/Class/Load.pm new file mode 100644 index 0000000..3705485 --- /dev/null +++ b/runtime/Perl5/t/lib/My/Test/Class/Load.pm @@ -0,0 +1,27 @@ +package My::Test::Class::Load; + +use strict; +use warnings; + +use base 'Test::Class::Load'; + +sub is_test_class { + my ($class, $file, $dir) = @_; + + return if !$class->SUPER::is_test_class($file, $dir); + + if (exists $ENV{TEST_CLASS}) { + my $pattern = $ENV{TEST_CLASS}; + + (my $class = $file) =~ s!^\Q$dir\E/!!xms; + $class =~ s/\.pm$//xms; + $class =~ s!/!::!gxms; + + return if $class !~ /$pattern/xms; + } + + return 1; +} + +1; +__END__ diff --git a/runtime/Perl5/t/version.t b/runtime/Perl5/t/version.t new file mode 100644 index 0000000..094e66b --- /dev/null +++ b/runtime/Perl5/t/version.t @@ -0,0 +1,15 @@ +use strict; +use warnings; + +use ExtUtils::MakeMaker; +use Test::More tests => 1; + +my $file = 'lib/ANTLR/Runtime.pm'; + +my $version = MM->parse_version($file); + +# classic CPAN +#like($version, qr/^\d+\.\d{2,}(_\d{2,})?$/); + +# version.pm +like($version, qr/^\d+\.\d+\.\d+(?:_\d+)?$/); diff --git a/runtime/Perl5/tools/antlr.bat b/runtime/Perl5/tools/antlr.bat new file mode 100755 index 0000000..50c3ea5 --- /dev/null +++ b/runtime/Perl5/tools/antlr.bat @@ -0,0 +1,13 @@ +@echo off + +setlocal + +IF "%ANTLR_HOME%" == "" SET ANTLR_HOME=%~d0%~p0..\..\.. + +"%JAVA_HOME%\bin\java" ^ + -Dfile.encoding=UTF-8 ^ + -classpath "%ANTLR_HOME%\tool\target\classes;%ANTLR_HOME%\runtime\Java\target\classes;%ANTLR_HOME%\lib\antlr-3.0.jar;%ANTLR_HOME%\lib\antlr-2.7.7.jar;%ANTLR_HOME%\lib\stringtemplate-3.0.jar" ^ + org.antlr.Tool ^ + %* + +endlocal diff --git a/runtime/Perl5/tools/antlr.sh b/runtime/Perl5/tools/antlr.sh new file mode 100755 index 0000000..78a2562 --- /dev/null +++ b/runtime/Perl5/tools/antlr.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +ANTLR_HOME=`dirname "$0"`/../../.. + +java -Dfile.encoding=windows-1252 \ + -classpath "$ANTLR_HOME/build/classes:$ANTLR_HOME/build/rtclasses:$ANTLR_HOME/lib/antlr-3.0.jar:$ANTLR_HOME/lib/antlr-2.7.7.jar:$ANTLR_HOME/lib/stringtemplate-3.0.jar" \ + org.antlr.Tool \ + $@ diff --git a/runtime/Perl5/tools/port.pl b/runtime/Perl5/tools/port.pl new file mode 100644 index 0000000..350a0cb --- /dev/null +++ b/runtime/Perl5/tools/port.pl @@ -0,0 +1,203 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use version; +use Carp; +use Digest; +use File::Spec; +use File::Spec::Unix; +use YAML::Tiny; + +my $version = qv('0.0.1'); + +sub say { + print @_, "\n"; +} + +my $basedir = '../..'; + +my $commands = { + 'help' => \&help, + 'add' => \&add, + 'status' => \&status, +}; + +my $help = {}; + +sub filetype { + my ($path) = @_; + + if ($path =~ /\.(java|g)$/xms) { + return 'text/plain'; + } + else { + return 'application/octet-stream'; + } +} + +sub sha1sum { + my ($filename) = @_; + + open my $in, '<', $filename or croak "Can't open $filename: $!"; + if (filetype($filename) =~ /^text\//xms) { + # keep standard line feed conversion + } else { + if (!binmode $in) { + croak "Can't binmode $filename: $!"; + } + } + my $sha1 = Digest->new('SHA-1'); + $sha1->addfile($in); + my $digest = $sha1->hexdigest; + close $in or warn "Can't close $filename: $!"; + return $digest; +} + +my $inc_paths = [ + $basedir, + "$basedir/runtime/Java/src", +]; + +sub resolve_file { + my ($filename) = @_; + + my $resolved_file; + if (-e $filename) { + $resolved_file = $filename; + } + else { + my @canidates + = grep { -e $_ } + map { File::Spec->catfile($_, $filename) } + @$inc_paths; + $resolved_file = $canidates[0]; + } + + if (defined $resolved_file) { + $resolved_file = File::Spec::Unix->canonpath($resolved_file); + } + + return $resolved_file; +} + +$help->{help} = << 'EOH'; +help: Describe the usage of this program or its subcommands. +Usage: help [SUBCOMMAND...] +EOH + +sub help { + my ($cmd) = @_; + + if (defined $cmd) { + print $help->{$cmd}; + } + else { + say << 'EOH'; +Usage: port <subcommand> [options] [args] +EOH + say "Available subcommands:"; + foreach my $cmd (keys %$help) { + say " $cmd"; + } + } + +} + +$help->{add} = << 'EOH'; +add: Adds the file to the list of ported files. +Usage: add PATH... +EOH + +sub add { + my ($filename) = @_; + + my $port = YAML::Tiny->read('port.yml'); + my $status = $port->[0]->{status}; + if (!defined $status) { + $status = $port->[0]->{status} = {}; + } + + my $path = resolve_file($filename); + if (!defined $path) { + croak "File not found: $filename"; + } + my $digest = sha1sum($path); + $status->{$filename} = { + 'sha1' => $digest, + }; + $port->write('port.yml'); +} + +$help->{status} = << 'EOH'; +status: Print the status of the ported files. +usage: status [PATH...] +EOH + +sub status { + my $port = YAML::Tiny->read('port.yml'); + + my $status = $port->[0]->{status}; + + while (my ($filename, $fstatus) = each (%$status)) { + my $path = resolve_file($filename); + + my $digest = sha1sum($path); + + if ($digest ne $fstatus->{sha1}) { + say "M $filename"; + } + } +} + +my ($cmd, @args) = @ARGV; + +if (defined $cmd) { + my $cmd_f = $commands->{$cmd}; + if (defined $cmd_f) { + $cmd_f->(@args); + } + else { + say "Unknown command: '$cmd'"; + say "Type 'port help' for usage."; + exit 1; + } +} +else { + say "Type 'port help' for usage."; + exit 1; +} + +__END__ + +=head1 NAME + +port - ANTLR Perl 5 port status + +=head1 VERSION + +This documentation refers to port version 0.0.1 + +=head1 USAGE + + port help + + port status + +=head1 DESCRIPTION + +The primary language target for ANTLR is Java. The Perl 5 port only follows +this primary target language. This brings up the problem to follow the +changes made to the primary target, by knowing I<what> has changed and I<how>. + +This tool keeps a database of file paths and content checksum. Once the port +of a file (Java class, grammar, ...) is completed it is added to the +database (C<port add>). This database can then be queried to check what +primary files have changed (C<port status>). The revision control software +should be helpful to determine the actual changes. + +=head1 AUTHOR + +Ronald Blaschke (ron@rblasch.org) + |