diff options
Diffstat (limited to 'runtime/Perl5/lib/ANTLR')
28 files changed, 3319 insertions, 0 deletions
diff --git a/runtime/Perl5/lib/ANTLR/Runtime.pm b/runtime/Perl5/lib/ANTLR/Runtime.pm new file mode 100644 index 0000000..57bbc05 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime.pm @@ -0,0 +1,116 @@ +package ANTLR::Runtime; + +use strict; +use warnings; + +use version; our $VERSION = qv('0.0.1'); + +1; + +__END__ + +=head1 NAME + +ANTLR::Runtime - ANTLR Runtime for Perl 5 + + +=head1 VERSION + +This documentation refers to ANTLR::Runtime version 0.0.1 + + +=head1 SYNOPSIS + + use <Module::Name>; + # Brief but working code example(s) here showing the most common usage(s) + + # This section will be as far as many users bother reading + # so make it as educational and exemplary as possible. + + +=head1 DESCRIPTION + +A full description of the module and its features. +May include numerous subsections (i.e. =head2, =head3, etc.) + + +=head1 SUBROUTINES/METHODS + +A separate section listing the public components of the module's interface. +These normally consist of either subroutines that may be exported, or methods +that may be called on objects belonging to the classes that the module provides. +Name the section accordingly. + +In an object-oriented module, this section should begin with a sentence of the +form "An object of this class represents...", to give the reader a high-level +context to help them understand the methods that are subsequently described. + + +=head1 DIAGNOSTICS + +A list of every error and warning message that the module can generate +(even the ones that will "never happen"), with a full explanation of each +problem, one or more likely causes, and any suggested remedies. +(See also "Documenting Errors" in Chapter 13.) + + +=head1 CONFIGURATION AND ENVIRONMENT + +A full explanation of any configuration system(s) used by the module, +including the names and locations of any configuration files, and the +meaning of any environment variables or properties that can be set. These +descriptions must also include details of any configuration language used. +(See also "Configuration Files" in Chapter 19.) + + +=head1 DEPENDENCIES + +A list of all the other modules that this module relies upon, including any +restrictions on versions, and an indication whether these required modules are +part of the standard Perl distribution, part of the module's distribution, +or must be installed separately. + + +=head1 INCOMPATIBILITIES + +A list of any modules that this module cannot be used in conjunction with. +This may be due to name conflicts in the interface, or competition for +system or program resources, or due to internal limitations of Perl +(for example, many modules that use source code filters are mutually +incompatible). + + +=head1 BUGS AND LIMITATIONS + +A list of known problems with the module, together with some indication +whether they are likely to be fixed in an upcoming release. + +Also a list of restrictions on the features the module does provide: +data types that cannot be handled, performance issues and the circumstances +in which they may arise, practical limitations on the size of data sets, +special cases that are not (yet) handled, etc. + +The initial template usually just has: + +There are no known bugs in this module. +Please report problems to <Maintainer name(s)> (<contact address>) +Patches are welcome. + +=head1 AUTHOR + +Ronald Blaschke (ron@rblasch.org) + + +=head1 LICENCE AND COPYRIGHT + +Copyright (c) 2008 Ronald Blaschke (ron@rblasch.org). All rights reserved. + +Based on ANTLR Java Runtime by Terence Parr. + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl (see L<perlartistic>) +or ANTLR (see L<http://www.antlr.org/license.html>) itself. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. diff --git a/runtime/Perl5/lib/ANTLR/Runtime/ANTLRFileStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRFileStream.pm new file mode 100644 index 0000000..b6fe3de --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRFileStream.pm @@ -0,0 +1,84 @@ +package ANTLR::Runtime::ANTLRFileStream; + +use Carp; +use Readonly; + +use Moose; + +extends 'ANTLR::Runtime::ANTLRStringStream'; + +has 'file_name' => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +sub BUILDARGS { + my ($class, @args) = @_; + my $args = $class->SUPER::BUILDARGS(@args); + + my $file_name = $args->{file_name}; + if (!defined $file_name) { + return; + } + + my $fh; + my $encoding = $args->{encoding}; + if (defined $encoding) { + open $fh, "<:encoding($encoding)", $file_name + or croak "Can't open $file_name: $!"; + } + else { + open $fh, '<', $file_name + or croak "Can't open $file_name: $!"; + } + + my $content; + { + local $/; + $content = <$fh>; + } + close $fh or carp "Can't close $fh: $!"; + + $args->{input} = $content; + + return $args; +} + +sub load { + my ($self, $file_name, $encoding) = @_; + + if (!defined $file_name) { + return; + } + + my $fh; + if (defined $encoding) { + open $fh, "<:encoding($encoding)", $file_name + or croak "Can't open $file_name: $!"; + } + else { + open $fh, '<', $file_name + or croak "Can't open $file_name: $!"; + } + + my $content; + { + local $/; + $content = <$fh>; + } + close $fh or carp "Can't close $fh: $!"; + + $self->input($content); + return; +} + +sub get_source_name { + my ($self) = @_; + return $self->file_name; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/ANTLRStringStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRStringStream.pm new file mode 100644 index 0000000..2d55edc --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/ANTLRStringStream.pm @@ -0,0 +1,224 @@ +package ANTLR::Runtime::ANTLRStringStream; + +use Carp; +use Readonly; + +use ANTLR::Runtime::CharStreamState; + +use Moose; + +with 'ANTLR::Runtime::IntStream', 'ANTLR::Runtime::CharStream'; + +has 'input' => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +has 'p' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'line' => ( + is => 'rw', + isa => 'Int', + default => 1, +); + +has 'char_position_in_line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'mark_depth' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'markers' => ( + is => 'rw', + isa => 'ArrayRef[Maybe[ANTLR::Runtime::CharStreamState]]', + default => sub { [ undef ] }, +); + +has 'last_marker' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'name' => ( + is => 'rw', + isa => 'Str', + default => q{}, +); + +sub get_line { + my ($self) = @_; + return $self->line; +} + +sub set_line { + my ($self, $value) = @_; + $self->line($value); + return; +} + +sub get_char_position_in_line { + my ($self) = @_; + return $self->char_position_in_line; +} + +sub set_char_position_in_line { + my ($self, $value) = @_; + $self->char_position_in_line($value); + return; +} + +sub reset { + my ($self) = @_; + + $self->p(0); + $self->line(1); + $self->char_position_in_line(0); + $self->mark_depth(0); + return; +} + +sub consume { + my ($self) = @_; + + if ($self->p < length $self->input) { + $self->char_position_in_line($self->char_position_in_line + 1); + if (substr($self->input, $self->p, 1) eq "\n") { + $self->line($self->line + 1); + $self->char_position_in_line(0); + } + $self->p($self->p + 1); + } + return; +} + +sub LA { + my ($self, $i) = @_; + + if ($i == 0) { + return undef; + } + + if ($i < 0) { + ++$i; # e.g., translate LA(-1) to use offset i=0; then input[p+0-1] + if ($self->p + $i - 1 < 0) { + return $self->EOF; + } + } + + if ($self->p + $i - 1 >= length $self->input) { + return $self->EOF; + } + + return substr $self->input, $self->p + $i - 1, 1; +} + +sub LT { + my ($self, $i) = @_; + + return $self->LA($i); +} + +sub index { + my ($self) = @_; + + return $self->p; +} + +sub size { + my ($self) = @_; + + return length $self->input; +} + +sub mark { + my ($self) = @_; + + $self->mark_depth($self->mark_depth + 1); + my $state; + if ($self->mark_depth >= @{$self->markers}) { + $state = ANTLR::Runtime::CharStreamState->new(); + push @{$self->markers}, $state; + } else { + $state = $self->markers->[$self->mark_depth]; + } + + $state->set_p($self->p); + $state->set_line($self->line); + $state->set_char_position_in_line($self->char_position_in_line); + $self->last_marker($self->mark_depth); + + return $self->mark_depth; +} + +sub rewind { + my $self = shift; + my $m; + if (@_ == 0) { + $m = $self->last_marker; + } else { + $m = shift; + } + + my $state = $self->markers->[$m]; + # restore stream state + $self->seek($state->get_p); + $self->line($state->get_line); + $self->char_position_in_line($state->get_char_position_in_line); + $self->release($m); + return; +} + +sub release { + my ($self, $marker) = @_; + + # unwind any other markers made after m and release m + $self->mark_depth($marker); + # release this marker + $self->mark_depth($self->mark_depth - 1); + return; +} + +# consume() ahead unit p == index; can't just set p = index as we must update +# line and char_position_in_line +sub seek { + my ($self, $index) = @_; + + if ($index <= $self->p) { + # just jump; don't update stream state (line, ...) + $self->p($index); + return; + } + + # seek forward, consume until p hits index + while ($self->p < $index) { + $self->consume(); + } + return; +} + +sub substring { + my ($self, $start, $stop) = @_; + + return substr $self->input, $start, $stop - $start + 1; +} + +sub get_source_name { + my ($self) = @_; + return $self->name; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/BaseRecognizer.pm b/runtime/Perl5/lib/ANTLR/Runtime/BaseRecognizer.pm new file mode 100644 index 0000000..4f46ea7 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/BaseRecognizer.pm @@ -0,0 +1,617 @@ +package ANTLR::Runtime::BaseRecognizer; + +use Readonly; +use Carp; + +use ANTLR::Runtime::RecognizerSharedState; +use ANTLR::Runtime::Token; +use ANTLR::Runtime::UnwantedTokenException; +use ANTLR::Runtime::MissingTokenException; +use ANTLR::Runtime::MismatchedTokenException; + +use Moose; + +Readonly my $MEMO_RULE_FAILED => -2; +sub MEMO_RULE_FAILED { $MEMO_RULE_FAILED } + +Readonly my $MEMO_RULE_UNKNOWN => -1; +sub MEMO_RULE_UNKNOWN { $MEMO_RULE_UNKNOWN } + +Readonly my $INITIAL_FOLLOW_STACK_SIZE => 100; +sub INITIAL_FOLLOW_STACK_SIZE { $INITIAL_FOLLOW_STACK_SIZE } + +# copies from Token object for convenience in actions +Readonly my $DEFAULT_TOKEN_CHANNEL => ANTLR::Runtime::Token->DEFAULT_CHANNEL; +sub DEFAULT_TOKEN_CHANNEL { $DEFAULT_TOKEN_CHANNEL } + +Readonly my $HIDDEN => ANTLR::Runtime::Token->HIDDEN_CHANNEL; +sub HIDDEN { $HIDDEN } + +Readonly my $NEXT_TOKEN_RULE_NAME => 'next_token'; +sub NEXT_TOKEN_RULE_NAME { $NEXT_TOKEN_RULE_NAME } + +# State of a lexer, parser, or tree parser are collected into a state +# object so the state can be shared. This sharing is needed to +# have one grammar import others and share same error variables +# and other state variables. It's a kind of explicit multiple +# inheritance via delegation of methods and shared state. +has 'state' => ( + is => 'rw', + isa => 'ANTLR::Runtime::RecognizerSharedState', + default => sub { ANTLR::Runtime::RecognizerSharedState->new() }, +); + +sub reset { + my ($self) = @_; + + if (!defined $self->state) { + return; + } + + my $state = $self->state; + $state->_fsp(-1); + $state->error_recovery(0); + $state->last_error_index(-1); + $state->failed(0); + $state->syntax_errors(0); + + # wack everything related to backtracking and memoization + $state->backtracking(0); + # wipe cache + $state->rule_memo([]); +} + +sub match { + Readonly my $usage => 'void match(IntStream input, int ttype, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $ttype, $follow) = @_; + + my $matched_symbol = $self->get_current_input_symbol($input); + if ($input->LA(1) eq $ttype) { + $input->consume(); + $self->state->error_recovery(0); + $self->state->failed(0); + return $matched_symbol; + } + + if ($self->state->backtracking > 0) { + $self->state->failed(1); + return $matched_symbol; + } + + return $self->recover_from_mismatched_token($input, $ttype, $follow); +} + +sub match_any { + Readonly my $usage => 'void match_any(IntStream input)'; + croak $usage if @_ != 2; + my ($self, $input) = @_; + + $self->state->error_recovery(0); + $self->state->failed(0); + $input->consume(); +} + +sub mismatch_is_unwanted_token { + my ($self, $input, $ttype) = @_; + return $input->LA(2) == $ttype; +} + +sub mismatch_is_missing_token { + my ($self, $input, $follow) = @_; + + if (!defined $follow) { + return 0; + } + + if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); + $follow = $follow->or($viable_tokens_following_this_rule); + if ($self->state->_fsp >= 0) { + $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); + } + } + + if ($follow->member($input->LA(1)) || $follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + return 1; + } + return 0; +} + +sub mismatch { + Readonly my $usage => 'void mismatch(IntStream input, int ttype, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $ttype, $follow) = @_; + + if ($self->mismatch_is_unwanted_token($input, $ttype)) { + ANTLR::Runtime::UnwantedTokenException->new({ + expecting => $ttype, + input => $input + })->throw(); + } + elsif ($self->mismatch_is_missing_token($input, $follow)) { + ANTLR::Runtime::MissingTokenException->new({ + expecting => $ttype, + input => $input + })->throw(); + } + else { + ANTLR::Runtime::MismatchedTokenException->new({ + expecting => $ttype, + input => $input + })->throw(); + } +} + +sub report_error { + Readonly my $usage => 'void report_error(RecognitionException e)'; + croak $usage if @_ != 2; + my ($self, $e) = @_; + + if ($self->state->error_recovery) { + return; + } + $self->state->syntax_errors($self->state->syntax_errors + 1); + $self->state->error_recovery(1); + + $self->display_recognition_error($self->get_token_names(), $e); + return; +} + +sub display_recognition_error { + Readonly my $usage => 'void display_recognition_error(String[] token_names, RecognitionException e)'; + croak $usage if @_ != 3; + my ($self, $token_names, $e) = @_; + + my $hdr = $self->get_error_header($e); + my $msg = $self->get_error_message($e, $token_names); + $self->emit_error_message("$hdr $msg"); +} + +sub get_error_message { + Readonly my $usage => 'String get_error_message(RecognitionException e, String[] token_names)'; + croak $usage if @_ != 3; + my ($self, $e, $token_names) = @_; + + if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { + my $token_name; + if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { + $token_name = 'EOF'; + } else { + $token_name = $token_names->[$e->get_expecting]; + } + + return 'mismatched input ' . $self->get_token_error_display($e->get_token) + . ' expecting ' . $token_name; + } elsif ($e->isa('ANTLR::Runtime::MismatchedTreeNodeException')) { + my $token_name; + if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { + $token_name = 'EOF'; + } else { + $token_name = $token_names->[$e->get_expecting]; + } + + return 'mismatched tree node: ' . $e->node + . ' expecting ' . $token_name; + } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { + return 'no viable alternative at input ' . $self->get_token_error_display($e->get_token); + } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { + return 'required (...)+ loop did not match anything at input ' + . get_token_error_display($e->get_token); + } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { + return 'mismatched input ' . $self->get_token_error_display($e->get_token) + . ' expecting set ' . $e->get_expecting; + } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { + return 'mismatched input ' . $self->get_token_error_display($e->get_token) + . ' expecting set ' . $e->get_expecting; + } elsif ($e->isa('ANTLR::Runtime::FailedPredicateException')) { + return 'rule ' . $e->rule_name . ' failed predicate: {' + . $e->predicate_text . '}?'; + } else { + return undef; + } +} + +sub get_number_of_syntax_errors { + my ($self) = @_; + return $self->state->syntax_errors; +} + +sub get_error_header { + Readonly my $usage => 'String get_error_header(RecognitionException e)'; + croak $usage if @_ != 2; + my ($self, $e) = @_; + + my $line = $e->get_line(); + my $col = $e->get_char_position_in_line(); + + return "line $line:$col"; +} + +sub get_token_error_display { + Readonly my $usage => 'String get_token_error_display(Token t)'; + croak $usage if @_ != 2; + my ($self, $t) = @_; + + my $s = $t->get_text(); + if (!defined $s) { + if ($t->get_type() == ANTLR::Runtime::Token->EOF) { + $s = '<EOF>'; + } else { + my $ttype = $t->get_type(); + $s = "<$ttype>"; + } + } + + $s =~ s/\n/\\\\n/g; + $s =~ s/\r/\\\\r/g; + $s =~ s/\t/\\\\t/g; + + return "'$s'"; +} + +sub emit_error_message { + Readonly my $usage => 'void emit_error_message(String msg)'; + croak $usage if @_ != 2; + my ($self, $msg) = @_; + + print STDERR $msg, "\n"; +} + +sub recover { + Readonly my $usage => 'void recover(IntStream input, RecognitionException re)'; + croak $usage if @_ != 3; + my ($self, $input, $re) = @_; + + if ($self->state->last_error_index == $input->index()) { + # uh oh, another error at same token index; must be a case + # where LT(1) is in the recovery token set so nothing is + # consumed; consume a single token so at least to prevent + # an infinite loop; this is a failsafe. + $input->consume(); + } + + $self->state->last_error_index($input->index()); + my $follow_set = $self->compute_error_recovery_set(); + $self->begin_resync(); + $self->consume_until($input, $follow_set); + $self->end_resync(); +} + +sub begin_resync { +} + +sub end_resync { +} + +sub compute_error_recovery_set { + Readonly my $usage => 'void compute_error_recovery_set()'; + croak $usage if @_ != 1; + my ($self) = @_; + + $self->combine_follows(0); +} + +sub compute_context_sensitive_rule_FOLLOW { + Readonly my $usage => 'void compute_context_sensitive_rule_FOLLOW()'; + croak $usage if @_ != 1; + my ($self) = @_; + + $self->combine_follows(1); +} + +sub combine_follows { + Readonly my $usage => 'BitSet combine_follows(boolean exact)'; + croak $usage if @_ != 2; + my ($self, $exact) = @_; + + my $top = $self->state->_fsp; + my $follow_set = ANTLR::Runtime::BitSet->new(); + + foreach my $local_follow_set (reverse @{$self->state->following}) { + $follow_set |= $local_follow_set; + if ($exact && $local_follow_set->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + last; + } + } + #$follow_set->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); + return $follow_set; +} + +sub recover_from_mismatched_token { + Readonly my $usage => 'void recover_from_mismatched_token(IntStream input, int ttype, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $ttype, $follow) = @_; + + if ($self->mismatch_is_unwanted_token($input, $ttype)) { + my $ex = ANTLR::Runtime::UnwantedTokenException->new({ + expecting => $ttype, + input => $input + }); + + $self->begin_resync(); + $input->consume(); + $self->end_resync(); + $self->report_error($ex); + + my $matched_symbol = $self->get_current_input_symbol($input); + $input->consume(); + return $matched_symbol; + } + + if ($self->mismatch_is_missing_token($input, $follow)) { + my $inserted = $self->get_missing_symbol({ + input => $input, + expected_token_type => $ttype, + follow => $follow, + }); + my $ex = ANTLR::Runtime::MissingTokenException({ + expecting => $ttype, + input => $input, + inserted => $inserted, + }); + $self->report_error($ex); + return $inserted; + } + + ANTLR::Runtime::MismatchedTokenException->new({ + expecting => $ttype, + input => $input, + })->throw(); +} + +sub recover_from_mismatched_set { + Readonly my $usage => 'void recover_from_mismatched_set(IntStream input, RecognitionException e, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $e, $follow) = @_; + + if ($self->mismatch_is_missing_token($input, $follow)) { + $self->report_error($e); + return $self->get_missing_symbol({ + input => $input, + exception => $e, + expected_token_type => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE, + follow => $follow, + }); + } + + $e->throw(); +} + +sub recover_from_mismatched_element { + Readonly my $usage => 'boolean recover_from_mismatched_element(IntStream input, RecognitionException e, BitSet follow)'; + croak $usage if @_ != 4; + my ($self, $input, $e, $follow) = @_; + + return 0 if (!defined $follow); + + if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { + my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); + $follow |= $viable_tokens_following_this_rule; + $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); + } + + if ($follow->member($input->LA(1))) { + $self->report_error($e); + return 1; + } + + return 0; +} + +sub get_current_input_symbol { + my ($self, $input) = @_; + return undef; +} + +sub get_missing_symbol { + my ($self, $arg_ref) = @_; + my $input = $arg_ref->{input}; + my $exception = $arg_ref->{exception}; + my $expected_token_type = $arg_ref->{expected_token_type}; + my $follow = $arg_ref->{follow}; + + return undef; +} + +sub consume_until { + Readonly my $usage => 'void consume_until(IntStream input, (int token_type | BitSet set))'; + croak $usage if @_ != 3; + + if ($_[2]->isa('ANTLR::Runtime::BitSet')) { + my ($self, $input, $set) = @_; + + my $ttype = $input->LA(1); + while ($ttype != ANTLR::Runtime::Token->EOF && !$set->member($ttype)) { + $input->consume(); + $ttype = $input->LA(1); + } + } else { + my ($self, $input, $token_type) = @_; + + my $ttype = $input->LA(1); + while ($ttype != ANTLR::Runtime::Token->EOF && $ttype != $token_type) { + $input->consume(); + $ttype = $input->LA(1); + } + } +} + +sub push_follow { + Readonly my $usage => 'void push_follow(BitSet fset)'; + croak $usage if @_ != 2; + my ($self, $fset) = @_; + + push @{$self->state->following}, $fset; + $self->state->_fsp($self->state->_fsp + 1); +} + +sub get_rule_invocation_stack { + Readonly my $usage => 'List get_rule_invocation_stack()'; + croak $usage if @_ != 1; + my ($self) = @_; + + my $rules = []; + for (my $i = 0; ; ++$i) { + my @frame = caller $i; + last if !@frame; + + my ($package, $filename, $line, $subroutine) = @frame; + + if ($package =~ /^ANTLR::Runtime::/) { + next; + } + + if ($subroutine eq NEXT_TOKEN_RULE_NAME) { + next; + } + + if ($package ne ref $self) { + next; + } + + push @{$rules}, $subroutine; + } +} + +sub get_backtracking_level { + Readonly my $usage => 'int get_backtracking_level()'; + croak $usage if @_ != 1; + my ($self) = @_; + + return $self->state->backtracking; +} + +sub set_backtracking_level { + my ($self, $n) = @_; + $self->state->backtracking($n); +} + +sub failed { + my ($self) = @_; + return $self->state->failed; +} + +sub get_token_names { + return undef; +} + +sub get_grammar_file_name { + return undef; +} + +sub to_strings { + Readonly my $usage => 'List to_strings(List tokens)'; + croak $usage if @_ != 2; + my ($self, $tokens) = @_; + + if (!defined $tokens) { + return undef; + } + + return map { $_->get_text() } @{$tokens}; +} + +sub get_rule_memoization { + Readonly my $usage => 'int get_rule_memoization(int rule_index, int rule_start_index)'; + croak $usage if @_ != 3; + my ($self, $rule_index, $rule_start_index) = @_; + + if (!defined $self->rule_memo->[$rule_index]) { + $self->rule_memo->[$rule_index] = {}; + } + + my $stop_index = $self->state->rule_memo->[$rule_index]->{$rule_start_index}; + if (!defined $stop_index) { + return $self->MEMO_RULE_UNKNOWN; + } + return $stop_index; +} + +sub alredy_parsed_rule { + Readonly my $usage => 'boolean alredy_parsed_rule(IntStream input, int rule_index)'; + croak $usage if @_ != 3; + my ($self, $input, $rule_index) = @_; + + my $stop_index = $self->get_rule_memoization($rule_index, $input->index()); + if ($stop_index == $self->MEMO_RULE_UNKNOWN) { + return 0; + } + + if ($stop_index == $self->MEMO_RULE_FAILED) { + $self->state->failed(1); + } else { + $input->seek($stop_index + 1); + } + return 1; +} + +sub memoize { + Readonly my $usage => 'void memoize(IntStream input, int rule_index, int rule_start_index)'; + croak $usage if @_ != 4; + my ($self, $input, $rule_index, $rule_start_index) = @_; + + my $stop_token_index = $self->state->failed ? $self->MEMO_RULE_FAILED : $input->index() - 1; + if (defined $self->state->rule_memo->[$rule_index]) { + $self->state->rule_memo->[$rule_index]->{$rule_start_index} = $stop_token_index; + } +} + +sub get_rule_memoization_cache_size { + Readonly my $usage => 'int get_rule_memoization_cache_size()'; + croak $usage if @_ != 1; + my ($self) = @_; + + my $n = 0; + foreach my $m (@{$self->state->rule_memo}) { + $n += keys %{$m} if defined $m; + } + + return $n; +} + +sub trace_in { + Readonly my $usage => 'void trace_in(String rule_name, int rule_index, input_symbol)'; + croak $usage if @_ != 4; + my ($self, $rule_name, $rule_index, $input_symbol) = @_; + + print "enter $rule_name $input_symbol"; + if ($self->state->failed) { + print ' failed=', $self->state->failed; + } + if ($self->state->backtracking > 0) { + print ' backtracking=', $self->state->backtracking; + } + print "\n"; +} + +sub trace_out { + Readonly my $usage => 'void trace_out(String rule_name, int rule_index, input_symbol)'; + croak $usage if @_ != 4; + my ($self, $rule_name, $rule_index, $input_symbol) = @_; + + print "exit $rule_name $input_symbol"; + if ($self->state->failed) { + print ' failed=', $self->state->failed; + } + if ($self->state->backtracking > 0) { + print ' backtracking=', $self->state->backtracking; + } + print "\n"; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ + +=head1 NAME + +ANTLR::Runtime::BaseRecognizer + +=head1 DESCRIPTION + +A generic recognizer that can handle recognizers generated from +lexer, parser, and tree grammars. This is all the parsing +support code essentially; most of it is error recovery stuff and +backtracking. diff --git a/runtime/Perl5/lib/ANTLR/Runtime/BitSet.pm b/runtime/Perl5/lib/ANTLR/Runtime/BitSet.pm new file mode 100644 index 0000000..b066f34 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/BitSet.pm @@ -0,0 +1,346 @@ +package ANTLR::Runtime::BitSet; + +use Carp; +use Readonly; +use List::Util qw( max ); + +use Moose; +use Moose::Util::TypeConstraints; + +use overload + '|=' => \&or_in_place, + '""' => \&str; + +# number of bits / long +Readonly my $BITS => 64; +sub BITS { return $BITS } + +# 2^6 == 64 +Readonly my $LOG_BITS => 6; +sub LOG_BITS { return $LOG_BITS } + +# We will often need to do a mod operator (i mod nbits). Its +# turns out that, for powers of two, this mod operation is +# same as (i & (nbits-1)). Since mod is slow, we use a +# precomputed mod mask to do the mod instead. +Readonly my $MOD_MASK => BITS - 1; +sub MOD_MASK { return $MOD_MASK } + +# The actual data bit +has 'bits' => ( + is => 'rw', + isa => subtype 'Str' => where { /^(?:0|1)*$/xms }, +); + +sub trim_hex { + my ($number) = @_; + + $number =~ s/^0x//xms; + + return $number; +} + +sub BUILD { + my ($self, $args) = @_; + + my $bits; + if (!%$args) { ## no critic (ControlStructures::ProhibitCascadingIfElse) + # Construct a bitset of size one word (64 bits) + $bits = '0' x BITS; + } + elsif (exists $args->{bits}) { + $bits = $args->{bits}; + } + elsif (exists $args->{number}) { + $bits = reverse unpack('B*', pack('N', $args->{number})); + } + elsif (exists $args->{words64}) { + # Construction from a static array of longs + my $words64 = $args->{words64}; + + # $number is in hex format + my $number = join '', + map { trim_hex($_) } + reverse @$words64; + + $bits = ''; + foreach my $h (split //xms, reverse $number) { + $bits .= reverse substr(unpack('B*', pack('h', hex $h)), 4); + } + } + elsif (exists $args->{''}) { + # Construction from a list of integers + } + elsif (exists $args->{size}) { + # Construct a bitset given the size + $bits = '0' x $args->{size}; + } + else { + croak 'Invalid argument'; + } + + $self->bits($bits); + return; +} + +sub of { + my ($class, $el) = @_; + + my $bs = ANTLR::Runtime::BitSet->new({ size => $el + 1 }); + $bs->add($el); + + return $bs; +} + +sub or : method { ## no critic (Subroutines::ProhibitBuiltinHomonyms) + my ($self, $a) = @_; + + if (!defined $a) { + return $self; + } + + my $s = $self->clone(); + $s->or_in_place($a); + return $s; +} + +sub add : method { + my ($self, $el) = @_; + + $self->grow_to_include($el); + my $bits = $self->bits; + substr($bits, $el, 1, '1'); + $self->bits($bits); + + return; +} + +sub grow_to_include : method { + my ($self, $bit) = @_; + + if ($bit > length $self->bits) { + $self->bits .= '0' x ($bit - (length $self->bits) + 1); + } + + return; +} + +sub or_in_place : method { + my ($self, $a) = @_; + + my $i = 0; + foreach my $b (split //xms, $a->bits) { + if ($b) { + $self->add($i); + } + } continue { + ++$i; + } + + return $self; +} + +sub clone : method { + my ($self) = @_; + + return ANTLR::Runtime::BitSet->new(bits => $self->bits); +} + +sub size : method { + my ($self) = @_; + + return scalar $self->bits =~ /1/xms; +} + +sub equals : method { + my ($self, $other) = @_; + + return $self->bits eq $other->bits; +} + +sub member : method { + my ($self, $el) = @_; + + return (substr $self->bits, $el, 1) eq '1'; +} + +sub remove : method { + my ($self, $el) = @_; + + my $bits = $self->bits; + substr($bits, $el, 1, '0'); + $self->bits($bits); + + return; +} + +sub is_nil : method { + my ($self) = @_; + + return $self->bits =~ /1/xms ? 1 : 0; +} + +sub num_bits : method { + my ($self) = @_; + return length $self->bits; +} + +sub length_in_long_words : method { + my ($self) = @_; + return $self->num_bits() / $self->BITS; +} + +sub to_array : method { + my ($self) = @_; + + my $elems = []; + + while ($self->bits =~ /1/gxms) { + push @$elems, $-[0]; + } + + return $elems; +} + +sub to_packed_array : method { + my ($self) = @_; + + return [ + $self->bits =~ /.{BITS}/gxms + ]; +} + +sub str : method { + my ($self) = @_; + + return $self->to_string(); +} + +sub to_string : method { + my ($self, $args) = @_; + + my $token_names; + if (defined $args && exists $args->{token_names}) { + $token_names = $args->{token_names}; + } + + my @str; + my $i = 0; + foreach my $b (split //xms, $self->bits) { + if ($b) { + if (defined $token_names) { + push @str, $token_names->[$i]; + } else { + push @str, $i; + } + } + } continue { + ++$i; + } + + return '{' . (join ',', @str) . '}'; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ + + +=head1 NAME + +ANTLR::Runtime::BitSet - A bit set + + +=head1 SYNOPSIS + + use <Module::Name>; + # Brief but working code example(s) here showing the most common usage(s) + + # This section will be as far as many users bother reading + # so make it as educational and exemplary as possible. + + +=head1 DESCRIPTION + +A stripped-down version of org.antlr.misc.BitSet that is just good enough to +handle runtime requirements such as FOLLOW sets for automatic error recovery. + + +=head1 SUBROUTINES/METHODS + +=over + +=item C<of> + +... + +=item C<or> + +Return this | a in a new set. + +=item C<add> + +Or this element into this set (grow as necessary to accommodate). + +=item C<grow_to_include> + +Grows the set to a larger number of bits. + +=item C<set_size> + +Sets the size of a set. + +=item C<remove> + +Remove this element from this set. + +=item C<length_in_long_words> + +Return how much space is being used by the bits array not how many actually +have member bits on. + +=back + +A separate section listing the public components of the module's interface. +These normally consist of either subroutines that may be exported, or methods +that may be called on objects belonging to the classes that the module provides. +Name the section accordingly. + +In an object-oriented module, this section should begin with a sentence of the +form "An object of this class represents...", to give the reader a high-level +context to help them understand the methods that are subsequently described. + + +=head1 DIAGNOSTICS + +A list of every error and warning message that the module can generate +(even the ones that will "never happen"), with a full explanation of each +problem, one or more likely causes, and any suggested remedies. +(See also "Documenting Errors" in Chapter 13.) + + +=head1 CONFIGURATION AND ENVIRONMENT + +A full explanation of any configuration system(s) used by the module, +including the names and locations of any configuration files, and the +meaning of any environment variables or properties that can be set. These +descriptions must also include details of any configuration language used. +(See also "Configuration Files" in Chapter 19.) + + +=head1 DEPENDENCIES + +A list of all the other modules that this module relies upon, including any +restrictions on versions, and an indication whether these required modules are +part of the standard Perl distribution, part of the module's distribution, +or must be installed separately. + + +=head1 INCOMPATIBILITIES + +A list of any modules that this module cannot be used in conjunction with. +This may be due to name conflicts in the interface, or competition for +system or program resources, or due to internal limitations of Perl +(for example, many modules that use source code filters are mutually +incompatible). diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CharStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/CharStream.pm new file mode 100644 index 0000000..6936eaa --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CharStream.pm @@ -0,0 +1,21 @@ +package ANTLR::Runtime::CharStream; + +use Carp; +use Readonly; + +use Moose::Role; +#extends 'ANTLR::Runtime::IntStream'; + +Readonly my $EOF => -1; +sub EOF { return $EOF } + +requires 'substring'; + +requires 'LT'; + +requires 'get_line', 'set_line'; + +requires 'get_char_position_in_line', 'set_char_position_in_line'; + +no Moose::Role; +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CharStreamState.pm b/runtime/Perl5/lib/ANTLR/Runtime/CharStreamState.pm new file mode 100644 index 0000000..d33821b --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CharStreamState.pm @@ -0,0 +1,28 @@ +package ANTLR::Runtime::CharStreamState; + +use Moose; + +# Index into the char stream of next lookahead char +has 'p' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# What line number is the scanner at before processing buffer[p]? +has 'line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# What char position 0..n-1 in line is scanner before processing buffer[p]? +has 'char_position_in_line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CommonToken.pm b/runtime/Perl5/lib/ANTLR/Runtime/CommonToken.pm new file mode 100644 index 0000000..3c64b97 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CommonToken.pm @@ -0,0 +1,224 @@ +package ANTLR::Runtime::CommonToken; + +use Readonly; + +use Moose; + +use overload + 'bool' => \¬_eof, + fallback => 1, + ; + +with 'ANTLR::Runtime::Token'; + +has 'type' => ( + is => 'rw', + isa => 'Int', + required => 1, +); + +has 'line' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'char_position_in_line' => ( + is => 'rw', + isa => 'Int', + # set to invalid position + default => -1, +); + +has 'channel' => ( + is => 'rw', + isa => 'Int', + default => sub { ANTLR::Runtime::Token->DEFAULT_CHANNEL } +); + +has 'input' => ( + is => 'rw', + isa => 'Maybe[ANTLR::Runtime::CharStream]', +); + +# We need to be able to change the text once in a while. If +# this is non-null, then getText should return this. Note that +# start/stop are not affected by changing this. +has 'text' => ( + is => 'rw', + isa => 'Maybe[Str]', +); + +# What token number is this from 0..n-1 tokens; < 0 implies invalid index +has 'index' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# The char position into the input buffer where this token starts +has 'start' => ( + is => 'rw', + isa => 'Int', +); + +# The char position into the input buffer where this token stops +has 'stop' => ( + is => 'rw', + isa => 'Int', +); + +sub BUILD { + my ($self, $arg_ref) = @_; + + if (exists $arg_ref->{token}) { + my $token = $arg_ref->{token}; + $self->text($token->get_text()); + $self->type($token->get_type()); + $self->line($token->get_line()); + $self->index($token->get_token_index()); + $self->char_position_in_line($token->get_char_position_in_line()); + $self->channel($token->get_channel()); + } + return; +} + +sub get_type { + my ($self) = @_; + return $self->type; +} + +sub set_type { + my ($self, $value) = @_; + $self->value($value); + return; +} + + +sub get_line { + my ($self) = @_; + return $self->line; +} + +sub set_line { + my ($self, $line) = @_; + $self->line($line); + return; +} + +sub get_text { + my ($self) = @_; + + if (defined $self->text) { + return $self->text; + } + if (!defined $self->input) { + return undef; + } + $self->text($self->input->substring($self->start, $self->stop)); + return $self->text; +} + +sub set_text { + my ($self, $text) = @_; + $self->text($text); + return; +} + +sub get_char_position_in_line { + my ($self) = @_; + return $self->char_position_in_line; +} + +sub set_char_position_in_line { + my ($self, $pos) = @_; + $self->char_position_in_line($pos); + return; +} + +sub get_channel { + my ($self) = @_; + return $self->channel; +} + +sub set_channel { + my ($self, $channel) = @_; + $self->channel($channel); + return; +} + +sub get_start_index { + my ($self) = @_; + return $self->start; +} + +sub set_start_index { + my ($self, $start) = @_; + $self->start($start); + return; +} + +sub get_stop_index { + my ($self) = @_; + return $self->stop; +} + +sub set_stop_index { + my ($self, $stop) = @_; + $self->stop($stop); + return; +} + +sub get_token_index { + my ($self) = @_; + return $self->index; +} + +sub set_token_index { + my ($self, $index) = @_; + $self->index($index); + return; +} + +sub get_input_stream { + my ($self) = @_; + return $self->input; +} + +sub set_input_stream { + my ($self, $input) = @_; + $self->input($input); + return; +} + +sub not_eof { + my ($self) = @_; + return $self->type != ANTLR::Runtime::Token->EOF; +} + +=begin later + + public String toString() { + String channelStr = ""; + if ( channel>0 ) { + channelStr=",channel="+channel; + } + String txt = getText(); + if ( txt!=null ) { + txt = txt.replaceAll("\n","\\\\n"); + txt = txt.replaceAll("\r","\\\\r"); + txt = txt.replaceAll("\t","\\\\t"); + } + else { + txt = "<no text>"; + } + return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; + } + +=end later + +=cut + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/CommonTokenStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/CommonTokenStream.pm new file mode 100644 index 0000000..cf298ec --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/CommonTokenStream.pm @@ -0,0 +1,392 @@ +package ANTLR::Runtime::CommonTokenStream; + +use Carp; +use Readonly; +use UNIVERSAL qw( isa ); + +use ANTLR::Runtime::CharStream; +use ANTLR::Runtime::Token; +use ANTLR::Runtime::TokenSource; + +use Moose; + +use overload + '""' => \&str + ; + +with 'ANTLR::Runtime::IntStream', + 'ANTLR::Runtime::TokenStream'; + +has 'token_source' => ( + is => 'rw', + does => 'ANTLR::Runtime::TokenSource', +); + +has 'tokens' => ( + is => 'rw', + isa => 'ArrayRef[ANTLR::Runtime::Token]', + default => sub { [] }, +); + +has 'channel_override_map' => ( + is => 'rw', + isa => 'HashRef[Int]', +); + +has 'discard_set' => ( + is => 'rw', + isa => 'HashRef[Int]', +); + +has 'channel' => ( + is => 'rw', + isa => 'Int', + default => ANTLR::Runtime::Token->DEFAULT_CHANNEL, +); + +has 'discard_off_channel_tokens' => ( + is => 'rw', + isa => 'Bool', + default => 0, +); + +has 'last_marker' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +has 'p' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +sub set_token_source { + my ($self, $token_source) = @_; + + $self->token_source($token_source); + $self->tokens([]); + $self->p(-1); + $self->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); +} + +sub fill_buffer { + my ($self) = @_; + + my $index = 0; + my $t = $self->token_source->next_token(); + while (defined $t && $t->get_type() != ANTLR::Runtime::CharStream->EOF) { + my $discard = 0; + # is there a channel override for token type? + if (defined $self->channel_override_map) { + my $channel = $self->channel_override_map->{$t->get_type()}; + if (defined $channel) { + $t->set_channel($channel); + } + } + + if (defined $self->discard_set && $self->discard_set->contains($t->get_type())) { + $discard = 1; + } elsif ($self->discard_off_channel_tokens && $t->get_channel() != $self->channel) { + $discard = 1; + } + + if (!$discard) { + $t->set_token_index($index); + push @{$self->tokens}, $t; + ++$index; + } + } continue { + $t = $self->token_source->next_token(); + } + + # leave p pointing at first token on channel + $self->p(0); + $self->skip_off_token_channels($self->p); +} + +sub consume { + my ($self) = @_; + + if ($self->p < @{$self->tokens}) { + $self->p($self->p + 1); + $self->p($self->skip_off_token_channels($self->p)); # leave p on valid token + } +} + +sub skip_off_token_channels { + my ($self, $i) = @_; + + my $n = @{$self->tokens}; + while ($i < $n && $self->tokens->[$i]->get_channel() != $self->channel) { + ++$i; + } + + return $i; +} + +sub skip_off_token_channels_reverse { + my ($self, $i) = @_; + + while ($i >= 0 && $self->tokens->[$i]->get_channel() != $self->channel) { + --$i; + } + + return $i; +} + +sub set_token_type_channel { + my ($self, $ttype, $channel) = @_; + + if (!defined $self->channel_override_map) { + $self->channel_override_map({}); + } + + $self->channel_override_map->{$ttype} = $channel; +} + +sub discard_token_type { + my ($self, $ttype) = @_; + + if (!defined $self->discard_set) { + $self->discard_set({}); + } + + $self->discard_set->{$ttype} = 1; +} + +sub get_tokens { + my ($self, $args) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + if (!defined $args) { + return $self->tokens; + } + + my $start = $args->{start}; + my $stop = $args->{stop}; + + my $types; + if (exists $args->{types}) { + if (ref $args->{types} eq 'ARRAY') { + $types = ANTLR::Runtime::BitSet->new($args->{types}); + } else { + $types = $args->{types}; + } + } else { + my $ttype = $args->{ttype}; + $types = ANTLR::Runtime::BitSet->of($ttype); + } + + + if ($stop >= @{$self->tokens}) { + $stop = $#{$self->tokens}; + } + if ($start < 0) { + $start = 0; + } + + if ($start > $stop) { + return undef; + } + + my $filtered_tokens = []; + foreach my $t (@{$self->tokens}[$start..$stop]) { + if (!defined $types || $types->member($t->get_type())) { + push @$filtered_tokens, $t; + } + } + + if (!@{$filtered_tokens}) { + $filtered_tokens = undef; + } + + return $filtered_tokens; +} + +sub LT { + my ($self, $k) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + if ($k == 0) { + return undef; + } + if ($k < 0) { + return $self->LB(-$k); + } + + if ($self->p + $k - 1 >= @{$self->tokens}) { + return ANTLR::Runtime::Token->EOF_TOKEN; + } + + my $i = $self->p; + my $n = 1; + + while ($n < $k) { + $i = $self->skip_off_token_channels($i+1); + ++$n; + } + + if ($i >= @{$self->tokens}) { + return ANTLR::Runtime::Token->EOF_TOKEN; + } + + return $self->tokens->[$i]; +} + +sub LB { + my ($self, $k) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + if ($k == 0) { + return undef; + } + if ($self->p - $k < 0) { + return undef; + } + + my $i = $self->p; + my $n = 1; + while ($n <= $k) { + $k = $self->skip_off_token_channels_reverse($i - 1); + ++$n; + } + + if ($i < 0) { + return undef; + } + + return $self->tokens->[$i]; +} + +sub get { + my ($self, $i) = @_; + + return $self->tokens->[$i]; +} + +sub LA { + my ($self, $i) = @_; + + return $self->LT($i)->get_type(); +} + +sub mark { + my ($self) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + $self->last_marker($self->index()); + return $self->last_marker; +} + +sub release { + my ($self, $marker) = @_; + + # no resources to release +} + +sub size { + my ($self) = @_; + + return scalar @{$self->tokens}; +} + +sub index { + my ($self) = @_; + + return $self->p; +} + +sub rewind { + Readonly my $usage => 'void rewind(int marker) | void rewind()'; + croak $usage if @_ != 1 && @_ != 2; + + if (@_ == 1) { + my ($self) = @_; + $self->seek($self->last_marker); + } else { + my ($self, $marker) = @_; + $self->seek($marker); + } +} + +sub seek { + my ($self, $index) = @_; + + $self->p($index); +} + +sub get_token_source { + my ($self) = @_; + + return $self->token_source; +} + +sub get_source_name { + my ($self) = @_; + return $self->get_token_source()->get_source_name(); +} + +sub str { + my ($self) = @_; + return $self->to_string(); +} + +sub to_string { + Readonly my $usage => 'String to_string() | String to_string(int start, int stop | String to_string(Token start, Token stop)'; + croak $usage if @_ != 1 && @_ != 3; + + if (@_ == 1) { + my ($self) = @_; + + if ($self->p == -1) { + $self->fill_buffer(); + } + return $self->to_string(0, $#{$self->tokens}); + } else { + my ($self, $start, $stop) = @_; + + if (defined $start && defined $stop) { + if (ref($start) && $start->isa('ANTLR::Runtime::Token')) { + $start = $start->get_token_index(); + } + + if (ref($start) && $stop->isa('ANTLR::Runtime::Token')) { + $stop = $stop->get_token_index(); + } + + if ($start < 0 || $stop < 0) { + return undef; + } + if ($self->p == -1) { + $self->fill_buffer(); + } + if ($stop >= @{$self->tokens}) { + $stop = $#{$self->tokens}; + } + + my $buf = ''; + foreach my $t (@{$self->tokens}[$start..$stop]) { + $buf .= $t->get_text(); + } + + return $buf; + } else { + return undef; + } + } +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/DFA.pm b/runtime/Perl5/lib/ANTLR/Runtime/DFA.pm new file mode 100644 index 0000000..d1f0ef1 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/DFA.pm @@ -0,0 +1,192 @@ +package ANTLR::Runtime::DFA; + +use Params::Validate qw( :types ); +use Error qw( try finally ); + +use Moose; + +has 'eot' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'eof' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'min' => ( + is => 'rw', + isa => 'ArrayRef[Str]', +); + +has 'max' => ( + is => 'rw', + isa => 'ArrayRef[Str]', +); + +has 'accept' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'special' => ( + is => 'rw', + isa => 'ArrayRef[Int]', +); + +has 'transition' => ( + is => 'rw', + isa => 'ArrayRef[ArrayRef[Int]]', +); + +has 'decision_number' => ( + is => 'rw', + isa => 'Int', +); + + +# Which recognizer encloses this DFA? Needed to check backtracking +has 'recognizer' => ( + is => 'rw', + isa => 'ANTLR::Runtime::BaseRecognizer', +); + + +sub get_description { + return "n/a"; +} + +# From the input stream, predict what alternative will succeed +# using this DFA (representing the covering regular approximation +# to the underlying CFL). Return an alternative number 1..n. Throw +# an exception upon error. +sub predict { + my ($self, $input) = @_; + + my $mark = $input->mark(); # remember where decision started in input + my $s = 0; # we always start at s0 + + try { + while (1) { + my $special_state = $self->special->[$s]; + if ($special_state >= 0) { + $s = $self->special_state_transition($special_state, $input); + if ($s == -1) { + $self->no_viable_alt($s, $input); + return 0; + } + $input->consume(); + next; + } + + if ($self->accept->[$s] >= 1) { + return $self->accept->[$s]; + } + + # look for a normal char transition + my $c = $input->LA(1); # -1 == \uFFFF, all tokens fit in 65000 space + + if ($c >= $self->min->[$s] && $c <= $self->max->[$s]) { + my $next_s = $self->transition->[$s][$c - $self->min->[$s]]; # move to next state + + if ($next_s < 0) { + # was in range but not a normal transition + # must check EOT, which is like the else clause. + # eot[s]>=0 indicates that an EOT edge goes to another + # state. + if ($self->eot->[$s] >= 0) { # EOT Transition to accept state? + $s = $self->eot->[$s]; + $input->consume(); + # TODO: I had this as return accept[eot[s]] + # which assumed here that the EOT edge always + #went to an accept...faster to do this, but + # what about predicated edges coming from EOT + # target? + next; + } + + $self->no_viable_alt($s, $input); + return 0; + } + + $s = $next_s; + $input->consume(); + next; + } + + if ($self->eot->[$s] >= 0) { # EOT Transition? + $s = $self->eot->[$s]; + $input->consume(); + next; + } + + if ($c == ANTLR::Runtime::Token->EOF && $self->eof->[$s] >= 0) { # EOF Transition to accept state? + return $self->accept->[$self->eof->[$s]]; + } + + # not in range and not EOF/EOT, must be invalid symbol + $self->no_viable_alt($s, $input); + return 0; + } + } + finally { + $input->rewind(); + }; +} + +sub no_viable_alt { + my ($self, $s, $input) = @_; + + if ($self->recognizer->state->backtracking > 0) { + $self->recognizer->state->failed = 1; + return; + } + my $nvae = ANTLR::Runtime::NoViableAltException({ + grammar_decision_description => $self->get_description(), + decision_number => $self->decision_number, + state_number => $self->state_number, + input => $input + }); + $self->error($nvae); + $nvae->throw(); +} + +# A hook for debugging interface +sub error { + my ($self, $nvae) = @_; +} + +sub special_state_transition { + my ($self, $s, $input) = @_; + + return -1; +} + +# Given a String that has a run-length-encoding of some unsigned shorts +# like "\1\2\3\9", convert to short[] {2,9,9,9}. We do this to avoid +# static short[] which generates so much init code that the class won't +# compile. :( +sub unpack_encoded_string { + my ($self, $encoded_string) = @_; + + my $data = []; + while ($encoded_string =~ /(.)(.)/gxms) { + my ($n, $v) = ($1, $2); + + push @$data, $v x $n; + } + + return $data; +} + +sub unpack_encoded_string_to_unsigned_chars { + my ($self, $encoded_string) = @_; + + return $self->unpack_encoded_string($encoded_string); +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/EarlyExitException.pm b/runtime/Perl5/lib/ANTLR/Runtime/EarlyExitException.pm new file mode 100644 index 0000000..17ccd44 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/EarlyExitException.pm @@ -0,0 +1,8 @@ +package ANTLR::Runtime::EarlyExitException; + +use strict; +use warnings; + +use base qw( ANTLR::Runtime::Exception ); + +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Exception.pm b/runtime/Perl5/lib/ANTLR/Runtime/Exception.pm new file mode 100644 index 0000000..1909cd4 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Exception.pm @@ -0,0 +1,23 @@ +package ANTLR::Runtime::Exception; + +use Exception::Class; + +use Moose; + +extends 'Moose::Object', 'Exception::Class::Base'; + +sub BUILD { + my ($self, $args) = @_; + + my %exception_args; + if (exists $args->{message}) { + $exception_args{message} = $args->{message}; + } + + $self->_initialize(%exception_args); + return; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/IntStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/IntStream.pm new file mode 100644 index 0000000..8b459fa --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/IntStream.pm @@ -0,0 +1,25 @@ +package ANTLR::Runtime::IntStream; + +use Moose::Role; + +requires 'consume'; + +requires 'LA'; + +requires 'mark'; + +requires 'index'; + +requires 'rewind'; + +requires 'release'; + +requires 'seek'; + +requires 'size'; + +requires 'get_source_name'; + +no Moose::Role; +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Lexer.pm b/runtime/Perl5/lib/ANTLR/Runtime/Lexer.pm new file mode 100644 index 0000000..5acbaec --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Lexer.pm @@ -0,0 +1,325 @@ +package ANTLR::Runtime::Lexer; + +use English qw( -no_match_vars ); +use Readonly; +use Carp; +use Switch; + +use ANTLR::Runtime::Token; +use ANTLR::Runtime::CommonToken; +use ANTLR::Runtime::CharStream; +use ANTLR::Runtime::MismatchedTokenException; + +use Moose; + +extends 'ANTLR::Runtime::BaseRecognizer'; +with 'ANTLR::Runtime::TokenSource'; + +has 'input' => ( + is => 'rw', + does => 'ANTLR::Runtime::CharStream', +); + +sub reset { + my ($self) = @_; + + # reset all recognizer state variables + $self->SUPER::reset(); + + # wack Lexer state variables + if (defined $self->input) { + # rewind the input + $self->input->seek(0); + } + + if (defined $self->state) { + $self->state->token(undef); + $self->state->type(ANTLR::Runtime::Token->INVALID_TOKEN_TYPE); + $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); + $self->state->token_start_char_index(-1); + $self->state->token_start_char_position_in_line(-1); + $self->state->start_line(-1); + $self->state->text(undef); + } +} + +# Return a token from this source; i.e., match a token on the char +# stream. +sub next_token { + my ($self) = @_; + + while (1) { + $self->state->token(undef); + $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); + $self->state->token_start_char_index($self->input->index()); + $self->state->token_start_char_position_in_line($self->input->get_char_position_in_line()); + $self->state->token_start_line($self->input->get_line()); + $self->state->text(undef); + + if ($self->input->LA(1) eq ANTLR::Runtime::CharStream->EOF) { + return ANTLR::Runtime::Token->EOF_TOKEN; + } + + my $rv; + my $op = ''; + eval { + $self->m_tokens(); + if (!defined $self->state->token) { + $self->emit(); + } + elsif ($self->state->token == ANTLR::Runtime::Token->SKIP_TOKEN) { + $op = 'next'; + return; + } + $op = 'return'; + $rv = $self->state->token; + }; + return $rv if $op eq 'return'; + next if $op eq 'next'; + + if ($EVAL_ERROR) { + my $exception = $EVAL_ERROR; + if ($exception->isa('ANTLR::Runtime::RecognitionException')) { + $self->report_error($exception); + $self->recover($exception); + } else { + croak $exception; + } + } + } +} + +# Instruct the lexer to skip creating a token for current lexer rule +# and look for another token. nextToken() knows to keep looking when +# a lexer rule finishes with token set to SKIP_TOKEN. Recall that +# if token==null at end of any token rule, it creates one for you +# and emits it. +sub skip { + my ($self) = @_; + + $self->state->token(ANTLR::Runtime::Token->SKIP_TOKEN); + return; +} + +# This is the lexer entry point that sets instance var 'token' +sub m_tokens { + croak "Unimplemented"; +} + +# Set the char stream and reset the lexer +sub set_char_stream { + my ($self, $input) = @_; + + $self->input(undef); + $self->reset(); + $self->input($input); +} + +sub get_char_stream { + my ($self) = @_; + return $self->input; +} + +sub get_source_name { + my ($self) = @_; + return $self->input->get_source_name(); +} + +sub emit { + if (@_ == 1) { + my ($self) = @_; + # The standard method called to automatically emit a token at the + # outermost lexical rule. The token object should point into the + # char buffer start..stop. If there is a text override in 'text', + # use that to set the token's text. Override this method to emit + # custom Token objects. + my $t = ANTLR::Runtime::CommonToken->new({ + input => $self->input, + type => $self->state->type, + channel => $self->state->channel, + start => $self->state->token_start_char_index, + stop => $self->get_char_index() - 1 + }); + + $t->set_line($self->state->token_start_line); + $t->set_text($self->state->text); + $t->set_char_position_in_line($self->state->token_start_char_position_in_line); + $self->emit($t); + return $t; + } elsif (@_ == 2) { + my ($self, $token) = @_; + # Currently does not support multiple emits per nextToken invocation + # for efficiency reasons. Subclass and override this method and + # nextToken (to push tokens into a list and pull from that list rather + # than a single variable as this implementation does). + $self->state->token($token); + } +} + +sub match { + my ($self, $s) = @_; + + foreach my $c (split //, $s) { + if ($self->input->LA(1) ne $c) { + if ($self->state->backtracking > 0) { + $self->state->failed(1); + return; + } + my $mte = ANTLR::Runtime::MismatchedTokenException->new({ + expecting => $c, + input => $self->input + }); + $self->recover($mte); + croak $mte; + } + $self->input->consume(); + $self->state->failed(0); + } +} + +sub match_any { + my ($self) = @_; + + $self->input->consume(); +} + +sub match_range { + my ($self, $a, $b) = @_; + + if ($self->input->LA(1) lt $a || $self->input->LA(1) gt $b) { + if ($self->state->backtracking > 0) { + $self->state->failed(1); + return; + } + + my $mre = ANTLR::Runtime::MismatchedRangeException($a, $b, $self->input); + $self->recover($mre); + croak $mre; + } + + $self->input->consume(); + $self->state->failed(0); +} + +sub get_line { + my ($self) = @_; + + return $self->input->get_line(); +} + +sub get_char_position_in_line { + my ($self) = @_; + + return $self->input->get_char_position_in_line(); +} + +# What is the index of the current character of lookahead? +sub get_char_index { + my ($self) = @_; + + return $self->input->index(); +} + +# Return the text matched so far for the current token or any +# text override. +sub get_text { + my ($self) = @_; + + if (defined $self->state->text) { + return $self->state->text; + } + return $self->input->substring($self->state->token_start_char_index, $self->get_char_index() - 1); +} + +# Set the complete text of this token; it wipes any previous +# changes to the text. +sub set_text { + my ($self, $text) = @_; + + $self->state->text($text); +} + +sub report_error { + Readonly my $usage => 'void report_error(RecognitionException e)'; + croak $usage if @_ != 2; + my ($self, $e) = @_; + + $self->display_recognition_error($self->get_token_names(), $e); +} + +sub get_error_message { + my ($self, $e, $token_names) = @_; + + my $msg; + if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { + $msg = 'mismatched character ' + . $self->get_char_error_display($e->get_c()) + . ' expecting ' + . $self->get_char_error_display($e->expecting); + } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { + $msg = 'no viable alternative at character ' . $self->get_char_error_display($e->get_c()); + } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { + $msg = 'required (...)+ loop did not match anything at character ' + . $self->get_char_error_display($e->get_c()); + } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { + $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) + . ' expecting set ' . $e->expecting; + } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { + $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) + . ' expecting set ' . $e->expecting; + } elsif ($e->isa('ANTLR::Runtime::MismatchedRangeException')) { + $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) + . ' expecting set ' . $self->get_char_error_display($e->a) + . '..' . $self->get_char_error_display($e->b); + } else { + $msg = $self->SUPER::get_error_message($e, $token_names); + } + return $msg; +} + +sub get_char_error_display { + my ($self, $c) = @_; + + my $s; + if ($c eq ANTLR::Runtime::Token->EOF) { + $s = '<EOF>'; + } elsif ($c eq "\n") { + $s = '\n'; + } elsif ($c eq "\t") { + $s = '\t'; + } elsif ($c eq "\r") { + $s = '\r'; + } else { + $s = $c; + } + + return "'$s'"; +} + +# Lexers can normally match any char in it's vocabulary after matching +# a token, so do the easy thing and just kill a character and hope +# it all works out. You can instead use the rule invocation stack +# to do sophisticated error recovery if you are in a fragment rule. +sub recover { + my ($self, $re) = @_; + + $self->input->consume(); +} + +sub trace_in { + my ($self, $rule_name, $rule_index) = @_; + + my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); + $self->SUPER::trace_in($rule_name, $rule_index, $input_symbol); +} + +sub trace_out { + my ($self, $rule_name, $rule_index) = @_; + + my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); + $self->SUPER::trace_out($rule_name, $rule_index, $input_symbol); +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/MismatchedSetException.pm b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedSetException.pm new file mode 100644 index 0000000..d87b0e6 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedSetException.pm @@ -0,0 +1,9 @@ +package ANTLR::Runtime::MismatchedSetException; + +use Moose; + +extends 'ANTLR::Runtime::Exception'; + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/MismatchedTokenException.pm b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedTokenException.pm new file mode 100644 index 0000000..b141e99 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/MismatchedTokenException.pm @@ -0,0 +1,33 @@ +package ANTLR::Runtime::MismatchedTokenException; + +use ANTLR::Runtime::Token; + +use Moose; + +use overload + '""' => \&to_string, + 'bool' => sub { 1 }, + fallback => 1 + ; + +extends 'ANTLR::Runtime::RecognitionException'; + +has 'expecting' => ( + is => 'ro', + isa => 'Int', + default => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE, +); + +sub get_expecting { + my ($self) = @_; + return $self->expecting; +} + +sub to_string { + my ($self) = @_; + return "MismatchedTokenException(" . $self->get_unexpected_type() . "!=" . $self->expecting . ")"; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/MissingTokenException.pm b/runtime/Perl5/lib/ANTLR/Runtime/MissingTokenException.pm new file mode 100644 index 0000000..5f55a87 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/MissingTokenException.pm @@ -0,0 +1,36 @@ +package ANTLR::Runtime::MissingTokenException; + +use Moose; + +use overload + '""' => \&to_string; + +extends 'ANTLR::Runtime::MismatchedTokenException'; + +has 'inserted' => ( + is => 'ro', + isa => 'Any', +); + +sub get_missing_type { + my ($self) = @_; + return $self->expecting; +} + +sub to_string { + my ($self) = @_; + + if (defined (my $inserted = $self->inserted) && defined (my $token = $self->token)) { + return "MissingTokenException(inserted $inserted at " . $token->get_text() . ")"; + } + if (defined $self->token) { + return "MissingTokenException(at " . $self->token->get_text() . ")"; + } + + return "MissingTokenException"; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/NoViableAltException.pm b/runtime/Perl5/lib/ANTLR/Runtime/NoViableAltException.pm new file mode 100644 index 0000000..88a78ad --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/NoViableAltException.pm @@ -0,0 +1,27 @@ +package ANTLR::Runtime::NoViableAltException; + +use Moose; + +extends 'ANTLR::Runtime::RecognitionException'; + +has 'grammar_decision_description' => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +has 'decision_number' => ( + is => 'ro', + isa => 'Int', + required => 1, +); + +has 'state_number' => ( + is => 'ro', + isa => 'Int', + required => 1, +); + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Parser.pm b/runtime/Perl5/lib/ANTLR/Runtime/Parser.pm new file mode 100644 index 0000000..a9708ae --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Parser.pm @@ -0,0 +1,93 @@ +package ANTLR::Runtime::Parser; + +use Readonly; +use Carp; + +use Moose; + +extends 'ANTLR::Runtime::BaseRecognizer'; + +has 'input' => ( + is => 'rw', + does => 'ANTLR::Runtime::TokenStream' +); + +sub reset { + my ($self) = @_; + + $self->SUPER::reset(); # reset all recognizer state variables + if (defined $self->input) { + $self->input->seek(0); # rewind the input + } +} + +sub get_current_input_symbol { + my ($self, $input) = @_; + return $self->input->LT(1); +} + +sub get_missing_symbol { + my ($self, $arg_ref) = @_; + my $input = $arg_ref->{input}; + my $exception = $arg_ref->{exception}; + my $expected_token_type = $arg_ref->{expected_token_type}; + my $follow = $arg_ref->{follow}; + + my $token_text; + if ($expected_token_type == ANTLR::Runtime::Token->EOF) { + $token_text = '<missing EOF>'; + } + else { + $token_text = '<missing ' . $self->get_token_names()->[$expected_token_type] . '>'; + } + + my $t = ANTLR::Runtime::CommonToken->new({ + type => $expected_token_type, + text => $token_text + }); + my $current = $input->LT(1); + if ($current->get_type() == ANTLR::Runtime::Token->EOF) { + $current = $input->LT(-1); + } + $t->set_line($current->get_line()); + $t->set_char_position_in_line($current->get_char_position_in_line()); + $t->set_channel($self->DEFAULT_TOKEN_CHANNEL); + + return $t; +} + +sub set_token_stream { + my ($self, $input) = @_; + + $self->input(undef); + $self->reset(); + $self->input($input); +} + +sub get_token_stream { + my ($self) = @_; + + return $self->input; +} + +sub get_source_name { + my ($self) = @_; + return $self->input->get_source_name(); +} + +sub trace_in { + my ($self, $rule_name, $rule_index) = @_; + + $self->SUPER::trace_in($rule_name, $rule_index, $self->input->LT(1)); +} + +sub trace_out { + my ($self, $rule_name, $rule_index) = @_; + + $self->SUPER::trace_out($rule_name, $rule_index, $self->input->LT(1)); +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/ParserRuleReturnScope.pm b/runtime/Perl5/lib/ANTLR/Runtime/ParserRuleReturnScope.pm new file mode 100644 index 0000000..07adcf1 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/ParserRuleReturnScope.pm @@ -0,0 +1,30 @@ +package ANTLR::Runtime::ParserRuleReturnScope; + +use Moose; + +extends 'ANTLR::Runtime::RuleReturnScope'; + +has 'start' => ( + is => 'rw', + does => 'ANTLR::Runtime::Token', +); + +has 'stop' => ( + is => 'rw', + does => 'ANTLR::Runtime::Token', +); + +sub get_start { + my ($self) = @_; + return $self->start; +} + +sub get_stop { + my ($self) = @_; + return $self->stop; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/RecognitionException.pm b/runtime/Perl5/lib/ANTLR/Runtime/RecognitionException.pm new file mode 100644 index 0000000..db8d0ba --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/RecognitionException.pm @@ -0,0 +1,122 @@ +package ANTLR::Runtime::RecognitionException; + +use Carp; +use Readonly; + +use Moose; +use Moose::Util::TypeConstraints; + +extends 'ANTLR::Runtime::Exception'; + +has 'input' => ( + is => 'ro', + does => 'ANTLR::Runtime::IntStream', + required => 1, +); + +has 'index' => ( + is => 'ro', + isa => 'Int', + default => 0, +); + +has 'token' => ( + is => 'ro', + does => 'ANTLR::Runtime::Token', +); + +has 'node' => ( + is => 'ro', + isa => 'Any', +); + +subtype 'Char' + => as 'Str' + => where { $_ eq '-1' || length == 1 }; + +has 'c' => ( + is => 'ro', + isa => 'Maybe[Char]', +); + +has 'line' => ( + is => 'ro', + isa => 'Int', + default => 0, +); + +has 'char_position_in_line' => ( + is => 'ro', + isa => 'Int', + default => 0, +); + +has 'approximate_line_info' => ( + is => 'rw', + isa => 'Bool', +); + +sub BUILDARGS { + my ($class, @args) = @_; + my $args = $class->SUPER::BUILDARGS(@args); + + my $new_args = { %$args }; + my $input = $args->{input}; + $new_args->{input} = $input; + $new_args->{index} = $input->index(); + + if ($input->does('ANTLR::Runtime::TokenStream')) { + my $token = $input->LT(1); + $new_args->{token} = $token; + $new_args->{line} = $token->get_line(); + $new_args->{char_position_in_line} = $token->get_char_position_in_line(); + } + + if ($input->does('ANTLR::Runtime::TreeNodeStream')) { + # extract_information_from_tree_node_stream($input); + } + elsif ($input->does('ANTLR::Runtime::CharStream')) { + $new_args->{c} = $input->LA(1); + $new_args->{line} = $input->get_line(); + $new_args->{char_position_in_line} = $input->get_char_position_in_line(); + } + else { + $new_args->{c} = $input->LA(1); + } + + return $new_args; +} + +sub get_unexpected_type { + my ($self) = @_; + + if ($self->input->isa('ANTLR::Runtime::TokenStream')) { + return $self->token->get_type(); + } else { + return $self->c; + } +} + +sub get_c { + my ($self) = @_; + return $self->c; +} + +sub get_line { + my ($self) = @_; + return $self->line; +} + +sub get_char_position_in_line { + my ($self) = @_; + return $self->char_position_in_line; +} + +sub get_token { + my ($self) = @_; + return $self->token; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/RecognizerSharedState.pm b/runtime/Perl5/lib/ANTLR/Runtime/RecognizerSharedState.pm new file mode 100644 index 0000000..96c51f7 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/RecognizerSharedState.pm @@ -0,0 +1,130 @@ +package ANTLR::Runtime::RecognizerSharedState; + +use ANTLR::Runtime::Token; + +use Moose; + +# Track the set of token types that can follow any rule invocation. +# Stack grows upwards. When it hits the max, it grows 2x in size +# and keeps going. +has 'following' => ( + is => 'rw', + isa => 'ArrayRef[ANTLR::Runtime::BitSet]', + default => sub { [] }, +); + +has '_fsp' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# This is true when we see an error and before having successfully +# matched a token. Prevents generation of more than one error message +# per error. +has 'error_recovery' => ( + is => 'rw', + isa => 'Bool', + default => 0, +); + +# The index into the input stream where the last error occurred. +# This is used to prevent infinite loops where an error is found +# but no token is consumed during recovery...another error is found, +# ad naseum. This is a failsafe mechanism to guarantee that at least +# one token/tree node is consumed for two errors. +has 'last_error_index' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# In lieu of a return value, this indicates that a rule or token +# has failed to match. Reset to false upon valid token match. +has 'failed' => ( + is => 'rw', + isa => 'Bool', + default => 0, +); + +# Did the recognizer encounter a syntax error? Track how many. +has 'syntax_errors' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# If 0, no backtracking is going on. Safe to exec actions etc... +# If >0 then it's the level of backtracking. +has 'backtracking' => ( + is => 'rw', + isa => 'Int', + default => 0, +); + +# An array[size num rules] of Map<Integer,Integer> that tracks +# the stop token index for each rule. ruleMemo[ruleIndex] is +# the memoization table for ruleIndex. For key ruleStartIndex, you +# get back the stop token for associated rule or MEMO_RULE_FAILED. +# This is only used if rule memoization is on (which it is by default). +has 'rule_memo' => ( + is => 'rw', + isa => 'Maybe[ArrayRef[HashRef[Int]]]', +); + +# The goal of all lexer rules/methods is to create a token object. +# This is an instance variable as multiple rules may collaborate to +# create a single token. nextToken will return this object after +# matching lexer rule(s). If you subclass to allow multiple token +# emissions, then set this to the last token to be matched or +# something nonnull so that the auto token emit mechanism will not +# emit another token. +has 'token' => ( + is => 'rw', + isa => 'Maybe[ANTLR::Runtime::Token]', +); + +# What character index in the stream did the current token start at? +# Needed, for example, to get the text for current token. Set at +# the start of nextToken. +has 'token_start_char_index' => ( + is => 'rw', + isa => 'Int', + default => -1, +); + +# The line on which the first character of the token resides +has 'token_start_line' => ( + is => 'rw', + isa => 'Int', +); + +# The character position of first character within the line +has 'token_start_char_position_in_line' => ( + is => 'rw', + isa => 'Int', +); + +# The channel number for the current token +has 'channel' => ( + is => 'rw', + isa => 'Int', +); + +# The token type for the current token +has 'type' => ( + is => 'rw', + isa => 'Int', +); + +# You can set the text for the current token to override what is in +# the input char buffer. Use setText() or can set this instance var. +has 'text' => ( + is => 'rw', + isa => 'Maybe[Str]', +); + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/RuleReturnScope.pm b/runtime/Perl5/lib/ANTLR/Runtime/RuleReturnScope.pm new file mode 100644 index 0000000..eb08dbc --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/RuleReturnScope.pm @@ -0,0 +1,23 @@ +package ANTLR::Runtime::RuleReturnScope; + +use Moose; + +sub get_start { + return; +} + +sub get_stop { + return; +} + +sub get_tree { + return; +} + +sub get_template { + return; +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Stream.pm b/runtime/Perl5/lib/ANTLR/Runtime/Stream.pm new file mode 100644 index 0000000..3dd0ba7 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Stream.pm @@ -0,0 +1,23 @@ +package ANTLR::Runtime::Stream; + +use Moose::Role; + +requires 'consume'; + +requires 'LA'; + +requires 'mark'; + +requires 'index'; + +requires 'rewind'; + +requires 'release'; + +requires 'seek'; + +requires 'size'; + +no Moose::Role; +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/Token.pm b/runtime/Perl5/lib/ANTLR/Runtime/Token.pm new file mode 100644 index 0000000..7cd8f21 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/Token.pm @@ -0,0 +1,80 @@ +package ANTLR::Runtime::Token; + +use Readonly; + +use feature qw( state ); + +use ANTLR::Runtime::CharStream; +#use ANTLR::Runtime::CommonToken; + +use Moose::Role; + +Readonly my $EOR_TOKEN_TYPE => 1; +sub EOR_TOKEN_TYPE { $EOR_TOKEN_TYPE } + +# imaginary tree navigation type; traverse "get child" link +Readonly my $DOWN => 2; +sub DOWN { $DOWN } + +# imaginary tree navigation type; finish with a child list +Readonly my $UP => 3; +sub UP { $UP } + +Readonly my $MIN_TOKEN_TYPE => $UP + 1; +sub MIN_TOKEN_TYPE { $MIN_TOKEN_TYPE } + +# All tokens go to the parser (unless skip() is called in that rule) +# on a particular "channel". The parser tunes to a particular channel +# so that whitespace etc... can go to the parser on a "hidden" channel. +Readonly my $DEFAULT_CHANNEL => 0; +sub DEFAULT_CHANNEL { $DEFAULT_CHANNEL } + +# Anything on different channel than DEFAULT_CHANNEL is not parsed +# by parser. +Readonly my $HIDDEN_CHANNEL => 99; +sub HIDDEN_CHANNEL { $HIDDEN_CHANNEL } + +sub EOF { ANTLR::Runtime::CharStream->EOF } + +#Readonly my $EOF_TOKEN => ANTLR::Runtime::CommonToken->new({ type => EOF }); +sub EOF_TOKEN { + require ANTLR::Runtime::CommonToken; + state $EOF_TOKEN = ANTLR::Runtime::CommonToken->new({ type => EOF }); + return $EOF_TOKEN; +} + +Readonly my $INVALID_TOKEN_TYPE => 0; +sub INVALID_TOKEN_TYPE { $INVALID_TOKEN_TYPE } + +#Readonly my $INVALID_TOKEN => ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); +sub INVALID_TOKEN { + require ANTLR::Runtime::CommonToken; + state $INVALID_TOKEN = ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); + return $INVALID_TOKEN; +} + +# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR +# will avoid creating a token for this symbol and try to fetch another. +#Readonly my $SKIP_TOKEN => ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); +sub SKIP_TOKEN { + require ANTLR::Runtime::CommonToken; + state $SKIP_TOKEN = ANTLR::Runtime::CommonToken->new({ type => INVALID_TOKEN_TYPE }); + return $SKIP_TOKEN; +} + +requires 'get_text', 'set_text'; + +requires 'get_type', 'set_type'; + +requires 'get_line', 'set_line'; + +requires 'get_char_position_in_line', 'set_char_position_in_line'; + +requires 'get_channel', 'set_channel'; + +requires 'get_token_index', 'set_token_index'; + +requires 'get_input_stream', 'set_input_stream'; + +no Moose::Role; +1; diff --git a/runtime/Perl5/lib/ANTLR/Runtime/TokenSource.pm b/runtime/Perl5/lib/ANTLR/Runtime/TokenSource.pm new file mode 100644 index 0000000..f9200ef --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/TokenSource.pm @@ -0,0 +1,35 @@ +package ANTLR::Runtime::TokenSource; + +use Moose::Role; + +# Return a Token object from your input stream (usually a CharStream). +# Do not fail/return upon lexing error; keep chewing on the characters +# until you get a good one; errors are not passed through to the parser. +requires 'next_token'; + +# Where are you getting tokens from? normally the implication will simply +# ask lexers input stream. +requires 'get_source_name'; + +no Moose::Role; +1; +__END__ + +=head1 NAME + +ANTLR::Runtime::TokenSource + +=head1 DESCRIPTION + +A source of tokens must provide a sequence of tokens via nextToken() +and also must reveal it's source of characters; CommonToken's text is +computed from a CharStream; it only store indices into the char stream. + +Errors from the lexer are never passed to the parser. Either you want +to keep going or you do not upon token recognition error. If you do not +want to continue lexing then you do not want to continue parsing. Just +throw an exception not under RecognitionException and Java will naturally +toss you all the way out of the recognizers. If you want to continue +lexing then you should not throw an exception to the parser--it has already +requested a token. Keep lexing until you get a valid one. Just report +errors and keep going, looking for a valid token. diff --git a/runtime/Perl5/lib/ANTLR/Runtime/TokenStream.pm b/runtime/Perl5/lib/ANTLR/Runtime/TokenStream.pm new file mode 100644 index 0000000..ecab89a --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/TokenStream.pm @@ -0,0 +1,16 @@ +package ANTLR::Runtime::TokenStream; + +use Moose::Role; +#extends 'ANTLR::Runtime::IntStream'; + +requires 'LT'; + +requires 'get'; + +requires 'get_token_source'; + +requires 'to_string'; + +no Moose::Role; +1; +__END__ diff --git a/runtime/Perl5/lib/ANTLR/Runtime/UnwantedTokenException.pm b/runtime/Perl5/lib/ANTLR/Runtime/UnwantedTokenException.pm new file mode 100644 index 0000000..60c1202 --- /dev/null +++ b/runtime/Perl5/lib/ANTLR/Runtime/UnwantedTokenException.pm @@ -0,0 +1,37 @@ +package ANTLR::Runtime::UnwantedTokenException; + +use Moose; + +use overload + '""' => \&to_string; + +extends 'ANTLR::Runtime::MismatchedTokenException'; + +sub get_unexpected_token { + my ($self) = @_; + return $self->token; +} + +sub to_string { + my ($self) = @_; + + my $exp; + if ($self->expecting == ANTLR::Runtime::Token->INVALID_TOKEN_TYPE) { + $exp = ''; + } + else { + $exp = ", expected " . $self->expecting; + } + + if (defined $self->token) { + return "UnwantedTokenException(found=" . $self->token->get_text() . "$exp)"; + } + else { + return "UnwantedTokenException(found=undef$exp)"; + } +} + +no Moose; +__PACKAGE__->meta->make_immutable(); +1; +__END__ |