diff options
Diffstat (limited to 'runtime/Ruby/lib/antlr3/dfa.rb')
-rw-r--r-- | runtime/Ruby/lib/antlr3/dfa.rb | 320 |
1 files changed, 320 insertions, 0 deletions
diff --git a/runtime/Ruby/lib/antlr3/dfa.rb b/runtime/Ruby/lib/antlr3/dfa.rb new file mode 100644 index 0000000..78cc008 --- /dev/null +++ b/runtime/Ruby/lib/antlr3/dfa.rb @@ -0,0 +1,320 @@ +#!/usr/bin/ruby +# encoding: utf-8 + +=begin LICENSE + +[The "BSD licence"] +Copyright (c) 2009-2010 Kyle Yetter +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=end + +module ANTLR3 + +=begin rdoc ANTLR3::DFA + +DFA is a class that implements a finite state machine that chooses between +alternatives in a rule based upon lookahead symbols from an input stream. + +Deterministic Finite Automata (DFA) are finite state machines that are capable +of recognizing <i>regular languages</i>. For more background on the subject, +check out http://en.wikipedia.org/wiki/Deterministic_finite-state_machine or +check out general ANTLR documentation at http://www.antlr.org + +ANTLR implements most of its decision logic directly using code branching +structures in methods. However, for certain types of decisions, ANTLR will +generate a special DFA class definition to implement a decision. + +Conceptually, these state machines are defined by a number of states, each state +represented by an integer indexed upward from zero. State number +0+ is the +<i>start state</i> of the machine; every prediction will begin in state +0+. At +each step, the machine examines the next symbol on the input stream, checks the +value against the transition parameters associated with the current state, and +either moves to a new state number to repeat the process or decides that the +machine cannot transition any further. If the machine cannot transition any +further and the current state is defined as an <i>accept state</i>, an +alternative has been chosen successfully and the prediction procedure ends. If +the current state is not an <i>accept state</i>, the prediction has failed and +there is <i>no viable alternative</i>. + +In generated code, ANTLR defines DFA states using seven parameters, each defined +as a member of seven seperate array constants -- +MIN+, +MAX+, +EOT+, +EOF+, ++SPECIAL+, +ACCEPT+, and +TRANSITION+. The parameters that characterize state ++s+ are defined by the value of these lists at index +s+. + +MIN[s]:: + The smallest value of the next input symbol that has + a transition for state +s+ +MAX[s]:: + The largest value of the next input symbol that has + a transition for state +s+ +TRANSITION[s]:: + A list that defines the next state number based upon + the current input symbol. +EOT[s]:: + If positive, it specifies a state transition in + situations where a non-matching input symbol does + not indicate failure. +SPECIAL[s]:: + If positive, it indicates that the prediction + algorithm must defer to a special code block + to determine the next state. The value is used + by the special state code to determine the next + state. +ACCEPT[s]:: + If positive and there are no possible state + transitions, this is the alternative number + that has been predicted +EOF[s]:: + If positive and the input stream has been exhausted, + this is the alternative number that has been predicted. + +For more information on the prediction algorithm, check out the #predict method +below. + +=end + +class DFA + include Constants + include Error + + attr_reader :recognizer, :decision_number, :eot, :eof, :min, :max, + :accept, :special, :transition, :special_block + + class << self + attr_reader :decision, :eot, :eof, :min, :max, + :accept, :special, :transition + + def unpack( *data ) + data.empty? and return [].freeze + + n = data.length / 2 + size = 0 + n.times { |i| size += data[ 2*i ] } + if size > 1024 + values = Hash.new( 0 ) + data.each_slice( 2 ) do |count, value| + values[ value ] += count + end + default = values.keys.max_by { |v| values[ v ] } + + unpacked = Hash.new( default ) + position = 0 + data.each_slice( 2 ) do |count, value| + unless value == default + count.times { |i| unpacked[ position + i ] = value } + end + position += count + end + else + unpacked = [] + data.each_slice( 2 ) do |count, value| + unpacked.fill( value, unpacked.length, count ) + end + end + + return unpacked + end + + end + + def initialize( recognizer, decision_number = nil, + eot = nil, eof = nil, min = nil, max = nil, + accept = nil, special = nil, + transition = nil, &special_block ) + @recognizer = recognizer + @decision_number = decision_number || self.class.decision + @eot = eot || self.class::EOT #.eot + @eof = eof || self.class::EOF #.eof + @min = min || self.class::MIN #.min + @max = max || self.class::MAX #.max + @accept = accept || self.class::ACCEPT #.accept + @special = special || self.class::SPECIAL #.special + @transition = transition || self.class::TRANSITION #.transition + @special_block = special_block + rescue NameError => e + raise unless e.message =~ /uninitialized constant/ + constant = e.name + message = Util.tidy( <<-END ) + | No #{ constant } information provided. + | DFA cannot be instantiated without providing state array information. + | When DFAs are generated by ANTLR, this information should already be + | provided in the DFA subclass constants. + END + end + + if RUBY_VERSION =~ /^1\.9/ + + def predict( input ) + mark = input.mark + state = 0 + + 50000.times do + special_state = @special[ state ] + if special_state >= 0 + state = @special_block.call( special_state ) + if state == -1 + no_viable_alternative( state, input ) + return 0 + end + input.consume + next + end + @accept[ state ] >= 1 and return @accept[ state ] + + # look for a normal char transition + + c = input.peek.ord + # the @min and @max arrays contain the bounds of the character (or token type) + # ranges for the transition decisions + if c.between?( @min[ state ], @max[ state ] ) + # c - @min[state] is the position of the character within the range + # so for a range like ?a..?z, a match of ?a would be 0, + # ?c would be 2, and ?z would be 25 + next_state = @transition[ state ][ c - @min[ state ] ] + if next_state < 0 + if @eot[ state ] >= 0 + state = @eot[ state ] + input.consume + next + end + no_viable_alternative( state, input ) + return 0 + end + + state = next_state + input.consume + next + end + + if @eot[ state ] >= 0 + state = @eot[ state ] + input.consume() + next + end + + ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ] + no_viable_alternative( state, input ) + return 0 + end + + ANTLR3.bug!( Util.tidy( <<-END ) ) + | DFA BANG! + | The prediction loop has exceeded a maximum limit of 50000 iterations + | ---- + | decision: #@decision_number + | description: #{ description } + END + ensure + input.rewind( mark ) + end + + else + + def predict( input ) + mark = input.mark + state = 0 + + 50000.times do + special_state = @special[ state ] + if special_state >= 0 + state = @special_block.call( special_state ) + if state == -1 + no_viable_alternative( state, input ) + return 0 + end + input.consume + next + end + @accept[ state ] >= 1 and return @accept[ state ] + + # look for a normal char transition + + c = input.peek + # the @min and @max arrays contain the bounds of the character (or token type) + # ranges for the transition decisions + if c.between?( @min[ state ], @max[ state ] ) + # c - @min[state] is the position of the character within the range + # so for a range like ?a..?z, a match of ?a would be 0, + # ?c would be 2, and ?z would be 25 + next_state = @transition[ state ][ c - @min[ state ] ] + if next_state < 0 + if @eot[ state ] >= 0 + state = @eot[ state ] + input.consume + next + end + no_viable_alternative( state, input ) + return 0 + end + + state = next_state + input.consume() + next + end + if @eot[ state ] >= 0 + state = @eot[ state ] + input.consume() + next + end + ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ] + no_viable_alternative( state, input ) + return 0 + end + + ANTLR3.bug!( Util.tidy( <<-END ) ) + | DFA BANG! + | The prediction loop has exceeded a maximum limit of 50000 iterations + | ---- + | decision: #@decision_number + | description: #{ description } + END + ensure + input.rewind( mark ) + end + + end + + def no_viable_alternative( state, input ) + raise( BacktrackingFailed ) if @recognizer.state.backtracking > 0 + except = NoViableAlternative.new( description, @decision_number, state, input ) + error( except ) + raise( except ) + end + + def error( except ) + # overridable debugging hook + end + + def special_state_transition( state, input ) + return -1 + end + + def description + return "n/a" + end +end +end |