aboutsummaryrefslogtreecommitdiff
path: root/runtime/CSharp3/Sources/Antlr3.Runtime/DFA.cs
blob: 76c4083b0d17dfe2847d32621724e636f00864e3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
/*
 * [The "BSD licence"]
 * Copyright (c) 2005-2008 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

namespace Antlr.Runtime
{
    using ArgumentNullException = System.ArgumentNullException;
    using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;
    using Console = System.Console;
    using IDebugEventListener = Antlr.Runtime.Debug.IDebugEventListener;

    public delegate int SpecialStateTransitionHandler( DFA dfa, int s, IIntStream input );

    /** <summary>A DFA implemented as a set of transition tables.</summary>
     *
     *  <remarks>
     *  Any state that has a semantic predicate edge is special; those states
     *  are generated with if-then-else structures in a specialStateTransition()
     *  which is generated by cyclicDFA template.
     *
     *  There are at most 32767 states (16-bit signed short).
     *  Could get away with byte sometimes but would have to generate different
     *  types and the simulation code too.  For a point of reference, the Java
     *  lexer's Tokens rule DFA has 326 states roughly.
     *  </remarks>
     */
    public class DFA
    {
        protected short[] eot;
        protected short[] eof;
        protected char[] min;
        protected char[] max;
        protected short[] accept;
        protected short[] special;
        protected short[][] transition;

        protected int decisionNumber;

        /** <summary>Which recognizer encloses this DFA?  Needed to check backtracking</summary> */
        protected BaseRecognizer recognizer;

        public bool debug = false;

        public DFA()
            : this(SpecialStateTransitionDefault)
        {
        }

        public DFA( SpecialStateTransitionHandler specialStateTransition )
        {
            this.SpecialStateTransition = specialStateTransition ?? SpecialStateTransitionDefault;
        }

        public virtual string Description
        {
            get
            {
                return "n/a";
            }
        }

        /** <summary>
         *  From the input stream, predict what alternative will succeed
         *  using this DFA (representing the covering regular approximation
         *  to the underlying CFL).  Return an alternative number 1..n.  Throw
         *  an exception upon error.
         *  </summary>
         */
        public virtual int Predict( IIntStream input )
        {
            if (input == null)
                throw new ArgumentNullException("input");

            DfaDebugMessage("Enter DFA.Predict for decision {0}", decisionNumber);

            int mark = input.Mark(); // remember where decision started in input
            int s = 0; // we always start at s0
            try
            {
                while (true)
                {
                    DfaDebugMessage("DFA {0} state {1} LA(1)={2}({3}), index={4}", decisionNumber, s, (char)input.LA(1), input.LA(1), input.Index);

                    int specialState = special[s];
                    if ( specialState >= 0 )
                    {
                        DfaDebugMessage("DFA {0} state {1} is special state {2}", decisionNumber, s, specialState);

                        s = SpecialStateTransition( this, specialState, input );

                        DfaDebugMessage("DFA {0} returns from special state {1} to {2}", decisionNumber, specialState, s);

                        if ( s == -1 )
                        {
                            NoViableAlt( s, input );
                            return 0;
                        }

                        input.Consume();
                        continue;
                    }

                    if ( accept[s] >= 1 )
                    {
                        DfaDebugMessage("accept; predict {0} from state {1}", accept[s], s);
                        return accept[s];
                    }

                    // look for a normal char transition
                    char c = (char)input.LA( 1 ); // -1 == \uFFFF, all tokens fit in 65000 space
                    if ( c >= min[s] && c <= max[s] )
                    {
                        int snext = transition[s][c - min[s]]; // move to next state
                        if ( snext < 0 )
                        {
                            // was in range but not a normal transition
                            // must check EOT, which is like the else clause.
                            // eot[s]>=0 indicates that an EOT edge goes to another
                            // state.
                            if ( eot[s] >= 0 )
                            {
                                // EOT Transition to accept state?
                                DfaDebugMessage("EOT transition");
                                s = eot[s];
                                input.Consume();
                                // TODO: I had this as return accept[eot[s]]
                                // which assumed here that the EOT edge always
                                // went to an accept...faster to do this, but
                                // what about predicated edges coming from EOT
                                // target?
                                continue;
                            }

                            NoViableAlt( s, input );
                            return 0;
                        }

                        s = snext;
                        input.Consume();
                        continue;
                    }

                    if ( eot[s] >= 0 )
                    {
                        // EOT Transition?
                        DfaDebugMessage("EOT transition");
                        s = eot[s];
                        input.Consume();
                        continue;
                    }

                    if ( c == unchecked( (char)TokenTypes.EndOfFile ) && eof[s] >= 0 )
                    {
                        // EOF Transition to accept state?
                        DfaDebugMessage("accept via EOF; predict {0} from {1}", accept[eof[s]], eof[s]);
                        return accept[eof[s]];
                    }

                    // not in range and not EOF/EOT, must be invalid symbol
                    DfaDebugInvalidSymbol(s);

                    NoViableAlt( s, input );
                    return 0;
                }
            }
            finally
            {
                input.Rewind( mark );
            }
        }

        [Conditional("DEBUG_DFA")]
        private void DfaDebugMessage(string format, params object[] args)
        {
            Console.Error.WriteLine(format, args);
        }

        [Conditional("DEBUG_DFA")]
        private void DfaDebugInvalidSymbol(int s)
        {
            Console.Error.WriteLine("min[{0}]={1}", s, min[s]);
            Console.Error.WriteLine("max[{0}]={1}", s, max[s]);
            Console.Error.WriteLine("eot[{0}]={1}", s, eot[s]);
            Console.Error.WriteLine("eof[{0}]={1}", s, eof[s]);

            for (int p = 0; p < transition[s].Length; p++)
                Console.Error.Write(transition[s][p] + " ");

            Console.Error.WriteLine();
        }

        protected virtual void NoViableAlt( int s, IIntStream input )
        {
            if ( recognizer.state.backtracking > 0 )
            {
                recognizer.state.failed = true;
                return;
            }
            NoViableAltException nvae =
                new NoViableAltException( Description,
                                         decisionNumber,
                                         s,
                                         input );
            Error( nvae );
            throw nvae;
        }

        /** <summary>A hook for debugging interface</summary> */
        public virtual void Error( NoViableAltException nvae )
        {
        }

        public SpecialStateTransitionHandler SpecialStateTransition
        {
            get;
            private set;
        }
        //public virtual int specialStateTransition( int s, IntStream input )
        //{
        //    return -1;
        //}

        static int SpecialStateTransitionDefault( DFA dfa, int s, IIntStream input )
        {
            return -1;
        }

        /** <summary>
         *  Given a String that has a run-length-encoding of some unsigned shorts
         *  like "\1\2\3\9", convert to short[] {2,9,9,9}.  We do this to avoid
         *  static short[] which generates so much init code that the class won't
         *  compile. :(
         *  </summary>
         */
        public static short[] UnpackEncodedString( string encodedString )
        {
            // walk first to find how big it is.
            int size = 0;
            for ( int i = 0; i < encodedString.Length; i += 2 )
            {
                size += encodedString[i];
            }
            short[] data = new short[size];
            int di = 0;
            for ( int i = 0; i < encodedString.Length; i += 2 )
            {
                char n = encodedString[i];
                char v = encodedString[i + 1];
                // add v n times to data
                for ( int j = 1; j <= n; j++ )
                {
                    data[di++] = (short)v;
                }
            }
            return data;
        }

        /** <summary>Hideous duplication of code, but I need different typed arrays out :(</summary> */
        public static char[] UnpackEncodedStringToUnsignedChars( string encodedString )
        {
            // walk first to find how big it is.
            int size = 0;
            for ( int i = 0; i < encodedString.Length; i += 2 )
            {
                size += encodedString[i];
            }
            char[] data = new char[size];
            int di = 0;
            for ( int i = 0; i < encodedString.Length; i += 2 )
            {
                char n = encodedString[i];
                char v = encodedString[i + 1];
                // add v n times to data
                for ( int j = 1; j <= n; j++ )
                {
                    data[di++] = v;
                }
            }
            return data;
        }

        [Conditional("ANTLR_DEBUG")]
        protected virtual void DebugRecognitionException(RecognitionException ex)
        {
            IDebugEventListener dbg = recognizer.DebugListener;
            if (dbg != null)
                dbg.RecognitionException(ex);
        }
    }
}