diff options
Diffstat (limited to 'antlr-3.4/runtime/ObjC/Framework/ANTLRLexer.m')
-rw-r--r-- | antlr-3.4/runtime/ObjC/Framework/ANTLRLexer.m | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/antlr-3.4/runtime/ObjC/Framework/ANTLRLexer.m b/antlr-3.4/runtime/ObjC/Framework/ANTLRLexer.m new file mode 100644 index 0000000..de1a0a3 --- /dev/null +++ b/antlr-3.4/runtime/ObjC/Framework/ANTLRLexer.m @@ -0,0 +1,428 @@ +// [The "BSD licence"] +// Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#import <ANTLR/antlr.h>
#import "ANTLRLexer.h"

/**
 * Base lexer implementation. It pulls characters from an ANTLRCharStream
 * (`input`) and records the token being built in the shared recognizer
 * state (`state`). Generated lexers are expected to override -mTokens.
 *
 * This file uses manual retain/release.
 */
@implementation ANTLRLexer

@synthesize input;
@synthesize ruleNestingLevel;

#pragma mark Initializer

/**
 * Initializes the lexer with a newly allocated shared state.
 *
 * @param anInput the character stream to tokenize; retained by the lexer.
 * @return the initialized lexer, or nil if the superclass initializer failed.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput
{
    // NOTE(review): the state allocated here is never released in this file;
    // confirm whether -initWithState: takes over ownership.
    self = [super initWithState:[[ANTLRRecognizerSharedState alloc] init]];
    if ( self != nil ) {
        input = [anInput retain];
        if (state.token != nil)
            [((ANTLRCommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/**
 * Initializes the lexer with a caller-supplied shared state.
 *
 * @param anInput the character stream to tokenize; retained by the lexer.
 * @param aState  the recognizer state handed to the superclass initializer.
 * @return the initialized lexer, or nil if the superclass initializer failed.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput State:(ANTLRRecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self != nil ) {
        input = [anInput retain];
        if (state.token != nil)
            [((ANTLRCommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/** Releases the retained input stream. */
- (void) dealloc
{
    [input release];    // messaging nil is a no-op, so no guard is needed
    [super dealloc];
}

/**
 * Creates a new instance of the receiver's class via plain -init and copies
 * the input stream reference and the rule nesting level onto it.
 */
- (id) copyWithZone:(NSZone *)aZone
{
    ANTLRLexer *copy;

    copy = [[[self class] allocWithZone:aZone] init];
    // copy = [super copyWithZone:aZone]; // allocation occurs here
    if ( input != nil )
        copy.input = input;
    copy.ruleNestingLevel = ruleNestingLevel;
    return copy;
}

/**
 * Resets the recognizer state, rewinds the input stream (if any) to index 0
 * and clears the per-token bookkeeping held in the shared state.
 */
- (void) reset
{
    [super reset]; // reset all recognizer state variables
    // wack Lexer state variables
    if ( input != nil ) {
        [input seek:0]; // rewind the input
    }
    if ( state == nil ) {
        return; // no shared state work to do
    }
    state.token = nil;
    state.type = ANTLRCommonToken.INVALID_TOKEN_TYPE;
    state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
    state.tokenStartCharIndex = -1;
    state.tokenStartCharPositionInLine = -1;
    state.tokenStartLine = -1;
    state.text = nil;
}

// token stuff
#pragma mark Tokens

/** Returns the token currently stored in the shared state. */
- (id<ANTLRToken>)getToken
{
    return [state getToken];
}

/** Stores aToken in the shared state unless it is already the current token. */
- (void) setToken: (id<ANTLRToken>) aToken
{
    if (state.token != aToken) {
        // NOTE(review): this retain is not balanced anywhere in this file; if
        // state.token is a retaining property this over-retains -- confirm
        // against ANTLRRecognizerSharedState before changing.
        [aToken retain];
        state.token = aToken;
    }
}


// this method may be overridden in the generated lexer if we generate a filtering lexer.
/**
 * Returns the next token from the input.
 *
 * Loops until a token is produced: tokens marked with -skip are dropped, and
 * after a recognition error the loop simply tries again. At end of input an
 * EOF token positioned at the current index/line/column is returned.
 */
- (id<ANTLRToken>) nextToken
{
    while (YES) {
        [self setToken:nil];
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = input.index;
        state.tokenStartCharPositionInLine = input.charPositionInLine;
        state.tokenStartLine = input.line;
        state.text = nil;

        // [self setText:[self text]];
        if ([input LA:1] == ANTLRCharStreamEOF) {
            ANTLRCommonToken *eof = [ANTLRCommonToken newToken:input
                                                          Type:ANTLRTokenTypeEOF
                                                       Channel:ANTLRCommonToken.DEFAULT_CHANNEL
                                                         Start:input.index
                                                          Stop:input.index];
            [eof setLine:input.line];
            [eof setCharPositionInLine:input.charPositionInLine];
            return eof;
        }
        @try {
            [self mTokens];
            // SEL aMethod = @selector(mTokens);
            // [[self class] instancesRespondToSelector:aMethod];
            if ( state.token == nil)
                [self emit];    // the rule did not emit explicitly
            else if ( state.token == [ANTLRCommonToken skipToken] ) {
                continue;       // rule asked for this token to be skipped
            }
            return state.token;
        }
        @catch (ANTLRNoViableAltException *nva) {
            [self reportError:nva];
            [self recover:nva];
        }
        @catch (ANTLRRecognitionException *e) {
            // No -recover: here: the match methods in this class already call
            // -recover: before throwing.
            [self reportError:e];
        }
    }
}

/** Entry point for token matching; must be overridden by the generated lexer. */
- (void) mTokens
{   // abstract, defined in generated source as a starting point for matching
    [self doesNotRecognizeSelector:_cmd];
}

/** Marks the current token as skipped so that -nextToken moves on to the next one. */
- (void) skip
{
    state.token = [ANTLRCommonToken skipToken];
}

/** Returns the character stream the lexer reads from. */
- (id<ANTLRCharStream>) input
{
    return input;
}

/**
 * Replaces the character stream and resets the lexer state.
 *
 * The ivar is cleared before -reset so that -reset does not rewind either
 * stream. The new stream is retained before the old one is released, so
 * passing the stream the lexer already owns leaves its retain count
 * unchanged (previously that case added an unbalanced retain).
 */
- (void) setInput:(id<ANTLRCharStream>) anInput
{
    id<ANTLRCharStream> previous = input;

    [anInput retain];
    input = nil;
    [self reset];
    input = anInput;
    [previous release];
}

/** Currently does not support multiple emits per nextToken invocation
 *  for efficiency reasons.  Subclass and override this method and
 *  nextToken (to push tokens into a list and pull from that list rather
 *  than a single variable as this implementation does).
 */
- (void) emit:(id<ANTLRToken>)aToken
{
    state.token = aToken;
}

/** The standard method called to automatically emit a token at the
 *  outermost lexical rule.  The token object should point into the
 *  char buffer start..stop.  If there is a text override in 'text',
 *  use that to set the token's text.  Override this method to emit
 *  custom Token objects.
 *
 *  If you are building trees, then you should also override
 *  Parser or TreeParser.getMissingSymbol().
 */
- (void) emit
{
    id<ANTLRToken> aToken = [ANTLRCommonToken newToken:input
                                                  Type:state.type
                                               Channel:state.channel
                                                 Start:state.tokenStartCharIndex
                                                  Stop:input.index-1];
    [aToken setLine:state.tokenStartLine];
    aToken.text = [self text];
    [aToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    // NOTE(review): extra retain with the matching release commented out
    // below; ownership of the object returned by +newToken:... is not visible
    // here, so this is left untouched -- confirm before changing.
    [aToken retain];
    [self emit:aToken];
    // [aToken release];
}

// matching
#pragma mark Matching

/**
 * Matches aString character by character against the input, consuming each
 * matched character.
 *
 * On a mismatch while backtracking, sets state.failed and returns. Otherwise
 * it calls -recover: and throws an ANTLRMismatchedTokenException.
 */
- (void) matchString:(NSString *)aString
{
    unichar c;
    NSUInteger i = 0;
    NSUInteger stringLength = [aString length];     // -length returns NSUInteger
    while ( i < stringLength ) {
        c = [input LA:1];
        if ( c != [aString characterAtIndex:i] ) {
            if ([state getBacktracking] > 0) {
                state.failed = YES;
                return;
            }
            ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:[aString characterAtIndex:i] Stream:input];
            mte.c = c;
            [self recover:mte];
            @throw mte;
        }
        i++;
        [input consume];
        state.failed = NO;
    }
}

/** Consumes one character without checking it. */
- (void) matchAny
{
    [input consume];
}

/**
 * Matches a single character and consumes it.
 *
 * On a mismatch while backtracking, sets state.failed and returns. Otherwise
 * it calls -recover: and throws an ANTLRMismatchedTokenException.
 */
- (void) matchChar:(unichar) aChar
{
    // TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    //       try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    unichar charLA;
    charLA = [input LA:1];
    if ( charLA != aChar) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:aChar Stream:input];
        mte.c = charLA;
        [self recover:mte];
        @throw mte;
    }
    [input consume];
    state.failed = NO;
}

/**
 * Matches one character in the inclusive range fromChar..toChar and
 * consumes it.
 *
 * On a mismatch while backtracking, sets state.failed and returns. Otherwise
 * it calls -recover: and throws an ANTLRMismatchedRangeException whose range
 * has location fromChar and spans through toChar.
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];
    if ( charLA < fromChar || charLA > toChar ) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        // NSMakeRange takes (location, length), not (from, to).
        // -getErrorMessage:TokenNames: reads the upper bound back as
        // location+length-1, so the length must be the size of the range.
        NSUInteger rangeLength = (toChar >= fromChar) ? (NSUInteger)(toChar - fromChar) + 1 : 0;
        ANTLRMismatchedRangeException  *mre = [ANTLRMismatchedRangeException
                    newException:NSMakeRange((NSUInteger)fromChar, rangeLength)
                          stream:input];
        mre.c = charLA;
        [self recover:mre];
        @throw mre;
    }
    [input consume];
    state.failed = NO;
}

// info
#pragma mark Informational

/** Current line of the input stream. */
- (NSUInteger) line
{
    return input.line;
}

/** Current character position within the line, as reported by the input stream. */
- (NSUInteger) charPositionInLine
{
    return input.charPositionInLine;
}

/**
 * Index of the current lookahead character in the input stream.
 *
 * Delegates to the stream like -line and -charPositionInLine do (this used
 * to be a stub returning 0). With a nil input the result is still 0.
 */
- (NSInteger) index
{
    return input.index;
}

/**
 * Text of the token being matched: the override stored in state.text if one
 * was set, otherwise the input substring from the token start up to (but
 * not including) the current index.
 */
- (NSString *) text
{
    if (state.text != nil) {
        return state.text;
    }
    return [input substringWithRange:NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex)];
}

/** Overrides the text that will be attached to the token being matched. */
- (void) setText:(NSString *) theText
{
    state.text = theText;
}

// error handling
#pragma mark Error handling

/** Reports a recognition error through -displayRecognitionError:Exception:. */
- (void) reportError:(ANTLRRecognitionException *)e
{
    /** TODO: not thought about recovery in lexer yet.
     *
    // if we've already reported an error and have not matched a token
    // yet successfully, don't report any errors.
    if ( errorRecovery ) {
        //System.err.print("[SPURIOUS] ");
        return;
    }
    errorRecovery = true;
     */

    [self displayRecognitionError:[self getTokenNames] Exception:e];
}

/**
 * Builds a human-readable, character-oriented message for a lexer error.
 *
 * @param e          the exception to describe.
 * @param tokenNames token names passed on to the superclass for exception
 *                   types not handled here; when nil, [self getTokenNames]
 *                   is used instead.
 * @return the message string.
 */
- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
    NSString *msg = nil;
    if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
        ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
               [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expecting]];
    }
    else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
        ANTLRNoViableAltException *nvae = (ANTLRNoViableAltException *)e;
        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        msg = [NSString stringWithFormat:@"no viable alternative at character \"%@\"",
               [self getCharErrorDisplay:(nvae.c)]];
    }
    else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
        ANTLREarlyExitException *eee = (ANTLREarlyExitException *)e;
        // for development, can add "(decision="+eee.decisionNumber+")"
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
               [self getCharErrorDisplay:(eee.c)]];
    }
    // NOTE(review): the NotSet check precedes the Set check, presumably
    // because NotSet is a subclass of Set -- keep this order.
    else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class]] ) {
        ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\"  expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
        ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedRangeException class]] ) {
        ANTLRMismatchedRangeException *mre = (ANTLRMismatchedRangeException *)e;
        // Upper bound is location+length-1 (standard NSRange semantics).
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
               [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
               [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    }
    else {
        // Honour the caller's token names; fall back to our own when none
        // were supplied.
        AMutableArray *names = (tokenNames != nil) ? tokenNames : [self getTokenNames];
        msg = [super getErrorMessage:e TokenNames:names];
    }
    return msg;
}

/**
 * Returns a printable representation of a character for error messages:
 * "<EOF>" for ANTLRTokenTypeEOF, an escaped form for newline, tab and
 * carriage return, and the character itself otherwise.
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    NSString *s;
    switch ( c ) {
        case ANTLRTokenTypeEOF :
            s = @"<EOF>";
            break;
        case '\n' :
            s = @"\\n";
            break;
        case '\t' :
            s = @"\\t";
            break;
        case '\r' :
            s = @"\\r";
            break;
        default:
            // %C formats a 16-bit unichar; %c with a (char) cast truncated
            // every non-ASCII character.
            s = [NSString stringWithFormat:@"%C", (unichar)c];
            break;
    }
    return s;
}

/** Lexers can normally match any char in it's vocabulary after matching
 *  a token, so do the easy thing and just kill a character and hope
 *  it all works out.  You can instead use the rule invocation stack
 *  to do sophisticated error recovery if you are in a fragment rule.
 */
- (void)recover:(ANTLRRecognitionException *)re
{
    //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    //re.printStackTrace();
    [input consume];
}

/** Traces rule entry, tagging it with the current lookahead character and position. */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // Arguments are cast to match their format specifiers: line and column
    // are NSUInteger, and the lookahead is narrowed explicitly for %c.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             (char)[input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}

/** Traces rule exit, tagging it with the current lookahead character and position. */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // See -traceIn:Index: for the reason behind the casts.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             (char)[input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}

@end