about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ben Gruver <bgruv@google.com> 2012-09-23 17:52:14 -0700
committer: Ben Gruver <bgruv@google.com> 2012-09-23 18:14:03 -0700
commit: 48d5b730272ada20e5b8c0245d297b03dbbf6d02 (patch)
tree: ef18698d6205945430a04bc72bdf483aefe70ea4
parent: b88e9c37528f9eafa3b5580ca1b812cc958f0fc2 (diff)
download: smali-48d5b730272ada20e5b8c0245d297b03dbbf6d02.tar.gz
Remove ANTLR lexer
-rw-r--r-- smali/src/main/antlr3/smaliLexer.g 791
-rw-r--r-- smali/src/main/antlr3/smaliParser.g 144
-rw-r--r-- smali/src/main/java/org/jf/smali/InvalidToken.java 5
-rw-r--r-- smali/src/main/java/org/jf/smali/main.java 33
-rw-r--r-- smali/src/test/java/LexerTest.java 4
5 files changed, 151 insertions, 826 deletions
diff --git a/smali/src/main/antlr3/smaliLexer.g b/smali/src/main/antlr3/smaliLexer.g
deleted file mode 100644
index 872e1065..00000000
--- a/smali/src/main/antlr3/smaliLexer.g
+++ /dev/null
@@ -1,791 +0,0 @@
-/*
- * The comment, number, string and character constant lexical rules are
- * derived from rules from the Java 1.6 grammar which can be found here:
- * http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g
- *
- * Specifically, these rules:
- *
- * BASE_INTEGER, DECIMAL_EXPONENT, BINARY_EXPONENT, HEX_PREFIX, HEX_DIGIT,
- * BASE_FLOAT_OR_ID, BASE_FLOAT, ESCAPE_SEQUENCE, POSITIVE_INTEGER_LITERAL,
- * NEGATIVE_INTEGER_LITERAL, LONG_LITERAL, SHORT_LITERAL, BYTE_LITERAL,
- * FLOAT_LITERAL_OR_ID, DOUBLE_LITERAL_OR_ID, FLOAT_LITERAL, DOUBLE_LITERAL,
- * BOOL_LITERAL, STRING_LITERAL, BASE_STRING_LITERAL, CHAR_LITERAL,
- * BASE_CHAR_LITERAL
- *
- * These rules were originally copyrighted by Terence Parr, and are used here in
- * accordance with the following license
- *
- * [The "BSD licence"]
- * Copyright (c) 2007-2008 Terence Parr
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * The remainder of this grammar is released by me (Ben Gruver) under the
- * following license:
- *
- * [The "BSD licence"]
- * Copyright (c) 2010 Ben Gruver
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-lexer grammar smaliLexer;
-
-options {
- superClass=ANTLRLexerWithErrorInterface;
-}
-
-@lexer::header {
- package org.jf.smali;
-
- import static org.jf.smali.LexerErrorInterface.ANTLRLexerWithErrorInterface;
-}
-
-@lexer::members {
- public static final int ERROR_CHANNEL = 100;
- public String getErrorHeader(RecognitionException e) {
- return getSourceName()+"["+ e.line+","+e.charPositionInLine+"]";
- }
-}
-
-/**********************************************************
-* DIRECTIVES
-**********************************************************/
-
-CLASS_DIRECTIVE
- : '.class';
-
-SUPER_DIRECTIVE
- : '.super';
-
-IMPLEMENTS_DIRECTIVE
- : '.implements';
-
-SOURCE_DIRECTIVE
- : '.source';
-
-FIELD_DIRECTIVE
- : '.field';
-
-END_FIELD_DIRECTIVE
- : '.end field';
-
-SUBANNOTATION_DIRECTIVE
- : '.subannotation';
-
-END_SUBANNOTATION_DIRECTIVE
- : '.end subannotation';
-
-ANNOTATION_DIRECTIVE
- : '.annotation';
-
-END_ANNOTATION_DIRECTIVE
- : '.end annotation';
-
-ENUM_DIRECTIVE
- : '.enum';
-
-METHOD_DIRECTIVE
- : '.method';
-
-END_METHOD_DIRECTIVE
- : '.end method';
-
-REGISTERS_DIRECTIVE
- : '.registers';
-
-LOCALS_DIRECTIVE
- : '.locals';
-
-ARRAY_DATA_DIRECTIVE
- : '.array-data';
-
-END_ARRAY_DATA_DIRECTIVE
- : '.end array-data';
-
-PACKED_SWITCH_DIRECTIVE
- : '.packed-switch';
-
-END_PACKED_SWITCH_DIRECTIVE
- : '.end packed-switch';
-
-SPARSE_SWITCH_DIRECTIVE
- : '.sparse-switch';
-
-END_SPARSE_SWITCH_DIRECTIVE
- : '.end sparse-switch';
-
-CATCH_DIRECTIVE
- : '.catch';
-
-CATCHALL_DIRECTIVE
- : '.catchall';
-
-LINE_DIRECTIVE
- : '.line';
-
-PARAMETER_DIRECTIVE
- : '.parameter';
-
-END_PARAMETER_DIRECTIVE
- : '.end parameter';
-
-LOCAL_DIRECTIVE
- : '.local';
-
-END_LOCAL_DIRECTIVE
- : '.end local';
-
-RESTART_LOCAL_DIRECTIVE
- : '.restart local';
-
-PROLOGUE_DIRECTIVE
- : '.prologue';
-
-EPILOGUE_DIRECTIVE
- : '.epilogue';
-
-/**********************************************************
-* LITERALS
-**********************************************************/
-fragment BASE_INTEGER
- : '0'
- | ('1'..'9') ('0'..'9')*
- | '0' ('0'..'7')+
- | HEX_PREFIX HEX_DIGIT+;
-
-fragment DECIMAL_EXPONENT
- : ('e'|'E') '-'? ('0'..'9')+;
-
-fragment BINARY_EXPONENT
- : ('p'|'P') '-'? ('0'..'9')+;
-
-fragment HEX_PREFIX
- : '0x'|'0X';
-
-fragment HEX_DIGIT
- : ('0'..'9')|('A'..'F')|('a'..'f');
-
-fragment HEX_DIGITS
- : HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
-
-/*This can either be floating point numbers, or identifier*/
-fragment BASE_FLOAT_OR_ID
- : '-'? ('0'..'9')+ DECIMAL_EXPONENT
- | HEX_PREFIX HEX_DIGIT+ BINARY_EXPONENT
- | '-'? ('i' | 'I') ('n' | 'N') ('f' | 'F') ('i' | 'I') ('n' | 'N') ('i' | 'I') ('t' | 'T') ('y' | 'Y')
- | ('n' | 'N') ('a' | 'A') ('n' | 'N');
-
-/*These can't be identifiers, due to the decimal point*/
-fragment BASE_FLOAT
- : '-'? ('0'..'9')+ '.' ('0'..'9')* DECIMAL_EXPONENT?
- | '-'? '.' ('0'..'9')+ DECIMAL_EXPONENT?
- | '-'? HEX_PREFIX HEX_DIGIT+ '.' HEX_DIGIT* BINARY_EXPONENT
- | '-'? HEX_PREFIX '.' HEX_DIGIT+ BINARY_EXPONENT;
-
-fragment ESCAPE_SEQUENCE[StringBuilder sb]
- : '\\'
- (
- 'b' {sb.append("\b");}
- | 't' {sb.append("\t");}
- | 'n' {sb.append("\n");}
- | 'f' {sb.append("\f");}
- | 'r' {sb.append("\r");}
- | '\"' {sb.append("\"");}
- | '\'' {sb.append("'");}
- | '\\' {sb.append("\\");}
- | 'u' HEX_DIGITS {sb.append((char)Integer.parseInt($HEX_DIGITS.text, 16));}
- );
-
-POSITIVE_INTEGER_LITERAL
- : BASE_INTEGER;
-
-NEGATIVE_INTEGER_LITERAL
- : '-' BASE_INTEGER;
-
-LONG_LITERAL
- : '-'? BASE_INTEGER ('l'|'L');
-
-SHORT_LITERAL
- : '-'? BASE_INTEGER ('s'|'S');
-
-BYTE_LITERAL
- : '-'? BASE_INTEGER ('t'|'T');
-
-FLOAT_LITERAL_OR_ID
- : BASE_FLOAT_OR_ID ('f'|'F')
- | '-'? ('0'..'9')+ ('f'|'F');
-
-DOUBLE_LITERAL_OR_ID
- : BASE_FLOAT_OR_ID ('d'|'D')?
- | '-'? ('0'..'9')+ ('d'|'D');
-
-FLOAT_LITERAL
- : BASE_FLOAT ('f'|'F');
-
-DOUBLE_LITERAL
- : BASE_FLOAT ('d'|'D')?;
-
-BOOL_LITERAL
- : 'true'
- | 'false';
-
-NULL_LITERAL
- : 'null';
-
-STRING_LITERAL
- @init {StringBuilder sb = new StringBuilder();}
- : BASE_STRING_LITERAL[sb] {setText(sb.toString());};
-
-fragment BASE_STRING_LITERAL[StringBuilder sb]
- : '"' {sb.append('"');}
- ( ESCAPE_SEQUENCE[sb]
- | ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
- )*
- '"' {sb.append('"');};
-
-CHAR_LITERAL
- @init {StringBuilder sb = new StringBuilder();}
- : BASE_CHAR_LITERAL[sb] {setText(sb.toString());};
-
-fragment BASE_CHAR_LITERAL[StringBuilder sb]
- : '\'' {sb.append('\'');}
- ( ESCAPE_SEQUENCE[sb]
- | ~( '\\' | '\'' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
- )
- '\'' { sb.append('\''); };
-
-
-/**********************************************************
-* MISC
-**********************************************************/
-REGISTER
- : ('v'|'p') ('0'..'9')+;
-
-ANNOTATION_VISIBILITY
- : 'build'
- | 'runtime'
- | 'system';
-
-ACCESS_SPEC
- : 'public'
- | 'private'
- | 'protected'
- | 'static'
- | 'final'
- | 'synchronized'
- | 'bridge'
- | 'varargs'
- | 'native'
- | 'abstract'
- | 'strictfp'
- | 'synthetic'
- | 'constructor'
- | 'declared-synchronized'
- | 'interface'
- | 'enum'
- | 'annotation'
- | 'volatile'
- | 'transient';
-
-VERIFICATION_ERROR_TYPE
- : 'no-error'
- | 'generic-error'
- | 'no-such-class'
- | 'no-such-field'
- | 'no-such-method'
- | 'illegal-class-access'
- | 'illegal-field-access'
- | 'illegal-method-access'
- | 'class-change-error'
- | 'instantiation-error';
-
-INLINE_INDEX
- : 'inline@0x' HEX_DIGIT+;
-
-VTABLE_INDEX
- : 'vtable@0x' HEX_DIGIT+;
-
-FIELD_OFFSET
- : 'field@0x' HEX_DIGIT+;
-
-OFFSET
- : '+' BASE_INTEGER;
-
-LINE_COMMENT
- : '#'
- (
- ~('\n'|'\r')* ('\r\n' | '\r' | '\n')
- | ~('\n'|'\r')*
- )
- {$channel = HIDDEN;};
-
-/**********************************************************
-* Instructions
-**********************************************************/
-INSTRUCTION_FORMAT10t
- : 'goto';
-
-INSTRUCTION_FORMAT10x
- : 'return-void'
- | 'nop';
-
-INSTRUCTION_FORMAT10x_ODEX
- : 'return-void-barrier';
-
-INSTRUCTION_FORMAT11n
- : 'const/4';
-
-INSTRUCTION_FORMAT11x
- : 'move-result'
- | 'move-result-wide'
- | 'move-result-object'
- | 'move-exception'
- | 'return'
- | 'return-wide'
- | 'return-object'
- | 'monitor-enter'
- | 'monitor-exit'
- | 'throw';
-
-INSTRUCTION_FORMAT12x_OR_ID
- : 'move'
- | 'move-wide'
- | 'move-object'
- | 'array-length'
- | 'neg-int'
- | 'not-int'
- | 'neg-long'
- | 'not-long'
- | 'neg-float'
- | 'neg-double'
- | 'int-to-long'
- | 'int-to-float'
- | 'int-to-double'
- | 'long-to-int'
- | 'long-to-float'
- | 'long-to-double'
- | 'float-to-int'
- | 'float-to-long'
- | 'float-to-double'
- | 'double-to-int'
- | 'double-to-long'
- | 'double-to-float'
- | 'int-to-byte'
- | 'int-to-char'
- | 'int-to-short';
-
-INSTRUCTION_FORMAT12x
- : 'add-int/2addr'
- | 'sub-int/2addr'
- | 'mul-int/2addr'
- | 'div-int/2addr'
- | 'rem-int/2addr'
- | 'and-int/2addr'
- | 'or-int/2addr'
- | 'xor-int/2addr'
- | 'shl-int/2addr'
- | 'shr-int/2addr'
- | 'ushr-int/2addr'
- | 'add-long/2addr'
- | 'sub-long/2addr'
- | 'mul-long/2addr'
- | 'div-long/2addr'
- | 'rem-long/2addr'
- | 'and-long/2addr'
- | 'or-long/2addr'
- | 'xor-long/2addr'
- | 'shl-long/2addr'
- | 'shr-long/2addr'
- | 'ushr-long/2addr'
- | 'add-float/2addr'
- | 'sub-float/2addr'
- | 'mul-float/2addr'
- | 'div-float/2addr'
- | 'rem-float/2addr'
- | 'add-double/2addr'
- | 'sub-double/2addr'
- | 'mul-double/2addr'
- | 'div-double/2addr'
- | 'rem-double/2addr';
-
-INSTRUCTION_FORMAT20bc
- : 'throw-verification-error';
-
-INSTRUCTION_FORMAT20t
- : 'goto/16';
-
-INSTRUCTION_FORMAT21c_FIELD
- : 'sget'
- | 'sget-wide'
- | 'sget-object'
- | 'sget-boolean'
- | 'sget-byte'
- | 'sget-char'
- | 'sget-short'
- | 'sput'
- | 'sput-wide'
- | 'sput-object'
- | 'sput-boolean'
- | 'sput-byte'
- | 'sput-char'
- | 'sput-short';
-
-INSTRUCTION_FORMAT21c_FIELD_ODEX
- : 'sget-volatile'
- | 'sget-wide-volatile'
- | 'sget-object-volatile'
- | 'sput-volatile'
- | 'sput-wide-volatile'
- | 'sput-object-volatile';
-
-INSTRUCTION_FORMAT21c_STRING
- : 'const-string';
-
-INSTRUCTION_FORMAT21c_TYPE
- : 'check-cast'
- | 'new-instance'
- | 'const-class';
-
-INSTRUCTION_FORMAT21h
- : 'const/high16'
- | 'const-wide/high16';
-
-INSTRUCTION_FORMAT21s
- : 'const/16'
- | 'const-wide/16';
-
-INSTRUCTION_FORMAT21t
- : 'if-eqz'
- | 'if-nez'
- | 'if-ltz'
- | 'if-gez'
- | 'if-gtz'
- | 'if-lez';
-
-INSTRUCTION_FORMAT22b
- : 'add-int/lit8'
- | 'rsub-int/lit8'
- | 'mul-int/lit8'
- | 'div-int/lit8'
- | 'rem-int/lit8'
- | 'and-int/lit8'
- | 'or-int/lit8'
- | 'xor-int/lit8'
- | 'shl-int/lit8'
- | 'shr-int/lit8'
- | 'ushr-int/lit8';
-
-INSTRUCTION_FORMAT22c_FIELD
- : 'iget'
- | 'iget-wide'
- | 'iget-object'
- | 'iget-boolean'
- | 'iget-byte'
- | 'iget-char'
- | 'iget-short'
- | 'iput'
- | 'iput-wide'
- | 'iput-object'
- | 'iput-boolean'
- | 'iput-byte'
- | 'iput-char'
- | 'iput-short';
-
-INSTRUCTION_FORMAT22c_FIELD_ODEX
- : 'iget-volatile'
- | 'iget-wide-volatile'
- | 'iget-object-volatile'
- | 'iput-volatile'
- | 'iput-wide-volatile'
- | 'iput-object-volatile';
-
-INSTRUCTION_FORMAT22c_TYPE
- : 'instance-of'
- | 'new-array';
-
-
-INSTRUCTION_FORMAT22cs_FIELD
- : 'iget-quick'
- | 'iget-wide-quick'
- | 'iget-object-quick'
- | 'iput-quick'
- | 'iput-wide-quick'
- | 'iput-object-quick';
-
-INSTRUCTION_FORMAT22s_OR_ID
- : 'rsub-int';
-
-INSTRUCTION_FORMAT22s
- : 'add-int/lit16'
- | 'mul-int/lit16'
- | 'div-int/lit16'
- | 'rem-int/lit16'
- | 'and-int/lit16'
- | 'or-int/lit16'
- | 'xor-int/lit16';
-
-INSTRUCTION_FORMAT22t
- : 'if-eq'
- | 'if-ne'
- | 'if-lt'
- | 'if-ge'
- | 'if-gt'
- | 'if-le';
-
-INSTRUCTION_FORMAT22x
- : 'move/from16'
- | 'move-wide/from16'
- | 'move-object/from16';
-
-INSTRUCTION_FORMAT23x
- : 'cmpl-float'
- | 'cmpg-float'
- | 'cmpl-double'
- | 'cmpg-double'
- | 'cmp-long'
- | 'aget'
- | 'aget-wide'
- | 'aget-object'
- | 'aget-boolean'
- | 'aget-byte'
- | 'aget-char'
- | 'aget-short'
- | 'aput'
- | 'aput-wide'
- | 'aput-object'
- | 'aput-boolean'
- | 'aput-byte'
- | 'aput-char'
- | 'aput-short'
- | 'add-int'
- | 'sub-int'
- | 'mul-int'
- | 'div-int'
- | 'rem-int'
- | 'and-int'
- | 'or-int'
- | 'xor-int'
- | 'shl-int'
- | 'shr-int'
- | 'ushr-int'
- | 'add-long'
- | 'sub-long'
- | 'mul-long'
- | 'div-long'
- | 'rem-long'
- | 'and-long'
- | 'or-long'
- | 'xor-long'
- | 'shl-long'
- | 'shr-long'
- | 'ushr-long'
- | 'add-float'
- | 'sub-float'
- | 'mul-float'
- | 'div-float'
- | 'rem-float'
- | 'add-double'
- | 'sub-double'
- | 'mul-double'
- | 'div-double'
- | 'rem-double';
-
-INSTRUCTION_FORMAT30t
- : 'goto/32';
-
-INSTRUCTION_FORMAT31c
- : 'const-string/jumbo';
-
-INSTRUCTION_FORMAT31i_OR_ID
- : 'const';
-
-INSTRUCTION_FORMAT31i
- : 'const-wide/32';
-
-INSTRUCTION_FORMAT31t
- : 'fill-array-data'
- | 'packed-switch'
- | 'sparse-switch';
-
-INSTRUCTION_FORMAT32x
- : 'move/16'
- | 'move-wide/16'
- | 'move-object/16';
-
-INSTRUCTION_FORMAT35c_METHOD
- : 'invoke-virtual'
- | 'invoke-super'
- | 'invoke-direct'
- | 'invoke-static'
- | 'invoke-interface';
-
-INSTRUCTION_FORMAT35c_METHOD_ODEX
- : 'invoke-direct-empty';
-
-INSTRUCTION_FORMAT35c_TYPE
- : 'filled-new-array';
-
-INSTRUCTION_FORMAT35mi_METHOD
- : 'execute-inline';
-
-INSTRUCTION_FORMAT35ms_METHOD
- : 'invoke-virtual-quick'
- | 'invoke-super-quick';
-
-INSTRUCTION_FORMAT3rc_METHOD
- : 'invoke-virtual/range'
- | 'invoke-super/range'
- | 'invoke-direct/range'
- | 'invoke-static/range'
- | 'invoke-interface/range';
-
-INSTRUCTION_FORMAT3rc_METHOD_ODEX
- : 'invoke-object-init/range';
-
-INSTRUCTION_FORMAT3rc_TYPE
- : 'filled-new-array/range';
-
-INSTRUCTION_FORMAT3rmi_METHOD
- : 'execute-inline/range';
-
-INSTRUCTION_FORMAT3rms_METHOD
- : 'invoke-virtual-quick/range'
- | 'invoke-super-quick/range';
-
-INSTRUCTION_FORMAT51l
- : 'const-wide';
-
-/**********************************************************
-* Types
-**********************************************************/
-fragment BASE_SIMPLE_NAME:
- ( 'A'..'Z'
- | 'a'..'z'
- | '0'..'9'
- | '$'
- | '-'
- | '_'
- | '\u00a1'..'\u1fff'
- | '\u2010'..'\u2027'
- | '\u2030'..'\ud7ff'
- | '\ue000'..'\uffef'
- )+;
-
-fragment BASE_PRIMITIVE_TYPE
- : 'Z'|'B'|'S'|'C'|'I'|'J'|'F'|'D';
-
-
-fragment BASE_CLASS_DESCRIPTOR
- : 'L' (BASE_SIMPLE_NAME '/')* BASE_SIMPLE_NAME ';';
-
-fragment BASE_ARRAY_DESCRIPTOR
- : '['+ (BASE_PRIMITIVE_TYPE | BASE_CLASS_DESCRIPTOR);
-
-fragment BASE_TYPE
- : BASE_PRIMITIVE_TYPE
- | BASE_CLASS_DESCRIPTOR
- | BASE_ARRAY_DESCRIPTOR;
-
-PRIMITIVE_TYPE
- : BASE_PRIMITIVE_TYPE;
-
-VOID_TYPE
- : 'V';
-
-CLASS_DESCRIPTOR
- : BASE_CLASS_DESCRIPTOR;
-
-ARRAY_DESCRIPTOR
- : BASE_ARRAY_DESCRIPTOR;
-
-PARAM_LIST_OR_ID
- : BASE_PRIMITIVE_TYPE BASE_PRIMITIVE_TYPE+;
-
-PARAM_LIST
- : BASE_TYPE BASE_TYPE+;
-
-SIMPLE_NAME
- : BASE_SIMPLE_NAME;
-
-METHOD_NAME
- : '<init>'
- | '<clinit>';
-
-
-/**********************************************************
-* Symbols
-**********************************************************/
-
-DOTDOT
- : '..';
-
-ARROW
- : '->';
-
-EQUAL
- : '=';
-
-COLON
- : ':';
-
-COMMA
- : ',';
-
-OPEN_BRACE
- : '{';
-
-CLOSE_BRACE
- : '}';
-
-OPEN_PAREN
- : '(';
-
-CLOSE_PAREN
- : ')';
-
-WHITE_SPACE
- : (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;};
diff --git a/smali/src/main/antlr3/smaliParser.g b/smali/src/main/antlr3/smaliParser.g
index 3d272b0e..6fb71325 100644
--- a/smali/src/main/antlr3/smaliParser.g
+++ b/smali/src/main/antlr3/smaliParser.g
@@ -29,12 +29,148 @@
parser grammar smaliParser;
options {
- tokenVocab=smaliLexer;
output=AST;
ASTLabelType=CommonTree;
}
tokens {
+ //Lexer tokens
+ ACCESS_SPEC;
+ ANNOTATION_DIRECTIVE;
+ ANNOTATION_VISIBILITY;
+ ARRAY_DATA_DIRECTIVE;
+ ARRAY_DESCRIPTOR;
+ ARROW;
+ BASE_ARRAY_DESCRIPTOR;
+ BASE_CHAR_LITERAL;
+ BASE_CLASS_DESCRIPTOR;
+ BASE_FLOAT;
+ BASE_FLOAT_OR_ID;
+ BASE_INTEGER;
+ BASE_PRIMITIVE_TYPE;
+ BASE_SIMPLE_NAME;
+ BASE_STRING_LITERAL;
+ BASE_TYPE;
+ BINARY_EXPONENT;
+ BOOL_LITERAL;
+ BYTE_LITERAL;
+ CATCH_DIRECTIVE;
+ CATCHALL_DIRECTIVE;
+ CHAR_LITERAL;
+ CLASS_DESCRIPTOR;
+ CLASS_DIRECTIVE;
+ CLOSE_BRACE;
+ CLOSE_PAREN;
+ COLON;
+ COMMA;
+ DECIMAL_EXPONENT;
+ DOTDOT;
+ DOUBLE_LITERAL;
+ DOUBLE_LITERAL_OR_ID;
+ END_ANNOTATION_DIRECTIVE;
+ END_ARRAY_DATA_DIRECTIVE;
+ END_FIELD_DIRECTIVE;
+ END_LOCAL_DIRECTIVE;
+ END_METHOD_DIRECTIVE;
+ END_PACKED_SWITCH_DIRECTIVE;
+ END_PARAMETER_DIRECTIVE;
+ END_SPARSE_SWITCH_DIRECTIVE;
+ END_SUBANNOTATION_DIRECTIVE;
+ ENUM_DIRECTIVE;
+ EPILOGUE_DIRECTIVE;
+ EQUAL;
+ ESCAPE_SEQUENCE;
+ FIELD_DIRECTIVE;
+ FIELD_OFFSET;
+ FLOAT_LITERAL;
+ FLOAT_LITERAL_OR_ID;
+ HEX_DIGIT;
+ HEX_DIGITS;
+ HEX_PREFIX;
+ IMPLEMENTS_DIRECTIVE;
+ INLINE_INDEX;
+ INSTRUCTION_FORMAT10t;
+ INSTRUCTION_FORMAT10x;
+ INSTRUCTION_FORMAT10x_ODEX;
+ INSTRUCTION_FORMAT11n;
+ INSTRUCTION_FORMAT11x;
+ INSTRUCTION_FORMAT12x;
+ INSTRUCTION_FORMAT12x_OR_ID;
+ INSTRUCTION_FORMAT20bc;
+ INSTRUCTION_FORMAT20t;
+ INSTRUCTION_FORMAT21c_FIELD;
+ INSTRUCTION_FORMAT21c_FIELD_ODEX;
+ INSTRUCTION_FORMAT21c_STRING;
+ INSTRUCTION_FORMAT21c_TYPE;
+ INSTRUCTION_FORMAT21h;
+ INSTRUCTION_FORMAT21s;
+ INSTRUCTION_FORMAT21t;
+ INSTRUCTION_FORMAT22b;
+ INSTRUCTION_FORMAT22c_FIELD;
+ INSTRUCTION_FORMAT22c_FIELD_ODEX;
+ INSTRUCTION_FORMAT22c_TYPE;
+ INSTRUCTION_FORMAT22cs_FIELD;
+ INSTRUCTION_FORMAT22s;
+ INSTRUCTION_FORMAT22s_OR_ID;
+ INSTRUCTION_FORMAT22t;
+ INSTRUCTION_FORMAT22x;
+ INSTRUCTION_FORMAT23x;
+ INSTRUCTION_FORMAT30t;
+ INSTRUCTION_FORMAT31c;
+ INSTRUCTION_FORMAT31i;
+ INSTRUCTION_FORMAT31i_OR_ID;
+ INSTRUCTION_FORMAT31t;
+ INSTRUCTION_FORMAT32x;
+ INSTRUCTION_FORMAT35c_METHOD;
+ INSTRUCTION_FORMAT35c_METHOD_ODEX;
+ INSTRUCTION_FORMAT35c_TYPE;
+ INSTRUCTION_FORMAT35mi_METHOD;
+ INSTRUCTION_FORMAT35ms_METHOD;
+ INSTRUCTION_FORMAT3rc_METHOD;
+ INSTRUCTION_FORMAT3rc_METHOD_ODEX;
+ INSTRUCTION_FORMAT3rc_TYPE;
+ INSTRUCTION_FORMAT3rmi_METHOD;
+ INSTRUCTION_FORMAT3rms_METHOD;
+ INSTRUCTION_FORMAT51l;
+ INVALID_TOKEN;
+ LINE_COMMENT;
+ LINE_DIRECTIVE;
+ LOCAL_DIRECTIVE;
+ LOCALS_DIRECTIVE;
+ LONG_LITERAL;
+ METHOD_DIRECTIVE;
+ METHOD_NAME;
+ NEGATIVE_INTEGER_LITERAL;
+ NULL_LITERAL;
+ OFFSET;
+ OPEN_BRACE;
+ OPEN_PAREN;
+ PACKED_SWITCH_DIRECTIVE;
+ PARAM_LIST;
+ PARAM_LIST_OR_ID;
+ PARAMETER_DIRECTIVE;
+ POSITIVE_INTEGER_LITERAL;
+ PRIMITIVE_TYPE;
+ PROLOGUE_DIRECTIVE;
+ REGISTER;
+ REGISTERS_DIRECTIVE;
+ RESTART_LOCAL_DIRECTIVE;
+ SHORT_LITERAL;
+ SIMPLE_NAME;
+ SOURCE_DIRECTIVE;
+ SPARSE_SWITCH_DIRECTIVE;
+ STRING_LITERAL;
+ SUBANNOTATION_DIRECTIVE;
+ SUPER_DIRECTIVE;
+ VERIFICATION_ERROR_TYPE;
+ VOID_TYPE;
+ VTABLE_INDEX;
+ WHITE_SPACE;
+
+ //A couple of generated types that we remap other tokens to, to simplify the generated AST
+ LABEL;
+ INTEGER_LITERAL;
+
//I_* tokens are imaginary tokens used as parent AST nodes
I_CLASS_DEF;
I_SUPER;
@@ -123,10 +259,6 @@ tokens {
I_STATEMENT_SPARSE_SWITCH;
I_REGISTER_RANGE;
I_REGISTER_LIST;
-
- LABEL;
- INTEGER_LITERAL;
- INVALID_TOKEN;
}
@header {
@@ -138,6 +270,8 @@ import org.jf.dexlib.Code.Opcode;
@members {
+ public static final int ERROR_CHANNEL = 100;
+
private boolean verboseErrors = false;
private boolean allowOdex = false;
private int apiLevel;
diff --git a/smali/src/main/java/org/jf/smali/InvalidToken.java b/smali/src/main/java/org/jf/smali/InvalidToken.java
index b7b10781..edeb7851 100644
--- a/smali/src/main/java/org/jf/smali/InvalidToken.java
+++ b/smali/src/main/java/org/jf/smali/InvalidToken.java
@@ -28,7 +28,6 @@
package org.jf.smali;
-import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonToken;
public class InvalidToken extends CommonToken {
@@ -37,13 +36,13 @@ public class InvalidToken extends CommonToken {
public InvalidToken(String message) {
super(smaliParser.INVALID_TOKEN);
this.message = message;
- this.channel = smaliLexer.ERROR_CHANNEL;
+ this.channel = smaliParser.ERROR_CHANNEL;
}
public InvalidToken(String message, String text) {
super(smaliParser.INVALID_TOKEN, text);
this.message = message;
- this.channel = smaliLexer.ERROR_CHANNEL;
+ this.channel = smaliParser.ERROR_CHANNEL;
}
public String getMessage() {
diff --git a/smali/src/main/java/org/jf/smali/main.java b/smali/src/main/java/org/jf/smali/main.java
index e38f8dea..773c08a4 100644
--- a/smali/src/main/java/org/jf/smali/main.java
+++ b/smali/src/main/java/org/jf/smali/main.java
@@ -103,7 +103,6 @@ public class main {
boolean fixJumbo = true;
boolean fixGoto = true;
boolean verboseErrors = false;
- boolean oldLexer = false;
boolean printTokens = false;
boolean apiSet = false;
@@ -158,9 +157,6 @@ public class main {
case 'V':
verboseErrors = true;
break;
- case 'L':
- oldLexer = true;
- break;
case 'T':
printTokens = true;
break;
@@ -202,7 +198,7 @@ public class main {
boolean errors = false;
for (File file: filesToProcess) {
- if (!assembleSmaliFile(file, dexFile, verboseErrors, oldLexer, printTokens, allowOdex, apiLevel)) {
+ if (!assembleSmaliFile(file, dexFile, verboseErrors, printTokens, allowOdex, apiLevel)) {
errors = true;
}
}
@@ -276,7 +272,7 @@ public class main {
}
}
- private static boolean assembleSmaliFile(File smaliFile, DexFile dexFile, boolean verboseErrors, boolean oldLexer,
+ private static boolean assembleSmaliFile(File smaliFile, DexFile dexFile, boolean verboseErrors,
boolean printTokens, boolean allowOdex, int apiLevel)
throws Exception {
CommonTokenStream tokens;
@@ -285,27 +281,19 @@ public class main {
boolean lexerErrors = false;
LexerErrorInterface lexer;
- if (oldLexer) {
- ANTLRFileStream input = new ANTLRFileStream(smaliFile.getAbsolutePath(), "UTF-8");
- input.name = smaliFile.getAbsolutePath();
-
- lexer = new smaliLexer(input);
- tokens = new CommonTokenStream((TokenSource)lexer);
- } else {
- FileInputStream fis = new FileInputStream(smaliFile.getAbsolutePath());
- InputStreamReader reader = new InputStreamReader(fis, "UTF-8");
+ FileInputStream fis = new FileInputStream(smaliFile.getAbsolutePath());
+ InputStreamReader reader = new InputStreamReader(fis, "UTF-8");
- lexer = new smaliFlexLexer(reader);
- ((smaliFlexLexer)lexer).setSourceFile(smaliFile);
- tokens = new CommonTokenStream((TokenSource)lexer);
- }
+ lexer = new smaliFlexLexer(reader);
+ ((smaliFlexLexer)lexer).setSourceFile(smaliFile);
+ tokens = new CommonTokenStream((TokenSource)lexer);
if (printTokens) {
tokens.getTokens();
for (int i=0; i<tokens.size(); i++) {
Token token = tokens.get(i);
- if (token.getChannel() == smaliLexer.HIDDEN) {
+ if (token.getChannel() == smaliParser.HIDDEN) {
continue;
}
@@ -423,10 +411,6 @@ public class main {
.withDescription("Generate verbose error messages")
.create("V");
- Option oldLexerOption = OptionBuilder.withLongOpt("old-lexer")
- .withDescription("Use the old lexer")
- .create("L");
-
Option printTokensOption = OptionBuilder.withLongOpt("print-tokens")
.withDescription("Print the name and text of each token")
.create("T");
@@ -442,7 +426,6 @@ public class main {
debugOptions.addOption(noFixJumboOption);
debugOptions.addOption(noFixGotoOption);
debugOptions.addOption(verboseErrorsOption);
- debugOptions.addOption(oldLexerOption);
debugOptions.addOption(printTokensOption);
for (Object option: basicOptions.getOptions()) {
diff --git a/smali/src/test/java/LexerTest.java b/smali/src/test/java/LexerTest.java
index 5afc2e2c..074b32ed 100644
--- a/smali/src/test/java/LexerTest.java
+++ b/smali/src/test/java/LexerTest.java
@@ -165,7 +165,7 @@ public class LexerTest {
for (int i=0; i<tokens.size(); i++) {
token = (CommonToken)tokens.get(i);
- if (discardHiddenTokens && token.getChannel() == smaliLexer.HIDDEN) {
+ if (discardHiddenTokens && token.getChannel() == smaliParser.HIDDEN) {
continue;
}
@@ -175,7 +175,7 @@ public class LexerTest {
if (token.getType() == smaliParser.INVALID_TOKEN) {
Assert.assertTrue("Encountered an INVALID_TOKEN not on the error channel",
- token.getChannel() == smaliLexer.ERROR_CHANNEL);
+ token.getChannel() == smaliParser.ERROR_CHANNEL);
}
ExpectedToken expectedToken = expectedTokens.get(expectedTokenIndex++);