diff options
Diffstat (limited to 'pycparser')
-rw-r--r--  pycparser/__init__.py       |  2
-rw-r--r--  pycparser/_build_tables.py  |  8
-rw-r--r--  pycparser/ast_transforms.py |  3
-rw-r--r--  pycparser/c_generator.py    | 38
-rw-r--r--  pycparser/c_lexer.py        | 46
-rw-r--r--  pycparser/c_parser.py       | 33
6 files changed, 97 insertions, 33 deletions
diff --git a/pycparser/__init__.py b/pycparser/__init__.py index b67389f..6e86e9f 100644 --- a/pycparser/__init__.py +++ b/pycparser/__init__.py @@ -8,7 +8,7 @@ # License: BSD #----------------------------------------------------------------- __all__ = ['c_lexer', 'c_parser', 'c_ast'] -__version__ = '2.19' +__version__ = '2.20' import io from subprocess import check_output diff --git a/pycparser/_build_tables.py b/pycparser/_build_tables.py index 94a3891..958381a 100644 --- a/pycparser/_build_tables.py +++ b/pycparser/_build_tables.py @@ -10,13 +10,17 @@ # License: BSD #----------------------------------------------------------------- +# Insert '.' and '..' as first entries to the search path for modules. +# Restricted environments like embeddable python do not include the +# current working directory on startup. +import sys +sys.path[0:0] = ['.', '..'] + # Generate c_ast.py from _ast_gen import ASTCodeGenerator ast_gen = ASTCodeGenerator('_c_ast.cfg') ast_gen.generate(open('c_ast.py', 'w')) -import sys -sys.path[0:0] = ['.', '..'] from pycparser import c_parser # Generates the tables diff --git a/pycparser/ast_transforms.py b/pycparser/ast_transforms.py index ba50966..0aeb88f 100644 --- a/pycparser/ast_transforms.py +++ b/pycparser/ast_transforms.py @@ -74,7 +74,8 @@ def fix_switch_cases(switch_node): # Goes over the children of the Compound below the Switch, adding them # either directly below new_compound or below the last Case as appropriate - for child in switch_node.stmt.block_items: + # (for `switch(cond) {}`, block_items would have been None) + for child in (switch_node.stmt.block_items or []): if isinstance(child, (c_ast.Case, c_ast.Default)): # If it's a Case/Default: # 1. 
Add it to the Compound and mark as "last case" diff --git a/pycparser/c_generator.py b/pycparser/c_generator.py index f789742..973d24a 100644 --- a/pycparser/c_generator.py +++ b/pycparser/c_generator.py @@ -119,7 +119,7 @@ class CGenerator(object): return s def visit_Cast(self, n): - s = '(' + self._generate_type(n.to_type) + ')' + s = '(' + self._generate_type(n.to_type, emit_declname=False) + ')' return s + ' ' + self._parenthesize_unless_simple(n.expr) def visit_ExprList(self, n): @@ -291,6 +291,15 @@ class CGenerator(object): def visit_FuncDecl(self, n): return self._generate_type(n) + def visit_ArrayDecl(self, n): + return self._generate_type(n, emit_declname=False) + + def visit_TypeDecl(self, n): + return self._generate_type(n, emit_declname=False) + + def visit_PtrDecl(self, n): + return self._generate_type(n, emit_declname=False) + def _generate_struct_union_enum(self, n, name): """ Generates code for structs, unions, and enums. name should be 'struct', 'union', or 'enum'. @@ -359,7 +368,7 @@ class CGenerator(object): s += self._generate_type(n.type) return s - def _generate_type(self, n, modifiers=[]): + def _generate_type(self, n, modifiers=[], emit_declname = True): """ Recursive generation from a type node. n is the type node. modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers encountered on the way down to a TypeDecl, to allow proper @@ -373,23 +382,29 @@ class CGenerator(object): if n.quals: s += ' '.join(n.quals) + ' ' s += self.visit(n.type) - nstr = n.declname if n.declname else '' + nstr = n.declname if n.declname and emit_declname else '' # Resolve modifiers. # Wrap in parens to distinguish pointer to array and pointer to # function syntax. 
# for i, modifier in enumerate(modifiers): if isinstance(modifier, c_ast.ArrayDecl): - if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)): - nstr = '(' + nstr + ')' - nstr += '[' + self.visit(modifier.dim) + ']' + if (i != 0 and + isinstance(modifiers[i - 1], c_ast.PtrDecl)): + nstr = '(' + nstr + ')' + nstr += '[' + if modifier.dim_quals: + nstr += ' '.join(modifier.dim_quals) + ' ' + nstr += self.visit(modifier.dim) + ']' elif isinstance(modifier, c_ast.FuncDecl): - if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)): - nstr = '(' + nstr + ')' + if (i != 0 and + isinstance(modifiers[i - 1], c_ast.PtrDecl)): + nstr = '(' + nstr + ')' nstr += '(' + self.visit(modifier.args) + ')' elif isinstance(modifier, c_ast.PtrDecl): if modifier.quals: - nstr = '* %s %s' % (' '.join(modifier.quals), nstr) + nstr = '* %s%s' % (' '.join(modifier.quals), + ' ' + nstr if nstr else '') else: nstr = '*' + nstr if nstr: s += ' ' + nstr @@ -397,11 +412,12 @@ class CGenerator(object): elif typ == c_ast.Decl: return self._generate_decl(n.type) elif typ == c_ast.Typename: - return self._generate_type(n.type) + return self._generate_type(n.type, emit_declname = emit_declname) elif typ == c_ast.IdentifierType: return ' '.join(n.names) + ' ' elif typ in (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl): - return self._generate_type(n.type, modifiers + [n]) + return self._generate_type(n.type, modifiers + [n], + emit_declname = emit_declname) else: return self.visit(n) diff --git a/pycparser/c_lexer.py b/pycparser/c_lexer.py index de8445e..045d24e 100644 --- a/pycparser/c_lexer.py +++ b/pycparser/c_lexer.py @@ -19,7 +19,7 @@ class CLexer(object): tokens. The public attribute filename can be set to an initial - filaneme, but the lexer will update it upon #line + filename, but the lexer will update it upon #line directives. 
""" def __init__(self, error_func, on_lbrace_func, on_rbrace_func, @@ -130,7 +130,7 @@ class CLexer(object): 'TYPEID', # constants - 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', + 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', 'INT_CONST_CHAR', 'FLOAT_CONST', 'HEX_FLOAT_CONST', 'CHAR_CONST', 'WCHAR_CONST', @@ -205,23 +205,49 @@ class CLexer(object): # parse all correct code, even if it means to sometimes parse incorrect # code. # - simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" - decimal_escape = r"""(\d+)""" - hex_escape = r"""(x[0-9a-fA-F]+)""" - bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])""" + # The original regexes were taken verbatim from the C syntax definition, + # and were later modified to avoid worst-case exponential running time. + # + # simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" + # decimal_escape = r"""(\d+)""" + # hex_escape = r"""(x[0-9a-fA-F]+)""" + # bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])""" + # + # The following modifications were made to avoid the ambiguity that allowed backtracking: + # (https://github.com/eliben/pycparser/issues/61) + # + # - \x was removed from simple_escape, unless it was not followed by a hex digit, to avoid ambiguity with hex_escape. + # - hex_escape allows one or more hex characters, but requires that the next character(if any) is not hex + # - decimal_escape allows one or more decimal characters, but requires that the next character(if any) is not a decimal + # - bad_escape does not allow any decimals (8-9), to avoid conflicting with the permissive decimal_escape. + # + # Without this change, python's `re` module would recursively try parsing each ambiguous escape sequence in multiple ways. + # e.g. `\123` could be parsed as `\1`+`23`, `\12`+`3`, and `\123`. 
+ + simple_escape = r"""([a-wyzA-Z._~!=&\^\-\\?'"]|x(?![0-9a-fA-F]))""" + decimal_escape = r"""(\d+)(?!\d)""" + hex_escape = r"""(x[0-9a-fA-F]+)(?![0-9a-fA-F])""" + bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])""" escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))' + + # This complicated regex with lookahead might be slow for strings, so because all of the valid escapes (including \x) allowed + # 0 or more non-escaped characters after the first character, simple_escape+decimal_escape+hex_escape got simplified to + + escape_sequence_start_in_string = r"""(\\[0-9a-zA-Z._~!=&\^\-\\?'"])""" + cconst_char = r"""([^'\\\n]|"""+escape_sequence+')' char_const = "'"+cconst_char+"'" wchar_const = 'L'+char_const + multicharacter_constant = "'"+cconst_char+"{2,4}'" unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)" bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')""" # string literals (K&R2: A.2.6) - string_char = r"""([^"\\\n]|"""+escape_sequence+')' + string_char = r"""([^"\\\n]|"""+escape_sequence_start_in_string+')' string_literal = '"'+string_char+'*"' wstring_literal = 'L'+string_literal - bad_string_literal = '"'+string_char+'*?'+bad_escape+string_char+'*"' + bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"' # floating constants (K&R2: A.2.5.3) exponent_part = r"""([eE][-+]?[0-9]+)""" @@ -443,6 +469,10 @@ class CLexer(object): # Must come before bad_char_const, to prevent it from # catching valid char constants as invalid # + @TOKEN(multicharacter_constant) + def t_INT_CONST_CHAR(self, t): + return t + @TOKEN(char_const) def t_CHAR_CONST(self, t): return t diff --git a/pycparser/c_parser.py b/pycparser/c_parser.py index 0e6e755..744ede8 100644 --- a/pycparser/c_parser.py +++ b/pycparser/c_parser.py @@ -529,8 +529,7 @@ class CParser(PLYParser): def p_translation_unit_2(self, p): """ translation_unit : translation_unit external_declaration """ - if p[2] is not None: 
- p[1].extend(p[2]) + p[1].extend(p[2]) p[0] = p[1] # Declarations always come as lists (because they can be @@ -557,7 +556,7 @@ class CParser(PLYParser): def p_external_declaration_4(self, p): """ external_declaration : SEMI """ - p[0] = None + p[0] = [] def p_pp_directive(self, p): """ pp_directive : PPHASH @@ -1411,12 +1410,13 @@ class CParser(PLYParser): p[0] = self._type_modify_decl(decl=p[1], modifier=arr) def p_direct_abstract_declarator_3(self, p): - """ direct_abstract_declarator : LBRACKET assignment_expression_opt RBRACKET + """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET """ + quals = (p[2] if len(p) > 4 else []) or [] p[0] = c_ast.ArrayDecl( type=c_ast.TypeDecl(None, None, None), - dim=p[2], - dim_quals=[], + dim=p[3] if len(p) > 4 else p[2], + dim_quals=quals, coord=self._token_coord(p, 1)) def p_direct_abstract_declarator_4(self, p): @@ -1740,8 +1740,7 @@ class CParser(PLYParser): if len(p) == 2: p[0] = p[1] elif len(p) == 4: - field = c_ast.ID(p[3], self._token_coord(p, 3)) - p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord) + p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord) elif len(p) == 5: p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) else: @@ -1766,9 +1765,23 @@ class CParser(PLYParser): | INT_CONST_OCT | INT_CONST_HEX | INT_CONST_BIN - """ + | INT_CONST_CHAR + """ + uCount = 0 + lCount = 0 + for x in p[1][-3:]: + if x in ('l', 'L'): + lCount += 1 + elif x in ('u', 'U'): + uCount += 1 + t = '' + if uCount > 1: + raise ValueError('Constant cannot have more than one u/U suffix.') + elif lCount > 2: + raise ValueError('Constant cannot have more than two l/L suffix.') + prefix = 'unsigned ' * uCount + 'long ' * lCount p[0] = c_ast.Constant( - 'int', p[1], self._token_coord(p, 1)) + prefix + 'int', p[1], self._token_coord(p, 1)) def p_constant_2(self, p): """ constant : FLOAT_CONST |