Diffstat (limited to 'pycparser')
-rw-r--r--  pycparser/__init__.py        2
-rw-r--r--  pycparser/_build_tables.py   8
-rw-r--r--  pycparser/ast_transforms.py  3
-rw-r--r--  pycparser/c_generator.py    38
-rw-r--r--  pycparser/c_lexer.py        46
-rw-r--r--  pycparser/c_parser.py       33
6 files changed, 97 insertions, 33 deletions
diff --git a/pycparser/__init__.py b/pycparser/__init__.py
index b67389f..6e86e9f 100644
--- a/pycparser/__init__.py
+++ b/pycparser/__init__.py
@@ -8,7 +8,7 @@
# License: BSD
#-----------------------------------------------------------------
__all__ = ['c_lexer', 'c_parser', 'c_ast']
-__version__ = '2.19'
+__version__ = '2.20'
import io
from subprocess import check_output
diff --git a/pycparser/_build_tables.py b/pycparser/_build_tables.py
index 94a3891..958381a 100644
--- a/pycparser/_build_tables.py
+++ b/pycparser/_build_tables.py
@@ -10,13 +10,17 @@
# License: BSD
#-----------------------------------------------------------------
+# Insert '.' and '..' as first entries to the search path for modules.
+# Restricted environments like embeddable python do not include the
+# current working directory on startup.
+import sys
+sys.path[0:0] = ['.', '..']
+
# Generate c_ast.py
from _ast_gen import ASTCodeGenerator
ast_gen = ASTCodeGenerator('_c_ast.cfg')
ast_gen.generate(open('c_ast.py', 'w'))
-import sys
-sys.path[0:0] = ['.', '..']
from pycparser import c_parser
# Generates the tables
diff --git a/pycparser/ast_transforms.py b/pycparser/ast_transforms.py
index ba50966..0aeb88f 100644
--- a/pycparser/ast_transforms.py
+++ b/pycparser/ast_transforms.py
@@ -74,7 +74,8 @@ def fix_switch_cases(switch_node):
# Goes over the children of the Compound below the Switch, adding them
# either directly below new_compound or below the last Case as appropriate
- for child in switch_node.stmt.block_items:
+ # (for `switch(cond) {}`, block_items would have been None)
+ for child in (switch_node.stmt.block_items or []):
if isinstance(child, (c_ast.Case, c_ast.Default)):
# If it's a Case/Default:
# 1. Add it to the Compound and mark as "last case"
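The guard above covers `switch (x) {}`, whose Compound carries block_items of None rather than an empty list. A minimal sketch of the case it fixes, assuming a pycparser install with this patch applied:

    from pycparser import c_parser

    # An empty switch body parses into a Compound whose block_items is None;
    # before the guard, fix_switch_cases iterated over None and raised TypeError.
    ast = c_parser.CParser().parse('void f(int x) { switch (x) {} }')
    ast.show()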
diff --git a/pycparser/c_generator.py b/pycparser/c_generator.py
index f789742..973d24a 100644
--- a/pycparser/c_generator.py
+++ b/pycparser/c_generator.py
@@ -119,7 +119,7 @@ class CGenerator(object):
return s
def visit_Cast(self, n):
- s = '(' + self._generate_type(n.to_type) + ')'
+ s = '(' + self._generate_type(n.to_type, emit_declname=False) + ')'
return s + ' ' + self._parenthesize_unless_simple(n.expr)
def visit_ExprList(self, n):
@@ -291,6 +291,15 @@ class CGenerator(object):
def visit_FuncDecl(self, n):
return self._generate_type(n)
+ def visit_ArrayDecl(self, n):
+ return self._generate_type(n, emit_declname=False)
+
+ def visit_TypeDecl(self, n):
+ return self._generate_type(n, emit_declname=False)
+
+ def visit_PtrDecl(self, n):
+ return self._generate_type(n, emit_declname=False)
+
def _generate_struct_union_enum(self, n, name):
""" Generates code for structs, unions, and enums. name should be
'struct', 'union', or 'enum'.
@@ -359,7 +368,7 @@ class CGenerator(object):
s += self._generate_type(n.type)
return s
- def _generate_type(self, n, modifiers=[]):
+ def _generate_type(self, n, modifiers=[], emit_declname = True):
""" Recursive generation from a type node. n is the type node.
modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
encountered on the way down to a TypeDecl, to allow proper
@@ -373,23 +382,29 @@ class CGenerator(object):
if n.quals: s += ' '.join(n.quals) + ' '
s += self.visit(n.type)
- nstr = n.declname if n.declname else ''
+ nstr = n.declname if n.declname and emit_declname else ''
# Resolve modifiers.
# Wrap in parens to distinguish pointer to array and pointer to
# function syntax.
#
for i, modifier in enumerate(modifiers):
if isinstance(modifier, c_ast.ArrayDecl):
- if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)):
- nstr = '(' + nstr + ')'
- nstr += '[' + self.visit(modifier.dim) + ']'
+ if (i != 0 and
+ isinstance(modifiers[i - 1], c_ast.PtrDecl)):
+ nstr = '(' + nstr + ')'
+ nstr += '['
+ if modifier.dim_quals:
+ nstr += ' '.join(modifier.dim_quals) + ' '
+ nstr += self.visit(modifier.dim) + ']'
elif isinstance(modifier, c_ast.FuncDecl):
- if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)):
- nstr = '(' + nstr + ')'
+ if (i != 0 and
+ isinstance(modifiers[i - 1], c_ast.PtrDecl)):
+ nstr = '(' + nstr + ')'
nstr += '(' + self.visit(modifier.args) + ')'
elif isinstance(modifier, c_ast.PtrDecl):
if modifier.quals:
- nstr = '* %s %s' % (' '.join(modifier.quals), nstr)
+ nstr = '* %s%s' % (' '.join(modifier.quals),
+ ' ' + nstr if nstr else '')
else:
nstr = '*' + nstr
if nstr: s += ' ' + nstr
@@ -397,11 +412,12 @@ class CGenerator(object):
elif typ == c_ast.Decl:
return self._generate_decl(n.type)
elif typ == c_ast.Typename:
- return self._generate_type(n.type)
+ return self._generate_type(n.type, emit_declname = emit_declname)
elif typ == c_ast.IdentifierType:
return ' '.join(n.names) + ' '
elif typ in (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl):
- return self._generate_type(n.type, modifiers + [n])
+ return self._generate_type(n.type, modifiers + [n],
+ emit_declname = emit_declname)
else:
return self.visit(n)
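The emit_declname flag lets a type node be rendered as a bare type, without its declarator name, which is what casts and the new visit_TypeDecl/visit_PtrDecl/visit_ArrayDecl entry points need. A small sketch, assuming the 2.20 node constructors (declname, quals, type):

    from pycparser import c_ast, c_generator

    # A pointer-to-int type node that happens to carry a declname.
    ptr = c_ast.PtrDecl(
        quals=[],
        type=c_ast.TypeDecl(declname='p', quals=[],
                            type=c_ast.IdentifierType(names=['int'])))

    # visit_PtrDecl passes emit_declname=False, so 'p' is suppressed
    # and only the type itself is printed (roughly 'int *').
    print(c_generator.CGenerator().visit(ptr))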
diff --git a/pycparser/c_lexer.py b/pycparser/c_lexer.py
index de8445e..045d24e 100644
--- a/pycparser/c_lexer.py
+++ b/pycparser/c_lexer.py
@@ -19,7 +19,7 @@ class CLexer(object):
tokens.
The public attribute filename can be set to an initial
- filaneme, but the lexer will update it upon #line
+ filename, but the lexer will update it upon #line
directives.
"""
def __init__(self, error_func, on_lbrace_func, on_rbrace_func,
@@ -130,7 +130,7 @@ class CLexer(object):
'TYPEID',
# constants
- 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN',
+ 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', 'INT_CONST_CHAR',
'FLOAT_CONST', 'HEX_FLOAT_CONST',
'CHAR_CONST',
'WCHAR_CONST',
@@ -205,23 +205,49 @@ class CLexer(object):
# parse all correct code, even if it means to sometimes parse incorrect
# code.
#
- simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
- decimal_escape = r"""(\d+)"""
- hex_escape = r"""(x[0-9a-fA-F]+)"""
- bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
+ # The original regexes were taken verbatim from the C syntax definition,
+ # and were later modified to avoid worst-case exponential running time.
+ #
+ # simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
+ # decimal_escape = r"""(\d+)"""
+ # hex_escape = r"""(x[0-9a-fA-F]+)"""
+ # bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
+ #
+ # The following modifications were made to avoid the ambiguity that allowed backtracking:
+ # (https://github.com/eliben/pycparser/issues/61)
+ #
+ # - \x is kept in simple_escape only when it is not followed by a hex digit, to avoid ambiguity with hex_escape.
+ # - hex_escape allows one or more hex characters, but requires that the next character (if any) is not hex
+ # - decimal_escape allows one or more decimal characters, but requires that the next character (if any) is not a decimal digit
+ # - bad_escape no longer allows any decimal digits (previously 8 and 9 were allowed), to avoid conflicting with the permissive decimal_escape.
+ #
+ # Without this change, python's `re` module would recursively try parsing each ambiguous escape sequence in multiple ways.
+ # e.g. `\123` could be parsed as `\1`+`23`, `\12`+`3`, and `\123`.
+
+ simple_escape = r"""([a-wyzA-Z._~!=&\^\-\\?'"]|x(?![0-9a-fA-F]))"""
+ decimal_escape = r"""(\d+)(?!\d)"""
+ hex_escape = r"""(x[0-9a-fA-F]+)(?![0-9a-fA-F])"""
+ bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])"""
escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
+
+ # The lookahead-based regex above could be slow when used for strings. Because every valid escape (including \x)
+ # allows zero or more non-escaped characters after its first character, simple_escape+decimal_escape+hex_escape is simplified for strings to
+
+ escape_sequence_start_in_string = r"""(\\[0-9a-zA-Z._~!=&\^\-\\?'"])"""
+
cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
char_const = "'"+cconst_char+"'"
wchar_const = 'L'+char_const
+ multicharacter_constant = "'"+cconst_char+"{2,4}'"
unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""
# string literals (K&R2: A.2.6)
- string_char = r"""([^"\\\n]|"""+escape_sequence+')'
+ string_char = r"""([^"\\\n]|"""+escape_sequence_start_in_string+')'
string_literal = '"'+string_char+'*"'
wstring_literal = 'L'+string_literal
- bad_string_literal = '"'+string_char+'*?'+bad_escape+string_char+'*"'
+ bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
# floating constants (K&R2: A.2.5.3)
exponent_part = r"""([eE][-+]?[0-9]+)"""
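The comment block in this hunk describes the ambiguity in prose; the following deliberately simplified sketch (these are not pycparser's exact patterns) shows why the lookaheads matter:

    import re
    import time

    # '\12' can be consumed either as one '\<digits>' escape or as '\1' plus a
    # plain '2', so a failing match explores every combination of splits.
    ambiguous = re.compile(r"'(\\\d+|[^'\\\n])*'")
    # The lookahead forces the escape to swallow the whole digit run, leaving
    # only one way to split the input.
    anchored = re.compile(r"'(\\\d+(?!\d)|[^'\\\n])*'")

    bad_input = "'" + r"\12" * 22   # unterminated constant -> both matches fail

    for name, pat in (('ambiguous', ambiguous), ('anchored', anchored)):
        start = time.time()
        pat.match(bad_input)
        print(name, '%.3fs' % (time.time() - start))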
@@ -443,6 +469,10 @@ class CLexer(object):
# Must come before bad_char_const, to prevent it from
# catching valid char constants as invalid
#
+ @TOKEN(multicharacter_constant)
+ def t_INT_CONST_CHAR(self, t):
+ return t
+
@TOKEN(char_const)
def t_CHAR_CONST(self, t):
return t
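With the new multicharacter_constant rule, a constant like 'ab' now lexes as a single INT_CONST_CHAR token instead of tripping bad_char_const. A rough sketch using CLexer directly (callback signatures taken from the constructor above; the callbacks themselves are no-ops here):

    from pycparser.c_lexer import CLexer

    lexer = CLexer(error_func=lambda msg, line, col: None,
                   on_lbrace_func=lambda: None,
                   on_rbrace_func=lambda: None,
                   type_lookup_func=lambda name: False)
    lexer.build()
    lexer.input("int x = 'ab';")
    # Expect INT, ID, EQUALS, INT_CONST_CHAR, SEMI
    print([(tok.type, tok.value) for tok in iter(lexer.token, None)])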
diff --git a/pycparser/c_parser.py b/pycparser/c_parser.py
index 0e6e755..744ede8 100644
--- a/pycparser/c_parser.py
+++ b/pycparser/c_parser.py
@@ -529,8 +529,7 @@ class CParser(PLYParser):
def p_translation_unit_2(self, p):
""" translation_unit : translation_unit external_declaration
"""
- if p[2] is not None:
- p[1].extend(p[2])
+ p[1].extend(p[2])
p[0] = p[1]
# Declarations always come as lists (because they can be
@@ -557,7 +556,7 @@ class CParser(PLYParser):
def p_external_declaration_4(self, p):
""" external_declaration : SEMI
"""
- p[0] = None
+ p[0] = []
def p_pp_directive(self, p):
""" pp_directive : PPHASH
@@ -1411,12 +1410,13 @@ class CParser(PLYParser):
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
def p_direct_abstract_declarator_3(self, p):
- """ direct_abstract_declarator : LBRACKET assignment_expression_opt RBRACKET
+ """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
"""
+ quals = (p[2] if len(p) > 4 else []) or []
p[0] = c_ast.ArrayDecl(
type=c_ast.TypeDecl(None, None, None),
- dim=p[2],
- dim_quals=[],
+ dim=p[3] if len(p) > 4 else p[2],
+ dim_quals=quals,
coord=self._token_coord(p, 1))
def p_direct_abstract_declarator_4(self, p):
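The added type_qualifier_list_opt lets qualifiers appear inside the brackets of an abstract array declarator (C99 6.7.5.3), and they end up in dim_quals. A small usage sketch, assuming 2.20's AST layout for unnamed parameters:

    from pycparser import c_parser

    ast = c_parser.CParser().parse('void f(int [const 10]);')
    # Decl -> FuncDecl -> ParamList -> Typename -> ArrayDecl
    arr = ast.ext[0].type.args.params[0].type
    print(arr.dim_quals)   # expected: ['const']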
@@ -1740,8 +1740,7 @@ class CParser(PLYParser):
if len(p) == 2:
p[0] = p[1]
elif len(p) == 4:
- field = c_ast.ID(p[3], self._token_coord(p, 3))
- p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)
+ p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
elif len(p) == 5:
p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
else:
@@ -1766,9 +1765,23 @@ class CParser(PLYParser):
| INT_CONST_OCT
| INT_CONST_HEX
| INT_CONST_BIN
- """
+ | INT_CONST_CHAR
+ """
+ uCount = 0
+ lCount = 0
+ for x in p[1][-3:]:
+ if x in ('l', 'L'):
+ lCount += 1
+ elif x in ('u', 'U'):
+ uCount += 1
+ t = ''
+ if uCount > 1:
+ raise ValueError('Constant cannot have more than one u/U suffix.')
+ elif lCount > 2:
+ raise ValueError('Constant cannot have more than two l/L suffix characters.')
+ prefix = 'unsigned ' * uCount + 'long ' * lCount
p[0] = c_ast.Constant(
- 'int', p[1], self._token_coord(p, 1))
+ prefix + 'int', p[1], self._token_coord(p, 1))
def p_constant_2(self, p):
""" constant : FLOAT_CONST