diff options
Diffstat (limited to 'pycparser')
-rw-r--r--  pycparser/__init__.py       |  2
-rw-r--r--  pycparser/_build_tables.py  |  8
-rw-r--r--  pycparser/ast_transforms.py |  3
-rw-r--r--  pycparser/c_generator.py    | 38
-rw-r--r--  pycparser/c_lexer.py        | 46
-rw-r--r--  pycparser/c_parser.py       | 33
6 files changed, 97 insertions, 33 deletions
diff --git a/pycparser/__init__.py b/pycparser/__init__.py index b67389f..6e86e9f 100644 --- a/pycparser/__init__.py +++ b/pycparser/__init__.py @@ -8,7 +8,7 @@ # License: BSD #----------------------------------------------------------------- __all__ = ['c_lexer', 'c_parser', 'c_ast'] -__version__ = '2.19' +__version__ = '2.20' import io from subprocess import check_output diff --git a/pycparser/_build_tables.py b/pycparser/_build_tables.py index 94a3891..958381a 100644 --- a/pycparser/_build_tables.py +++ b/pycparser/_build_tables.py @@ -10,13 +10,17 @@ # License: BSD #----------------------------------------------------------------- +# Insert '.' and '..' as first entries to the search path for modules. +# Restricted environments like embeddable python do not include the +# current working directory on startup. +import sys +sys.path[0:0] = ['.', '..'] + # Generate c_ast.py from _ast_gen import ASTCodeGenerator ast_gen = ASTCodeGenerator('_c_ast.cfg') ast_gen.generate(open('c_ast.py', 'w')) -import sys -sys.path[0:0] = ['.', '..'] from pycparser import c_parser # Generates the tables diff --git a/pycparser/ast_transforms.py b/pycparser/ast_transforms.py index ba50966..0aeb88f 100644 --- a/pycparser/ast_transforms.py +++ b/pycparser/ast_transforms.py @@ -74,7 +74,8 @@ def fix_switch_cases(switch_node): # Goes over the children of the Compound below the Switch, adding them # either directly below new_compound or below the last Case as appropriate - for child in switch_node.stmt.block_items: + # (for `switch(cond) {}`, block_items would have been None) + for child in (switch_node.stmt.block_items or []): if isinstance(child, (c_ast.Case, c_ast.Default)): # If it's a Case/Default: # 1. 
Add it to the Compound and mark as "last case" diff --git a/pycparser/c_generator.py b/pycparser/c_generator.py index f789742..973d24a 100644 --- a/pycparser/c_generator.py +++ b/pycparser/c_generator.py @@ -119,7 +119,7 @@ class CGenerator(object): return s def visit_Cast(self, n): - s = '(' + self._generate_type(n.to_type) + ')' + s = '(' + self._generate_type(n.to_type, emit_declname=False) + ')' return s + ' ' + self._parenthesize_unless_simple(n.expr) def visit_ExprList(self, n): @@ -291,6 +291,15 @@ class CGenerator(object): def visit_FuncDecl(self, n): return self._generate_type(n) + def visit_ArrayDecl(self, n): + return self._generate_type(n, emit_declname=False) + + def visit_TypeDecl(self, n): + return self._generate_type(n, emit_declname=False) + + def visit_PtrDecl(self, n): + return self._generate_type(n, emit_declname=False) + def _generate_struct_union_enum(self, n, name): """ Generates code for structs, unions, and enums. name should be 'struct', 'union', or 'enum'. @@ -359,7 +368,7 @@ class CGenerator(object): s += self._generate_type(n.type) return s - def _generate_type(self, n, modifiers=[]): + def _generate_type(self, n, modifiers=[], emit_declname = True): """ Recursive generation from a type node. n is the type node. modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers encountered on the way down to a TypeDecl, to allow proper @@ -373,23 +382,29 @@ class CGenerator(object): if n.quals: s += ' '.join(n.quals) + ' ' s += self.visit(n.type) - nstr = n.declname if n.declname else '' + nstr = n.declname if n.declname and emit_declname else '' # Resolve modifiers. # Wrap in parens to distinguish pointer to array and pointer to # function syntax. 
# for i, modifier in enumerate(modifiers): if isinstance(modifier, c_ast.ArrayDecl): - if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)): - nstr = '(' + nstr + ')' - nstr += '[' + self.visit(modifier.dim) + ']' + if (i != 0 and + isinstance(modifiers[i - 1], c_ast.PtrDecl)): + nstr = '(' + nstr + ')' + nstr += '[' + if modifier.dim_quals: + nstr += ' '.join(modifier.dim_quals) + ' ' + nstr += self.visit(modifier.dim) + ']' elif isinstance(modifier, c_ast.FuncDecl): - if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)): - nstr = '(' + nstr + ')' + if (i != 0 and + isinstance(modifiers[i - 1], c_ast.PtrDecl)): + nstr = '(' + nstr + ')' nstr += '(' + self.visit(modifier.args) + ')' elif isinstance(modifier, c_ast.PtrDecl): if modifier.quals: - nstr = '* %s %s' % (' '.join(modifier.quals), nstr) + nstr = '* %s%s' % (' '.join(modifier.quals), + ' ' + nstr if nstr else '') else: nstr = '*' + nstr if nstr: s += ' ' + nstr @@ -397,11 +412,12 @@ class CGenerator(object): elif typ == c_ast.Decl: return self._generate_decl(n.type) elif typ == c_ast.Typename: - return self._generate_type(n.type) + return self._generate_type(n.type, emit_declname = emit_declname) elif typ == c_ast.IdentifierType: return ' '.join(n.names) + ' ' elif typ in (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl): - return self._generate_type(n.type, modifiers + [n]) + return self._generate_type(n.type, modifiers + [n], + emit_declname = emit_declname) else: return self.visit(n) diff --git a/pycparser/c_lexer.py b/pycparser/c_lexer.py index de8445e..045d24e 100644 --- a/pycparser/c_lexer.py +++ b/pycparser/c_lexer.py @@ -19,7 +19,7 @@ class CLexer(object): tokens. The public attribute filename can be set to an initial - filaneme, but the lexer will update it upon #line + filename, but the lexer will update it upon #line directives. 
""" def __init__(self, error_func, on_lbrace_func, on_rbrace_func, @@ -130,7 +130,7 @@ class CLexer(object): 'TYPEID', # constants - 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', + 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', 'INT_CONST_BIN', 'INT_CONST_CHAR', 'FLOAT_CONST', 'HEX_FLOAT_CONST', 'CHAR_CONST', 'WCHAR_CONST', @@ -205,23 +205,49 @@ class CLexer(object): # parse all correct code, even if it means to sometimes parse incorrect # code. # - simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" - decimal_escape = r"""(\d+)""" - hex_escape = r"""(x[0-9a-fA-F]+)""" - bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])""" + # The original regexes were taken verbatim from the C syntax definition, + # and were later modified to avoid worst-case exponential running time. + # + # simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" + # decimal_escape = r"""(\d+)""" + # hex_escape = r"""(x[0-9a-fA-F]+)""" + # bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])""" + # + # The following modifications were made to avoid the ambiguity that allowed backtracking: + # (https://github.com/eliben/pycparser/issues/61) + # + # - \x was removed from simple_escape, unless it was not followed by a hex digit, to avoid ambiguity with hex_escape. + # - hex_escape allows one or more hex characters, but requires that the next character(if any) is not hex + # - decimal_escape allows one or more decimal characters, but requires that the next character(if any) is not a decimal + # - bad_escape does not allow any decimals (8-9), to avoid conflicting with the permissive decimal_escape. + # + # Without this change, python's `re` module would recursively try parsing each ambiguous escape sequence in multiple ways. + # e.g. `\123` could be parsed as `\1`+`23`, `\12`+`3`, and `\123`. 
+ + simple_escape = r"""([a-wyzA-Z._~!=&\^\-\\?'"]|x(?![0-9a-fA-F]))""" + decimal_escape = r"""(\d+)(?!\d)""" + hex_escape = r"""(x[0-9a-fA-F]+)(?![0-9a-fA-F])""" + bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-9])""" escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))' + + # This complicated regex with lookahead might be slow for strings, so because all of the valid escapes (including \x) allowed + # 0 or more non-escaped characters after the first character, simple_escape+decimal_escape+hex_escape got simplified to + + escape_sequence_start_in_string = r"""(\\[0-9a-zA-Z._~!=&\^\-\\?'"])""" + cconst_char = r"""([^'\\\n]|"""+escape_sequence+')' char_const = "'"+cconst_char+"'" wchar_const = 'L'+char_const + multicharacter_constant = "'"+cconst_char+"{2,4}'" unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)" bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')""" # string literals (K&R2: A.2.6) - string_char = r"""([^"\\\n]|"""+escape_sequence+')' + string_char = r"""([^"\\\n]|"""+escape_sequence_start_in_string+')' string_literal = '"'+string_char+'*"' wstring_literal = 'L'+string_literal - bad_string_literal = '"'+string_char+'*?'+bad_escape+string_char+'*"' + bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"' # floating constants (K&R2: A.2.5.3) exponent_part = r"""([eE][-+]?[0-9]+)""" @@ -443,6 +469,10 @@ class CLexer(object): # Must come before bad_char_const, to prevent it from # catching valid char constants as invalid # + @TOKEN(multicharacter_constant) + def t_INT_CONST_CHAR(self, t): + return t + @TOKEN(char_const) def t_CHAR_CONST(self, t): return t diff --git a/pycparser/c_parser.py b/pycparser/c_parser.py index 0e6e755..744ede8 100644 --- a/pycparser/c_parser.py +++ b/pycparser/c_parser.py @@ -529,8 +529,7 @@ class CParser(PLYParser): def p_translation_unit_2(self, p): """ translation_unit : translation_unit external_declaration """ - if p[2] is not None: 
- p[1].extend(p[2]) + p[1].extend(p[2]) p[0] = p[1] # Declarations always come as lists (because they can be @@ -557,7 +556,7 @@ class CParser(PLYParser): def p_external_declaration_4(self, p): """ external_declaration : SEMI """ - p[0] = None + p[0] = [] def p_pp_directive(self, p): """ pp_directive : PPHASH @@ -1411,12 +1410,13 @@ class CParser(PLYParser): p[0] = self._type_modify_decl(decl=p[1], modifier=arr) def p_direct_abstract_declarator_3(self, p): - """ direct_abstract_declarator : LBRACKET assignment_expression_opt RBRACKET + """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET """ + quals = (p[2] if len(p) > 4 else []) or [] p[0] = c_ast.ArrayDecl( type=c_ast.TypeDecl(None, None, None), - dim=p[2], - dim_quals=[], + dim=p[3] if len(p) > 4 else p[2], + dim_quals=quals, coord=self._token_coord(p, 1)) def p_direct_abstract_declarator_4(self, p): @@ -1740,8 +1740,7 @@ class CParser(PLYParser): if len(p) == 2: p[0] = p[1] elif len(p) == 4: - field = c_ast.ID(p[3], self._token_coord(p, 3)) - p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord) + p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord) elif len(p) == 5: p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) else: @@ -1766,9 +1765,23 @@ class CParser(PLYParser): | INT_CONST_OCT | INT_CONST_HEX | INT_CONST_BIN - """ + | INT_CONST_CHAR + """ + uCount = 0 + lCount = 0 + for x in p[1][-3:]: + if x in ('l', 'L'): + lCount += 1 + elif x in ('u', 'U'): + uCount += 1 + t = '' + if uCount > 1: + raise ValueError('Constant cannot have more than one u/U suffix.') + elif lCount > 2: + raise ValueError('Constant cannot have more than two l/L suffix.') + prefix = 'unsigned ' * uCount + 'long ' * lCount p[0] = c_ast.Constant( - 'int', p[1], self._token_coord(p, 1)) + prefix + 'int', p[1], self._token_coord(p, 1)) def p_constant_2(self, p): """ constant : FLOAT_CONST |