| author | Jeffrey Vander Stoep <jeffv@google.com> | 2016-02-04 21:33:25 +0000 |
|---|---|---|
| committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2016-02-04 21:33:25 +0000 |
| commit | 6f93750a62cfc34af70305b4bda0859259717448 (patch) | |
| tree | 0b273a21aac9d863f4ea69e522a286326c21bb6c /lib/python2.7/site-packages/sepolgen | |
| parent | f5e46605a61d63e329159ac38327b3d061277bbd (diff) | |
| parent | 1dd717ffbacd9ef09d9c3443c82dc7ffeda83a47 (diff) | |
| download | 2.7.5-6f93750a62cfc34af70305b4bda0859259717448.tar.gz | |
Merge "setools: fix sesearch, add sediff"
Diffstat (limited to 'lib/python2.7/site-packages/sepolgen')
17 files changed, 0 insertions, 8030 deletions
diff --git a/lib/python2.7/site-packages/sepolgen/__init__.py b/lib/python2.7/site-packages/sepolgen/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/lib/python2.7/site-packages/sepolgen/__init__.py +++ /dev/null diff --git a/lib/python2.7/site-packages/sepolgen/access.py b/lib/python2.7/site-packages/sepolgen/access.py deleted file mode 100644 index cf13210..0000000 --- a/lib/python2.7/site-packages/sepolgen/access.py +++ /dev/null @@ -1,331 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -""" -Classes representing basic access. - -SELinux - at the most basic level - represents access as -the 4-tuple subject (type or context), target (type or context), -object class, permission. The policy language elaborates this basic -access to faciliate more concise rules (e.g., allow rules can have multiple -source or target types - see refpolicy for more information). - -This module has objects for representing the most basic access (AccessVector) -and sets of that access (AccessVectorSet). These objects are used in Madison -in a variety of ways, but they are the fundamental representation of access. -""" - -import refpolicy -from selinux import audit2why - -def is_idparam(id): - """Determine if an id is a paramater in the form $N, where N is - an integer. - - Returns: - True if the id is a paramater - False if the id is not a paramater - """ - if len(id) > 1 and id[0] == '$': - try: - int(id[1:]) - except ValueError: - return False - return True - else: - return False - -class AccessVector: - """ - An access vector is the basic unit of access in SELinux. - - Access vectors are the most basic representation of access within - SELinux. It represents the access a source type has to a target - type in terms of an object class and a set of permissions. - - Access vectors are distinct from AVRules in that they can only - store a single source type, target type, and object class. The - simplicity of AccessVectors makes them useful for storing access - in a form that is easy to search and compare. - - The source, target, and object are stored as string. No checking - done to verify that the strings are valid SELinux identifiers. - Identifiers in the form $N (where N is an integer) are reserved as - interface parameters and are treated as wild cards in many - circumstances. - - Properties: - .src_type - The source type allowed access. [String or None] - .tgt_type - The target type to which access is allowed. [String or None] - .obj_class - The object class to which access is allowed. [String or None] - .perms - The permissions allowed to the object class. 
[IdSet] - .audit_msgs - The audit messages that generated this access vector [List of strings] - """ - def __init__(self, init_list=None): - if init_list: - self.from_list(init_list) - else: - self.src_type = None - self.tgt_type = None - self.obj_class = None - self.perms = refpolicy.IdSet() - self.audit_msgs = [] - self.type = audit2why.TERULE - self.data = [] - - # The direction of the information flow represented by this - # access vector - used for matching - self.info_flow_dir = None - - def from_list(self, list): - """Initialize an access vector from a list. - - Initialize an access vector from a list treating the list as - positional arguments - i.e., 0 = src_type, 1 = tgt_type, etc. - All of the list elements 3 and greater are treated as perms. - For example, the list ['foo_t', 'bar_t', 'file', 'read', 'write'] - would create an access vector list with the source type 'foo_t', - target type 'bar_t', object class 'file', and permissions 'read' - and 'write'. - - This format is useful for very simple storage to strings or disc - (see to_list) and for initializing access vectors. - """ - if len(list) < 4: - raise ValueError("List must contain at least four elements %s" % str(list)) - self.src_type = list[0] - self.tgt_type = list[1] - self.obj_class = list[2] - self.perms = refpolicy.IdSet(list[3:]) - - def to_list(self): - """ - Convert an access vector to a list. - - Convert an access vector to a list treating the list as positional - values. See from_list for more information on how an access vector - is represented in a list. - """ - l = [self.src_type, self.tgt_type, self.obj_class] - l.extend(self.perms) - return l - - def __str__(self): - return self.to_string() - - def to_string(self): - return "allow %s %s:%s %s;" % (self.src_type, self.tgt_type, - self.obj_class, self.perms.to_space_str()) - - def __cmp__(self, other): - if self.src_type != other.src_type: - return cmp(self.src_type, other.src_type) - if self.tgt_type != other.tgt_type: - return cmp(self.tgt_type, other.tgt_type) - if self.obj_class != self.obj_class: - return cmp(self.obj_class, other.obj_class) - if len(self.perms) != len(other.perms): - return cmp(len(self.perms), len(other.perms)) - x = list(self.perms) - x.sort() - y = list(other.perms) - y.sort() - for pa, pb in zip(x, y): - if pa != pb: - return cmp(pa, pb) - return 0 - -def avrule_to_access_vectors(avrule): - """Convert an avrule into a list of access vectors. - - AccessVectors and AVRules are similary, but differ in that - an AVRule can more than one source type, target type, and - object class. This function expands a single avrule into a - list of one or more AccessVectors representing the access - defined in the AVRule. - - - """ - if isinstance(avrule, AccessVector): - return [avrule] - a = [] - for src_type in avrule.src_types: - for tgt_type in avrule.tgt_types: - for obj_class in avrule.obj_classes: - access = AccessVector() - access.src_type = src_type - access.tgt_type = tgt_type - access.obj_class = obj_class - access.perms = avrule.perms.copy() - a.append(access) - return a - -class AccessVectorSet: - """A non-overlapping set of access vectors. - - An AccessVectorSet is designed to store one or more access vectors - that are non-overlapping. Access can be added to the set - incrementally and access vectors will be added or merged as - necessary. 
For example, adding the following access vectors using - add_av: - allow $1 etc_t : read; - allow $1 etc_t : write; - allow $1 var_log_t : read; - Would result in an access vector set with the access vectors: - allow $1 etc_t : { read write}; - allow $1 var_log_t : read; - """ - def __init__(self): - """Initialize an access vector set. - """ - self.src = {} - # The information flow direction of this access vector - # set - see objectmodel.py for more information. This - # stored here to speed up searching - see matching.py. - self.info_dir = None - - def __iter__(self): - """Iterate over all of the unique access vectors in the set.""" - for tgts in self.src.values(): - for objs in tgts.values(): - for av in objs.values(): - yield av - - def __len__(self): - """Return the number of unique access vectors in the set. - - Because of the inernal representation of the access vector set, - __len__ is not a constant time operation. Worst case is O(N) - where N is the number of unique access vectors, but the common - case is probably better. - """ - l = 0 - for tgts in self.src.values(): - for objs in tgts.values(): - l += len(objs) - return l - - def to_list(self): - """Return the unique access vectors in the set as a list. - - The format of the returned list is a set of nested lists, - each access vector represented by a list. This format is - designed to be simply serializable to a file. - - For example, consider an access vector set with the following - access vectors: - allow $1 user_t : file read; - allow $1 etc_t : file { read write}; - to_list would return the following: - [[$1, user_t, file, read] - [$1, etc_t, file, read, write]] - - See AccessVector.to_list for more information. - """ - l = [] - for av in self: - l.append(av.to_list()) - - return l - - def from_list(self, l): - """Add access vectors stored in a list. - - See to list for more information on the list format that this - method accepts. - - This will add all of the access from the list. Any existing - access vectors in the set will be retained. - """ - for av in l: - self.add_av(AccessVector(av)) - - def add(self, src_type, tgt_type, obj_class, perms, audit_msg=None, avc_type=audit2why.TERULE, data=[]): - """Add an access vector to the set. - """ - tgt = self.src.setdefault(src_type, { }) - cls = tgt.setdefault(tgt_type, { }) - - if cls.has_key((obj_class, avc_type)): - access = cls[obj_class, avc_type] - else: - access = AccessVector() - access.src_type = src_type - access.tgt_type = tgt_type - access.obj_class = obj_class - access.data = data - access.type = avc_type - cls[obj_class, avc_type] = access - - access.perms.update(perms) - if audit_msg: - access.audit_msgs.append(audit_msg) - - def add_av(self, av, audit_msg=None): - """Add an access vector to the set.""" - self.add(av.src_type, av.tgt_type, av.obj_class, av.perms) - - -def avs_extract_types(avs): - types = refpolicy.IdSet() - for av in avs: - types.add(av.src_type) - types.add(av.tgt_type) - - return types - -def avs_extract_obj_perms(avs): - perms = { } - for av in avs: - if perms.has_key(av.obj_class): - s = perms[av.obj_class] - else: - s = refpolicy.IdSet() - perms[av.obj_class] = s - s.update(av.perms) - return perms - -class RoleTypeSet: - """A non-overlapping set of role type statements. - - This clas allows the incremental addition of role type statements and - maintains a non-overlapping list of statements. 
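The merging behaviour described above can be exercised directly. A minimal sketch, assuming the deleted sepolgen package is still importable under Python 2.7 with the selinux bindings present (access.py imports selinux.audit2why at load time); role and type names are illustrative, and the expected output is paraphrased in the comments:

```python
from sepolgen import access

s = access.AccessVectorSet()
# Same (source, target, class) triple: the perms merge into one vector.
s.add("$1", "etc_t", "file", ["read"])
s.add("$1", "etc_t", "file", ["write"])
# Different target type: kept as a separate vector.
s.add("$1", "var_log_t", "file", ["read"])

for av in s:
    print av.to_string()   # allow $1 etc_t:file { read write }; etc.

# RoleTypeSet collapses duplicate roles the same way.
roles = access.RoleTypeSet()
roles.add("staff_r", "staff_t")
roles.add("staff_r", "user_t")     # merged into the existing staff_r entry
roles.add("sysadm_r", "sysadm_t")
print len(roles)                   # 2 unique role statements
for rt in roles:
    print rt.role, "->", ", ".join(rt.types)
```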
- """ - def __init__(self): - """Initialize an access vector set.""" - self.role_types = {} - - def __iter__(self): - """Iterate over all of the unique role allows statements in the set.""" - for role_type in self.role_types.values(): - yield role_type - - def __len__(self): - """Return the unique number of role allow statements.""" - return len(self.role_types.keys()) - - def add(self, role, type): - if self.role_types.has_key(role): - role_type = self.role_types[role] - else: - role_type = refpolicy.RoleType() - role_type.role = role - self.role_types[role] = role_type - - role_type.types.add(type) diff --git a/lib/python2.7/site-packages/sepolgen/audit.py b/lib/python2.7/site-packages/sepolgen/audit.py deleted file mode 100644 index 56919be..0000000 --- a/lib/python2.7/site-packages/sepolgen/audit.py +++ /dev/null @@ -1,549 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -import refpolicy -import access -import re -import sys - -# Convenience functions - -def get_audit_boot_msgs(): - """Obtain all of the avc and policy load messages from the audit - log. This function uses ausearch and requires that the current - process have sufficient rights to run ausearch. - - Returns: - string contain all of the audit messages returned by ausearch. - """ - import subprocess - import time - fd=open("/proc/uptime", "r") - off=float(fd.read().split()[0]) - fd.close - s = time.localtime(time.time() - off) - bootdate = time.strftime("%x", s) - boottime = time.strftime("%X", s) - output = subprocess.Popen(["/sbin/ausearch", "-m", "AVC,USER_AVC,MAC_POLICY_LOAD,DAEMON_START,SELINUX_ERR", "-ts", bootdate, boottime], - stdout=subprocess.PIPE).communicate()[0] - return output - -def get_audit_msgs(): - """Obtain all of the avc and policy load messages from the audit - log. This function uses ausearch and requires that the current - process have sufficient rights to run ausearch. - - Returns: - string contain all of the audit messages returned by ausearch. - """ - import subprocess - output = subprocess.Popen(["/sbin/ausearch", "-m", "AVC,USER_AVC,MAC_POLICY_LOAD,DAEMON_START,SELINUX_ERR"], - stdout=subprocess.PIPE).communicate()[0] - return output - -def get_dmesg_msgs(): - """Obtain all of the avc and policy load messages from /bin/dmesg. - - Returns: - string contain all of the audit messages returned by dmesg. - """ - import subprocess - output = subprocess.Popen(["/bin/dmesg"], - stdout=subprocess.PIPE).communicate()[0] - return output - -# Classes representing audit messages - -class AuditMessage: - """Base class for all objects representing audit messages. - - AuditMessage is a base class for all audit messages and only - provides storage for the raw message (as a string) and a - parsing function that does nothing. 
- """ - def __init__(self, message): - self.message = message - self.header = "" - - def from_split_string(self, recs): - """Parse a string that has been split into records by space into - an audit message. - - This method should be overridden by subclasses. Error reporting - should be done by raise ValueError exceptions. - """ - for msg in recs: - fields = msg.split("=") - if len(fields) != 2: - if msg[:6] == "audit(": - self.header = msg - return - else: - continue - - if fields[0] == "msg": - self.header = fields[1] - return - - -class InvalidMessage(AuditMessage): - """Class representing invalid audit messages. This is used to differentiate - between audit messages that aren't recognized (that should return None from - the audit message parser) and a message that is recognized but is malformed - in some way. - """ - def __init__(self, message): - AuditMessage.__init__(self, message) - -class PathMessage(AuditMessage): - """Class representing a path message""" - def __init__(self, message): - AuditMessage.__init__(self, message) - self.path = "" - - def from_split_string(self, recs): - AuditMessage.from_split_string(self, recs) - - for msg in recs: - fields = msg.split("=") - if len(fields) != 2: - continue - if fields[0] == "path": - self.path = fields[1][1:-1] - return -import selinux.audit2why as audit2why - -avcdict = {} - -class AVCMessage(AuditMessage): - """AVC message representing an access denial or granted message. - - This is a very basic class and does not represent all possible fields - in an avc message. Currently the fields are: - scontext - context for the source (process) that generated the message - tcontext - context for the target - tclass - object class for the target (only one) - comm - the process name - exe - the on-disc binary - path - the path of the target - access - list of accesses that were allowed or denied - denial - boolean indicating whether this was a denial (True) or granted - (False) message. - - An example audit message generated from the audit daemon looks like (line breaks - added): - 'type=AVC msg=audit(1155568085.407:10877): avc: denied { search } for - pid=677 comm="python" name="modules" dev=dm-0 ino=13716388 - scontext=user_u:system_r:setroubleshootd_t:s0 - tcontext=system_u:object_r:modules_object_t:s0 tclass=dir' - - An example audit message stored in syslog (not processed by the audit daemon - line - breaks added): - 'Sep 12 08:26:43 dhcp83-5 kernel: audit(1158064002.046:4): avc: denied { read } - for pid=2 496 comm="bluez-pin" name=".gdm1K3IFT" dev=dm-0 ino=3601333 - scontext=user_u:system_r:bluetooth_helper_t:s0-s0:c0 - tcontext=system_u:object_r:xdm_tmp_t:s0 tclass=file - """ - def __init__(self, message): - AuditMessage.__init__(self, message) - self.scontext = refpolicy.SecurityContext() - self.tcontext = refpolicy.SecurityContext() - self.tclass = "" - self.comm = "" - self.exe = "" - self.path = "" - self.name = "" - self.accesses = [] - self.denial = True - self.type = audit2why.TERULE - - def __parse_access(self, recs, start): - # This is kind of sucky - the access that is in a space separated - # list like '{ read write }'. This doesn't fit particularly well with splitting - # the string on spaces. This function takes the list of recs and a starting - # position one beyond the open brace. It then adds the accesses until it finds - # the close brace or the end of the list (which is an error if reached without - # seeing a close brace). 
- found_close = False - i = start - if i == (len(recs) - 1): - raise ValueError("AVC message in invalid format [%s]\n" % self.message) - while i < len(recs): - if recs[i] == "}": - found_close = True - break - self.accesses.append(recs[i]) - i = i + 1 - if not found_close: - raise ValueError("AVC message in invalid format [%s]\n" % self.message) - return i + 1 - - - def from_split_string(self, recs): - AuditMessage.from_split_string(self, recs) - # FUTURE - fully parse avc messages and store all possible fields - # Required fields - found_src = False - found_tgt = False - found_class = False - found_access = False - - for i in range(len(recs)): - if recs[i] == "{": - i = self.__parse_access(recs, i + 1) - found_access = True - continue - elif recs[i] == "granted": - self.denial = False - - fields = recs[i].split("=") - if len(fields) != 2: - continue - if fields[0] == "scontext": - self.scontext = refpolicy.SecurityContext(fields[1]) - found_src = True - elif fields[0] == "tcontext": - self.tcontext = refpolicy.SecurityContext(fields[1]) - found_tgt = True - elif fields[0] == "tclass": - self.tclass = fields[1] - found_class = True - elif fields[0] == "comm": - self.comm = fields[1][1:-1] - elif fields[0] == "exe": - self.exe = fields[1][1:-1] - elif fields[0] == "name": - self.name = fields[1][1:-1] - - if not found_src or not found_tgt or not found_class or not found_access: - raise ValueError("AVC message in invalid format [%s]\n" % self.message) - self.analyze() - - def analyze(self): - tcontext = self.tcontext.to_string() - scontext = self.scontext.to_string() - access_tuple = tuple( self.accesses) - self.data = [] - - if (scontext, tcontext, self.tclass, access_tuple) in avcdict.keys(): - self.type, self.data = avcdict[(scontext, tcontext, self.tclass, access_tuple)] - else: - self.type, self.data = audit2why.analyze(scontext, tcontext, self.tclass, self.accesses); - if self.type == audit2why.NOPOLICY: - self.type = audit2why.TERULE - if self.type == audit2why.BADTCON: - raise ValueError("Invalid Target Context %s\n" % tcontext) - if self.type == audit2why.BADSCON: - raise ValueError("Invalid Source Context %s\n" % scontext) - if self.type == audit2why.BADSCON: - raise ValueError("Invalid Type Class %s\n" % self.tclass) - if self.type == audit2why.BADPERM: - raise ValueError("Invalid permission %s\n" % " ".join(self.accesses)) - if self.type == audit2why.BADCOMPUTE: - raise ValueError("Error during access vector computation") - - if self.type == audit2why.CONSTRAINT: - self.data = [ self.data ] - if self.scontext.user != self.tcontext.user: - self.data.append(("user (%s)" % self.scontext.user, 'user (%s)' % self.tcontext.user)) - if self.scontext.role != self.tcontext.role and self.tcontext.role != "object_r": - self.data.append(("role (%s)" % self.scontext.role, 'role (%s)' % self.tcontext.role)) - if self.scontext.level != self.tcontext.level: - self.data.append(("level (%s)" % self.scontext.level, 'level (%s)' % self.tcontext.level)) - - avcdict[(scontext, tcontext, self.tclass, access_tuple)] = (self.type, self.data) - -class PolicyLoadMessage(AuditMessage): - """Audit message indicating that the policy was reloaded.""" - def __init__(self, message): - AuditMessage.__init__(self, message) - -class DaemonStartMessage(AuditMessage): - """Audit message indicating that a daemon was started.""" - def __init__(self, message): - AuditMessage.__init__(self, message) - self.auditd = False - - def from_split_string(self, recs): - AuditMessage.from_split_string(self, recs) - if "auditd" in 
recs: - self.auditd = True - - -class ComputeSidMessage(AuditMessage): - """Audit message indicating that a sid was not valid. - - Compute sid messages are generated on attempting to create a security - context that is not valid. Security contexts are invalid if the role is - not authorized for the user or the type is not authorized for the role. - - This class does not store all of the fields from the compute sid message - - just the type and role. - """ - def __init__(self, message): - AuditMessage.__init__(self, message) - self.invalid_context = refpolicy.SecurityContext() - self.scontext = refpolicy.SecurityContext() - self.tcontext = refpolicy.SecurityContext() - self.tclass = "" - - def from_split_string(self, recs): - AuditMessage.from_split_string(self, recs) - if len(recs) < 10: - raise ValueError("Split string does not represent a valid compute sid message") - - try: - self.invalid_context = refpolicy.SecurityContext(recs[5]) - self.scontext = refpolicy.SecurityContext(recs[7].split("=")[1]) - self.tcontext = refpolicy.SecurityContext(recs[8].split("=")[1]) - self.tclass = recs[9].split("=")[1] - except: - raise ValueError("Split string does not represent a valid compute sid message") - def output(self): - return "role %s types %s;\n" % (self.role, self.type) - -# Parser for audit messages - -class AuditParser: - """Parser for audit messages. - - This class parses audit messages and stores them according to their message - type. This is not a general purpose audit message parser - it only extracts - selinux related messages. - - Each audit messages are stored in one of four lists: - avc_msgs - avc denial or granted messages. Messages are stored in - AVCMessage objects. - comput_sid_messages - invalid sid messages. Messages are stored in - ComputSidMessage objects. - invalid_msgs - selinux related messages that are not valid. Messages - are stored in InvalidMessageObjects. - policy_load_messages - policy load messages. Messages are stored in - PolicyLoadMessage objects. - - These lists will be reset when a policy load message is seen if - AuditParser.last_load_only is set to true. It is assumed that messages - are fed to the parser in chronological order - time stamps are not - parsed. - """ - def __init__(self, last_load_only=False): - self.__initialize() - self.last_load_only = last_load_only - - def __initialize(self): - self.avc_msgs = [] - self.compute_sid_msgs = [] - self.invalid_msgs = [] - self.policy_load_msgs = [] - self.path_msgs = [] - self.by_header = { } - self.check_input_file = False - - # Low-level parsing function - tries to determine if this audit - # message is an SELinux related message and then parses it into - # the appropriate AuditMessage subclass. This function deliberately - # does not impose policy (e.g., on policy load message) or store - # messages to make as simple and reusable as possible. - # - # Return values: - # None - no recognized audit message found in this line - # - # InvalidMessage - a recognized but invalid message was found. - # - # AuditMessage (or subclass) - object representing a parsed - # and valid audit message. 
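Tying the message classes together: a sketch of feeding one raw denial through AuditParser and turning it into an access vector set. This only works on an SELinux system with a loaded policy, since AVCMessage.analyze() consults audit2why; the input line is again the docstring example from above:

```python
from sepolgen import audit

raw = ('type=AVC msg=audit(1155568085.407:10877): avc: denied { search } '
       'for pid=677 comm="python" name="modules" dev=dm-0 ino=13716388 '
       'scontext=user_u:system_r:setroubleshootd_t:s0 '
       'tcontext=system_u:object_r:modules_object_t:s0 tclass=dir')

parser = audit.AuditParser()
parser.parse_string(raw)
print len(parser.avc_msgs)        # 1
for av in parser.to_access():     # denials only, by default
    print av                      # allow setroubleshootd_t modules_object_t:dir search;
```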
- def __parse_line(self, line): - rec = line.split() - for i in rec: - found = False - if i == "avc:" or i == "message=avc:" or i == "msg='avc:": - msg = AVCMessage(line) - found = True - elif i == "security_compute_sid:": - msg = ComputeSidMessage(line) - found = True - elif i == "type=MAC_POLICY_LOAD" or i == "type=1403": - msg = PolicyLoadMessage(line) - found = True - elif i == "type=AVC_PATH": - msg = PathMessage(line) - found = True - elif i == "type=DAEMON_START": - msg = DaemonStartMessage(list) - found = True - - if found: - self.check_input_file = True - try: - msg.from_split_string(rec) - except ValueError: - msg = InvalidMessage(line) - return msg - return None - - # Higher-level parse function - take a line, parse it into an - # AuditMessage object, and store it in the appropriate list. - # This function will optionally reset all of the lists when - # it sees a load policy message depending on the value of - # self.last_load_only. - def __parse(self, line): - msg = self.__parse_line(line) - if msg is None: - return - - # Append to the correct list - if isinstance(msg, PolicyLoadMessage): - if self.last_load_only: - self.__initialize() - elif isinstance(msg, DaemonStartMessage): - # We initialize every time the auditd is started. This - # is less than ideal, but unfortunately it is the only - # way to catch reboots since the initial policy load - # by init is not stored in the audit log. - if msg.auditd and self.last_load_only: - self.__initialize() - self.policy_load_msgs.append(msg) - elif isinstance(msg, AVCMessage): - self.avc_msgs.append(msg) - elif isinstance(msg, ComputeSidMessage): - self.compute_sid_msgs.append(msg) - elif isinstance(msg, InvalidMessage): - self.invalid_msgs.append(msg) - elif isinstance(msg, PathMessage): - self.path_msgs.append(msg) - - # Group by audit header - if msg.header != "": - if self.by_header.has_key(msg.header): - self.by_header[msg.header].append(msg) - else: - self.by_header[msg.header] = [msg] - - - # Post processing will add additional information from AVC messages - # from related messages - only works on messages generated by - # the audit system. - def __post_process(self): - for value in self.by_header.values(): - avc = [] - path = None - for msg in value: - if isinstance(msg, PathMessage): - path = msg - elif isinstance(msg, AVCMessage): - avc.append(msg) - if len(avc) > 0 and path: - for a in avc: - a.path = path.path - - def parse_file(self, input): - """Parse the contents of a file object. This method can be called - multiple times (along with parse_string).""" - line = input.readline() - while line: - self.__parse(line) - line = input.readline() - if not self.check_input_file: - sys.stderr.write("Nothing to do\n") - sys.exit(0) - self.__post_process() - - def parse_string(self, input): - """Parse a string containing audit messages - messages should - be separated by new lines. This method can be called multiple - times (along with parse_file).""" - lines = input.split('\n') - for l in lines: - self.__parse(l) - self.__post_process() - - def to_role(self, role_filter=None): - """Return RoleAllowSet statements matching the specified filter - - Filter out types that match the filer, or all roles - - Params: - role_filter - [optional] Filter object used to filter the - output. - Returns: - Access vector set representing the denied access in the - audit logs parsed by this object. 
- """ - role_types = access.RoleTypeSet() - for cs in self.compute_sid_msgs: - if not role_filter or role_filter.filter(cs): - role_types.add(cs.invalid_context.role, cs.invalid_context.type) - - return role_types - - def to_access(self, avc_filter=None, only_denials=True): - """Convert the audit logs access into a an access vector set. - - Convert the audit logs into an access vector set, optionally - filtering the restults with the passed in filter object. - - Filter objects are object instances with a .filter method - that takes and access vector and returns True if the message - should be included in the final output and False otherwise. - - Params: - avc_filter - [optional] Filter object used to filter the - output. - Returns: - Access vector set representing the denied access in the - audit logs parsed by this object. - """ - av_set = access.AccessVectorSet() - for avc in self.avc_msgs: - if avc.denial != True and only_denials: - continue - if avc_filter: - if avc_filter.filter(avc): - av_set.add(avc.scontext.type, avc.tcontext.type, avc.tclass, - avc.accesses, avc, avc_type=avc.type, data=avc.data) - else: - av_set.add(avc.scontext.type, avc.tcontext.type, avc.tclass, - avc.accesses, avc, avc_type=avc.type, data=avc.data) - return av_set - -class AVCTypeFilter: - def __init__(self, regex): - self.regex = re.compile(regex) - - def filter(self, avc): - if self.regex.match(avc.scontext.type): - return True - if self.regex.match(avc.tcontext.type): - return True - return False - -class ComputeSidTypeFilter: - def __init__(self, regex): - self.regex = re.compile(regex) - - def filter(self, avc): - if self.regex.match(avc.invalid_context.type): - return True - if self.regex.match(avc.scontext.type): - return True - if self.regex.match(avc.tcontext.type): - return True - return False - - diff --git a/lib/python2.7/site-packages/sepolgen/classperms.py b/lib/python2.7/site-packages/sepolgen/classperms.py deleted file mode 100644 index c925dee..0000000 --- a/lib/python2.7/site-packages/sepolgen/classperms.py +++ /dev/null @@ -1,116 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -import sys - -tokens = ('DEFINE', - 'NAME', - 'TICK', - 'SQUOTE', - 'OBRACE', - 'CBRACE', - 'SEMI', - 'OPAREN', - 'CPAREN', - 'COMMA') - -reserved = { - 'define' : 'DEFINE' } - -t_TICK = r'\`' -t_SQUOTE = r'\'' -t_OBRACE = r'\{' -t_CBRACE = r'\}' -t_SEMI = r'\;' -t_OPAREN = r'\(' -t_CPAREN = r'\)' -t_COMMA = r'\,' - -t_ignore = " \t\n" - -def t_NAME(t): - r'[a-zA-Z_][a-zA-Z0-9_]*' - t.type = reserved.get(t.value,'NAME') - return t - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.skip(1) - -import lex -lex.lex() - -def p_statements(p): - '''statements : define_stmt - | define_stmt statements - ''' - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = [p[1]] + [p[2]] - -def p_define_stmt(p): - # This sucks - corresponds to 'define(`foo',`{ read write }') - '''define_stmt : DEFINE OPAREN TICK NAME SQUOTE COMMA TICK list SQUOTE CPAREN - ''' - - p[0] = [p[4], p[8]] - -def p_list(p): - '''list : NAME - | OBRACE names CBRACE - ''' - if p[1] == "{": - p[0] = p[2] - else: - p[0] = [p[1]] - -def p_names(p): - '''names : NAME - | NAME names - ''' - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = [p[1]] + p[2] - -def p_error(p): - print "Syntax error on line %d %s [type=%s]" % (p.lineno, p.value, p.type) - -import yacc -yacc.yacc() - - -f = open("all_perms.spt") -txt = f.read() -f.close() - -#lex.input(txt) -#while 1: -# tok = lex.token() -# if not tok: -# break -# print tok - -test = "define(`foo',`{ read write append }')" -test2 = """define(`all_filesystem_perms',`{ mount remount unmount getattr relabelfrom relabelto transition associate quotamod quotaget }') -define(`all_security_perms',`{ compute_av compute_create compute_member check_context load_policy compute_relabel compute_user setenforce setbool setsecparam setcheckreqprot }') -""" -result = yacc.parse(txt) -print result - diff --git a/lib/python2.7/site-packages/sepolgen/defaults.py b/lib/python2.7/site-packages/sepolgen/defaults.py deleted file mode 100644 index 218bc7c..0000000 --- a/lib/python2.7/site-packages/sepolgen/defaults.py +++ /dev/null @@ -1,77 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -import os -import re - -# Select the correct location for the development files based on a -# path variable (optionally read from a configuration file) -class PathChoooser(object): - def __init__(self, pathname): - self.config = dict() - if not os.path.exists(pathname): - self.config_pathname = "(defaults)" - self.config["SELINUX_DEVEL_PATH"] = "/usr/share/selinux/default:/usr/share/selinux/mls:/usr/share/selinux/devel" - return - self.config_pathname = pathname - ignore = re.compile(r"^\s*(?:#.+)?$") - consider = re.compile(r"^\s*(\w+)\s*=\s*(.+?)\s*$") - for lineno, line in enumerate(open(pathname)): - if ignore.match(line): continue - mo = consider.match(line) - if not mo: - raise ValueError, "%s:%d: line is not in key = value format" % (pathname, lineno+1) - self.config[mo.group(1)] = mo.group(2) - - # We're only exporting one useful function, so why not be a function - def __call__(self, testfilename, pathset="SELINUX_DEVEL_PATH"): - paths = self.config.get(pathset, None) - if paths is None: - raise ValueError, "%s was not in %s" % (pathset, self.config_pathname) - paths = paths.split(":") - for p in paths: - target = os.path.join(p, testfilename) - if os.path.exists(target): return target - return os.path.join(paths[0], testfilename) - - -""" -Various default settings, including file and directory locations. -""" - -def data_dir(): - return "/var/lib/sepolgen" - -def perm_map(): - return data_dir() + "/perm_map" - -def interface_info(): - return data_dir() + "/interface_info" - -def attribute_info(): - return data_dir() + "/attribute_info" - -def refpolicy_makefile(): - chooser = PathChoooser("/etc/selinux/sepolgen.conf") - return chooser("Makefile") - -def headers(): - chooser = PathChoooser("/etc/selinux/sepolgen.conf") - return chooser("include") - diff --git a/lib/python2.7/site-packages/sepolgen/interfaces.py b/lib/python2.7/site-packages/sepolgen/interfaces.py deleted file mode 100644 index 88a6dc3..0000000 --- a/lib/python2.7/site-packages/sepolgen/interfaces.py +++ /dev/null @@ -1,509 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -""" -Classes for representing and manipulating interfaces. -""" - -import access -import refpolicy -import itertools -import objectmodel -import matching - -from sepolgeni18n import _ - -import copy - -class Param: - """ - Object representing a paramater for an interface. 
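defaults.py resolves everything through /etc/selinux/sepolgen.conf, a plain key = value file. A sketch of the lookup; the configuration line shown in the comment is an example value, not a requirement:

```python
from sepolgen import defaults

# /etc/selinux/sepolgen.conf might contain, for example:
#   SELINUX_DEVEL_PATH = /usr/share/selinux/default:/usr/share/selinux/devel
# Each colon-separated path is probed for the requested file; if none
# matches, the first path is returned as a fallback.
chooser = defaults.PathChoooser("/etc/selinux/sepolgen.conf")
print chooser("Makefile")      # e.g. /usr/share/selinux/devel/Makefile
print defaults.headers()       # same machinery, looking up "include"
```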
- """ - def __init__(self): - self.__name = "" - self.type = refpolicy.SRC_TYPE - self.obj_classes = refpolicy.IdSet() - self.required = True - - def set_name(self, name): - if not access.is_idparam(name): - raise ValueError("Name [%s] is not a param" % name) - self.__name = name - - def get_name(self): - return self.__name - - name = property(get_name, set_name) - - num = property(fget=lambda self: int(self.name[1:])) - - def __repr__(self): - return "<sepolgen.policygen.Param instance [%s, %s, %s]>" % \ - (self.name, refpolicy.field_to_str[self.type], " ".join(self.obj_classes)) - - -# Helper for extract perms -def __param_insert(name, type, av, params): - ret = 0 - if name in params: - p = params[name] - # The entries are identical - we're done - if type == p.type: - return - # Hanldle implicitly typed objects (like process) - if (type == refpolicy.SRC_TYPE or type == refpolicy.TGT_TYPE) and \ - (p.type == refpolicy.TGT_TYPE or p.type == refpolicy.SRC_TYPE): - #print name, refpolicy.field_to_str[p.type] - # If the object is not implicitly typed, tell the - # caller there is a likely conflict. - ret = 1 - if av: - avobjs = [av.obj_class] - else: - avobjs = [] - for obj in itertools.chain(p.obj_classes, avobjs): - if obj in objectmodel.implicitly_typed_objects: - ret = 0 - break - # "Promote" to a SRC_TYPE as this is the likely usage. - # We do this even if the above test fails on purpose - # as there is really no sane way to resolve the conflict - # here. The caller can take other actions if needed. - p.type = refpolicy.SRC_TYPE - else: - # There is some conflict - no way to resolve it really - # so we just leave the first entry and tell the caller - # there was a conflict. - ret = 1 - else: - p = Param() - p.name = name - p.type = type - params[p.name] = p - - if av: - p.obj_classes.add(av.obj_class) - return ret - - - -def av_extract_params(av, params): - """Extract the paramaters from an access vector. - - Extract the paramaters (in the form $N) from an access - vector, storing them as Param objects in a dictionary. - Some attempt is made at resolving conflicts with other - entries in the dict, but if an unresolvable conflict is - found it is reported to the caller. - - The goal here is to figure out how interface paramaters are - actually used in the interface - e.g., that $1 is a domain used as - a SRC_TYPE. In general an interface will look like this: - - interface(`foo', ` - allow $1 foo : file read; - ') - - This is simple to figure out - $1 is a SRC_TYPE. A few interfaces - are more complex, for example: - - interface(`foo_trans',` - domain_auto_trans($1,fingerd_exec_t,fingerd_t) - - allow $1 fingerd_t:fd use; - allow fingerd_t $1:fd use; - allow fingerd_t $1:fifo_file rw_file_perms; - allow fingerd_t $1:process sigchld; - ') - - Here the usage seems ambigious, but it is not. $1 is still domain - and therefore should be returned as a SRC_TYPE. 
- - Returns: - 0 - success - 1 - conflict found - """ - ret = 0 - found_src = False - if access.is_idparam(av.src_type): - if __param_insert(av.src_type, refpolicy.SRC_TYPE, av, params) == 1: - ret = 1 - - if access.is_idparam(av.tgt_type): - if __param_insert(av.tgt_type, refpolicy.TGT_TYPE, av, params) == 1: - ret = 1 - - if access.is_idparam(av.obj_class): - if __param_insert(av.obj_class, refpolicy.OBJ_CLASS, av, params) == 1: - ret = 1 - - for perm in av.perms: - if access.is_idparam(perm): - if __param_insert(perm, PERM) == 1: - ret = 1 - - return ret - -def role_extract_params(role, params): - if access.is_idparam(role.role): - return __param_insert(role.role, refpolicy.ROLE, None, params) - -def type_rule_extract_params(rule, params): - def extract_from_set(set, type): - ret = 0 - for x in set: - if access.is_idparam(x): - if __param_insert(x, type, None, params): - ret = 1 - return ret - - ret = 0 - if extract_from_set(rule.src_types, refpolicy.SRC_TYPE): - ret = 1 - - if extract_from_set(rule.tgt_types, refpolicy.TGT_TYPE): - ret = 1 - - if extract_from_set(rule.obj_classes, refpolicy.OBJ_CLASS): - ret = 1 - - if access.is_idparam(rule.dest_type): - if __param_insert(rule.dest_type, refpolicy.DEST_TYPE, None, params): - ret = 1 - - return ret - -def ifcall_extract_params(ifcall, params): - ret = 0 - for arg in ifcall.args: - if access.is_idparam(arg): - # Assume interface arguments are source types. Fairly safe - # assumption for most interfaces - if __param_insert(arg, refpolicy.SRC_TYPE, None, params): - ret = 1 - - return ret - -class AttributeVector: - def __init__(self): - self.name = "" - self.access = access.AccessVectorSet() - - def add_av(self, av): - self.access.add_av(av) - -class AttributeSet: - def __init__(self): - self.attributes = { } - - def add_attr(self, attr): - self.attributes[attr.name] = attr - - def from_file(self, fd): - def parse_attr(line): - fields = line[1:-1].split() - if len(fields) != 2 or fields[0] != "Attribute": - raise SyntaxError("Syntax error Attribute statement %s" % line) - a = AttributeVector() - a.name = fields[1] - - return a - - a = None - for line in fd: - line = line[:-1] - if line[0] == "[": - if a: - self.add_attr(a) - a = parse_attr(line) - elif a: - l = line.split(",") - av = access.AccessVector(l) - a.add_av(av) - if a: - self.add_attr(a) - -class InterfaceVector: - def __init__(self, interface=None, attributes={}): - # Enabled is a loose concept currently - we are essentially - # not enabling interfaces that we can't handle currently. - # See InterfaceVector.add_ifv for more information. - self.enabled = True - self.name = "" - # The access that is enabled by this interface - eventually - # this will include indirect access from typeattribute - # statements. - self.access = access.AccessVectorSet() - # Paramaters are stored in a dictionary (key: param name - # value: Param object). 
- self.params = { } - if interface: - self.from_interface(interface, attributes) - self.expanded = False - - def from_interface(self, interface, attributes={}): - self.name = interface.name - - # Add allow rules - for avrule in interface.avrules(): - if avrule.rule_type != refpolicy.AVRule.ALLOW: - continue - # Handle some policy bugs - if "dontaudit" in interface.name: - #print "allow rule in interface: %s" % interface - continue - avs = access.avrule_to_access_vectors(avrule) - for av in avs: - self.add_av(av) - - # Add typeattribute access - if attributes: - for typeattribute in interface.typeattributes(): - for attr in typeattribute.attributes: - if not attributes.attributes.has_key(attr): - # print "missing attribute " + attr - continue - attr_vec = attributes.attributes[attr] - for a in attr_vec.access: - av = copy.copy(a) - if av.src_type == attr_vec.name: - av.src_type = typeattribute.type - if av.tgt_type == attr_vec.name: - av.tgt_type = typeattribute.type - self.add_av(av) - - - # Extract paramaters from roles - for role in interface.roles(): - if role_extract_params(role, self.params): - pass - #print "found conflicting role param %s for interface %s" % \ - # (role.name, interface.name) - # Extract paramaters from type rules - for rule in interface.typerules(): - if type_rule_extract_params(rule, self.params): - pass - #print "found conflicting params in rule %s in interface %s" % \ - # (str(rule), interface.name) - - for ifcall in interface.interface_calls(): - if ifcall_extract_params(ifcall, self.params): - pass - #print "found conflicting params in ifcall %s in interface %s" % \ - # (str(ifcall), interface.name) - - - def add_av(self, av): - if av_extract_params(av, self.params) == 1: - pass - #print "found conflicting perms [%s]" % str(av) - self.access.add_av(av) - - def to_string(self): - s = [] - s.append("[InterfaceVector %s]" % self.name) - for av in self.access: - s.append(str(av)) - return "\n".join(s) - - def __str__(self): - return self.__repr__() - - def __repr__(self): - return "<InterfaceVector %s:%s>" % (self.name, self.enabled) - - -class InterfaceSet: - def __init__(self, output=None): - self.interfaces = { } - self.tgt_type_map = { } - self.tgt_type_all = [] - self.output = output - - def o(self, str): - if self.output: - self.output.write(str + "\n") - - def to_file(self, fd): - for iv in self.interfaces.values(): - fd.write("[InterfaceVector %s " % iv.name) - for param in iv.params.values(): - fd.write("%s:%s " % (param.name, refpolicy.field_to_str[param.type])) - fd.write("]\n") - avl = iv.access.to_list() - for av in avl: - fd.write(",".join(av)) - fd.write("\n") - - def from_file(self, fd): - def parse_ifv(line): - fields = line[1:-1].split() - if len(fields) < 2 or fields[0] != "InterfaceVector": - raise SyntaxError("Syntax error InterfaceVector statement %s" % line) - ifv = InterfaceVector() - ifv.name = fields[1] - if len(fields) == 2: - return - for field in fields[2:]: - p = field.split(":") - if len(p) != 2: - raise SyntaxError("Invalid param in InterfaceVector statement %s" % line) - param = Param() - param.name = p[0] - param.type = refpolicy.str_to_field[p[1]] - ifv.params[param.name] = param - return ifv - - ifv = None - for line in fd: - line = line[:-1] - if line[0] == "[": - if ifv: - self.add_ifv(ifv) - ifv = parse_ifv(line) - elif ifv: - l = line.split(",") - av = access.AccessVector(l) - ifv.add_av(av) - if ifv: - self.add_ifv(ifv) - - self.index() - - def add_ifv(self, ifv): - self.interfaces[ifv.name] = ifv - - def index(self): - for 
ifv in self.interfaces.values(): - tgt_types = set() - for av in ifv.access: - if access.is_idparam(av.tgt_type): - self.tgt_type_all.append(ifv) - tgt_types = set() - break - tgt_types.add(av.tgt_type) - - for type in tgt_types: - l = self.tgt_type_map.setdefault(type, []) - l.append(ifv) - - def add(self, interface, attributes={}): - ifv = InterfaceVector(interface, attributes) - self.add_ifv(ifv) - - def add_headers(self, headers, output=None, attributes={}): - for i in itertools.chain(headers.interfaces(), headers.templates()): - self.add(i, attributes) - - self.expand_ifcalls(headers) - self.index() - - def map_param(self, id, ifcall): - if access.is_idparam(id): - num = int(id[1:]) - if num > len(ifcall.args): - # Tell caller to drop this because it must have - # been generated from an optional param. - return None - else: - arg = ifcall.args[num - 1] - if isinstance(arg, list): - return arg - else: - return [arg] - else: - return [id] - - def map_add_av(self, ifv, av, ifcall): - src_types = self.map_param(av.src_type, ifcall) - if src_types is None: - return - - tgt_types = self.map_param(av.tgt_type, ifcall) - if tgt_types is None: - return - - obj_classes = self.map_param(av.obj_class, ifcall) - if obj_classes is None: - return - - new_perms = refpolicy.IdSet() - for perm in av.perms: - p = self.map_param(perm, ifcall) - if p is None: - continue - else: - new_perms.update(p) - if len(new_perms) == 0: - return - - for src_type in src_types: - for tgt_type in tgt_types: - for obj_class in obj_classes: - ifv.access.add(src_type, tgt_type, obj_class, new_perms) - - def do_expand_ifcalls(self, interface, if_by_name): - # Descend an interface call tree adding the access - # from each interface. This is a depth first walk - # of the tree. - - stack = [(interface, None)] - ifv = self.interfaces[interface.name] - ifv.expanded = True - - while len(stack) > 0: - cur, cur_ifcall = stack.pop(-1) - - cur_ifv = self.interfaces[cur.name] - if cur != interface: - - for av in cur_ifv.access: - self.map_add_av(ifv, av, cur_ifcall) - - # If we have already fully expanded this interface - # there is no reason to descend further. - if cur_ifv.expanded: - continue - - for ifcall in cur.interface_calls(): - if ifcall.ifname == interface.name: - self.o(_("Found circular interface class")) - return - try: - newif = if_by_name[ifcall.ifname] - except KeyError: - self.o(_("Missing interface definition for %s" % ifcall.ifname)) - continue - - stack.append((newif, ifcall)) - - - def expand_ifcalls(self, headers): - # Create a map of interface names to interfaces - - # this mirrors the interface vector map we already - # have. - if_by_name = { } - - for i in itertools.chain(headers.interfaces(), headers.templates()): - if_by_name[i.name] = i - - - for interface in itertools.chain(headers.interfaces(), headers.templates()): - self.do_expand_ifcalls(interface, if_by_name) - diff --git a/lib/python2.7/site-packages/sepolgen/lex.py b/lib/python2.7/site-packages/sepolgen/lex.py deleted file mode 100644 index c149366..0000000 --- a/lib/python2.7/site-packages/sepolgen/lex.py +++ /dev/null @@ -1,866 +0,0 @@ -#----------------------------------------------------------------------------- -# ply: lex.py -# -# Author: David M. Beazley (dave@dabeaz.com) -# -# Copyright (C) 2001-2006, David M. 
Beazley -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# See the file COPYING for a complete copy of the LGPL. -#----------------------------------------------------------------------------- - -__version__ = "2.2" - -import re, sys, types - -# Regular expression used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') - -# Available instance types. This is used when lexers are defined by a class. -# It's a little funky because I want to preserve backwards compatibility -# with Python 2.0 where types.ObjectType is undefined. - -try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) -except AttributeError: - _INSTANCETYPE = types.InstanceType - class object: pass # Note: needed if no new-style classes present - -# Exception thrown when invalid token encountered and no default error -# handler is defined. -class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s - -# Token class -class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) - def __repr__(self): - return str(self) - def skip(self,n): - self.lexer.skip(n) - -# ----------------------------------------------------------------------------- -# Lexer class -# -# This class encapsulates all of the methods and data associated with a lexer. -# -# input() - Store a new string in the lexer -# token() - Get the next token -# ----------------------------------------------------------------------------- - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. 
This is a list of - # tuples (re,findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstate = "INITIAL" # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - self.lexdebug = 0 # Debugging mode - self.lexoptimize = 0 # Optimized mode - - def clone(self,object=None): - c = Lexer() - c.lexstatere = self.lexstatere - c.lexstateinfo = self.lexstateinfo - c.lexstateretext = self.lexstateretext - c.lexstate = self.lexstate - c.lexstatestack = self.lexstatestack - c.lexstateignore = self.lexstateignore - c.lexstateerrorf = self.lexstateerrorf - c.lexreflags = self.lexreflags - c.lexdata = self.lexdata - c.lexpos = self.lexpos - c.lexlen = self.lexlen - c.lextokens = self.lextokens - c.lexdebug = self.lexdebug - c.lineno = self.lineno - c.lexoptimize = self.lexoptimize - c.lexliterals = self.lexliterals - c.lexmodule = self.lexmodule - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. - - if object: - newtab = { } - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = { } - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) - c.lexmodule = object - - # Set up other attributes - c.begin(c.lexstate) - return c - - # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self,tabfile): - tf = open(tabfile+".py","w") - tf.write("# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" % (tabfile,__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - exec "import %s as lextab" % tabfile - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] - self.begin('INITIAL') - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self,s): - if not (isinstance(s,types.StringType) or isinstance(s,types.UnicodeType)): - raise ValueError, "Expected a string" - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self,state): - if not self.lexstatere.has_key(state): - raise ValueError, "Undefined state" - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) - self.lexstate = state - - # ------------------------------------------------------------ - # push_state() - Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self,state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # ------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self,n): - 
self.lexpos += n - - # ------------------------------------------------------------ - # token() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue - - # Set last match in lexer so that rules can access it if they want - self.lexmatch = m - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - tok.lexer = self - - lexpos = m.end() - i = m.lastindex - func,tok.type = lexindexfunc[i] - self.lexpos = lexpos - - if not func: - # If no token type was set, it's an ignored token - if tok.type: return tok - break - - # if func not callable, it means it's an ignored token - if not callable(func): - break - - # If token is processed by a function, call it - newtok = func(tok) - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - break - - # Verify type of the token. If not in the token map, raise an error - if not self.lexoptimize: - if not self.lextokens.has_key(newtok.type): - raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func.func_code.co_filename, func.func_code.co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) - - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.lexer = self - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = "error" - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: continue - return newtok - - self.lexpos = lexpos - raise LexError, ("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError, "No input string given with input()" - return None - -# ----------------------------------------------------------------------------- -# _validate_file() -# -# This checks to see if there are duplicated t_rulename() functions or strings -# in the parser input file. This is done using a simple regular expression -# match on each line in the filename. -# ----------------------------------------------------------------------------- - -def _validate_file(filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea what the file is. 
Return OK - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return 1 # Oh well - - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - noerror = 1 - for l in lines: - m = fre.match(l) - if not m: - m = sre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev) - noerror = 0 - linen += 1 - return noerror - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist): - result = [] - for f in funclist: - if f and f[0]: - result.append((f[0].__name__,f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. -# ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict): - if not relist: return [] - regex = "|".join(relist) - try: - lexre = re.compile(regex,re.VERBOSE | reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,handle.__name__[2:]) - elif handle is not None: - # If rule was specified as a string, we build an anonymous - # callback function to carry out the action - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) - print "IGNORE", f - else: - lexindexfunc[i] = (None, f[2:]) - - return [(lexre,lexindexfunc)],[regex] - except Exception,e: - m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre = _form_master_re(relist[:m],reflags,ldict) - rlist, rre = _form_master_re(relist[m:],reflags,ldict) - return llist+rlist, lre+rre - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. 
For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- - -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not names.has_key(parts[i]) and parts[i] != 'ANY': break - if i > 1: - states = tuple(parts[1:i]) - else: - states = ('INITIAL',) - - if 'ANY' in states: - states = tuple(names.keys()) - - tokenname = "_".join(parts[i:]) - return (states,tokenname) - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0): - global lexer - ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} - error = 0 - files = { } - lexobj = Lexer() - lexobj.lexdebug = debug - lexobj.lexoptimize = optimize - global token,input - - if nowarn: warn = 0 - else: warn = 1 - - if object: module = object - - if module: - # User supplied a module object. - if isinstance(module, types.ModuleType): - ldict = module.__dict__ - elif isinstance(module, _INSTANCETYPE): - _items = [(k,getattr(module,k)) for k in dir(module)] - ldict = { } - for (i,v) in _items: - ldict[i] = v - else: - raise ValueError,"Expected a module or instance" - lexobj.lexmodule = module - - else: - # No module given. We might be able to get information from the caller. - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - f = f.f_back # Walk out to our calling function - ldict = f.f_globals # Grab its globals dictionary - - if optimize and lextab: - try: - lexobj.readtab(lextab,ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass - - # Get the tokens, states, and literals variables (if any) - if (module and isinstance(module,_INSTANCETYPE)): - tokens = getattr(module,"tokens",None) - states = getattr(module,"states",None) - literals = getattr(module,"literals","") - else: - tokens = ldict.get("tokens",None) - states = ldict.get("states",None) - literals = ldict.get("literals","") - - if not tokens: - raise SyntaxError,"lex: module does not define 'tokens'" - if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): - raise SyntaxError,"lex: tokens must be a list or tuple." - - # Build a dictionary of valid token names - lexobj.lextokens = { } - if not optimize: - for n in tokens: - if not _is_identifier.match(n): - print "lex: Bad token name '%s'" % n - error = 1 - if warn and lexobj.lextokens.has_key(n): - print "lex: Warning. Token '%s' multiply defined." % n - lexobj.lextokens[n] = None - else: - for n in tokens: lexobj.lextokens[n] = None - - if debug: - print "lex: tokens = '%s'" % lexobj.lextokens.keys() - - try: - for c in literals: - if not (isinstance(c,types.StringType) or isinstance(c,types.UnicodeType)) or len(c) > 1: - print "lex: Invalid literal %s. Must be a single character" % repr(c) - error = 1 - continue - - except TypeError: - print "lex: Invalid literals specification. literals must be a sequence of characters." - error = 1 - - lexobj.lexliterals = literals - - # Build statemap - if states: - if not (isinstance(states,types.TupleType) or isinstance(states,types.ListType)): - print "lex: states must be defined as a tuple or list." 
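# A state specification that passes the checks below would look like this
# in the user's lexer module (a sketch; the state names are hypothetical):
#
#     states = (
#         ('comment', 'exclusive'),   # only 'comment' rules match here
#         ('macro', 'inclusive'),     # INITIAL rules also apply in 'macro'
#     )
#
# Rules are bound to states by name: t_comment_BODY fires only in the
# 'comment' state, and t_ANY_NEWLINE fires in every declared state.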
- error = 1 - else: - for s in states: - if not isinstance(s,types.TupleType) or len(s) != 2: - print "lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s) - error = 1 - continue - name, statetype = s - if not isinstance(name,types.StringType): - print "lex: state name %s must be a string" % repr(name) - error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - print "lex: state type for state %s must be 'inclusive' or 'exclusive'" % name - error = 1 - continue - if stateinfo.has_key(name): - print "lex: state '%s' already defined." % name - error = 1 - continue - stateinfo[name] = statetype - - # Get a list of symbols with the t_ or s_ prefix - tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ] - - # Now build up a list of functions and a list of strings - - funcsym = { } # Symbols defined as functions - strsym = { } # Symbols defined as strings - toknames = { } # Mapping of symbols to token names - - for s in stateinfo.keys(): - funcsym[s] = [] - strsym[s] = [] - - ignore = { } # Ignore strings by state - errorf = { } # Error functions by state - - if len(tsymbols) == 0: - raise SyntaxError,"lex: no rules of the form t_rulename are defined." - - for f in tsymbols: - t = ldict[f] - states, tokname = _statetoken(f,stateinfo) - toknames[f] = tokname - - if callable(t): - for s in states: funcsym[s].append((f,t)) - elif (isinstance(t, types.StringType) or isinstance(t,types.UnicodeType)): - for s in states: strsym[s].append((f,t)) - else: - print "lex: %s not defined as a function or string" % f - error = 1 - - # Sort the functions by line number - for f in funcsym.values(): - f.sort(lambda x,y: cmp(x[1].func_code.co_firstlineno,y[1].func_code.co_firstlineno)) - - # Sort the strings by regular expression length - for s in strsym.values(): - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - - regexs = { } - - # Build the master regular expressions - for state in stateinfo.keys(): - regex_list = [] - - # Add rules defined by functions first - for fname, f in funcsym[state]: - line = f.func_code.co_firstlineno - file = f.func_code.co_filename - files[file] = None - tokname = toknames[fname] - - ismethod = isinstance(f, types.MethodType) - - if not optimize: - nargs = f.func_code.co_argcount - if ismethod: - reqargs = 2 - else: - reqargs = 1 - if nargs > reqargs: - print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) - error = 1 - continue - - if nargs < reqargs: - print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) - error = 1 - continue - - if tokname == 'ignore': - print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__) - error = 1 - continue - - if tokname == 'error': - errorf[state] = f - continue - - if f.__doc__: - if not optimize: - try: - c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags) - if c.match(""): - print "%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__) - error = 1 - continue - except re.error,e: - print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e) - if '#' in f.__doc__: - print "%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__) - error = 1 - continue - - if debug: - print "lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state) - - # Okay. The regular expression seemed okay. 
Let's append it to the master regular - # expression we're building - - regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__)) - else: - print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__) - - # Now add all of the simple rules - for name,r in strsym[state]: - tokname = toknames[name] - - if tokname == 'ignore': - ignore[state] = r - continue - - if not optimize: - if tokname == 'error': - raise SyntaxError,"lex: Rule '%s' must be defined as a function" % name - error = 1 - continue - - if not lexobj.lextokens.has_key(tokname) and tokname.find("ignore_") < 0: - print "lex: Rule '%s' defined for an unspecified token %s." % (name,tokname) - error = 1 - continue - try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags) - if (c.match("")): - print "lex: Regular expression for rule '%s' matches empty string." % name - error = 1 - continue - except re.error,e: - print "lex: Invalid regular expression for rule '%s'. %s" % (name,e) - if '#' in r: - print "lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name - - error = 1 - continue - if debug: - print "lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state) - - regex_list.append("(?P<%s>%s)" % (name,r)) - - if not regex_list: - print "lex: No rules defined for state '%s'" % state - error = 1 - - regexs[state] = regex_list - - - if not optimize: - for f in files.keys(): - if not _validate_file(f): - error = 1 - - if error: - raise SyntaxError,"lex: Unable to build lexer." - - # From this point forward, we're reasonably confident that we can build the lexer. - # No more errors will be generated, but there might be some warning messages. - - # Build the master regular expressions - - for state in regexs.keys(): - lexre, re_text = _form_master_re(regexs[state],reflags,ldict) - lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - if debug: - for i in range(len(re_text)): - print "lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i]) - - # For inclusive states, we need to add the INITIAL state - for state,type in stateinfo.items(): - if state != "INITIAL" and type == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] - - # Set up ignore variables - lexobj.lexstateignore = ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") - - # Set up error functions - lexobj.lexstateerrorf = errorf - lexobj.lexerrorf = errorf.get("INITIAL",None) - if warn and not lexobj.lexerrorf: - print "lex: Warning. no t_error rule is defined." - - # Check state information for ignore and error rules - for s,stype in stateinfo.items(): - if stype == 'exclusive': - if warn and not errorf.has_key(s): - print "lex: Warning. no error rule is defined for exclusive state '%s'" % s - if warn and not ignore.has_key(s) and lexobj.lexignore: - print "lex: Warning. 
no ignore rule is defined for exclusive state '%s'" % s - elif stype == 'inclusive': - if not errorf.has_key(s): - errorf[s] = errorf.get("INITIAL",None) - if not ignore.has_key(s): - ignore[s] = ignore.get("INITIAL","") - - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - # If in optimize mode, we write the lextab - if lextab and optimize: - lexobj.writetab(lextab) - - return lexobj - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - -def runmain(lexer=None,data=None): - if not data: - try: - filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() - except IndexError: - print "Reading from standard input (type EOF to end):" - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while 1: - tok = _token() - if not tok: break - print "(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos) - - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - -def TOKEN(r): - def set_doc(f): - f.__doc__ = r - return f - return set_doc - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - diff --git a/lib/python2.7/site-packages/sepolgen/matching.py b/lib/python2.7/site-packages/sepolgen/matching.py deleted file mode 100644 index d56dd92..0000000 --- a/lib/python2.7/site-packages/sepolgen/matching.py +++ /dev/null @@ -1,255 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -""" -Classes and algorithms for matching requested access to access vectors. 
-""" - -import access -import objectmodel -import itertools - -class Match: - def __init__(self, interface=None, dist=0): - self.interface = interface - self.dist = dist - self.info_dir_change = False - - def __cmp__(self, other): - if self.dist == other.dist: - if self.info_dir_change: - if other.info_dir_change: - return 0 - else: - return 1 - else: - if other.info_dir_change: - return -1 - else: - return 0 - else: - if self.dist < other.dist: - return -1 - else: - return 1 - -class MatchList: - DEFAULT_THRESHOLD = 150 - def __init__(self): - # Match objects that pass the threshold - self.children = [] - # Match objects over the threshold - self.bastards = [] - self.threshold = self.DEFAULT_THRESHOLD - self.allow_info_dir_change = False - self.av = None - - def best(self): - if len(self.children): - return self.children[0] - if len(self.bastards): - return self.bastards[0] - return None - - def __len__(self): - # Only return the length of the matches so - # that this can be used to test if there is - # a match. - return len(self.children) + len(self.bastards) - - def __iter__(self): - return iter(self.children) - - def all(self): - return itertools.chain(self.children, self.bastards) - - def append(self, match): - if match.dist <= self.threshold: - if not match.info_dir_change or self.allow_info_dir_change: - self.children.append(match) - else: - self.bastards.append(match) - else: - self.bastards.append(match) - - def sort(self): - self.children.sort() - self.bastards.sort() - - -class AccessMatcher: - def __init__(self, perm_maps=None): - self.type_penalty = 10 - self.obj_penalty = 10 - if perm_maps: - self.perm_maps = perm_maps - else: - self.perm_maps = objectmodel.PermMappings() - # We want a change in the information flow direction - # to be a strong penalty - stronger than access to - # a few unrelated types. - self.info_dir_penalty = 100 - - def type_distance(self, a, b): - if a == b or access.is_idparam(b): - return 0 - else: - return -self.type_penalty - - - def perm_distance(self, av_req, av_prov): - # First check that we have enough perms - diff = av_req.perms.difference(av_prov.perms) - - if len(diff) != 0: - total = self.perm_maps.getdefault_distance(av_req.obj_class, diff) - return -total - else: - diff = av_prov.perms.difference(av_req.perms) - return self.perm_maps.getdefault_distance(av_req.obj_class, diff) - - def av_distance(self, req, prov): - """Determine the 'distance' between 2 access vectors. - - This function is used to find an access vector that matches - a 'required' access. To do this we comput a signed numeric - value that indicates how close the req access is to the - 'provided' access vector. The closer the value is to 0 - the closer the match, with 0 being an exact match. - - A value over 0 indicates that the prov access vector provides more - access than the req (in practice, this means that the source type, - target type, and object class is the same and the perms in prov is - a superset of those in req. - - A value under 0 indicates that the prov access less - or unrelated - - access to the req access. A different type or object class will - result in a very low value. - - The values other than 0 should only be interpreted relative to - one another - they have no exact meaning and are likely to - change. - - Params: - req - [AccessVector] The access that is required. This is the - access being matched. - prov - [AccessVector] The access provided. This is the potential - match that is being evaluated for req. 
- Returns: - 0 : Exact match between the acess vectors. - - < 0 : The prov av does not provide all of the access in req. - A smaller value indicates that the access is further. - - > 0 : The prov av provides more access than req. The larger - the value the more access over req. - """ - # FUTURE - this is _very_ expensive and probably needs some - # thorough performance work. This version is meant to give - # meaningful results relatively simply. - dist = 0 - - # Get the difference between the types. The addition is safe - # here because type_distance only returns 0 or negative. - dist += self.type_distance(req.src_type, prov.src_type) - dist += self.type_distance(req.tgt_type, prov.tgt_type) - - # Object class distance - if req.obj_class != prov.obj_class and not access.is_idparam(prov.obj_class): - dist -= self.obj_penalty - - # Permission distance - - # If this av doesn't have a matching source type, target type, and object class - # count all of the permissions against it. Otherwise determine the perm - # distance and dir. - if dist < 0: - pdist = self.perm_maps.getdefault_distance(prov.obj_class, prov.perms) - else: - pdist = self.perm_distance(req, prov) - - # Combine the perm and other distance - if dist < 0: - if pdist < 0: - return dist + pdist - else: - return dist - pdist - elif dist >= 0: - if pdist < 0: - return pdist - dist - else: - return dist + pdist - - def av_set_match(self, av_set, av): - """ - - """ - dist = None - - # Get the distance for each access vector - for x in av_set: - tmp = self.av_distance(av, x) - if dist is None: - dist = tmp - elif tmp >= 0: - if dist >= 0: - dist += tmp - else: - dist = tmp + -dist - else: - if dist < 0: - dist += tmp - else: - dist -= tmp - - # Penalize for information flow - we want to prevent the - # addition of a write if the requested is read none. We are - # much less concerned about the reverse. - av_dir = self.perm_maps.getdefault_direction(av.obj_class, av.perms) - - if av_set.info_dir is None: - av_set.info_dir = objectmodel.FLOW_NONE - for x in av_set: - av_set.info_dir = av_set.info_dir | \ - self.perm_maps.getdefault_direction(x.obj_class, x.perms) - if (av_dir & objectmodel.FLOW_WRITE == 0) and (av_set.info_dir & objectmodel.FLOW_WRITE): - if dist < 0: - dist -= self.info_dir_penalty - else: - dist += self.info_dir_penalty - - return dist - - def search_ifs(self, ifset, av, match_list): - match_list.av = av - for iv in itertools.chain(ifset.tgt_type_all, - ifset.tgt_type_map.get(av.tgt_type, [])): - if not iv.enabled: - #print "iv %s not enabled" % iv.name - continue - - dist = self.av_set_match(iv.access, av) - if dist >= 0: - m = Match(iv, dist) - match_list.append(m) - - - match_list.sort() - - diff --git a/lib/python2.7/site-packages/sepolgen/module.py b/lib/python2.7/site-packages/sepolgen/module.py deleted file mode 100644 index 7fc9443..0000000 --- a/lib/python2.7/site-packages/sepolgen/module.py +++ /dev/null @@ -1,213 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-
-"""
-Utilities for dealing with the compilation of modules and creation
-of module trees.
-"""
-
-import defaults
-
-import selinux
-
-import re
-import tempfile
-import commands
-import os
-import os.path
-import subprocess
-import shutil
-
-def is_valid_name(modname):
-    """Check that a module name is valid.
-    """
-    m = re.findall(r"[^a-zA-Z0-9_\-\.]", modname)
-    if len(m) == 0 and modname[0].isalpha():
-        return True
-    else:
-        return False
-
-class ModuleTree:
-    def __init__(self, modname):
-        self.modname = modname
-        self.dirname = None
-
-    def dir_name(self):
-        return self.dirname
-
-    def te_name(self):
-        return self.dirname + "/" + self.modname + ".te"
-
-    def fc_name(self):
-        return self.dirname + "/" + self.modname + ".fc"
-
-    def if_name(self):
-        return self.dirname + "/" + self.modname + ".if"
-
-    def package_name(self):
-        return self.dirname + "/" + self.modname + ".pp"
-
-    def makefile_name(self):
-        return self.dirname + "/Makefile"
-
-    def create(self, parent_dirname, makefile_include=None):
-        self.dirname = parent_dirname + "/" + self.modname
-        os.mkdir(self.dirname)
-        fd = open(self.makefile_name(), "w")
-        if makefile_include:
-            fd.write("include " + makefile_include)
-        else:
-            fd.write("include " + defaults.refpolicy_makefile())
-        fd.close()
-
-        # Create empty files for the standard refpolicy
-        # module files
-        open(self.te_name(), "w").close()
-        open(self.fc_name(), "w").close()
-        open(self.if_name(), "w").close()
-
-def modname_from_sourcename(sourcename):
-    return os.path.splitext(os.path.split(sourcename)[1])[0]
-
-class ModuleCompiler:
-    """ModuleCompiler eases running of the module compiler.
-
-    The ModuleCompiler class encapsulates running the commandline
-    module compiler (checkmodule) and module packager (semodule_package).
-    You are likely interested in the create_module_package method.
-
-    Several options are controlled via parameters (these only affect
-    the non-refpol builds):
-
-     .mls      [boolean] Generate an MLS module (by passing -M to
-               checkmodule). True to generate an MLS module, false
-               otherwise.
-
-     .module   [boolean] Generate a module instead of a base module.
-               True to generate a module, false to generate a base.
-
-     .checkmodule    [string] Fully qualified path to the module compiler.
-                     Default is /usr/bin/checkmodule.
-
-     .semodule_package [string] Fully qualified path to the module
-                       packager. Defaults to /usr/bin/semodule_package.
-
-     .output    [file object] File object used to write verbose
-                output of the compilation and packaging process.
-    """
-    def __init__(self, output=None):
-        """Create a ModuleCompiler instance, optionally with an
-        output file object for verbose output of the compilation process.
-        """
-        self.mls = selinux.is_selinux_mls_enabled()
-        self.module = True
-        self.checkmodule = "/usr/bin/checkmodule"
-        self.semodule_package = "/usr/bin/semodule_package"
-        self.output = output
-        self.last_output = ""
-        self.refpol_makefile = defaults.refpolicy_makefile()
-        self.make = "/usr/bin/make"
-
-    def o(self, str):
-        if self.output:
-            self.output.write(str + "\n")
-        self.last_output = str
-
-    def run(self, command):
-        self.o(command)
-        rc, output = commands.getstatusoutput(command)
-        self.o(output)
-
-        return rc
-
-    def gen_filenames(self, sourcename):
-        """Generate the module and policy package filenames from
-        a source file name.
The source file must be in the form - of "foo.te". This will generate "foo.mod" and "foo.pp". - - Returns a tuple with (modname, policypackage). - """ - splitname = sourcename.split(".") - if len(splitname) < 2: - raise RuntimeError("invalid sourcefile name %s (must end in .te)", sourcename) - # Handle other periods in the filename correctly - basename = ".".join(splitname[0:-1]) - modname = basename + ".mod" - packagename = basename + ".pp" - - return (modname, packagename) - - def create_module_package(self, sourcename, refpolicy=True): - """Create a module package saved in a packagename from a - sourcename. - - The create_module_package creates a module package saved in a - file named sourcename (.pp is the standard extension) from a - source file (.te is the standard extension). The source file - should contain SELinux policy statements appropriate for a - base or non-base module (depending on the setting of .module). - - Only file names are accepted, not open file objects or - descriptors because the command line SELinux tools are used. - - On error a RuntimeError will be raised with a descriptive - error message. - """ - if refpolicy: - self.refpol_build(sourcename) - else: - modname, packagename = self.gen_filenames(sourcename) - self.compile(sourcename, modname) - self.package(modname, packagename) - os.unlink(modname) - - def refpol_build(self, sourcename): - # Compile - command = self.make + " -f " + self.refpol_makefile - rc = self.run(command) - - # Raise an error if the process failed - if rc != 0: - raise RuntimeError("compilation failed:\n%s" % self.last_output) - - def compile(self, sourcename, modname): - s = [self.checkmodule] - if self.mls: - s.append("-M") - if self.module: - s.append("-m") - s.append("-o") - s.append(modname) - s.append(sourcename) - - rc = self.run(" ".join(s)) - if rc != 0: - raise RuntimeError("compilation failed:\n%s" % self.last_output) - - def package(self, modname, packagename): - s = [self.semodule_package] - s.append("-o") - s.append(packagename) - s.append("-m") - s.append(modname) - - rc = self.run(" ".join(s)) - if rc != 0: - raise RuntimeError("packaging failed [%s]" % self.last_output) - - diff --git a/lib/python2.7/site-packages/sepolgen/objectmodel.py b/lib/python2.7/site-packages/sepolgen/objectmodel.py deleted file mode 100644 index 88c8a1f..0000000 --- a/lib/python2.7/site-packages/sepolgen/objectmodel.py +++ /dev/null @@ -1,172 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -""" -This module provides knowledge object classes and permissions. It should -be used to keep this knowledge from leaking into the more generic parts of -the policy generation. 
-""" - -# Objects that can be implicitly typed - these objects do -# not _have_ to be implicitly typed (e.g., sockets can be -# explicitly labeled), but they often are. -# -# File is in this list for /proc/self -# -# This list is useful when dealing with rules that have a -# type (or param) used as both a subject and object. For -# example: -# -# allow httpd_t httpd_t : socket read; -# -# This rule makes sense because the socket was (presumably) created -# by a process with the type httpd_t. -implicitly_typed_objects = ["socket", "fd", "process", "file", "lnk_file", "fifo_file", - "dbus", "capability", "unix_stream_socket"] - -#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -# -#Information Flow -# -# All of the permissions in SELinux can be described in terms of -# information flow. For example, a read of a file is a flow of -# information from that file to the process reading. Viewing -# permissions in these terms can be used to model a varity of -# security properties. -# -# Here we have some infrastructure for understanding permissions -# in terms of information flow -# -#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -# Information flow deals with information either flowing from a subject -# to and object ("write") or to a subject from an object ("read"). Read -# or write is described from the subject point-of-view. It is also possible -# for a permission to represent both a read and write (though the flow is -# typical asymettric in terms of bandwidth). It is also possible for -# permission to not flow information (meaning that the result is pure -# side-effect). -# -# The following constants are for representing the directionality -# of information flow. -FLOW_NONE = 0 -FLOW_READ = 1 -FLOW_WRITE = 2 -FLOW_BOTH = FLOW_READ | FLOW_WRITE - -# These are used by the parser and for nice disply of the directions -str_to_dir = { "n" : FLOW_NONE, "r" : FLOW_READ, "w" : FLOW_WRITE, "b" : FLOW_BOTH } -dir_to_str = { FLOW_NONE : "n", FLOW_READ : "r", FLOW_WRITE : "w", FLOW_BOTH : "b" } - -class PermMap: - """A mapping between a permission and its information flow properties. - - PermMap represents the information flow properties of a single permission - including the direction (read, write, etc.) and an abstract representation - of the bandwidth of the flow (weight). - """ - def __init__(self, perm, dir, weight): - self.perm = perm - self.dir = dir - self.weight = weight - - def __repr__(self): - return "<sepolgen.objectmodel.PermMap %s %s %d>" % (self.perm, - dir_to_str[self.dir], - self.weight) - -class PermMappings: - """The information flow properties of a set of object classes and permissions. - - PermMappings maps one or more classes and permissions to their PermMap objects - describing their information flow charecteristics. - """ - def __init__(self): - self.classes = { } - self.default_weight = 5 - self.default_dir = FLOW_BOTH - - def from_file(self, fd): - """Read the permission mappings from a file. This reads the format used - by Apol in the setools suite. - """ - # This parsing is deliberitely picky and bails at the least error. It - # is assumed that the permission map file will be shipped as part - # of sepolgen and not user modified, so this is a reasonable design - # choice. If user supplied permission mappings are needed the parser - # should be made a little more robust and give better error messages. 
- cur = None - for line in fd: - fields = line.split() - if len(fields) == 0 or len(fields) == 1 or fields[0] == "#": - continue - if fields[0] == "class": - c = fields[1] - if self.classes.has_key(c): - raise ValueError("duplicate class in perm map") - self.classes[c] = { } - cur = self.classes[c] - else: - if len(fields) != 3: - raise ValueError("error in object classs permissions") - if cur is None: - raise ValueError("permission outside of class") - pm = PermMap(fields[0], str_to_dir[fields[1]], int(fields[2])) - cur[pm.perm] = pm - - def get(self, obj, perm): - """Get the permission map for the object permission. - - Returns: - PermMap representing the permission - Raises: - KeyError if the object or permission is not defined - """ - return self.classes[obj][perm] - - def getdefault(self, obj, perm): - """Get the permission map for the object permission or a default. - - getdefault is the same as get except that a default PermMap is - returned if the object class or permission is not defined. The - default is FLOW_BOTH with a weight of 5. - """ - try: - pm = self.classes[obj][perm] - except KeyError: - return PermMap(perm, self.default_dir, self.default_weight) - return pm - - def getdefault_direction(self, obj, perms): - dir = FLOW_NONE - for perm in perms: - pm = self.getdefault(obj, perm) - dir = dir | pm.dir - return dir - - def getdefault_distance(self, obj, perms): - total = 0 - for perm in perms: - pm = self.getdefault(obj, perm) - total += pm.weight - - return total - - - diff --git a/lib/python2.7/site-packages/sepolgen/output.py b/lib/python2.7/site-packages/sepolgen/output.py deleted file mode 100644 index 739452d..0000000 --- a/lib/python2.7/site-packages/sepolgen/output.py +++ /dev/null @@ -1,173 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -""" -Classes and functions for the output of reference policy modules. - -This module takes a refpolicy.Module object and formats it for -output using the ModuleWriter object. By separating the output -in this way the other parts of Madison can focus solely on -generating policy. This keeps the semantic / syntactic issues -cleanly separated from the formatting issues. -""" - -import refpolicy -import util - -class ModuleWriter: - def __init__(self): - self.fd = None - self.module = None - self.sort = True - self.requires = True - - def write(self, module, fd): - self.module = module - - if self.sort: - sort_filter(self.module) - - # FIXME - make this handle nesting - for node, depth in refpolicy.walktree(self.module, showdepth=True): - fd.write("%s\n" % str(node)) - -# Helper functions for sort_filter - this is all done old school -# C style rather than with polymorphic methods because this sorting -# is specific to output. It is not necessarily the comparison you -# want generally. 
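# As a usage sketch (the Module instance would normally come from
# policygen.PolicyGenerator.get_module(); 'mod' here is hypothetical):
#
#     import sys
#     import output
#
#     writer = output.ModuleWriter()
#     writer.sort = True          # group and sort statements (the default)
#     writer.write(mod, sys.stdout)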
-
-# Compare two IdSets - we could probably do something clever
-# with set difference here, but this works.
-def id_set_cmp(x, y):
-    xl = util.set_to_list(x)
-    xl.sort()
-    yl = util.set_to_list(y)
-    yl.sort()
-
-    if len(xl) != len(yl):
-        return cmp(xl[0], yl[0])
-    for v in zip(xl, yl):
-        if v[0] != v[1]:
-            return cmp(v[0], v[1])
-    return 0
-
-# Compare two avrules
-def avrule_cmp(a, b):
-    ret = id_set_cmp(a.src_types, b.src_types)
-    if ret != 0:
-        return ret
-    ret = id_set_cmp(a.tgt_types, b.tgt_types)
-    if ret != 0:
-        return ret
-    ret = id_set_cmp(a.obj_classes, b.obj_classes)
-    if ret != 0:
-        return ret
-
-    # At this point, who cares - just return something
-    return cmp(len(a.perms), len(b.perms))
-
-# Compare two interface calls
-def ifcall_cmp(a, b):
-    if a.args[0] != b.args[0]:
-        return cmp(a.args[0], b.args[0])
-    return cmp(a.ifname, b.ifname)
-
-# Compare any two avrules or interface calls
-def rule_cmp(a, b):
-    if isinstance(a, refpolicy.InterfaceCall):
-        if isinstance(b, refpolicy.InterfaceCall):
-            return ifcall_cmp(a, b)
-        else:
-            return id_set_cmp([a.args[0]], b.src_types)
-    else:
-        if isinstance(b, refpolicy.AVRule):
-            return avrule_cmp(a, b)
-        else:
-            return id_set_cmp(a.src_types, [b.args[0]])
-
-def role_type_cmp(a, b):
-    return cmp(a.role, b.role)
-
-def sort_filter(module):
-    """Sort and group the output for readability.
-    """
-    def sort_node(node):
-        c = []
-
-        # Module statement
-        for mod in node.module_declarations():
-            c.append(mod)
-            c.append(refpolicy.Comment())
-
-        # Requires
-        for require in node.requires():
-            c.append(require)
-        c.append(refpolicy.Comment())
-
-        # Rules
-        #
-        # We are going to group output by source type (which
-        # we assume is the first argument for interfaces).
-        rules = []
-        rules.extend(node.avrules())
-        rules.extend(node.interface_calls())
-        rules.sort(rule_cmp)
-
-        cur = None
-        sep_rules = []
-        for rule in rules:
-            if isinstance(rule, refpolicy.InterfaceCall):
-                x = rule.args[0]
-            else:
-                x = util.first(rule.src_types)
-
-            if cur != x:
-                if cur:
-                    sep_rules.append(refpolicy.Comment())
-                cur = x
-                comment = refpolicy.Comment()
-                comment.lines.append("============= %s ==============" % cur)
-                sep_rules.append(comment)
-            sep_rules.append(rule)
-
-        c.extend(sep_rules)
-
-        ras = []
-        ras.extend(node.role_types())
-        ras.sort(role_type_cmp)
-        if len(ras):
-            comment = refpolicy.Comment()
-            comment.lines.append("============= ROLES ==============")
-            c.append(comment)
-
-        c.extend(ras)
-
-        # Everything else
-        for child in node.children:
-            if child not in c:
-                c.append(child)
-
-        node.children = c
-
-    for node in module.nodes():
-        sort_node(node)
-
-
diff --git a/lib/python2.7/site-packages/sepolgen/policygen.py b/lib/python2.7/site-packages/sepolgen/policygen.py
deleted file mode 100644
index 5f38577..0000000
--- a/lib/python2.7/site-packages/sepolgen/policygen.py
+++ /dev/null
@@ -1,402 +0,0 @@
-# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com>
-#
-# Copyright (C) 2006 Red Hat
-# see file 'COPYING' for use and warranty information
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; version 2 only
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-
-"""
-Classes and algorithms for the generation of SELinux policy.
-"""
-
-import itertools
-import textwrap
-
-import refpolicy
-import objectmodel
-import access
-import interfaces
-import matching
-import selinux.audit2why as audit2why
-try:
-    from setools import *
-except:
-    pass
-
-# Constants for the level of explanation from the generation
-# routines
-NO_EXPLANATION    = 0
-SHORT_EXPLANATION = 1
-LONG_EXPLANATION  = 2
-
-class PolicyGenerator:
-    """Generate a reference policy module from access vectors.
-
-    PolicyGenerator generates a new reference policy module
-    or updates an existing module based on requested access
-    in the form of access vectors.
-
-    It generates allow rules and optionally module require
-    statements and reference policy interfaces. By default
-    only allow rules are generated. The methods .set_gen_refpol
-    and .set_gen_requires turn on interface generation and
-    requires generation respectively.
-
-    PolicyGenerator can also optionally add comments explaining
-    why a particular access was allowed based on the audit
-    messages that generated the access. The access vectors
-    passed in must have the .audit_msgs field set correctly
-    and .explain set to SHORT|LONG_EXPLANATION to enable this
-    feature.
-
-    The module created by PolicyGenerator can be passed to
-    output.ModuleWriter to output a text representation.
-    """
-    def __init__(self, module=None):
-        """Initialize a PolicyGenerator with an optional
-        existing module.
-
-        If the module parameter is not None then access
-        will be added to the passed in module. Otherwise
-        a new reference policy module will be created.
-        """
-        self.ifgen = None
-        self.explain = NO_EXPLANATION
-        self.gen_requires = False
-        if module:
-            self.module = module
-        else:
-            self.module = refpolicy.Module()
-
-        self.dontaudit = False
-
-        self.domains = None
-
-    def set_gen_refpol(self, if_set=None, perm_maps=None):
-        """Set whether reference policy interfaces are generated.
-
-        To turn on interface generation pass in an interface set
-        to use for interface generation. To turn off interface
-        generation pass in None.
-
-        If interface generation is enabled requires generation
-        will also be enabled.
-        """
-        if if_set:
-            self.ifgen = InterfaceGenerator(if_set, perm_maps)
-            self.gen_requires = True
-        else:
-            self.ifgen = None
-        self.__set_module_style()
-
-    def set_gen_requires(self, status=True):
-        """Set whether module requires are generated.
-
-        Passing in True will turn on requires generation and
-        False will disable generation. If requires generation is
-        disabled interface generation will also be disabled and
-        can only be re-enabled via .set_gen_refpol.
-        """
-        self.gen_requires = status
-
-    def set_gen_explain(self, explain=SHORT_EXPLANATION):
-        """Set whether access is explained.
-        """
-        self.explain = explain
-
-    def set_gen_dontaudit(self, dontaudit):
-        self.dontaudit = dontaudit
-
-    def __set_module_style(self):
-        if self.ifgen:
-            refpolicy = True
-        else:
-            refpolicy = False
-        for mod in self.module.module_declarations():
-            mod.refpolicy = refpolicy
-
-    def set_module_name(self, name, version="1.0"):
-        """Set the name of the module and optionally the version.
-        """
-        # find an existing module declaration
-        m = None
-        for mod in self.module.module_declarations():
-            m = mod
-        if not m:
-            m = refpolicy.ModuleDeclaration()
-            self.module.children.insert(0, m)
-        m.name = name
-        m.version = version
-        if self.ifgen:
-            m.refpolicy = True
-        else:
-            m.refpolicy = False
-
-    def get_module(self):
-        """Return the generated module."""
-        # Generate the requires
-        if self.gen_requires:
-            gen_requires(self.module)
-
-        return self.module
-
-    def __add_allow_rules(self, avs):
-        for av in avs:
-            rule = refpolicy.AVRule(av)
-            if self.dontaudit:
-                rule.rule_type = rule.DONTAUDIT
-            rule.comment = ""
-            if self.explain:
-                rule.comment = str(refpolicy.Comment(explain_access(av, verbosity=self.explain)))
-            if av.type == audit2why.ALLOW:
-                rule.comment += "\n#!!!! This avc is allowed in the current policy"
-            if av.type == audit2why.DONTAUDIT:
-                rule.comment += "\n#!!!! This avc has a dontaudit rule in the current policy"
-
-            if av.type == audit2why.BOOLEAN:
-                if len(av.data) > 1:
-                    rule.comment += "\n#!!!! This avc can be allowed using one of these booleans:\n# %s" % ", ".join(map(lambda x: x[0], av.data))
-                else:
-                    rule.comment += "\n#!!!! This avc can be allowed using the boolean '%s'" % av.data[0][0]
-
-            if av.type == audit2why.CONSTRAINT:
-                rule.comment += "\n#!!!! This avc is a constraint violation. You would need to modify the attributes of either the source or target types to allow this access."
-                rule.comment += "\n#Constraint rule: "
-                rule.comment += "\n\t" + av.data[0]
-                for reason in av.data[1:]:
-                    rule.comment += "\n#\tPossible cause is the source %s and target %s are different." % reason
-
-            try:
-                if ( av.type == audit2why.TERULE and
-                     "write" in av.perms and
-                     ( "dir" in av.obj_class or "open" in av.perms )):
-                    if not self.domains:
-                        self.domains = seinfo(ATTRIBUTE, name="domain")[0]["types"]
-                    types = []
-
-                    for i in map(lambda x: x[TCONTEXT], sesearch([ALLOW], {SCONTEXT: av.src_type, CLASS: av.obj_class, PERMS: av.perms})):
-                        if i not in self.domains:
-                            types.append(i)
-                    if len(types) == 1:
-                        rule.comment += "\n#!!!! The source type '%s' can write to a '%s' of the following type:\n# %s\n" % ( av.src_type, av.obj_class, ", ".join(types))
-                    elif len(types) > 1:
-                        rule.comment += "\n#!!!! The source type '%s' can write to a '%s' of the following types:\n# %s\n" % ( av.src_type, av.obj_class, ", ".join(types))
-            except:
-                pass
-            self.module.children.append(rule)
-
-    def add_access(self, av_set):
-        """Add the access from the access vector set to this
-        module.
-        """
-        # Use the interface generator to split the access
-        # into raw allow rules and interfaces. After this
-        # raw_allow will contain the access that should be
-        # used as raw allow rules and the interface calls
-        # will be added to the module.
-        if self.ifgen:
-            raw_allow, ifcalls = self.ifgen.gen(av_set, self.explain)
-            self.module.children.extend(ifcalls)
-        else:
-            raw_allow = av_set
-
-        # Generate the raw allow rules from the filtered list
-        self.__add_allow_rules(raw_allow)
-
-    def add_role_types(self, role_type_set):
-        for role_type in role_type_set:
-            self.module.children.append(role_type)
-
-def explain_access(av, ml=None, verbosity=SHORT_EXPLANATION):
-    """Explain why a policy statement was generated.
-
-    Return a list of strings explaining why a policy
-    statement was generated. The strings are
-    commented and wrapped and can be directly inserted
-    into a policy.
-
-    Params:
-      av - access vector representing the access. Should
-           have .audit_msgs set appropriately.
- verbosity - the amount of explanation provided. Should - be set to NO_EXPLANATION, SHORT_EXPLANATION, or - LONG_EXPLANATION. - Returns: - list of strings - strings explaining the access or an empty - string if verbosity=NO_EXPLANATION or there is not sufficient - information to provide an explanation. - """ - s = [] - - def explain_interfaces(): - if not ml: - return - s.append(" Interface options:") - for match in ml.all(): - ifcall = call_interface(match.interface, ml.av) - s.append(' %s # [%d]' % (ifcall.to_string(), match.dist)) - - - # Format the raw audit data to explain why the - # access was requested - either long or short. - if verbosity == LONG_EXPLANATION: - for msg in av.audit_msgs: - s.append(' %s' % msg.header) - s.append(' scontext="%s" tcontext="%s"' % - (str(msg.scontext), str(msg.tcontext))) - s.append(' class="%s" perms="%s"' % - (msg.tclass, refpolicy.list_to_space_str(msg.accesses))) - s.append(' comm="%s" exe="%s" path="%s"' % (msg.comm, msg.exe, msg.path)) - s.extend(textwrap.wrap('message="' + msg.message + '"', 80, initial_indent=" ", - subsequent_indent=" ")) - explain_interfaces() - elif verbosity: - s.append(' src="%s" tgt="%s" class="%s", perms="%s"' % - (av.src_type, av.tgt_type, av.obj_class, av.perms.to_space_str())) - # For the short display we are only going to use the additional information - # from the first audit message. For the vast majority of cases this info - # will always be the same anyway. - if len(av.audit_msgs) > 0: - msg = av.audit_msgs[0] - s.append(' comm="%s" exe="%s" path="%s"' % (msg.comm, msg.exe, msg.path)) - explain_interfaces() - return s - -def param_comp(a, b): - return cmp(b.num, a.num) - -def call_interface(interface, av): - params = [] - args = [] - - params.extend(interface.params.values()) - params.sort(param_comp) - - ifcall = refpolicy.InterfaceCall() - ifcall.ifname = interface.name - - for i in range(len(params)): - if params[i].type == refpolicy.SRC_TYPE: - ifcall.args.append(av.src_type) - elif params[i].type == refpolicy.TGT_TYPE: - ifcall.args.append(av.tgt_type) - elif params[i].type == refpolicy.OBJ_CLASS: - ifcall.args.append(av.obj_class) - else: - print params[i].type - assert(0) - - assert(len(ifcall.args) > 0) - - return ifcall - -class InterfaceGenerator: - def __init__(self, ifs, perm_maps=None): - self.ifs = ifs - self.hack_check_ifs(ifs) - self.matcher = matching.AccessMatcher(perm_maps) - self.calls = [] - - def hack_check_ifs(self, ifs): - # FIXME: Disable interfaces we can't call - this is a hack. - # Because we don't handle roles, multiple paramaters, etc., - # etc., we must make certain we can actually use a returned - # interface. - for x in ifs.interfaces.values(): - params = [] - params.extend(x.params.values()) - params.sort(param_comp) - for i in range(len(params)): - # Check that the paramater position matches - # the number (e.g., $1 is the first arg). This - # will fail if the parser missed something. - if (i + 1) != params[i].num: - x.enabled = False - break - # Check that we can handle the param type (currently excludes - # roles. 
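# Taken together, the position check above and the type check below mean
# that an interface like this hypothetical sketch stays enabled ($1 is
# positional and a plain source-type parameter):
#
#     interface(`myapp_read_config',`
#         allow $1 myapp_conf_t:file { getattr open read };
#     ')
#
# while any interface that takes a role or other unsupported parameter
# kind is disabled rather than called incorrectly.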
- if params[i].type not in [refpolicy.SRC_TYPE, refpolicy.TGT_TYPE, - refpolicy.OBJ_CLASS]: - x.enabled = False - break - - def gen(self, avs, verbosity): - raw_av = self.match(avs) - ifcalls = [] - for ml in self.calls: - ifcall = call_interface(ml.best().interface, ml.av) - if verbosity: - ifcall.comment = refpolicy.Comment(explain_access(ml.av, ml, verbosity)) - ifcalls.append((ifcall, ml)) - - d = [] - for ifcall, ifs in ifcalls: - found = False - for o_ifcall in d: - if o_ifcall.matches(ifcall): - if o_ifcall.comment and ifcall.comment: - o_ifcall.comment.merge(ifcall.comment) - found = True - if not found: - d.append(ifcall) - - return (raw_av, d) - - - def match(self, avs): - raw_av = [] - for av in avs: - ans = matching.MatchList() - self.matcher.search_ifs(self.ifs, av, ans) - if len(ans): - self.calls.append(ans) - else: - raw_av.append(av) - - return raw_av - - -def gen_requires(module): - """Add require statements to the module. - """ - def collect_requires(node): - r = refpolicy.Require() - for avrule in node.avrules(): - r.types.update(avrule.src_types) - r.types.update(avrule.tgt_types) - for obj in avrule.obj_classes: - r.add_obj_class(obj, avrule.perms) - - for ifcall in node.interface_calls(): - for arg in ifcall.args: - # FIXME - handle non-type arguments when we - # can actually figure those out. - r.types.add(arg) - - for role_type in node.role_types(): - r.roles.add(role_type.role) - r.types.update(role_type.types) - - r.types.discard("self") - - node.children.insert(0, r) - - # FUTURE - this is untested on modules with any sort of - # nesting - for node in module.nodes(): - collect_requires(node) - - diff --git a/lib/python2.7/site-packages/sepolgen/refparser.py b/lib/python2.7/site-packages/sepolgen/refparser.py deleted file mode 100644 index 83542d3..0000000 --- a/lib/python2.7/site-packages/sepolgen/refparser.py +++ /dev/null @@ -1,1128 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006-2007 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -# OVERVIEW -# -# -# This is a parser for the refpolicy policy "language" - i.e., the -# normal SELinux policy language plus the refpolicy style M4 macro -# constructs on top of that base language. This parser is primarily -# aimed at parsing the policy headers in order to create an abstract -# policy representation suitable for generating policy. -# -# Both the lexer and parser are included in this file. The are implemented -# using the Ply library (included with sepolgen). 
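# As a point of reference, the kind of policy header this parser consumes
# looks like the following fragment (a sketch, not taken from any real
# module):
#
#     policy_module(myapp, 1.0)
#
#     interface(`myapp_read_logs',`
#         gen_require(`
#             type myapp_log_t;
#         ')
#         allow $1 myapp_log_t:file { getattr open read };
#     ')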
- -import sys -import os -import re -import traceback - -import refpolicy -import access -import defaults - -import lex -import yacc - -# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -# -# lexer -# -# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -tokens = ( - # basic tokens, punctuation - 'TICK', - 'SQUOTE', - 'OBRACE', - 'CBRACE', - 'SEMI', - 'COLON', - 'OPAREN', - 'CPAREN', - 'COMMA', - 'MINUS', - 'TILDE', - 'ASTERISK', - 'AMP', - 'BAR', - 'EXPL', - 'EQUAL', - 'FILENAME', - 'IDENTIFIER', - 'NUMBER', - 'PATH', - 'IPV6_ADDR', - # reserved words - # module - 'MODULE', - 'POLICY_MODULE', - 'REQUIRE', - # flask - 'SID', - 'GENFSCON', - 'FS_USE_XATTR', - 'FS_USE_TRANS', - 'FS_USE_TASK', - 'PORTCON', - 'NODECON', - 'NETIFCON', - 'PIRQCON', - 'IOMEMCON', - 'IOPORTCON', - 'PCIDEVICECON', - 'DEVICETREECON', - # object classes - 'CLASS', - # types and attributes - 'TYPEATTRIBUTE', - 'ROLEATTRIBUTE', - 'TYPE', - 'ATTRIBUTE', - 'ATTRIBUTE_ROLE', - 'ALIAS', - 'TYPEALIAS', - # conditional policy - 'BOOL', - 'TRUE', - 'FALSE', - 'IF', - 'ELSE', - # users and roles - 'ROLE', - 'TYPES', - # rules - 'ALLOW', - 'DONTAUDIT', - 'AUDITALLOW', - 'NEVERALLOW', - 'PERMISSIVE', - 'TYPE_TRANSITION', - 'TYPE_CHANGE', - 'TYPE_MEMBER', - 'RANGE_TRANSITION', - 'ROLE_TRANSITION', - # refpolicy keywords - 'OPT_POLICY', - 'INTERFACE', - 'TUNABLE_POLICY', - 'GEN_REQ', - 'TEMPLATE', - 'GEN_CONTEXT', - # m4 - 'IFELSE', - 'IFDEF', - 'IFNDEF', - 'DEFINE' - ) - -# All reserved keywords - see t_IDENTIFIER for how these are matched in -# the lexer. -reserved = { - # module - 'module' : 'MODULE', - 'policy_module' : 'POLICY_MODULE', - 'require' : 'REQUIRE', - # flask - 'sid' : 'SID', - 'genfscon' : 'GENFSCON', - 'fs_use_xattr' : 'FS_USE_XATTR', - 'fs_use_trans' : 'FS_USE_TRANS', - 'fs_use_task' : 'FS_USE_TASK', - 'portcon' : 'PORTCON', - 'nodecon' : 'NODECON', - 'netifcon' : 'NETIFCON', - 'pirqcon' : 'PIRQCON', - 'iomemcon' : 'IOMEMCON', - 'ioportcon' : 'IOPORTCON', - 'pcidevicecon' : 'PCIDEVICECON', - 'devicetreecon' : 'DEVICETREECON', - # object classes - 'class' : 'CLASS', - # types and attributes - 'typeattribute' : 'TYPEATTRIBUTE', - 'roleattribute' : 'ROLEATTRIBUTE', - 'type' : 'TYPE', - 'attribute' : 'ATTRIBUTE', - 'attribute_role' : 'ATTRIBUTE_ROLE', - 'alias' : 'ALIAS', - 'typealias' : 'TYPEALIAS', - # conditional policy - 'bool' : 'BOOL', - 'true' : 'TRUE', - 'false' : 'FALSE', - 'if' : 'IF', - 'else' : 'ELSE', - # users and roles - 'role' : 'ROLE', - 'types' : 'TYPES', - # rules - 'allow' : 'ALLOW', - 'dontaudit' : 'DONTAUDIT', - 'auditallow' : 'AUDITALLOW', - 'neverallow' : 'NEVERALLOW', - 'permissive' : 'PERMISSIVE', - 'type_transition' : 'TYPE_TRANSITION', - 'type_change' : 'TYPE_CHANGE', - 'type_member' : 'TYPE_MEMBER', - 'range_transition' : 'RANGE_TRANSITION', - 'role_transition' : 'ROLE_TRANSITION', - # refpolicy keywords - 'optional_policy' : 'OPT_POLICY', - 'interface' : 'INTERFACE', - 'tunable_policy' : 'TUNABLE_POLICY', - 'gen_require' : 'GEN_REQ', - 'template' : 'TEMPLATE', - 'gen_context' : 'GEN_CONTEXT', - # M4 - 'ifelse' : 'IFELSE', - 'ifndef' : 'IFNDEF', - 'ifdef' : 'IFDEF', - 'define' : 'DEFINE' - } - -# The ply lexer allows definition of tokens in 2 ways: regular expressions -# or functions. 
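As a reminder, the two ply styles look like this (a generic illustration, not part of this lexer):

# String form: the rule is just a regular expression; the matched text
# becomes the token value unchanged.
t_OBRACE = r'\{'

# Function form: the docstring is the regular expression and the body can
# transform the token (or drop it by not returning it).
def t_NUMBER(t):
    r'[0-9]+'
    t.value = int(t.value)
    return t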
- -# Simple regex tokens -t_TICK = r'\`' -t_SQUOTE = r'\'' -t_OBRACE = r'\{' -t_CBRACE = r'\}' -# This will handle spurios extra ';' via the + -t_SEMI = r'\;+' -t_COLON = r'\:' -t_OPAREN = r'\(' -t_CPAREN = r'\)' -t_COMMA = r'\,' -t_MINUS = r'\-' -t_TILDE = r'\~' -t_ASTERISK = r'\*' -t_AMP = r'\&' -t_BAR = r'\|' -t_EXPL = r'\!' -t_EQUAL = r'\=' -t_NUMBER = r'[0-9\.]+' -t_PATH = r'/[a-zA-Z0-9)_\.\*/]*' -#t_IPV6_ADDR = r'[a-fA-F0-9]{0,4}:[a-fA-F0-9]{0,4}:([a-fA-F0-9]{0,4}:)*' - -# Ignore whitespace - this is a special token for ply that more efficiently -# ignores uninteresting tokens. -t_ignore = " \t" - -# More complex tokens -def t_IPV6_ADDR(t): - r'[a-fA-F0-9]{0,4}:[a-fA-F0-9]{0,4}:([a-fA-F0-9]|:)*' - # This is a function simply to force it sooner into - # the regex list - return t - -def t_m4comment(t): - r'dnl.*\n' - # Ignore all comments - t.lexer.lineno += 1 - -def t_refpolicywarn1(t): - r'define.*refpolicywarn\(.*\n' - # Ignore refpolicywarn statements - they sometimes - # contain text that we can't parse. - t.skip(1) - -def t_refpolicywarn(t): - r'refpolicywarn\(.*\n' - # Ignore refpolicywarn statements - they sometimes - # contain text that we can't parse. - t.lexer.lineno += 1 - -def t_IDENTIFIER(t): - r'[a-zA-Z_\$][a-zA-Z0-9_\-\+\.\$\*~]*' - # Handle any keywords - t.type = reserved.get(t.value,'IDENTIFIER') - return t - -def t_FILENAME(t): - r'\"[a-zA-Z0-9_\-\+\.\$\*~ :]+\"' - # Handle any keywords - t.type = reserved.get(t.value,'FILENAME') - return t - -def t_comment(t): - r'\#.*\n' - # Ignore all comments - t.lexer.lineno += 1 - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.skip(1) - -def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) - -# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -# -# Parser -# -# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -# Global data used during parsing - making it global is easier than -# passing the state through the parsing functions. - -# m is the top-level data structure (stands for modules). -m = None -# error is either None (indicating no error) or a string error message. -error = None -parse_file = "" -# spt is the support macros (e.g., obj/perm sets) - it is an instance of -# refpolicy.SupportMacros and should always be present during parsing -# though it may not contain any macros. -spt = None -success = True - -# utilities -def collect(stmts, parent, val=None): - if stmts is None: - return - for s in stmts: - if s is None: - continue - s.parent = parent - if val is not None: - parent.children.insert(0, (val, s)) - else: - parent.children.insert(0, s) - -def expand(ids, s): - for id in ids: - if spt.has_key(id): - s.update(spt.by_name(id)) - else: - s.add(id) - -# Top-level non-terminal -def p_statements(p): - '''statements : statement - | statements statement - | empty - ''' - if len(p) == 2 and p[1]: - m.children.append(p[1]) - elif len(p) > 2 and p[2]: - m.children.append(p[2]) - -def p_statement(p): - '''statement : interface - | template - | obj_perm_set - | policy - | policy_module_stmt - | module_stmt - ''' - p[0] = p[1] - -def p_empty(p): - 'empty :' - pass - -# -# Reference policy language constructs -# - -# This is for the policy module statement (e.g., policy_module(foo,1.2.0)). -# We have a separate terminal for either the basic language module statement -# and interface calls to make it easier to identifier. 
-def p_policy_module_stmt(p): - 'policy_module_stmt : POLICY_MODULE OPAREN IDENTIFIER COMMA NUMBER CPAREN' - m = refpolicy.ModuleDeclaration() - m.name = p[3] - m.version = p[5] - m.refpolicy = True - p[0] = m - -def p_interface(p): - '''interface : INTERFACE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN - ''' - x = refpolicy.Interface(p[4]) - collect(p[8], x) - p[0] = x - -def p_template(p): - '''template : TEMPLATE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN - | DEFINE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN - ''' - x = refpolicy.Template(p[4]) - collect(p[8], x) - p[0] = x - -def p_define(p): - '''define : DEFINE OPAREN TICK IDENTIFIER SQUOTE CPAREN''' - # This is for defining single M4 values (to be used later in ifdef statements). - # Example: define(`sulogin_no_pam'). We don't currently do anything with these - # but we should in the future when we correctly resolve ifdef statements. - p[0] = None - -def p_interface_stmts(p): - '''interface_stmts : policy - | interface_stmts policy - | empty - ''' - if len(p) == 2 and p[1]: - p[0] = p[1] - elif len(p) > 2: - if not p[1]: - if p[2]: - p[0] = p[2] - elif not p[2]: - p[0] = p[1] - else: - p[0] = p[1] + p[2] - -def p_optional_policy(p): - '''optional_policy : OPT_POLICY OPAREN TICK interface_stmts SQUOTE CPAREN - | OPT_POLICY OPAREN TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN - ''' - o = refpolicy.OptionalPolicy() - collect(p[4], o, val=True) - if len(p) > 7: - collect(p[8], o, val=False) - p[0] = [o] - -def p_tunable_policy(p): - '''tunable_policy : TUNABLE_POLICY OPAREN TICK cond_expr SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN - | TUNABLE_POLICY OPAREN TICK cond_expr SQUOTE COMMA TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN - ''' - x = refpolicy.TunablePolicy() - x.cond_expr = p[4] - collect(p[8], x, val=True) - if len(p) > 11: - collect(p[12], x, val=False) - p[0] = [x] - -def p_ifelse(p): - '''ifelse : IFELSE OPAREN TICK IDENTIFIER SQUOTE COMMA COMMA TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi - | IFELSE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi - ''' -# x = refpolicy.IfDef(p[4]) -# v = True -# collect(p[8], x, val=v) -# if len(p) > 12: -# collect(p[12], x, val=False) -# p[0] = [x] - pass - - -def p_ifdef(p): - '''ifdef : IFDEF OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi - | IFNDEF OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi - | IFDEF OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi - ''' - x = refpolicy.IfDef(p[4]) - if p[1] == 'ifdef': - v = True - else: - v = False - collect(p[8], x, val=v) - if len(p) > 12: - collect(p[12], x, val=False) - p[0] = [x] - -def p_interface_call(p): - '''interface_call : IDENTIFIER OPAREN interface_call_param_list CPAREN - | IDENTIFIER OPAREN CPAREN - | IDENTIFIER OPAREN interface_call_param_list CPAREN SEMI''' - # Allow spurious semi-colons at the end of interface calls - i = refpolicy.InterfaceCall(ifname=p[1]) - if len(p) > 4: - i.args.extend(p[3]) - p[0] = i - -def p_interface_call_param(p): - '''interface_call_param : IDENTIFIER - | IDENTIFIER MINUS IDENTIFIER - | nested_id_set - | TRUE - | FALSE - | FILENAME - ''' - # Intentionally let single identifiers pass through - # 
List means set, non-list identifier - if len(p) == 2: - p[0] = p[1] - else: - p[0] = [p[1], "-" + p[3]] - -def p_interface_call_param_list(p): - '''interface_call_param_list : interface_call_param - | interface_call_param_list COMMA interface_call_param - ''' - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = p[1] + [p[3]] - - -def p_obj_perm_set(p): - 'obj_perm_set : DEFINE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK names SQUOTE CPAREN' - s = refpolicy.ObjPermSet(p[4]) - s.perms = p[8] - p[0] = s - -# -# Basic SELinux policy language -# - -def p_policy(p): - '''policy : policy_stmt - | optional_policy - | tunable_policy - | ifdef - | ifelse - | conditional - ''' - p[0] = p[1] - -def p_policy_stmt(p): - '''policy_stmt : gen_require - | avrule_def - | typerule_def - | typeattribute_def - | roleattribute_def - | interface_call - | role_def - | role_allow - | permissive - | type_def - | typealias_def - | attribute_def - | attribute_role_def - | range_transition_def - | role_transition_def - | bool - | define - | initial_sid - | genfscon - | fs_use - | portcon - | nodecon - | netifcon - | pirqcon - | iomemcon - | ioportcon - | pcidevicecon - | devicetreecon - ''' - if p[1]: - p[0] = [p[1]] - -def p_module_stmt(p): - 'module_stmt : MODULE IDENTIFIER NUMBER SEMI' - m = refpolicy.ModuleDeclaration() - m.name = p[2] - m.version = p[3] - m.refpolicy = False - p[0] = m - -def p_gen_require(p): - '''gen_require : GEN_REQ OPAREN TICK requires SQUOTE CPAREN - | REQUIRE OBRACE requires CBRACE''' - # We ignore the require statements - they are redundant data from our point-of-view. - # Checkmodule will verify them later anyway so we just assume that they match what - # is in the rest of the interface. - pass - -def p_requires(p): - '''requires : require - | requires require - | ifdef - | requires ifdef - ''' - pass - -def p_require(p): - '''require : TYPE comma_list SEMI - | ROLE comma_list SEMI - | ATTRIBUTE comma_list SEMI - | ATTRIBUTE_ROLE comma_list SEMI - | CLASS comma_list SEMI - | BOOL comma_list SEMI - ''' - pass - -def p_security_context(p): - '''security_context : IDENTIFIER COLON IDENTIFIER COLON IDENTIFIER - | IDENTIFIER COLON IDENTIFIER COLON IDENTIFIER COLON mls_range_def''' - # This will likely need some updates to handle complex levels - s = refpolicy.SecurityContext() - s.user = p[1] - s.role = p[3] - s.type = p[5] - if len(p) > 6: - s.level = p[7] - - p[0] = s - -def p_gen_context(p): - '''gen_context : GEN_CONTEXT OPAREN security_context COMMA mls_range_def CPAREN - ''' - # We actually store gen_context statements in a SecurityContext - # object - it knows how to output either a bare context or a - # gen_context statement. 
-    s = p[3]
-    s.level = p[5]
-
-    p[0] = s
-
-def p_context(p):
-    '''context : security_context
-               | gen_context
-    '''
-    p[0] = p[1]
-
-def p_initial_sid(p):
-    '''initial_sid : SID IDENTIFIER context'''
-    s = refpolicy.InitialSid()
-    s.name = p[2]
-    s.context = p[3]
-    p[0] = s
-
-def p_genfscon(p):
-    '''genfscon : GENFSCON IDENTIFIER PATH context'''
-
-    g = refpolicy.GenfsCon()
-    g.filesystem = p[2]
-    g.path = p[3]
-    g.context = p[4]
-
-    p[0] = g
-
-def p_fs_use(p):
-    '''fs_use : FS_USE_XATTR IDENTIFIER context SEMI
-              | FS_USE_TASK IDENTIFIER context SEMI
-              | FS_USE_TRANS IDENTIFIER context SEMI
-    '''
-    f = refpolicy.FilesystemUse()
-    if p[1] == "fs_use_xattr":
-        f.type = refpolicy.FilesystemUse.XATTR
-    elif p[1] == "fs_use_task":
-        f.type = refpolicy.FilesystemUse.TASK
-    elif p[1] == "fs_use_trans":
-        f.type = refpolicy.FilesystemUse.TRANS
-
-    f.filesystem = p[2]
-    f.context = p[3]
-
-    p[0] = f
-
-def p_portcon(p):
-    '''portcon : PORTCON IDENTIFIER NUMBER context
-               | PORTCON IDENTIFIER NUMBER MINUS NUMBER context'''
-    c = refpolicy.PortCon()
-    c.port_type = p[2]
-    if len(p) == 5:
-        c.port_number = p[3]
-        c.context = p[4]
-    else:
-        # For a port range the MINUS token occupies p[4], so the second
-        # port number is p[5] and the context is p[6].
-        c.port_number = p[3] + "-" + p[5]
-        c.context = p[6]
-
-    p[0] = c
-
-def p_nodecon(p):
-    '''nodecon : NODECON NUMBER NUMBER context
-               | NODECON IPV6_ADDR IPV6_ADDR context
-    '''
-    n = refpolicy.NodeCon()
-    n.start = p[2]
-    n.end = p[3]
-    n.context = p[4]
-
-    p[0] = n
-
-def p_netifcon(p):
-    'netifcon : NETIFCON IDENTIFIER context context'
-    n = refpolicy.NetifCon()
-    n.interface = p[2]
-    n.interface_context = p[3]
-    n.packet_context = p[4]
-
-    p[0] = n
-
-def p_pirqcon(p):
-    'pirqcon : PIRQCON NUMBER context'
-    c = refpolicy.PirqCon()
-    c.pirq_number = p[2]
-    c.context = p[3]
-
-    p[0] = c
-
-def p_iomemcon(p):
-    '''iomemcon : IOMEMCON NUMBER context
-                | IOMEMCON NUMBER MINUS NUMBER context'''
-    c = refpolicy.IomemCon()
-    if len(p) == 4:
-        c.device_mem = p[2]
-        c.context = p[3]
-    else:
-        # The MINUS token is p[3]; the end of the range is p[4] and the
-        # context is p[5].
-        c.device_mem = p[2] + "-" + p[4]
-        c.context = p[5]
-
-    p[0] = c
-
-def p_ioportcon(p):
-    '''ioportcon : IOPORTCON NUMBER context
-                 | IOPORTCON NUMBER MINUS NUMBER context'''
-    c = refpolicy.IoportCon()
-    if len(p) == 4:
-        c.ioport = p[2]
-        c.context = p[3]
-    else:
-        # As above, skip the MINUS token at p[3].
-        c.ioport = p[2] + "-" + p[4]
-        c.context = p[5]
-
-    p[0] = c
-
-def p_pcidevicecon(p):
-    'pcidevicecon : PCIDEVICECON NUMBER context'
-    c = refpolicy.PciDeviceCon()
-    c.device = p[2]
-    c.context = p[3]
-
-    p[0] = c
-
-def p_devicetreecon(p):
-    'devicetreecon : DEVICETREECON NUMBER context'
-    # Must match the DeviceTreeCon class defined in refpolicy.py (the old
-    # spelling 'DevicetTeeCon' named a class that does not exist).
-    c = refpolicy.DeviceTreeCon()
-    c.path = p[2]
-    c.context = p[3]
-
-    p[0] = c
-
-def p_mls_range_def(p):
-    '''mls_range_def : mls_level_def MINUS mls_level_def
-                     | mls_level_def
-    '''
-    p[0] = p[1]
-    if len(p) > 2:
-        p[0] = p[0] + "-" + p[3]
-
-def p_mls_level_def(p):
-    '''mls_level_def : IDENTIFIER COLON comma_list
-                     | IDENTIFIER
-    '''
-    p[0] = p[1]
-    if len(p) > 2:
-        p[0] = p[0] + ":" + ",".join(p[3])
-
-def p_type_def(p):
-    '''type_def : TYPE IDENTIFIER COMMA comma_list SEMI
-                | TYPE IDENTIFIER SEMI
-                | TYPE IDENTIFIER ALIAS names SEMI
-                | TYPE IDENTIFIER ALIAS names COMMA comma_list SEMI
-    '''
-    t = refpolicy.Type(p[2])
-    if len(p) == 6:
-        if p[3] == ',':
-            t.attributes.update(p[4])
-        else:
-            t.aliases = p[4]
-    elif len(p) > 4:
-        t.aliases = p[4]
-        if len(p) == 8:
-            t.attributes.update(p[6])
-    p[0] = t
-
-def p_attribute_def(p):
-    'attribute_def : ATTRIBUTE IDENTIFIER SEMI'
-    a = refpolicy.Attribute(p[2])
-    p[0] = a
-
-def p_attribute_role_def(p):
-    'attribute_role_def : ATTRIBUTE_ROLE IDENTIFIER SEMI'
-    a = refpolicy.Attribute_Role(p[2])
-    p[0] = a
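Taken together, the rules above accept the flask-style labeling statements found in policy headers. For reference, the statement forms look like this (an illustrative, made-up set; the types and contexts are hypothetical):

portcon tcp 80 gen_context(system_u:object_r:http_port_t, s0)
portcon tcp 8000-8080 system_u:object_r:myapp_port_t:s0
iomemcon 60000-60010 system_u:object_r:device_mem_t:s0
fs_use_xattr ext4 system_u:object_r:fs_t:s0;
type myapp_t alias myapp_old_t, domain;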
-def p_typealias_def(p):
-    'typealias_def : TYPEALIAS IDENTIFIER ALIAS names SEMI'
-    t = refpolicy.TypeAlias()
-    t.type = p[2]
-    t.aliases = p[4]
-    p[0] = t
-
-def p_role_def(p):
-    '''role_def : ROLE IDENTIFIER TYPES comma_list SEMI
-                | ROLE IDENTIFIER SEMI'''
-    r = refpolicy.Role()
-    r.role = p[2]
-    if len(p) > 4:
-        r.types.update(p[4])
-    p[0] = r
-
-def p_role_allow(p):
-    'role_allow : ALLOW names names SEMI'
-    r = refpolicy.RoleAllow()
-    r.src_roles = p[2]
-    r.tgt_roles = p[3]
-    p[0] = r
-
-def p_permissive(p):
-    'permissive : PERMISSIVE names SEMI'
-    # Permissive statements are ignored. The old body called t.skip(1),
-    # but there is no lexer token 't' in a parser rule, so any permissive
-    # statement raised a NameError.
-    pass
-
-def p_avrule_def(p):
-    '''avrule_def : ALLOW names names COLON names names SEMI
-                  | DONTAUDIT names names COLON names names SEMI
-                  | AUDITALLOW names names COLON names names SEMI
-                  | NEVERALLOW names names COLON names names SEMI
-    '''
-    a = refpolicy.AVRule()
-    if p[1] == 'dontaudit':
-        a.rule_type = refpolicy.AVRule.DONTAUDIT
-    elif p[1] == 'auditallow':
-        a.rule_type = refpolicy.AVRule.AUDITALLOW
-    elif p[1] == 'neverallow':
-        a.rule_type = refpolicy.AVRule.NEVERALLOW
-    a.src_types = p[2]
-    a.tgt_types = p[3]
-    a.obj_classes = p[5]
-    a.perms = p[6]
-    p[0] = a
-
-def p_typerule_def(p):
-    '''typerule_def : TYPE_TRANSITION names names COLON names IDENTIFIER SEMI
-                    | TYPE_TRANSITION names names COLON names IDENTIFIER FILENAME SEMI
-                    | TYPE_TRANSITION names names COLON names IDENTIFIER IDENTIFIER SEMI
-                    | TYPE_CHANGE names names COLON names IDENTIFIER SEMI
-                    | TYPE_MEMBER names names COLON names IDENTIFIER SEMI
-    '''
-    t = refpolicy.TypeRule()
-    if p[1] == 'type_change':
-        t.rule_type = refpolicy.TypeRule.TYPE_CHANGE
-    elif p[1] == 'type_member':
-        t.rule_type = refpolicy.TypeRule.TYPE_MEMBER
-    t.src_types = p[2]
-    t.tgt_types = p[3]
-    t.obj_classes = p[5]
-    t.dest_type = p[6]
-    # Only the named file transition forms have a name in p[7]; in the
-    # short forms p[7] is the SEMI token, not a file name.
-    if len(p) == 9:
-        t.file_name = p[7]
-    p[0] = t
-
-def p_bool(p):
-    '''bool : BOOL IDENTIFIER TRUE SEMI
-            | BOOL IDENTIFIER FALSE SEMI'''
-    b = refpolicy.Bool()
-    b.name = p[2]
-    if p[3] == "true":
-        b.state = True
-    else:
-        b.state = False
-    p[0] = b
-
-def p_conditional(p):
-    ''' conditional : IF OPAREN cond_expr CPAREN OBRACE interface_stmts CBRACE
-                    | IF OPAREN cond_expr CPAREN OBRACE interface_stmts CBRACE ELSE OBRACE interface_stmts CBRACE
-    '''
-    c = refpolicy.Conditional()
-    c.cond_expr = p[3]
-    collect(p[6], c, val=True)
-    if len(p) > 8:
-        collect(p[10], c, val=False)
-    p[0] = [c]
-
-def p_typeattribute_def(p):
-    '''typeattribute_def : TYPEATTRIBUTE IDENTIFIER comma_list SEMI'''
-    t = refpolicy.TypeAttribute()
-    t.type = p[2]
-    t.attributes.update(p[3])
-    p[0] = t
-
-def p_roleattribute_def(p):
-    '''roleattribute_def : ROLEATTRIBUTE IDENTIFIER comma_list SEMI'''
-    t = refpolicy.RoleAttribute()
-    t.role = p[2]
-    t.roleattributes.update(p[3])
-    p[0] = t
-
-def p_range_transition_def(p):
-    '''range_transition_def : RANGE_TRANSITION names names COLON names mls_range_def SEMI
-                            | RANGE_TRANSITION names names names SEMI'''
-    pass
-
-def p_role_transition_def(p):
-    '''role_transition_def : ROLE_TRANSITION names names names SEMI'''
-    pass
-
-def p_cond_expr(p):
-    '''cond_expr : IDENTIFIER
-                 | EXPL cond_expr
-                 | cond_expr AMP AMP cond_expr
-                 | cond_expr BAR BAR cond_expr
-                 | cond_expr EQUAL EQUAL cond_expr
-                 | cond_expr EXPL EQUAL cond_expr
-    '''
-    l = len(p)
-    if l == 2:
-        p[0] = [p[1]]
-    elif l == 3:
-        p[0] = [p[1]] + p[2]
-    else:
-        p[0] = p[1] + [p[2] + p[3]] + p[4]
-
-
-#
-# Basic terminals
-#
-
-# Identifiers and lists of identifiers.  These must
-# be handled somewhat gracefully.
Names returns an IdSet and care must -# be taken that this is _assigned_ to an object to correctly update -# all of the flags (as opposed to using update). The other terminals -# return list - this is to preserve ordering if it is important for -# parsing (for example, interface_call must retain the ordering). Other -# times the list should be used to update an IdSet. - -def p_names(p): - '''names : identifier - | nested_id_set - | asterisk - | TILDE identifier - | TILDE nested_id_set - | IDENTIFIER MINUS IDENTIFIER - ''' - s = refpolicy.IdSet() - if len(p) < 3: - expand(p[1], s) - elif len(p) == 3: - expand(p[2], s) - s.compliment = True - else: - expand([p[1]]) - s.add("-" + p[3]) - p[0] = s - -def p_identifier(p): - 'identifier : IDENTIFIER' - p[0] = [p[1]] - -def p_asterisk(p): - 'asterisk : ASTERISK' - p[0] = [p[1]] - -def p_nested_id_set(p): - '''nested_id_set : OBRACE nested_id_list CBRACE - ''' - p[0] = p[2] - -def p_nested_id_list(p): - '''nested_id_list : nested_id_element - | nested_id_list nested_id_element - ''' - if len(p) == 2: - p[0] = p[1] - else: - p[0] = p[1] + p[2] - -def p_nested_id_element(p): - '''nested_id_element : identifier - | MINUS IDENTIFIER - | nested_id_set - ''' - if len(p) == 2: - p[0] = p[1] - else: - # For now just leave the '-' - str = "-" + p[2] - p[0] = [str] - -def p_comma_list(p): - '''comma_list : nested_id_list - | comma_list COMMA nested_id_list - ''' - if len(p) > 2: - p[1] = p[1] + p[3] - p[0] = p[1] - -def p_optional_semi(p): - '''optional_semi : SEMI - | empty''' - pass - - -# -# Interface to the parser -# - -def p_error(tok): - global error, parse_file, success, parser - error = "%s: Syntax error on line %d %s [type=%s]" % (parse_file, tok.lineno, tok.value, tok.type) - print error - success = False - -def prep_spt(spt): - if not spt: - return { } - map = {} - for x in spt: - map[x.name] = x - -parser = None -lexer = None -def create_globals(module, support, debug): - global parser, lexer, m, spt - - if not parser: - lexer = lex.lex() - parser = yacc.yacc(method="LALR", debug=debug, write_tables=0) - - if module is not None: - m = module - else: - m = refpolicy.Module() - - if not support: - spt = refpolicy.SupportMacros() - else: - spt = support - -def parse(text, module=None, support=None, debug=False): - create_globals(module, support, debug) - global error, parser, lexer, success - - success = True - - try: - parser.parse(text, debug=debug, lexer=lexer) - except Exception, e: - parser = None - lexer = None - error = "internal parser error: %s" % str(e) + "\n" + traceback.format_exc() - - if not success: - # force the parser and lexer to be rebuilt - we have some problems otherwise - parser = None - msg = 'could not parse text: "%s"' % error - raise ValueError(msg) - return m - -def list_headers(root): - modules = [] - support_macros = None - - for dirpath, dirnames, filenames in os.walk(root): - for name in filenames: - modname = os.path.splitext(name) - filename = os.path.join(dirpath, name) - - if modname[1] == '.spt': - if name == "obj_perm_sets.spt": - support_macros = filename - elif len(re.findall("patterns", modname[0])): - modules.append((modname[0], filename)) - elif modname[1] == '.if': - modules.append((modname[0], filename)) - - return (modules, support_macros) - - -def parse_headers(root, output=None, expand=True, debug=False): - import util - - headers = refpolicy.Headers() - - modules = [] - support_macros = None - - if os.path.isfile(root): - name = os.path.split(root)[1] - if name == '': - raise ValueError("Invalid file 
name %s" % root) - modname = os.path.splitext(name) - modules.append((modname[0], root)) - all_modules, support_macros = list_headers(defaults.headers()) - else: - modules, support_macros = list_headers(root) - - if expand and not support_macros: - raise ValueError("could not find support macros (obj_perm_sets.spt)") - - def o(msg): - if output: - output.write(msg) - - def parse_file(f, module, spt=None): - global parse_file - if debug: - o("parsing file %s\n" % f) - try: - fd = open(f) - txt = fd.read() - fd.close() - parse_file = f - parse(txt, module, spt, debug) - except IOError, e: - return - except ValueError, e: - raise ValueError("error parsing file %s: %s" % (f, str(e))) - - spt = None - if support_macros: - o("Parsing support macros (%s): " % support_macros) - spt = refpolicy.SupportMacros() - parse_file(support_macros, spt) - - headers.children.append(spt) - - # FIXME: Total hack - add in can_exec rather than parse the insanity - # of misc_macros. We are just going to pretend that this is an interface - # to make the expansion work correctly. - can_exec = refpolicy.Interface("can_exec") - av = access.AccessVector(["$1","$2","file","execute_no_trans","open", "read", - "getattr","lock","execute","ioctl"]) - - can_exec.children.append(refpolicy.AVRule(av)) - headers.children.append(can_exec) - - o("done.\n") - - if output and not debug: - status = util.ConsoleProgressBar(sys.stdout, steps=len(modules)) - status.start("Parsing interface files") - - failures = [] - for x in modules: - m = refpolicy.Module() - m.name = x[0] - try: - if expand: - parse_file(x[1], m, spt) - else: - parse_file(x[1], m) - except ValueError, e: - o(str(e) + "\n") - failures.append(x[1]) - continue - - headers.children.append(m) - if output and not debug: - status.step() - - if len(failures): - o("failed to parse some headers: %s" % ", ".join(failures)) - - return headers diff --git a/lib/python2.7/site-packages/sepolgen/refpolicy.py b/lib/python2.7/site-packages/sepolgen/refpolicy.py deleted file mode 100644 index b8ed5c1..0000000 --- a/lib/python2.7/site-packages/sepolgen/refpolicy.py +++ /dev/null @@ -1,917 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -import string -import itertools -import selinux - -# OVERVIEW -# -# This file contains objects and functions used to represent the reference -# policy (including the headers, M4 macros, and policy language statements). -# -# This representation is very different from the semantic representation -# used in libsepol. Instead, it is a more typical abstract representation -# used by the first stage of compilers. It is basically a parse tree. 
-# -# This choice is intentional as it allows us to handle the unprocessed -# M4 statements - including the $1 style arguments - and to more easily generate -# the data structures that we need for policy generation. -# - -# Constans for referring to fields -SRC_TYPE = 0 -TGT_TYPE = 1 -OBJ_CLASS = 2 -PERMS = 3 -ROLE = 4 -DEST_TYPE = 5 - -# String represenations of the above constants -field_to_str = ["source", "target", "object", "permission", "role", "destination" ] -str_to_field = { "source" : SRC_TYPE, "target" : TGT_TYPE, "object" : OBJ_CLASS, - "permission" : PERMS, "role" : ROLE, "destination" : DEST_TYPE } - -# Base Classes - -class PolicyBase: - def __init__(self, parent=None): - self.parent = None - self.comment = None - -class Node(PolicyBase): - """Base class objects produced from parsing the reference policy. - - The Node class is used as the base class for any non-leaf - object produced by parsing the reference policy. This object - should contain a reference to its parent (or None for a top-level - object) and 0 or more children. - - The general idea here is to have a very simple tree structure. Children - are not separated out by type. Instead the tree structure represents - fairly closely the real structure of the policy statements. - - The object should be iterable - by default over all children but - subclasses are free to provide additional iterators over a subset - of their childre (see Interface for example). - """ - - def __init__(self, parent=None): - PolicyBase.__init__(self, parent) - self.children = [] - - def __iter__(self): - return iter(self.children) - - # Not all of the iterators will return something on all Nodes, but - # they won't explode either. Putting them here is just easier. - - # Top level nodes - - def nodes(self): - return itertools.ifilter(lambda x: isinstance(x, Node), walktree(self)) - - def modules(self): - return itertools.ifilter(lambda x: isinstance(x, Module), walktree(self)) - - def interfaces(self): - return itertools.ifilter(lambda x: isinstance(x, Interface), walktree(self)) - - def templates(self): - return itertools.ifilter(lambda x: isinstance(x, Template), walktree(self)) - - def support_macros(self): - return itertools.ifilter(lambda x: isinstance(x, SupportMacros), walktree(self)) - - # Common policy statements - - def module_declarations(self): - return itertools.ifilter(lambda x: isinstance(x, ModuleDeclaration), walktree(self)) - - def interface_calls(self): - return itertools.ifilter(lambda x: isinstance(x, InterfaceCall), walktree(self)) - - def avrules(self): - return itertools.ifilter(lambda x: isinstance(x, AVRule), walktree(self)) - - def typerules(self): - return itertools.ifilter(lambda x: isinstance(x, TypeRule), walktree(self)) - - def typeattributes(self): - """Iterate over all of the TypeAttribute children of this Interface.""" - return itertools.ifilter(lambda x: isinstance(x, TypeAttribute), walktree(self)) - - def roleattributes(self): - """Iterate over all of the RoleAttribute children of this Interface.""" - return itertools.ifilter(lambda x: isinstance(x, RoleAttribute), walktree(self)) - - def requires(self): - return itertools.ifilter(lambda x: isinstance(x, Require), walktree(self)) - - def roles(self): - return itertools.ifilter(lambda x: isinstance(x, Role), walktree(self)) - - def role_allows(self): - return itertools.ifilter(lambda x: isinstance(x, RoleAllow), walktree(self)) - - def role_types(self): - return itertools.ifilter(lambda x: isinstance(x, RoleType), walktree(self)) - - def __str__(self): - 
if self.comment: - return str(self.comment) + "\n" + self.to_string() - else: - return self.to_string() - - def __repr__(self): - return "<%s(%s)>" % (self.__class__.__name__, self.to_string()) - - def to_string(self): - return "" - - -class Leaf(PolicyBase): - def __init__(self, parent=None): - PolicyBase.__init__(self, parent) - - def __str__(self): - if self.comment: - return str(self.comment) + "\n" + self.to_string() - else: - return self.to_string() - - def __repr__(self): - return "<%s(%s)>" % (self.__class__.__name__, self.to_string()) - - def to_string(self): - return "" - - - -# Utility functions - -def walktree(node, depthfirst=True, showdepth=False, type=None): - """Iterate over a Node and its Children. - - The walktree function iterates over a tree containing Nodes and - leaf objects. The iteration can perform a depth first or a breadth - first traversal of the tree (controlled by the depthfirst - paramater. The passed in node will be returned. - - This function will only work correctly for trees - arbitrary graphs - will likely cause infinite looping. - """ - # We control depth first / versus breadth first by - # how we pop items off of the node stack. - if depthfirst: - index = -1 - else: - index = 0 - - stack = [(node, 0)] - while len(stack) > 0: - cur, depth = stack.pop(index) - if showdepth: - yield cur, depth - else: - yield cur - - # If the node is not a Node instance it must - # be a leaf - so no need to add it to the stack - if isinstance(cur, Node): - items = [] - i = len(cur.children) - 1 - while i >= 0: - if type is None or isinstance(cur.children[i], type): - items.append((cur.children[i], depth + 1)) - i -= 1 - - stack.extend(items) - -def walknode(node, type=None): - """Iterate over the direct children of a Node. - - The walktree function iterates over the children of a Node. - Unlike walktree it does note return the passed in node or - the children of any Node objects (that is, it does not go - beyond the current level in the tree). - """ - for x in node: - if type is None or isinstance(x, type): - yield x - - -def list_to_space_str(s, cont=('{', '}')): - """Convert a set (or any sequence type) into a string representation - formatted to match SELinux space separated list conventions. - - For example the list ['read', 'write'] would be converted into: - '{ read write }' - """ - l = len(s) - str = "" - if l < 1: - raise ValueError("cannot convert 0 len set to string") - str = " ".join(s) - if l == 1: - return str - else: - return cont[0] + " " + str + " " + cont[1] - -def list_to_comma_str(s): - l = len(s) - if l < 1: - raise ValueError("cannot conver 0 len set to comma string") - - return ", ".join(s) - -# Basic SELinux types - -class IdSet(set): - def __init__(self, list=None): - if list: - set.__init__(self, list) - else: - set.__init__(self) - self.compliment = False - - def to_space_str(self): - return list_to_space_str(self) - - def to_comma_str(self): - return list_to_comma_str(self) - -class SecurityContext(Leaf): - """An SELinux security context with optional MCS / MLS fields.""" - def __init__(self, context=None, parent=None): - """Create a SecurityContext object, optionally from a string. - - Parameters: - [context] - string representing a security context. Same format - as a string passed to the from_string method. 
- """ - Leaf.__init__(self, parent) - self.user = "" - self.role = "" - self.type = "" - self.level = None - if context is not None: - self.from_string(context) - - def from_string(self, context): - """Parse a string representing a context into a SecurityContext. - - The string should be in the standard format - e.g., - 'user:role:type:level'. - - Raises ValueError if the string is not parsable as a security context. - """ - fields = context.split(":") - if len(fields) < 3: - raise ValueError("context string [%s] not in a valid format" % context) - - self.user = fields[0] - self.role = fields[1] - self.type = fields[2] - if len(fields) > 3: - # FUTURE - normalize level fields to allow more comparisons to succeed. - self.level = string.join(fields[3:], ':') - else: - self.level = None - - def __eq__(self, other): - """Compare two SecurityContext objects - all fields must be exactly the - the same for the comparison to work. It is possible for the level fields - to be semantically the same yet syntactically different - in this case - this function will return false. - """ - return self.user == other.user and \ - self.role == other.role and \ - self.type == other.type and \ - self.level == other.level - - def to_string(self, default_level=None): - """Return a string representing this security context. - - By default, the string will contiain a MCS / MLS level - potentially from the default which is passed in if none was - set. - - Arguments: - default_level - the default level to use if self.level is an - empty string. - - Returns: - A string represening the security context in the form - 'user:role:type:level'. - """ - fields = [self.user, self.role, self.type] - if self.level is None: - if default_level is None: - if selinux.is_selinux_mls_enabled() == 1: - fields.append("s0") - else: - fields.append(default_level) - else: - fields.append(self.level) - return ":".join(fields) - -class ObjectClass(Leaf): - """SELinux object class and permissions. - - This class is a basic representation of an SELinux object - class - it does not represent separate common permissions - - just the union of the common and class specific permissions. - It is meant to be convenient for policy generation. - """ - def __init__(self, name="", parent=None): - Leaf.__init__(self, parent) - self.name = name - self.perms = IdSet() - -# Basic statements - -class TypeAttribute(Leaf): - """SElinux typeattribute statement. - - This class represents a typeattribute statement. - """ - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.type = "" - self.attributes = IdSet() - - def to_string(self): - return "typeattribute %s %s;" % (self.type, self.attributes.to_comma_str()) - -class RoleAttribute(Leaf): - """SElinux roleattribute statement. - - This class represents a roleattribute statement. 
- """ - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.role = "" - self.roleattributes = IdSet() - - def to_string(self): - return "roleattribute %s %s;" % (self.role, self.roleattributes.to_comma_str()) - - -class Role(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.role = "" - self.types = IdSet() - - def to_string(self): - s = "" - for t in self.types: - s += "role %s types %s;\n" % (self.role, t) - return s - -class Type(Leaf): - def __init__(self, name="", parent=None): - Leaf.__init__(self, parent) - self.name = name - self.attributes = IdSet() - self.aliases = IdSet() - - def to_string(self): - s = "type %s" % self.name - if len(self.aliases) > 0: - s = s + "alias %s" % self.aliases.to_space_str() - if len(self.attributes) > 0: - s = s + ", %s" % self.attributes.to_comma_str() - return s + ";" - -class TypeAlias(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.type = "" - self.aliases = IdSet() - - def to_string(self): - return "typealias %s alias %s;" % (self.type, self.aliases.to_space_str()) - -class Attribute(Leaf): - def __init__(self, name="", parent=None): - Leaf.__init__(self, parent) - self.name = name - - def to_string(self): - return "attribute %s;" % self.name - -class Attribute_Role(Leaf): - def __init__(self, name="", parent=None): - Leaf.__init__(self, parent) - self.name = name - - def to_string(self): - return "attribute_role %s;" % self.name - - -# Classes representing rules - -class AVRule(Leaf): - """SELinux access vector (AV) rule. - - The AVRule class represents all varieties of AV rules including - allow, dontaudit, and auditallow (indicated by the flags self.ALLOW, - self.DONTAUDIT, and self.AUDITALLOW respectively). - - The source and target types, object classes, and perms are all represented - by sets containing strings. Sets are used to make it simple to add - strings repeatedly while avoiding duplicates. - - No checking is done to make certain that the symbols are valid or - consistent (e.g., perms that don't match the object classes). It is - even possible to put invalid types like '$1' into the rules to allow - storage of the reference policy interfaces. - """ - ALLOW = 0 - DONTAUDIT = 1 - AUDITALLOW = 2 - NEVERALLOW = 3 - - def __init__(self, av=None, parent=None): - Leaf.__init__(self, parent) - self.src_types = IdSet() - self.tgt_types = IdSet() - self.obj_classes = IdSet() - self.perms = IdSet() - self.rule_type = self.ALLOW - if av: - self.from_av(av) - - def __rule_type_str(self): - if self.rule_type == self.ALLOW: - return "allow" - elif self.rule_type == self.DONTAUDIT: - return "dontaudit" - else: - return "auditallow" - - def from_av(self, av): - """Add the access from an access vector to this allow - rule. - """ - self.src_types.add(av.src_type) - if av.src_type == av.tgt_type: - self.tgt_types.add("self") - else: - self.tgt_types.add(av.tgt_type) - self.obj_classes.add(av.obj_class) - self.perms.update(av.perms) - - def to_string(self): - """Return a string representation of the rule - that is a valid policy language representation (assuming - that the types, object class, etc. are valie). - """ - return "%s %s %s:%s %s;" % (self.__rule_type_str(), - self.src_types.to_space_str(), - self.tgt_types.to_space_str(), - self.obj_classes.to_space_str(), - self.perms.to_space_str()) -class TypeRule(Leaf): - """SELinux type rules. - - This class is very similar to the AVRule class, but is for representing - the type rules (type_trans, type_change, and type_member). 
-class TypeRule(Leaf):
-    """SELinux type rules.
-
-    This class is very similar to the AVRule class, but is for representing
-    the type rules (type_trans, type_change, and type_member). The major
-    difference is the lack of perms and only a single destination type.
-    """
-    TYPE_TRANSITION = 0
-    TYPE_CHANGE = 1
-    TYPE_MEMBER = 2
-
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.src_types = IdSet()
-        self.tgt_types = IdSet()
-        self.obj_classes = IdSet()
-        self.dest_type = ""
-        self.rule_type = self.TYPE_TRANSITION
-
-    def __rule_type_str(self):
-        if self.rule_type == self.TYPE_TRANSITION:
-            return "type_transition"
-        elif self.rule_type == self.TYPE_CHANGE:
-            return "type_change"
-        else:
-            return "type_member"
-
-    def to_string(self):
-        return "%s %s %s:%s %s;" % (self.__rule_type_str(),
-                                    self.src_types.to_space_str(),
-                                    self.tgt_types.to_space_str(),
-                                    self.obj_classes.to_space_str(),
-                                    self.dest_type)
-
-class RoleAllow(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.src_roles = IdSet()
-        self.tgt_roles = IdSet()
-
-    def to_string(self):
-        return "allow %s %s;" % (self.src_roles.to_comma_str(),
-                                 self.tgt_roles.to_comma_str())
-
-class RoleType(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.role = ""
-        self.types = IdSet()
-
-    def to_string(self):
-        s = ""
-        for t in self.types:
-            s += "role %s types %s;\n" % (self.role, t)
-        return s
-
-class ModuleDeclaration(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.name = ""
-        self.version = ""
-        self.refpolicy = False
-
-    def to_string(self):
-        if self.refpolicy:
-            return "policy_module(%s, %s)" % (self.name, self.version)
-        else:
-            return "module %s %s;" % (self.name, self.version)
-
-class Conditional(Node):
-    def __init__(self, parent=None):
-        Node.__init__(self, parent)
-        self.cond_expr = []
-
-    def to_string(self):
-        return "[If %s]" % list_to_space_str(self.cond_expr, cont=("", ""))
-
-class Bool(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.name = ""
-        self.state = False
-
-    def to_string(self):
-        s = "bool %s " % self.name
-        # Test the object's state, not the string being built (the old
-        # code checked s.state, which strings do not have).
-        if self.state:
-            return s + "true"
-        else:
-            return s + "false"
-
-class InitialSid(Leaf):
-    # The constructor was misspelled '__init' and so never ran; name it
-    # __init__ so the attributes are actually initialized.
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.name = ""
-        self.context = None
-
-    def to_string(self):
-        return "sid %s %s" % (self.name, str(self.context))
-
-class GenfsCon(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.filesystem = ""
-        self.path = ""
-        self.context = None
-
-    def to_string(self):
-        return "genfscon %s %s %s" % (self.filesystem, self.path, str(self.context))
-
-class FilesystemUse(Leaf):
-    XATTR = 1
-    TRANS = 2
-    TASK = 3
-
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.type = self.XATTR
-        self.filesystem = ""
-        self.context = None
-
-    def to_string(self):
-        s = ""
-        # The class constants must be referenced through self (or the
-        # class); bare XATTR/TRANS/TASK raised a NameError here.
-        if self.type == self.XATTR:
-            s = "fs_use_xattr "
-        elif self.type == self.TRANS:
-            s = "fs_use_trans "
-        elif self.type == self.TASK:
-            s = "fs_use_task "
-
-        return "%s %s %s;" % (s, self.filesystem, str(self.context))
-
-class PortCon(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.port_type = ""
-        self.port_number = ""
-        self.context = None
-
-    def to_string(self):
-        return "portcon %s %s %s" % (self.port_type, self.port_number, str(self.context))
-
-class NodeCon(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
-        self.start = ""
-        self.end = ""
-        self.context = None
-
-    def to_string(self):
-        return "nodecon %s %s %s" % (self.start, self.end, str(self.context))
-
-class NetifCon(Leaf):
-    def __init__(self, parent=None):
-        Leaf.__init__(self, parent)
self.interface = "" - self.interface_context = None - self.packet_context = None - - def to_string(self): - return "netifcon %s %s %s" % (self.interface, str(self.interface_context), - str(self.packet_context)) -class PirqCon(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.pirq_number = "" - self.context = None - - def to_string(self): - return "pirqcon %s %s" % (self.pirq_number, str(self.context)) - -class IomemCon(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.device_mem = "" - self.context = None - - def to_string(self): - return "iomemcon %s %s" % (self.device_mem, str(self.context)) - -class IoportCon(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.ioport = "" - self.context = None - - def to_string(self): - return "ioportcon %s %s" % (self.ioport, str(self.context)) - -class PciDeviceCon(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.device = "" - self.context = None - - def to_string(self): - return "pcidevicecon %s %s" % (self.device, str(self.context)) - -class DeviceTreeCon(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.path = "" - self.context = None - - def to_string(self): - return "devicetreecon %s %s" % (self.path, str(self.context)) - -# Reference policy specific types - -def print_tree(head): - for node, depth in walktree(head, showdepth=True): - s = "" - for i in range(depth): - s = s + "\t" - print s + str(node) - - -class Headers(Node): - def __init__(self, parent=None): - Node.__init__(self, parent) - - def to_string(self): - return "[Headers]" - - -class Module(Node): - def __init__(self, parent=None): - Node.__init__(self, parent) - - def to_string(self): - return "" - -class Interface(Node): - """A reference policy interface definition. - - This class represents a reference policy interface definition. 
- """ - def __init__(self, name="", parent=None): - Node.__init__(self, parent) - self.name = name - - def to_string(self): - return "[Interface name: %s]" % self.name - -class TunablePolicy(Node): - def __init__(self, parent=None): - Node.__init__(self, parent) - self.cond_expr = [] - - def to_string(self): - return "[Tunable Policy %s]" % list_to_space_str(self.cond_expr, cont=("", "")) - -class Template(Node): - def __init__(self, name="", parent=None): - Node.__init__(self, parent) - self.name = name - - def to_string(self): - return "[Template name: %s]" % self.name - -class IfDef(Node): - def __init__(self, name="", parent=None): - Node.__init__(self, parent) - self.name = name - - def to_string(self): - return "[Ifdef name: %s]" % self.name - -class InterfaceCall(Leaf): - def __init__(self, ifname="", parent=None): - Leaf.__init__(self, parent) - self.ifname = ifname - self.args = [] - self.comments = [] - - def matches(self, other): - if self.ifname != other.ifname: - return False - if len(self.args) != len(other.args): - return False - for a,b in zip(self.args, other.args): - if a != b: - return False - return True - - def to_string(self): - s = "%s(" % self.ifname - i = 0 - for a in self.args: - if isinstance(a, list): - str = list_to_space_str(a) - else: - str = a - - if i != 0: - s = s + ", %s" % str - else: - s = s + str - i += 1 - return s + ")" - -class OptionalPolicy(Node): - def __init__(self, parent=None): - Node.__init__(self, parent) - - def to_string(self): - return "[Optional Policy]" - -class SupportMacros(Node): - def __init__(self, parent=None): - Node.__init__(self, parent) - self.map = None - - def to_string(self): - return "[Support Macros]" - - def __expand_perm(self, perm): - # Recursive expansion - the assumption is that these - # are ordered correctly so that no macro is used before - # it is defined - s = set() - if self.map.has_key(perm): - for p in self.by_name(perm): - s.update(self.__expand_perm(p)) - else: - s.add(perm) - return s - - def __gen_map(self): - self.map = {} - for x in self: - exp_perms = set() - for perm in x.perms: - exp_perms.update(self.__expand_perm(perm)) - self.map[x.name] = exp_perms - - def by_name(self, name): - if not self.map: - self.__gen_map() - return self.map[name] - - def has_key(self, name): - if not self.map: - self.__gen_map() - return self.map.has_key(name) - -class Require(Leaf): - def __init__(self, parent=None): - Leaf.__init__(self, parent) - self.types = IdSet() - self.obj_classes = { } - self.roles = IdSet() - self.data = IdSet() - self.users = IdSet() - - def add_obj_class(self, obj_class, perms): - p = self.obj_classes.setdefault(obj_class, IdSet()) - p.update(perms) - - - def to_string(self): - s = [] - s.append("require {") - for type in self.types: - s.append("\ttype %s;" % type) - for obj_class, perms in self.obj_classes.items(): - s.append("\tclass %s %s;" % (obj_class, perms.to_space_str())) - for role in self.roles: - s.append("\trole %s;" % role) - for bool in self.data: - s.append("\tbool %s;" % bool) - for user in self.users: - s.append("\tuser %s;" % user) - s.append("}") - - # Handle empty requires - if len(s) == 2: - return "" - - return "\n".join(s) - - -class ObjPermSet: - def __init__(self, name): - self.name = name - self.perms = set() - - def to_string(self): - return "define(`%s', `%s')" % (self.name, self.perms.to_space_str()) - -class ClassMap: - def __init__(self, obj_class, perms): - self.obj_class = obj_class - self.perms = perms - - def to_string(self): - return self.obj_class + ": " + 
self.perms - -class Comment: - def __init__(self, l=None): - if l: - self.lines = l - else: - self.lines = [] - - def to_string(self): - # If there are no lines, treat this as a spacer between - # policy statements and return a new line. - if len(self.lines) == 0: - return "" - else: - out = [] - for line in self.lines: - out.append("#" + line) - return "\n".join(out) - - def merge(self, other): - if len(other.lines): - for line in other.lines: - if line != "": - self.lines.append(line) - - def __str__(self): - return self.to_string() - - diff --git a/lib/python2.7/site-packages/sepolgen/sepolgeni18n.py b/lib/python2.7/site-packages/sepolgen/sepolgeni18n.py deleted file mode 100644 index 998c435..0000000 --- a/lib/python2.7/site-packages/sepolgen/sepolgeni18n.py +++ /dev/null @@ -1,26 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -try: - import gettext - t = gettext.translation( 'yumex' ) - _ = t.gettext -except: - def _(str): - return str diff --git a/lib/python2.7/site-packages/sepolgen/util.py b/lib/python2.7/site-packages/sepolgen/util.py deleted file mode 100644 index 74a11f5..0000000 --- a/lib/python2.7/site-packages/sepolgen/util.py +++ /dev/null @@ -1,87 +0,0 @@ -# Authors: Karl MacMillan <kmacmillan@mentalrootkit.com> -# -# Copyright (C) 2006 Red Hat -# see file 'COPYING' for use and warranty information -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; version 2 only -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -class ConsoleProgressBar: - def __init__(self, out, steps=100, indicator='#'): - self.blocks = 0 - self.current = 0 - self.steps = steps - self.indicator = indicator - self.out = out - self.done = False - - def start(self, message=None): - self.done = False - if message: - self.out.write('\n%s:\n' % message) - self.out.write('%--10---20---30---40---50---60---70---80---90--100\n') - - def step(self, n=1): - self.current += n - - old = self.blocks - self.blocks = int(round(self.current / float(self.steps) * 100) / 2) - - if self.blocks > 50: - self.blocks = 50 - - new = self.blocks - old - - self.out.write(self.indicator * new) - self.out.flush() - - if self.blocks == 50 and not self.done: - self.done = True - self.out.write("\n") - -def set_to_list(s): - l = [] - l.extend(s) - return l - -def first(s, sorted=False): - """ - Return the first element of a set. - - It sometimes useful to return the first element from a set but, - because sets are not indexable, this is rather hard. This function - will return the first element from a set. If sorted is True, then - the set will first be sorted (making this an expensive operation). - Otherwise a random element will be returned (as sets are not ordered). - """ - if not len(s): - raise IndexError("empty containter") - - if sorted: - l = set_to_list(s) - l.sort() - return l[0] - else: - for x in s: - return x - -if __name__ == "__main__": - import sys - import time - p = ConsoleProgressBar(sys.stdout, steps=999) - p.start("computing pi") - for i in range(999): - p.step() - time.sleep(0.001) - diff --git a/lib/python2.7/site-packages/sepolgen/yacc.py b/lib/python2.7/site-packages/sepolgen/yacc.py deleted file mode 100644 index bc4536d..0000000 --- a/lib/python2.7/site-packages/sepolgen/yacc.py +++ /dev/null @@ -1,2209 +0,0 @@ -#----------------------------------------------------------------------------- -# ply: yacc.py -# -# Author(s): David M. Beazley (dave@dabeaz.com) -# -# Copyright (C) 2001-2006, David M. Beazley -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# See the file COPYING for a complete copy of the LGPL. -# -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). 
However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. -# -# :::::::: WARNING ::::::: -# -# Construction of LR parsing tables is fairly complicated and expensive. -# To make this module run fast, a *LOT* of work has been put into -# optimization---often at the expensive of readability and what might -# consider to be good Python "coding style." Modify the code at your -# own risk! -# ---------------------------------------------------------------------------- - -__version__ = "2.2" - -#----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -#----------------------------------------------------------------------------- - -yaccdebug = 1 # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory - -debug_file = 'parser.out' # Default name of the debugging file -tab_module = 'parsetab' # Default name of the table module -default_lr = 'LALR' # Default LR table generation method - -error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -import re, types, sys, cStringIO, hashlib, os.path - -# Exception raised for yacc-related errors -class YaccError(Exception): pass - -#----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -#----------------------------------------------------------------------------- - -# This class is used to hold non-terminal grammar symbols during parsing. -# It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) - -class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. 
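Concretely, a grammar rule sees one of these objects as its single argument. A generic ply illustration (not from sepolgen's grammar):

def p_expression_plus(p):
    'expression : expression PLUS term'
    # p[1] and p[3] read the .value of the child symbols; assigning to
    # p[0] sets the value of the resulting 'expression' symbol.
    p[0] = p[1] + p[3]
    # p.lineno(1), p.linespan(1), and p.lexspan(1) expose the position
    # information described above.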
- -class YaccProduction: - def __init__(self,s,stack=None): - self.slice = s - self.pbstack = [] - self.stack = stack - - def __getitem__(self,n): - if type(n) == types.IntType: - if n >= 0: return self.slice[n].value - else: return self.stack[n].value - else: - return [s.value for s in self.slice[n.start:n.stop:n.step]] - - def __setitem__(self,n,v): - self.slice[n].value = v - - def __len__(self): - return len(self.slice) - - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) - - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline - - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) - - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos - - def pushback(self,n): - if n <= 0: - raise ValueError, "Expected a positive value" - if n > (len(self.slice)-1): - raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1) - for i in range(0,n): - self.pbstack.append(self.slice[-i-1]) - -# The LR Parsing engine. This is defined as a class so that multiple parsers -# can exist in the same process. A user never instantiates this directly. -# Instead, the global yacc() function should be used to create a suitable Parser -# object. - -class Parser: - def __init__(self,magic=None): - - # This is a hack to keep users from trying to instantiate a Parser - # object directly. - - if magic != "xyzzy": - raise YaccError, "Can't instantiate Parser. Use yacc() instead." - - # Reset internal state - self.productions = None # List of productions - self.errorfunc = None # Error handling function - self.action = { } # LR Action table - self.goto = { } # LR goto table - self.require = { } # Attribute require table - self.method = "Unknown LR" # Table construction method used - - def errok(self): - self.errorcount = 0 - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - def parse(self,input=None,lexer=None,debug=0): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table - goto = self.goto # Local reference to goto table - prod = self.productions # Local reference to production list - pslice = YaccProduction(None) # Production object passed to grammar rules - pslice.parser = self # Parser object - self.errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - import lex - lexer = lex.lexer - - pslice.lexer = lexer - - # If input was supplied, pass to lexer - if input: - lexer.input(input) - - # Tokenize function - get_token = lexer.token - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. 
Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - if debug > 1: - print 'state', statestack[-1] - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - if debug: - errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip() - - # Check the action table - s = statestack[-1] - ltype = lookahead.type - t = actions.get((s,ltype),None) - - if debug > 1: - print 'action', t - if t is not None: - if t > 0: - # shift a symbol on the stack - if ltype == '$end': - # Error, end of input - sys.stderr.write("yacc: Parse error. EOF\n") - return - statestack.append(t) - if debug > 1: - sys.stderr.write("%-60s shift state %s\n" % (errorlead, t)) - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if self.errorcount > 0: - self.errorcount -= 1 - - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - if debug > 1: - sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t)) - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - try: - sym.lineno = targ[1].lineno - sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) - sym.lexpos = targ[1].lexpos - sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos) - except AttributeError: - sym.lineno = 0 - del symstack[-plen:] - del statestack[-plen:] - else: - sym.lineno = 0 - targ = [ sym ] - pslice.slice = targ - pslice.pbstack = [] - # Call the grammar rule with our special slice object - p.func(pslice) - - # If there was a pushback, put that on the stack - if pslice.pbstack: - lookaheadstack.append(lookahead) - for _t in pslice.pbstack: - lookaheadstack.append(_t) - lookahead = None - - symstack.append(sym) - statestack.append(goto[statestack[-1],pname]) - continue - - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) - sys.stderr.write(errorlead, "\n") - - if t == None: - if debug: - sys.stderr.write(errorlead + "\n") - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if not self.errorcount: - self.errorcount = error_count - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if not self.errorcount: - # User must have done some kind of panic - # mode recovery on their own. 
The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - self.errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - - continue - - # Call an error function here - raise RuntimeError, "yacc: internal parser error!!!\n" - -# ----------------------------------------------------------------------------- -# === Parser Construction === -# -# The following functions and variables are used to implement the yacc() function -# itself. This is pretty hairy stuff involving lots of error checking, -# construction of LR items, kernels, and so forth. Although a lot of -# this work is done using global variables, the resulting Parser object -# is completely self contained--meaning that it is safe to repeatedly -# call yacc() with different grammars in the same application. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# validate_file() -# -# This function checks to see if there are duplicated p_rulename() functions -# in the parser module file. Without this function, it is really easy for -# users to make mistakes by cutting and pasting code fragments (and it's a real -# bugger to try and figure out why the resulting parser doesn't work). Therefore, -# we just do a little regular expression pattern matching of def statements -# to try and detect duplicates. -# ----------------------------------------------------------------------------- - -def validate_file(filename): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return 1 # Oh well - - # Match def p_funcname( - fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - counthash = { } - linen = 1 - noerror = 1 - for l in lines: - m = fre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - sys.stderr.write("%s:%d: Function %s redefined. 
Previously defined on line %d\n" % (filename,linen,name,prev)) - noerror = 0 - linen += 1 - return noerror - -# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. -def validate_dict(d): - for n,v in d.items(): - if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue - if n[0:2] == 't_': continue - - if n[0:2] == 'p_': - sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n) - if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n)) - except StandardError: - pass - -# ----------------------------------------------------------------------------- -# === GRAMMAR FUNCTIONS === -# -# The following global variables and functions are used to store, manipulate, -# and verify the grammar rules specified by the user. -# ----------------------------------------------------------------------------- - -# Initialize all of the global variables used during grammar construction -def initialize_vars(): - global Productions, Prodnames, Prodmap, Terminals - global Nonterminals, First, Follow, Precedence, LRitems - global Errorfunc, Signature, Requires - - Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar - - Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - Prodmap = { } # A dictionary that is only used to detect duplicate - # productions. - - Terminals = { } # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - Nonterminals = { } # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - First = { } # A dictionary of precomputed FIRST(x) symbols - - Follow = { } # A dictionary of precomputed FOLLOW(x) symbols - - Precedence = { } # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - LRitems = [ ] # A list of all LR items for the grammar. These are the - # productions with the "dot" like E -> E . PLUS E - - Errorfunc = None # User defined error handler - - Signature = hashlib.sha256() # Digital signature of the grammar rules, precedence - # and other information. Used to determined when a - # parsing table needs to be regenerated. - - Requires = { } # Requires list - - # File objects used when creating the parser.out debugging file - global _vf, _vfc - _vf = cStringIO.StringIO() - _vfc = cStringIO.StringIO() - -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# It has a few required attributes: -# -# name - Name of the production (nonterminal) -# prod - A list of symbols making up its production -# number - Production number. -# -# In addition, a few additional attributes are used to help with debugging or -# optimization of table generation. -# -# file - File where production action is defined. -# lineno - Line number where action is defined -# func - Action function -# prec - Precedence level -# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' -# then lr_next refers to 'E -> E PLUS . 
E' -# lr_index - LR item index (location of the ".") in the prod list. -# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# ----------------------------------------------------------------------------- - -class Production: - def __init__(self,**kw): - for k,v in kw.items(): - setattr(self,k,v) - self.lr_index = -1 - self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure - self.lr1_added = 0 # Flag indicating whether or not added to LR1 - self.usyms = [ ] - self.lookaheads = { } - self.lk_added = { } - self.setnumbers = [ ] - - def __str__(self): - if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) - else: - s = "%s -> <empty>" % self.name - return s - - def __repr__(self): - return str(self) - - # Compute lr_items from the production - def lr_item(self,n): - if n > len(self.prod): return None - p = Production() - p.name = self.name - p.prod = list(self.prod) - p.number = self.number - p.lr_index = n - p.lookaheads = { } - p.setnumbers = self.setnumbers - p.prod.insert(n,".") - p.prod = tuple(p.prod) - p.len = len(p.prod) - p.usyms = self.usyms - - # Precompute list of productions immediately following - try: - p.lrafter = Prodnames[p.prod[n+1]] - except (IndexError,KeyError),e: - p.lrafter = [] - try: - p.lrbefore = p.prod[n-1] - except IndexError: - p.lrbefore = None - - return p - -class MiniProduction: - pass - -# regex matching identifiers -_is_identifier = re.compile(r'^[a-zA-Z0-9_-~]+$') - -# ----------------------------------------------------------------------------- -# add_production() -# -# Given an action function, this function assembles a production rule. -# The production rule is assumed to be found in the function's docstring. -# This rule has the general syntax: -# -# name1 ::= production1 -# | production2 -# | production3 -# ... -# | productionn -# name2 ::= production1 -# | production2 -# ... -# ----------------------------------------------------------------------------- - -def add_production(f,file,line,prodname,syms): - - if Terminals.has_key(prodname): - sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname)) - return -1 - if prodname == 'error': - sys.stderr.write("%s:%d: Illegal rule name '%s'. 
error is a reserved word.\n" % (file,line,prodname)) - return -1 - - if not _is_identifier.match(prodname): - sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname)) - return -1 - - for x in range(len(syms)): - s = syms[x] - if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) - return -1 - if not Terminals.has_key(c): - Terminals[c] = [] - syms[x] = c - continue - except SyntaxError: - pass - if not _is_identifier.match(s) and s != '%prec': - sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname)) - return -1 - - # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) - if Prodmap.has_key(map): - m = Prodmap[map] - sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m)) - sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line)) - return -1 - - p = Production() - p.name = prodname - p.prod = syms - p.file = file - p.line = line - p.func = f - p.number = len(Productions) - - - Productions.append(p) - Prodmap[map] = p - if not Nonterminals.has_key(prodname): - Nonterminals[prodname] = [ ] - - # Add all terminals to Terminals - i = 0 - while i < len(p.prod): - t = p.prod[i] - if t == '%prec': - try: - precname = p.prod[i+1] - except IndexError: - sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line)) - return -1 - - prec = Precedence.get(precname,None) - if not prec: - sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname)) - return -1 - else: - p.prec = prec - del p.prod[i] - del p.prod[i] - continue - - if Terminals.has_key(t): - Terminals[t].append(p.number) - # Is a terminal. 
We'll assign a precedence to p based on this - if not hasattr(p,"prec"): - p.prec = Precedence.get(t,('right',0)) - else: - if not Nonterminals.has_key(t): - Nonterminals[t] = [ ] - Nonterminals[t].append(p.number) - i += 1 - - if not hasattr(p,"prec"): - p.prec = ('right',0) - - # Set final length of productions - p.len = len(p.prod) - p.prod = tuple(p.prod) - - # Calculate unique syms in the production - p.usyms = [ ] - for s in p.prod: - if s not in p.usyms: - p.usyms.append(s) - - # Add to the global productions list - try: - Prodnames[p.name].append(p) - except KeyError: - Prodnames[p.name] = [ p ] - return 0 - -# Given a raw rule function, this function rips out its doc string -# and adds rules to the grammar - -def add_function(f): - line = f.func_code.co_firstlineno - file = f.func_code.co_filename - error = 0 - - if isinstance(f,types.MethodType): - reqdargs = 2 - else: - reqdargs = 1 - - if f.func_code.co_argcount > reqdargs: - sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__)) - return -1 - - if f.func_code.co_argcount < reqdargs: - sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__)) - return -1 - - if f.__doc__: - # Split the doc string into lines - pstrings = f.__doc__.splitlines() - lastp = None - dline = line - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: continue - try: - if p[0] == '|': - # This is a continuation of a previous rule - if not lastp: - sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline)) - return -1 - prodname = lastp - if len(p) > 1: - syms = p[1:] - else: - syms = [ ] - else: - prodname = p[0] - lastp = prodname - assign = p[1] - if len(p) > 2: - syms = p[2:] - else: - syms = [ ] - if assign != ':' and assign != '::=': - sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline)) - return -1 - - - e = add_production(f,file,dline,prodname,syms) - error += e - - - except StandardError: - sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps)) - error -= 1 - else: - sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__)) - return error - - -# Cycle checking code (Michael Dyck) - -def compute_reachable(): - ''' - Find each symbol that can be reached from the start symbol. - Print a warning for any nonterminals that can't be reached. - (Unused terminals have already had their warning.) - ''' - Reachable = { } - for s in Terminals.keys() + Nonterminals.keys(): - Reachable[s] = 0 - - mark_reachable_from( Productions[0].prod[0], Reachable ) - - for s in Nonterminals.keys(): - if not Reachable[s]: - sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s) - -def mark_reachable_from(s, Reachable): - ''' - Mark all symbols that are reachable from symbol s. - ''' - if Reachable[s]: - # We've already reached symbol s. - return - Reachable[s] = 1 - for p in Prodnames.get(s,[]): - for r in p.prod: - mark_reachable_from(r, Reachable) - -# ----------------------------------------------------------------------------- -# compute_terminates() -# -# This function looks at the various parsing rules and tries to detect -# infinite recursion cycles (grammar rules where there is no possible way -# to derive a string of only terminals). -# ----------------------------------------------------------------------------- -def compute_terminates(): - ''' - Raise an error for any symbols that don't terminate. 
- ''' - Terminates = {} - - # Terminals: - for t in Terminals.keys(): - Terminates[t] = 1 - - Terminates['$end'] = 1 - - # Nonterminals: - - # Initialize to false: - for n in Nonterminals.keys(): - Terminates[n] = 0 - - # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not Terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = 0 - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = 1 - - if p_terminates: - # symbol n terminates! - if not Terminates[n]: - Terminates[n] = 1 - some_change = 1 - # Don't need to consider any more productions for this n. - break - - if not some_change: - break - - some_error = 0 - for (s,terminates) in Terminates.items(): - if not terminates: - if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s) - some_error = 1 - - return some_error - -# ----------------------------------------------------------------------------- -# verify_productions() -# -# This function examines all of the supplied rules to see if they seem valid. -# ----------------------------------------------------------------------------- -def verify_productions(cycle_check=1): - error = 0 - for p in Productions: - if not p: continue - - for s in p.prod: - if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': - sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s)) - error = 1 - continue - - unused_tok = 0 - # Now verify all of the tokens - if yaccdebug: - _vf.write("Unused terminals:\n\n") - for s,v in Terminals.items(): - if s != 'error' and not v: - sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s) - if yaccdebug: _vf.write(" %s\n"% s) - unused_tok += 1 - - # Print out all of the productions - if yaccdebug: - _vf.write("\nGrammar\n\n") - for i in range(1,len(Productions)): - _vf.write("Rule %-5d %s\n" % (i, Productions[i])) - - unused_prod = 0 - # Verify the use of all productions - for s,v in Nonterminals.items(): - if not v: - p = Prodnames[s][0] - sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s)) - unused_prod += 1 - - - if unused_tok == 1: - sys.stderr.write("yacc: Warning. There is 1 unused token.\n") - if unused_tok > 1: - sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok) - - if unused_prod == 1: - sys.stderr.write("yacc: Warning. There is 1 unused rule.\n") - if unused_prod > 1: - sys.stderr.write("yacc: Warning. 
There are %d unused rules.\n" % unused_prod) - - if yaccdebug: - _vf.write("\nTerminals, with rules where they appear\n\n") - ks = Terminals.keys() - ks.sort() - for k in ks: - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) - _vf.write("\nNonterminals, with rules where they appear\n\n") - ks = Nonterminals.keys() - ks.sort() - for k in ks: - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) - - if (cycle_check): - compute_reachable() - error += compute_terminates() -# error += check_cycles() - return error - -# ----------------------------------------------------------------------------- -# build_lritems() -# -# This function walks the list of productions and builds a complete set of the -# LR items. The LR items are stored in two ways: First, they are uniquely -# numbered and placed in the list _lritems. Second, a linked list of LR items -# is built for each production. For example: -# -# E -> E PLUS E -# -# Creates the list -# -# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] -# ----------------------------------------------------------------------------- - -def build_lritems(): - for p in Productions: - lastlri = p - lri = p.lr_item(0) - i = 0 - while 1: - lri = p.lr_item(i) - lastlri.lr_next = lri - if not lri: break - lri.lr_num = len(LRitems) - LRitems.append(lri) - lastlri = lri - i += 1 - - # In order for the rest of the parser generator to work, we need to - # guarantee that no more lritems are generated. Therefore, we nuke - # the p.lr_item method. (Only used in debugging) - # Production.lr_item = None - -# ----------------------------------------------------------------------------- -# add_precedence() -# -# Given a list of precedence rules, add to the precedence table. -# ----------------------------------------------------------------------------- - -def add_precedence(plist): - plevel = 0 - error = 0 - for p in plist: - plevel += 1 - try: - prec = p[0] - terms = p[1:] - if prec != 'left' and prec != 'right' and prec != 'nonassoc': - sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec) - return -1 - for t in terms: - if Precedence.has_key(t): - sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t) - error += 1 - continue - Precedence[t] = (prec,plevel) - except: - sys.stderr.write("yacc: Invalid precedence table.\n") - error += 1 - - return error - -# ----------------------------------------------------------------------------- -# augment_grammar() -# -# Compute the augmented grammar. This is just a rule S' -> start where start -# is the starting symbol. -# ----------------------------------------------------------------------------- - -def augment_grammar(start=None): - if not start: - start = Productions[1].name - Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) - Productions[0].usyms = [ start ] - Nonterminals[start].append(0) - - -# ------------------------------------------------------------------------- -# first() -# -# Compute the value of FIRST1(beta) where beta is a tuple of symbols. -# -# During execution of compute_first1, the result may be incomplete. -# Afterward (e.g., when called from compute_follow()), it will be complete. -# ------------------------------------------------------------------------- -def first(beta): - - # We are computing First(x1,x2,x3,...,xn) - result = [ ] - for x in beta: - x_produces_empty = 0 - - # Add all the non-<empty> symbols of First[x] to the result. 
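    # Worked instance (hypothetical First values): with First['X'] = ['a', '<empty>']
    # and First['b'] = ['b'], first(('X', 'b')) collects 'a', sees that 'X' can derive
    # <empty>, moves on to 'b', and returns ['a', 'b']. '<empty>' itself is appended
    # only when every symbol in beta can derive empty.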
- for f in First[x]: - if f == '<empty>': - x_produces_empty = 1 - else: - if f not in result: result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: - # We don't have to consider any further symbols in beta. - break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append('<empty>') - - return result - - -# FOLLOW(x) -# Given a non-terminal. This function computes the set of all symbols -# that might follow it. Dragon book, p. 189. - -def compute_follow(start=None): - # Add '$end' to the follow list of the start symbol - for k in Nonterminals.keys(): - Follow[k] = [ ] - - if not start: - start = Productions[1].name - - Follow[start] = [ '$end' ] - - while 1: - didadd = 0 - for p in Productions[1:]: - # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] - if Nonterminals.has_key(B): - # Okay. We got a non-terminal in a production - fst = first(p.prod[i+1:]) - hasempty = 0 - for f in fst: - if f != '<empty>' and f not in Follow[B]: - Follow[B].append(f) - didadd = 1 - if f == '<empty>': - hasempty = 1 - if hasempty or i == (len(p.prod)-1): - # Add elements of follow(a) to follow(b) - for f in Follow[p.name]: - if f not in Follow[B]: - Follow[B].append(f) - didadd = 1 - if not didadd: break - - if 0 and yaccdebug: - _vf.write('\nFollow:\n') - for k in Nonterminals.keys(): - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) - -# ------------------------------------------------------------------------- -# compute_first1() -# -# Compute the value of FIRST1(X) for all symbols -# ------------------------------------------------------------------------- -def compute_first1(): - - # Terminals: - for t in Terminals.keys(): - First[t] = [t] - - First['$end'] = ['$end'] - First['#'] = ['#'] # what's this for? - - # Nonterminals: - - # Initialize to the empty set: - for n in Nonterminals.keys(): - First[n] = [] - - # Then propagate symbols until no change: - while 1: - some_change = 0 - for n in Nonterminals.keys(): - for p in Prodnames[n]: - for f in first(p.prod): - if f not in First[n]: - First[n].append( f ) - some_change = 1 - if not some_change: - break - - if 0 and yaccdebug: - _vf.write('\nFirst:\n') - for k in Nonterminals.keys(): - _vf.write("%-20s : %s\n" % - (k, " ".join([str(s) for s in First[k]]))) - -# ----------------------------------------------------------------------------- -# === SLR Generation === -# -# The following functions are used to construct SLR (Simple LR) parsing tables -# as described on p.221-229 of the dragon book. -# ----------------------------------------------------------------------------- - -# Global variables for the LR parsing engine -def lr_init_vars(): - global _lr_action, _lr_goto, _lr_method - global _lr_goto_cache, _lr0_cidhash - - _lr_action = { } # Action table - _lr_goto = { } # Goto table - _lr_method = "Unknown" # LR method used - _lr_goto_cache = { } - _lr0_cidhash = { } - - -# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. -# prodlist is a list of productions. 
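Before the LR(0) closure code below, a standalone sketch of the FIRST fixpoint that compute_first1() above implements; the toy grammar and names are hypothetical, with terminals in lowercase and an empty right-hand side standing for a nullable production.

    # Productions as (name, right-hand side); an empty rhs derives <empty>.
    toy = [
        ('E', ['T', 'plus', 'E']),
        ('E', ['T']),
        ('T', ['id']),
        ('T', []),
    ]
    FIRST = {'plus': ['plus'], 'id': ['id'], 'E': [], 'T': []}

    changed = True
    while changed:                       # propagate until no set grows
        changed = False
        for name, rhs in toy:
            for sym in rhs:
                for f in FIRST[sym]:
                    if f != '<empty>' and f not in FIRST[name]:
                        FIRST[name].append(f)
                        changed = True
                if '<empty>' not in FIRST[sym]:
                    break                # sym is not nullable; stop scanning rhs
            else:
                if '<empty>' not in FIRST[name]:
                    FIRST[name].append('<empty>')
                    changed = True

    print(sorted(FIRST['E']))            # ['<empty>', 'id', 'plus']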
- -_add_count = 0 # Counter used to detect cycles - -def lr0_closure(I): - global _add_count - - _add_count += 1 - prodlist = Productions - - # Add everything in I to J - J = I[:] - didadd = 1 - while didadd: - didadd = 0 - for j in J: - for x in j.lrafter: - if x.lr0_added == _add_count: continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = _add_count - didadd = 1 - - return J - -# Compute the LR(0) goto function goto(I,X) where I is a set -# of LR(0) items and X is a grammar symbol. This function is written -# in a way that guarantees uniqueness of the generated goto sets -# (i.e. the same goto set will never be returned as two different Python -# objects). With uniqueness, we can later do fast set comparisons using -# id(obj) instead of element-wise comparison. - -def lr0_goto(I,x): - # First we look for a previously cached entry - g = _lr_goto_cache.get((id(I),x),None) - if g: return g - - # Now we generate the goto set in a way that guarantees uniqueness - # of the result - - s = _lr_goto_cache.get(x,None) - if not s: - s = { } - _lr_goto_cache[x] = s - - gs = [ ] - for p in I: - n = p.lr_next - if n and n.lrbefore == x: - s1 = s.get(id(n),None) - if not s1: - s1 = { } - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get('$end',None) - if not g: - if gs: - g = lr0_closure(gs) - s['$end'] = g - else: - s['$end'] = gs - _lr_goto_cache[(id(I),x)] = g - return g - -_lr0_cidhash = { } - -# Compute the LR(0) sets of item function -def lr0_items(): - - C = [ lr0_closure([Productions[0].lr_next]) ] - i = 0 - for I in C: - _lr0_cidhash[id(I)] = i - i += 1 - - # Loop over the items in C and each grammar symbols - i = 0 - while i < len(C): - I = C[i] - i += 1 - - # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } - for ii in I: - for s in ii.usyms: - asyms[s] = None - - for x in asyms.keys(): - g = lr0_goto(I,x) - if not g: continue - if _lr0_cidhash.has_key(id(g)): continue - _lr0_cidhash[id(g)] = len(C) - C.append(g) - - return C - -# ----------------------------------------------------------------------------- -# ==== LALR(1) Parsing ==== -# -# LALR(1) parsing is almost exactly the same as SLR except that instead of -# relying upon Follow() sets when performing reductions, a more selective -# lookahead set that incorporates the state of the LR(0) machine is utilized. -# Thus, we mainly just have to focus on calculating the lookahead sets. -# -# The method used here is due to DeRemer and Pennelo (1982). -# -# DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) -# Lookahead Sets", ACM Transactions on Programming Languages and Systems, -# Vol. 4, No. 4, Oct. 1982, pp. 615-649 -# -# Further details can also be found in: -# -# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", -# McGraw-Hill Book Company, (1985). -# -# Note: This implementation is a complete replacement of the LALR(1) -# implementation in PLY-1.x releases. That version was based on -# a less efficient algorithm and it had bugs in its implementation. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# compute_nullable_nonterminals() -# -# Creates a dictionary containing all of the non-terminals that might produce -# an empty production. 
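The function defined next is a classic fixpoint; here is a minimal standalone sketch of the same idea on a hypothetical toy grammar, kept separate from the Productions bookkeeping used by the real code.

    # Productions as (name, rhs); A is nullable directly, B only via A A.
    toy = [
        ('A', []),
        ('A', ['a']),
        ('B', ['A', 'A']),
        ('C', ['b']),
    ]
    nullable = set()
    changed = True
    while changed:
        changed = False
        for name, rhs in toy:
            if name not in nullable and all(s in nullable for s in rhs):
                nullable.add(name)
                changed = True
    print(sorted(nullable))   # ['A', 'B']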
-# ----------------------------------------------------------------------------- - -def compute_nullable_nonterminals(): - nullable = {} - num_nullable = 0 - while 1: - for p in Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 - continue - for t in p.prod: - if not nullable.has_key(t): break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) - return nullable - -# ----------------------------------------------------------------------------- -# find_nonterminal_trans(C) -# -# Given a set of LR(0) items, this functions finds all of the non-terminal -# transitions. These are transitions in which a dot appears immediately before -# a non-terminal. Returns a list of tuples of the form (state,N) where state -# is the state number and N is the nonterminal symbol. -# -# The input C is the set of LR(0) items. -# ----------------------------------------------------------------------------- - -def find_nonterminal_transitions(C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if Nonterminals.has_key(t[1]): - if t not in trans: trans.append(t) - state = state + 1 - return trans - -# ----------------------------------------------------------------------------- -# dr_relation() -# -# Computes the DR(p,A) relationships for non-terminal transitions. The input -# is a tuple (state,N) where state is a number and N is a nonterminal symbol. -# -# Returns a list of terminals. -# ----------------------------------------------------------------------------- - -def dr_relation(C,trans,nullable): - dr_set = { } - state,N = trans - terms = [] - - g = lr0_goto(C[state],N) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if Terminals.has_key(a): - if a not in terms: terms.append(a) - - # This extra bit is to handle the start state - if state == 0 and N == Productions[0].prod[0]: - terms.append('$end') - - return terms - -# ----------------------------------------------------------------------------- -# reads_relation() -# -# Computes the READS() relation (p,A) READS (t,C). -# ----------------------------------------------------------------------------- - -def reads_relation(C, trans, empty): - # Look for empty transitions - rel = [] - state, N = trans - - g = lr0_goto(C[state],N) - j = _lr0_cidhash.get(id(g),-1) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if empty.has_key(a): - rel.append((j,a)) - - return rel - -# ----------------------------------------------------------------------------- -# compute_lookback_includes() -# -# Determines the lookback and includes relations -# -# LOOKBACK: -# -# This relation is determined by running the LR(0) state machine forward. -# For example, starting with a production "N : . A B C", we run it forward -# to obtain "N : A B C ." We then build a relationship between this final -# state and the starting state. These relationships are stored in a dictionary -# lookdict. -# -# INCLUDES: -# -# Computes the INCLUDE() relation (p,A) INCLUDES (p',B). -# -# This relation is used to determine non-terminal transitions that occur -# inside of other non-terminal transition states. (p,A) INCLUDES (p', B) -# if the following holds: -# -# B -> LAT, where T -> epsilon and p' -L-> p -# -# L is essentially a prefix (which may be empty), T is a suffix that must be -# able to derive an empty string. State p' must lead to state p with the string L. 
-# -# ----------------------------------------------------------------------------- - -def compute_lookback_includes(C,trans,nullable): - - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations - - # Make a dictionary of non-terminal transitions - dtrans = {} - for t in trans: - dtrans[t] = 1 - - # Loop over all transitions and compute lookbacks and includes - for state,N in trans: - lookb = [] - includes = [] - for p in C[state]: - if p.name != N: continue - - # Okay, we have a name match. We now follow the production all the way - # through the state machine until we get the . on the right hand side - - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if dtrans.has_key((j,t)): - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if Terminals.has_key(p.prod[li]): break # No forget it - if not nullable.has_key(p.prod[li]): break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = lr0_goto(C[j],t) # Go to next set - j = _lr0_cidhash.get(id(g),-1) # Go to next state - - # When we get here, j is the final state, now we have to locate the production - for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) - for i in includes: - if not includedict.has_key(i): includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb - - return lookdict,includedict - -# ----------------------------------------------------------------------------- -# digraph() -# traverse() -# -# The following two functions are used to compute set valued functions -# of the form: -# -# F(x) = F'(x) U U{F(y) | x R y} -# -# This is used to compute the values of Read() sets as well as FOLLOW sets -# in LALR(1) generation. -# -# Inputs: X - An input set -# R - A relation -# FP - Set-valued function -# ------------------------------------------------------------------------------ - -def digraph(X,R,FP): - N = { } - for x in X: - N[x] = 0 - stack = [] - F = { } - for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) - return F - -def traverse(x,N,stack,F,X,R,FP): - stack.append(x) - d = len(stack) - N[x] = d - F[x] = FP(x) # F(X) <- F'(x) - - rel = R(x) # Get y's related to x - for y in rel: - if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) - if N[x] == d: - N[stack[-1]] = sys.maxint - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = sys.maxint - F[stack[-1]] = F[x] - element = stack.pop() - -# ----------------------------------------------------------------------------- -# compute_read_sets() -# -# Given a set of LR(0) items, this function computes the read sets. 
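The digraph()/traverse() pair above computes set-valued fixpoints of the form F(x) = F'(x) U union{ F(y) | x R y }, which is exactly how the Read and Follow sets that follow are produced. A minimal iterative sketch of the same semantics on a hypothetical relation (the real code uses the linear-time SCC-based traversal instead of naive iteration):

    X = ['a', 'b', 'c']
    R = {'a': ['b'], 'b': ['c'], 'c': ['b']}     # note the b <-> c cycle
    FP = {'a': ['1'], 'b': ['2'], 'c': ['3']}    # F'(x), the base sets

    F = dict((x, list(FP[x])) for x in X)
    changed = True
    while changed:                               # naive fixpoint iteration
        changed = False
        for x in X:
            for y in R.get(x, []):
                for v in F[y]:
                    if v not in F[x]:
                        F[x].append(v)
                        changed = True
    print(sorted(F['a']))                        # ['1', '2', '3']

traverse() collapses the b/c cycle so each strongly connected component's set is computed once, which is what keeps the LALR(1) lookahead computation efficient.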
-# -# Inputs: C = Set of LR(0) items -# ntrans = Set of nonterminal transitions -# nullable = Set of empty transitions -# -# Returns a set containing the read sets -# ----------------------------------------------------------------------------- - -def compute_read_sets(C, ntrans, nullable): - FP = lambda x: dr_relation(C,x,nullable) - R = lambda x: reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) - return F - -# ----------------------------------------------------------------------------- -# compute_follow_sets() -# -# Given a set of LR(0) items, a set of non-terminal transitions, a readset, -# and an include set, this function computes the follow sets -# -# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} -# -# Inputs: -# ntrans = Set of nonterminal transitions -# readsets = Readset (previously computed) -# inclsets = Include sets (previously computed) -# -# Returns a set containing the follow sets -# ----------------------------------------------------------------------------- - -def compute_follow_sets(ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F - -# ----------------------------------------------------------------------------- -# add_lookaheads() -# -# Attaches the lookahead symbols to grammar rules. -# -# Inputs: lookbacks - Set of lookback relations -# followset - Computed follow set -# -# This function directly attaches the lookaheads to productions contained -# in the lookbacks set -# ----------------------------------------------------------------------------- - -def add_lookaheads(lookbacks,followset): - for trans,lb in lookbacks.items(): - # Loop over productions in lookback - for state,p in lb: - if not p.lookaheads.has_key(state): - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) - -# ----------------------------------------------------------------------------- -# add_lalr_lookaheads() -# -# This function does all of the work of adding lookahead information for use -# with LALR parsing -# ----------------------------------------------------------------------------- - -def add_lalr_lookaheads(C): - # Determine all of the nullable nonterminals - nullable = compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = find_nonterminal_transitions(C) - - # Compute read sets - readsets = compute_read_sets(C,trans,nullable) - - # Compute lookback/includes relations - lookd, included = compute_lookback_includes(C,trans,nullable) - - # Compute LALR FOLLOW sets - followsets = compute_follow_sets(trans,readsets,included) - - # Add all of the lookaheads - add_lookaheads(lookd,followsets) - -# ----------------------------------------------------------------------------- -# lr_parse_table() -# -# This function constructs the parse tables for SLR or LALR -# ----------------------------------------------------------------------------- -def lr_parse_table(method): - global _lr_method - goto = _lr_goto # Goto array - action = _lr_action # Action array - actionp = { } # Action production array (temporary) - - _lr_method = method - - n_srconflict = 0 - n_rrconflict = 0 - - if yaccdebug: - sys.stderr.write("yacc: Generating %s parsing table...\n" % method) - _vf.write("\n\nParsing method: %s\n\n" % method) - - # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items - # This determines the number of states - - C = lr0_items() - - if method == 'LALR': - add_lalr_lookaheads(C) - - # Build the parser table, state by state - st = 0 - for I in C: - # Loop over each production in I - actlist = [ ] # List of actions - - if yaccdebug: - _vf.write("\nstate %d\n\n" % st) - for p in I: - _vf.write(" (%d) %s\n" % (p.number, str(p))) - _vf.write("\n") - - for p in I: - try: - if p.prod[-1] == ".": - if p.name == "S'": - # Start symbol. Accept! - action[st,"$end"] = 0 - actionp[st,"$end"] = p - else: - # We are at the end of a production. Reduce! - if method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = Follow[p.name] - for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = action.get((st,a),None) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec,slevel = Productions[actionp[st,a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - action[st,a] = -p.number - actionp[st,a] = p - if not slevel and not rlevel: - _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) - n_srconflict += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) - n_srconflict +=1 - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - action[st,a] = -p.number - actionp[st,a] = p - # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st) - n_rrconflict += 1 - _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) - _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) - else: - sys.stderr.write("Unknown conflict in state %d\n" % st) - else: - action[st,a] = -p.number - actionp[st,a] = p - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if Terminals.has_key(a): - g = lr0_goto(I,a) - j = _lr0_cidhash.get(id(g),-1) - if j >= 0: - # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = action.get((st,a),None) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - sys.stderr.write("Shift/shift conflict in state %d\n" % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec,rlevel = Productions[actionp[st,a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): - # We decide to shift here... highest precedence to shift - action[st,a] = j - actionp[st,a] = p - if not rlevel: - n_srconflict += 1 - _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) - _vf.write(" ! 
shift/reduce conflict for %s resolved as shift.\n" % a) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - n_srconflict +=1 - _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) - - else: - sys.stderr.write("Unknown conflict in state %d\n" % st) - else: - action[st,a] = j - actionp[st,a] = p - - except StandardError,e: - raise YaccError, "Hosed in lr_parse_table", e - - # Print the actions associated with each terminal - if yaccdebug: - _actprint = { } - for a,p,m in actlist: - if action.has_key((st,a)): - if p is actionp[st,a]: - _vf.write(" %-15s %s\n" % (a,m)) - _actprint[(a,m)] = 1 - _vf.write("\n") - for a,p,m in actlist: - if action.has_key((st,a)): - if p is not actionp[st,a]: - if not _actprint.has_key((a,m)): - _vf.write(" ! %-15s [ %s ]\n" % (a,m)) - _actprint[(a,m)] = 1 - - # Construct the goto table for this state - if yaccdebug: - _vf.write("\n") - nkeys = { } - for ii in I: - for s in ii.usyms: - if Nonterminals.has_key(s): - nkeys[s] = None - for n in nkeys.keys(): - g = lr0_goto(I,n) - j = _lr0_cidhash.get(id(g),-1) - if j >= 0: - goto[st,n] = j - if yaccdebug: - _vf.write(" %-30s shift and go to state %d\n" % (n,j)) - - st += 1 - - if yaccdebug: - if n_srconflict == 1: - sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict) - if n_srconflict > 1: - sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict) - if n_rrconflict == 1: - sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict) - if n_rrconflict > 1: - sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict) - -# ----------------------------------------------------------------------------- -# ==== LR Utility functions ==== -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# _lr_write_tables() -# -# This function writes the LR parsing tables to a file -# ----------------------------------------------------------------------------- - -def lr_write_tables(modulename=tab_module,outputdir=''): - filename = os.path.join(outputdir,modulename) + ".py" - try: - f = open(filename,"w") - - f.write(""" -# %s -# This file is automatically generated. Do not edit. 
- -_lr_method = %s - -_lr_signature = %s -""" % (filename, repr(_lr_method), repr(Signature.digest()))) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = { } - - for k,v in _lr_action.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - _lr_action[(_x,_k)] = _y -del _lr_action_items -""") - - else: - f.write("\n_lr_action = { "); - for k,v in _lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - if smaller: - # Factor out names to try and make smaller - items = { } - - for k,v in _lr_goto.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - _lr_goto[(_x,_k)] = _y -del _lr_goto_items -""") - else: - f.write("\n_lr_goto = { "); - for k,v in _lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - # Write production table - f.write("_lr_productions = [\n") - for p in Productions: - if p: - if (p.func): - f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) - else: - f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) - else: - f.write(" None,\n") - f.write("]\n") - - f.close() - - except IOError,e: - print "Unable to create '%s'" % filename - print e - return - -def lr_read_tables(module=tab_module,optimize=0): - global _lr_action, _lr_goto, _lr_productions, _lr_method - try: - exec "import %s as parsetab" % module - - if (optimize) or (Signature.digest() == parsetab._lr_signature): - _lr_action = parsetab._lr_action - _lr_goto = parsetab._lr_goto - _lr_productions = parsetab._lr_productions - _lr_method = parsetab._lr_method - return 1 - else: - return 0 - - except (ImportError,AttributeError): - return 0 - - -# Available instance types. This is used when parsers are defined by a class. -# it's a little funky because I want to preserve backwards compatibility -# with Python 2.0 where types.ObjectType is undefined. - -try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) -except AttributeError: - _INSTANCETYPE = types.InstanceType - -# ----------------------------------------------------------------------------- -# yacc(module) -# -# Build the parser module -# ----------------------------------------------------------------------------- - -def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''): - global yaccdebug - yaccdebug = debug - - initialize_vars() - files = { } - error = 0 - - - # Add parsing method to signature - Signature.update(method) - - # If a "module" parameter was supplied, extract its dictionary. - # Note: a module may in fact be an instance as well. - - if module: - # User supplied a module object. 
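An aside on the caching scheme implemented by lr_write_tables()/lr_read_tables() above: everything that affects the tables (method, start symbol, precedence, rule docstrings) is folded into the running Signature hash, and a stored parsetab module is reused only when its recorded digest matches. A minimal sketch of the idea with hypothetical names, using Python 2 string semantics as in this tree (Python 3 would need bytes):

    import hashlib

    def grammar_digest(method, start, docstrings):
        # fold every table-affecting input into one hash, mirroring the
        # Signature.update() calls scattered through yacc()
        sig = hashlib.sha256()
        sig.update(method)
        sig.update(start)
        for d in docstrings:
            sig.update(d)
        return sig.digest()

    cached = grammar_digest('LALR', 'expr', ['expr : expr PLUS expr'])
    fresh = grammar_digest('LALR', 'expr', ['expr : expr PLUS term'])
    print(fresh == cached)   # False -> the tables must be regenerated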
- if isinstance(module, types.ModuleType): - ldict = module.__dict__ - elif isinstance(module, _INSTANCETYPE): - _items = [(k,getattr(module,k)) for k in dir(module)] - ldict = { } - for i in _items: - ldict[i[0]] = i[1] - else: - raise ValueError,"Expected a module" - - else: - # No module given. We might be able to get information from the caller. - # Throw an exception and unwind the traceback to get the globals - - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - f = f.f_back # Walk out to our calling function - ldict = f.f_globals # Grab its globals dictionary - - # Add starting symbol to signature - if not start: - start = ldict.get("start",None) - if start: - Signature.update(start) - - # If running in optimized mode. We're going to - - if (optimize and lr_read_tables(tabmodule,1)): - # Read parse table - del Productions[:] - for p in _lr_productions: - if not p: - Productions.append(None) - else: - m = MiniProduction() - m.name = p[0] - m.len = p[1] - m.file = p[3] - m.line = p[4] - if p[2]: - m.func = ldict[p[2]] - Productions.append(m) - - else: - # Get the tokens map - if (module and isinstance(module,_INSTANCETYPE)): - tokens = getattr(module,"tokens",None) - else: - tokens = ldict.get("tokens",None) - - if not tokens: - raise YaccError,"module does not define a list 'tokens'" - if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): - raise YaccError,"tokens must be a list or tuple." - - # Check to see if a requires dictionary is defined. - requires = ldict.get("require",None) - if requires: - if not (isinstance(requires,types.DictType)): - raise YaccError,"require must be a dictionary." - - for r,v in requires.items(): - try: - if not (isinstance(v,types.ListType)): - raise TypeError - v1 = [x.split(".") for x in v] - Requires[r] = v1 - except StandardError: - print "Invalid specification for rule '%s' in require. Expected a list of strings" % r - - - # Build the dictionary of terminals. We a record a 0 in the - # dictionary to track whether or not a terminal is actually - # used in the grammar - - if 'error' in tokens: - print "yacc: Illegal token 'error'. Is a reserved word." - raise YaccError,"Illegal token name" - - for n in tokens: - if Terminals.has_key(n): - print "yacc: Warning. Token '%s' multiply defined." % n - Terminals[n] = [ ] - - Terminals['error'] = [ ] - - # Get the precedence map (if any) - prec = ldict.get("precedence",None) - if prec: - if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)): - raise YaccError,"precedence must be a list or tuple." - add_precedence(prec) - Signature.update(repr(prec)) - - for n in tokens: - if not Precedence.has_key(n): - Precedence[n] = ('right',0) # Default, right associative, 0 precedence - - # Look for error handler - ef = ldict.get('p_error',None) - if ef: - if isinstance(ef,types.FunctionType): - ismethod = 0 - elif isinstance(ef, types.MethodType): - ismethod = 1 - else: - raise YaccError,"'p_error' defined, but is not a function or method." - eline = ef.func_code.co_firstlineno - efile = ef.func_code.co_filename - files[efile] = None - - if (ef.func_code.co_argcount != 1+ismethod): - raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline) - global Errorfunc - Errorfunc = ef - else: - print "yacc: Warning. no p_error() function is defined." 
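The p_error hook checked for above, together with the special 'error' token threaded through the parse loop earlier, is what user grammars rely on for recovery. A typical pair, sketched with hypothetical rule and token names:

    def p_error(tok):
        # called once per error burst; tok is None at end of input
        if tok:
            print("Syntax error at '%s' on line %d" % (tok.type, tok.lineno))
        else:
            print("Syntax error at end of input")

    def p_statement_error(p):
        'statement : error SEMI'
        # resynchronize on the next semicolon and carry on
        p[0] = None

The parse loop shifts the special 'error' symbol, discards input until a rule like the one above can reduce, and leaves recovery mode after error_count clean shifts.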
-
- # Get the list of built-in functions with p_ prefix
- symbols = [ldict[f] for f in ldict.keys()
- if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_'
- and ldict[f].__name__ != 'p_error')]
-
- # Check for non-empty symbols
- if len(symbols) == 0:
- raise YaccError,"no rules of the form p_rulename are defined."
-
- # Sort the symbols by line number
- symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
-
- # Add all of the symbols to the grammar
- for f in symbols:
- if (add_function(f)) < 0:
- error += 1
- else:
- files[f.func_code.co_filename] = None
-
- # Make a signature of the docstrings
- for f in symbols:
- if f.__doc__:
- Signature.update(f.__doc__)
-
- lr_init_vars()
-
- if error:
- raise YaccError,"Unable to construct parser."
-
- if not lr_read_tables(tabmodule):
-
- # Validate files
- for filename in files.keys():
- if not validate_file(filename):
- error = 1
-
- # Validate dictionary
- validate_dict(ldict)
-
- if start and not Prodnames.has_key(start):
- raise YaccError,"Bad starting symbol '%s'" % start
-
- augment_grammar(start)
- error = verify_productions(cycle_check=check_recursion)
- otherfunc = [ldict[f] for f in ldict.keys()
- if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]
-
- if error:
- raise YaccError,"Unable to construct parser."
-
- build_lritems()
- compute_first1()
- compute_follow(start)
-
- if method in ['SLR','LALR']:
- lr_parse_table(method)
- else:
- raise YaccError, "Unknown parsing method '%s'" % method
-
- if write_tables:
- lr_write_tables(tabmodule,outputdir)
-
- if yaccdebug:
- try:
- f = open(os.path.join(outputdir,debugfile),"w")
- f.write(_vfc.getvalue())
- f.write("\n\n")
- f.write(_vf.getvalue())
- f.close()
- except IOError,e:
- print "yacc: can't create '%s'" % debugfile,e
-
- # Made it here. Create a parser object and set up its internal state.
- # Set global parse() method to bound method of parser object.
-
- p = Parser("xyzzy")
- p.productions = Productions
- p.errorfunc = Errorfunc
- p.action = _lr_action
- p.goto = _lr_goto
- p.method = _lr_method
- p.require = Requires
-
- global parse
- parse = p.parse
-
- global parser
- parser = p
-
- # Clean up all of the globals we created
- if (not optimize):
- yacc_cleanup()
- return p
-
-# yacc_cleanup function. Delete all of the global variables
-# used during table construction
-
-def yacc_cleanup():
- global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
- del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
-
- global Productions, Prodnames, Prodmap, Terminals
- global Nonterminals, First, Follow, Precedence, LRitems
- global Errorfunc, Signature, Requires
-
- del Productions, Prodnames, Prodmap, Terminals
- del Nonterminals, First, Follow, Precedence, LRitems
- del Errorfunc, Signature, Requires
-
- global _vf, _vfc
- del _vf, _vfc
-
-
-# Stub that raises an error if parsing is attempted without first calling yacc()
-def parse(*args,**kwargs):
- raise YaccError, "yacc: No parser built with yacc()"
-
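Putting the whole module in context: a grammar is just a set of p_-prefixed functions whose docstrings carry the BNF, plus a tokens list, handed to yacc(). A minimal end-to-end sketch, assuming the bundled lex.py and yacc.py from this same sepolgen tree are importable as lex and yacc (with upstream PLY the imports would be ply.lex and ply.yacc):

    import lex
    import yacc

    tokens = ('NUMBER', 'PLUS')

    t_PLUS = r'\+'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        t.lexer.skip(1)

    precedence = (('left', 'PLUS'),)    # resolves the shift/reduce ambiguity

    def p_expr_plus(p):
        'expr : expr PLUS expr'
        p[0] = p[1] + p[3]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(tok):
        print("Syntax error")

    lex.lex()
    parser = yacc.yacc(write_tables=0)  # skip the parsetab.py cache
    print(parser.parse('1 + 2 + 3'))    # 6

Without the precedence declaration, the ambiguous 'expr PLUS expr' rule would be reported as a shift/reduce conflict and resolved in favor of shifting, exactly as lr_parse_table() above documents.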