From 74e4f93e81205ac85f74eaf5fd4c86515779aee1 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Mon, 8 Feb 2016 15:27:10 -0800 Subject: Add sepolgen for audit2allow (cherry picked from commit fcd1ca896d5e281901703561abc303cf92423daa) Addresses: ImportError: No module named sepolgen.audit Change-Id: I320a5c772bd55cc223d8484b6b8db22bacd2b4c5 --- lib/python2.7/site-packages/sepolgen/__init__.py | 0 lib/python2.7/site-packages/sepolgen/access.py | 332 +++ lib/python2.7/site-packages/sepolgen/audit.py | 556 +++++ lib/python2.7/site-packages/sepolgen/classperms.py | 116 + lib/python2.7/site-packages/sepolgen/defaults.py | 77 + lib/python2.7/site-packages/sepolgen/interfaces.py | 509 +++++ lib/python2.7/site-packages/sepolgen/lex.py | 872 ++++++++ lib/python2.7/site-packages/sepolgen/matching.py | 252 +++ lib/python2.7/site-packages/sepolgen/module.py | 216 ++ .../site-packages/sepolgen/objectmodel.py | 172 ++ lib/python2.7/site-packages/sepolgen/output.py | 177 ++ lib/python2.7/site-packages/sepolgen/policygen.py | 400 ++++ lib/python2.7/site-packages/sepolgen/refparser.py | 1129 ++++++++++ lib/python2.7/site-packages/sepolgen/refpolicy.py | 916 ++++++++ .../site-packages/sepolgen/sepolgeni18n.py | 26 + lib/python2.7/site-packages/sepolgen/util.py | 182 ++ lib/python2.7/site-packages/sepolgen/yacc.py | 2214 ++++++++++++++++++++ 17 files changed, 8146 insertions(+) create mode 100644 lib/python2.7/site-packages/sepolgen/__init__.py create mode 100644 lib/python2.7/site-packages/sepolgen/access.py create mode 100644 lib/python2.7/site-packages/sepolgen/audit.py create mode 100644 lib/python2.7/site-packages/sepolgen/classperms.py create mode 100644 lib/python2.7/site-packages/sepolgen/defaults.py create mode 100644 lib/python2.7/site-packages/sepolgen/interfaces.py create mode 100644 lib/python2.7/site-packages/sepolgen/lex.py create mode 100644 lib/python2.7/site-packages/sepolgen/matching.py create mode 100644 lib/python2.7/site-packages/sepolgen/module.py create mode 100644 lib/python2.7/site-packages/sepolgen/objectmodel.py create mode 100644 lib/python2.7/site-packages/sepolgen/output.py create mode 100644 lib/python2.7/site-packages/sepolgen/policygen.py create mode 100644 lib/python2.7/site-packages/sepolgen/refparser.py create mode 100644 lib/python2.7/site-packages/sepolgen/refpolicy.py create mode 100644 lib/python2.7/site-packages/sepolgen/sepolgeni18n.py create mode 100644 lib/python2.7/site-packages/sepolgen/util.py create mode 100644 lib/python2.7/site-packages/sepolgen/yacc.py (limited to 'lib') diff --git a/lib/python2.7/site-packages/sepolgen/__init__.py b/lib/python2.7/site-packages/sepolgen/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/python2.7/site-packages/sepolgen/access.py b/lib/python2.7/site-packages/sepolgen/access.py new file mode 100644 index 0000000..a5d8698 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/access.py @@ -0,0 +1,332 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +Classes representing basic access. + +SELinux - at the most basic level - represents access as +the 4-tuple subject (type or context), target (type or context), +object class, permission. The policy language elaborates this basic +access to faciliate more concise rules (e.g., allow rules can have multiple +source or target types - see refpolicy for more information). + +This module has objects for representing the most basic access (AccessVector) +and sets of that access (AccessVectorSet). These objects are used in Madison +in a variety of ways, but they are the fundamental representation of access. +""" + +from . import refpolicy +from . import util + +from selinux import audit2why + +def is_idparam(id): + """Determine if an id is a paramater in the form $N, where N is + an integer. + + Returns: + True if the id is a paramater + False if the id is not a paramater + """ + if len(id) > 1 and id[0] == '$': + try: + int(id[1:]) + except ValueError: + return False + return True + else: + return False + +class AccessVector(util.Comparison): + """ + An access vector is the basic unit of access in SELinux. + + Access vectors are the most basic representation of access within + SELinux. It represents the access a source type has to a target + type in terms of an object class and a set of permissions. + + Access vectors are distinct from AVRules in that they can only + store a single source type, target type, and object class. The + simplicity of AccessVectors makes them useful for storing access + in a form that is easy to search and compare. + + The source, target, and object are stored as string. No checking + done to verify that the strings are valid SELinux identifiers. + Identifiers in the form $N (where N is an integer) are reserved as + interface parameters and are treated as wild cards in many + circumstances. + + Properties: + .src_type - The source type allowed access. [String or None] + .tgt_type - The target type to which access is allowed. [String or None] + .obj_class - The object class to which access is allowed. [String or None] + .perms - The permissions allowed to the object class. [IdSet] + .audit_msgs - The audit messages that generated this access vector [List of strings] + """ + def __init__(self, init_list=None): + if init_list: + self.from_list(init_list) + else: + self.src_type = None + self.tgt_type = None + self.obj_class = None + self.perms = refpolicy.IdSet() + self.audit_msgs = [] + self.type = audit2why.TERULE + self.data = [] + # when implementing __eq__ also __hash__ is needed on py2 + # if object is muttable __hash__ should be None + self.__hash__ = None + + # The direction of the information flow represented by this + # access vector - used for matching + self.info_flow_dir = None + + def from_list(self, list): + """Initialize an access vector from a list. + + Initialize an access vector from a list treating the list as + positional arguments - i.e., 0 = src_type, 1 = tgt_type, etc. + All of the list elements 3 and greater are treated as perms. + For example, the list ['foo_t', 'bar_t', 'file', 'read', 'write'] + would create an access vector list with the source type 'foo_t', + target type 'bar_t', object class 'file', and permissions 'read' + and 'write'. 
+ + This format is useful for very simple storage to strings or disc + (see to_list) and for initializing access vectors. + """ + if len(list) < 4: + raise ValueError("List must contain at least four elements %s" % str(list)) + self.src_type = list[0] + self.tgt_type = list[1] + self.obj_class = list[2] + self.perms = refpolicy.IdSet(list[3:]) + + def to_list(self): + """ + Convert an access vector to a list. + + Convert an access vector to a list treating the list as positional + values. See from_list for more information on how an access vector + is represented in a list. + """ + l = [self.src_type, self.tgt_type, self.obj_class] + l.extend(sorted(self.perms)) + return l + + def __str__(self): + return self.to_string() + + def to_string(self): + return "allow %s %s:%s %s;" % (self.src_type, self.tgt_type, + self.obj_class, self.perms.to_space_str()) + + def _compare(self, other, method): + try: + x = list(self.perms) + a = (self.src_type, self.tgt_type, self.obj_class, x) + y = list(other.perms) + x.sort() + y.sort() + b = (other.src_type, other.tgt_type, other.obj_class, y) + return method(a, b) + except (AttributeError, TypeError): + # trying to compare to foreign type + return NotImplemented + + +def avrule_to_access_vectors(avrule): + """Convert an avrule into a list of access vectors. + + AccessVectors and AVRules are similary, but differ in that + an AVRule can more than one source type, target type, and + object class. This function expands a single avrule into a + list of one or more AccessVectors representing the access + defined in the AVRule. + + + """ + if isinstance(avrule, AccessVector): + return [avrule] + a = [] + for src_type in avrule.src_types: + for tgt_type in avrule.tgt_types: + for obj_class in avrule.obj_classes: + access = AccessVector() + access.src_type = src_type + access.tgt_type = tgt_type + access.obj_class = obj_class + access.perms = avrule.perms.copy() + a.append(access) + return a + +class AccessVectorSet: + """A non-overlapping set of access vectors. + + An AccessVectorSet is designed to store one or more access vectors + that are non-overlapping. Access can be added to the set + incrementally and access vectors will be added or merged as + necessary. For example, adding the following access vectors using + add_av: + allow $1 etc_t : read; + allow $1 etc_t : write; + allow $1 var_log_t : read; + Would result in an access vector set with the access vectors: + allow $1 etc_t : { read write}; + allow $1 var_log_t : read; + """ + def __init__(self): + """Initialize an access vector set. + """ + self.src = {} + # The information flow direction of this access vector + # set - see objectmodel.py for more information. This + # stored here to speed up searching - see matching.py. + self.info_dir = None + + def __iter__(self): + """Iterate over all of the unique access vectors in the set.""" + for tgts in self.src.values(): + for objs in tgts.values(): + for av in objs.values(): + yield av + + def __len__(self): + """Return the number of unique access vectors in the set. + + Because of the inernal representation of the access vector set, + __len__ is not a constant time operation. Worst case is O(N) + where N is the number of unique access vectors, but the common + case is probably better. + """ + l = 0 + for tgts in self.src.values(): + for objs in tgts.values(): + l += len(objs) + return l + + def to_list(self): + """Return the unique access vectors in the set as a list. 
+ + The format of the returned list is a set of nested lists, + each access vector represented by a list. This format is + designed to be simply serializable to a file. + + For example, consider an access vector set with the following + access vectors: + allow $1 user_t : file read; + allow $1 etc_t : file { read write}; + to_list would return the following: + [[$1, user_t, file, read] + [$1, etc_t, file, read, write]] + + See AccessVector.to_list for more information. + """ + l = [] + for av in self: + l.append(av.to_list()) + + return l + + def from_list(self, l): + """Add access vectors stored in a list. + + See to list for more information on the list format that this + method accepts. + + This will add all of the access from the list. Any existing + access vectors in the set will be retained. + """ + for av in l: + self.add_av(AccessVector(av)) + + def add(self, src_type, tgt_type, obj_class, perms, audit_msg=None, avc_type=audit2why.TERULE, data=[]): + """Add an access vector to the set. + """ + tgt = self.src.setdefault(src_type, { }) + cls = tgt.setdefault(tgt_type, { }) + + if (obj_class, avc_type) in cls: + access = cls[obj_class, avc_type] + else: + access = AccessVector() + access.src_type = src_type + access.tgt_type = tgt_type + access.obj_class = obj_class + access.data = data + access.type = avc_type + cls[obj_class, avc_type] = access + + access.perms.update(perms) + if audit_msg: + access.audit_msgs.append(audit_msg) + + def add_av(self, av, audit_msg=None): + """Add an access vector to the set.""" + self.add(av.src_type, av.tgt_type, av.obj_class, av.perms) + + +def avs_extract_types(avs): + types = refpolicy.IdSet() + for av in avs: + types.add(av.src_type) + types.add(av.tgt_type) + + return types + +def avs_extract_obj_perms(avs): + perms = { } + for av in avs: + if av.obj_class in perms: + s = perms[av.obj_class] + else: + s = refpolicy.IdSet() + perms[av.obj_class] = s + s.update(av.perms) + return perms + +class RoleTypeSet: + """A non-overlapping set of role type statements. + + This clas allows the incremental addition of role type statements and + maintains a non-overlapping list of statements. + """ + def __init__(self): + """Initialize an access vector set.""" + self.role_types = {} + + def __iter__(self): + """Iterate over all of the unique role allows statements in the set.""" + for role_type in self.role_types.values(): + yield role_type + + def __len__(self): + """Return the unique number of role allow statements.""" + return len(self.role_types.keys()) + + def add(self, role, type): + if role in self.role_types: + role_type = self.role_types[role] + else: + role_type = refpolicy.RoleType() + role_type.role = role + self.role_types[role] = role_type + + role_type.types.add(type) diff --git a/lib/python2.7/site-packages/sepolgen/audit.py b/lib/python2.7/site-packages/sepolgen/audit.py new file mode 100644 index 0000000..724d3ea --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/audit.py @@ -0,0 +1,556 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +import re +import sys + +from . import refpolicy +from . import access +from . import util +# Convenience functions + +def get_audit_boot_msgs(): + """Obtain all of the avc and policy load messages from the audit + log. This function uses ausearch and requires that the current + process have sufficient rights to run ausearch. + + Returns: + string contain all of the audit messages returned by ausearch. + """ + import subprocess + import time + fd=open("/proc/uptime", "r") + off=float(fd.read().split()[0]) + fd.close + s = time.localtime(time.time() - off) + bootdate = time.strftime("%x", s) + boottime = time.strftime("%X", s) + output = subprocess.Popen(["/sbin/ausearch", "-m", "AVC,USER_AVC,MAC_POLICY_LOAD,DAEMON_START,SELINUX_ERR", "-ts", bootdate, boottime], + stdout=subprocess.PIPE).communicate()[0] + if util.PY3: + output = util.decode_input(output) + return output + +def get_audit_msgs(): + """Obtain all of the avc and policy load messages from the audit + log. This function uses ausearch and requires that the current + process have sufficient rights to run ausearch. + + Returns: + string contain all of the audit messages returned by ausearch. + """ + import subprocess + output = subprocess.Popen(["/sbin/ausearch", "-m", "AVC,USER_AVC,MAC_POLICY_LOAD,DAEMON_START,SELINUX_ERR"], + stdout=subprocess.PIPE).communicate()[0] + if util.PY3: + output = util.decode_input(output) + return output + +def get_dmesg_msgs(): + """Obtain all of the avc and policy load messages from /bin/dmesg. + + Returns: + string contain all of the audit messages returned by dmesg. + """ + import subprocess + output = subprocess.Popen(["/bin/dmesg"], + stdout=subprocess.PIPE).communicate()[0] + if util.PY3: + output = util.decode_input(output) + return output + +# Classes representing audit messages + +class AuditMessage: + """Base class for all objects representing audit messages. + + AuditMessage is a base class for all audit messages and only + provides storage for the raw message (as a string) and a + parsing function that does nothing. + """ + def __init__(self, message): + self.message = message + self.header = "" + + def from_split_string(self, recs): + """Parse a string that has been split into records by space into + an audit message. + + This method should be overridden by subclasses. Error reporting + should be done by raise ValueError exceptions. + """ + for msg in recs: + fields = msg.split("=") + if len(fields) != 2: + if msg[:6] == "audit(": + self.header = msg + return + else: + continue + + if fields[0] == "msg": + self.header = fields[1] + return + + +class InvalidMessage(AuditMessage): + """Class representing invalid audit messages. This is used to differentiate + between audit messages that aren't recognized (that should return None from + the audit message parser) and a message that is recognized but is malformed + in some way. 
+ """ + def __init__(self, message): + AuditMessage.__init__(self, message) + +class PathMessage(AuditMessage): + """Class representing a path message""" + def __init__(self, message): + AuditMessage.__init__(self, message) + self.path = "" + + def from_split_string(self, recs): + AuditMessage.from_split_string(self, recs) + + for msg in recs: + fields = msg.split("=") + if len(fields) != 2: + continue + if fields[0] == "path": + self.path = fields[1][1:-1] + return +import selinux.audit2why as audit2why + +avcdict = {} + +class AVCMessage(AuditMessage): + """AVC message representing an access denial or granted message. + + This is a very basic class and does not represent all possible fields + in an avc message. Currently the fields are: + scontext - context for the source (process) that generated the message + tcontext - context for the target + tclass - object class for the target (only one) + comm - the process name + exe - the on-disc binary + path - the path of the target + access - list of accesses that were allowed or denied + denial - boolean indicating whether this was a denial (True) or granted + (False) message. + + An example audit message generated from the audit daemon looks like (line breaks + added): + 'type=AVC msg=audit(1155568085.407:10877): avc: denied { search } for + pid=677 comm="python" name="modules" dev=dm-0 ino=13716388 + scontext=user_u:system_r:setroubleshootd_t:s0 + tcontext=system_u:object_r:modules_object_t:s0 tclass=dir' + + An example audit message stored in syslog (not processed by the audit daemon - line + breaks added): + 'Sep 12 08:26:43 dhcp83-5 kernel: audit(1158064002.046:4): avc: denied { read } + for pid=2 496 comm="bluez-pin" name=".gdm1K3IFT" dev=dm-0 ino=3601333 + scontext=user_u:system_r:bluetooth_helper_t:s0-s0:c0 + tcontext=system_u:object_r:xdm_tmp_t:s0 tclass=file + """ + def __init__(self, message): + AuditMessage.__init__(self, message) + self.scontext = refpolicy.SecurityContext() + self.tcontext = refpolicy.SecurityContext() + self.tclass = "" + self.comm = "" + self.exe = "" + self.path = "" + self.name = "" + self.accesses = [] + self.denial = True + self.type = audit2why.TERULE + + def __parse_access(self, recs, start): + # This is kind of sucky - the access that is in a space separated + # list like '{ read write }'. This doesn't fit particularly well with splitting + # the string on spaces. This function takes the list of recs and a starting + # position one beyond the open brace. It then adds the accesses until it finds + # the close brace or the end of the list (which is an error if reached without + # seeing a close brace). 
+ found_close = False + i = start + if i == (len(recs) - 1): + raise ValueError("AVC message in invalid format [%s]\n" % self.message) + while i < len(recs): + if recs[i] == "}": + found_close = True + break + self.accesses.append(recs[i]) + i = i + 1 + if not found_close: + raise ValueError("AVC message in invalid format [%s]\n" % self.message) + return i + 1 + + + def from_split_string(self, recs): + AuditMessage.from_split_string(self, recs) + # FUTURE - fully parse avc messages and store all possible fields + # Required fields + found_src = False + found_tgt = False + found_class = False + found_access = False + + for i in range(len(recs)): + if recs[i] == "{": + i = self.__parse_access(recs, i + 1) + found_access = True + continue + elif recs[i] == "granted": + self.denial = False + + fields = recs[i].split("=") + if len(fields) != 2: + continue + if fields[0] == "scontext": + self.scontext = refpolicy.SecurityContext(fields[1]) + found_src = True + elif fields[0] == "tcontext": + self.tcontext = refpolicy.SecurityContext(fields[1]) + found_tgt = True + elif fields[0] == "tclass": + self.tclass = fields[1] + found_class = True + elif fields[0] == "comm": + self.comm = fields[1][1:-1] + elif fields[0] == "exe": + self.exe = fields[1][1:-1] + elif fields[0] == "name": + self.name = fields[1][1:-1] + + if not found_src or not found_tgt or not found_class or not found_access: + raise ValueError("AVC message in invalid format [%s]\n" % self.message) + self.analyze() + + def analyze(self): + tcontext = self.tcontext.to_string() + scontext = self.scontext.to_string() + access_tuple = tuple( self.accesses) + self.data = [] + + if (scontext, tcontext, self.tclass, access_tuple) in avcdict.keys(): + self.type, self.data = avcdict[(scontext, tcontext, self.tclass, access_tuple)] + else: + self.type, self.data = audit2why.analyze(scontext, tcontext, self.tclass, self.accesses); + if self.type == audit2why.NOPOLICY: + self.type = audit2why.TERULE + if self.type == audit2why.BADTCON: + raise ValueError("Invalid Target Context %s\n" % tcontext) + if self.type == audit2why.BADSCON: + raise ValueError("Invalid Source Context %s\n" % scontext) + if self.type == audit2why.BADSCON: + raise ValueError("Invalid Type Class %s\n" % self.tclass) + if self.type == audit2why.BADPERM: + raise ValueError("Invalid permission %s\n" % " ".join(self.accesses)) + if self.type == audit2why.BADCOMPUTE: + raise ValueError("Error during access vector computation") + + if self.type == audit2why.CONSTRAINT: + self.data = [ self.data ] + if self.scontext.user != self.tcontext.user: + self.data.append(("user (%s)" % self.scontext.user, 'user (%s)' % self.tcontext.user)) + if self.scontext.role != self.tcontext.role and self.tcontext.role != "object_r": + self.data.append(("role (%s)" % self.scontext.role, 'role (%s)' % self.tcontext.role)) + if self.scontext.level != self.tcontext.level: + self.data.append(("level (%s)" % self.scontext.level, 'level (%s)' % self.tcontext.level)) + + avcdict[(scontext, tcontext, self.tclass, access_tuple)] = (self.type, self.data) + +class PolicyLoadMessage(AuditMessage): + """Audit message indicating that the policy was reloaded.""" + def __init__(self, message): + AuditMessage.__init__(self, message) + +class DaemonStartMessage(AuditMessage): + """Audit message indicating that a daemon was started.""" + def __init__(self, message): + AuditMessage.__init__(self, message) + self.auditd = False + + def from_split_string(self, recs): + AuditMessage.from_split_string(self, recs) + if "auditd" in 
recs: + self.auditd = True + + +class ComputeSidMessage(AuditMessage): + """Audit message indicating that a sid was not valid. + + Compute sid messages are generated on attempting to create a security + context that is not valid. Security contexts are invalid if the role is + not authorized for the user or the type is not authorized for the role. + + This class does not store all of the fields from the compute sid message - + just the type and role. + """ + def __init__(self, message): + AuditMessage.__init__(self, message) + self.invalid_context = refpolicy.SecurityContext() + self.scontext = refpolicy.SecurityContext() + self.tcontext = refpolicy.SecurityContext() + self.tclass = "" + + def from_split_string(self, recs): + AuditMessage.from_split_string(self, recs) + if len(recs) < 10: + raise ValueError("Split string does not represent a valid compute sid message") + + try: + self.invalid_context = refpolicy.SecurityContext(recs[5]) + self.scontext = refpolicy.SecurityContext(recs[7].split("=")[1]) + self.tcontext = refpolicy.SecurityContext(recs[8].split("=")[1]) + self.tclass = recs[9].split("=")[1] + except: + raise ValueError("Split string does not represent a valid compute sid message") + def output(self): + return "role %s types %s;\n" % (self.role, self.type) + +# Parser for audit messages + +class AuditParser: + """Parser for audit messages. + + This class parses audit messages and stores them according to their message + type. This is not a general purpose audit message parser - it only extracts + selinux related messages. + + Each audit messages are stored in one of four lists: + avc_msgs - avc denial or granted messages. Messages are stored in + AVCMessage objects. + comput_sid_messages - invalid sid messages. Messages are stored in + ComputSidMessage objects. + invalid_msgs - selinux related messages that are not valid. Messages + are stored in InvalidMessageObjects. + policy_load_messages - policy load messages. Messages are stored in + PolicyLoadMessage objects. + + These lists will be reset when a policy load message is seen if + AuditParser.last_load_only is set to true. It is assumed that messages + are fed to the parser in chronological order - time stamps are not + parsed. + """ + def __init__(self, last_load_only=False): + self.__initialize() + self.last_load_only = last_load_only + + def __initialize(self): + self.avc_msgs = [] + self.compute_sid_msgs = [] + self.invalid_msgs = [] + self.policy_load_msgs = [] + self.path_msgs = [] + self.by_header = { } + self.check_input_file = False + + # Low-level parsing function - tries to determine if this audit + # message is an SELinux related message and then parses it into + # the appropriate AuditMessage subclass. This function deliberately + # does not impose policy (e.g., on policy load message) or store + # messages to make as simple and reusable as possible. + # + # Return values: + # None - no recognized audit message found in this line + # + # InvalidMessage - a recognized but invalid message was found. + # + # AuditMessage (or subclass) - object representing a parsed + # and valid audit message. 
+ def __parse_line(self, line): + rec = line.split() + for i in rec: + found = False + if i == "avc:" or i == "message=avc:" or i == "msg='avc:": + msg = AVCMessage(line) + found = True + elif i == "security_compute_sid:": + msg = ComputeSidMessage(line) + found = True + elif i == "type=MAC_POLICY_LOAD" or i == "type=1403": + msg = PolicyLoadMessage(line) + found = True + elif i == "type=AVC_PATH": + msg = PathMessage(line) + found = True + elif i == "type=DAEMON_START": + msg = DaemonStartMessage(list) + found = True + + if found: + self.check_input_file = True + try: + msg.from_split_string(rec) + except ValueError: + msg = InvalidMessage(line) + return msg + return None + + # Higher-level parse function - take a line, parse it into an + # AuditMessage object, and store it in the appropriate list. + # This function will optionally reset all of the lists when + # it sees a load policy message depending on the value of + # self.last_load_only. + def __parse(self, line): + msg = self.__parse_line(line) + if msg is None: + return + + # Append to the correct list + if isinstance(msg, PolicyLoadMessage): + if self.last_load_only: + self.__initialize() + elif isinstance(msg, DaemonStartMessage): + # We initialize every time the auditd is started. This + # is less than ideal, but unfortunately it is the only + # way to catch reboots since the initial policy load + # by init is not stored in the audit log. + if msg.auditd and self.last_load_only: + self.__initialize() + self.policy_load_msgs.append(msg) + elif isinstance(msg, AVCMessage): + self.avc_msgs.append(msg) + elif isinstance(msg, ComputeSidMessage): + self.compute_sid_msgs.append(msg) + elif isinstance(msg, InvalidMessage): + self.invalid_msgs.append(msg) + elif isinstance(msg, PathMessage): + self.path_msgs.append(msg) + + # Group by audit header + if msg.header != "": + if msg.header in self.by_header: + self.by_header[msg.header].append(msg) + else: + self.by_header[msg.header] = [msg] + + + # Post processing will add additional information from AVC messages + # from related messages - only works on messages generated by + # the audit system. + def __post_process(self): + for value in self.by_header.values(): + avc = [] + path = None + for msg in value: + if isinstance(msg, PathMessage): + path = msg + elif isinstance(msg, AVCMessage): + avc.append(msg) + if len(avc) > 0 and path: + for a in avc: + a.path = path.path + + def parse_file(self, input): + """Parse the contents of a file object. This method can be called + multiple times (along with parse_string).""" + line = input.readline() + while line: + self.__parse(line) + line = input.readline() + if not self.check_input_file: + sys.stderr.write("Nothing to do\n") + sys.exit(0) + self.__post_process() + + def parse_string(self, input): + """Parse a string containing audit messages - messages should + be separated by new lines. This method can be called multiple + times (along with parse_file).""" + lines = input.split('\n') + for l in lines: + self.__parse(l) + self.__post_process() + + def to_role(self, role_filter=None): + """Return RoleAllowSet statements matching the specified filter + + Filter out types that match the filer, or all roles + + Params: + role_filter - [optional] Filter object used to filter the + output. + Returns: + Access vector set representing the denied access in the + audit logs parsed by this object. 
+ """ + role_types = access.RoleTypeSet() + for cs in self.compute_sid_msgs: + if not role_filter or role_filter.filter(cs): + role_types.add(cs.invalid_context.role, cs.invalid_context.type) + + return role_types + + def to_access(self, avc_filter=None, only_denials=True): + """Convert the audit logs access into a an access vector set. + + Convert the audit logs into an access vector set, optionally + filtering the restults with the passed in filter object. + + Filter objects are object instances with a .filter method + that takes and access vector and returns True if the message + should be included in the final output and False otherwise. + + Params: + avc_filter - [optional] Filter object used to filter the + output. + Returns: + Access vector set representing the denied access in the + audit logs parsed by this object. + """ + av_set = access.AccessVectorSet() + for avc in self.avc_msgs: + if avc.denial != True and only_denials: + continue + if avc_filter: + if avc_filter.filter(avc): + av_set.add(avc.scontext.type, avc.tcontext.type, avc.tclass, + avc.accesses, avc, avc_type=avc.type, data=avc.data) + else: + av_set.add(avc.scontext.type, avc.tcontext.type, avc.tclass, + avc.accesses, avc, avc_type=avc.type, data=avc.data) + return av_set + +class AVCTypeFilter: + def __init__(self, regex): + self.regex = re.compile(regex) + + def filter(self, avc): + if self.regex.match(avc.scontext.type): + return True + if self.regex.match(avc.tcontext.type): + return True + return False + +class ComputeSidTypeFilter: + def __init__(self, regex): + self.regex = re.compile(regex) + + def filter(self, avc): + if self.regex.match(avc.invalid_context.type): + return True + if self.regex.match(avc.scontext.type): + return True + if self.regex.match(avc.tcontext.type): + return True + return False + + diff --git a/lib/python2.7/site-packages/sepolgen/classperms.py b/lib/python2.7/site-packages/sepolgen/classperms.py new file mode 100644 index 0000000..f4fd899 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/classperms.py @@ -0,0 +1,116 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +import sys + +tokens = ('DEFINE', + 'NAME', + 'TICK', + 'SQUOTE', + 'OBRACE', + 'CBRACE', + 'SEMI', + 'OPAREN', + 'CPAREN', + 'COMMA') + +reserved = { + 'define' : 'DEFINE' } + +t_TICK = r'\`' +t_SQUOTE = r'\'' +t_OBRACE = r'\{' +t_CBRACE = r'\}' +t_SEMI = r'\;' +t_OPAREN = r'\(' +t_CPAREN = r'\)' +t_COMMA = r'\,' + +t_ignore = " \t\n" + +def t_NAME(t): + r'[a-zA-Z_][a-zA-Z0-9_]*' + t.type = reserved.get(t.value,'NAME') + return t + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.skip(1) + +from . 
import lex +lex.lex() + +def p_statements(p): + '''statements : define_stmt + | define_stmt statements + ''' + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = [p[1]] + [p[2]] + +def p_define_stmt(p): + # This sucks - corresponds to 'define(`foo',`{ read write }') + '''define_stmt : DEFINE OPAREN TICK NAME SQUOTE COMMA TICK list SQUOTE CPAREN + ''' + + p[0] = [p[4], p[8]] + +def p_list(p): + '''list : NAME + | OBRACE names CBRACE + ''' + if p[1] == "{": + p[0] = p[2] + else: + p[0] = [p[1]] + +def p_names(p): + '''names : NAME + | NAME names + ''' + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = [p[1]] + p[2] + +def p_error(p): + print("Syntax error on line %d %s [type=%s]" % (p.lineno, p.value, p.type)) + +from . import yacc +yacc.yacc() + + +f = open("all_perms.spt") +txt = f.read() +f.close() + +#lex.input(txt) +#while 1: +# tok = lex.token() +# if not tok: +# break +# print tok + +test = "define(`foo',`{ read write append }')" +test2 = """define(`all_filesystem_perms',`{ mount remount unmount getattr relabelfrom relabelto transition associate quotamod quotaget }') +define(`all_security_perms',`{ compute_av compute_create compute_member check_context load_policy compute_relabel compute_user setenforce setbool setsecparam setcheckreqprot }') +""" +result = yacc.parse(txt) +print(result) + diff --git a/lib/python2.7/site-packages/sepolgen/defaults.py b/lib/python2.7/site-packages/sepolgen/defaults.py new file mode 100644 index 0000000..9591063 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/defaults.py @@ -0,0 +1,77 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +import os +import re + +# Select the correct location for the development files based on a +# path variable (optionally read from a configuration file) +class PathChoooser(object): + def __init__(self, pathname): + self.config = dict() + if not os.path.exists(pathname): + self.config_pathname = "(defaults)" + self.config["SELINUX_DEVEL_PATH"] = "/usr/share/selinux/default:/usr/share/selinux/mls:/usr/share/selinux/devel" + return + self.config_pathname = pathname + ignore = re.compile(r"^\s*(?:#.+)?$") + consider = re.compile(r"^\s*(\w+)\s*=\s*(.+?)\s*$") + for lineno, line in enumerate(open(pathname)): + if ignore.match(line): continue + mo = consider.match(line) + if not mo: + raise ValueError("%s:%d: line is not in key = value format" % (pathname, lineno+1)) + self.config[mo.group(1)] = mo.group(2) + + # We're only exporting one useful function, so why not be a function + def __call__(self, testfilename, pathset="SELINUX_DEVEL_PATH"): + paths = self.config.get(pathset, None) + if paths is None: + raise ValueError("%s was not in %s" % (pathset, self.config_pathname)) + paths = paths.split(":") + for p in paths: + target = os.path.join(p, testfilename) + if os.path.exists(target): return target + return os.path.join(paths[0], testfilename) + + +""" +Various default settings, including file and directory locations. +""" + +def data_dir(): + return "/var/lib/sepolgen" + +def perm_map(): + return data_dir() + "/perm_map" + +def interface_info(): + return data_dir() + "/interface_info" + +def attribute_info(): + return data_dir() + "/attribute_info" + +def refpolicy_makefile(): + chooser = PathChoooser("/etc/selinux/sepolgen.conf") + return chooser("Makefile") + +def headers(): + chooser = PathChoooser("/etc/selinux/sepolgen.conf") + return chooser("include") + diff --git a/lib/python2.7/site-packages/sepolgen/interfaces.py b/lib/python2.7/site-packages/sepolgen/interfaces.py new file mode 100644 index 0000000..48ae4f2 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/interfaces.py @@ -0,0 +1,509 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +Classes for representing and manipulating interfaces. +""" + +import copy +import itertools + +from . import access +from . import refpolicy +from . import objectmodel +from . import matching +from .sepolgeni18n import _ + + +class Param: + """ + Object representing a paramater for an interface. 
+ """ + def __init__(self): + self.__name = "" + self.type = refpolicy.SRC_TYPE + self.obj_classes = refpolicy.IdSet() + self.required = True + + def set_name(self, name): + if not access.is_idparam(name): + raise ValueError("Name [%s] is not a param" % name) + self.__name = name + + def get_name(self): + return self.__name + + name = property(get_name, set_name) + + num = property(fget=lambda self: int(self.name[1:])) + + def __repr__(self): + return "" % \ + (self.name, refpolicy.field_to_str[self.type], " ".join(self.obj_classes)) + + +# Helper for extract perms +def __param_insert(name, type, av, params): + ret = 0 + if name in params: + p = params[name] + # The entries are identical - we're done + if type == p.type: + return + # Hanldle implicitly typed objects (like process) + if (type == refpolicy.SRC_TYPE or type == refpolicy.TGT_TYPE) and \ + (p.type == refpolicy.TGT_TYPE or p.type == refpolicy.SRC_TYPE): + #print name, refpolicy.field_to_str[p.type] + # If the object is not implicitly typed, tell the + # caller there is a likely conflict. + ret = 1 + if av: + avobjs = [av.obj_class] + else: + avobjs = [] + for obj in itertools.chain(p.obj_classes, avobjs): + if obj in objectmodel.implicitly_typed_objects: + ret = 0 + break + # "Promote" to a SRC_TYPE as this is the likely usage. + # We do this even if the above test fails on purpose + # as there is really no sane way to resolve the conflict + # here. The caller can take other actions if needed. + p.type = refpolicy.SRC_TYPE + else: + # There is some conflict - no way to resolve it really + # so we just leave the first entry and tell the caller + # there was a conflict. + ret = 1 + else: + p = Param() + p.name = name + p.type = type + params[p.name] = p + + if av: + p.obj_classes.add(av.obj_class) + return ret + + + +def av_extract_params(av, params): + """Extract the paramaters from an access vector. + + Extract the paramaters (in the form $N) from an access + vector, storing them as Param objects in a dictionary. + Some attempt is made at resolving conflicts with other + entries in the dict, but if an unresolvable conflict is + found it is reported to the caller. + + The goal here is to figure out how interface paramaters are + actually used in the interface - e.g., that $1 is a domain used as + a SRC_TYPE. In general an interface will look like this: + + interface(`foo', ` + allow $1 foo : file read; + ') + + This is simple to figure out - $1 is a SRC_TYPE. A few interfaces + are more complex, for example: + + interface(`foo_trans',` + domain_auto_trans($1,fingerd_exec_t,fingerd_t) + + allow $1 fingerd_t:fd use; + allow fingerd_t $1:fd use; + allow fingerd_t $1:fifo_file rw_file_perms; + allow fingerd_t $1:process sigchld; + ') + + Here the usage seems ambigious, but it is not. $1 is still domain + and therefore should be returned as a SRC_TYPE. 
+ + Returns: + 0 - success + 1 - conflict found + """ + ret = 0 + found_src = False + if access.is_idparam(av.src_type): + if __param_insert(av.src_type, refpolicy.SRC_TYPE, av, params) == 1: + ret = 1 + + if access.is_idparam(av.tgt_type): + if __param_insert(av.tgt_type, refpolicy.TGT_TYPE, av, params) == 1: + ret = 1 + + if access.is_idparam(av.obj_class): + if __param_insert(av.obj_class, refpolicy.OBJ_CLASS, av, params) == 1: + ret = 1 + + for perm in av.perms: + if access.is_idparam(perm): + if __param_insert(perm, PERM) == 1: + ret = 1 + + return ret + +def role_extract_params(role, params): + if access.is_idparam(role.role): + return __param_insert(role.role, refpolicy.ROLE, None, params) + +def type_rule_extract_params(rule, params): + def extract_from_set(set, type): + ret = 0 + for x in set: + if access.is_idparam(x): + if __param_insert(x, type, None, params): + ret = 1 + return ret + + ret = 0 + if extract_from_set(rule.src_types, refpolicy.SRC_TYPE): + ret = 1 + + if extract_from_set(rule.tgt_types, refpolicy.TGT_TYPE): + ret = 1 + + if extract_from_set(rule.obj_classes, refpolicy.OBJ_CLASS): + ret = 1 + + if access.is_idparam(rule.dest_type): + if __param_insert(rule.dest_type, refpolicy.DEST_TYPE, None, params): + ret = 1 + + return ret + +def ifcall_extract_params(ifcall, params): + ret = 0 + for arg in ifcall.args: + if access.is_idparam(arg): + # Assume interface arguments are source types. Fairly safe + # assumption for most interfaces + if __param_insert(arg, refpolicy.SRC_TYPE, None, params): + ret = 1 + + return ret + +class AttributeVector: + def __init__(self): + self.name = "" + self.access = access.AccessVectorSet() + + def add_av(self, av): + self.access.add_av(av) + +class AttributeSet: + def __init__(self): + self.attributes = { } + + def add_attr(self, attr): + self.attributes[attr.name] = attr + + def from_file(self, fd): + def parse_attr(line): + fields = line[1:-1].split() + if len(fields) != 2 or fields[0] != "Attribute": + raise SyntaxError("Syntax error Attribute statement %s" % line) + a = AttributeVector() + a.name = fields[1] + + return a + + a = None + for line in fd: + line = line[:-1] + if line[0] == "[": + if a: + self.add_attr(a) + a = parse_attr(line) + elif a: + l = line.split(",") + av = access.AccessVector(l) + a.add_av(av) + if a: + self.add_attr(a) + +class InterfaceVector: + def __init__(self, interface=None, attributes={}): + # Enabled is a loose concept currently - we are essentially + # not enabling interfaces that we can't handle currently. + # See InterfaceVector.add_ifv for more information. + self.enabled = True + self.name = "" + # The access that is enabled by this interface - eventually + # this will include indirect access from typeattribute + # statements. + self.access = access.AccessVectorSet() + # Paramaters are stored in a dictionary (key: param name + # value: Param object). 
+ self.params = { } + if interface: + self.from_interface(interface, attributes) + self.expanded = False + + def from_interface(self, interface, attributes={}): + self.name = interface.name + + # Add allow rules + for avrule in interface.avrules(): + if avrule.rule_type != refpolicy.AVRule.ALLOW: + continue + # Handle some policy bugs + if "dontaudit" in interface.name: + #print "allow rule in interface: %s" % interface + continue + avs = access.avrule_to_access_vectors(avrule) + for av in avs: + self.add_av(av) + + # Add typeattribute access + if attributes: + for typeattribute in interface.typeattributes(): + for attr in typeattribute.attributes: + if attr not in attributes.attributes: + # print "missing attribute " + attr + continue + attr_vec = attributes.attributes[attr] + for a in attr_vec.access: + av = copy.copy(a) + if av.src_type == attr_vec.name: + av.src_type = typeattribute.type + if av.tgt_type == attr_vec.name: + av.tgt_type = typeattribute.type + self.add_av(av) + + + # Extract paramaters from roles + for role in interface.roles(): + if role_extract_params(role, self.params): + pass + #print "found conflicting role param %s for interface %s" % \ + # (role.name, interface.name) + # Extract paramaters from type rules + for rule in interface.typerules(): + if type_rule_extract_params(rule, self.params): + pass + #print "found conflicting params in rule %s in interface %s" % \ + # (str(rule), interface.name) + + for ifcall in interface.interface_calls(): + if ifcall_extract_params(ifcall, self.params): + pass + #print "found conflicting params in ifcall %s in interface %s" % \ + # (str(ifcall), interface.name) + + + def add_av(self, av): + if av_extract_params(av, self.params) == 1: + pass + #print "found conflicting perms [%s]" % str(av) + self.access.add_av(av) + + def to_string(self): + s = [] + s.append("[InterfaceVector %s]" % self.name) + for av in self.access: + s.append(str(av)) + return "\n".join(s) + + def __str__(self): + return self.__repr__() + + def __repr__(self): + return "" % (self.name, self.enabled) + + +class InterfaceSet: + def __init__(self, output=None): + self.interfaces = { } + self.tgt_type_map = { } + self.tgt_type_all = [] + self.output = output + + def o(self, str): + if self.output: + self.output.write(str + "\n") + + def to_file(self, fd): + for iv in sorted(self.interfaces.values(), key=lambda x: x.name): + fd.write("[InterfaceVector %s " % iv.name) + for param in sorted(iv.params.values(), key=lambda x: x.name): + fd.write("%s:%s " % (param.name, refpolicy.field_to_str[param.type])) + fd.write("]\n") + avl = sorted(iv.access.to_list()) + for av in avl: + fd.write(",".join(av)) + fd.write("\n") + + def from_file(self, fd): + def parse_ifv(line): + fields = line[1:-1].split() + if len(fields) < 2 or fields[0] != "InterfaceVector": + raise SyntaxError("Syntax error InterfaceVector statement %s" % line) + ifv = InterfaceVector() + ifv.name = fields[1] + if len(fields) == 2: + return + for field in fields[2:]: + p = field.split(":") + if len(p) != 2: + raise SyntaxError("Invalid param in InterfaceVector statement %s" % line) + param = Param() + param.name = p[0] + param.type = refpolicy.str_to_field[p[1]] + ifv.params[param.name] = param + return ifv + + ifv = None + for line in fd: + line = line[:-1] + if line[0] == "[": + if ifv: + self.add_ifv(ifv) + ifv = parse_ifv(line) + elif ifv: + l = line.split(",") + av = access.AccessVector(l) + ifv.add_av(av) + if ifv: + self.add_ifv(ifv) + + self.index() + + def add_ifv(self, ifv): + 
self.interfaces[ifv.name] = ifv + + def index(self): + for ifv in self.interfaces.values(): + tgt_types = set() + for av in ifv.access: + if access.is_idparam(av.tgt_type): + self.tgt_type_all.append(ifv) + tgt_types = set() + break + tgt_types.add(av.tgt_type) + + for type in tgt_types: + l = self.tgt_type_map.setdefault(type, []) + l.append(ifv) + + def add(self, interface, attributes={}): + ifv = InterfaceVector(interface, attributes) + self.add_ifv(ifv) + + def add_headers(self, headers, output=None, attributes={}): + for i in itertools.chain(headers.interfaces(), headers.templates()): + self.add(i, attributes) + + self.expand_ifcalls(headers) + self.index() + + def map_param(self, id, ifcall): + if access.is_idparam(id): + num = int(id[1:]) + if num > len(ifcall.args): + # Tell caller to drop this because it must have + # been generated from an optional param. + return None + else: + arg = ifcall.args[num - 1] + if isinstance(arg, list): + return arg + else: + return [arg] + else: + return [id] + + def map_add_av(self, ifv, av, ifcall): + src_types = self.map_param(av.src_type, ifcall) + if src_types is None: + return + + tgt_types = self.map_param(av.tgt_type, ifcall) + if tgt_types is None: + return + + obj_classes = self.map_param(av.obj_class, ifcall) + if obj_classes is None: + return + + new_perms = refpolicy.IdSet() + for perm in av.perms: + p = self.map_param(perm, ifcall) + if p is None: + continue + else: + new_perms.update(p) + if len(new_perms) == 0: + return + + for src_type in src_types: + for tgt_type in tgt_types: + for obj_class in obj_classes: + ifv.access.add(src_type, tgt_type, obj_class, new_perms) + + def do_expand_ifcalls(self, interface, if_by_name): + # Descend an interface call tree adding the access + # from each interface. This is a depth first walk + # of the tree. + + stack = [(interface, None)] + ifv = self.interfaces[interface.name] + ifv.expanded = True + + while len(stack) > 0: + cur, cur_ifcall = stack.pop(-1) + + cur_ifv = self.interfaces[cur.name] + if cur != interface: + + for av in cur_ifv.access: + self.map_add_av(ifv, av, cur_ifcall) + + # If we have already fully expanded this interface + # there is no reason to descend further. + if cur_ifv.expanded: + continue + + for ifcall in cur.interface_calls(): + if ifcall.ifname == interface.name: + self.o(_("Found circular interface class")) + return + try: + newif = if_by_name[ifcall.ifname] + except KeyError: + self.o(_("Missing interface definition for %s" % ifcall.ifname)) + continue + + stack.append((newif, ifcall)) + + + def expand_ifcalls(self, headers): + # Create a map of interface names to interfaces - + # this mirrors the interface vector map we already + # have. + if_by_name = { } + + for i in itertools.chain(headers.interfaces(), headers.templates()): + if_by_name[i.name] = i + + + for interface in itertools.chain(headers.interfaces(), headers.templates()): + self.do_expand_ifcalls(interface, if_by_name) + diff --git a/lib/python2.7/site-packages/sepolgen/lex.py b/lib/python2.7/site-packages/sepolgen/lex.py new file mode 100644 index 0000000..c13acef --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/lex.py @@ -0,0 +1,872 @@ +#----------------------------------------------------------------------------- +# ply: lex.py +# +# Author: David M. Beazley (dave@dabeaz.com) +# +# Copyright (C) 2001-2006, David M. 
Beazley +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +#----------------------------------------------------------------------------- + +__version__ = "2.2" + +import re, sys, types + +from . import util +import collections + + +# Regular expression used to match valid token names +_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') + +# Available instance types. This is used when parsers are defined by a class. +# In Python3 the InstanceType and ObjectType are no more, they've passed, ceased +# to be, they are ex-classes along with old-style classes + +try: + _INSTANCETYPE = (types.InstanceType, types.ObjectType) +except AttributeError: + _INSTANCETYPE = object + +# Exception thrown when invalid token encountered and no default error +# handler is defined. +class LexError(Exception): + def __init__(self,message,s): + self.args = (message,) + self.text = s + +# Token class +class LexToken(object): + def __str__(self): + return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) + def __repr__(self): + return str(self) + def skip(self,n): + self.lexer.skip(n) + +# ----------------------------------------------------------------------------- +# Lexer class +# +# This class encapsulates all of the methods and data associated with a lexer. +# +# input() - Store a new string in the lexer +# token() - Get the next token +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression. 
This is a list of + # tuples (re,findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules + self.lexretext = None # Current regular expression strings + self.lexstatere = {} # Dictionary mapping lexer states to master regexs + self.lexstateretext = {} # Dictionary mapping lexer states to regex strings + self.lexstate = "INITIAL" # Current lexer state + self.lexstatestack = [] # Stack of lexer states + self.lexstateinfo = None # State information + self.lexstateignore = {} # Dictionary of ignored characters for each state + self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexreflags = 0 # Optional re compile flags + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexerrorf = None # Error rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = "" # Ignored characters + self.lexliterals = "" # Literal characters that can be passed through + self.lexmodule = None # Module + self.lineno = 1 # Current line number + self.lexdebug = 0 # Debugging mode + self.lexoptimize = 0 # Optimized mode + + def clone(self,object=None): + c = Lexer() + c.lexstatere = self.lexstatere + c.lexstateinfo = self.lexstateinfo + c.lexstateretext = self.lexstateretext + c.lexstate = self.lexstate + c.lexstatestack = self.lexstatestack + c.lexstateignore = self.lexstateignore + c.lexstateerrorf = self.lexstateerrorf + c.lexreflags = self.lexreflags + c.lexdata = self.lexdata + c.lexpos = self.lexpos + c.lexlen = self.lexlen + c.lextokens = self.lextokens + c.lexdebug = self.lexdebug + c.lineno = self.lineno + c.lexoptimize = self.lexoptimize + c.lexliterals = self.lexliterals + c.lexmodule = self.lexmodule + + # If the object parameter has been supplied, it means we are attaching the + # lexer to a new object. In this case, we have to rebind all methods in + # the lexstatere and lexstateerrorf tables. + + if object: + newtab = { } + for key, ritem in self.lexstatere.items(): + newre = [] + for cre, findex in ritem: + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object,f[0].__name__),f[1])) + newre.append((cre,newfindex)) + newtab[key] = newre + c.lexstatere = newtab + c.lexstateerrorf = { } + for key, ef in self.lexstateerrorf.items(): + c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexmodule = object + + # Set up other attributes + c.begin(c.lexstate) + return c + + # ------------------------------------------------------------ + # writetab() - Write lexer information to a table file + # ------------------------------------------------------------ + def writetab(self,tabfile): + tf = open(tabfile+".py","w") + tf.write("# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" % (tabfile,__version__)) + tf.write("_lextokens = %s\n" % repr(self.lextokens)) + tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) + tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) + tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) + + tabre = { } + for key, lre in self.lexstatere.items(): + titem = [] + for i in range(len(lre)): + titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1]))) + tabre[key] = titem + + tf.write("_lexstatere = %s\n" % repr(tabre)) + tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) + + taberr = { } + for key, ef in self.lexstateerrorf.items(): + if ef: + taberr[key] = ef.__name__ + else: + taberr[key] = None + tf.write("_lexstateerrorf = %s\n" % repr(taberr)) + tf.close() + + # ------------------------------------------------------------ + # readtab() - Read lexer information from a tab file + # ------------------------------------------------------------ + def readtab(self,tabfile,fdict): + exec("import %s as lextab" % tabfile) + self.lextokens = lextab._lextokens + self.lexreflags = lextab._lexreflags + self.lexliterals = lextab._lexliterals + self.lexstateinfo = lextab._lexstateinfo + self.lexstateignore = lextab._lexstateignore + self.lexstatere = { } + self.lexstateretext = { } + for key,lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for i in range(len(lre)): + titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) + txtitem.append(lre[i][0]) + self.lexstatere[key] = titem + self.lexstateretext[key] = txtitem + self.lexstateerrorf = { } + for key,ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[key] = fdict[ef] + self.begin('INITIAL') + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self,s): + if not (isinstance(s,util.bytes_type) or isinstance(s, util.string_type)): + raise ValueError("Expected a string") + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + + # ------------------------------------------------------------ + # begin() - Changes the lexing state + # ------------------------------------------------------------ + def begin(self,state): + if state not in self.lexstatere: + raise ValueError("Undefined state") + self.lexre = self.lexstatere[state] + self.lexretext = self.lexstateretext[state] + self.lexignore = self.lexstateignore.get(state,"") + self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexstate = state + + # ------------------------------------------------------------ + # push_state() - Changes the lexing state and saves old on stack + # ------------------------------------------------------------ + def push_state(self,state): + self.lexstatestack.append(self.lexstate) + self.begin(state) + + # ------------------------------------------------------------ + # pop_state() - Restores the previous state + # ------------------------------------------------------------ + def pop_state(self): + self.begin(self.lexstatestack.pop()) + + # ------------------------------------------------------------ + # current_state() - Returns the current lexing state + # ------------------------------------------------------------ + def current_state(self): + return self.lexstate + + # ------------------------------------------------------------ + # skip() - Skip ahead n characters + # ------------------------------------------------------------ + def skip(self,n): + 
self.lexpos += n + + # ------------------------------------------------------------ + # token() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def token(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + for lexre,lexindexfunc in self.lexre: + m = lexre.match(lexdata,lexpos) + if not m: continue + + # Set last match in lexer so that rules can access it if they want + self.lexmatch = m + + # Create a token for return + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + + lexpos = m.end() + i = m.lastindex + func,tok.type = lexindexfunc[i] + self.lexpos = lexpos + + if not func: + # If no token type was set, it's an ignored token + if tok.type: return tok + break + + # if func not callable, it means it's an ignored token + if not isinstance(func, collections.Callable): + break + + # If token is processed by a function, call it + newtok = func(tok) + + # Every function must return a token, if nothing, we just move to next token + if not newtok: + lexpos = self.lexpos # This is here in case user has updated lexpos. + break + + # Verify type of the token. If not in the token map, raise an error + if not self.lexoptimize: + if newtok.type not in self.lextokens: + raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func.__code__.co_filename, func.__code__.co_firstlineno, + func.__name__, newtok.type),lexdata[lexpos:]) + + return newtok + else: + # No match, see if in literals + if lexdata[lexpos] in self.lexliterals: + tok = LexToken() + tok.value = lexdata[lexpos] + tok.lineno = self.lineno + tok.lexer = self + tok.type = tok.value + tok.lexpos = lexpos + self.lexpos = lexpos + 1 + return tok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = "error" + tok.lexer = self + tok.lexpos = lexpos + self.lexpos = lexpos + newtok = self.lexerrorf(tok) + if lexpos == self.lexpos: + # Error method didn't change text position at all. This is an error. + raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + lexpos = self.lexpos + if not newtok: continue + return newtok + + self.lexpos = lexpos + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + + self.lexpos = lexpos + 1 + if self.lexdata is None: + raise RuntimeError("No input string given with input()") + return None + +# ----------------------------------------------------------------------------- +# _validate_file() +# +# This checks to see if there are duplicated t_rulename() functions or strings +# in the parser input file. This is done using a simple regular expression +# match on each line in the filename. 
+# ----------------------------------------------------------------------------- + +def _validate_file(filename): + import os.path + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea what the file is. Return OK + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if not m: + m = sre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print("%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev)) + noerror = 0 + linen += 1 + return noerror + +# ----------------------------------------------------------------------------- +# _funcs_to_names() +# +# Given a list of regular expression functions, this converts it to a list +# suitable for output to a table file +# ----------------------------------------------------------------------------- + +def _funcs_to_names(funclist): + result = [] + for f in funclist: + if f and f[0]: + result.append((f[0].__name__,f[1])) + else: + result.append(f) + return result + +# ----------------------------------------------------------------------------- +# _names_to_funcs() +# +# Given a list of regular expression function names, this converts it back to +# functions. +# ----------------------------------------------------------------------------- + +def _names_to_funcs(namelist,fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]],n[1])) + else: + result.append(n) + return result + +# ----------------------------------------------------------------------------- +# _form_master_re() +# +# This function takes a list of all of the regex components and attempts to +# form the master regular expression. Given limitations in the Python re +# module, it may be necessary to break the master regex into separate expressions. +# ----------------------------------------------------------------------------- + +def _form_master_re(relist,reflags,ldict): + if not relist: return [] + regex = "|".join(relist) + try: + lexre = re.compile(regex,re.VERBOSE | reflags) + + # Build the index to function map for the matching engine + lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + for f,i in lexre.groupindex.items(): + handle = ldict.get(f,None) + if type(handle) in (types.FunctionType, types.MethodType): + lexindexfunc[i] = (handle,handle.__name__[2:]) + elif handle is not None: + # If rule was specified as a string, we build an anonymous + # callback function to carry out the action + if f.find("ignore_") > 0: + lexindexfunc[i] = (None,None) + print("IGNORE", f) + else: + lexindexfunc[i] = (None, f[2:]) + + return [(lexre,lexindexfunc)],[regex] + except Exception as e: + m = int(len(relist)/2) + if m == 0: m = 1 + llist, lre = _form_master_re(relist[:m],reflags,ldict) + rlist, rre = _form_master_re(relist[m:],reflags,ldict) + return llist+rlist, lre+rre + +# ----------------------------------------------------------------------------- +# def _statetoken(s,names) +# +# Given a declaration name s of the form "t_" and a dictionary whose keys are +# state names, this function returns a tuple (states,tokenname) where states +# is a tuple of state names and tokenname is the name of the token. 
For example, +# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') +# ----------------------------------------------------------------------------- + +def _statetoken(s,names): + nonstate = 1 + parts = s.split("_") + for i in range(1,len(parts)): + if parts[i] not in names and parts[i] != 'ANY': break + if i > 1: + states = tuple(parts[1:i]) + else: + states = ('INITIAL',) + + if 'ANY' in states: + states = tuple(names.keys()) + + tokenname = "_".join(parts[i:]) + return (states,tokenname) + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0): + global lexer + ldict = None + stateinfo = { 'INITIAL' : 'inclusive'} + error = 0 + files = { } + lexobj = Lexer() + lexobj.lexdebug = debug + lexobj.lexoptimize = optimize + global token,input + + if nowarn: warn = 0 + else: warn = 1 + + if object: module = object + + if module: + # User supplied a module object. + if isinstance(module, types.ModuleType): + ldict = module.__dict__ + elif isinstance(module, _INSTANCETYPE): + _items = [(k,getattr(module,k)) for k in dir(module)] + ldict = { } + for (i,v) in _items: + ldict[i] = v + else: + raise ValueError("Expected a module or instance") + lexobj.lexmodule = module + + else: + # No module given. We might be able to get information from the caller. + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + if optimize and lextab: + try: + lexobj.readtab(lextab,ldict) + token = lexobj.token + input = lexobj.input + lexer = lexobj + return lexobj + + except ImportError: + pass + + # Get the tokens, states, and literals variables (if any) + if (module and isinstance(module,_INSTANCETYPE)): + tokens = getattr(module,"tokens",None) + states = getattr(module,"states",None) + literals = getattr(module,"literals","") + else: + tokens = ldict.get("tokens",None) + states = ldict.get("states",None) + literals = ldict.get("literals","") + + if not tokens: + raise SyntaxError("lex: module does not define 'tokens'") + if not (isinstance(tokens,list) or isinstance(tokens,tuple)): + raise SyntaxError("lex: tokens must be a list or tuple.") + + # Build a dictionary of valid token names + lexobj.lextokens = { } + if not optimize: + for n in tokens: + if not _is_identifier.match(n): + print("lex: Bad token name '%s'" % n) + error = 1 + if warn and n in lexobj.lextokens: + print("lex: Warning. Token '%s' multiply defined." % n) + lexobj.lextokens[n] = None + else: + for n in tokens: lexobj.lextokens[n] = None + + if debug: + print("lex: tokens = '%s'" % list(lexobj.lextokens.keys())) + + try: + for c in literals: + if not (isinstance(c,util.bytes_type) or isinstance(c, util.string_type)) or len(c) > 1: + print("lex: Invalid literal %s. Must be a single character" % repr(c)) + error = 1 + continue + + except TypeError: + print("lex: Invalid literals specification. 
literals must be a sequence of characters.") + error = 1 + + lexobj.lexliterals = literals + + # Build statemap + if states: + if not (isinstance(states,tuple) or isinstance(states,list)): + print("lex: states must be defined as a tuple or list.") + error = 1 + else: + for s in states: + if not isinstance(s,tuple) or len(s) != 2: + print("lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s)) + error = 1 + continue + name, statetype = s + if isinstance(name, util.string_type): + original_name = name + name = util.encode_input(name) + if not isinstance(name,util.bytes_type) or len(original_name) != len(name): + print("lex: state name %s must be a byte string" % repr(original_name)) + error = 1 + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + print("lex: state type for state %s must be 'inclusive' or 'exclusive'" % name) + error = 1 + continue + if name in stateinfo: + print("lex: state '%s' already defined." % name) + error = 1 + continue + stateinfo[name] = statetype + + # Get a list of symbols with the t_ or s_ prefix + tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ] + + # Now build up a list of functions and a list of strings + + funcsym = { } # Symbols defined as functions + strsym = { } # Symbols defined as strings + toknames = { } # Mapping of symbols to token names + + for s in stateinfo.keys(): + funcsym[s] = [] + strsym[s] = [] + + ignore = { } # Ignore strings by state + errorf = { } # Error functions by state + + if len(tsymbols) == 0: + raise SyntaxError("lex: no rules of the form t_rulename are defined.") + + for f in tsymbols: + t = ldict[f] + states, tokname = _statetoken(f,stateinfo) + toknames[f] = tokname + + if isinstance(t, collections.Callable): + for s in states: funcsym[s].append((f,t)) + elif (isinstance(t, util.bytes_type) or isinstance(t,util.string_type)): + for s in states: strsym[s].append((f,t)) + else: + print("lex: %s not defined as a function or string" % f) + error = 1 + + # Sort the functions by line number + for f in funcsym.values(): + f.sort(key=lambda x: x[1].__code__.co_firstlineno) + + # Sort the strings by regular expression length + for s in strsym.values(): + s.sort(key=lambda x: len(x[1])) + + regexs = { } + + # Build the master regular expressions + for state in stateinfo.keys(): + regex_list = [] + + # Add rules defined by functions first + for fname, f in funcsym[state]: + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + files[file] = None + tokname = toknames[fname] + + ismethod = isinstance(f, types.MethodType) + + if not optimize: + nargs = f.__code__.co_argcount + if ismethod: + reqargs = 2 + else: + reqargs = 1 + if nargs > reqargs: + print("%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)) + error = 1 + continue + + if nargs < reqargs: + print("%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)) + error = 1 + continue + + if tokname == 'ignore': + print("%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__)) + error = 1 + continue + + if tokname == 'error': + errorf[state] = f + continue + + if f.__doc__: + if not optimize: + try: + c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags) + if c.match(""): + print("%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__)) + error = 1 + continue + except re.error as e: + print("%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e)) + if '#' in f.__doc__: + print("%s:%d. 
Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__)) + error = 1 + continue + + if debug: + print("lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state)) + + # Okay. The regular expression seemed okay. Let's append it to the master regular + # expression we're building + + regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__)) + else: + print("%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__)) + + # Now add all of the simple rules + for name,r in strsym[state]: + tokname = toknames[name] + + if tokname == 'ignore': + ignore[state] = r + continue + + if not optimize: + if tokname == 'error': + raise SyntaxError("lex: Rule '%s' must be defined as a function" % name) + error = 1 + continue + + if tokname not in lexobj.lextokens and tokname.find("ignore_") < 0: + print("lex: Rule '%s' defined for an unspecified token %s." % (name,tokname)) + error = 1 + continue + try: + c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags) + if (c.match("")): + print("lex: Regular expression for rule '%s' matches empty string." % name) + error = 1 + continue + except re.error as e: + print("lex: Invalid regular expression for rule '%s'. %s" % (name,e)) + if '#' in r: + print("lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name) + + error = 1 + continue + if debug: + print("lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state)) + + regex_list.append("(?P<%s>%s)" % (name,r)) + + if not regex_list: + print("lex: No rules defined for state '%s'" % state) + error = 1 + + regexs[state] = regex_list + + + if not optimize: + for f in files.keys(): + if not _validate_file(f): + error = 1 + + if error: + raise SyntaxError("lex: Unable to build lexer.") + + # From this point forward, we're reasonably confident that we can build the lexer. + # No more errors will be generated, but there might be some warning messages. + + # Build the master regular expressions + + for state in regexs.keys(): + lexre, re_text = _form_master_re(regexs[state],reflags,ldict) + lexobj.lexstatere[state] = lexre + lexobj.lexstateretext[state] = re_text + if debug: + for i in range(len(re_text)): + print("lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i])) + + # For inclusive states, we need to add the INITIAL state + for state,type in stateinfo.items(): + if state != "INITIAL" and type == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + + lexobj.lexstateinfo = stateinfo + lexobj.lexre = lexobj.lexstatere["INITIAL"] + lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + + # Set up ignore variables + lexobj.lexstateignore = ignore + lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + + # Set up error functions + lexobj.lexstateerrorf = errorf + lexobj.lexerrorf = errorf.get("INITIAL",None) + if warn and not lexobj.lexerrorf: + print("lex: Warning. no t_error rule is defined.") + + # Check state information for ignore and error rules + for s,stype in stateinfo.items(): + if stype == 'exclusive': + if warn and s not in errorf: + print("lex: Warning. no error rule is defined for exclusive state '%s'" % s) + if warn and s not in ignore and lexobj.lexignore: + print("lex: Warning. 
no ignore rule is defined for exclusive state '%s'" % s) + elif stype == 'inclusive': + if s not in errorf: + errorf[s] = errorf.get("INITIAL",None) + if s not in ignore: + ignore[s] = ignore.get("INITIAL","") + + + # Create global versions of the token() and input() functions + token = lexobj.token + input = lexobj.input + lexer = lexobj + + # If in optimize mode, we write the lextab + if lextab and optimize: + lexobj.writetab(lextab) + + return lexobj + +# ----------------------------------------------------------------------------- +# runmain() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None,data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + print("Reading from standard input (type EOF to end):") + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while 1: + tok = _token() + if not tok: break + print("(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + + +# ----------------------------------------------------------------------------- +# @TOKEN(regex) +# +# This decorator function can be used to set the regex expression on a function +# when its docstring might need to be set in an alternative way +# ----------------------------------------------------------------------------- + +def TOKEN(r): + def set_doc(f): + f.__doc__ = r + return f + return set_doc + +# Alternative spelling of the TOKEN decorator +Token = TOKEN + diff --git a/lib/python2.7/site-packages/sepolgen/matching.py b/lib/python2.7/site-packages/sepolgen/matching.py new file mode 100644 index 0000000..6f86359 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/matching.py @@ -0,0 +1,252 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +Classes and algorithms for matching requested access to access vectors. +""" + +import itertools + +from . import access +from . import objectmodel +from . 
import util + + +class Match(util.Comparison): + def __init__(self, interface=None, dist=0): + self.interface = interface + self.dist = dist + self.info_dir_change = False + # when implementing __eq__ also __hash__ is needed on py2 + # if object is muttable __hash__ should be None + self.__hash__ = None + + def _compare(self, other, method): + try: + a = (self.dist, self.info_dir_change) + b = (other.dist, other.info_dir_change) + return method(a, b) + except (AttributeError, TypeError): + # trying to compare to foreign type + return NotImplemented + +class MatchList: + DEFAULT_THRESHOLD = 150 + def __init__(self): + # Match objects that pass the threshold + self.children = [] + # Match objects over the threshold + self.bastards = [] + self.threshold = self.DEFAULT_THRESHOLD + self.allow_info_dir_change = False + self.av = None + + def best(self): + if len(self.children): + return self.children[0] + if len(self.bastards): + return self.bastards[0] + return None + + def __len__(self): + # Only return the length of the matches so + # that this can be used to test if there is + # a match. + return len(self.children) + len(self.bastards) + + def __iter__(self): + return iter(self.children) + + def all(self): + return itertools.chain(self.children, self.bastards) + + def append(self, match): + if match.dist <= self.threshold: + if not match.info_dir_change or self.allow_info_dir_change: + self.children.append(match) + else: + self.bastards.append(match) + else: + self.bastards.append(match) + + def sort(self): + self.children.sort() + self.bastards.sort() + + +class AccessMatcher: + def __init__(self, perm_maps=None): + self.type_penalty = 10 + self.obj_penalty = 10 + if perm_maps: + self.perm_maps = perm_maps + else: + self.perm_maps = objectmodel.PermMappings() + # We want a change in the information flow direction + # to be a strong penalty - stronger than access to + # a few unrelated types. + self.info_dir_penalty = 100 + + def type_distance(self, a, b): + if a == b or access.is_idparam(b): + return 0 + else: + return -self.type_penalty + + + def perm_distance(self, av_req, av_prov): + # First check that we have enough perms + diff = av_req.perms.difference(av_prov.perms) + + if len(diff) != 0: + total = self.perm_maps.getdefault_distance(av_req.obj_class, diff) + return -total + else: + diff = av_prov.perms.difference(av_req.perms) + return self.perm_maps.getdefault_distance(av_req.obj_class, diff) + + def av_distance(self, req, prov): + """Determine the 'distance' between 2 access vectors. + + This function is used to find an access vector that matches + a 'required' access. To do this we comput a signed numeric + value that indicates how close the req access is to the + 'provided' access vector. The closer the value is to 0 + the closer the match, with 0 being an exact match. + + A value over 0 indicates that the prov access vector provides more + access than the req (in practice, this means that the source type, + target type, and object class is the same and the perms in prov is + a superset of those in req. + + A value under 0 indicates that the prov access less - or unrelated + - access to the req access. A different type or object class will + result in a very low value. + + The values other than 0 should only be interpreted relative to + one another - they have no exact meaning and are likely to + change. + + Params: + req - [AccessVector] The access that is required. This is the + access being matched. + prov - [AccessVector] The access provided. 
This is the potential + match that is being evaluated for req. + Returns: + 0 : Exact match between the acess vectors. + + < 0 : The prov av does not provide all of the access in req. + A smaller value indicates that the access is further. + + > 0 : The prov av provides more access than req. The larger + the value the more access over req. + """ + # FUTURE - this is _very_ expensive and probably needs some + # thorough performance work. This version is meant to give + # meaningful results relatively simply. + dist = 0 + + # Get the difference between the types. The addition is safe + # here because type_distance only returns 0 or negative. + dist += self.type_distance(req.src_type, prov.src_type) + dist += self.type_distance(req.tgt_type, prov.tgt_type) + + # Object class distance + if req.obj_class != prov.obj_class and not access.is_idparam(prov.obj_class): + dist -= self.obj_penalty + + # Permission distance + + # If this av doesn't have a matching source type, target type, and object class + # count all of the permissions against it. Otherwise determine the perm + # distance and dir. + if dist < 0: + pdist = self.perm_maps.getdefault_distance(prov.obj_class, prov.perms) + else: + pdist = self.perm_distance(req, prov) + + # Combine the perm and other distance + if dist < 0: + if pdist < 0: + return dist + pdist + else: + return dist - pdist + elif dist >= 0: + if pdist < 0: + return pdist - dist + else: + return dist + pdist + + def av_set_match(self, av_set, av): + """ + + """ + dist = None + + # Get the distance for each access vector + for x in av_set: + tmp = self.av_distance(av, x) + if dist is None: + dist = tmp + elif tmp >= 0: + if dist >= 0: + dist += tmp + else: + dist = tmp + -dist + else: + if dist < 0: + dist += tmp + else: + dist -= tmp + + # Penalize for information flow - we want to prevent the + # addition of a write if the requested is read none. We are + # much less concerned about the reverse. + av_dir = self.perm_maps.getdefault_direction(av.obj_class, av.perms) + + if av_set.info_dir is None: + av_set.info_dir = objectmodel.FLOW_NONE + for x in av_set: + av_set.info_dir = av_set.info_dir | \ + self.perm_maps.getdefault_direction(x.obj_class, x.perms) + if (av_dir & objectmodel.FLOW_WRITE == 0) and (av_set.info_dir & objectmodel.FLOW_WRITE): + if dist < 0: + dist -= self.info_dir_penalty + else: + dist += self.info_dir_penalty + + return dist + + def search_ifs(self, ifset, av, match_list): + match_list.av = av + for iv in itertools.chain(ifset.tgt_type_all, + ifset.tgt_type_map.get(av.tgt_type, [])): + if not iv.enabled: + #print "iv %s not enabled" % iv.name + continue + + dist = self.av_set_match(iv.access, av) + if dist >= 0: + m = Match(iv, dist) + match_list.append(m) + + + match_list.sort() + + diff --git a/lib/python2.7/site-packages/sepolgen/module.py b/lib/python2.7/site-packages/sepolgen/module.py new file mode 100644 index 0000000..c09676a --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/module.py @@ -0,0 +1,216 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +Utilities for dealing with the compilation of modules and creation +of module tress. +""" + +import re +import tempfile +try: + from subprocess import getstatusoutput +except ImportError: + from commands import getstatusoutput +import os +import os.path +import shutil + +import selinux + +from . import defaults + + +def is_valid_name(modname): + """Check that a module name is valid. + """ + m = re.findall("[^a-zA-Z0-9_\-\.]", modname) + if len(m) == 0 and modname[0].isalpha(): + return True + else: + return False + +class ModuleTree: + def __init__(self, modname): + self.modname = modname + self.dirname = None + + def dir_name(self): + return self.dirname + + def te_name(self): + return self.dirname + "/" + self.modname + ".te" + + def fc_name(self): + return self.dirname + "/" + self.modname + ".fc" + + def if_name(self): + return self.dirname + "/" + self.modname + ".if" + + def package_name(self): + return self.dirname + "/" + self.modname + ".pp" + + def makefile_name(self): + return self.dirname + "/Makefile" + + def create(self, parent_dirname, makefile_include=None): + self.dirname = parent_dirname + "/" + self.modname + os.mkdir(self.dirname) + fd = open(self.makefile_name(), "w") + if makefile_include: + fd.write("include " + makefile_include) + else: + fd.write("include " + defaults.refpolicy_makefile()) + fd.close() + + # Create empty files for the standard refpolicy + # module files + open(self.te_name(), "w").close() + open(self.fc_name(), "w").close() + open(self.if_name(), "w").close() + +def modname_from_sourcename(sourcename): + return os.path.splitext(os.path.split(sourcename)[1])[0] + +class ModuleCompiler: + """ModuleCompiler eases running of the module compiler. + + The ModuleCompiler class encapsulates running the commandline + module compiler (checkmodule) and module packager (semodule_package). + You are likely interested in the create_module_package method. + + Several options are controlled via paramaters (only effects the + non-refpol builds): + + .mls [boolean] Generate an MLS module (by passed -M to + checkmodule). True to generate an MLS module, false + otherwise. + + .module [boolean] Generate a module instead of a base module. + True to generate a module, false to generate a base. + + .checkmodule [string] Fully qualified path to the module compiler. + Default is /usr/bin/checkmodule. + + .semodule_package [string] Fully qualified path to the module + packager. Defaults to /usr/bin/semodule_package. + .output [file object] File object used to write verbose + output of the compililation and packaging process. + """ + def __init__(self, output=None): + """Create a ModuleCompiler instance, optionally with an + output file object for verbose output of the compilation process. 
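+
+        A minimal usage sketch (illustrative only; assumes a local "foo.te"
+        source file and a non-refpolicy build):
+
+            comp = ModuleCompiler()
+            comp.create_module_package("foo.te", refpolicy=False)
+            # runs checkmodule and semodule_package, leaving "foo.pp"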
+ """ + self.mls = selinux.is_selinux_mls_enabled() + self.module = True + self.checkmodule = "/usr/bin/checkmodule" + self.semodule_package = "/usr/bin/semodule_package" + self.output = output + self.last_output = "" + self.refpol_makefile = defaults.refpolicy_makefile() + self.make = "/usr/bin/make" + + def o(self, str): + if self.output: + self.output.write(str + "\n") + self.last_output = str + + def run(self, command): + self.o(command) + rc, output = getstatusoutput(command) + self.o(output) + + return rc + + def gen_filenames(self, sourcename): + """Generate the module and policy package filenames from + a source file name. The source file must be in the form + of "foo.te". This will generate "foo.mod" and "foo.pp". + + Returns a tuple with (modname, policypackage). + """ + splitname = sourcename.split(".") + if len(splitname) < 2: + raise RuntimeError("invalid sourcefile name %s (must end in .te)", sourcename) + # Handle other periods in the filename correctly + basename = ".".join(splitname[0:-1]) + modname = basename + ".mod" + packagename = basename + ".pp" + + return (modname, packagename) + + def create_module_package(self, sourcename, refpolicy=True): + """Create a module package saved in a packagename from a + sourcename. + + The create_module_package creates a module package saved in a + file named sourcename (.pp is the standard extension) from a + source file (.te is the standard extension). The source file + should contain SELinux policy statements appropriate for a + base or non-base module (depending on the setting of .module). + + Only file names are accepted, not open file objects or + descriptors because the command line SELinux tools are used. + + On error a RuntimeError will be raised with a descriptive + error message. + """ + if refpolicy: + self.refpol_build(sourcename) + else: + modname, packagename = self.gen_filenames(sourcename) + self.compile(sourcename, modname) + self.package(modname, packagename) + os.unlink(modname) + + def refpol_build(self, sourcename): + # Compile + command = self.make + " -f " + self.refpol_makefile + rc = self.run(command) + + # Raise an error if the process failed + if rc != 0: + raise RuntimeError("compilation failed:\n%s" % self.last_output) + + def compile(self, sourcename, modname): + s = [self.checkmodule] + if self.mls: + s.append("-M") + if self.module: + s.append("-m") + s.append("-o") + s.append(modname) + s.append(sourcename) + + rc = self.run(" ".join(s)) + if rc != 0: + raise RuntimeError("compilation failed:\n%s" % self.last_output) + + def package(self, modname, packagename): + s = [self.semodule_package] + s.append("-o") + s.append(packagename) + s.append("-m") + s.append(modname) + + rc = self.run(" ".join(s)) + if rc != 0: + raise RuntimeError("packaging failed [%s]" % self.last_output) + + diff --git a/lib/python2.7/site-packages/sepolgen/objectmodel.py b/lib/python2.7/site-packages/sepolgen/objectmodel.py new file mode 100644 index 0000000..d05d721 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/objectmodel.py @@ -0,0 +1,172 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A 
PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +This module provides knowledge object classes and permissions. It should +be used to keep this knowledge from leaking into the more generic parts of +the policy generation. +""" + +# Objects that can be implicitly typed - these objects do +# not _have_ to be implicitly typed (e.g., sockets can be +# explicitly labeled), but they often are. +# +# File is in this list for /proc/self +# +# This list is useful when dealing with rules that have a +# type (or param) used as both a subject and object. For +# example: +# +# allow httpd_t httpd_t : socket read; +# +# This rule makes sense because the socket was (presumably) created +# by a process with the type httpd_t. +implicitly_typed_objects = ["socket", "fd", "process", "file", "lnk_file", "fifo_file", + "dbus", "capability", "unix_stream_socket"] + +#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +# +#Information Flow +# +# All of the permissions in SELinux can be described in terms of +# information flow. For example, a read of a file is a flow of +# information from that file to the process reading. Viewing +# permissions in these terms can be used to model a varity of +# security properties. +# +# Here we have some infrastructure for understanding permissions +# in terms of information flow +# +#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +# Information flow deals with information either flowing from a subject +# to and object ("write") or to a subject from an object ("read"). Read +# or write is described from the subject point-of-view. It is also possible +# for a permission to represent both a read and write (though the flow is +# typical asymettric in terms of bandwidth). It is also possible for +# permission to not flow information (meaning that the result is pure +# side-effect). +# +# The following constants are for representing the directionality +# of information flow. +FLOW_NONE = 0 +FLOW_READ = 1 +FLOW_WRITE = 2 +FLOW_BOTH = FLOW_READ | FLOW_WRITE + +# These are used by the parser and for nice disply of the directions +str_to_dir = { "n" : FLOW_NONE, "r" : FLOW_READ, "w" : FLOW_WRITE, "b" : FLOW_BOTH } +dir_to_str = { FLOW_NONE : "n", FLOW_READ : "r", FLOW_WRITE : "w", FLOW_BOTH : "b" } + +class PermMap: + """A mapping between a permission and its information flow properties. + + PermMap represents the information flow properties of a single permission + including the direction (read, write, etc.) and an abstract representation + of the bandwidth of the flow (weight). + """ + def __init__(self, perm, dir, weight): + self.perm = perm + self.dir = dir + self.weight = weight + + def __repr__(self): + return "" % (self.perm, + dir_to_str[self.dir], + self.weight) + +class PermMappings: + """The information flow properties of a set of object classes and permissions. + + PermMappings maps one or more classes and permissions to their PermMap objects + describing their information flow charecteristics. + """ + def __init__(self): + self.classes = { } + self.default_weight = 5 + self.default_dir = FLOW_BOTH + + def from_file(self, fd): + """Read the permission mappings from a file. This reads the format used + by Apol in the setools suite. 
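+
+        An illustrative fragment of that format (values invented for the
+        example): a "class" line introduces an object class, and each
+        following line gives a permission, a flow direction (n/r/w/b) and
+        an integer weight:
+
+            class file
+            read     r   10
+            write    w   10
+            getattr  r   3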
+ """ + # This parsing is deliberitely picky and bails at the least error. It + # is assumed that the permission map file will be shipped as part + # of sepolgen and not user modified, so this is a reasonable design + # choice. If user supplied permission mappings are needed the parser + # should be made a little more robust and give better error messages. + cur = None + for line in fd: + fields = line.split() + if len(fields) == 0 or len(fields) == 1 or fields[0] == "#": + continue + if fields[0] == "class": + c = fields[1] + if c in self.classes: + raise ValueError("duplicate class in perm map") + self.classes[c] = { } + cur = self.classes[c] + else: + if len(fields) != 3: + raise ValueError("error in object classs permissions") + if cur is None: + raise ValueError("permission outside of class") + pm = PermMap(fields[0], str_to_dir[fields[1]], int(fields[2])) + cur[pm.perm] = pm + + def get(self, obj, perm): + """Get the permission map for the object permission. + + Returns: + PermMap representing the permission + Raises: + KeyError if the object or permission is not defined + """ + return self.classes[obj][perm] + + def getdefault(self, obj, perm): + """Get the permission map for the object permission or a default. + + getdefault is the same as get except that a default PermMap is + returned if the object class or permission is not defined. The + default is FLOW_BOTH with a weight of 5. + """ + try: + pm = self.classes[obj][perm] + except KeyError: + return PermMap(perm, self.default_dir, self.default_weight) + return pm + + def getdefault_direction(self, obj, perms): + dir = FLOW_NONE + for perm in perms: + pm = self.getdefault(obj, perm) + dir = dir | pm.dir + return dir + + def getdefault_distance(self, obj, perms): + total = 0 + for perm in perms: + pm = self.getdefault(obj, perm) + total += pm.weight + + return total + + + diff --git a/lib/python2.7/site-packages/sepolgen/output.py b/lib/python2.7/site-packages/sepolgen/output.py new file mode 100644 index 0000000..7a83aee --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/output.py @@ -0,0 +1,177 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +Classes and functions for the output of reference policy modules. + +This module takes a refpolicy.Module object and formats it for +output using the ModuleWriter object. By separating the output +in this way the other parts of Madison can focus solely on +generating policy. This keeps the semantic / syntactic issues +cleanly separated from the formatting issues. +""" + +from . import refpolicy +from . 
import util + +if util.PY3: + from .util import cmp + + +class ModuleWriter: + def __init__(self): + self.fd = None + self.module = None + self.sort = True + self.requires = True + + def write(self, module, fd): + self.module = module + + if self.sort: + sort_filter(self.module) + + # FIXME - make this handle nesting + for node, depth in refpolicy.walktree(self.module, showdepth=True): + fd.write("%s\n" % str(node)) + +# Helper functions for sort_filter - this is all done old school +# C style rather than with polymorphic methods because this sorting +# is specific to output. It is not necessarily the comparison you +# want generally. + +# Compare two IdSets - we could probably do something clever +# with different here, but this works. +def id_set_cmp(x, y): + xl = util.set_to_list(x) + xl.sort() + yl = util.set_to_list(y) + yl.sort() + + if len(xl) != len(yl): + return cmp(xl[0], yl[0]) + for v in zip(xl, yl): + if v[0] != v[1]: + return cmp(v[0], v[1]) + return 0 + +# Compare two avrules +def avrule_cmp(a, b): + ret = id_set_cmp(a.src_types, b.src_types) + if ret is not 0: + return ret + ret = id_set_cmp(a.tgt_types, b.tgt_types) + if ret is not 0: + return ret + ret = id_set_cmp(a.obj_classes, b.obj_classes) + if ret is not 0: + return ret + + # At this point, who cares - just return something + return cmp(len(a.perms), len(b.perms)) + +# Compare two interface calls +def ifcall_cmp(a, b): + if a.args[0] != b.args[0]: + return cmp(a.args[0], b.args[0]) + return cmp(a.ifname, b.ifname) + +# Compare an two avrules or interface calls +def rule_cmp(a, b): + if isinstance(a, refpolicy.InterfaceCall): + if isinstance(b, refpolicy.InterfaceCall): + return ifcall_cmp(a, b) + else: + return id_set_cmp([a.args[0]], b.src_types) + else: + if isinstance(b, refpolicy.AVRule): + return avrule_cmp(a,b) + else: + return id_set_cmp(a.src_types, [b.args[0]]) + +def role_type_cmp(a, b): + return cmp(a.role, b.role) + +def sort_filter(module): + """Sort and group the output for readability. + """ + def sort_node(node): + c = [] + + # Module statement + for mod in node.module_declarations(): + c.append(mod) + c.append(refpolicy.Comment()) + + # Requires + for require in node.requires(): + c.append(require) + c.append(refpolicy.Comment()) + + # Rules + # + # We are going to group output by source type (which + # we assume is the first argument for interfaces). 
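+        # The written module therefore groups rules under separator comments,
+        # e.g. (type name illustrative):
+        #
+        #   ============= foo_t ==============
+        #   <allow rules and interface calls whose source is foo_t>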
+ rules = [] + rules.extend(node.avrules()) + rules.extend(node.interface_calls()) + rules.sort(key=util.cmp_to_key(rule_cmp)) + + cur = None + sep_rules = [] + for rule in rules: + if isinstance(rule, refpolicy.InterfaceCall): + x = rule.args[0] + else: + x = util.first(rule.src_types) + + if cur != x: + if cur: + sep_rules.append(refpolicy.Comment()) + cur = x + comment = refpolicy.Comment() + comment.lines.append("============= %s ==============" % cur) + sep_rules.append(comment) + sep_rules.append(rule) + + c.extend(sep_rules) + + + ras = [] + ras.extend(node.role_types()) + ras.sort(key=util.cmp_to_key(role_type_cmp)) + if len(ras): + comment = refpolicy.Comment() + comment.lines.append("============= ROLES ==============") + c.append(comment) + + + c.extend(ras) + + # Everything else + for child in node.children: + if child not in c: + c.append(child) + + node.children = c + + for node in module.nodes(): + sort_node(node) + + diff --git a/lib/python2.7/site-packages/sepolgen/policygen.py b/lib/python2.7/site-packages/sepolgen/policygen.py new file mode 100644 index 0000000..34c8401 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/policygen.py @@ -0,0 +1,400 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +""" +classes and algorithms for the generation of SELinux policy. +""" + +import itertools +import textwrap + +import selinux.audit2why as audit2why +try: + from setools import * +except: + pass + +from . import refpolicy +from . import objectmodel +from . import access +from . import interfaces +from . import matching +from . import util +# Constants for the level of explanation from the generation +# routines +NO_EXPLANATION = 0 +SHORT_EXPLANATION = 1 +LONG_EXPLANATION = 2 + +class PolicyGenerator: + """Generate a reference policy module from access vectors. + + PolicyGenerator generates a new reference policy module + or updates an existing module based on requested access + in the form of access vectors. + + It generates allow rules and optionally module require + statements and reference policy interfaces. By default + only allow rules are generated. The methods .set_gen_refpol + and .set_gen_requires turns on interface generation and + requires generation respectively. + + PolicyGenerator can also optionally add comments explaining + why a particular access was allowed based on the audit + messages that generated the access. The access vectors + passed in must have the .audit_msgs field set correctly + and .explain set to SHORT|LONG_EXPLANATION to enable this + feature. + + The module created by PolicyGenerator can be passed to + output.ModuleWriter to output a text representation. + """ + def __init__(self, module=None): + """Initialize a PolicyGenerator with an optional + existing module. 
+ + If the module paramater is not None then access + will be added to the passed in module. Otherwise + a new reference policy module will be created. + """ + self.ifgen = None + self.explain = NO_EXPLANATION + self.gen_requires = False + if module: + self.moduel = module + else: + self.module = refpolicy.Module() + + self.dontaudit = False + + self.domains = None + def set_gen_refpol(self, if_set=None, perm_maps=None): + """Set whether reference policy interfaces are generated. + + To turn on interface generation pass in an interface set + to use for interface generation. To turn off interface + generation pass in None. + + If interface generation is enabled requires generation + will also be enabled. + """ + if if_set: + self.ifgen = InterfaceGenerator(if_set, perm_maps) + self.gen_requires = True + else: + self.ifgen = None + self.__set_module_style() + + + def set_gen_requires(self, status=True): + """Set whether module requires are generated. + + Passing in true will turn on requires generation and + False will disable generation. If requires generation is + disabled interface generation will also be disabled and + can only be re-enabled via .set_gen_refpol. + """ + self.gen_requires = status + + def set_gen_explain(self, explain=SHORT_EXPLANATION): + """Set whether access is explained. + """ + self.explain = explain + + def set_gen_dontaudit(self, dontaudit): + self.dontaudit = dontaudit + + def __set_module_style(self): + if self.ifgen: + refpolicy = True + else: + refpolicy = False + for mod in self.module.module_declarations(): + mod.refpolicy = refpolicy + + def set_module_name(self, name, version="1.0"): + """Set the name of the module and optionally the version. + """ + # find an existing module declaration + m = None + for mod in self.module.module_declarations(): + m = mod + if not m: + m = refpolicy.ModuleDeclaration() + self.module.children.insert(0, m) + m.name = name + m.version = version + if self.ifgen: + m.refpolicy = True + else: + m.refpolicy = False + + def get_module(self): + # Generate the requires + if self.gen_requires: + gen_requires(self.module) + + """Return the generated module""" + return self.module + + def __add_allow_rules(self, avs): + for av in avs: + rule = refpolicy.AVRule(av) + if self.dontaudit: + rule.rule_type = rule.DONTAUDIT + rule.comment = "" + if self.explain: + rule.comment = str(refpolicy.Comment(explain_access(av, verbosity=self.explain))) + if av.type == audit2why.ALLOW: + rule.comment += "\n#!!!! This avc is allowed in the current policy" + if av.type == audit2why.DONTAUDIT: + rule.comment += "\n#!!!! This avc has a dontaudit rule in the current policy" + + if av.type == audit2why.BOOLEAN: + if len(av.data) > 1: + rule.comment += "\n#!!!! This avc can be allowed using one of the these booleans:\n# %s" % ", ".join([x[0] for x in av.data]) + else: + rule.comment += "\n#!!!! This avc can be allowed using the boolean '%s'" % av.data[0][0] + + if av.type == audit2why.CONSTRAINT: + rule.comment += "\n#!!!! This avc is a constraint violation. You would need to modify the attributes of either the source or target types to allow this access." + rule.comment += "\n#Constraint rule: " + rule.comment += "\n#\t" + av.data[0] + for reason in av.data[1:]: + rule.comment += "\n#\tPossible cause is the source %s and target %s are different." 
% reason + + try: + if ( av.type == audit2why.TERULE and + "write" in av.perms and + ( "dir" in av.obj_class or "open" in av.perms )): + if not self.domains: + self.domains = seinfo(ATTRIBUTE, name="domain")[0]["types"] + types=[] + + for i in [x[TCONTEXT] for x in sesearch([ALLOW], {SCONTEXT: av.src_type, CLASS: av.obj_class, PERMS: av.perms})]: + if i not in self.domains: + types.append(i) + if len(types) == 1: + rule.comment += "\n#!!!! The source type '%s' can write to a '%s' of the following type:\n# %s\n" % ( av.src_type, av.obj_class, ", ".join(types)) + elif len(types) >= 1: + rule.comment += "\n#!!!! The source type '%s' can write to a '%s' of the following types:\n# %s\n" % ( av.src_type, av.obj_class, ", ".join(types)) + except: + pass + self.module.children.append(rule) + + + def add_access(self, av_set): + """Add the access from the access vector set to this + module. + """ + # Use the interface generator to split the access + # into raw allow rules and interfaces. After this + # a will contain a list of access that should be + # used as raw allow rules and the interfaces will + # be added to the module. + if self.ifgen: + raw_allow, ifcalls = self.ifgen.gen(av_set, self.explain) + self.module.children.extend(ifcalls) + else: + raw_allow = av_set + + # Generate the raw allow rules from the filtered list + self.__add_allow_rules(raw_allow) + + def add_role_types(self, role_type_set): + for role_type in role_type_set: + self.module.children.append(role_type) + +def explain_access(av, ml=None, verbosity=SHORT_EXPLANATION): + """Explain why a policy statement was generated. + + Return a string containing a text explanation of + why a policy statement was generated. The string is + commented and wrapped and can be directly inserted + into a policy. + + Params: + av - access vector representing the access. Should + have .audit_msgs set appropriately. + verbosity - the amount of explanation provided. Should + be set to NO_EXPLANATION, SHORT_EXPLANATION, or + LONG_EXPLANATION. + Returns: + list of strings - strings explaining the access or an empty + string if verbosity=NO_EXPLANATION or there is not sufficient + information to provide an explanation. + """ + s = [] + + def explain_interfaces(): + if not ml: + return + s.append(" Interface options:") + for match in ml.all(): + ifcall = call_interface(match.interface, ml.av) + s.append(' %s # [%d]' % (ifcall.to_string(), match.dist)) + + + # Format the raw audit data to explain why the + # access was requested - either long or short. + if verbosity == LONG_EXPLANATION: + for msg in av.audit_msgs: + s.append(' %s' % msg.header) + s.append(' scontext="%s" tcontext="%s"' % + (str(msg.scontext), str(msg.tcontext))) + s.append(' class="%s" perms="%s"' % + (msg.tclass, refpolicy.list_to_space_str(msg.accesses))) + s.append(' comm="%s" exe="%s" path="%s"' % (msg.comm, msg.exe, msg.path)) + s.extend(textwrap.wrap('message="' + msg.message + '"', 80, initial_indent=" ", + subsequent_indent=" ")) + explain_interfaces() + elif verbosity: + s.append(' src="%s" tgt="%s" class="%s", perms="%s"' % + (av.src_type, av.tgt_type, av.obj_class, av.perms.to_space_str())) + # For the short display we are only going to use the additional information + # from the first audit message. For the vast majority of cases this info + # will always be the same anyway. 
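+        # Roughly, the short form produces lines such as (values illustrative):
+        #   src="foo_t" tgt="bar_t" class="file", perms="read write"
+        #   comm="foo" exe="/usr/bin/foo" path="/var/log/foo.log"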
+ if len(av.audit_msgs) > 0: + msg = av.audit_msgs[0] + s.append(' comm="%s" exe="%s" path="%s"' % (msg.comm, msg.exe, msg.path)) + explain_interfaces() + return s + +def call_interface(interface, av): + params = [] + args = [] + + params.extend(interface.params.values()) + params.sort(key=lambda param: param.num, reverse=True) + + ifcall = refpolicy.InterfaceCall() + ifcall.ifname = interface.name + + for i in range(len(params)): + if params[i].type == refpolicy.SRC_TYPE: + ifcall.args.append(av.src_type) + elif params[i].type == refpolicy.TGT_TYPE: + ifcall.args.append(av.tgt_type) + elif params[i].type == refpolicy.OBJ_CLASS: + ifcall.args.append(av.obj_class) + else: + print(params[i].type) + assert(0) + + assert(len(ifcall.args) > 0) + + return ifcall + +class InterfaceGenerator: + def __init__(self, ifs, perm_maps=None): + self.ifs = ifs + self.hack_check_ifs(ifs) + self.matcher = matching.AccessMatcher(perm_maps) + self.calls = [] + + def hack_check_ifs(self, ifs): + # FIXME: Disable interfaces we can't call - this is a hack. + # Because we don't handle roles, multiple paramaters, etc., + # etc., we must make certain we can actually use a returned + # interface. + for x in ifs.interfaces.values(): + params = [] + params.extend(x.params.values()) + params.sort(key=lambda param: param.num, reverse=True) + for i in range(len(params)): + # Check that the paramater position matches + # the number (e.g., $1 is the first arg). This + # will fail if the parser missed something. + if (i + 1) != params[i].num: + x.enabled = False + break + # Check that we can handle the param type (currently excludes + # roles. + if params[i].type not in [refpolicy.SRC_TYPE, refpolicy.TGT_TYPE, + refpolicy.OBJ_CLASS]: + x.enabled = False + break + + def gen(self, avs, verbosity): + raw_av = self.match(avs) + ifcalls = [] + for ml in self.calls: + ifcall = call_interface(ml.best().interface, ml.av) + if verbosity: + ifcall.comment = refpolicy.Comment(explain_access(ml.av, ml, verbosity)) + ifcalls.append((ifcall, ml)) + + d = [] + for ifcall, ifs in ifcalls: + found = False + for o_ifcall in d: + if o_ifcall.matches(ifcall): + if o_ifcall.comment and ifcall.comment: + o_ifcall.comment.merge(ifcall.comment) + found = True + if not found: + d.append(ifcall) + + return (raw_av, d) + + + def match(self, avs): + raw_av = [] + for av in avs: + ans = matching.MatchList() + self.matcher.search_ifs(self.ifs, av, ans) + if len(ans): + self.calls.append(ans) + else: + raw_av.append(av) + + return raw_av + + +def gen_requires(module): + """Add require statements to the module. + """ + def collect_requires(node): + r = refpolicy.Require() + for avrule in node.avrules(): + r.types.update(avrule.src_types) + r.types.update(avrule.tgt_types) + for obj in avrule.obj_classes: + r.add_obj_class(obj, avrule.perms) + + for ifcall in node.interface_calls(): + for arg in ifcall.args: + # FIXME - handle non-type arguments when we + # can actually figure those out. 
+ r.types.add(arg) + + for role_type in node.role_types(): + r.roles.add(role_type.role) + r.types.update(role_type.types) + + r.types.discard("self") + + node.children.insert(0, r) + + # FUTURE - this is untested on modules with any sort of + # nesting + for node in module.nodes(): + collect_requires(node) + + diff --git a/lib/python2.7/site-packages/sepolgen/refparser.py b/lib/python2.7/site-packages/sepolgen/refparser.py new file mode 100644 index 0000000..9b1d0c8 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/refparser.py @@ -0,0 +1,1129 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006-2007 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +# OVERVIEW +# +# +# This is a parser for the refpolicy policy "language" - i.e., the +# normal SELinux policy language plus the refpolicy style M4 macro +# constructs on top of that base language. This parser is primarily +# aimed at parsing the policy headers in order to create an abstract +# policy representation suitable for generating policy. +# +# Both the lexer and parser are included in this file. The are implemented +# using the Ply library (included with sepolgen). + +import sys +import os +import re +import traceback + +from . import access +from . import defaults +from . import lex +from . import refpolicy +from . import yacc + +# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +# +# lexer +# +# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +tokens = ( + # basic tokens, punctuation + 'TICK', + 'SQUOTE', + 'OBRACE', + 'CBRACE', + 'SEMI', + 'COLON', + 'OPAREN', + 'CPAREN', + 'COMMA', + 'MINUS', + 'TILDE', + 'ASTERISK', + 'AMP', + 'BAR', + 'EXPL', + 'EQUAL', + 'FILENAME', + 'IDENTIFIER', + 'NUMBER', + 'PATH', + 'IPV6_ADDR', + # reserved words + # module + 'MODULE', + 'POLICY_MODULE', + 'REQUIRE', + # flask + 'SID', + 'GENFSCON', + 'FS_USE_XATTR', + 'FS_USE_TRANS', + 'FS_USE_TASK', + 'PORTCON', + 'NODECON', + 'NETIFCON', + 'PIRQCON', + 'IOMEMCON', + 'IOPORTCON', + 'PCIDEVICECON', + 'DEVICETREECON', + # object classes + 'CLASS', + # types and attributes + 'TYPEATTRIBUTE', + 'ROLEATTRIBUTE', + 'TYPE', + 'ATTRIBUTE', + 'ATTRIBUTE_ROLE', + 'ALIAS', + 'TYPEALIAS', + # conditional policy + 'BOOL', + 'TRUE', + 'FALSE', + 'IF', + 'ELSE', + # users and roles + 'ROLE', + 'TYPES', + # rules + 'ALLOW', + 'DONTAUDIT', + 'AUDITALLOW', + 'NEVERALLOW', + 'PERMISSIVE', + 'TYPE_TRANSITION', + 'TYPE_CHANGE', + 'TYPE_MEMBER', + 'RANGE_TRANSITION', + 'ROLE_TRANSITION', + # refpolicy keywords + 'OPT_POLICY', + 'INTERFACE', + 'TUNABLE_POLICY', + 'GEN_REQ', + 'TEMPLATE', + 'GEN_CONTEXT', + # m4 + 'IFELSE', + 'IFDEF', + 'IFNDEF', + 'DEFINE' + ) + +# All reserved keywords - see t_IDENTIFIER for how these are matched in +# the lexer. 
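+# For instance, when t_IDENTIFIER matches the input word "allow" it looks the
+# word up in this table and rewrites the token type to ALLOW instead of IDENTIFIER.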
+reserved = { + # module + 'module' : 'MODULE', + 'policy_module' : 'POLICY_MODULE', + 'require' : 'REQUIRE', + # flask + 'sid' : 'SID', + 'genfscon' : 'GENFSCON', + 'fs_use_xattr' : 'FS_USE_XATTR', + 'fs_use_trans' : 'FS_USE_TRANS', + 'fs_use_task' : 'FS_USE_TASK', + 'portcon' : 'PORTCON', + 'nodecon' : 'NODECON', + 'netifcon' : 'NETIFCON', + 'pirqcon' : 'PIRQCON', + 'iomemcon' : 'IOMEMCON', + 'ioportcon' : 'IOPORTCON', + 'pcidevicecon' : 'PCIDEVICECON', + 'devicetreecon' : 'DEVICETREECON', + # object classes + 'class' : 'CLASS', + # types and attributes + 'typeattribute' : 'TYPEATTRIBUTE', + 'roleattribute' : 'ROLEATTRIBUTE', + 'type' : 'TYPE', + 'attribute' : 'ATTRIBUTE', + 'attribute_role' : 'ATTRIBUTE_ROLE', + 'alias' : 'ALIAS', + 'typealias' : 'TYPEALIAS', + # conditional policy + 'bool' : 'BOOL', + 'true' : 'TRUE', + 'false' : 'FALSE', + 'if' : 'IF', + 'else' : 'ELSE', + # users and roles + 'role' : 'ROLE', + 'types' : 'TYPES', + # rules + 'allow' : 'ALLOW', + 'dontaudit' : 'DONTAUDIT', + 'auditallow' : 'AUDITALLOW', + 'neverallow' : 'NEVERALLOW', + 'permissive' : 'PERMISSIVE', + 'type_transition' : 'TYPE_TRANSITION', + 'type_change' : 'TYPE_CHANGE', + 'type_member' : 'TYPE_MEMBER', + 'range_transition' : 'RANGE_TRANSITION', + 'role_transition' : 'ROLE_TRANSITION', + # refpolicy keywords + 'optional_policy' : 'OPT_POLICY', + 'interface' : 'INTERFACE', + 'tunable_policy' : 'TUNABLE_POLICY', + 'gen_require' : 'GEN_REQ', + 'template' : 'TEMPLATE', + 'gen_context' : 'GEN_CONTEXT', + # M4 + 'ifelse' : 'IFELSE', + 'ifndef' : 'IFNDEF', + 'ifdef' : 'IFDEF', + 'define' : 'DEFINE' + } + +# The ply lexer allows definition of tokens in 2 ways: regular expressions +# or functions. + +# Simple regex tokens +t_TICK = r'\`' +t_SQUOTE = r'\'' +t_OBRACE = r'\{' +t_CBRACE = r'\}' +# This will handle spurios extra ';' via the + +t_SEMI = r'\;+' +t_COLON = r'\:' +t_OPAREN = r'\(' +t_CPAREN = r'\)' +t_COMMA = r'\,' +t_MINUS = r'\-' +t_TILDE = r'\~' +t_ASTERISK = r'\*' +t_AMP = r'\&' +t_BAR = r'\|' +t_EXPL = r'\!' +t_EQUAL = r'\=' +t_NUMBER = r'[0-9\.]+' +t_PATH = r'/[a-zA-Z0-9)_\.\*/\$]*' +#t_IPV6_ADDR = r'[a-fA-F0-9]{0,4}:[a-fA-F0-9]{0,4}:([a-fA-F0-9]{0,4}:)*' + +# Ignore whitespace - this is a special token for ply that more efficiently +# ignores uninteresting tokens. +t_ignore = " \t" + +# More complex tokens +def t_IPV6_ADDR(t): + r'[a-fA-F0-9]{0,4}:[a-fA-F0-9]{0,4}:([a-fA-F0-9]|:)*' + # This is a function simply to force it sooner into + # the regex list + return t + +def t_m4comment(t): + r'dnl.*\n' + # Ignore all comments + t.lexer.lineno += 1 + +def t_refpolicywarn1(t): + r'define.*refpolicywarn\(.*\n' + # Ignore refpolicywarn statements - they sometimes + # contain text that we can't parse. + t.skip(1) + +def t_refpolicywarn(t): + r'refpolicywarn\(.*\n' + # Ignore refpolicywarn statements - they sometimes + # contain text that we can't parse. 
+ t.lexer.lineno += 1 + +def t_IDENTIFIER(t): + r'[a-zA-Z_\$][a-zA-Z0-9_\-\+\.\$\*~]*' + # Handle any keywords + t.type = reserved.get(t.value,'IDENTIFIER') + return t + +def t_FILENAME(t): + r'\"[a-zA-Z0-9_\-\+\.\$\*~ :]+\"' + # Handle any keywords + t.type = reserved.get(t.value,'FILENAME') + return t + +def t_comment(t): + r'\#.*\n' + # Ignore all comments + t.lexer.lineno += 1 + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.skip(1) + +def t_newline(t): + r'\n+' + t.lexer.lineno += len(t.value) + +# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +# +# Parser +# +# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +# Global data used during parsing - making it global is easier than +# passing the state through the parsing functions. + +# m is the top-level data structure (stands for modules). +m = None +# error is either None (indicating no error) or a string error message. +error = None +parse_file = "" +# spt is the support macros (e.g., obj/perm sets) - it is an instance of +# refpolicy.SupportMacros and should always be present during parsing +# though it may not contain any macros. +spt = None +success = True + +# utilities +def collect(stmts, parent, val=None): + if stmts is None: + return + for s in stmts: + if s is None: + continue + s.parent = parent + if val is not None: + parent.children.insert(0, (val, s)) + else: + parent.children.insert(0, s) + +def expand(ids, s): + for id in ids: + if spt.has_key(id): + s.update(spt.by_name(id)) + else: + s.add(id) + +# Top-level non-terminal +def p_statements(p): + '''statements : statement + | statements statement + | empty + ''' + if len(p) == 2 and p[1]: + m.children.append(p[1]) + elif len(p) > 2 and p[2]: + m.children.append(p[2]) + +def p_statement(p): + '''statement : interface + | template + | obj_perm_set + | policy + | policy_module_stmt + | module_stmt + ''' + p[0] = p[1] + +def p_empty(p): + 'empty :' + pass + +# +# Reference policy language constructs +# + +# This is for the policy module statement (e.g., policy_module(foo,1.2.0)). +# We have a separate terminal for either the basic language module statement +# and interface calls to make it easier to identifier. +def p_policy_module_stmt(p): + 'policy_module_stmt : POLICY_MODULE OPAREN IDENTIFIER COMMA NUMBER CPAREN' + m = refpolicy.ModuleDeclaration() + m.name = p[3] + m.version = p[5] + m.refpolicy = True + p[0] = m + +def p_interface(p): + '''interface : INTERFACE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN + ''' + x = refpolicy.Interface(p[4]) + collect(p[8], x) + p[0] = x + +def p_template(p): + '''template : TEMPLATE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN + | DEFINE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN + ''' + x = refpolicy.Template(p[4]) + collect(p[8], x) + p[0] = x + +def p_define(p): + '''define : DEFINE OPAREN TICK IDENTIFIER SQUOTE CPAREN''' + # This is for defining single M4 values (to be used later in ifdef statements). + # Example: define(`sulogin_no_pam'). We don't currently do anything with these + # but we should in the future when we correctly resolve ifdef statements. 
+ p[0] = None + +def p_interface_stmts(p): + '''interface_stmts : policy + | interface_stmts policy + | empty + ''' + if len(p) == 2 and p[1]: + p[0] = p[1] + elif len(p) > 2: + if not p[1]: + if p[2]: + p[0] = p[2] + elif not p[2]: + p[0] = p[1] + else: + p[0] = p[1] + p[2] + +def p_optional_policy(p): + '''optional_policy : OPT_POLICY OPAREN TICK interface_stmts SQUOTE CPAREN + | OPT_POLICY OPAREN TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN + ''' + o = refpolicy.OptionalPolicy() + collect(p[4], o, val=True) + if len(p) > 7: + collect(p[8], o, val=False) + p[0] = [o] + +def p_tunable_policy(p): + '''tunable_policy : TUNABLE_POLICY OPAREN TICK cond_expr SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN + | TUNABLE_POLICY OPAREN TICK cond_expr SQUOTE COMMA TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN + ''' + x = refpolicy.TunablePolicy() + x.cond_expr = p[4] + collect(p[8], x, val=True) + if len(p) > 11: + collect(p[12], x, val=False) + p[0] = [x] + +def p_ifelse(p): + '''ifelse : IFELSE OPAREN TICK IDENTIFIER SQUOTE COMMA COMMA TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi + | IFELSE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi + | IFELSE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK SQUOTE COMMA TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi + ''' +# x = refpolicy.IfDef(p[4]) +# v = True +# collect(p[8], x, val=v) +# if len(p) > 12: +# collect(p[12], x, val=False) +# p[0] = [x] + pass + + +def p_ifdef(p): + '''ifdef : IFDEF OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi + | IFNDEF OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi + | IFDEF OPAREN TICK IDENTIFIER SQUOTE COMMA TICK interface_stmts SQUOTE COMMA TICK interface_stmts SQUOTE CPAREN optional_semi + ''' + x = refpolicy.IfDef(p[4]) + if p[1] == 'ifdef': + v = True + else: + v = False + collect(p[8], x, val=v) + if len(p) > 12: + collect(p[12], x, val=False) + p[0] = [x] + +def p_interface_call(p): + '''interface_call : IDENTIFIER OPAREN interface_call_param_list CPAREN + | IDENTIFIER OPAREN CPAREN + | IDENTIFIER OPAREN interface_call_param_list CPAREN SEMI''' + # Allow spurious semi-colons at the end of interface calls + i = refpolicy.InterfaceCall(ifname=p[1]) + if len(p) > 4: + i.args.extend(p[3]) + p[0] = i + +def p_interface_call_param(p): + '''interface_call_param : IDENTIFIER + | IDENTIFIER MINUS IDENTIFIER + | nested_id_set + | TRUE + | FALSE + | FILENAME + ''' + # Intentionally let single identifiers pass through + # List means set, non-list identifier + if len(p) == 2: + p[0] = p[1] + else: + p[0] = [p[1], "-" + p[3]] + +def p_interface_call_param_list(p): + '''interface_call_param_list : interface_call_param + | interface_call_param_list COMMA interface_call_param + ''' + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + [p[3]] + + +def p_obj_perm_set(p): + 'obj_perm_set : DEFINE OPAREN TICK IDENTIFIER SQUOTE COMMA TICK names SQUOTE CPAREN' + s = refpolicy.ObjPermSet(p[4]) + s.perms = p[8] + p[0] = s + +# +# Basic SELinux policy language +# + +def p_policy(p): + '''policy : policy_stmt + | optional_policy + | tunable_policy + | ifdef + | ifelse + | conditional + ''' + p[0] = p[1] + +def p_policy_stmt(p): + '''policy_stmt : gen_require + | avrule_def + | typerule_def + | typeattribute_def + | roleattribute_def + | interface_call + | 
role_def + | role_allow + | permissive + | type_def + | typealias_def + | attribute_def + | attribute_role_def + | range_transition_def + | role_transition_def + | bool + | define + | initial_sid + | genfscon + | fs_use + | portcon + | nodecon + | netifcon + | pirqcon + | iomemcon + | ioportcon + | pcidevicecon + | devicetreecon + ''' + if p[1]: + p[0] = [p[1]] + +def p_module_stmt(p): + 'module_stmt : MODULE IDENTIFIER NUMBER SEMI' + m = refpolicy.ModuleDeclaration() + m.name = p[2] + m.version = p[3] + m.refpolicy = False + p[0] = m + +def p_gen_require(p): + '''gen_require : GEN_REQ OPAREN TICK requires SQUOTE CPAREN + | REQUIRE OBRACE requires CBRACE''' + # We ignore the require statements - they are redundant data from our point-of-view. + # Checkmodule will verify them later anyway so we just assume that they match what + # is in the rest of the interface. + pass + +def p_requires(p): + '''requires : require + | requires require + | ifdef + | requires ifdef + ''' + pass + +def p_require(p): + '''require : TYPE comma_list SEMI + | ROLE comma_list SEMI + | ATTRIBUTE comma_list SEMI + | ATTRIBUTE_ROLE comma_list SEMI + | CLASS comma_list SEMI + | BOOL comma_list SEMI + ''' + pass + +def p_security_context(p): + '''security_context : IDENTIFIER COLON IDENTIFIER COLON IDENTIFIER + | IDENTIFIER COLON IDENTIFIER COLON IDENTIFIER COLON mls_range_def''' + # This will likely need some updates to handle complex levels + s = refpolicy.SecurityContext() + s.user = p[1] + s.role = p[3] + s.type = p[5] + if len(p) > 6: + s.level = p[7] + + p[0] = s + +def p_gen_context(p): + '''gen_context : GEN_CONTEXT OPAREN security_context COMMA mls_range_def CPAREN + ''' + # We actually store gen_context statements in a SecurityContext + # object - it knows how to output either a bare context or a + # gen_context statement. 
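+    # For example, a header line such as
+    #     gen_context(system_u:object_r:httpd_exec_t,s0)
+    # yields the SecurityContext parsed from p[3], with the trailing MLS/MCS
+    # range from p[5] attached as its level.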
+ s = p[3] + s.level = p[5] + + p[0] = s + +def p_context(p): + '''context : security_context + | gen_context + ''' + p[0] = p[1] + +def p_initial_sid(p): + '''initial_sid : SID IDENTIFIER context''' + s = refpolicy.InitialSid() + s.name = p[2] + s.context = p[3] + p[0] = s + +def p_genfscon(p): + '''genfscon : GENFSCON IDENTIFIER PATH context''' + + g = refpolicy.GenfsCon() + g.filesystem = p[2] + g.path = p[3] + g.context = p[4] + + p[0] = g + +def p_fs_use(p): + '''fs_use : FS_USE_XATTR IDENTIFIER context SEMI + | FS_USE_TASK IDENTIFIER context SEMI + | FS_USE_TRANS IDENTIFIER context SEMI + ''' + f = refpolicy.FilesystemUse() + if p[1] == "fs_use_xattr": + f.type = refpolicy.FilesystemUse.XATTR + elif p[1] == "fs_use_task": + f.type = refpolicy.FilesystemUse.TASK + elif p[1] == "fs_use_trans": + f.type = refpolicy.FilesystemUse.TRANS + + f.filesystem = p[2] + f.context = p[3] + + p[0] = f + +def p_portcon(p): + '''portcon : PORTCON IDENTIFIER NUMBER context + | PORTCON IDENTIFIER NUMBER MINUS NUMBER context''' + c = refpolicy.PortCon() + c.port_type = p[2] + if len(p) == 5: + c.port_number = p[3] + c.context = p[4] + else: + c.port_number = p[3] + "-" + p[4] + c.context = p[5] + + p[0] = c + +def p_nodecon(p): + '''nodecon : NODECON NUMBER NUMBER context + | NODECON IPV6_ADDR IPV6_ADDR context + ''' + n = refpolicy.NodeCon() + n.start = p[2] + n.end = p[3] + n.context = p[4] + + p[0] = n + +def p_netifcon(p): + 'netifcon : NETIFCON IDENTIFIER context context' + n = refpolicy.NetifCon() + n.interface = p[2] + n.interface_context = p[3] + n.packet_context = p[4] + + p[0] = n + +def p_pirqcon(p): + 'pirqcon : PIRQCON NUMBER context' + c = refpolicy.PirqCon() + c.pirq_number = p[2] + c.context = p[3] + + p[0] = c + +def p_iomemcon(p): + '''iomemcon : IOMEMCON NUMBER context + | IOMEMCON NUMBER MINUS NUMBER context''' + c = refpolicy.IomemCon() + if len(p) == 4: + c.device_mem = p[2] + c.context = p[3] + else: + c.device_mem = p[2] + "-" + p[3] + c.context = p[4] + + p[0] = c + +def p_ioportcon(p): + '''ioportcon : IOPORTCON NUMBER context + | IOPORTCON NUMBER MINUS NUMBER context''' + c = refpolicy.IoportCon() + if len(p) == 4: + c.ioport = p[2] + c.context = p[3] + else: + c.ioport = p[2] + "-" + p[3] + c.context = p[4] + + p[0] = c + +def p_pcidevicecon(p): + 'pcidevicecon : PCIDEVICECON NUMBER context' + c = refpolicy.PciDeviceCon() + c.device = p[2] + c.context = p[3] + + p[0] = c + +def p_devicetreecon(p): + 'devicetreecon : DEVICETREECON NUMBER context' + c = refpolicy.DevicetTeeCon() + c.path = p[2] + c.context = p[3] + + p[0] = c + +def p_mls_range_def(p): + '''mls_range_def : mls_level_def MINUS mls_level_def + | mls_level_def + ''' + p[0] = p[1] + if len(p) > 2: + p[0] = p[0] + "-" + p[3] + +def p_mls_level_def(p): + '''mls_level_def : IDENTIFIER COLON comma_list + | IDENTIFIER + ''' + p[0] = p[1] + if len(p) > 2: + p[0] = p[0] + ":" + ",".join(p[3]) + +def p_type_def(p): + '''type_def : TYPE IDENTIFIER COMMA comma_list SEMI + | TYPE IDENTIFIER SEMI + | TYPE IDENTIFIER ALIAS names SEMI + | TYPE IDENTIFIER ALIAS names COMMA comma_list SEMI + ''' + t = refpolicy.Type(p[2]) + if len(p) == 6: + if p[3] == ',': + t.attributes.update(p[4]) + else: + t.aliases = p[4] + elif len(p) > 4: + t.aliases = p[4] + if len(p) == 8: + t.attributes.update(p[6]) + p[0] = t + +def p_attribute_def(p): + 'attribute_def : ATTRIBUTE IDENTIFIER SEMI' + a = refpolicy.Attribute(p[2]) + p[0] = a + +def p_attribute_role_def(p): + 'attribute_role_def : ATTRIBUTE_ROLE IDENTIFIER SEMI' + a = 
refpolicy.Attribute_Role(p[2]) + p[0] = a + +def p_typealias_def(p): + 'typealias_def : TYPEALIAS IDENTIFIER ALIAS names SEMI' + t = refpolicy.TypeAlias() + t.type = p[2] + t.aliases = p[4] + p[0] = t + +def p_role_def(p): + '''role_def : ROLE IDENTIFIER TYPES comma_list SEMI + | ROLE IDENTIFIER SEMI''' + r = refpolicy.Role() + r.role = p[2] + if len(p) > 4: + r.types.update(p[4]) + p[0] = r + +def p_role_allow(p): + 'role_allow : ALLOW names names SEMI' + r = refpolicy.RoleAllow() + r.src_roles = p[2] + r.tgt_roles = p[3] + p[0] = r + +def p_permissive(p): + 'permissive : PERMISSIVE names SEMI' + t.skip(1) + +def p_avrule_def(p): + '''avrule_def : ALLOW names names COLON names names SEMI + | DONTAUDIT names names COLON names names SEMI + | AUDITALLOW names names COLON names names SEMI + | NEVERALLOW names names COLON names names SEMI + ''' + a = refpolicy.AVRule() + if p[1] == 'dontaudit': + a.rule_type = refpolicy.AVRule.DONTAUDIT + elif p[1] == 'auditallow': + a.rule_type = refpolicy.AVRule.AUDITALLOW + elif p[1] == 'neverallow': + a.rule_type = refpolicy.AVRule.NEVERALLOW + a.src_types = p[2] + a.tgt_types = p[3] + a.obj_classes = p[5] + a.perms = p[6] + p[0] = a + +def p_typerule_def(p): + '''typerule_def : TYPE_TRANSITION names names COLON names IDENTIFIER SEMI + | TYPE_TRANSITION names names COLON names IDENTIFIER FILENAME SEMI + | TYPE_TRANSITION names names COLON names IDENTIFIER IDENTIFIER SEMI + | TYPE_CHANGE names names COLON names IDENTIFIER SEMI + | TYPE_MEMBER names names COLON names IDENTIFIER SEMI + ''' + t = refpolicy.TypeRule() + if p[1] == 'type_change': + t.rule_type = refpolicy.TypeRule.TYPE_CHANGE + elif p[1] == 'type_member': + t.rule_type = refpolicy.TypeRule.TYPE_MEMBER + t.src_types = p[2] + t.tgt_types = p[3] + t.obj_classes = p[5] + t.dest_type = p[6] + t.file_name = p[7] + p[0] = t + +def p_bool(p): + '''bool : BOOL IDENTIFIER TRUE SEMI + | BOOL IDENTIFIER FALSE SEMI''' + b = refpolicy.Bool() + b.name = p[2] + if p[3] == "true": + b.state = True + else: + b.state = False + p[0] = b + +def p_conditional(p): + ''' conditional : IF OPAREN cond_expr CPAREN OBRACE interface_stmts CBRACE + | IF OPAREN cond_expr CPAREN OBRACE interface_stmts CBRACE ELSE OBRACE interface_stmts CBRACE + ''' + c = refpolicy.Conditional() + c.cond_expr = p[3] + collect(p[6], c, val=True) + if len(p) > 8: + collect(p[10], c, val=False) + p[0] = [c] + +def p_typeattribute_def(p): + '''typeattribute_def : TYPEATTRIBUTE IDENTIFIER comma_list SEMI''' + t = refpolicy.TypeAttribute() + t.type = p[2] + t.attributes.update(p[3]) + p[0] = t + +def p_roleattribute_def(p): + '''roleattribute_def : ROLEATTRIBUTE IDENTIFIER comma_list SEMI''' + t = refpolicy.RoleAttribute() + t.role = p[2] + t.roleattributes.update(p[3]) + p[0] = t + +def p_range_transition_def(p): + '''range_transition_def : RANGE_TRANSITION names names COLON names mls_range_def SEMI + | RANGE_TRANSITION names names names SEMI''' + pass + +def p_role_transition_def(p): + '''role_transition_def : ROLE_TRANSITION names names names SEMI''' + pass + +def p_cond_expr(p): + '''cond_expr : IDENTIFIER + | EXPL cond_expr + | cond_expr AMP AMP cond_expr + | cond_expr BAR BAR cond_expr + | cond_expr EQUAL EQUAL cond_expr + | cond_expr EXPL EQUAL cond_expr + ''' + l = len(p) + if l == 2: + p[0] = [p[1]] + elif l == 3: + p[0] = [p[1]] + p[2] + else: + p[0] = p[1] + [p[2] + p[3]] + p[4] + + +# +# Basic terminals +# + +# Identifiers and lists of identifiers. These must +# be handled somewhat gracefully. 
Names returns an IdSet and care must +# be taken that this is _assigned_ to an object to correctly update +# all of the flags (as opposed to using update). The other terminals +# return list - this is to preserve ordering if it is important for +# parsing (for example, interface_call must retain the ordering). Other +# times the list should be used to update an IdSet. + +def p_names(p): + '''names : identifier + | nested_id_set + | asterisk + | TILDE identifier + | TILDE nested_id_set + | IDENTIFIER MINUS IDENTIFIER + ''' + s = refpolicy.IdSet() + if len(p) < 3: + expand(p[1], s) + elif len(p) == 3: + expand(p[2], s) + s.compliment = True + else: + expand([p[1]]) + s.add("-" + p[3]) + p[0] = s + +def p_identifier(p): + 'identifier : IDENTIFIER' + p[0] = [p[1]] + +def p_asterisk(p): + 'asterisk : ASTERISK' + p[0] = [p[1]] + +def p_nested_id_set(p): + '''nested_id_set : OBRACE nested_id_list CBRACE + ''' + p[0] = p[2] + +def p_nested_id_list(p): + '''nested_id_list : nested_id_element + | nested_id_list nested_id_element + ''' + if len(p) == 2: + p[0] = p[1] + else: + p[0] = p[1] + p[2] + +def p_nested_id_element(p): + '''nested_id_element : identifier + | MINUS IDENTIFIER + | nested_id_set + ''' + if len(p) == 2: + p[0] = p[1] + else: + # For now just leave the '-' + str = "-" + p[2] + p[0] = [str] + +def p_comma_list(p): + '''comma_list : nested_id_list + | comma_list COMMA nested_id_list + ''' + if len(p) > 2: + p[1] = p[1] + p[3] + p[0] = p[1] + +def p_optional_semi(p): + '''optional_semi : SEMI + | empty''' + pass + + +# +# Interface to the parser +# + +def p_error(tok): + global error, parse_file, success, parser + error = "%s: Syntax error on line %d %s [type=%s]" % (parse_file, tok.lineno, tok.value, tok.type) + print(error) + success = False + +def prep_spt(spt): + if not spt: + return { } + map = {} + for x in spt: + map[x.name] = x + +parser = None +lexer = None +def create_globals(module, support, debug): + global parser, lexer, m, spt + + if not parser: + lexer = lex.lex() + parser = yacc.yacc(method="LALR", debug=debug, write_tables=0) + + if module is not None: + m = module + else: + m = refpolicy.Module() + + if not support: + spt = refpolicy.SupportMacros() + else: + spt = support + +def parse(text, module=None, support=None, debug=False): + create_globals(module, support, debug) + global error, parser, lexer, success + + lexer.lineno = 1 + success = True + + try: + parser.parse(text, debug=debug, lexer=lexer) + except Exception as e: + parser = None + lexer = None + error = "internal parser error: %s" % str(e) + "\n" + traceback.format_exc() + + if not success: + # force the parser and lexer to be rebuilt - we have some problems otherwise + parser = None + msg = 'could not parse text: "%s"' % error + raise ValueError(msg) + return m + +def list_headers(root): + modules = [] + support_macros = None + + for dirpath, dirnames, filenames in os.walk(root): + for name in filenames: + modname = os.path.splitext(name) + filename = os.path.join(dirpath, name) + + if modname[1] == '.spt': + if name == "obj_perm_sets.spt": + support_macros = filename + elif len(re.findall("patterns", modname[0])): + modules.append((modname[0], filename)) + elif modname[1] == '.if': + modules.append((modname[0], filename)) + + return (modules, support_macros) + + +def parse_headers(root, output=None, expand=True, debug=False): + from . 
import util + + headers = refpolicy.Headers() + + modules = [] + support_macros = None + + if os.path.isfile(root): + name = os.path.split(root)[1] + if name == '': + raise ValueError("Invalid file name %s" % root) + modname = os.path.splitext(name) + modules.append((modname[0], root)) + all_modules, support_macros = list_headers(defaults.headers()) + else: + modules, support_macros = list_headers(root) + + if expand and not support_macros: + raise ValueError("could not find support macros (obj_perm_sets.spt)") + + def o(msg): + if output: + output.write(msg) + + def parse_file(f, module, spt=None): + global parse_file + if debug: + o("parsing file %s\n" % f) + try: + fd = open(f) + txt = fd.read() + fd.close() + parse_file = f + parse(txt, module, spt, debug) + except IOError as e: + return + except ValueError as e: + raise ValueError("error parsing file %s: %s" % (f, str(e))) + + spt = None + if support_macros: + o("Parsing support macros (%s): " % support_macros) + spt = refpolicy.SupportMacros() + parse_file(support_macros, spt) + + headers.children.append(spt) + + # FIXME: Total hack - add in can_exec rather than parse the insanity + # of misc_macros. We are just going to pretend that this is an interface + # to make the expansion work correctly. + can_exec = refpolicy.Interface("can_exec") + av = access.AccessVector(["$1","$2","file","execute_no_trans","open", "read", + "getattr","lock","execute","ioctl"]) + + can_exec.children.append(refpolicy.AVRule(av)) + headers.children.append(can_exec) + + o("done.\n") + + if output and not debug: + status = util.ConsoleProgressBar(sys.stdout, steps=len(modules)) + status.start("Parsing interface files") + + failures = [] + for x in modules: + m = refpolicy.Module() + m.name = x[0] + try: + if expand: + parse_file(x[1], m, spt) + else: + parse_file(x[1], m) + except ValueError as e: + o(str(e) + "\n") + failures.append(x[1]) + continue + + headers.children.append(m) + if output and not debug: + status.step() + + if len(failures): + o("failed to parse some headers: %s" % ", ".join(failures)) + + return headers diff --git a/lib/python2.7/site-packages/sepolgen/refpolicy.py b/lib/python2.7/site-packages/sepolgen/refpolicy.py new file mode 100644 index 0000000..31b40d8 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/refpolicy.py @@ -0,0 +1,916 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +import string +import selinux + +# OVERVIEW +# +# This file contains objects and functions used to represent the reference +# policy (including the headers, M4 macros, and policy language statements). +# +# This representation is very different from the semantic representation +# used in libsepol. Instead, it is a more typical abstract representation +# used by the first stage of compilers. It is basically a parse tree. 
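+# (For example, an allow rule written inside an interface is stored as an
+# AVRule node whose source type is the literal string "$1", exactly as it
+# appears in the header, rather than being expanded.)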
+# +# This choice is intentional as it allows us to handle the unprocessed +# M4 statements - including the $1 style arguments - and to more easily generate +# the data structures that we need for policy generation. +# + +# Constans for referring to fields +SRC_TYPE = 0 +TGT_TYPE = 1 +OBJ_CLASS = 2 +PERMS = 3 +ROLE = 4 +DEST_TYPE = 5 + +# String represenations of the above constants +field_to_str = ["source", "target", "object", "permission", "role", "destination" ] +str_to_field = { "source" : SRC_TYPE, "target" : TGT_TYPE, "object" : OBJ_CLASS, + "permission" : PERMS, "role" : ROLE, "destination" : DEST_TYPE } + +# Base Classes + +class PolicyBase: + def __init__(self, parent=None): + self.parent = None + self.comment = None + +class Node(PolicyBase): + """Base class objects produced from parsing the reference policy. + + The Node class is used as the base class for any non-leaf + object produced by parsing the reference policy. This object + should contain a reference to its parent (or None for a top-level + object) and 0 or more children. + + The general idea here is to have a very simple tree structure. Children + are not separated out by type. Instead the tree structure represents + fairly closely the real structure of the policy statements. + + The object should be iterable - by default over all children but + subclasses are free to provide additional iterators over a subset + of their childre (see Interface for example). + """ + + def __init__(self, parent=None): + PolicyBase.__init__(self, parent) + self.children = [] + + def __iter__(self): + return iter(self.children) + + # Not all of the iterators will return something on all Nodes, but + # they won't explode either. Putting them here is just easier. + + # Top level nodes + + def nodes(self): + return filter(lambda x: isinstance(x, Node), walktree(self)) + + def modules(self): + return filter(lambda x: isinstance(x, Module), walktree(self)) + + def interfaces(self): + return filter(lambda x: isinstance(x, Interface), walktree(self)) + + def templates(self): + return filter(lambda x: isinstance(x, Template), walktree(self)) + + def support_macros(self): + return filter(lambda x: isinstance(x, SupportMacros), walktree(self)) + + # Common policy statements + + def module_declarations(self): + return filter(lambda x: isinstance(x, ModuleDeclaration), walktree(self)) + + def interface_calls(self): + return filter(lambda x: isinstance(x, InterfaceCall), walktree(self)) + + def avrules(self): + return filter(lambda x: isinstance(x, AVRule), walktree(self)) + + def typerules(self): + return filter(lambda x: isinstance(x, TypeRule), walktree(self)) + + def typeattributes(self): + """Iterate over all of the TypeAttribute children of this Interface.""" + return filter(lambda x: isinstance(x, TypeAttribute), walktree(self)) + + def roleattributes(self): + """Iterate over all of the RoleAttribute children of this Interface.""" + return filter(lambda x: isinstance(x, RoleAttribute), walktree(self)) + + def requires(self): + return filter(lambda x: isinstance(x, Require), walktree(self)) + + def roles(self): + return filter(lambda x: isinstance(x, Role), walktree(self)) + + def role_allows(self): + return filter(lambda x: isinstance(x, RoleAllow), walktree(self)) + + def role_types(self): + return filter(lambda x: isinstance(x, RoleType), walktree(self)) + + def __str__(self): + if self.comment: + return str(self.comment) + "\n" + self.to_string() + else: + return self.to_string() + + def __repr__(self): + return "<%s(%s)>" % 
(self.__class__.__name__, self.to_string()) + + def to_string(self): + return "" + + +class Leaf(PolicyBase): + def __init__(self, parent=None): + PolicyBase.__init__(self, parent) + + def __str__(self): + if self.comment: + return str(self.comment) + "\n" + self.to_string() + else: + return self.to_string() + + def __repr__(self): + return "<%s(%s)>" % (self.__class__.__name__, self.to_string()) + + def to_string(self): + return "" + + + +# Utility functions + +def walktree(node, depthfirst=True, showdepth=False, type=None): + """Iterate over a Node and its Children. + + The walktree function iterates over a tree containing Nodes and + leaf objects. The iteration can perform a depth first or a breadth + first traversal of the tree (controlled by the depthfirst + paramater. The passed in node will be returned. + + This function will only work correctly for trees - arbitrary graphs + will likely cause infinite looping. + """ + # We control depth first / versus breadth first by + # how we pop items off of the node stack. + if depthfirst: + index = -1 + else: + index = 0 + + stack = [(node, 0)] + while len(stack) > 0: + cur, depth = stack.pop(index) + if showdepth: + yield cur, depth + else: + yield cur + + # If the node is not a Node instance it must + # be a leaf - so no need to add it to the stack + if isinstance(cur, Node): + items = [] + i = len(cur.children) - 1 + while i >= 0: + if type is None or isinstance(cur.children[i], type): + items.append((cur.children[i], depth + 1)) + i -= 1 + + stack.extend(items) + +def walknode(node, type=None): + """Iterate over the direct children of a Node. + + The walktree function iterates over the children of a Node. + Unlike walktree it does note return the passed in node or + the children of any Node objects (that is, it does not go + beyond the current level in the tree). + """ + for x in node: + if type is None or isinstance(x, type): + yield x + + +def list_to_space_str(s, cont=('{', '}')): + """Convert a set (or any sequence type) into a string representation + formatted to match SELinux space separated list conventions. + + For example the list ['read', 'write'] would be converted into: + '{ read write }' + """ + l = len(s) + str = "" + if l < 1: + raise ValueError("cannot convert 0 len set to string") + str = " ".join(s) + if l == 1: + return str + else: + return cont[0] + " " + str + " " + cont[1] + +def list_to_comma_str(s): + l = len(s) + if l < 1: + raise ValueError("cannot conver 0 len set to comma string") + + return ", ".join(s) + +# Basic SELinux types + +class IdSet(set): + def __init__(self, list=None): + if list: + set.__init__(self, list) + else: + set.__init__(self) + self.compliment = False + + def to_space_str(self): + return list_to_space_str(sorted(self)) + + def to_comma_str(self): + return list_to_comma_str(sorted(self)) + +class SecurityContext(Leaf): + """An SELinux security context with optional MCS / MLS fields.""" + def __init__(self, context=None, parent=None): + """Create a SecurityContext object, optionally from a string. + + Parameters: + [context] - string representing a security context. Same format + as a string passed to the from_string method. + """ + Leaf.__init__(self, parent) + self.user = "" + self.role = "" + self.type = "" + self.level = None + if context is not None: + self.from_string(context) + + def from_string(self, context): + """Parse a string representing a context into a SecurityContext. + + The string should be in the standard format - e.g., + 'user:role:type:level'. 
+ + Raises ValueError if the string is not parsable as a security context. + """ + fields = context.split(":") + if len(fields) < 3: + raise ValueError("context string [%s] not in a valid format" % context) + + self.user = fields[0] + self.role = fields[1] + self.type = fields[2] + if len(fields) > 3: + # FUTURE - normalize level fields to allow more comparisons to succeed. + self.level = ':'.join(fields[3:]) + else: + self.level = None + + def __eq__(self, other): + """Compare two SecurityContext objects - all fields must be exactly the + the same for the comparison to work. It is possible for the level fields + to be semantically the same yet syntactically different - in this case + this function will return false. + """ + return self.user == other.user and \ + self.role == other.role and \ + self.type == other.type and \ + self.level == other.level + + def to_string(self, default_level=None): + """Return a string representing this security context. + + By default, the string will contiain a MCS / MLS level + potentially from the default which is passed in if none was + set. + + Arguments: + default_level - the default level to use if self.level is an + empty string. + + Returns: + A string represening the security context in the form + 'user:role:type:level'. + """ + fields = [self.user, self.role, self.type] + if self.level is None: + if default_level is None: + if selinux.is_selinux_mls_enabled() == 1: + fields.append("s0") + else: + fields.append(default_level) + else: + fields.append(self.level) + return ":".join(fields) + +class ObjectClass(Leaf): + """SELinux object class and permissions. + + This class is a basic representation of an SELinux object + class - it does not represent separate common permissions - + just the union of the common and class specific permissions. + It is meant to be convenient for policy generation. + """ + def __init__(self, name="", parent=None): + Leaf.__init__(self, parent) + self.name = name + self.perms = IdSet() + +# Basic statements + +class TypeAttribute(Leaf): + """SElinux typeattribute statement. + + This class represents a typeattribute statement. + """ + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.type = "" + self.attributes = IdSet() + + def to_string(self): + return "typeattribute %s %s;" % (self.type, self.attributes.to_comma_str()) + +class RoleAttribute(Leaf): + """SElinux roleattribute statement. + + This class represents a roleattribute statement. 
+ """ + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.role = "" + self.roleattributes = IdSet() + + def to_string(self): + return "roleattribute %s %s;" % (self.role, self.roleattributes.to_comma_str()) + + +class Role(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.role = "" + self.types = IdSet() + + def to_string(self): + s = "" + for t in self.types: + s += "role %s types %s;\n" % (self.role, t) + return s + +class Type(Leaf): + def __init__(self, name="", parent=None): + Leaf.__init__(self, parent) + self.name = name + self.attributes = IdSet() + self.aliases = IdSet() + + def to_string(self): + s = "type %s" % self.name + if len(self.aliases) > 0: + s = s + "alias %s" % self.aliases.to_space_str() + if len(self.attributes) > 0: + s = s + ", %s" % self.attributes.to_comma_str() + return s + ";" + +class TypeAlias(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.type = "" + self.aliases = IdSet() + + def to_string(self): + return "typealias %s alias %s;" % (self.type, self.aliases.to_space_str()) + +class Attribute(Leaf): + def __init__(self, name="", parent=None): + Leaf.__init__(self, parent) + self.name = name + + def to_string(self): + return "attribute %s;" % self.name + +class Attribute_Role(Leaf): + def __init__(self, name="", parent=None): + Leaf.__init__(self, parent) + self.name = name + + def to_string(self): + return "attribute_role %s;" % self.name + + +# Classes representing rules + +class AVRule(Leaf): + """SELinux access vector (AV) rule. + + The AVRule class represents all varieties of AV rules including + allow, dontaudit, and auditallow (indicated by the flags self.ALLOW, + self.DONTAUDIT, and self.AUDITALLOW respectively). + + The source and target types, object classes, and perms are all represented + by sets containing strings. Sets are used to make it simple to add + strings repeatedly while avoiding duplicates. + + No checking is done to make certain that the symbols are valid or + consistent (e.g., perms that don't match the object classes). It is + even possible to put invalid types like '$1' into the rules to allow + storage of the reference policy interfaces. + """ + ALLOW = 0 + DONTAUDIT = 1 + AUDITALLOW = 2 + NEVERALLOW = 3 + + def __init__(self, av=None, parent=None): + Leaf.__init__(self, parent) + self.src_types = IdSet() + self.tgt_types = IdSet() + self.obj_classes = IdSet() + self.perms = IdSet() + self.rule_type = self.ALLOW + if av: + self.from_av(av) + + def __rule_type_str(self): + if self.rule_type == self.ALLOW: + return "allow" + elif self.rule_type == self.DONTAUDIT: + return "dontaudit" + else: + return "auditallow" + + def from_av(self, av): + """Add the access from an access vector to this allow + rule. + """ + self.src_types.add(av.src_type) + if av.src_type == av.tgt_type: + self.tgt_types.add("self") + else: + self.tgt_types.add(av.tgt_type) + self.obj_classes.add(av.obj_class) + self.perms.update(av.perms) + + def to_string(self): + """Return a string representation of the rule + that is a valid policy language representation (assuming + that the types, object class, etc. are valie). + """ + return "%s %s %s:%s %s;" % (self.__rule_type_str(), + self.src_types.to_space_str(), + self.tgt_types.to_space_str(), + self.obj_classes.to_space_str(), + self.perms.to_space_str()) +class TypeRule(Leaf): + """SELinux type rules. + + This class is very similar to the AVRule class, but is for representing + the type rules (type_trans, type_change, and type_member). 
The major + difference is the lack of perms and only and sing destination type. + """ + TYPE_TRANSITION = 0 + TYPE_CHANGE = 1 + TYPE_MEMBER = 2 + + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.src_types = IdSet() + self.tgt_types = IdSet() + self.obj_classes = IdSet() + self.dest_type = "" + self.rule_type = self.TYPE_TRANSITION + + def __rule_type_str(self): + if self.rule_type == self.TYPE_TRANSITION: + return "type_transition" + elif self.rule_type == self.TYPE_CHANGE: + return "type_change" + else: + return "type_member" + + def to_string(self): + return "%s %s %s:%s %s;" % (self.__rule_type_str(), + self.src_types.to_space_str(), + self.tgt_types.to_space_str(), + self.obj_classes.to_space_str(), + self.dest_type) + +class RoleAllow(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.src_roles = IdSet() + self.tgt_roles = IdSet() + + def to_string(self): + return "allow %s %s;" % (self.src_roles.to_comma_str(), + self.tgt_roles.to_comma_str()) + +class RoleType(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.role = "" + self.types = IdSet() + + def to_string(self): + s = "" + for t in self.types: + s += "role %s types %s;\n" % (self.role, t) + return s + +class ModuleDeclaration(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.name = "" + self.version = "" + self.refpolicy = False + + def to_string(self): + if self.refpolicy: + return "policy_module(%s, %s)" % (self.name, self.version) + else: + return "module %s %s;" % (self.name, self.version) + +class Conditional(Node): + def __init__(self, parent=None): + Node.__init__(self, parent) + self.cond_expr = [] + + def to_string(self): + return "[If %s]" % list_to_space_str(self.cond_expr, cont=("", "")) + +class Bool(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.name = "" + self.state = False + + def to_string(self): + s = "bool %s " % self.name + if s.state: + return s + "true" + else: + return s + "false" + +class InitialSid(Leaf): + def __init(self, parent=None): + Leaf.__init__(self, parent) + self.name = "" + self.context = None + + def to_string(self): + return "sid %s %s" % (self.name, str(self.context)) + +class GenfsCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.filesystem = "" + self.path = "" + self.context = None + + def to_string(self): + return "genfscon %s %s %s" % (self.filesystem, self.path, str(self.context)) + +class FilesystemUse(Leaf): + XATTR = 1 + TRANS = 2 + TASK = 3 + + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.type = self.XATTR + self.filesystem = "" + self.context = None + + def to_string(self): + s = "" + if self.type == XATTR: + s = "fs_use_xattr " + elif self.type == TRANS: + s = "fs_use_trans " + elif self.type == TASK: + s = "fs_use_task " + + return "%s %s %s;" % (s, self.filesystem, str(self.context)) + +class PortCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.port_type = "" + self.port_number = "" + self.context = None + + def to_string(self): + return "portcon %s %s %s" % (self.port_type, self.port_number, str(self.context)) + +class NodeCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.start = "" + self.end = "" + self.context = None + + def to_string(self): + return "nodecon %s %s %s" % (self.start, self.end, str(self.context)) + +class NetifCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + 
self.interface = "" + self.interface_context = None + self.packet_context = None + + def to_string(self): + return "netifcon %s %s %s" % (self.interface, str(self.interface_context), + str(self.packet_context)) +class PirqCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.pirq_number = "" + self.context = None + + def to_string(self): + return "pirqcon %s %s" % (self.pirq_number, str(self.context)) + +class IomemCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.device_mem = "" + self.context = None + + def to_string(self): + return "iomemcon %s %s" % (self.device_mem, str(self.context)) + +class IoportCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.ioport = "" + self.context = None + + def to_string(self): + return "ioportcon %s %s" % (self.ioport, str(self.context)) + +class PciDeviceCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.device = "" + self.context = None + + def to_string(self): + return "pcidevicecon %s %s" % (self.device, str(self.context)) + +class DeviceTreeCon(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.path = "" + self.context = None + + def to_string(self): + return "devicetreecon %s %s" % (self.path, str(self.context)) + +# Reference policy specific types + +def print_tree(head): + for node, depth in walktree(head, showdepth=True): + s = "" + for i in range(depth): + s = s + "\t" + print(s + str(node)) + + +class Headers(Node): + def __init__(self, parent=None): + Node.__init__(self, parent) + + def to_string(self): + return "[Headers]" + + +class Module(Node): + def __init__(self, parent=None): + Node.__init__(self, parent) + + def to_string(self): + return "" + +class Interface(Node): + """A reference policy interface definition. + + This class represents a reference policy interface definition. 
+ """ + def __init__(self, name="", parent=None): + Node.__init__(self, parent) + self.name = name + + def to_string(self): + return "[Interface name: %s]" % self.name + +class TunablePolicy(Node): + def __init__(self, parent=None): + Node.__init__(self, parent) + self.cond_expr = [] + + def to_string(self): + return "[Tunable Policy %s]" % list_to_space_str(self.cond_expr, cont=("", "")) + +class Template(Node): + def __init__(self, name="", parent=None): + Node.__init__(self, parent) + self.name = name + + def to_string(self): + return "[Template name: %s]" % self.name + +class IfDef(Node): + def __init__(self, name="", parent=None): + Node.__init__(self, parent) + self.name = name + + def to_string(self): + return "[Ifdef name: %s]" % self.name + +class InterfaceCall(Leaf): + def __init__(self, ifname="", parent=None): + Leaf.__init__(self, parent) + self.ifname = ifname + self.args = [] + self.comments = [] + + def matches(self, other): + if self.ifname != other.ifname: + return False + if len(self.args) != len(other.args): + return False + for a,b in zip(self.args, other.args): + if a != b: + return False + return True + + def to_string(self): + s = "%s(" % self.ifname + i = 0 + for a in self.args: + if isinstance(a, list): + str = list_to_space_str(a) + else: + str = a + + if i != 0: + s = s + ", %s" % str + else: + s = s + str + i += 1 + return s + ")" + +class OptionalPolicy(Node): + def __init__(self, parent=None): + Node.__init__(self, parent) + + def to_string(self): + return "[Optional Policy]" + +class SupportMacros(Node): + def __init__(self, parent=None): + Node.__init__(self, parent) + self.map = None + + def to_string(self): + return "[Support Macros]" + + def __expand_perm(self, perm): + # Recursive expansion - the assumption is that these + # are ordered correctly so that no macro is used before + # it is defined + s = set() + if perm in self.map: + for p in self.by_name(perm): + s.update(self.__expand_perm(p)) + else: + s.add(perm) + return s + + def __gen_map(self): + self.map = {} + for x in self: + exp_perms = set() + for perm in x.perms: + exp_perms.update(self.__expand_perm(perm)) + self.map[x.name] = exp_perms + + def by_name(self, name): + if not self.map: + self.__gen_map() + return self.map[name] + + def has_key(self, name): + if not self.map: + self.__gen_map() + return name in self.map + +class Require(Leaf): + def __init__(self, parent=None): + Leaf.__init__(self, parent) + self.types = IdSet() + self.obj_classes = { } + self.roles = IdSet() + self.data = IdSet() + self.users = IdSet() + + def add_obj_class(self, obj_class, perms): + p = self.obj_classes.setdefault(obj_class, IdSet()) + p.update(perms) + + + def to_string(self): + s = [] + s.append("require {") + for type in self.types: + s.append("\ttype %s;" % type) + for obj_class, perms in self.obj_classes.items(): + s.append("\tclass %s %s;" % (obj_class, perms.to_space_str())) + for role in self.roles: + s.append("\trole %s;" % role) + for bool in self.data: + s.append("\tbool %s;" % bool) + for user in self.users: + s.append("\tuser %s;" % user) + s.append("}") + + # Handle empty requires + if len(s) == 2: + return "" + + return "\n".join(s) + + +class ObjPermSet: + def __init__(self, name): + self.name = name + self.perms = set() + + def to_string(self): + return "define(`%s', `%s')" % (self.name, self.perms.to_space_str()) + +class ClassMap: + def __init__(self, obj_class, perms): + self.obj_class = obj_class + self.perms = perms + + def to_string(self): + return self.obj_class + ": " + self.perms + 
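+# For example, Comment(["Generated by audit2allow"]) prints as
+# "#Generated by audit2allow"; a Comment with no lines prints as an empty
+# string and simply serves as a spacer between policy statements.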
+class Comment: + def __init__(self, l=None): + if l: + self.lines = l + else: + self.lines = [] + + def to_string(self): + # If there are no lines, treat this as a spacer between + # policy statements and return a new line. + if len(self.lines) == 0: + return "" + else: + out = [] + for line in self.lines: + out.append("#" + line) + return "\n".join(out) + + def merge(self, other): + if len(other.lines): + for line in other.lines: + if line != "": + self.lines.append(line) + + def __str__(self): + return self.to_string() + + diff --git a/lib/python2.7/site-packages/sepolgen/sepolgeni18n.py b/lib/python2.7/site-packages/sepolgen/sepolgeni18n.py new file mode 100644 index 0000000..998c435 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/sepolgeni18n.py @@ -0,0 +1,26 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# + +try: + import gettext + t = gettext.translation( 'yumex' ) + _ = t.gettext +except: + def _(str): + return str diff --git a/lib/python2.7/site-packages/sepolgen/util.py b/lib/python2.7/site-packages/sepolgen/util.py new file mode 100644 index 0000000..1fca971 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/util.py @@ -0,0 +1,182 @@ +# Authors: Karl MacMillan +# +# Copyright (C) 2006 Red Hat +# see file 'COPYING' for use and warranty information +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 only +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +import locale +import sys + + +PY3 = sys.version_info[0] == 3 + +if PY3: + bytes_type=bytes + string_type=str +else: + bytes_type=str + string_type=unicode + + +class ConsoleProgressBar: + def __init__(self, out, steps=100, indicator='#'): + self.blocks = 0 + self.current = 0 + self.steps = steps + self.indicator = indicator + self.out = out + self.done = False + + def start(self, message=None): + self.done = False + if message: + self.out.write('\n%s:\n' % message) + self.out.write('%--10---20---30---40---50---60---70---80---90--100\n') + + def step(self, n=1): + self.current += n + + old = self.blocks + self.blocks = int(round(self.current / float(self.steps) * 100) / 2) + + if self.blocks > 50: + self.blocks = 50 + + new = self.blocks - old + + self.out.write(self.indicator * new) + self.out.flush() + + if self.blocks == 50 and not self.done: + self.done = True + self.out.write("\n") + +def set_to_list(s): + l = [] + l.extend(s) + return l + +def first(s, sorted=False): + """ + Return the first element of a set. + + It sometimes useful to return the first element from a set but, + because sets are not indexable, this is rather hard. This function + will return the first element from a set. If sorted is True, then + the set will first be sorted (making this an expensive operation). + Otherwise a random element will be returned (as sets are not ordered). + """ + if not len(s): + raise IndexError("empty containter") + + if sorted: + l = set_to_list(s) + l.sort() + return l[0] + else: + for x in s: + return x + +def encode_input(text): + import locale + """Encode given text via preferred system encoding""" + # locale will often find out the correct encoding + encoding = locale.getpreferredencoding() + try: + encoded_text = text.encode(encoding) + except UnicodeError: + # if it fails to find correct encoding then ascii is used + # which may lead to UnicodeError if `text` contains non ascii signs + # utf-8 is our guess to fix the situation + encoded_text = text.encode('utf-8') + return encoded_text + +def decode_input(text): + import locale + """Decode given text via preferred system encoding""" + # locale will often find out the correct encoding + encoding = locale.getpreferredencoding() + try: + decoded_text = text.decode(encoding) + except UnicodeError: + # if it fails to find correct encoding then ascii is used + # which may lead to UnicodeError if `text` contains non ascii signs + # utf-8 is our guess to fix the situation + decoded_text = text.decode('utf-8') + return decoded_text + +class Comparison(): + """Class used when implementing rich comparison. 
+ + Inherit from this class if you want to have a rich + comparison withing the class, afterwards implement + _compare function within your class.""" + + def _compare(self, other, method): + raise NotImplemented + + def __eq__(self, other): + return self._compare(other, lambda a, b: a == b) + + def __lt__(self, other): + return self._compare(other, lambda a, b: a < b) + + def __le__(self, other): + return self._compare(other, lambda a, b: a <= b) + + def __ge__(self, other): + return self._compare(other, lambda a, b: a >= b) + + def __gt__(self, other): + return self._compare(other, lambda a, b: a > b) + + def __ne__(self, other): + return self._compare(other, lambda a, b: a != b) + +if sys.version_info < (2,7): + # cmp_to_key function is missing in python2.6 + def cmp_to_key(mycmp): + 'Convert a cmp= function into a key= function' + class K: + def __init__(self, obj, *args): + self.obj = obj + def __lt__(self, other): + return mycmp(self.obj, other.obj) < 0 + def __gt__(self, other): + return mycmp(self.obj, other.obj) > 0 + def __eq__(self, other): + return mycmp(self.obj, other.obj) == 0 + def __le__(self, other): + return mycmp(self.obj, other.obj) <= 0 + def __ge__(self, other): + return mycmp(self.obj, other.obj) >= 0 + def __ne__(self, other): + return mycmp(self.obj, other.obj) != 0 + return K +else: + from functools import cmp_to_key + +def cmp(first, second): + return (first > second) - (second > first) + +if __name__ == "__main__": + import sys + import time + p = ConsoleProgressBar(sys.stdout, steps=999) + p.start("computing pi") + for i in range(999): + p.step() + time.sleep(0.001) + diff --git a/lib/python2.7/site-packages/sepolgen/yacc.py b/lib/python2.7/site-packages/sepolgen/yacc.py new file mode 100644 index 0000000..f006354 --- /dev/null +++ b/lib/python2.7/site-packages/sepolgen/yacc.py @@ -0,0 +1,2214 @@ +#----------------------------------------------------------------------------- +# ply: yacc.py +# +# Author(s): David M. Beazley (dave@dabeaz.com) +# +# Copyright (C) 2001-2006, David M. Beazley +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. The grammer is specified by supplying the BNF inside +# Python documentation strings. The inspiration for this technique was borrowed +# from John Aycock's Spark parsing system. PLY might be viewed as cross between +# Spark and the GNU bison utility. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. 
Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). +# +# This implementation supports both SLR and LALR(1) parsing. LALR(1) +# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), +# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, +# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced +# by the more efficient DeRemer and Pennello algorithm. +# +# :::::::: WARNING ::::::: +# +# Construction of LR parsing tables is fairly complicated and expensive. +# To make this module run fast, a *LOT* of work has been put into +# optimization---often at the expensive of readability and what might +# consider to be good Python "coding style." Modify the code at your +# own risk! +# ---------------------------------------------------------------------------- + +__version__ = "2.2" + +#----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +#----------------------------------------------------------------------------- + +yaccdebug = 1 # Debugging mode. If set, yacc generates a + # a 'parser.out' file in the current directory + +debug_file = 'parser.out' # Default name of the debugging file +tab_module = 'parsetab' # Default name of the table module +default_lr = 'LALR' # Default LR table generation method + +error_count = 3 # Number of symbols that must be shifted to leave recovery mode + +import re, types, sys, hashlib, os.path +try: + from cStringIO import StringIO +except ImportError: + from io import StringIO + +from . import util + +# Exception raised for yacc-related errors +class YaccError(Exception): pass + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) +# .lexpos = Starting lex position +# .endlexpos = Ending lex position (optional, set automatically) + +class YaccSymbol: + def __str__(self): return self.type + def __repr__(self): return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) +# representing the range of positional information for a symbol. 
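+# For example, inside a hypothetical grammar rule p_foo(p), p[1] is the
+# .value of the first right-hand-side symbol, p[0] receives the rule's
+# result, and p.lineno(1) returns the line where that symbol started.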
+ +class YaccProduction: + def __init__(self,s,stack=None): + self.slice = s + self.pbstack = [] + self.stack = stack + + def __getitem__(self,n): + if type(n) == int: + if n >= 0: return self.slice[n].value + else: return self.stack[n].value + else: + return [s.value for s in self.slice[n.start:n.stop:n.step]] + + def __setitem__(self,n,v): + self.slice[n].value = v + + def __len__(self): + return len(self.slice) + + def lineno(self,n): + return getattr(self.slice[n],"lineno",0) + + def linespan(self,n): + startline = getattr(self.slice[n],"lineno",0) + endline = getattr(self.slice[n],"endlineno",startline) + return startline,endline + + def lexpos(self,n): + return getattr(self.slice[n],"lexpos",0) + + def lexspan(self,n): + startpos = getattr(self.slice[n],"lexpos",0) + endpos = getattr(self.slice[n],"endlexpos",startpos) + return startpos,endpos + + def pushback(self,n): + if n <= 0: + raise ValueError("Expected a positive value") + if n > (len(self.slice)-1): + raise ValueError("Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)) + for i in range(0,n): + self.pbstack.append(self.slice[-i-1]) + +# The LR Parsing engine. This is defined as a class so that multiple parsers +# can exist in the same process. A user never instantiates this directly. +# Instead, the global yacc() function should be used to create a suitable Parser +# object. + +class Parser: + def __init__(self,magic=None): + + # This is a hack to keep users from trying to instantiate a Parser + # object directly. + + if magic != "xyzzy": + raise YaccError("Can't instantiate Parser. Use yacc() instead.") + + # Reset internal state + self.productions = None # List of productions + self.errorfunc = None # Error handling function + self.action = { } # LR Action table + self.goto = { } # LR goto table + self.require = { } # Attribute require table + self.method = "Unknown LR" # Table construction method used + + def errok(self): + self.errorcount = 0 + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$end' + self.symstack.append(sym) + self.statestack.append(0) + + def parse(self,input=None,lexer=None,debug=0): + lookahead = None # Current lookahead symbol + lookaheadstack = [ ] # Stack of lookahead symbols + actions = self.action # Local reference to action table + goto = self.goto # Local reference to goto table + prod = self.productions # Local reference to production list + pslice = YaccProduction(None) # Production object passed to grammar rules + pslice.parser = self # Parser object + self.errorcount = 0 # Used during error recovery + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + pslice.lexer = lexer + + # If input was supplied, pass to lexer + if input: + lexer.input(input) + + # Tokenize function + get_token = lexer.token + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. 
Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + if debug > 1: + print('state', statestack[-1]) + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + if debug: + errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip() + + # Check the action table + s = statestack[-1] + ltype = lookahead.type + t = actions.get((s,ltype),None) + + if debug > 1: + print('action', t) + if t is not None: + if t > 0: + # shift a symbol on the stack + if ltype == '$end': + # Error, end of input + sys.stderr.write("yacc: Parse error. EOF\n") + return + statestack.append(t) + if debug > 1: + sys.stderr.write("%-60s shift state %s\n" % (errorlead, t)) + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if self.errorcount > 0: + self.errorcount -= 1 + + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + if debug > 1: + sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t)) + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + try: + sym.lineno = targ[1].lineno + sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) + sym.lexpos = targ[1].lexpos + sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos) + except AttributeError: + sym.lineno = 0 + del symstack[-plen:] + del statestack[-plen:] + else: + sym.lineno = 0 + targ = [ sym ] + pslice.slice = targ + pslice.pbstack = [] + # Call the grammar rule with our special slice object + p.func(pslice) + + # If there was a pushback, put that on the stack + if pslice.pbstack: + lookaheadstack.append(lookahead) + for _t in pslice.pbstack: + lookaheadstack.append(_t) + lookahead = None + + symstack.append(sym) + statestack.append(goto[statestack[-1],pname]) + continue + + if t == 0: + n = symstack[-1] + return getattr(n,"value",None) + sys.stderr.write(errorlead, "\n") + + if t == None: + if debug: + sys.stderr.write(errorlead + "\n") + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if not self.errorcount: + self.errorcount = error_count + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if not self.errorcount: + # User must have done some kind of panic + # mode recovery on their own. 
The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + else: + sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + else: + sys.stderr.write("yacc: Parse error in input. EOF\n") + return + + else: + self.errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead,"lineno"): + t.lineno = lookahead.lineno + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + symstack.pop() + statestack.pop() + + continue + + # Call an error function here + raise RuntimeError("yacc: internal parser error!!!\n") + +# ----------------------------------------------------------------------------- +# === Parser Construction === +# +# The following functions and variables are used to implement the yacc() function +# itself. This is pretty hairy stuff involving lots of error checking, +# construction of LR items, kernels, and so forth. Although a lot of +# this work is done using global variables, the resulting Parser object +# is completely self contained--meaning that it is safe to repeatedly +# call yacc() with different grammars in the same application. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This function checks to see if there are duplicated p_rulename() functions +# in the parser module file. Without this function, it is really easy for +# users to make mistakes by cutting and pasting code fragments (and it's a real +# bugger to try and figure out why the resulting parser doesn't work). Therefore, +# we just do a little regular expression pattern matching of def statements +# to try and detect duplicates. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea. Assume it's okay. + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + sys.stderr.write("%s:%d: Function %s redefined. 
Previously defined on line %d\n" % (filename,linen,name,prev)) + noerror = 0 + linen += 1 + return noerror + +# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. +def validate_dict(d): + for n,v in d.items(): + if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue + if n[0:2] == 't_': continue + + if n[0:2] == 'p_': + sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n) + if 1 and isinstance(v,types.FunctionType) and v.__code__.co_argcount == 1: + try: + doc = v.__doc__.split(" ") + if doc[1] == ':': + sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.__code__.co_filename, v.__code__.co_firstlineno,n)) + except Exception: + pass + +# ----------------------------------------------------------------------------- +# === GRAMMAR FUNCTIONS === +# +# The following global variables and functions are used to store, manipulate, +# and verify the grammar rules specified by the user. +# ----------------------------------------------------------------------------- + +# Initialize all of the global variables used during grammar construction +def initialize_vars(): + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + Prodmap = { } # A dictionary that is only used to detect duplicate + # productions. + + Terminals = { } # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + Nonterminals = { } # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + First = { } # A dictionary of precomputed FIRST(x) symbols + + Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + + Precedence = { } # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + LRitems = [ ] # A list of all LR items for the grammar. These are the + # productions with the "dot" like E -> E . PLUS E + + Errorfunc = None # User defined error handler + + Signature = hashlib.sha256() # Digital signature of the grammar rules, precedence + # and other information. Used to determined when a + # parsing table needs to be regenerated. + + Requires = { } # Requires list + + # File objects used when creating the parser.out debugging file + global _vf, _vfc + _vf = StringIO() + _vfc = StringIO() + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# It has a few required attributes: +# +# name - Name of the production (nonterminal) +# prod - A list of symbols making up its production +# number - Production number. +# +# In addition, a few additional attributes are used to help with debugging or +# optimization of table generation. +# +# file - File where production action is defined. +# lineno - Line number where action is defined +# func - Action function +# prec - Precedence level +# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' +# then lr_next refers to 'E -> E PLUS . 
E' +# lr_index - LR item index (location of the ".") in the prod list. +# lookaheads - LALR lookahead symbols for this item +# len - Length of the production (number of symbols on right hand side) +# ----------------------------------------------------------------------------- + +class Production: + def __init__(self,**kw): + for k,v in kw.items(): + setattr(self,k,v) + self.lr_index = -1 + self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure + self.lr1_added = 0 # Flag indicating whether or not added to LR1 + self.usyms = [ ] + self.lookaheads = { } + self.lk_added = { } + self.setnumbers = [ ] + + def __str__(self): + if self.prod: + s = "%s -> %s" % (self.name," ".join(self.prod)) + else: + s = "%s -> " % self.name + return s + + def __repr__(self): + return str(self) + + # Compute lr_items from the production + def lr_item(self,n): + if n > len(self.prod): return None + p = Production() + p.name = self.name + p.prod = list(self.prod) + p.number = self.number + p.lr_index = n + p.lookaheads = { } + p.setnumbers = self.setnumbers + p.prod.insert(n,".") + p.prod = tuple(p.prod) + p.len = len(p.prod) + p.usyms = self.usyms + + # Precompute list of productions immediately following + try: + p.lrafter = Prodnames[p.prod[n+1]] + except (IndexError,KeyError) as e: + p.lrafter = [] + try: + p.lrbefore = p.prod[n-1] + except IndexError: + p.lrbefore = None + + return p + +class MiniProduction: + pass + +# regex matching identifiers +_is_identifier = re.compile(r'^[a-zA-Z0-9_-~]+$') + +# ----------------------------------------------------------------------------- +# add_production() +# +# Given an action function, this function assembles a production rule. +# The production rule is assumed to be found in the function's docstring. +# This rule has the general syntax: +# +# name1 ::= production1 +# | production2 +# | production3 +# ... +# | productionn +# name2 ::= production1 +# | production2 +# ... +# ----------------------------------------------------------------------------- + +def add_production(f,file,line,prodname,syms): + + if prodname in Terminals: + sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname)) + return -1 + if prodname == 'error': + sys.stderr.write("%s:%d: Illegal rule name '%s'. 
error is a reserved word.\n" % (file,line,prodname)) + return -1 + + if not _is_identifier.match(prodname): + sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname)) + return -1 + + for x in range(len(syms)): + s = syms[x] + if s[0] in "'\"": + try: + c = eval(s) + if (len(c) > 1): + sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) + return -1 + if c not in Terminals: + Terminals[c] = [] + syms[x] = c + continue + except SyntaxError: + pass + if not _is_identifier.match(s) and s != '%prec': + sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname)) + return -1 + + # See if the rule is already in the rulemap + map = "%s -> %s" % (prodname,syms) + if map in Prodmap: + m = Prodmap[map] + sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m)) + sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line)) + return -1 + + p = Production() + p.name = prodname + p.prod = syms + p.file = file + p.line = line + p.func = f + p.number = len(Productions) + + + Productions.append(p) + Prodmap[map] = p + if prodname not in Nonterminals: + Nonterminals[prodname] = [ ] + + # Add all terminals to Terminals + i = 0 + while i < len(p.prod): + t = p.prod[i] + if t == '%prec': + try: + precname = p.prod[i+1] + except IndexError: + sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line)) + return -1 + + prec = Precedence.get(precname,None) + if not prec: + sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname)) + return -1 + else: + p.prec = prec + del p.prod[i] + del p.prod[i] + continue + + if t in Terminals: + Terminals[t].append(p.number) + # Is a terminal. 
We'll assign a precedence to p based on this + if not hasattr(p,"prec"): + p.prec = Precedence.get(t,('right',0)) + else: + if t not in Nonterminals: + Nonterminals[t] = [ ] + Nonterminals[t].append(p.number) + i += 1 + + if not hasattr(p,"prec"): + p.prec = ('right',0) + + # Set final length of productions + p.len = len(p.prod) + p.prod = tuple(p.prod) + + # Calculate unique syms in the production + p.usyms = [ ] + for s in p.prod: + if s not in p.usyms: + p.usyms.append(s) + + # Add to the global productions list + try: + Prodnames[p.name].append(p) + except KeyError: + Prodnames[p.name] = [ p ] + return 0 + +# Given a raw rule function, this function rips out its doc string +# and adds rules to the grammar + +def add_function(f): + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + error = 0 + + if isinstance(f,types.MethodType): + reqdargs = 2 + else: + reqdargs = 1 + + if f.__code__.co_argcount > reqdargs: + sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__)) + return -1 + + if f.__code__.co_argcount < reqdargs: + sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__)) + return -1 + + if f.__doc__: + # Split the doc string into lines + pstrings = f.__doc__.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline)) + return -1 + prodname = lastp + if len(p) > 1: + syms = p[1:] + else: + syms = [ ] + else: + prodname = p[0] + lastp = prodname + assign = p[1] + if len(p) > 2: + syms = p[2:] + else: + syms = [ ] + if assign != ':' and assign != '::=': + sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline)) + return -1 + + + e = add_production(f,file,dline,prodname,syms) + error += e + + + except Exception: + sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps)) + error -= 1 + else: + sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__)) + return error + + +# Cycle checking code (Michael Dyck) + +def compute_reachable(): + ''' + Find each symbol that can be reached from the start symbol. + Print a warning for any nonterminals that can't be reached. + (Unused terminals have already had their warning.) + ''' + Reachable = { } + for s in list(Terminals.keys()) + list(Nonterminals.keys()): + Reachable[s] = 0 + + mark_reachable_from( Productions[0].prod[0], Reachable ) + + for s in Nonterminals.keys(): + if not Reachable[s]: + sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s) + +def mark_reachable_from(s, Reachable): + ''' + Mark all symbols that are reachable from symbol s. + ''' + if Reachable[s]: + # We've already reached symbol s. + return + Reachable[s] = 1 + for p in Prodnames.get(s,[]): + for r in p.prod: + mark_reachable_from(r, Reachable) + +# ----------------------------------------------------------------------------- +# compute_terminates() +# +# This function looks at the various parsing rules and tries to detect +# infinite recursion cycles (grammar rules where there is no possible way +# to derive a string of only terminals). +# ----------------------------------------------------------------------------- +def compute_terminates(): + ''' + Raise an error for any symbols that don't terminate. 
+ ''' + Terminates = {} + + # Terminals: + for t in Terminals.keys(): + Terminates[t] = 1 + + Terminates['$end'] = 1 + + # Nonterminals: + + # Initialize to false: + for n in Nonterminals.keys(): + Terminates[n] = 0 + + # Then propagate termination until no change: + while 1: + some_change = 0 + for (n,pl) in Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not Terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = 0 + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = 1 + + if p_terminates: + # symbol n terminates! + if not Terminates[n]: + Terminates[n] = 1 + some_change = 1 + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + some_error = 0 + for (s,terminates) in Terminates.items(): + if not terminates: + if s not in Prodnames and s not in Terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s) + some_error = 1 + + return some_error + +# ----------------------------------------------------------------------------- +# verify_productions() +# +# This function examines all of the supplied rules to see if they seem valid. +# ----------------------------------------------------------------------------- +def verify_productions(cycle_check=1): + error = 0 + for p in Productions: + if not p: continue + + for s in p.prod: + if s not in Prodnames and s not in Terminals and s != 'error': + sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s)) + error = 1 + continue + + unused_tok = 0 + # Now verify all of the tokens + if yaccdebug: + _vf.write("Unused terminals:\n\n") + for s,v in Terminals.items(): + if s != 'error' and not v: + sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s) + if yaccdebug: _vf.write(" %s\n"% s) + unused_tok += 1 + + # Print out all of the productions + if yaccdebug: + _vf.write("\nGrammar\n\n") + for i in range(1,len(Productions)): + _vf.write("Rule %-5d %s\n" % (i, Productions[i])) + + unused_prod = 0 + # Verify the use of all productions + for s,v in Nonterminals.items(): + if not v: + p = Prodnames[s][0] + sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s)) + unused_prod += 1 + + + if unused_tok == 1: + sys.stderr.write("yacc: Warning. There is 1 unused token.\n") + if unused_tok > 1: + sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok) + + if unused_prod == 1: + sys.stderr.write("yacc: Warning. There is 1 unused rule.\n") + if unused_prod > 1: + sys.stderr.write("yacc: Warning. 
There are %d unused rules.\n" % unused_prod) + + if yaccdebug: + _vf.write("\nTerminals, with rules where they appear\n\n") + ks = list(Terminals.keys()) + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) + _vf.write("\nNonterminals, with rules where they appear\n\n") + ks = list(Nonterminals.keys()) + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) + + if (cycle_check): + compute_reachable() + error += compute_terminates() +# error += check_cycles() + return error + +# ----------------------------------------------------------------------------- +# build_lritems() +# +# This function walks the list of productions and builds a complete set of the +# LR items. The LR items are stored in two ways: First, they are uniquely +# numbered and placed in the list _lritems. Second, a linked list of LR items +# is built for each production. For example: +# +# E -> E PLUS E +# +# Creates the list +# +# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] +# ----------------------------------------------------------------------------- + +def build_lritems(): + for p in Productions: + lastlri = p + lri = p.lr_item(0) + i = 0 + while 1: + lri = p.lr_item(i) + lastlri.lr_next = lri + if not lri: break + lri.lr_num = len(LRitems) + LRitems.append(lri) + lastlri = lri + i += 1 + + # In order for the rest of the parser generator to work, we need to + # guarantee that no more lritems are generated. Therefore, we nuke + # the p.lr_item method. (Only used in debugging) + # Production.lr_item = None + +# ----------------------------------------------------------------------------- +# add_precedence() +# +# Given a list of precedence rules, add to the precedence table. +# ----------------------------------------------------------------------------- + +def add_precedence(plist): + plevel = 0 + error = 0 + for p in plist: + plevel += 1 + try: + prec = p[0] + terms = p[1:] + if prec != 'left' and prec != 'right' and prec != 'nonassoc': + sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec) + return -1 + for t in terms: + if t in Precedence: + sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t) + error += 1 + continue + Precedence[t] = (prec,plevel) + except: + sys.stderr.write("yacc: Invalid precedence table.\n") + error += 1 + + return error + +# ----------------------------------------------------------------------------- +# augment_grammar() +# +# Compute the augmented grammar. This is just a rule S' -> start where start +# is the starting symbol. +# ----------------------------------------------------------------------------- + +def augment_grammar(start=None): + if not start: + start = Productions[1].name + Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) + Productions[0].usyms = [ start ] + Nonterminals[start].append(0) + + +# ------------------------------------------------------------------------- +# first() +# +# Compute the value of FIRST1(beta) where beta is a tuple of symbols. +# +# During execution of compute_first1, the result may be incomplete. +# Afterward (e.g., when called from compute_follow()), it will be complete. +# ------------------------------------------------------------------------- +def first(beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [ ] + for x in beta: + x_produces_empty = 0 + + # Add all the non- symbols of First[x] to the result. 
+ for f in First[x]: + if f == '': + x_produces_empty = 1 + else: + if f not in result: result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('') + + return result + + +# FOLLOW(x) +# Given a non-terminal. This function computes the set of all symbols +# that might follow it. Dragon book, p. 189. + +def compute_follow(start=None): + # Add '$end' to the follow list of the start symbol + for k in Nonterminals.keys(): + Follow[k] = [ ] + + if not start: + start = Productions[1].name + + Follow[start] = [ '$end' ] + + while 1: + didadd = 0 + for p in Productions[1:]: + # Here is the production set + for i in range(len(p.prod)): + B = p.prod[i] + if B in Nonterminals: + # Okay. We got a non-terminal in a production + fst = first(p.prod[i+1:]) + hasempty = 0 + for f in fst: + if f != '' and f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if f == '': + hasempty = 1 + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in Follow[p.name]: + if f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if not didadd: break + + if 0 and yaccdebug: + _vf.write('\nFollow:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) + +# ------------------------------------------------------------------------- +# compute_first1() +# +# Compute the value of FIRST1(X) for all symbols +# ------------------------------------------------------------------------- +def compute_first1(): + + # Terminals: + for t in Terminals.keys(): + First[t] = [t] + + First['$end'] = ['$end'] + First['#'] = ['#'] # what's this for? + + # Nonterminals: + + # Initialize to the empty set: + for n in Nonterminals.keys(): + First[n] = [] + + # Then propagate symbols until no change: + while 1: + some_change = 0 + for n in Nonterminals.keys(): + for p in Prodnames[n]: + for f in first(p.prod): + if f not in First[n]: + First[n].append( f ) + some_change = 1 + if not some_change: + break + + if 0 and yaccdebug: + _vf.write('\nFirst:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % + (k, " ".join([str(s) for s in First[k]]))) + +# ----------------------------------------------------------------------------- +# === SLR Generation === +# +# The following functions are used to construct SLR (Simple LR) parsing tables +# as described on p.221-229 of the dragon book. +# ----------------------------------------------------------------------------- + +# Global variables for the LR parsing engine +def lr_init_vars(): + global _lr_action, _lr_goto, _lr_method + global _lr_goto_cache, _lr0_cidhash + + _lr_action = { } # Action table + _lr_goto = { } # Goto table + _lr_method = "Unknown" # LR method used + _lr_goto_cache = { } + _lr0_cidhash = { } + + +# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. +# prodlist is a list of productions. 
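+#
+# Illustrative sketch (not part of the upstream PLY sources): for the toy
+# augmented grammar
+#
+#     S' -> E
+#     E  -> E PLUS E
+#     E  -> NUMBER
+#
+# the LR(0) closure of the item set { S' -> . E } is
+#
+#     { S' -> . E,   E -> . E PLUS E,   E -> . NUMBER }
+#
+# since a dot sitting immediately before the nonterminal E pulls in every
+# production of E with the dot at the start; lr0_closure() below repeats this
+# until no new items can be added.
+#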
+ +_add_count = 0 # Counter used to detect cycles + +def lr0_closure(I): + global _add_count + + _add_count += 1 + prodlist = Productions + + # Add everything in I to J + J = I[:] + didadd = 1 + while didadd: + didadd = 0 + for j in J: + for x in j.lrafter: + if x.lr0_added == _add_count: continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = _add_count + didadd = 1 + + return J + +# Compute the LR(0) goto function goto(I,X) where I is a set +# of LR(0) items and X is a grammar symbol. This function is written +# in a way that guarantees uniqueness of the generated goto sets +# (i.e. the same goto set will never be returned as two different Python +# objects). With uniqueness, we can later do fast set comparisons using +# id(obj) instead of element-wise comparison. + +def lr0_goto(I,x): + # First we look for a previously cached entry + g = _lr_goto_cache.get((id(I),x),None) + if g: return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = _lr_goto_cache.get(x,None) + if not s: + s = { } + _lr_goto_cache[x] = s + + gs = [ ] + for p in I: + n = p.lr_next + if n and n.lrbefore == x: + s1 = s.get(id(n),None) + if not s1: + s1 = { } + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$end',None) + if not g: + if gs: + g = lr0_closure(gs) + s['$end'] = g + else: + s['$end'] = gs + _lr_goto_cache[(id(I),x)] = g + return g + +_lr0_cidhash = { } + +# Compute the LR(0) sets of item function +def lr0_items(): + + C = [ lr0_closure([Productions[0].lr_next]) ] + i = 0 + for I in C: + _lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbols + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = { } + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms.keys(): + g = lr0_goto(I,x) + if not g: continue + if id(g) in _lr0_cidhash: continue + _lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + +# ----------------------------------------------------------------------------- +# ==== LALR(1) Parsing ==== +# +# LALR(1) parsing is almost exactly the same as SLR except that instead of +# relying upon Follow() sets when performing reductions, a more selective +# lookahead set that incorporates the state of the LR(0) machine is utilized. +# Thus, we mainly just have to focus on calculating the lookahead sets. +# +# The method used here is due to DeRemer and Pennelo (1982). +# +# DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) +# Lookahead Sets", ACM Transactions on Programming Languages and Systems, +# Vol. 4, No. 4, Oct. 1982, pp. 615-649 +# +# Further details can also be found in: +# +# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", +# McGraw-Hill Book Company, (1985). +# +# Note: This implementation is a complete replacement of the LALR(1) +# implementation in PLY-1.x releases. That version was based on +# a less efficient algorithm and it had bugs in its implementation. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# compute_nullable_nonterminals() +# +# Creates a dictionary containing all of the non-terminals that might produce +# an empty production. 
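+#
+# (Illustrative sketch, not from the upstream PLY sources: a hypothetical rule
+#
+#      def p_maybe_comma(p):
+#          '''maybe_comma : COMMA
+#                         | '''
+#
+#  has an empty second alternative, so maybe_comma can derive the empty string
+#  and is recorded as nullable by the function below.)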
+# ----------------------------------------------------------------------------- + +def compute_nullable_nonterminals(): + nullable = {} + num_nullable = 0 + while 1: + for p in Productions[1:]: + if p.len == 0: + nullable[p.name] = 1 + continue + for t in p.prod: + if t not in nullable: break + else: + nullable[p.name] = 1 + if len(nullable) == num_nullable: break + num_nullable = len(nullable) + return nullable + +# ----------------------------------------------------------------------------- +# find_nonterminal_trans(C) +# +# Given a set of LR(0) items, this functions finds all of the non-terminal +# transitions. These are transitions in which a dot appears immediately before +# a non-terminal. Returns a list of tuples of the form (state,N) where state +# is the state number and N is the nonterminal symbol. +# +# The input C is the set of LR(0) items. +# ----------------------------------------------------------------------------- + +def find_nonterminal_transitions(C): + trans = [] + for state in range(len(C)): + for p in C[state]: + if p.lr_index < p.len - 1: + t = (state,p.prod[p.lr_index+1]) + if t[1] in Nonterminals: + if t not in trans: trans.append(t) + state = state + 1 + return trans + +# ----------------------------------------------------------------------------- +# dr_relation() +# +# Computes the DR(p,A) relationships for non-terminal transitions. The input +# is a tuple (state,N) where state is a number and N is a nonterminal symbol. +# +# Returns a list of terminals. +# ----------------------------------------------------------------------------- + +def dr_relation(C,trans,nullable): + dr_set = { } + state,N = trans + terms = [] + + g = lr0_goto(C[state],N) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in Terminals: + if a not in terms: terms.append(a) + + # This extra bit is to handle the start state + if state == 0 and N == Productions[0].prod[0]: + terms.append('$end') + + return terms + +# ----------------------------------------------------------------------------- +# reads_relation() +# +# Computes the READS() relation (p,A) READS (t,C). +# ----------------------------------------------------------------------------- + +def reads_relation(C, trans, empty): + # Look for empty transitions + rel = [] + state, N = trans + + g = lr0_goto(C[state],N) + j = _lr0_cidhash.get(id(g),-1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j,a)) + + return rel + +# ----------------------------------------------------------------------------- +# compute_lookback_includes() +# +# Determines the lookback and includes relations +# +# LOOKBACK: +# +# This relation is determined by running the LR(0) state machine forward. +# For example, starting with a production "N : . A B C", we run it forward +# to obtain "N : A B C ." We then build a relationship between this final +# state and the starting state. These relationships are stored in a dictionary +# lookdict. +# +# INCLUDES: +# +# Computes the INCLUDE() relation (p,A) INCLUDES (p',B). +# +# This relation is used to determine non-terminal transitions that occur +# inside of other non-terminal transition states. (p,A) INCLUDES (p', B) +# if the following holds: +# +# B -> LAT, where T -> epsilon and p' -L-> p +# +# L is essentially a prefix (which may be empty), T is a suffix that must be +# able to derive an empty string. State p' must lead to state p with the string L. 
+# +# ----------------------------------------------------------------------------- + +def compute_lookback_includes(C,trans,nullable): + + lookdict = {} # Dictionary of lookback relations + includedict = {} # Dictionary of include relations + + # Make a dictionary of non-terminal transitions + dtrans = {} + for t in trans: + dtrans[t] = 1 + + # Loop over all transitions and compute lookbacks and includes + for state,N in trans: + lookb = [] + includes = [] + for p in C[state]: + if p.name != N: continue + + # Okay, we have a name match. We now follow the production all the way + # through the state machine until we get the . on the right hand side + + lr_index = p.lr_index + j = state + while lr_index < p.len - 1: + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j,t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in Terminals: break # No forget it + if p.prod[li] not in nullable: break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j,t)) + + g = lr0_goto(C[j],t) # Go to next set + j = _lr0_cidhash.get(id(g),-1) # Go to next state + + # When we get here, j is the final state, now we have to locate the production + for r in C[j]: + if r.name != p.name: continue + if r.len != p.len: continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: break + i = i + 1 + else: + lookb.append((j,r)) + for i in includes: + if i not in includedict: includedict[i] = [] + includedict[i].append((state,N)) + lookdict[(state,N)] = lookb + + return lookdict,includedict + +# ----------------------------------------------------------------------------- +# digraph() +# traverse() +# +# The following two functions are used to compute set valued functions +# of the form: +# +# F(x) = F'(x) U U{F(y) | x R y} +# +# This is used to compute the values of Read() sets as well as FOLLOW sets +# in LALR(1) generation. +# +# Inputs: X - An input set +# R - A relation +# FP - Set-valued function +# ------------------------------------------------------------------------------ + +def digraph(X,R,FP): + N = { } + for x in X: + N[x] = 0 + stack = [] + F = { } + for x in X: + if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + return F + +def traverse(x,N,stack,F,X,R,FP): + stack.append(x) + d = len(stack) + N[x] = d + F[x] = FP(x) # F(X) <- F'(x) + + rel = R(x) # Get y's related to x + for y in rel: + if N[y] == 0: + traverse(y,N,stack,F,X,R,FP) + N[x] = min(N[x],N[y]) + for a in F.get(y,[]): + if a not in F[x]: F[x].append(a) + if N[x] == d: + N[stack[-1]] = sys.maxsize + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = sys.maxsize + F[stack[-1]] = F[x] + element = stack.pop() + +# ----------------------------------------------------------------------------- +# compute_read_sets() +# +# Given a set of LR(0) items, this function computes the read sets. 
+# +# Inputs: C = Set of LR(0) items +# ntrans = Set of nonterminal transitions +# nullable = Set of empty transitions +# +# Returns a set containing the read sets +# ----------------------------------------------------------------------------- + +def compute_read_sets(C, ntrans, nullable): + FP = lambda x: dr_relation(C,x,nullable) + R = lambda x: reads_relation(C,x,nullable) + F = digraph(ntrans,R,FP) + return F + +# ----------------------------------------------------------------------------- +# compute_follow_sets() +# +# Given a set of LR(0) items, a set of non-terminal transitions, a readset, +# and an include set, this function computes the follow sets +# +# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} +# +# Inputs: +# ntrans = Set of nonterminal transitions +# readsets = Readset (previously computed) +# inclsets = Include sets (previously computed) +# +# Returns a set containing the follow sets +# ----------------------------------------------------------------------------- + +def compute_follow_sets(ntrans,readsets,inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x,[]) + F = digraph(ntrans,R,FP) + return F + +# ----------------------------------------------------------------------------- +# add_lookaheads() +# +# Attaches the lookahead symbols to grammar rules. +# +# Inputs: lookbacks - Set of lookback relations +# followset - Computed follow set +# +# This function directly attaches the lookaheads to productions contained +# in the lookbacks set +# ----------------------------------------------------------------------------- + +def add_lookaheads(lookbacks,followset): + for trans,lb in lookbacks.items(): + # Loop over productions in lookback + for state,p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans,[]) + for a in f: + if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + +# ----------------------------------------------------------------------------- +# add_lalr_lookaheads() +# +# This function does all of the work of adding lookahead information for use +# with LALR parsing +# ----------------------------------------------------------------------------- + +def add_lalr_lookaheads(C): + # Determine all of the nullable nonterminals + nullable = compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = find_nonterminal_transitions(C) + + # Compute read sets + readsets = compute_read_sets(C,trans,nullable) + + # Compute lookback/includes relations + lookd, included = compute_lookback_includes(C,trans,nullable) + + # Compute LALR FOLLOW sets + followsets = compute_follow_sets(trans,readsets,included) + + # Add all of the lookaheads + add_lookaheads(lookd,followsets) + +# ----------------------------------------------------------------------------- +# lr_parse_table() +# +# This function constructs the parse tables for SLR or LALR +# ----------------------------------------------------------------------------- +def lr_parse_table(method): + global _lr_method + goto = _lr_goto # Goto array + action = _lr_action # Action array + actionp = { } # Action production array (temporary) + + _lr_method = method + + n_srconflict = 0 + n_rrconflict = 0 + + if yaccdebug: + sys.stderr.write("yacc: Generating %s parsing table...\n" % method) + _vf.write("\n\nParsing method: %s\n\n" % method) + + # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items + # This determines the number of states + + C = lr0_items() + + if method == 'LALR': + add_lalr_lookaheads(C) + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [ ] # List of actions + + if yaccdebug: + _vf.write("\nstate %d\n\n" % st) + for p in I: + _vf.write(" (%d) %s\n" % (p.number, str(p))) + _vf.write("\n") + + for p in I: + try: + if p.prod[-1] == ".": + if p.name == "S'": + # Start symbol. Accept! + action[st,"$end"] = 0 + actionp[st,"$end"] = p + else: + # We are at the end of a production. Reduce! + if method == 'LALR': + laheads = p.lookaheads[st] + else: + laheads = Follow[p.name] + for a in laheads: + actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) + r = action.get((st,a),None) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + sprec,slevel = Productions[actionp[st,a].number].prec + rprec,rlevel = Precedence.get(a,('right',0)) + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + action[st,a] = -p.number + actionp[st,a] = p + if not slevel and not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) + n_srconflict += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) + n_srconflict +=1 + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + action[st,a] = -p.number + actionp[st,a] = p + # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st) + n_rrconflict += 1 + _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) + _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) + else: + sys.stderr.write("Unknown conflict in state %d\n" % st) + else: + action[st,a] = -p.number + actionp[st,a] = p + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in Terminals: + g = lr0_goto(I,a) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + # We are in a shift state + actlist.append((a,p,"shift and go to state %d" % j)) + r = action.get((st,a),None) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + sys.stderr.write("Shift/shift conflict in state %d\n" % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + rprec,rlevel = Productions[actionp[st,a].number].prec + sprec,slevel = Precedence.get(a,('right',0)) + if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): + # We decide to shift here... highest precedence to shift + action[st,a] = j + actionp[st,a] = p + if not rlevel: + n_srconflict += 1 + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! 
shift/reduce conflict for %s resolved as shift.\n" % a) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + n_srconflict +=1 + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) + + else: + sys.stderr.write("Unknown conflict in state %d\n" % st) + else: + action[st,a] = j + actionp[st,a] = p + + except Exception as e: + raise YaccError("Hosed in lr_parse_table").with_traceback(e) + + # Print the actions associated with each terminal + if yaccdebug: + _actprint = { } + for a,p,m in actlist: + if (st,a) in action: + if p is actionp[st,a]: + _vf.write(" %-15s %s\n" % (a,m)) + _actprint[(a,m)] = 1 + _vf.write("\n") + for a,p,m in actlist: + if (st,a) in action: + if p is not actionp[st,a]: + if (a,m) not in _actprint: + _vf.write(" ! %-15s [ %s ]\n" % (a,m)) + _actprint[(a,m)] = 1 + + # Construct the goto table for this state + if yaccdebug: + _vf.write("\n") + nkeys = { } + for ii in I: + for s in ii.usyms: + if s in Nonterminals: + nkeys[s] = None + for n in nkeys.keys(): + g = lr0_goto(I,n) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + goto[st,n] = j + if yaccdebug: + _vf.write(" %-30s shift and go to state %d\n" % (n,j)) + + st += 1 + + if yaccdebug: + if n_srconflict == 1: + sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict) + if n_srconflict > 1: + sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict) + if n_rrconflict == 1: + sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict) + if n_rrconflict > 1: + sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict) + +# ----------------------------------------------------------------------------- +# ==== LR Utility functions ==== +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _lr_write_tables() +# +# This function writes the LR parsing tables to a file +# ----------------------------------------------------------------------------- + +def lr_write_tables(modulename=tab_module,outputdir=''): + filename = os.path.join(outputdir,modulename) + ".py" + try: + f = open(filename,"w") + + f.write(""" +# %s +# This file is automatically generated. Do not edit. 
+ +_lr_method = %s + +_lr_signature = %s +""" % (filename, repr(_lr_method), repr(Signature.digest()))) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = { } + + for k,v in _lr_action.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_action_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_action[(_x,_k)] = _y +del _lr_action_items +""") + + else: + f.write("\n_lr_action = { "); + for k,v in _lr_action.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + if smaller: + # Factor out names to try and make smaller + items = { } + + for k,v in _lr_goto.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_goto_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_goto[(_x,_k)] = _y +del _lr_goto_items +""") + else: + f.write("\n_lr_goto = { "); + for k,v in _lr_goto.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + # Write production table + f.write("_lr_productions = [\n") + for p in Productions: + if p: + if (p.func): + f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) + else: + f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) + else: + f.write(" None,\n") + f.write("]\n") + + f.close() + + except IOError as e: + print("Unable to create '%s'" % filename) + print(e) + +def lr_read_tables(module=tab_module,optimize=0): + global _lr_action, _lr_goto, _lr_productions, _lr_method + try: + exec("import %s as parsetab" % module) + + if (optimize) or (Signature.digest() == parsetab._lr_signature): + _lr_action = parsetab._lr_action + _lr_goto = parsetab._lr_goto + _lr_productions = parsetab._lr_productions + _lr_method = parsetab._lr_method + return 1 + else: + return 0 + + except (ImportError,AttributeError): + return 0 + + +# Available instance types. This is used when parsers are defined by a class. +# In Python3 the InstanceType and ObjectType are no more, they've passed, ceased +# to be, they are ex-classes along with old-style classes + +try: + _INSTANCETYPE = (types.InstanceType, types.ObjectType) +except AttributeError: + _INSTANCETYPE = object + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build the parser module +# ----------------------------------------------------------------------------- + +def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''): + global yaccdebug + yaccdebug = debug + + initialize_vars() + files = { } + error = 0 + + + # Add parsing method to signature + Signature.update(util.encode_input(method)) + + # If a "module" parameter was supplied, extract its dictionary. + # Note: a module may in fact be an instance as well. + + if module: + # User supplied a module object. 
+ if isinstance(module, types.ModuleType): + ldict = module.__dict__ + elif isinstance(module, _INSTANCETYPE): + _items = [(k,getattr(module,k)) for k in dir(module)] + ldict = { } + for i in _items: + ldict[i[0]] = i[1] + else: + raise ValueError("Expected a module") + + else: + # No module given. We might be able to get information from the caller. + # Throw an exception and unwind the traceback to get the globals + + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + # Add starting symbol to signature + if not start: + start = ldict.get("start",None) + if start: + Signature.update(util.encode_input(start)) + + # If running in optimized mode. We're going to + + if (optimize and lr_read_tables(tabmodule,1)): + # Read parse table + del Productions[:] + for p in _lr_productions: + if not p: + Productions.append(None) + else: + m = MiniProduction() + m.name = p[0] + m.len = p[1] + m.file = p[3] + m.line = p[4] + if p[2]: + m.func = ldict[p[2]] + Productions.append(m) + + else: + # Get the tokens map + if (module and isinstance(module,_INSTANCETYPE)): + tokens = getattr(module,"tokens",None) + else: + tokens = ldict.get("tokens",None) + + if not tokens: + raise YaccError("module does not define a list 'tokens'") + if not (isinstance(tokens,list) or isinstance(tokens,tuple)): + raise YaccError("tokens must be a list or tuple.") + + # Check to see if a requires dictionary is defined. + requires = ldict.get("require",None) + if requires: + if not (isinstance(requires,dict)): + raise YaccError("require must be a dictionary.") + + for r,v in requires.items(): + try: + if not (isinstance(v,list)): + raise TypeError + v1 = [x.split(".") for x in v] + Requires[r] = v1 + except Exception: + print("Invalid specification for rule '%s' in require. Expected a list of strings" % r) + + + # Build the dictionary of terminals. We a record a 0 in the + # dictionary to track whether or not a terminal is actually + # used in the grammar + + if 'error' in tokens: + print("yacc: Illegal token 'error'. Is a reserved word.") + raise YaccError("Illegal token name") + + for n in tokens: + if n in Terminals: + print("yacc: Warning. Token '%s' multiply defined." % n) + Terminals[n] = [ ] + + Terminals['error'] = [ ] + + # Get the precedence map (if any) + prec = ldict.get("precedence",None) + if prec: + if not (isinstance(prec,list) or isinstance(prec,tuple)): + raise YaccError("precedence must be a list or tuple.") + add_precedence(prec) + Signature.update(util.encode_input(repr(prec))) + + for n in tokens: + if n not in Precedence: + Precedence[n] = ('right',0) # Default, right associative, 0 precedence + + # Look for error handler + ef = ldict.get('p_error',None) + if ef: + if isinstance(ef,types.FunctionType): + ismethod = 0 + elif isinstance(ef, types.MethodType): + ismethod = 1 + else: + raise YaccError("'p_error' defined, but is not a function or method.") + eline = ef.__code__.co_firstlineno + efile = ef.__code__.co_filename + files[efile] = None + + if (ef.__code__.co_argcount != 1+ismethod): + raise YaccError("%s:%d: p_error() requires 1 argument." % (efile,eline)) + global Errorfunc + Errorfunc = ef + else: + print("yacc: Warning. 
no p_error() function is defined.") + + # Get the list of built-in functions with p_ prefix + symbols = [ldict[f] for f in ldict.keys() + if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_' + and ldict[f].__name__ != 'p_error')] + + # Check for non-empty symbols + if len(symbols) == 0: + raise YaccError("no rules of the form p_rulename are defined.") + + # Sort the symbols by line number + symbols.sort(key=lambda x: x.__code__.co_firstlineno) + + # Add all of the symbols to the grammar + for f in symbols: + if (add_function(f)) < 0: + error += 1 + else: + files[f.__code__.co_filename] = None + + # Make a signature of the docstrings + for f in symbols: + if f.__doc__: + Signature.update(util.encode_input(f.__doc__)) + + lr_init_vars() + + if error: + raise YaccError("Unable to construct parser.") + + if not lr_read_tables(tabmodule): + + # Validate files + for filename in files.keys(): + if not validate_file(filename): + error = 1 + + # Validate dictionary + validate_dict(ldict) + + if start and start not in Prodnames: + raise YaccError("Bad starting symbol '%s'" % start) + + augment_grammar(start) + error = verify_productions(cycle_check=check_recursion) + otherfunc = [ldict[f] for f in ldict.keys() + if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')] + + if error: + raise YaccError("Unable to construct parser.") + + build_lritems() + compute_first1() + compute_follow(start) + + if method in ['SLR','LALR']: + lr_parse_table(method) + else: + raise YaccError("Unknown parsing method '%s'" % method) + + if write_tables: + lr_write_tables(tabmodule,outputdir) + + if yaccdebug: + try: + f = open(os.path.join(outputdir,debugfile),"w") + f.write(_vfc.getvalue()) + f.write("\n\n") + f.write(_vf.getvalue()) + f.close() + except IOError as e: + print("yacc: can't create '%s'" % debugfile,e) + + # Made it here. Create a parser object and set up its internal state. + # Set global parse() method to bound method of parser object. + + p = Parser("xyzzy") + p.productions = Productions + p.errorfunc = Errorfunc + p.action = _lr_action + p.goto = _lr_goto + p.method = _lr_method + p.require = Requires + + global parse + parse = p.parse + + global parser + parser = p + + # Clean up all of the globals we created + if (not optimize): + yacc_cleanup() + return p + +# yacc_cleanup function. Delete all of the global variables +# used during table construction + +def yacc_cleanup(): + global _lr_action, _lr_goto, _lr_method, _lr_goto_cache + del _lr_action, _lr_goto, _lr_method, _lr_goto_cache + + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + del Productions, Prodnames, Prodmap, Terminals + del Nonterminals, First, Follow, Precedence, LRitems + del Errorfunc, Signature, Requires + + global _vf, _vfc + del _vf, _vfc + + +# Stub that raises an error if parsing is attempted without first calling yacc() +def parse(*args,**kwargs): + raise YaccError("yacc: No parser built with yacc()") + -- cgit v1.2.3
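A minimal usage sketch of the yacc() entry point added above (the tokens, the
calc-style grammar, and the client module are hypothetical illustrations, not
part of this change; sepolgen's real grammar lives in refparser.py):

    # hypothetical client module of the bundled sepolgen.lex / sepolgen.yacc
    from sepolgen import lex, yacc

    tokens = ('NUMBER', 'PLUS')

    t_PLUS = r'\+'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        t.lexer.skip(1)

    def p_expr_plus(p):
        'expr : expr PLUS NUMBER'
        p[0] = p[1] + p[3]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        pass

    lexer = lex.lex()          # build the lexer from the t_ rules above
    parser = yacc.yacc()       # build the LALR tables from the p_ rules above
    print(parser.parse("1+2+3", lexer=lexer))   # prints 6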