diff options
Diffstat (limited to 'Lib/fontTools/misc/psLib.py')
-rw-r--r-- | Lib/fontTools/misc/psLib.py | 687 |
1 files changed, 350 insertions, 337 deletions
diff --git a/Lib/fontTools/misc/psLib.py b/Lib/fontTools/misc/psLib.py index a6c8b8b5..3bfdb4ae 100644 --- a/Lib/fontTools/misc/psLib.py +++ b/Lib/fontTools/misc/psLib.py @@ -1,20 +1,20 @@ from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr from fontTools.misc import eexec from .psOperators import ( - PSOperators, - ps_StandardEncoding, - ps_array, - ps_boolean, - ps_dict, - ps_integer, - ps_literal, - ps_mark, - ps_name, - ps_operator, - ps_procedure, - ps_procmark, - ps_real, - ps_string, + PSOperators, + ps_StandardEncoding, + ps_array, + ps_boolean, + ps_dict, + ps_integer, + ps_literal, + ps_mark, + ps_name, + ps_operator, + ps_procedure, + ps_procmark, + ps_real, + ps_string, ) import re from collections.abc import Callable @@ -24,7 +24,7 @@ import logging log = logging.getLogger(__name__) -ps_special = b'()<>[]{}%' # / is one too, but we take care of that one differently +ps_special = b"()<>[]{}%" # / is one too, but we take care of that one differently skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"])) endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"]) @@ -32,7 +32,7 @@ endofthingRE = re.compile(endofthingPat) commentRE = re.compile(b"%[^\n\r]*") # XXX This not entirely correct as it doesn't allow *nested* embedded parens: -stringPat = br""" +stringPat = rb""" \( ( ( @@ -51,335 +51,348 @@ stringRE = re.compile(stringPat) hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"])) -class PSTokenError(Exception): pass -class PSError(Exception): pass +class PSTokenError(Exception): + pass -class PSTokenizer(object): - def __init__(self, buf=b'', encoding="ascii"): - # Force self.buf to be a byte string - buf = tobytes(buf) - self.buf = buf - self.len = len(buf) - self.pos = 0 - self.closed = False - self.encoding = encoding - - def read(self, n=-1): - """Read at most 'n' bytes from the buffer, or less if the read - hits EOF before obtaining 'n' bytes. - If 'n' is negative or omitted, read all data until EOF is reached. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - if n is None or n < 0: - newpos = self.len - else: - newpos = min(self.pos+n, self.len) - r = self.buf[self.pos:newpos] - self.pos = newpos - return r - - def close(self): - if not self.closed: - self.closed = True - del self.buf, self.pos - - def getnexttoken(self, - # localize some stuff, for performance - len=len, - ps_special=ps_special, - stringmatch=stringRE.match, - hexstringmatch=hexstringRE.match, - commentmatch=commentRE.match, - endmatch=endofthingRE.match): - - self.skipwhite() - if self.pos >= self.len: - return None, None - pos = self.pos - buf = self.buf - char = bytechr(byteord(buf[pos])) - if char in ps_special: - if char in b'{}[]': - tokentype = 'do_special' - token = char - elif char == b'%': - tokentype = 'do_comment' - _, nextpos = commentmatch(buf, pos).span() - token = buf[pos:nextpos] - elif char == b'(': - tokentype = 'do_string' - m = stringmatch(buf, pos) - if m is None: - raise PSTokenError('bad string at character %d' % pos) - _, nextpos = m.span() - token = buf[pos:nextpos] - elif char == b'<': - tokentype = 'do_hexstring' - m = hexstringmatch(buf, pos) - if m is None: - raise PSTokenError('bad hexstring at character %d' % pos) - _, nextpos = m.span() - token = buf[pos:nextpos] - else: - raise PSTokenError('bad token at character %d' % pos) - else: - if char == b'/': - tokentype = 'do_literal' - m = endmatch(buf, pos+1) - else: - tokentype = '' - m = endmatch(buf, pos) - if m is None: - raise PSTokenError('bad token at character %d' % pos) - _, nextpos = m.span() - token = buf[pos:nextpos] - self.pos = pos + len(token) - token = tostr(token, encoding=self.encoding) - return tokentype, token - - def skipwhite(self, whitematch=skipwhiteRE.match): - _, nextpos = whitematch(self.buf, self.pos).span() - self.pos = nextpos - - def starteexec(self): - self.pos = self.pos + 1 - self.dirtybuf = self.buf[self.pos:] - self.buf, R = eexec.decrypt(self.dirtybuf, 55665) - self.len = len(self.buf) - self.pos = 4 - - def stopeexec(self): - if not hasattr(self, 'dirtybuf'): - return - self.buf = self.dirtybuf - del self.dirtybuf +class PSError(Exception): + pass -class PSInterpreter(PSOperators): +class PSTokenizer(object): + def __init__(self, buf=b"", encoding="ascii"): + # Force self.buf to be a byte string + buf = tobytes(buf) + self.buf = buf + self.len = len(buf) + self.pos = 0 + self.closed = False + self.encoding = encoding + + def read(self, n=-1): + """Read at most 'n' bytes from the buffer, or less if the read + hits EOF before obtaining 'n' bytes. + If 'n' is negative or omitted, read all data until EOF is reached. + """ + if self.closed: + raise ValueError("I/O operation on closed file") + if n is None or n < 0: + newpos = self.len + else: + newpos = min(self.pos + n, self.len) + r = self.buf[self.pos : newpos] + self.pos = newpos + return r + + def close(self): + if not self.closed: + self.closed = True + del self.buf, self.pos + + def getnexttoken( + self, + # localize some stuff, for performance + len=len, + ps_special=ps_special, + stringmatch=stringRE.match, + hexstringmatch=hexstringRE.match, + commentmatch=commentRE.match, + endmatch=endofthingRE.match, + ): + self.skipwhite() + if self.pos >= self.len: + return None, None + pos = self.pos + buf = self.buf + char = bytechr(byteord(buf[pos])) + if char in ps_special: + if char in b"{}[]": + tokentype = "do_special" + token = char + elif char == b"%": + tokentype = "do_comment" + _, nextpos = commentmatch(buf, pos).span() + token = buf[pos:nextpos] + elif char == b"(": + tokentype = "do_string" + m = stringmatch(buf, pos) + if m is None: + raise PSTokenError("bad string at character %d" % pos) + _, nextpos = m.span() + token = buf[pos:nextpos] + elif char == b"<": + tokentype = "do_hexstring" + m = hexstringmatch(buf, pos) + if m is None: + raise PSTokenError("bad hexstring at character %d" % pos) + _, nextpos = m.span() + token = buf[pos:nextpos] + else: + raise PSTokenError("bad token at character %d" % pos) + else: + if char == b"/": + tokentype = "do_literal" + m = endmatch(buf, pos + 1) + else: + tokentype = "" + m = endmatch(buf, pos) + if m is None: + raise PSTokenError("bad token at character %d" % pos) + _, nextpos = m.span() + token = buf[pos:nextpos] + self.pos = pos + len(token) + token = tostr(token, encoding=self.encoding) + return tokentype, token + + def skipwhite(self, whitematch=skipwhiteRE.match): + _, nextpos = whitematch(self.buf, self.pos).span() + self.pos = nextpos + + def starteexec(self): + self.pos = self.pos + 1 + self.dirtybuf = self.buf[self.pos :] + self.buf, R = eexec.decrypt(self.dirtybuf, 55665) + self.len = len(self.buf) + self.pos = 4 + + def stopeexec(self): + if not hasattr(self, "dirtybuf"): + return + self.buf = self.dirtybuf + del self.dirtybuf - def __init__(self, encoding="ascii"): - systemdict = {} - userdict = {} - self.encoding = encoding - self.dictstack = [systemdict, userdict] - self.stack = [] - self.proclevel = 0 - self.procmark = ps_procmark() - self.fillsystemdict() - - def fillsystemdict(self): - systemdict = self.dictstack[0] - systemdict['['] = systemdict['mark'] = self.mark = ps_mark() - systemdict[']'] = ps_operator(']', self.do_makearray) - systemdict['true'] = ps_boolean(1) - systemdict['false'] = ps_boolean(0) - systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding) - systemdict['FontDirectory'] = ps_dict({}) - self.suckoperators(systemdict, self.__class__) - - def suckoperators(self, systemdict, klass): - for name in dir(klass): - attr = getattr(self, name) - if isinstance(attr, Callable) and name[:3] == 'ps_': - name = name[3:] - systemdict[name] = ps_operator(name, attr) - for baseclass in klass.__bases__: - self.suckoperators(systemdict, baseclass) - - def interpret(self, data, getattr=getattr): - tokenizer = self.tokenizer = PSTokenizer(data, self.encoding) - getnexttoken = tokenizer.getnexttoken - do_token = self.do_token - handle_object = self.handle_object - try: - while 1: - tokentype, token = getnexttoken() - if not token: - break - if tokentype: - handler = getattr(self, tokentype) - object = handler(token) - else: - object = do_token(token) - if object is not None: - handle_object(object) - tokenizer.close() - self.tokenizer = None - except: - if self.tokenizer is not None: - log.debug( - 'ps error:\n' - '- - - - - - -\n' - '%s\n' - '>>>\n' - '%s\n' - '- - - - - - -', - self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos], - self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50]) - raise - - def handle_object(self, object): - if not (self.proclevel or object.literal or object.type == 'proceduretype'): - if object.type != 'operatortype': - object = self.resolve_name(object.value) - if object.literal: - self.push(object) - else: - if object.type == 'proceduretype': - self.call_procedure(object) - else: - object.function() - else: - self.push(object) - - def call_procedure(self, proc): - handle_object = self.handle_object - for item in proc.value: - handle_object(item) - - def resolve_name(self, name): - dictstack = self.dictstack - for i in range(len(dictstack)-1, -1, -1): - if name in dictstack[i]: - return dictstack[i][name] - raise PSError('name error: ' + str(name)) - - def do_token(self, token, - int=int, - float=float, - ps_name=ps_name, - ps_integer=ps_integer, - ps_real=ps_real): - try: - num = int(token) - except (ValueError, OverflowError): - try: - num = float(token) - except (ValueError, OverflowError): - if '#' in token: - hashpos = token.find('#') - try: - base = int(token[:hashpos]) - num = int(token[hashpos+1:], base) - except (ValueError, OverflowError): - return ps_name(token) - else: - return ps_integer(num) - else: - return ps_name(token) - else: - return ps_real(num) - else: - return ps_integer(num) - - def do_comment(self, token): - pass - - def do_literal(self, token): - return ps_literal(token[1:]) - - def do_string(self, token): - return ps_string(token[1:-1]) - - def do_hexstring(self, token): - hexStr = "".join(token[1:-1].split()) - if len(hexStr) % 2: - hexStr = hexStr + '0' - cleanstr = [] - for i in range(0, len(hexStr), 2): - cleanstr.append(chr(int(hexStr[i:i+2], 16))) - cleanstr = "".join(cleanstr) - return ps_string(cleanstr) - - def do_special(self, token): - if token == '{': - self.proclevel = self.proclevel + 1 - return self.procmark - elif token == '}': - proc = [] - while 1: - topobject = self.pop() - if topobject == self.procmark: - break - proc.append(topobject) - self.proclevel = self.proclevel - 1 - proc.reverse() - return ps_procedure(proc) - elif token == '[': - return self.mark - elif token == ']': - return ps_name(']') - else: - raise PSTokenError('huh?') - - def push(self, object): - self.stack.append(object) - - def pop(self, *types): - stack = self.stack - if not stack: - raise PSError('stack underflow') - object = stack[-1] - if types: - if object.type not in types: - raise PSError('typecheck, expected %s, found %s' % (repr(types), object.type)) - del stack[-1] - return object - - def do_makearray(self): - array = [] - while 1: - topobject = self.pop() - if topobject == self.mark: - break - array.append(topobject) - array.reverse() - self.push(ps_array(array)) - - def close(self): - """Remove circular references.""" - del self.stack - del self.dictstack + +class PSInterpreter(PSOperators): + def __init__(self, encoding="ascii"): + systemdict = {} + userdict = {} + self.encoding = encoding + self.dictstack = [systemdict, userdict] + self.stack = [] + self.proclevel = 0 + self.procmark = ps_procmark() + self.fillsystemdict() + + def fillsystemdict(self): + systemdict = self.dictstack[0] + systemdict["["] = systemdict["mark"] = self.mark = ps_mark() + systemdict["]"] = ps_operator("]", self.do_makearray) + systemdict["true"] = ps_boolean(1) + systemdict["false"] = ps_boolean(0) + systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding) + systemdict["FontDirectory"] = ps_dict({}) + self.suckoperators(systemdict, self.__class__) + + def suckoperators(self, systemdict, klass): + for name in dir(klass): + attr = getattr(self, name) + if isinstance(attr, Callable) and name[:3] == "ps_": + name = name[3:] + systemdict[name] = ps_operator(name, attr) + for baseclass in klass.__bases__: + self.suckoperators(systemdict, baseclass) + + def interpret(self, data, getattr=getattr): + tokenizer = self.tokenizer = PSTokenizer(data, self.encoding) + getnexttoken = tokenizer.getnexttoken + do_token = self.do_token + handle_object = self.handle_object + try: + while 1: + tokentype, token = getnexttoken() + if not token: + break + if tokentype: + handler = getattr(self, tokentype) + object = handler(token) + else: + object = do_token(token) + if object is not None: + handle_object(object) + tokenizer.close() + self.tokenizer = None + except: + if self.tokenizer is not None: + log.debug( + "ps error:\n" + "- - - - - - -\n" + "%s\n" + ">>>\n" + "%s\n" + "- - - - - - -", + self.tokenizer.buf[self.tokenizer.pos - 50 : self.tokenizer.pos], + self.tokenizer.buf[self.tokenizer.pos : self.tokenizer.pos + 50], + ) + raise + + def handle_object(self, object): + if not (self.proclevel or object.literal or object.type == "proceduretype"): + if object.type != "operatortype": + object = self.resolve_name(object.value) + if object.literal: + self.push(object) + else: + if object.type == "proceduretype": + self.call_procedure(object) + else: + object.function() + else: + self.push(object) + + def call_procedure(self, proc): + handle_object = self.handle_object + for item in proc.value: + handle_object(item) + + def resolve_name(self, name): + dictstack = self.dictstack + for i in range(len(dictstack) - 1, -1, -1): + if name in dictstack[i]: + return dictstack[i][name] + raise PSError("name error: " + str(name)) + + def do_token( + self, + token, + int=int, + float=float, + ps_name=ps_name, + ps_integer=ps_integer, + ps_real=ps_real, + ): + try: + num = int(token) + except (ValueError, OverflowError): + try: + num = float(token) + except (ValueError, OverflowError): + if "#" in token: + hashpos = token.find("#") + try: + base = int(token[:hashpos]) + num = int(token[hashpos + 1 :], base) + except (ValueError, OverflowError): + return ps_name(token) + else: + return ps_integer(num) + else: + return ps_name(token) + else: + return ps_real(num) + else: + return ps_integer(num) + + def do_comment(self, token): + pass + + def do_literal(self, token): + return ps_literal(token[1:]) + + def do_string(self, token): + return ps_string(token[1:-1]) + + def do_hexstring(self, token): + hexStr = "".join(token[1:-1].split()) + if len(hexStr) % 2: + hexStr = hexStr + "0" + cleanstr = [] + for i in range(0, len(hexStr), 2): + cleanstr.append(chr(int(hexStr[i : i + 2], 16))) + cleanstr = "".join(cleanstr) + return ps_string(cleanstr) + + def do_special(self, token): + if token == "{": + self.proclevel = self.proclevel + 1 + return self.procmark + elif token == "}": + proc = [] + while 1: + topobject = self.pop() + if topobject == self.procmark: + break + proc.append(topobject) + self.proclevel = self.proclevel - 1 + proc.reverse() + return ps_procedure(proc) + elif token == "[": + return self.mark + elif token == "]": + return ps_name("]") + else: + raise PSTokenError("huh?") + + def push(self, object): + self.stack.append(object) + + def pop(self, *types): + stack = self.stack + if not stack: + raise PSError("stack underflow") + object = stack[-1] + if types: + if object.type not in types: + raise PSError( + "typecheck, expected %s, found %s" % (repr(types), object.type) + ) + del stack[-1] + return object + + def do_makearray(self): + array = [] + while 1: + topobject = self.pop() + if topobject == self.mark: + break + array.append(topobject) + array.reverse() + self.push(ps_array(array)) + + def close(self): + """Remove circular references.""" + del self.stack + del self.dictstack def unpack_item(item): - tp = type(item.value) - if tp == dict: - newitem = {} - for key, value in item.value.items(): - newitem[key] = unpack_item(value) - elif tp == list: - newitem = [None] * len(item.value) - for i in range(len(item.value)): - newitem[i] = unpack_item(item.value[i]) - if item.type == 'proceduretype': - newitem = tuple(newitem) - else: - newitem = item.value - return newitem + tp = type(item.value) + if tp == dict: + newitem = {} + for key, value in item.value.items(): + newitem[key] = unpack_item(value) + elif tp == list: + newitem = [None] * len(item.value) + for i in range(len(item.value)): + newitem[i] = unpack_item(item.value[i]) + if item.type == "proceduretype": + newitem = tuple(newitem) + else: + newitem = item.value + return newitem + def suckfont(data, encoding="ascii"): - m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data) - if m: - fontName = m.group(1) - fontName = fontName.decode() - else: - fontName = None - interpreter = PSInterpreter(encoding=encoding) - interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop") - interpreter.interpret(data) - fontdir = interpreter.dictstack[0]['FontDirectory'].value - if fontName in fontdir: - rawfont = fontdir[fontName] - else: - # fall back, in case fontName wasn't found - fontNames = list(fontdir.keys()) - if len(fontNames) > 1: - fontNames.remove("Helvetica") - fontNames.sort() - rawfont = fontdir[fontNames[0]] - interpreter.close() - return unpack_item(rawfont) + m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data) + if m: + fontName = m.group(1) + fontName = fontName.decode() + else: + fontName = None + interpreter = PSInterpreter(encoding=encoding) + interpreter.interpret( + b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop" + ) + interpreter.interpret(data) + fontdir = interpreter.dictstack[0]["FontDirectory"].value + if fontName in fontdir: + rawfont = fontdir[fontName] + else: + # fall back, in case fontName wasn't found + fontNames = list(fontdir.keys()) + if len(fontNames) > 1: + fontNames.remove("Helvetica") + fontNames.sort() + rawfont = fontdir[fontNames[0]] + interpreter.close() + return unpack_item(rawfont) |