Diffstat (limited to 'Lib/fontTools/misc/psLib.py')
-rw-r--r--  Lib/fontTools/misc/psLib.py | 687
1 file changed, 350 insertions(+), 337 deletions(-)
diff --git a/Lib/fontTools/misc/psLib.py b/Lib/fontTools/misc/psLib.py
index a6c8b8b5..3bfdb4ae 100644
--- a/Lib/fontTools/misc/psLib.py
+++ b/Lib/fontTools/misc/psLib.py
@@ -1,20 +1,20 @@
from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
- PSOperators,
- ps_StandardEncoding,
- ps_array,
- ps_boolean,
- ps_dict,
- ps_integer,
- ps_literal,
- ps_mark,
- ps_name,
- ps_operator,
- ps_procedure,
- ps_procmark,
- ps_real,
- ps_string,
+ PSOperators,
+ ps_StandardEncoding,
+ ps_array,
+ ps_boolean,
+ ps_dict,
+ ps_integer,
+ ps_literal,
+ ps_mark,
+ ps_name,
+ ps_operator,
+ ps_procedure,
+ ps_procmark,
+ ps_real,
+ ps_string,
)
import re
from collections.abc import Callable
@@ -24,7 +24,7 @@ import logging
log = logging.getLogger(__name__)
-ps_special = b'()<>[]{}%' # / is one too, but we take care of that one differently
+ps_special = b"()<>[]{}%" # / is one too, but we take care of that one differently
skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
@@ -32,7 +32,7 @@ endofthingRE = re.compile(endofthingPat)
commentRE = re.compile(b"%[^\n\r]*")
# XXX This not entirely correct as it doesn't allow *nested* embedded parens:
-stringPat = br"""
+stringPat = rb"""
\(
(
(
@@ -51,335 +51,348 @@ stringRE = re.compile(stringPat)
hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))
-class PSTokenError(Exception): pass
-class PSError(Exception): pass
+class PSTokenError(Exception):
+ pass
-class PSTokenizer(object):
- def __init__(self, buf=b'', encoding="ascii"):
- # Force self.buf to be a byte string
- buf = tobytes(buf)
- self.buf = buf
- self.len = len(buf)
- self.pos = 0
- self.closed = False
- self.encoding = encoding
-
- def read(self, n=-1):
- """Read at most 'n' bytes from the buffer, or less if the read
- hits EOF before obtaining 'n' bytes.
- If 'n' is negative or omitted, read all data until EOF is reached.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
- if n is None or n < 0:
- newpos = self.len
- else:
- newpos = min(self.pos+n, self.len)
- r = self.buf[self.pos:newpos]
- self.pos = newpos
- return r
-
- def close(self):
- if not self.closed:
- self.closed = True
- del self.buf, self.pos
-
- def getnexttoken(self,
- # localize some stuff, for performance
- len=len,
- ps_special=ps_special,
- stringmatch=stringRE.match,
- hexstringmatch=hexstringRE.match,
- commentmatch=commentRE.match,
- endmatch=endofthingRE.match):
-
- self.skipwhite()
- if self.pos >= self.len:
- return None, None
- pos = self.pos
- buf = self.buf
- char = bytechr(byteord(buf[pos]))
- if char in ps_special:
- if char in b'{}[]':
- tokentype = 'do_special'
- token = char
- elif char == b'%':
- tokentype = 'do_comment'
- _, nextpos = commentmatch(buf, pos).span()
- token = buf[pos:nextpos]
- elif char == b'(':
- tokentype = 'do_string'
- m = stringmatch(buf, pos)
- if m is None:
- raise PSTokenError('bad string at character %d' % pos)
- _, nextpos = m.span()
- token = buf[pos:nextpos]
- elif char == b'<':
- tokentype = 'do_hexstring'
- m = hexstringmatch(buf, pos)
- if m is None:
- raise PSTokenError('bad hexstring at character %d' % pos)
- _, nextpos = m.span()
- token = buf[pos:nextpos]
- else:
- raise PSTokenError('bad token at character %d' % pos)
- else:
- if char == b'/':
- tokentype = 'do_literal'
- m = endmatch(buf, pos+1)
- else:
- tokentype = ''
- m = endmatch(buf, pos)
- if m is None:
- raise PSTokenError('bad token at character %d' % pos)
- _, nextpos = m.span()
- token = buf[pos:nextpos]
- self.pos = pos + len(token)
- token = tostr(token, encoding=self.encoding)
- return tokentype, token
-
- def skipwhite(self, whitematch=skipwhiteRE.match):
- _, nextpos = whitematch(self.buf, self.pos).span()
- self.pos = nextpos
-
- def starteexec(self):
- self.pos = self.pos + 1
- self.dirtybuf = self.buf[self.pos:]
- self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
- self.len = len(self.buf)
- self.pos = 4
-
- def stopeexec(self):
- if not hasattr(self, 'dirtybuf'):
- return
- self.buf = self.dirtybuf
- del self.dirtybuf
+class PSError(Exception):
+ pass
-class PSInterpreter(PSOperators):
+class PSTokenizer(object):
+ def __init__(self, buf=b"", encoding="ascii"):
+ # Force self.buf to be a byte string
+ buf = tobytes(buf)
+ self.buf = buf
+ self.len = len(buf)
+ self.pos = 0
+ self.closed = False
+ self.encoding = encoding
+
+ def read(self, n=-1):
+ """Read at most 'n' bytes from the buffer, or less if the read
+ hits EOF before obtaining 'n' bytes.
+ If 'n' is negative or omitted, read all data until EOF is reached.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+ if n is None or n < 0:
+ newpos = self.len
+ else:
+ newpos = min(self.pos + n, self.len)
+ r = self.buf[self.pos : newpos]
+ self.pos = newpos
+ return r
+
+ def close(self):
+ if not self.closed:
+ self.closed = True
+ del self.buf, self.pos
+
+ def getnexttoken(
+ self,
+ # localize some stuff, for performance
+ len=len,
+ ps_special=ps_special,
+ stringmatch=stringRE.match,
+ hexstringmatch=hexstringRE.match,
+ commentmatch=commentRE.match,
+ endmatch=endofthingRE.match,
+ ):
+ self.skipwhite()
+ if self.pos >= self.len:
+ return None, None
+ pos = self.pos
+ buf = self.buf
+ char = bytechr(byteord(buf[pos]))
+ if char in ps_special:
+ if char in b"{}[]":
+ tokentype = "do_special"
+ token = char
+ elif char == b"%":
+ tokentype = "do_comment"
+ _, nextpos = commentmatch(buf, pos).span()
+ token = buf[pos:nextpos]
+ elif char == b"(":
+ tokentype = "do_string"
+ m = stringmatch(buf, pos)
+ if m is None:
+ raise PSTokenError("bad string at character %d" % pos)
+ _, nextpos = m.span()
+ token = buf[pos:nextpos]
+ elif char == b"<":
+ tokentype = "do_hexstring"
+ m = hexstringmatch(buf, pos)
+ if m is None:
+ raise PSTokenError("bad hexstring at character %d" % pos)
+ _, nextpos = m.span()
+ token = buf[pos:nextpos]
+ else:
+ raise PSTokenError("bad token at character %d" % pos)
+ else:
+ if char == b"/":
+ tokentype = "do_literal"
+ m = endmatch(buf, pos + 1)
+ else:
+ tokentype = ""
+ m = endmatch(buf, pos)
+ if m is None:
+ raise PSTokenError("bad token at character %d" % pos)
+ _, nextpos = m.span()
+ token = buf[pos:nextpos]
+ self.pos = pos + len(token)
+ token = tostr(token, encoding=self.encoding)
+ return tokentype, token
+
+ def skipwhite(self, whitematch=skipwhiteRE.match):
+ _, nextpos = whitematch(self.buf, self.pos).span()
+ self.pos = nextpos
+
+ def starteexec(self):
+ self.pos = self.pos + 1
+ self.dirtybuf = self.buf[self.pos :]
+ self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
+ self.len = len(self.buf)
+ self.pos = 4
+
+ def stopeexec(self):
+ if not hasattr(self, "dirtybuf"):
+ return
+ self.buf = self.dirtybuf
+ del self.dirtybuf
- def __init__(self, encoding="ascii"):
- systemdict = {}
- userdict = {}
- self.encoding = encoding
- self.dictstack = [systemdict, userdict]
- self.stack = []
- self.proclevel = 0
- self.procmark = ps_procmark()
- self.fillsystemdict()
-
- def fillsystemdict(self):
- systemdict = self.dictstack[0]
- systemdict['['] = systemdict['mark'] = self.mark = ps_mark()
- systemdict[']'] = ps_operator(']', self.do_makearray)
- systemdict['true'] = ps_boolean(1)
- systemdict['false'] = ps_boolean(0)
- systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding)
- systemdict['FontDirectory'] = ps_dict({})
- self.suckoperators(systemdict, self.__class__)
-
- def suckoperators(self, systemdict, klass):
- for name in dir(klass):
- attr = getattr(self, name)
- if isinstance(attr, Callable) and name[:3] == 'ps_':
- name = name[3:]
- systemdict[name] = ps_operator(name, attr)
- for baseclass in klass.__bases__:
- self.suckoperators(systemdict, baseclass)
-
- def interpret(self, data, getattr=getattr):
- tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
- getnexttoken = tokenizer.getnexttoken
- do_token = self.do_token
- handle_object = self.handle_object
- try:
- while 1:
- tokentype, token = getnexttoken()
- if not token:
- break
- if tokentype:
- handler = getattr(self, tokentype)
- object = handler(token)
- else:
- object = do_token(token)
- if object is not None:
- handle_object(object)
- tokenizer.close()
- self.tokenizer = None
- except:
- if self.tokenizer is not None:
- log.debug(
- 'ps error:\n'
- '- - - - - - -\n'
- '%s\n'
- '>>>\n'
- '%s\n'
- '- - - - - - -',
- self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos],
- self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50])
- raise
-
- def handle_object(self, object):
- if not (self.proclevel or object.literal or object.type == 'proceduretype'):
- if object.type != 'operatortype':
- object = self.resolve_name(object.value)
- if object.literal:
- self.push(object)
- else:
- if object.type == 'proceduretype':
- self.call_procedure(object)
- else:
- object.function()
- else:
- self.push(object)
-
- def call_procedure(self, proc):
- handle_object = self.handle_object
- for item in proc.value:
- handle_object(item)
-
- def resolve_name(self, name):
- dictstack = self.dictstack
- for i in range(len(dictstack)-1, -1, -1):
- if name in dictstack[i]:
- return dictstack[i][name]
- raise PSError('name error: ' + str(name))
-
- def do_token(self, token,
- int=int,
- float=float,
- ps_name=ps_name,
- ps_integer=ps_integer,
- ps_real=ps_real):
- try:
- num = int(token)
- except (ValueError, OverflowError):
- try:
- num = float(token)
- except (ValueError, OverflowError):
- if '#' in token:
- hashpos = token.find('#')
- try:
- base = int(token[:hashpos])
- num = int(token[hashpos+1:], base)
- except (ValueError, OverflowError):
- return ps_name(token)
- else:
- return ps_integer(num)
- else:
- return ps_name(token)
- else:
- return ps_real(num)
- else:
- return ps_integer(num)
-
- def do_comment(self, token):
- pass
-
- def do_literal(self, token):
- return ps_literal(token[1:])
-
- def do_string(self, token):
- return ps_string(token[1:-1])
-
- def do_hexstring(self, token):
- hexStr = "".join(token[1:-1].split())
- if len(hexStr) % 2:
- hexStr = hexStr + '0'
- cleanstr = []
- for i in range(0, len(hexStr), 2):
- cleanstr.append(chr(int(hexStr[i:i+2], 16)))
- cleanstr = "".join(cleanstr)
- return ps_string(cleanstr)
-
- def do_special(self, token):
- if token == '{':
- self.proclevel = self.proclevel + 1
- return self.procmark
- elif token == '}':
- proc = []
- while 1:
- topobject = self.pop()
- if topobject == self.procmark:
- break
- proc.append(topobject)
- self.proclevel = self.proclevel - 1
- proc.reverse()
- return ps_procedure(proc)
- elif token == '[':
- return self.mark
- elif token == ']':
- return ps_name(']')
- else:
- raise PSTokenError('huh?')
-
- def push(self, object):
- self.stack.append(object)
-
- def pop(self, *types):
- stack = self.stack
- if not stack:
- raise PSError('stack underflow')
- object = stack[-1]
- if types:
- if object.type not in types:
- raise PSError('typecheck, expected %s, found %s' % (repr(types), object.type))
- del stack[-1]
- return object
-
- def do_makearray(self):
- array = []
- while 1:
- topobject = self.pop()
- if topobject == self.mark:
- break
- array.append(topobject)
- array.reverse()
- self.push(ps_array(array))
-
- def close(self):
- """Remove circular references."""
- del self.stack
- del self.dictstack
+
+class PSInterpreter(PSOperators):
+ def __init__(self, encoding="ascii"):
+ systemdict = {}
+ userdict = {}
+ self.encoding = encoding
+ self.dictstack = [systemdict, userdict]
+ self.stack = []
+ self.proclevel = 0
+ self.procmark = ps_procmark()
+ self.fillsystemdict()
+
+ def fillsystemdict(self):
+ systemdict = self.dictstack[0]
+ systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
+ systemdict["]"] = ps_operator("]", self.do_makearray)
+ systemdict["true"] = ps_boolean(1)
+ systemdict["false"] = ps_boolean(0)
+ systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
+ systemdict["FontDirectory"] = ps_dict({})
+ self.suckoperators(systemdict, self.__class__)
+
+ def suckoperators(self, systemdict, klass):
+ for name in dir(klass):
+ attr = getattr(self, name)
+ if isinstance(attr, Callable) and name[:3] == "ps_":
+ name = name[3:]
+ systemdict[name] = ps_operator(name, attr)
+ for baseclass in klass.__bases__:
+ self.suckoperators(systemdict, baseclass)
+
+ def interpret(self, data, getattr=getattr):
+ tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
+ getnexttoken = tokenizer.getnexttoken
+ do_token = self.do_token
+ handle_object = self.handle_object
+ try:
+ while 1:
+ tokentype, token = getnexttoken()
+ if not token:
+ break
+ if tokentype:
+ handler = getattr(self, tokentype)
+ object = handler(token)
+ else:
+ object = do_token(token)
+ if object is not None:
+ handle_object(object)
+ tokenizer.close()
+ self.tokenizer = None
+ except:
+ if self.tokenizer is not None:
+ log.debug(
+ "ps error:\n"
+ "- - - - - - -\n"
+ "%s\n"
+ ">>>\n"
+ "%s\n"
+ "- - - - - - -",
+ self.tokenizer.buf[self.tokenizer.pos - 50 : self.tokenizer.pos],
+ self.tokenizer.buf[self.tokenizer.pos : self.tokenizer.pos + 50],
+ )
+ raise
+
+ def handle_object(self, object):
+ if not (self.proclevel or object.literal or object.type == "proceduretype"):
+ if object.type != "operatortype":
+ object = self.resolve_name(object.value)
+ if object.literal:
+ self.push(object)
+ else:
+ if object.type == "proceduretype":
+ self.call_procedure(object)
+ else:
+ object.function()
+ else:
+ self.push(object)
+
+ def call_procedure(self, proc):
+ handle_object = self.handle_object
+ for item in proc.value:
+ handle_object(item)
+
+ def resolve_name(self, name):
+ dictstack = self.dictstack
+ for i in range(len(dictstack) - 1, -1, -1):
+ if name in dictstack[i]:
+ return dictstack[i][name]
+ raise PSError("name error: " + str(name))
+
+ def do_token(
+ self,
+ token,
+ int=int,
+ float=float,
+ ps_name=ps_name,
+ ps_integer=ps_integer,
+ ps_real=ps_real,
+ ):
+ try:
+ num = int(token)
+ except (ValueError, OverflowError):
+ try:
+ num = float(token)
+ except (ValueError, OverflowError):
+ if "#" in token:
+ hashpos = token.find("#")
+ try:
+ base = int(token[:hashpos])
+ num = int(token[hashpos + 1 :], base)
+ except (ValueError, OverflowError):
+ return ps_name(token)
+ else:
+ return ps_integer(num)
+ else:
+ return ps_name(token)
+ else:
+ return ps_real(num)
+ else:
+ return ps_integer(num)
+
+ def do_comment(self, token):
+ pass
+
+ def do_literal(self, token):
+ return ps_literal(token[1:])
+
+ def do_string(self, token):
+ return ps_string(token[1:-1])
+
+ def do_hexstring(self, token):
+ hexStr = "".join(token[1:-1].split())
+ if len(hexStr) % 2:
+ hexStr = hexStr + "0"
+ cleanstr = []
+ for i in range(0, len(hexStr), 2):
+ cleanstr.append(chr(int(hexStr[i : i + 2], 16)))
+ cleanstr = "".join(cleanstr)
+ return ps_string(cleanstr)
+
+ def do_special(self, token):
+ if token == "{":
+ self.proclevel = self.proclevel + 1
+ return self.procmark
+ elif token == "}":
+ proc = []
+ while 1:
+ topobject = self.pop()
+ if topobject == self.procmark:
+ break
+ proc.append(topobject)
+ self.proclevel = self.proclevel - 1
+ proc.reverse()
+ return ps_procedure(proc)
+ elif token == "[":
+ return self.mark
+ elif token == "]":
+ return ps_name("]")
+ else:
+ raise PSTokenError("huh?")
+
+ def push(self, object):
+ self.stack.append(object)
+
+ def pop(self, *types):
+ stack = self.stack
+ if not stack:
+ raise PSError("stack underflow")
+ object = stack[-1]
+ if types:
+ if object.type not in types:
+ raise PSError(
+ "typecheck, expected %s, found %s" % (repr(types), object.type)
+ )
+ del stack[-1]
+ return object
+
+ def do_makearray(self):
+ array = []
+ while 1:
+ topobject = self.pop()
+ if topobject == self.mark:
+ break
+ array.append(topobject)
+ array.reverse()
+ self.push(ps_array(array))
+
+ def close(self):
+ """Remove circular references."""
+ del self.stack
+ del self.dictstack
def unpack_item(item):
- tp = type(item.value)
- if tp == dict:
- newitem = {}
- for key, value in item.value.items():
- newitem[key] = unpack_item(value)
- elif tp == list:
- newitem = [None] * len(item.value)
- for i in range(len(item.value)):
- newitem[i] = unpack_item(item.value[i])
- if item.type == 'proceduretype':
- newitem = tuple(newitem)
- else:
- newitem = item.value
- return newitem
+ tp = type(item.value)
+ if tp == dict:
+ newitem = {}
+ for key, value in item.value.items():
+ newitem[key] = unpack_item(value)
+ elif tp == list:
+ newitem = [None] * len(item.value)
+ for i in range(len(item.value)):
+ newitem[i] = unpack_item(item.value[i])
+ if item.type == "proceduretype":
+ newitem = tuple(newitem)
+ else:
+ newitem = item.value
+ return newitem
+
def suckfont(data, encoding="ascii"):
- m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data)
- if m:
- fontName = m.group(1)
- fontName = fontName.decode()
- else:
- fontName = None
- interpreter = PSInterpreter(encoding=encoding)
- interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
- interpreter.interpret(data)
- fontdir = interpreter.dictstack[0]['FontDirectory'].value
- if fontName in fontdir:
- rawfont = fontdir[fontName]
- else:
- # fall back, in case fontName wasn't found
- fontNames = list(fontdir.keys())
- if len(fontNames) > 1:
- fontNames.remove("Helvetica")
- fontNames.sort()
- rawfont = fontdir[fontNames[0]]
- interpreter.close()
- return unpack_item(rawfont)
+ m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
+ if m:
+ fontName = m.group(1)
+ fontName = fontName.decode()
+ else:
+ fontName = None
+ interpreter = PSInterpreter(encoding=encoding)
+ interpreter.interpret(
+ b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
+ )
+ interpreter.interpret(data)
+ fontdir = interpreter.dictstack[0]["FontDirectory"].value
+ if fontName in fontdir:
+ rawfont = fontdir[fontName]
+ else:
+ # fall back, in case fontName wasn't found
+ fontNames = list(fontdir.keys())
+ if len(fontNames) > 1:
+ fontNames.remove("Helvetica")
+ fontNames.sort()
+ rawfont = fontdir[fontNames[0]]
+ interpreter.close()
+ return unpack_item(rawfont)
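
For context on how the reformatted `suckfont` entry point is normally driven, here is a minimal, illustrative sketch via `fontTools.t1Lib`; the `Example.pfb` path is hypothetical, and the sketch assumes a well-formed Type 1 font file:

```python
from fontTools import t1Lib

# Hypothetical input path; t1Lib takes care of PFB/PFA framing and of
# hex-decoding the eexec section before the raw font program reaches
# psLib.suckfont() (whose PSTokenizer.starteexec expects binary eexec data).
font = t1Lib.T1Font("Example.pfb")
font.parse()  # internally calls psLib.suckfont(self.data, self.encoding)

# After parsing, the font is held as plain Python dicts/lists/tuples,
# as produced by unpack_item() above.
print(sorted(font.font.keys()))
```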