aboutsummaryrefslogtreecommitdiff
path: root/Lib/fontTools/tfmLib.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/fontTools/tfmLib.py')
-rw-r--r--Lib/fontTools/tfmLib.py460
1 files changed, 460 insertions, 0 deletions
diff --git a/Lib/fontTools/tfmLib.py b/Lib/fontTools/tfmLib.py
new file mode 100644
index 00000000..673373ff
--- /dev/null
+++ b/Lib/fontTools/tfmLib.py
@@ -0,0 +1,460 @@
+"""Module for reading TFM (TeX Font Metrics) files.
+
+The TFM format is described in the TFtoPL WEB source code, whose typeset form
+can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_.
+
+ >>> from fontTools.tfmLib import TFM
+ >>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm")
+ >>>
+ >>> # Accessing an attribute gets you metadata.
+ >>> tfm.checksum
+ 1274110073
+ >>> tfm.designsize
+ 10.0
+ >>> tfm.codingscheme
+ 'TeX text'
+ >>> tfm.family
+ 'CMR'
+ >>> tfm.seven_bit_safe_flag
+ False
+ >>> tfm.face
+ 234
+ >>> tfm.extraheader
+ {}
+ >>> tfm.fontdimens
+ {'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578}
+ >>> # Accessing a character gets you its metrics.
+ >>> # “width” is always available, other metrics are available only when
+ >>> # applicable. All values are relative to “designsize”.
+ >>> tfm.chars[ord("g")]
+ {'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219}
+ >>> # Kerning and ligature can be accessed as well.
+ >>> tfm.kerning[ord("c")]
+ {104: -0.02777862548828125, 107: -0.02777862548828125}
+ >>> tfm.ligatures[ord("f")]
+ {105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)}
+"""
+
+from types import SimpleNamespace
+
+from fontTools.misc.sstruct import calcsize, unpack, unpack2
+
# sstruct layout of the twelve 16-bit size fields that open every TFM file.
SIZES_FORMAT = """
    >
    lf: h # length of the entire file, in words
    lh: h # length of the header data, in words
    bc: h # smallest character code in the font
    ec: h # largest character code in the font
    nw: h # number of words in the width table
    nh: h # number of words in the height table
    nd: h # number of words in the depth table
    ni: h # number of words in the italic correction table
    nl: h # number of words in the ligature/kern table
    nk: h # number of words in the kern table
    ne: h # number of words in the extensible character table
    np: h # number of font parameter words
"""

# Byte size of the size preamble described by SIZES_FORMAT.
SIZES_SIZE = calcsize(SIZES_FORMAT)

# sstruct element type used for every fixed-point value read from the file.
FIXED_FORMAT = "12.20F"

# The header formats are cumulative: each one embeds the previous one and
# appends further fields, so a header of any supported length can be unpacked
# with the longest format that still fits into it.
HEADER_FORMAT1 = f"""
    >
    checksum: L
    designsize: {FIXED_FORMAT}
"""

HEADER_FORMAT2 = f"""
    {HEADER_FORMAT1}
    codingscheme: 40p
"""

HEADER_FORMAT3 = f"""
    {HEADER_FORMAT2}
    family: 20p
"""

HEADER_FORMAT4 = f"""
    {HEADER_FORMAT3}
    seven_bit_safe_flag: ?
    ignored: x
    ignored: x
    face: B
"""

# Byte sizes of the four header layouts, from shortest to longest.
HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)

# One four-byte instruction of the ligature/kern program.
LIG_KERN_COMMAND = """
    >
    skip_byte: B
    next_char: B
    op_byte: B
    remainder: B
"""
+
# Names given to the first seven font parameters of every font.
BASE_PARAMS = [
    "SLANT",
    "SPACE",
    "STRETCH",
    "SHRINK",
    "XHEIGHT",
    "QUAD",
    "EXTRASPACE",
]

# Names given to parameters 8..22 when the coding scheme marks the font as a
# math symbols font (fonttype == MATHSY).
MATHSY_PARAMS = [
    "NUM1",
    "NUM2",
    "NUM3",
    "DENOM1",
    "DENOM2",
    "SUP1",
    "SUP2",
    "SUP3",
    "SUB1",
    "SUB2",
    "SUPDROP",
    "SUBDROP",
    "DELIM1",
    "DELIM2",
    "AXISHEIGHT",
]

# Names given to parameters 8..13 when the coding scheme marks the font as a
# math extension font (fonttype == MATHEX).
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1",
    "BIGOPSPACING2",
    "BIGOPSPACING3",
    "BIGOPSPACING4",
    "BIGOPSPACING5",
]

# Values stored in ``TFM.fonttype``.
VANILLA, MATHSY, MATHEX = 0, 1, 2

# Ligature/kern step states.
# NOTE(review): not referenced by the reader in this module; presumably kept
# for parity with TFtoPL -- confirm before removing.
UNREACHABLE, PASSTHROUGH, ACCESSABLE = 0, 1, 2

# Possible values of the two-bit tag stored in each character's info word.
NO_TAG, LIG_TAG, LIST_TAG, EXT_TAG = 0, 1, 2, 3

# Thresholds compared against ``skip_byte`` (STOP_FLAG) and ``op_byte``
# (KERN_FLAG) of a ligature/kern instruction.
STOP_FLAG = 128
KERN_FLAG = 128
+
class TFMException(Exception):
    """Error raised when a TFM file fails the reader's structural checks."""

    def __init__(self, message):
        # Hand the message to the base class unchanged.
        Exception.__init__(self, message)
+
+
class TFM:
    """Parsed contents of a TeX Font Metrics (TFM) file.

    Creating an instance reads and parses the whole file. The result is
    exposed through plain attributes:

    - ``checksum``, ``designsize``, ``codingscheme``, ``family``,
      ``seven_bit_safe_flag``, ``face``: header fields. They are first set
      from an all-zero header, so they exist even when the real header is
      too short to contain them.
    - ``extraheader``: dict of the header words that follow the standard
      fields, keyed ``"HEADER18"``, ``"HEADER19"``, ...
    - ``fonttype``: ``VANILLA``, ``MATHSY`` or ``MATHEX``, derived from the
      coding scheme.
    - ``fontdimens``: dict mapping parameter names to values.
    - ``right_boundary_char`` / ``left_boundary_char``: boundary characters
      found in the ligature/kern table, or ``None``.
    - ``chars``: dict mapping a character code to a dict with ``"width"``
      and, where applicable, ``"height"``, ``"depth"``, ``"italic"``,
      ``"nextlarger"`` and ``"varchar"``.
    - ``kerning``: ``{char: {next_char: kern}}``.
    - ``ligatures``: ``{char: {next_char: (operation, remainder)}}``.
    """

    def __init__(self, file):
        """Read *file*, which is a path or an object with a ``read`` method."""
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    @staticmethod
    def _check_sizes(sizes, data_length):
        """Validate the size preamble of a TFM file.

        TeX and TFtoPL do additional functional checks and might even correct
        "errors" in the input file; here only the file structure is checked,
        so that anything parsable is read as it is.

        Args:
            sizes: object with the integer attributes named in SIZES_FORMAT.
            data_length: number of bytes actually available.

        Raises:
            TFMException: if the sizes are inconsistent with each other or
                with *data_length*.
        """
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if data_length < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        expected_length = (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        )
        if sizes.lf != expected_length:
            raise TFMException("Subfile sizes don’t add up to the stated total")

    @staticmethod
    def _lig_op_name(op):
        """Return the symbolic name of a ligature operation code.

        Codes 0-3, 5-7 and 11 map to ``"LIG"`` decorated with ``/`` and ``>``
        markers (for example ``"/LIG/>"`` for 7). Any other code is
        nonstandard and is returned verbatim as an integer.
        """
        if op == 4 or (op > 7 and op != 11):
            return op
        name = "/LIG" if op % 4 > 1 else "LIG"
        if op % 2 != 0:
            name += "/"
        # Codes 0-3 add no ">", 5-7 add one and 11 adds two.
        return name + ">" * (op // 4)

    def _read(self, file):
        """Parse *file* and populate the instance attributes.

        Args:
            file: a path, or an object whose ``read()`` returns the raw
                bytes of the TFM file.

        Raises:
            TFMException: if the input is shorter than the size preamble or
                fails the structural checks in ``_check_sizes``.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)
        self._check_sizes(sizes, len(data))

        # Subfile offsets, used in the helper functions below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. They mirror the
        # accessors of the literate WEB documentation of TFtoPL, which is why
        # they may look like nonidiomatic Python.
        def read_fixed(index, key, obj=None):
            # Only the four bytes of the word are sliced out, instead of
            # copying the whole tail of the file for every value read.
            return unpack2(f">;{key}:{FIXED_FORMAT}", data[index : index + 4], obj)[0]

        def char_info(c):
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            return data[char_info(c) + 2] // 4

        def tag(c):
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            return read_fixed(4 * (width_base + width_index(c)), "v")["v"]

        def height(c):
            return read_fixed(4 * (height_base + height_index(c)), "v")["v"]

        def depth(c):
            return read_fixed(4 * (depth_base + depth_index(c)), "v")["v"]

        def italic(c):
            return read_fixed(4 * (italic_base + italic_index(c)), "v")["v"]

        def exten(c):
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i : i + 4], command)
            return command

        def kern(i):
            return read_fixed(4 * (kern_base + i), "v")["v"]

        def param(i):
            return 4 * (param_base + i)

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        # The header starts right after the 24-byte size preamble.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Turn the numeric face code into a three-letter string.
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        self.fontdimens = {}
        for i in range(sizes.np):
            # Parameters without a known name get a generic numbered one.
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of the first instruction of its
        # ligature/kern program.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            cmd = lig_kern_command(lig_step(sizes.nl - 1))
            if cmd.skip_byte == 255:
                # The left boundary is recorded under the pseudo character
                # code 256, one past the largest real character code.
                self.left_boundary_char = 256
                lig_kern_map[self.left_boundary_char] = (
                    256 * cmd.op_byte + cmd.remainder
                )

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            # A zero width index means the character is not in the font.
            if width_index(c) == 0:
                continue
            self.chars[c] = info = {}
            info["width"] = width(c)
            if height_index(c) > 0:
                info["height"] = height(c)
            if depth_index(c) > 0:
                info["depth"] = depth(c)
            if italic_index(c) > 0:
                info["italic"] = italic(c)

            char_tag = tag(c)
            if char_tag == LIG_TAG:
                lig_kern_map[c] = remainder(c)
            elif char_tag == LIST_TAG:
                info["nextlarger"] = remainder(c)
            elif char_tag == EXT_TAG:
                info["varchar"] = varchar = {}
                for i, name in enumerate(("top", "mid", "bot", "rep")):
                    part = data[exten(c) + i]
                    # top/mid/bot are optional (zero means absent); the
                    # repeated piece is always recorded.
                    if i == 3 or part > 0:
                        varchar[name] = c if nonexistent(part) else part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                # Indirect start: the real program begins at this index.
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                # Instructions with skip_byte above STOP_FLAG carry no
                # ligature/kern step of their own.
                if cmd.skip_byte <= STOP_FLAG:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            self._lig_op_name(cmd.op_byte),
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1
+
+
if __name__ == "__main__":
    # Command-line helper: dump every parsed attribute of the given TFM file.
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} FILE.tfm")

    tfm = TFM(sys.argv[1])
    # Printed one per line as "tfm.<name>=<value>", in this order.
    for attr in (
        "checksum",
        "designsize",
        "codingscheme",
        "fonttype",
        "family",
        "seven_bit_safe_flag",
        "face",
        "extraheader",
        "fontdimens",
        "right_boundary_char",
        "left_boundary_char",
        "kerning",
        "ligatures",
        "chars",
    ):
        print(f"tfm.{attr}={getattr(tfm, attr)}")
    print(tfm)