"""Module for reading TFM (TeX Font Metrics) files. The TFM format is described in the TFtoPL WEB source code, whose typeset form can be found on `CTAN `_. >>> from fontTools.tfmLib import TFM >>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm") >>> >>> # Accessing an attribute gets you metadata. >>> tfm.checksum 1274110073 >>> tfm.designsize 10.0 >>> tfm.codingscheme 'TeX text' >>> tfm.family 'CMR' >>> tfm.seven_bit_safe_flag False >>> tfm.face 234 >>> tfm.extraheader {} >>> tfm.fontdimens {'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578} >>> # Accessing a character gets you its metrics. >>> # “width” is always available, other metrics are available only when >>> # applicable. All values are relative to “designsize”. >>> tfm.chars[ord("g")] {'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219} >>> # Kerning and ligature can be accessed as well. >>> tfm.kerning[ord("c")] {104: -0.02777862548828125, 107: -0.02777862548828125} >>> tfm.ligatures[ord("f")] {105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)} """ from types import SimpleNamespace from fontTools.misc.sstruct import calcsize, unpack, unpack2 SIZES_FORMAT = """ > lf: h # length of the entire file, in words lh: h # length of the header data, in words bc: h # smallest character code in the font ec: h # largest character code in the font nw: h # number of words in the width table nh: h # number of words in the height table nd: h # number of words in the depth table ni: h # number of words in the italic correction table nl: h # number of words in the ligature/kern table nk: h # number of words in the kern table ne: h # number of words in the extensible character table np: h # number of font parameter words """ SIZES_SIZE = calcsize(SIZES_FORMAT) FIXED_FORMAT = "12.20F" HEADER_FORMAT1 = f""" > checksum: L designsize: {FIXED_FORMAT} """ HEADER_FORMAT2 = f""" {HEADER_FORMAT1} codingscheme: 40p """ HEADER_FORMAT3 = f""" {HEADER_FORMAT2} family: 20p """ HEADER_FORMAT4 = f""" {HEADER_FORMAT3} seven_bit_safe_flag: ? ignored: x ignored: x face: B """ HEADER_SIZE1 = calcsize(HEADER_FORMAT1) HEADER_SIZE2 = calcsize(HEADER_FORMAT2) HEADER_SIZE3 = calcsize(HEADER_FORMAT3) HEADER_SIZE4 = calcsize(HEADER_FORMAT4) LIG_KERN_COMMAND = """ > skip_byte: B next_char: B op_byte: B remainder: B """ BASE_PARAMS = [ "SLANT", "SPACE", "STRETCH", "SHRINK", "XHEIGHT", "QUAD", "EXTRASPACE", ] MATHSY_PARAMS = [ "NUM1", "NUM2", "NUM3", "DENOM1", "DENOM2", "SUP1", "SUP2", "SUP3", "SUB1", "SUB2", "SUPDROP", "SUBDROP", "DELIM1", "DELIM2", "AXISHEIGHT", ] MATHEX_PARAMS = [ "DEFAULTRULETHICKNESS", "BIGOPSPACING1", "BIGOPSPACING2", "BIGOPSPACING3", "BIGOPSPACING4", "BIGOPSPACING5", ] VANILLA = 0 MATHSY = 1 MATHEX = 2 UNREACHABLE = 0 PASSTHROUGH = 1 ACCESSABLE = 2 NO_TAG = 0 LIG_TAG = 1 LIST_TAG = 2 EXT_TAG = 3 STOP_FLAG = 128 KERN_FLAG = 128 class TFMException(Exception): def __init__(self, message): super().__init__(message) class TFM: def __init__(self, file): self._read(file) def __repr__(self): return ( f"" ) def _read(self, file): if hasattr(file, "read"): data = file.read() else: with open(file, "rb") as fp: data = fp.read() self._data = data if len(data) < SIZES_SIZE: raise TFMException("Too short input file") sizes = SimpleNamespace() unpack2(SIZES_FORMAT, data, sizes) # Do some file structure sanity checks. # TeX and TFtoPL do additional functional checks and might even correct # “errors” in the input file, but we instead try to output the file as # it is as long as it is parsable, even if the data make no sense. if sizes.lf < 0: raise TFMException("The file claims to have negative or zero length!") if len(data) < sizes.lf * 4: raise TFMException("The file has fewer bytes than it claims!") for name, length in vars(sizes).items(): if length < 0: raise TFMException("The subfile size: '{name}' is negative!") if sizes.lh < 2: raise TFMException(f"The header length is only {sizes.lh}!") if sizes.bc > sizes.ec + 1 or sizes.ec > 255: raise TFMException( f"The character code range {sizes.bc}..{sizes.ec} is illegal!" ) if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0: raise TFMException("Incomplete subfiles for character dimensions!") if sizes.ne > 256: raise TFMException(f"There are {ne} extensible recipes!") if sizes.lf != ( 6 + sizes.lh + (sizes.ec - sizes.bc + 1) + sizes.nw + sizes.nh + sizes.nd + sizes.ni + sizes.nl + sizes.nk + sizes.ne + sizes.np ): raise TFMException("Subfile sizes don’t add up to the stated total") # Subfile offsets, used in the helper function below. These all are # 32-bit word offsets not 8-bit byte offsets. char_base = 6 + sizes.lh - sizes.bc width_base = char_base + sizes.ec + 1 height_base = width_base + sizes.nw depth_base = height_base + sizes.nh italic_base = depth_base + sizes.nd lig_kern_base = italic_base + sizes.ni kern_base = lig_kern_base + sizes.nl exten_base = kern_base + sizes.nk param_base = exten_base + sizes.ne # Helper functions for accessing individual data. If this looks # nonidiomatic Python, I blame the effect of reading the literate WEB # documentation of TFtoPL. def char_info(c): return 4 * (char_base + c) def width_index(c): return data[char_info(c)] def noneexistent(c): return c < sizes.bc or c > sizes.ec or width_index(c) == 0 def height_index(c): return data[char_info(c) + 1] // 16 def depth_index(c): return data[char_info(c) + 1] % 16 def italic_index(c): return data[char_info(c) + 2] // 4 def tag(c): return data[char_info(c) + 2] % 4 def remainder(c): return data[char_info(c) + 3] def width(c): r = 4 * (width_base + width_index(c)) return read_fixed(r, "v")["v"] def height(c): r = 4 * (height_base + height_index(c)) return read_fixed(r, "v")["v"] def depth(c): r = 4 * (depth_base + depth_index(c)) return read_fixed(r, "v")["v"] def italic(c): r = 4 * (italic_base + italic_index(c)) return read_fixed(r, "v")["v"] def exten(c): return 4 * (exten_base + remainder(c)) def lig_step(i): return 4 * (lig_kern_base + i) def lig_kern_command(i): command = SimpleNamespace() unpack2(LIG_KERN_COMMAND, data[i:], command) return command def kern(i): r = 4 * (kern_base + i) return read_fixed(r, "v")["v"] def param(i): return 4 * (param_base + i) def read_fixed(index, key, obj=None): ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj) return ret[0] # Set all attributes to empty values regardless of the header size. unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self) offset = 24 length = sizes.lh * 4 self.extraheader = {} if length >= HEADER_SIZE4: rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1] if self.face < 18: s = self.face % 2 b = self.face // 2 self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3] for i in range(sizes.lh - HEADER_SIZE4 // 4): rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1] elif length >= HEADER_SIZE3: unpack2(HEADER_FORMAT3, data[offset:], self) elif length >= HEADER_SIZE2: unpack2(HEADER_FORMAT2, data[offset:], self) elif length >= HEADER_SIZE1: unpack2(HEADER_FORMAT1, data[offset:], self) self.fonttype = VANILLA scheme = self.codingscheme.upper() if scheme.startswith("TEX MATH SY"): self.fonttype = MATHSY elif scheme.startswith("TEX MATH EX"): self.fonttype = MATHEX self.fontdimens = {} for i in range(sizes.np): name = f"PARAMETER{i+1}" if i <= 6: name = BASE_PARAMS[i] elif self.fonttype == MATHSY and i <= 21: name = MATHSY_PARAMS[i - 7] elif self.fonttype == MATHEX and i <= 12: name = MATHEX_PARAMS[i - 7] read_fixed(param(i), name, self.fontdimens) lig_kern_map = {} self.right_boundary_char = None self.left_boundary_char = None if sizes.nl > 0: cmd = lig_kern_command(lig_step(0)) if cmd.skip_byte == 255: self.right_boundary_char = cmd.next_char cmd = lig_kern_command(lig_step((sizes.nl - 1))) if cmd.skip_byte == 255: self.left_boundary_char = 256 r = 256 * cmd.op_byte + cmd.remainder lig_kern_map[self.left_boundary_char] = r self.chars = {} for c in range(sizes.bc, sizes.ec + 1): if width_index(c) > 0: self.chars[c] = info = {} info["width"] = width(c) if height_index(c) > 0: info["height"] = height(c) if depth_index(c) > 0: info["depth"] = depth(c) if italic_index(c) > 0: info["italic"] = italic(c) char_tag = tag(c) if char_tag == NO_TAG: pass elif char_tag == LIG_TAG: lig_kern_map[c] = remainder(c) elif char_tag == LIST_TAG: info["nextlarger"] = remainder(c) elif char_tag == EXT_TAG: info["varchar"] = varchar = {} for i in range(4): part = data[exten(c) + i] if i == 3 or part > 0: name = "rep" if i == 0: name = "top" elif i == 1: name = "mid" elif i == 2: name = "bot" if noneexistent(part): varchar[name] = c else: varchar[name] = part self.ligatures = {} self.kerning = {} for c, i in sorted(lig_kern_map.items()): cmd = lig_kern_command(lig_step(i)) if cmd.skip_byte > STOP_FLAG: i = 256 * cmd.op_byte + cmd.remainder while i < sizes.nl: cmd = lig_kern_command(lig_step(i)) if cmd.skip_byte > STOP_FLAG: pass else: if cmd.op_byte >= KERN_FLAG: r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder self.kerning.setdefault(c, {})[cmd.next_char] = kern(r) else: r = cmd.op_byte if r == 4 or (r > 7 and r != 11): # Ligature step with nonstandard code, we output # the code verbatim. lig = r else: lig = "" if r % 4 > 1: lig += "/" lig += "LIG" if r % 2 != 0: lig += "/" while r > 3: lig += ">" r -= 4 self.ligatures.setdefault(c, {})[cmd.next_char] = ( lig, cmd.remainder, ) if cmd.skip_byte >= STOP_FLAG: break i += cmd.skip_byte + 1 if __name__ == "__main__": import sys tfm = TFM(sys.argv[1]) print( "\n".join( x for x in [ f"tfm.checksum={tfm.checksum}", f"tfm.designsize={tfm.designsize}", f"tfm.codingscheme={tfm.codingscheme}", f"tfm.fonttype={tfm.fonttype}", f"tfm.family={tfm.family}", f"tfm.seven_bit_safe_flag={tfm.seven_bit_safe_flag}", f"tfm.face={tfm.face}", f"tfm.extraheader={tfm.extraheader}", f"tfm.fontdimens={tfm.fontdimens}", f"tfm.right_boundary_char={tfm.right_boundary_char}", f"tfm.left_boundary_char={tfm.left_boundary_char}", f"tfm.kerning={tfm.kerning}", f"tfm.ligatures={tfm.ligatures}", f"tfm.chars={tfm.chars}", ] ) ) print(tfm)