path: root/Lib/fontTools/ttLib/tables/_n_a_m_e.py
author      Elliott Hughes <enh@google.com>    2023-11-10 19:23:18 +0000
committer   Elliott Hughes <enh@google.com>    2023-11-10 19:23:18 +0000
commit      02cec46c7a3918f19153e4e2de707f9d7de83fc8 (patch)
tree        c927c514a071f3a5ec125b3f474ad4ce4d39a7fa /Lib/fontTools/ttLib/tables/_n_a_m_e.py
parent      a936b27b9394502de80c116f46aff5b1a1cc3925 (diff)
download    fonttools-02cec46c7a3918f19153e4e2de707f9d7de83fc8.tar.gz
Upgrade fonttools to 4.44.0
This project was upgraded with external_updater.
Usage: tools/external_updater/updater.sh update fonttools
For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md

Test: TreeHugger
Change-Id: I5de68c96999d1b8671c251a2555948da63de5bc6
Diffstat (limited to 'Lib/fontTools/ttLib/tables/_n_a_m_e.py')
-rw-r--r--  Lib/fontTools/ttLib/tables/_n_a_m_e.py  1913
1 file changed, 1029 insertions, 884 deletions
diff --git a/Lib/fontTools/ttLib/tables/_n_a_m_e.py b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
index 9558addb..bbb4f536 100644
--- a/Lib/fontTools/ttLib/tables/_n_a_m_e.py
+++ b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
@@ -1,8 +1,20 @@
# -*- coding: utf-8 -*-
from fontTools.misc import sstruct
-from fontTools.misc.textTools import bytechr, byteord, bytesjoin, strjoin, tobytes, tostr, safeEval
+from fontTools.misc.textTools import (
+ bytechr,
+ byteord,
+ bytesjoin,
+ strjoin,
+ tobytes,
+ tostr,
+ safeEval,
+)
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import newTable
+from fontTools.ttLib.ttVisitor import TTVisitor
+from fontTools import ttLib
+import fontTools.ttLib.tables.otTables as otTables
+from fontTools.ttLib.tables import C_P_A_L_
from . import DefaultTable
import struct
import logging
@@ -24,573 +36,643 @@ nameRecordSize = sstruct.calcsize(nameRecordFormat)
class table__n_a_m_e(DefaultTable.DefaultTable):
- dependencies = ["ltag"]
-
- def decompile(self, data, ttFont):
- format, n, stringOffset = struct.unpack(b">HHH", data[:6])
- expectedStringOffset = 6 + n * nameRecordSize
- if stringOffset != expectedStringOffset:
- log.error(
- "'name' table stringOffset incorrect. Expected: %s; Actual: %s",
- expectedStringOffset, stringOffset)
- stringData = data[stringOffset:]
- data = data[6:]
- self.names = []
- for i in range(n):
- if len(data) < 12:
- log.error('skipping malformed name record #%d', i)
- continue
- name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
- name.string = stringData[name.offset:name.offset+name.length]
- if name.offset + name.length > len(stringData):
- log.error('skipping malformed name record #%d', i)
- continue
- assert len(name.string) == name.length
- #if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
- # if len(name.string) % 2:
- # print "2-byte string doesn't have even length!"
- # print name.__dict__
- del name.offset, name.length
- self.names.append(name)
-
- def compile(self, ttFont):
- if not hasattr(self, "names"):
- # only happens when there are NO name table entries read
- # from the TTX file
- self.names = []
- names = self.names
- names.sort() # sort according to the spec; see NameRecord.__lt__()
- stringData = b""
- format = 0
- n = len(names)
- stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
- data = struct.pack(b">HHH", format, n, stringOffset)
- lastoffset = 0
- done = {} # remember the data so we can reuse the "pointers"
- for name in names:
- string = name.toBytes()
- if string in done:
- name.offset, name.length = done[string]
- else:
- name.offset, name.length = done[string] = len(stringData), len(string)
- stringData = bytesjoin([stringData, string])
- data = data + sstruct.pack(nameRecordFormat, name)
- return data + stringData
-
- def toXML(self, writer, ttFont):
- for name in self.names:
- name.toXML(writer, ttFont)
-
- def fromXML(self, name, attrs, content, ttFont):
- if name != "namerecord":
- return # ignore unknown tags
- if not hasattr(self, "names"):
- self.names = []
- name = NameRecord()
- self.names.append(name)
- name.fromXML(name, attrs, content, ttFont)
-
- def getName(self, nameID, platformID, platEncID, langID=None):
- for namerecord in self.names:
- if ( namerecord.nameID == nameID and
- namerecord.platformID == platformID and
- namerecord.platEncID == platEncID):
- if langID is None or namerecord.langID == langID:
- return namerecord
- return None # not found
-
- def getDebugName(self, nameID):
- englishName = someName = None
- for name in self.names:
- if name.nameID != nameID:
- continue
- try:
- unistr = name.toUnicode()
- except UnicodeDecodeError:
- continue
-
- someName = unistr
- if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
- englishName = unistr
- break
- if englishName:
- return englishName
- elif someName:
- return someName
- else:
- return None
-
- def getFirstDebugName(self, nameIDs):
- for nameID in nameIDs:
- name = self.getDebugName(nameID)
- if name is not None:
- return name
- return None
-
- def getBestFamilyName(self):
- # 21 = WWS Family Name
- # 16 = Typographic Family Name
- # 1 = Family Name
- return self.getFirstDebugName((21, 16, 1))
-
- def getBestSubFamilyName(self):
- # 22 = WWS SubFamily Name
- # 17 = Typographic SubFamily Name
- # 2 = SubFamily Name
- return self.getFirstDebugName((22, 17, 2))
-
- def getBestFullName(self):
- # 4 = Full Name
- # 6 = PostScript Name
- for nameIDs in ((21, 22), (16, 17), (1, 2), (4, ), (6, )):
- if len(nameIDs) == 2:
- name_fam = self.getDebugName(nameIDs[0])
- name_subfam = self.getDebugName(nameIDs[1])
- if None in [name_fam, name_subfam]:
- continue # if any is None, skip
- name = f"{name_fam} {name_subfam}"
- if name_subfam.lower() == 'regular':
- name = f"{name_fam}"
- return name
- else:
- name = self.getDebugName(nameIDs[0])
- if name is not None:
- return name
- return None
-
- def setName(self, string, nameID, platformID, platEncID, langID):
- """ Set the 'string' for the name record identified by 'nameID', 'platformID',
- 'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
- and append to the name table.
-
- 'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
- it is assumed to be already encoded with the correct plaform-specific encoding
- identified by the (platformID, platEncID, langID) triplet. A warning is issued
- to prevent unexpected results.
- """
- if not hasattr(self, 'names'):
- self.names = []
- if not isinstance(string, str):
- if isinstance(string, bytes):
- log.warning(
- "name string is bytes, ensure it's correctly encoded: %r", string)
- else:
- raise TypeError(
- "expected unicode or bytes, found %s: %r" % (
- type(string).__name__, string))
- namerecord = self.getName(nameID, platformID, platEncID, langID)
- if namerecord:
- namerecord.string = string
- else:
- self.names.append(makeName(string, nameID, platformID, platEncID, langID))
-
- def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None):
- """Remove any name records identified by the given combination of 'nameID',
- 'platformID', 'platEncID' and 'langID'.
- """
- args = {
- argName: argValue
- for argName, argValue in (
- ("nameID", nameID),
- ("platformID", platformID),
- ("platEncID", platEncID),
- ("langID", langID),
- )
- if argValue is not None
- }
- if not args:
- # no arguments, nothing to do
- return
- self.names = [
- rec for rec in self.names
- if any(
- argValue != getattr(rec, argName)
- for argName, argValue in args.items()
- )
- ]
-
- def _findUnusedNameID(self, minNameID=256):
- """Finds an unused name id.
-
- The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
- following the last nameID in the name table.
- """
- names = getattr(self, 'names', [])
- nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
- if nameID > 32767:
- raise ValueError("nameID must be less than 32768")
- return nameID
-
- def findMultilingualName(self, names, windows=True, mac=True, minNameID=0):
- """Return the name ID of an existing multilingual name that
- matches the 'names' dictionary, or None if not found.
-
- 'names' is a dictionary with the name in multiple languages,
- such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
- The keys can be arbitrary IETF BCP 47 language codes;
- the values are Unicode strings.
-
- If 'windows' is True, the returned name ID is guaranteed
- exist for all requested languages for platformID=3 and
- platEncID=1.
- If 'mac' is True, the returned name ID is guaranteed to exist
- for all requested languages for platformID=1 and platEncID=0.
-
- The returned name ID will not be less than the 'minNameID'
- argument.
- """
- # Gather the set of requested
- # (string, platformID, platEncID, langID)
- # tuples
- reqNameSet = set()
- for lang, name in sorted(names.items()):
- if windows:
- windowsName = _makeWindowsName(name, None, lang)
- if windowsName is not None:
- reqNameSet.add((windowsName.string,
- windowsName.platformID,
- windowsName.platEncID,
- windowsName.langID))
- if mac:
- macName = _makeMacName(name, None, lang)
- if macName is not None:
- reqNameSet.add((macName.string,
- macName.platformID,
- macName.platEncID,
- macName.langID))
-
- # Collect matching name IDs
- matchingNames = dict()
- for name in self.names:
- try:
- key = (name.toUnicode(), name.platformID,
- name.platEncID, name.langID)
- except UnicodeDecodeError:
- continue
- if key in reqNameSet and name.nameID >= minNameID:
- nameSet = matchingNames.setdefault(name.nameID, set())
- nameSet.add(key)
-
- # Return the first name ID that defines all requested strings
- for nameID, nameSet in sorted(matchingNames.items()):
- if nameSet == reqNameSet:
- return nameID
-
- return None # not found
-
- def addMultilingualName(self, names, ttFont=None, nameID=None,
- windows=True, mac=True, minNameID=0):
- """Add a multilingual name, returning its name ID
-
- 'names' is a dictionary with the name in multiple languages,
- such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
- The keys can be arbitrary IETF BCP 47 language codes;
- the values are Unicode strings.
-
- 'ttFont' is the TTFont to which the names are added, or None.
- If present, the font's 'ltag' table can get populated
- to store exotic language codes, which allows encoding
- names that otherwise cannot get encoded at all.
-
- 'nameID' is the name ID to be used, or None to let the library
- find an existing set of name records that match, or pick an
- unused name ID.
-
- If 'windows' is True, a platformID=3 name record will be added.
- If 'mac' is True, a platformID=1 name record will be added.
-
- If the 'nameID' argument is None, the created nameID will not
- be less than the 'minNameID' argument.
- """
- if not hasattr(self, 'names'):
- self.names = []
- if nameID is None:
- # Reuse nameID if possible
- nameID = self.findMultilingualName(
- names, windows=windows, mac=mac, minNameID=minNameID)
- if nameID is not None:
- return nameID
- nameID = self._findUnusedNameID()
- # TODO: Should minimize BCP 47 language codes.
- # https://github.com/fonttools/fonttools/issues/930
- for lang, name in sorted(names.items()):
- if windows:
- windowsName = _makeWindowsName(name, nameID, lang)
- if windowsName is not None:
- self.names.append(windowsName)
- else:
- # We cannot not make a Windows name: make sure we add a
- # Mac name as a fallback. This can happen for exotic
- # BCP47 language tags that have no Windows language code.
- mac = True
- if mac:
- macName = _makeMacName(name, nameID, lang, ttFont)
- if macName is not None:
- self.names.append(macName)
- return nameID
-
- def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
- """ Add a new name record containing 'string' for each (platformID, platEncID,
- langID) tuple specified in the 'platforms' list.
-
- The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
- following the last nameID in the name table.
- If no 'platforms' are specified, two English name records are added, one for the
- Macintosh (platformID=0), and one for the Windows platform (3).
-
- The 'string' must be a Unicode string, so it can be encoded with different,
- platform-specific encodings.
-
- Return the new nameID.
- """
- assert len(platforms) > 0, \
- "'platforms' must contain at least one (platformID, platEncID, langID) tuple"
- if not hasattr(self, 'names'):
- self.names = []
- if not isinstance(string, str):
- raise TypeError(
- "expected str, found %s: %r" % (type(string).__name__, string))
- nameID = self._findUnusedNameID(minNameID + 1)
- for platformID, platEncID, langID in platforms:
- self.names.append(makeName(string, nameID, platformID, platEncID, langID))
- return nameID
+ dependencies = ["ltag"]
+
+ def decompile(self, data, ttFont):
+ format, n, stringOffset = struct.unpack(b">HHH", data[:6])
+ expectedStringOffset = 6 + n * nameRecordSize
+ if stringOffset != expectedStringOffset:
+ log.error(
+ "'name' table stringOffset incorrect. Expected: %s; Actual: %s",
+ expectedStringOffset,
+ stringOffset,
+ )
+ stringData = data[stringOffset:]
+ data = data[6:]
+ self.names = []
+ for i in range(n):
+ if len(data) < 12:
+ log.error("skipping malformed name record #%d", i)
+ continue
+ name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
+ name.string = stringData[name.offset : name.offset + name.length]
+ if name.offset + name.length > len(stringData):
+ log.error("skipping malformed name record #%d", i)
+ continue
+ assert len(name.string) == name.length
+ # if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
+ # if len(name.string) % 2:
+ # print "2-byte string doesn't have even length!"
+ # print name.__dict__
+ del name.offset, name.length
+ self.names.append(name)
+
+ def compile(self, ttFont):
+ if not hasattr(self, "names"):
+ # only happens when there are NO name table entries read
+ # from the TTX file
+ self.names = []
+ names = self.names
+ names.sort() # sort according to the spec; see NameRecord.__lt__()
+ stringData = b""
+ format = 0
+ n = len(names)
+ stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
+ data = struct.pack(b">HHH", format, n, stringOffset)
+ lastoffset = 0
+ done = {} # remember the data so we can reuse the "pointers"
+ for name in names:
+ string = name.toBytes()
+ if string in done:
+ name.offset, name.length = done[string]
+ else:
+ name.offset, name.length = done[string] = len(stringData), len(string)
+ stringData = bytesjoin([stringData, string])
+ data = data + sstruct.pack(nameRecordFormat, name)
+ return data + stringData
+
+ def toXML(self, writer, ttFont):
+ for name in self.names:
+ name.toXML(writer, ttFont)
+
+ def fromXML(self, name, attrs, content, ttFont):
+ if name != "namerecord":
+ return # ignore unknown tags
+ if not hasattr(self, "names"):
+ self.names = []
+ name = NameRecord()
+ self.names.append(name)
+ name.fromXML(name, attrs, content, ttFont)
+
+ def getName(self, nameID, platformID, platEncID, langID=None):
+ for namerecord in self.names:
+ if (
+ namerecord.nameID == nameID
+ and namerecord.platformID == platformID
+ and namerecord.platEncID == platEncID
+ ):
+ if langID is None or namerecord.langID == langID:
+ return namerecord
+ return None # not found
+
+ def getDebugName(self, nameID):
+ englishName = someName = None
+ for name in self.names:
+ if name.nameID != nameID:
+ continue
+ try:
+ unistr = name.toUnicode()
+ except UnicodeDecodeError:
+ continue
+
+ someName = unistr
+ if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
+ englishName = unistr
+ break
+ if englishName:
+ return englishName
+ elif someName:
+ return someName
+ else:
+ return None
+
+ def getFirstDebugName(self, nameIDs):
+ for nameID in nameIDs:
+ name = self.getDebugName(nameID)
+ if name is not None:
+ return name
+ return None
+
+ def getBestFamilyName(self):
+ # 21 = WWS Family Name
+ # 16 = Typographic Family Name
+ # 1 = Family Name
+ return self.getFirstDebugName((21, 16, 1))
+
+ def getBestSubFamilyName(self):
+ # 22 = WWS SubFamily Name
+ # 17 = Typographic SubFamily Name
+ # 2 = SubFamily Name
+ return self.getFirstDebugName((22, 17, 2))
+
+ def getBestFullName(self):
+ # 4 = Full Name
+ # 6 = PostScript Name
+ for nameIDs in ((21, 22), (16, 17), (1, 2), (4,), (6,)):
+ if len(nameIDs) == 2:
+ name_fam = self.getDebugName(nameIDs[0])
+ name_subfam = self.getDebugName(nameIDs[1])
+ if None in [name_fam, name_subfam]:
+ continue # if any is None, skip
+ name = f"{name_fam} {name_subfam}"
+ if name_subfam.lower() == "regular":
+ name = f"{name_fam}"
+ return name
+ else:
+ name = self.getDebugName(nameIDs[0])
+ if name is not None:
+ return name
+ return None
+
+ def setName(self, string, nameID, platformID, platEncID, langID):
+ """Set the 'string' for the name record identified by 'nameID', 'platformID',
+ 'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
+ and append to the name table.
+
+ 'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
+ it is assumed to be already encoded with the correct platform-specific encoding
+ identified by the (platformID, platEncID, langID) triplet. A warning is issued
+ to prevent unexpected results.
+ """
+ if not hasattr(self, "names"):
+ self.names = []
+ if not isinstance(string, str):
+ if isinstance(string, bytes):
+ log.warning(
+ "name string is bytes, ensure it's correctly encoded: %r", string
+ )
+ else:
+ raise TypeError(
+ "expected unicode or bytes, found %s: %r"
+ % (type(string).__name__, string)
+ )
+ namerecord = self.getName(nameID, platformID, platEncID, langID)
+ if namerecord:
+ namerecord.string = string
+ else:
+ self.names.append(makeName(string, nameID, platformID, platEncID, langID))
+
+ def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None):
+ """Remove any name records identified by the given combination of 'nameID',
+ 'platformID', 'platEncID' and 'langID'.
+ """
+ args = {
+ argName: argValue
+ for argName, argValue in (
+ ("nameID", nameID),
+ ("platformID", platformID),
+ ("platEncID", platEncID),
+ ("langID", langID),
+ )
+ if argValue is not None
+ }
+ if not args:
+ # no arguments, nothing to do
+ return
+ self.names = [
+ rec
+ for rec in self.names
+ if any(
+ argValue != getattr(rec, argName) for argName, argValue in args.items()
+ )
+ ]
+
+ @staticmethod
+ def removeUnusedNames(ttFont):
+ """Remove any name records which are not in NameID range 0-255 and not utilized
+ within the font itself."""
+ visitor = NameRecordVisitor()
+ visitor.visit(ttFont)
+ toDelete = set()
+ for record in ttFont["name"].names:
+ # Name IDs 26 to 255, inclusive, are reserved for future standard names.
+ # https://learn.microsoft.com/en-us/typography/opentype/spec/name#name-ids
+ if record.nameID < 256:
+ continue
+ if record.nameID not in visitor.seen:
+ toDelete.add(record.nameID)
+
+ for nameID in toDelete:
+ ttFont["name"].removeNames(nameID)
+ return toDelete
+
+ def _findUnusedNameID(self, minNameID=256):
+ """Finds an unused name id.
+
+ The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
+ following the last nameID in the name table.
+ """
+ names = getattr(self, "names", [])
+ nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
+ if nameID > 32767:
+ raise ValueError("nameID must be less than 32768")
+ return nameID
+
+ def findMultilingualName(
+ self, names, windows=True, mac=True, minNameID=0, ttFont=None
+ ):
+ """Return the name ID of an existing multilingual name that
+ matches the 'names' dictionary, or None if not found.
+
+ 'names' is a dictionary with the name in multiple languages,
+ such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
+ The keys can be arbitrary IETF BCP 47 language codes;
+ the values are Unicode strings.
+
+ If 'windows' is True, the returned name ID is guaranteed to
+ exist for all requested languages for platformID=3 and
+ platEncID=1.
+ If 'mac' is True, the returned name ID is guaranteed to exist
+ for all requested languages for platformID=1 and platEncID=0.
+
+ The returned name ID will not be less than the 'minNameID'
+ argument.
+ """
+ # Gather the set of requested
+ # (string, platformID, platEncID, langID)
+ # tuples
+ reqNameSet = set()
+ for lang, name in sorted(names.items()):
+ if windows:
+ windowsName = _makeWindowsName(name, None, lang)
+ if windowsName is not None:
+ reqNameSet.add(
+ (
+ windowsName.string,
+ windowsName.platformID,
+ windowsName.platEncID,
+ windowsName.langID,
+ )
+ )
+ if mac:
+ macName = _makeMacName(name, None, lang, ttFont)
+ if macName is not None:
+ reqNameSet.add(
+ (
+ macName.string,
+ macName.platformID,
+ macName.platEncID,
+ macName.langID,
+ )
+ )
+
+ # Collect matching name IDs
+ matchingNames = dict()
+ for name in self.names:
+ try:
+ key = (name.toUnicode(), name.platformID, name.platEncID, name.langID)
+ except UnicodeDecodeError:
+ continue
+ if key in reqNameSet and name.nameID >= minNameID:
+ nameSet = matchingNames.setdefault(name.nameID, set())
+ nameSet.add(key)
+
+ # Return the first name ID that defines all requested strings
+ for nameID, nameSet in sorted(matchingNames.items()):
+ if nameSet == reqNameSet:
+ return nameID
+
+ return None # not found
+
+ def addMultilingualName(
+ self, names, ttFont=None, nameID=None, windows=True, mac=True, minNameID=0
+ ):
+ """Add a multilingual name, returning its name ID
+
+ 'names' is a dictionary with the name in multiple languages,
+ such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
+ The keys can be arbitrary IETF BCP 47 language codes;
+ the values are Unicode strings.
+
+ 'ttFont' is the TTFont to which the names are added, or None.
+ If present, the font's 'ltag' table can get populated
+ to store exotic language codes, which allows encoding
+ names that otherwise cannot get encoded at all.
+
+ 'nameID' is the name ID to be used, or None to let the library
+ find an existing set of name records that match, or pick an
+ unused name ID.
+
+ If 'windows' is True, a platformID=3 name record will be added.
+ If 'mac' is True, a platformID=1 name record will be added.
+
+ If the 'nameID' argument is None, the created nameID will not
+ be less than the 'minNameID' argument.
+ """
+ if not hasattr(self, "names"):
+ self.names = []
+ if nameID is None:
+ # Reuse nameID if possible
+ nameID = self.findMultilingualName(
+ names, windows=windows, mac=mac, minNameID=minNameID, ttFont=ttFont
+ )
+ if nameID is not None:
+ return nameID
+ nameID = self._findUnusedNameID()
+ # TODO: Should minimize BCP 47 language codes.
+ # https://github.com/fonttools/fonttools/issues/930
+ for lang, name in sorted(names.items()):
+ if windows:
+ windowsName = _makeWindowsName(name, nameID, lang)
+ if windowsName is not None:
+ self.names.append(windowsName)
+ else:
+ # We cannot make a Windows name: make sure we add a
+ # Mac name as a fallback. This can happen for exotic
+ # BCP47 language tags that have no Windows language code.
+ mac = True
+ if mac:
+ macName = _makeMacName(name, nameID, lang, ttFont)
+ if macName is not None:
+ self.names.append(macName)
+ return nameID
+
+ def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
+ """Add a new name record containing 'string' for each (platformID, platEncID,
+ langID) tuple specified in the 'platforms' list.
+
+ The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
+ following the last nameID in the name table.
+ If no 'platforms' are specified, two English name records are added, one for the
+ Macintosh (platformID=1), and one for the Windows platform (3).
+
+ The 'string' must be a Unicode string, so it can be encoded with different,
+ platform-specific encodings.
+
+ Return the new nameID.
+ """
+ assert (
+ len(platforms) > 0
+ ), "'platforms' must contain at least one (platformID, platEncID, langID) tuple"
+ if not hasattr(self, "names"):
+ self.names = []
+ if not isinstance(string, str):
+ raise TypeError(
+ "expected str, found %s: %r" % (type(string).__name__, string)
+ )
+ nameID = self._findUnusedNameID(minNameID + 1)
+ for platformID, platEncID, langID in platforms:
+ self.names.append(makeName(string, nameID, platformID, platEncID, langID))
+ return nameID
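For reference, a minimal sketch of how the table__n_a_m_e API reformatted above is typically driven; the font path is hypothetical, and any font with a 'name' table would do:

from fontTools.ttLib import TTFont

font = TTFont("MyFont.ttf")  # hypothetical input file
name = font["name"]

# Overwrite (or create) the Windows English family name record:
# nameID=1, platformID=3, platEncID=1, langID=0x409.
name.setName("My Family", 1, 3, 1, 0x409)

# Add a new string on both default platforms; an unused nameID >= 256
# is picked automatically and returned.
styleID = name.addName("Light Condensed")

# Add one name in several languages; Windows and Mac records are created,
# and exotic BCP 47 tags fall back to Mac records via the 'ltag' table.
paleID = name.addMultilingualName(
    {"en": "Pale", "de": "Blaß", "de-CH": "Blass"}, ttFont=font
)

print(name.getDebugName(styleID), paleID)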
def makeName(string, nameID, platformID, platEncID, langID):
- name = NameRecord()
- name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
- string, nameID, platformID, platEncID, langID)
- return name
+ name = NameRecord()
+ name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
+ string,
+ nameID,
+ platformID,
+ platEncID,
+ langID,
+ )
+ return name
def _makeWindowsName(name, nameID, language):
- """Create a NameRecord for the Microsoft Windows platform
-
- 'language' is an arbitrary IETF BCP 47 language identifier such
- as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
- does not support the desired language, the result will be None.
- Future versions of fonttools might return a NameRecord for the
- OpenType 'name' table format 1, but this is not implemented yet.
- """
- langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
- if langID is not None:
- return makeName(name, nameID, 3, 1, langID)
- else:
- log.warning("cannot add Windows name in language %s "
- "because fonttools does not yet support "
- "name table format 1" % language)
- return None
+ """Create a NameRecord for the Microsoft Windows platform
+
+ 'language' is an arbitrary IETF BCP 47 language identifier such
+ as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
+ does not support the desired language, the result will be None.
+ Future versions of fonttools might return a NameRecord for the
+ OpenType 'name' table format 1, but this is not implemented yet.
+ """
+ langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
+ if langID is not None:
+ return makeName(name, nameID, 3, 1, langID)
+ else:
+ log.warning(
+ "cannot add Windows name in language %s "
+ "because fonttools does not yet support "
+ "name table format 1" % language
+ )
+ return None
def _makeMacName(name, nameID, language, font=None):
- """Create a NameRecord for Apple platforms
-
- 'language' is an arbitrary IETF BCP 47 language identifier such
- as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
- create a Macintosh NameRecord that is understood by old applications
- (platform ID 1 and an old-style Macintosh language enum). If this
- is not possible, we create a Unicode NameRecord (platform ID 0)
- whose language points to the font’s 'ltag' table. The latter
- can encode any string in any language, but legacy applications
- might not recognize the format (in which case they will ignore
- those names).
-
- 'font' should be the TTFont for which you want to create a name.
- If 'font' is None, we only return NameRecords for legacy Macintosh;
- in that case, the result will be None for names that need to
- be encoded with an 'ltag' table.
-
- See the section “The language identifier” in Apple’s specification:
- https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
- """
- macLang = _MAC_LANGUAGE_CODES.get(language.lower())
- macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
- if macLang is not None and macScript is not None:
- encoding = getEncoding(1, macScript, macLang, default="ascii")
- # Check if we can actually encode this name. If we can't,
- # for example because we have no support for the legacy
- # encoding, or because the name string contains Unicode
- # characters that the legacy encoding cannot represent,
- # we fall back to encoding the name in Unicode and put
- # the language tag into the ltag table.
- try:
- _ = tobytes(name, encoding, errors="strict")
- return makeName(name, nameID, 1, macScript, macLang)
- except UnicodeEncodeError:
- pass
- if font is not None:
- ltag = font.tables.get("ltag")
- if ltag is None:
- ltag = font["ltag"] = newTable("ltag")
- # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
- # “The preferred platform-specific code for Unicode would be 3 or 4.”
- # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
- return makeName(name, nameID, 0, 4, ltag.addTag(language))
- else:
- log.warning("cannot store language %s into 'ltag' table "
- "without having access to the TTFont object" %
- language)
- return None
+ """Create a NameRecord for Apple platforms
+
+ 'language' is an arbitrary IETF BCP 47 language identifier such
+ as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
+ create a Macintosh NameRecord that is understood by old applications
+ (platform ID 1 and an old-style Macintosh language enum). If this
+ is not possible, we create a Unicode NameRecord (platform ID 0)
+ whose language points to the font’s 'ltag' table. The latter
+ can encode any string in any language, but legacy applications
+ might not recognize the format (in which case they will ignore
+ those names).
+
+ 'font' should be the TTFont for which you want to create a name.
+ If 'font' is None, we only return NameRecords for legacy Macintosh;
+ in that case, the result will be None for names that need to
+ be encoded with an 'ltag' table.
+
+ See the section “The language identifier” in Apple’s specification:
+ https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
+ """
+ macLang = _MAC_LANGUAGE_CODES.get(language.lower())
+ macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
+ if macLang is not None and macScript is not None:
+ encoding = getEncoding(1, macScript, macLang, default="ascii")
+ # Check if we can actually encode this name. If we can't,
+ # for example because we have no support for the legacy
+ # encoding, or because the name string contains Unicode
+ # characters that the legacy encoding cannot represent,
+ # we fall back to encoding the name in Unicode and put
+ # the language tag into the ltag table.
+ try:
+ _ = tobytes(name, encoding, errors="strict")
+ return makeName(name, nameID, 1, macScript, macLang)
+ except UnicodeEncodeError:
+ pass
+ if font is not None:
+ ltag = font.tables.get("ltag")
+ if ltag is None:
+ ltag = font["ltag"] = newTable("ltag")
+ # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
+ # “The preferred platform-specific code for Unicode would be 3 or 4.”
+ # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
+ return makeName(name, nameID, 0, 4, ltag.addTag(language))
+ else:
+ log.warning(
+ "cannot store language %s into 'ltag' table "
+ "without having access to the TTFont object" % language
+ )
+ return None
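The two helpers above decide which platform-specific records can actually be produced. A small illustrative sketch (these are module-private functions, used here only to clarify the fallback behaviour; nameID 500 is arbitrary):

from fontTools.ttLib.tables._n_a_m_e import _makeWindowsName, _makeMacName, makeName

# 'en' has both a Windows LCID (0x409) and a legacy Mac language code (0):
win = _makeWindowsName("Example", 500, "en")  # platformID=3, platEncID=1
mac = _makeMacName("Example", 500, "en")      # platformID=1, Mac Roman script

# A tag with no Windows LCID returns None, since name table format 1
# is not supported yet:
assert _makeWindowsName("Example", 500, "de-AT-1901") is None

# Both helpers ultimately delegate to the plain constructor:
rec = makeName("Example", 500, 3, 1, 0x409)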
class NameRecord(object):
-
- def getEncoding(self, default='ascii'):
- """Returns the Python encoding name for this name entry based on its platformID,
- platEncID, and langID. If encoding for these values is not known, by default
- 'ascii' is returned. That can be overriden by passing a value to the default
- argument.
- """
- return getEncoding(self.platformID, self.platEncID, self.langID, default)
-
- def encodingIsUnicodeCompatible(self):
- return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']
-
- def __str__(self):
- return self.toStr(errors='backslashreplace')
-
- def isUnicode(self):
- return (self.platformID == 0 or
- (self.platformID == 3 and self.platEncID in [0, 1, 10]))
-
- def toUnicode(self, errors='strict'):
- """
- If self.string is a Unicode string, return it; otherwise try decoding the
- bytes in self.string to a Unicode string using the encoding of this
- entry as returned by self.getEncoding(); Note that self.getEncoding()
- returns 'ascii' if the encoding is unknown to the library.
-
- Certain heuristics are performed to recover data from bytes that are
- ill-formed in the chosen encoding, or that otherwise look misencoded
- (mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
- but marked otherwise). If the bytes are ill-formed and the heuristics fail,
- the error is handled according to the errors parameter to this function, which is
- passed to the underlying decode() function; by default it throws a
- UnicodeDecodeError exception.
-
- Note: The mentioned heuristics mean that roundtripping a font to XML and back
- to binary might recover some misencoded data whereas just loading the font
- and saving it back will not change them.
- """
- def isascii(b):
- return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
- encoding = self.getEncoding()
- string = self.string
-
- if isinstance(string, bytes) and encoding == 'utf_16_be' and len(string) % 2 == 1:
- # Recover badly encoded UTF-16 strings that have an odd number of bytes:
- # - If the last byte is zero, drop it. Otherwise,
- # - If all the odd bytes are zero and all the even bytes are ASCII,
- # prepend one zero byte. Otherwise,
- # - If first byte is zero and all other bytes are ASCII, insert zero
- # bytes between consecutive ASCII bytes.
- #
- # (Yes, I've seen all of these in the wild... sigh)
- if byteord(string[-1]) == 0:
- string = string[:-1]
- elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)):
- string = b'\0' + string
- elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
- string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:])
-
- string = tostr(string, encoding=encoding, errors=errors)
-
- # If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
- # Fix it up.
- if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)):
- # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
- # narrow it down.
- string = ''.join(c for c in string[1::2])
-
- return string
-
- def toBytes(self, errors='strict'):
- """ If self.string is a bytes object, return it; otherwise try encoding
- the Unicode string in self.string to bytes using the encoding of this
- entry as returned by self.getEncoding(); Note that self.getEncoding()
- returns 'ascii' if the encoding is unknown to the library.
-
- If the Unicode string cannot be encoded to bytes in the chosen encoding,
- the error is handled according to the errors parameter to this function,
- which is passed to the underlying encode() function; by default it throws a
- UnicodeEncodeError exception.
- """
- return tobytes(self.string, encoding=self.getEncoding(), errors=errors)
-
- toStr = toUnicode
-
- def toXML(self, writer, ttFont):
- try:
- unistr = self.toUnicode()
- except UnicodeDecodeError:
- unistr = None
- attrs = [
- ("nameID", self.nameID),
- ("platformID", self.platformID),
- ("platEncID", self.platEncID),
- ("langID", hex(self.langID)),
- ]
-
- if unistr is None or not self.encodingIsUnicodeCompatible():
- attrs.append(("unicode", unistr is not None))
-
- writer.begintag("namerecord", attrs)
- writer.newline()
- if unistr is not None:
- writer.write(unistr)
- else:
- writer.write8bit(self.string)
- writer.newline()
- writer.endtag("namerecord")
- writer.newline()
-
- def fromXML(self, name, attrs, content, ttFont):
- self.nameID = safeEval(attrs["nameID"])
- self.platformID = safeEval(attrs["platformID"])
- self.platEncID = safeEval(attrs["platEncID"])
- self.langID = safeEval(attrs["langID"])
- s = strjoin(content).strip()
- encoding = self.getEncoding()
- if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
- self.string = s.encode(encoding)
- else:
- # This is the inverse of write8bit...
- self.string = s.encode("latin1")
-
- def __lt__(self, other):
- if type(self) != type(other):
- return NotImplemented
-
- try:
- # implemented so that list.sort() sorts according to the spec.
- selfTuple = (
- self.platformID,
- self.platEncID,
- self.langID,
- self.nameID,
- self.toBytes(),
- )
- otherTuple = (
- other.platformID,
- other.platEncID,
- other.langID,
- other.nameID,
- other.toBytes(),
- )
- return selfTuple < otherTuple
- except (UnicodeEncodeError, AttributeError):
- # This can only happen for
- # 1) an object that is not a NameRecord, or
- # 2) an unlikely incomplete NameRecord object which has not been
- # fully populated, or
- # 3) when all IDs are identical but the strings can't be encoded
- # for their platform encoding.
- # In all cases it is best to return NotImplemented.
- return NotImplemented
-
- def __repr__(self):
- return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
- self.nameID, self.platformID, self.langID)
+ def getEncoding(self, default="ascii"):
+ """Returns the Python encoding name for this name entry based on its platformID,
+ platEncID, and langID. If encoding for these values is not known, by default
+ 'ascii' is returned. That can be overridden by passing a value to the default
+ argument.
+ """
+ return getEncoding(self.platformID, self.platEncID, self.langID, default)
+
+ def encodingIsUnicodeCompatible(self):
+ return self.getEncoding(None) in ["utf_16_be", "ucs2be", "ascii", "latin1"]
+
+ def __str__(self):
+ return self.toStr(errors="backslashreplace")
+
+ def isUnicode(self):
+ return self.platformID == 0 or (
+ self.platformID == 3 and self.platEncID in [0, 1, 10]
+ )
+
+ def toUnicode(self, errors="strict"):
+ """
+ If self.string is a Unicode string, return it; otherwise try decoding the
+ bytes in self.string to a Unicode string using the encoding of this
+ entry as returned by self.getEncoding(); Note that self.getEncoding()
+ returns 'ascii' if the encoding is unknown to the library.
+
+ Certain heuristics are performed to recover data from bytes that are
+ ill-formed in the chosen encoding, or that otherwise look misencoded
+ (mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
+ but marked otherwise). If the bytes are ill-formed and the heuristics fail,
+ the error is handled according to the errors parameter to this function, which is
+ passed to the underlying decode() function; by default it throws a
+ UnicodeDecodeError exception.
+
+ Note: The mentioned heuristics mean that roundtripping a font to XML and back
+ to binary might recover some misencoded data whereas just loading the font
+ and saving it back will not change them.
+ """
+
+ def isascii(b):
+ return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
+
+ encoding = self.getEncoding()
+ string = self.string
+
+ if (
+ isinstance(string, bytes)
+ and encoding == "utf_16_be"
+ and len(string) % 2 == 1
+ ):
+ # Recover badly encoded UTF-16 strings that have an odd number of bytes:
+ # - If the last byte is zero, drop it. Otherwise,
+ # - If all the odd bytes are zero and all the even bytes are ASCII,
+ # prepend one zero byte. Otherwise,
+ # - If first byte is zero and all other bytes are ASCII, insert zero
+ # bytes between consecutive ASCII bytes.
+ #
+ # (Yes, I've seen all of these in the wild... sigh)
+ if byteord(string[-1]) == 0:
+ string = string[:-1]
+ elif all(
+ byteord(b) == 0 if i % 2 else isascii(byteord(b))
+ for i, b in enumerate(string)
+ ):
+ string = b"\0" + string
+ elif byteord(string[0]) == 0 and all(
+ isascii(byteord(b)) for b in string[1:]
+ ):
+ string = bytesjoin(b"\0" + bytechr(byteord(b)) for b in string[1:])
+
+ string = tostr(string, encoding=encoding, errors=errors)
+
+ # If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
+ # Fix it up.
+ if all(
+ ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i, c in enumerate(string)
+ ):
+ # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
+ # narrow it down.
+ string = "".join(c for c in string[1::2])
+
+ return string
+
+ def toBytes(self, errors="strict"):
+ """If self.string is a bytes object, return it; otherwise try encoding
+ the Unicode string in self.string to bytes using the encoding of this
+ entry as returned by self.getEncoding(); Note that self.getEncoding()
+ returns 'ascii' if the encoding is unknown to the library.
+
+ If the Unicode string cannot be encoded to bytes in the chosen encoding,
+ the error is handled according to the errors parameter to this function,
+ which is passed to the underlying encode() function; by default it throws a
+ UnicodeEncodeError exception.
+ """
+ return tobytes(self.string, encoding=self.getEncoding(), errors=errors)
+
+ toStr = toUnicode
+
+ def toXML(self, writer, ttFont):
+ try:
+ unistr = self.toUnicode()
+ except UnicodeDecodeError:
+ unistr = None
+ attrs = [
+ ("nameID", self.nameID),
+ ("platformID", self.platformID),
+ ("platEncID", self.platEncID),
+ ("langID", hex(self.langID)),
+ ]
+
+ if unistr is None or not self.encodingIsUnicodeCompatible():
+ attrs.append(("unicode", unistr is not None))
+
+ writer.begintag("namerecord", attrs)
+ writer.newline()
+ if unistr is not None:
+ writer.write(unistr)
+ else:
+ writer.write8bit(self.string)
+ writer.newline()
+ writer.endtag("namerecord")
+ writer.newline()
+
+ def fromXML(self, name, attrs, content, ttFont):
+ self.nameID = safeEval(attrs["nameID"])
+ self.platformID = safeEval(attrs["platformID"])
+ self.platEncID = safeEval(attrs["platEncID"])
+ self.langID = safeEval(attrs["langID"])
+ s = strjoin(content).strip()
+ encoding = self.getEncoding()
+ if self.encodingIsUnicodeCompatible() or safeEval(
+ attrs.get("unicode", "False")
+ ):
+ self.string = s.encode(encoding)
+ else:
+ # This is the inverse of write8bit...
+ self.string = s.encode("latin1")
+
+ def __lt__(self, other):
+ if type(self) != type(other):
+ return NotImplemented
+
+ try:
+ selfTuple = (
+ self.platformID,
+ self.platEncID,
+ self.langID,
+ self.nameID,
+ )
+ otherTuple = (
+ other.platformID,
+ other.platEncID,
+ other.langID,
+ other.nameID,
+ )
+ except AttributeError:
+ # This can only happen for
+ # 1) an object that is not a NameRecord, or
+ # 2) an unlikely incomplete NameRecord object which has not been
+ # fully populated
+ return NotImplemented
+
+ try:
+ # Include the actual NameRecord string in the comparison tuples
+ selfTuple = selfTuple + (self.toBytes(),)
+ otherTuple = otherTuple + (other.toBytes(),)
+ except UnicodeEncodeError as e:
+ # toBytes caused an encoding error in either of the two, so fall back
+ # to sorting based on IDs only
+ log.error("NameRecord sorting failed to encode: %s" % e)
+
+ # Implemented so that list.sort() sorts according to the spec by using
+ # the order of the tuple items and their comparison
+ return selfTuple < otherTuple
+
+ def __repr__(self):
+ return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
+ self.nameID,
+ self.platformID,
+ self.langID,
+ )
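A short sketch of NameRecord round-tripping, including the odd-length UTF-16BE recovery heuristic documented in toUnicode(); the byte string below is deliberately mangled for illustration:

from fontTools.ttLib.tables._n_a_m_e import makeName

rec = makeName("Blaß", 1, 3, 1, 0x407)  # Windows / German record
assert rec.getEncoding() == "utf_16_be"
assert rec.toBytes() == "Blaß".encode("utf_16_be")
assert rec.toUnicode() == "Blaß"

# A record whose UTF-16BE bytes lost their leading zero byte: the heuristic
# re-inserts it instead of raising UnicodeDecodeError.
broken = makeName(b"A\x00B\x00C", 1, 3, 1, 0x409)
assert broken.toUnicode() == "ABC"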
# Windows language ID → IETF BCP-47 language tag
@@ -604,183 +686,182 @@ class NameRecord(object):
# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
_WINDOWS_LANGUAGES = {
- 0x0436: 'af',
- 0x041C: 'sq',
- 0x0484: 'gsw',
- 0x045E: 'am',
- 0x1401: 'ar-DZ',
- 0x3C01: 'ar-BH',
- 0x0C01: 'ar',
- 0x0801: 'ar-IQ',
- 0x2C01: 'ar-JO',
- 0x3401: 'ar-KW',
- 0x3001: 'ar-LB',
- 0x1001: 'ar-LY',
- 0x1801: 'ary',
- 0x2001: 'ar-OM',
- 0x4001: 'ar-QA',
- 0x0401: 'ar-SA',
- 0x2801: 'ar-SY',
- 0x1C01: 'aeb',
- 0x3801: 'ar-AE',
- 0x2401: 'ar-YE',
- 0x042B: 'hy',
- 0x044D: 'as',
- 0x082C: 'az-Cyrl',
- 0x042C: 'az',
- 0x046D: 'ba',
- 0x042D: 'eu',
- 0x0423: 'be',
- 0x0845: 'bn',
- 0x0445: 'bn-IN',
- 0x201A: 'bs-Cyrl',
- 0x141A: 'bs',
- 0x047E: 'br',
- 0x0402: 'bg',
- 0x0403: 'ca',
- 0x0C04: 'zh-HK',
- 0x1404: 'zh-MO',
- 0x0804: 'zh',
- 0x1004: 'zh-SG',
- 0x0404: 'zh-TW',
- 0x0483: 'co',
- 0x041A: 'hr',
- 0x101A: 'hr-BA',
- 0x0405: 'cs',
- 0x0406: 'da',
- 0x048C: 'prs',
- 0x0465: 'dv',
- 0x0813: 'nl-BE',
- 0x0413: 'nl',
- 0x0C09: 'en-AU',
- 0x2809: 'en-BZ',
- 0x1009: 'en-CA',
- 0x2409: 'en-029',
- 0x4009: 'en-IN',
- 0x1809: 'en-IE',
- 0x2009: 'en-JM',
- 0x4409: 'en-MY',
- 0x1409: 'en-NZ',
- 0x3409: 'en-PH',
- 0x4809: 'en-SG',
- 0x1C09: 'en-ZA',
- 0x2C09: 'en-TT',
- 0x0809: 'en-GB',
- 0x0409: 'en',
- 0x3009: 'en-ZW',
- 0x0425: 'et',
- 0x0438: 'fo',
- 0x0464: 'fil',
- 0x040B: 'fi',
- 0x080C: 'fr-BE',
- 0x0C0C: 'fr-CA',
- 0x040C: 'fr',
- 0x140C: 'fr-LU',
- 0x180C: 'fr-MC',
- 0x100C: 'fr-CH',
- 0x0462: 'fy',
- 0x0456: 'gl',
- 0x0437: 'ka',
- 0x0C07: 'de-AT',
- 0x0407: 'de',
- 0x1407: 'de-LI',
- 0x1007: 'de-LU',
- 0x0807: 'de-CH',
- 0x0408: 'el',
- 0x046F: 'kl',
- 0x0447: 'gu',
- 0x0468: 'ha',
- 0x040D: 'he',
- 0x0439: 'hi',
- 0x040E: 'hu',
- 0x040F: 'is',
- 0x0470: 'ig',
- 0x0421: 'id',
- 0x045D: 'iu',
- 0x085D: 'iu-Latn',
- 0x083C: 'ga',
- 0x0434: 'xh',
- 0x0435: 'zu',
- 0x0410: 'it',
- 0x0810: 'it-CH',
- 0x0411: 'ja',
- 0x044B: 'kn',
- 0x043F: 'kk',
- 0x0453: 'km',
- 0x0486: 'quc',
- 0x0487: 'rw',
- 0x0441: 'sw',
- 0x0457: 'kok',
- 0x0412: 'ko',
- 0x0440: 'ky',
- 0x0454: 'lo',
- 0x0426: 'lv',
- 0x0427: 'lt',
- 0x082E: 'dsb',
- 0x046E: 'lb',
- 0x042F: 'mk',
- 0x083E: 'ms-BN',
- 0x043E: 'ms',
- 0x044C: 'ml',
- 0x043A: 'mt',
- 0x0481: 'mi',
- 0x047A: 'arn',
- 0x044E: 'mr',
- 0x047C: 'moh',
- 0x0450: 'mn',
- 0x0850: 'mn-CN',
- 0x0461: 'ne',
- 0x0414: 'nb',
- 0x0814: 'nn',
- 0x0482: 'oc',
- 0x0448: 'or',
- 0x0463: 'ps',
- 0x0415: 'pl',
- 0x0416: 'pt',
- 0x0816: 'pt-PT',
- 0x0446: 'pa',
- 0x046B: 'qu-BO',
- 0x086B: 'qu-EC',
- 0x0C6B: 'qu',
- 0x0418: 'ro',
- 0x0417: 'rm',
- 0x0419: 'ru',
- 0x243B: 'smn',
- 0x103B: 'smj-NO',
- 0x143B: 'smj',
- 0x0C3B: 'se-FI',
- 0x043B: 'se',
- 0x083B: 'se-SE',
- 0x203B: 'sms',
- 0x183B: 'sma-NO',
- 0x1C3B: 'sms',
- 0x044F: 'sa',
- 0x1C1A: 'sr-Cyrl-BA',
- 0x0C1A: 'sr',
- 0x181A: 'sr-Latn-BA',
- 0x081A: 'sr-Latn',
- 0x046C: 'nso',
- 0x0432: 'tn',
- 0x045B: 'si',
- 0x041B: 'sk',
- 0x0424: 'sl',
- 0x2C0A: 'es-AR',
- 0x400A: 'es-BO',
- 0x340A: 'es-CL',
- 0x240A: 'es-CO',
- 0x140A: 'es-CR',
- 0x1C0A: 'es-DO',
- 0x300A: 'es-EC',
- 0x440A: 'es-SV',
- 0x100A: 'es-GT',
- 0x480A: 'es-HN',
- 0x080A: 'es-MX',
- 0x4C0A: 'es-NI',
- 0x180A: 'es-PA',
- 0x3C0A: 'es-PY',
- 0x280A: 'es-PE',
- 0x500A: 'es-PR',
-
+ 0x0436: "af",
+ 0x041C: "sq",
+ 0x0484: "gsw",
+ 0x045E: "am",
+ 0x1401: "ar-DZ",
+ 0x3C01: "ar-BH",
+ 0x0C01: "ar",
+ 0x0801: "ar-IQ",
+ 0x2C01: "ar-JO",
+ 0x3401: "ar-KW",
+ 0x3001: "ar-LB",
+ 0x1001: "ar-LY",
+ 0x1801: "ary",
+ 0x2001: "ar-OM",
+ 0x4001: "ar-QA",
+ 0x0401: "ar-SA",
+ 0x2801: "ar-SY",
+ 0x1C01: "aeb",
+ 0x3801: "ar-AE",
+ 0x2401: "ar-YE",
+ 0x042B: "hy",
+ 0x044D: "as",
+ 0x082C: "az-Cyrl",
+ 0x042C: "az",
+ 0x046D: "ba",
+ 0x042D: "eu",
+ 0x0423: "be",
+ 0x0845: "bn",
+ 0x0445: "bn-IN",
+ 0x201A: "bs-Cyrl",
+ 0x141A: "bs",
+ 0x047E: "br",
+ 0x0402: "bg",
+ 0x0403: "ca",
+ 0x0C04: "zh-HK",
+ 0x1404: "zh-MO",
+ 0x0804: "zh",
+ 0x1004: "zh-SG",
+ 0x0404: "zh-TW",
+ 0x0483: "co",
+ 0x041A: "hr",
+ 0x101A: "hr-BA",
+ 0x0405: "cs",
+ 0x0406: "da",
+ 0x048C: "prs",
+ 0x0465: "dv",
+ 0x0813: "nl-BE",
+ 0x0413: "nl",
+ 0x0C09: "en-AU",
+ 0x2809: "en-BZ",
+ 0x1009: "en-CA",
+ 0x2409: "en-029",
+ 0x4009: "en-IN",
+ 0x1809: "en-IE",
+ 0x2009: "en-JM",
+ 0x4409: "en-MY",
+ 0x1409: "en-NZ",
+ 0x3409: "en-PH",
+ 0x4809: "en-SG",
+ 0x1C09: "en-ZA",
+ 0x2C09: "en-TT",
+ 0x0809: "en-GB",
+ 0x0409: "en",
+ 0x3009: "en-ZW",
+ 0x0425: "et",
+ 0x0438: "fo",
+ 0x0464: "fil",
+ 0x040B: "fi",
+ 0x080C: "fr-BE",
+ 0x0C0C: "fr-CA",
+ 0x040C: "fr",
+ 0x140C: "fr-LU",
+ 0x180C: "fr-MC",
+ 0x100C: "fr-CH",
+ 0x0462: "fy",
+ 0x0456: "gl",
+ 0x0437: "ka",
+ 0x0C07: "de-AT",
+ 0x0407: "de",
+ 0x1407: "de-LI",
+ 0x1007: "de-LU",
+ 0x0807: "de-CH",
+ 0x0408: "el",
+ 0x046F: "kl",
+ 0x0447: "gu",
+ 0x0468: "ha",
+ 0x040D: "he",
+ 0x0439: "hi",
+ 0x040E: "hu",
+ 0x040F: "is",
+ 0x0470: "ig",
+ 0x0421: "id",
+ 0x045D: "iu",
+ 0x085D: "iu-Latn",
+ 0x083C: "ga",
+ 0x0434: "xh",
+ 0x0435: "zu",
+ 0x0410: "it",
+ 0x0810: "it-CH",
+ 0x0411: "ja",
+ 0x044B: "kn",
+ 0x043F: "kk",
+ 0x0453: "km",
+ 0x0486: "quc",
+ 0x0487: "rw",
+ 0x0441: "sw",
+ 0x0457: "kok",
+ 0x0412: "ko",
+ 0x0440: "ky",
+ 0x0454: "lo",
+ 0x0426: "lv",
+ 0x0427: "lt",
+ 0x082E: "dsb",
+ 0x046E: "lb",
+ 0x042F: "mk",
+ 0x083E: "ms-BN",
+ 0x043E: "ms",
+ 0x044C: "ml",
+ 0x043A: "mt",
+ 0x0481: "mi",
+ 0x047A: "arn",
+ 0x044E: "mr",
+ 0x047C: "moh",
+ 0x0450: "mn",
+ 0x0850: "mn-CN",
+ 0x0461: "ne",
+ 0x0414: "nb",
+ 0x0814: "nn",
+ 0x0482: "oc",
+ 0x0448: "or",
+ 0x0463: "ps",
+ 0x0415: "pl",
+ 0x0416: "pt",
+ 0x0816: "pt-PT",
+ 0x0446: "pa",
+ 0x046B: "qu-BO",
+ 0x086B: "qu-EC",
+ 0x0C6B: "qu",
+ 0x0418: "ro",
+ 0x0417: "rm",
+ 0x0419: "ru",
+ 0x243B: "smn",
+ 0x103B: "smj-NO",
+ 0x143B: "smj",
+ 0x0C3B: "se-FI",
+ 0x043B: "se",
+ 0x083B: "se-SE",
+ 0x203B: "sms",
+ 0x183B: "sma-NO",
+ 0x1C3B: "sms",
+ 0x044F: "sa",
+ 0x1C1A: "sr-Cyrl-BA",
+ 0x0C1A: "sr",
+ 0x181A: "sr-Latn-BA",
+ 0x081A: "sr-Latn",
+ 0x046C: "nso",
+ 0x0432: "tn",
+ 0x045B: "si",
+ 0x041B: "sk",
+ 0x0424: "sl",
+ 0x2C0A: "es-AR",
+ 0x400A: "es-BO",
+ 0x340A: "es-CL",
+ 0x240A: "es-CO",
+ 0x140A: "es-CR",
+ 0x1C0A: "es-DO",
+ 0x300A: "es-EC",
+ 0x440A: "es-SV",
+ 0x100A: "es-GT",
+ 0x480A: "es-HN",
+ 0x080A: "es-MX",
+ 0x4C0A: "es-NI",
+ 0x180A: "es-PA",
+ 0x3C0A: "es-PY",
+ 0x280A: "es-PE",
+ 0x500A: "es-PR",
# Microsoft has defined two different language codes for
# “Spanish with modern sorting” and “Spanish with traditional
# sorting”. This makes sense for collation APIs, and it would be
@@ -788,163 +869,164 @@ _WINDOWS_LANGUAGES = {
# extensions (eg., “es-u-co-trad” is “Spanish with traditional
# sorting”). However, for storing names in fonts, this distinction
# does not make sense, so we use “es” in both cases.
- 0x0C0A: 'es',
- 0x040A: 'es',
-
- 0x540A: 'es-US',
- 0x380A: 'es-UY',
- 0x200A: 'es-VE',
- 0x081D: 'sv-FI',
- 0x041D: 'sv',
- 0x045A: 'syr',
- 0x0428: 'tg',
- 0x085F: 'tzm',
- 0x0449: 'ta',
- 0x0444: 'tt',
- 0x044A: 'te',
- 0x041E: 'th',
- 0x0451: 'bo',
- 0x041F: 'tr',
- 0x0442: 'tk',
- 0x0480: 'ug',
- 0x0422: 'uk',
- 0x042E: 'hsb',
- 0x0420: 'ur',
- 0x0843: 'uz-Cyrl',
- 0x0443: 'uz',
- 0x042A: 'vi',
- 0x0452: 'cy',
- 0x0488: 'wo',
- 0x0485: 'sah',
- 0x0478: 'ii',
- 0x046A: 'yo',
+ 0x0C0A: "es",
+ 0x040A: "es",
+ 0x540A: "es-US",
+ 0x380A: "es-UY",
+ 0x200A: "es-VE",
+ 0x081D: "sv-FI",
+ 0x041D: "sv",
+ 0x045A: "syr",
+ 0x0428: "tg",
+ 0x085F: "tzm",
+ 0x0449: "ta",
+ 0x0444: "tt",
+ 0x044A: "te",
+ 0x041E: "th",
+ 0x0451: "bo",
+ 0x041F: "tr",
+ 0x0442: "tk",
+ 0x0480: "ug",
+ 0x0422: "uk",
+ 0x042E: "hsb",
+ 0x0420: "ur",
+ 0x0843: "uz-Cyrl",
+ 0x0443: "uz",
+ 0x042A: "vi",
+ 0x0452: "cy",
+ 0x0488: "wo",
+ 0x0485: "sah",
+ 0x0478: "ii",
+ 0x046A: "yo",
}
_MAC_LANGUAGES = {
- 0: 'en',
- 1: 'fr',
- 2: 'de',
- 3: 'it',
- 4: 'nl',
- 5: 'sv',
- 6: 'es',
- 7: 'da',
- 8: 'pt',
- 9: 'no',
- 10: 'he',
- 11: 'ja',
- 12: 'ar',
- 13: 'fi',
- 14: 'el',
- 15: 'is',
- 16: 'mt',
- 17: 'tr',
- 18: 'hr',
- 19: 'zh-Hant',
- 20: 'ur',
- 21: 'hi',
- 22: 'th',
- 23: 'ko',
- 24: 'lt',
- 25: 'pl',
- 26: 'hu',
- 27: 'es',
- 28: 'lv',
- 29: 'se',
- 30: 'fo',
- 31: 'fa',
- 32: 'ru',
- 33: 'zh',
- 34: 'nl-BE',
- 35: 'ga',
- 36: 'sq',
- 37: 'ro',
- 38: 'cz',
- 39: 'sk',
- 40: 'sl',
- 41: 'yi',
- 42: 'sr',
- 43: 'mk',
- 44: 'bg',
- 45: 'uk',
- 46: 'be',
- 47: 'uz',
- 48: 'kk',
- 49: 'az-Cyrl',
- 50: 'az-Arab',
- 51: 'hy',
- 52: 'ka',
- 53: 'mo',
- 54: 'ky',
- 55: 'tg',
- 56: 'tk',
- 57: 'mn-CN',
- 58: 'mn',
- 59: 'ps',
- 60: 'ks',
- 61: 'ku',
- 62: 'sd',
- 63: 'bo',
- 64: 'ne',
- 65: 'sa',
- 66: 'mr',
- 67: 'bn',
- 68: 'as',
- 69: 'gu',
- 70: 'pa',
- 71: 'or',
- 72: 'ml',
- 73: 'kn',
- 74: 'ta',
- 75: 'te',
- 76: 'si',
- 77: 'my',
- 78: 'km',
- 79: 'lo',
- 80: 'vi',
- 81: 'id',
- 82: 'tl',
- 83: 'ms',
- 84: 'ms-Arab',
- 85: 'am',
- 86: 'ti',
- 87: 'om',
- 88: 'so',
- 89: 'sw',
- 90: 'rw',
- 91: 'rn',
- 92: 'ny',
- 93: 'mg',
- 94: 'eo',
- 128: 'cy',
- 129: 'eu',
- 130: 'ca',
- 131: 'la',
- 132: 'qu',
- 133: 'gn',
- 134: 'ay',
- 135: 'tt',
- 136: 'ug',
- 137: 'dz',
- 138: 'jv',
- 139: 'su',
- 140: 'gl',
- 141: 'af',
- 142: 'br',
- 143: 'iu',
- 144: 'gd',
- 145: 'gv',
- 146: 'ga',
- 147: 'to',
- 148: 'el-polyton',
- 149: 'kl',
- 150: 'az',
- 151: 'nn',
+ 0: "en",
+ 1: "fr",
+ 2: "de",
+ 3: "it",
+ 4: "nl",
+ 5: "sv",
+ 6: "es",
+ 7: "da",
+ 8: "pt",
+ 9: "no",
+ 10: "he",
+ 11: "ja",
+ 12: "ar",
+ 13: "fi",
+ 14: "el",
+ 15: "is",
+ 16: "mt",
+ 17: "tr",
+ 18: "hr",
+ 19: "zh-Hant",
+ 20: "ur",
+ 21: "hi",
+ 22: "th",
+ 23: "ko",
+ 24: "lt",
+ 25: "pl",
+ 26: "hu",
+ 27: "es",
+ 28: "lv",
+ 29: "se",
+ 30: "fo",
+ 31: "fa",
+ 32: "ru",
+ 33: "zh",
+ 34: "nl-BE",
+ 35: "ga",
+ 36: "sq",
+ 37: "ro",
+ 38: "cz",
+ 39: "sk",
+ 40: "sl",
+ 41: "yi",
+ 42: "sr",
+ 43: "mk",
+ 44: "bg",
+ 45: "uk",
+ 46: "be",
+ 47: "uz",
+ 48: "kk",
+ 49: "az-Cyrl",
+ 50: "az-Arab",
+ 51: "hy",
+ 52: "ka",
+ 53: "mo",
+ 54: "ky",
+ 55: "tg",
+ 56: "tk",
+ 57: "mn-CN",
+ 58: "mn",
+ 59: "ps",
+ 60: "ks",
+ 61: "ku",
+ 62: "sd",
+ 63: "bo",
+ 64: "ne",
+ 65: "sa",
+ 66: "mr",
+ 67: "bn",
+ 68: "as",
+ 69: "gu",
+ 70: "pa",
+ 71: "or",
+ 72: "ml",
+ 73: "kn",
+ 74: "ta",
+ 75: "te",
+ 76: "si",
+ 77: "my",
+ 78: "km",
+ 79: "lo",
+ 80: "vi",
+ 81: "id",
+ 82: "tl",
+ 83: "ms",
+ 84: "ms-Arab",
+ 85: "am",
+ 86: "ti",
+ 87: "om",
+ 88: "so",
+ 89: "sw",
+ 90: "rw",
+ 91: "rn",
+ 92: "ny",
+ 93: "mg",
+ 94: "eo",
+ 128: "cy",
+ 129: "eu",
+ 130: "ca",
+ 131: "la",
+ 132: "qu",
+ 133: "gn",
+ 134: "ay",
+ 135: "tt",
+ 136: "ug",
+ 137: "dz",
+ 138: "jv",
+ 139: "su",
+ 140: "gl",
+ 141: "af",
+ 142: "br",
+ 143: "iu",
+ 144: "gd",
+ 145: "gv",
+ 146: "ga",
+ 147: "to",
+ 148: "el-polyton",
+ 149: "kl",
+ 150: "az",
+ 151: "nn",
}
-_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()}
+_WINDOWS_LANGUAGE_CODES = {
+ lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()
+}
_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()}
@@ -1079,5 +1161,68 @@ _MAC_LANGUAGE_TO_SCRIPT = {
148: 6, # langGreekAncient → smRoman
149: 0, # langGreenlandic → smRoman
150: 0, # langAzerbaijanRoman → smRoman
- 151: 0, # langNynorsk → smRoman
+ 151: 0, # langNynorsk → smRoman
}
+
+
+class NameRecordVisitor(TTVisitor):
+ # Font tables that have NameIDs we need to collect.
+ TABLES = ("GSUB", "GPOS", "fvar", "CPAL", "STAT")
+
+ def __init__(self):
+ self.seen = set()
+
+
+@NameRecordVisitor.register_attrs(
+ (
+ (otTables.FeatureParamsSize, ("SubfamilyID", "SubfamilyNameID")),
+ (otTables.FeatureParamsStylisticSet, ("UINameID",)),
+ (
+ otTables.FeatureParamsCharacterVariants,
+ (
+ "FeatUILabelNameID",
+ "FeatUITooltipTextNameID",
+ "SampleTextNameID",
+ "FirstParamUILabelNameID",
+ ),
+ ),
+ (otTables.STAT, ("ElidedFallbackNameID",)),
+ (otTables.AxisRecord, ("AxisNameID",)),
+ (otTables.AxisValue, ("ValueNameID",)),
+ (otTables.FeatureName, ("FeatureNameID",)),
+ (otTables.Setting, ("SettingNameID",)),
+ )
+)
+def visit(visitor, obj, attr, value):
+ visitor.seen.add(value)
+
+
+@NameRecordVisitor.register(ttLib.getTableClass("fvar"))
+def visit(visitor, obj):
+ for inst in obj.instances:
+ if inst.postscriptNameID != 0xFFFF:
+ visitor.seen.add(inst.postscriptNameID)
+ visitor.seen.add(inst.subfamilyNameID)
+
+ for axis in obj.axes:
+ visitor.seen.add(axis.axisNameID)
+
+
+@NameRecordVisitor.register(ttLib.getTableClass("CPAL"))
+def visit(visitor, obj):
+ if obj.version == 1:
+ visitor.seen.update(obj.paletteLabels)
+ visitor.seen.update(obj.paletteEntryLabels)
+
+
+@NameRecordVisitor.register(ttLib.TTFont)
+def visit(visitor, font, *args, **kwargs):
+ if hasattr(visitor, "font"):
+ return False
+
+ visitor.font = font
+ for tag in visitor.TABLES:
+ if tag in font:
+ visitor.visit(font[tag], *args, **kwargs)
+ del visitor.font
+ return False
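The visitor registrations above feed the new removeUnusedNames() entry point. A minimal usage sketch (the font paths are hypothetical):

from fontTools.ttLib import TTFont
from fontTools.ttLib.tables._n_a_m_e import NameRecordVisitor

font = TTFont("MyVariableFont.ttf")  # hypothetical input file

# Collect every nameID referenced from GSUB, GPOS, fvar, CPAL and STAT...
visitor = NameRecordVisitor()
visitor.visit(font)
print("referenced nameIDs:", sorted(visitor.seen))

# ...then drop name records with nameID >= 256 that nothing references.
removed = font["name"].removeUnusedNames(font)
print("removed nameIDs:", sorted(removed))
font.save("MyVariableFont.cleaned.ttf")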