diff options
author | Elliott Hughes <enh@google.com> | 2023-11-10 19:23:18 +0000 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2023-11-10 19:23:18 +0000 |
commit | 02cec46c7a3918f19153e4e2de707f9d7de83fc8 (patch) | |
tree | c927c514a071f3a5ec125b3f474ad4ce4d39a7fa /Lib/fontTools/ttLib/tables/_n_a_m_e.py | |
parent | a936b27b9394502de80c116f46aff5b1a1cc3925 (diff) | |
download | fonttools-02cec46c7a3918f19153e4e2de707f9d7de83fc8.tar.gz |
Upgrade fonttools to 4.44.0
This project was upgraded with external_updater.
Usage: tools/external_updater/updater.sh update fonttools
For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
Test: TreeHugger
Change-Id: I5de68c96999d1b8671c251a2555948da63de5bc6
Diffstat (limited to 'Lib/fontTools/ttLib/tables/_n_a_m_e.py')
-rw-r--r-- | Lib/fontTools/ttLib/tables/_n_a_m_e.py | 1913 |
1 files changed, 1029 insertions, 884 deletions
diff --git a/Lib/fontTools/ttLib/tables/_n_a_m_e.py b/Lib/fontTools/ttLib/tables/_n_a_m_e.py index 9558addb..bbb4f536 100644 --- a/Lib/fontTools/ttLib/tables/_n_a_m_e.py +++ b/Lib/fontTools/ttLib/tables/_n_a_m_e.py @@ -1,8 +1,20 @@ # -*- coding: utf-8 -*- from fontTools.misc import sstruct -from fontTools.misc.textTools import bytechr, byteord, bytesjoin, strjoin, tobytes, tostr, safeEval +from fontTools.misc.textTools import ( + bytechr, + byteord, + bytesjoin, + strjoin, + tobytes, + tostr, + safeEval, +) from fontTools.misc.encodingTools import getEncoding from fontTools.ttLib import newTable +from fontTools.ttLib.ttVisitor import TTVisitor +from fontTools import ttLib +import fontTools.ttLib.tables.otTables as otTables +from fontTools.ttLib.tables import C_P_A_L_ from . import DefaultTable import struct import logging @@ -24,573 +36,643 @@ nameRecordSize = sstruct.calcsize(nameRecordFormat) class table__n_a_m_e(DefaultTable.DefaultTable): - dependencies = ["ltag"] - - def decompile(self, data, ttFont): - format, n, stringOffset = struct.unpack(b">HHH", data[:6]) - expectedStringOffset = 6 + n * nameRecordSize - if stringOffset != expectedStringOffset: - log.error( - "'name' table stringOffset incorrect. Expected: %s; Actual: %s", - expectedStringOffset, stringOffset) - stringData = data[stringOffset:] - data = data[6:] - self.names = [] - for i in range(n): - if len(data) < 12: - log.error('skipping malformed name record #%d', i) - continue - name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord()) - name.string = stringData[name.offset:name.offset+name.length] - if name.offset + name.length > len(stringData): - log.error('skipping malformed name record #%d', i) - continue - assert len(name.string) == name.length - #if (name.platEncID, name.platformID) in ((0, 0), (1, 3)): - # if len(name.string) % 2: - # print "2-byte string doesn't have even length!" - # print name.__dict__ - del name.offset, name.length - self.names.append(name) - - def compile(self, ttFont): - if not hasattr(self, "names"): - # only happens when there are NO name table entries read - # from the TTX file - self.names = [] - names = self.names - names.sort() # sort according to the spec; see NameRecord.__lt__() - stringData = b"" - format = 0 - n = len(names) - stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat) - data = struct.pack(b">HHH", format, n, stringOffset) - lastoffset = 0 - done = {} # remember the data so we can reuse the "pointers" - for name in names: - string = name.toBytes() - if string in done: - name.offset, name.length = done[string] - else: - name.offset, name.length = done[string] = len(stringData), len(string) - stringData = bytesjoin([stringData, string]) - data = data + sstruct.pack(nameRecordFormat, name) - return data + stringData - - def toXML(self, writer, ttFont): - for name in self.names: - name.toXML(writer, ttFont) - - def fromXML(self, name, attrs, content, ttFont): - if name != "namerecord": - return # ignore unknown tags - if not hasattr(self, "names"): - self.names = [] - name = NameRecord() - self.names.append(name) - name.fromXML(name, attrs, content, ttFont) - - def getName(self, nameID, platformID, platEncID, langID=None): - for namerecord in self.names: - if ( namerecord.nameID == nameID and - namerecord.platformID == platformID and - namerecord.platEncID == platEncID): - if langID is None or namerecord.langID == langID: - return namerecord - return None # not found - - def getDebugName(self, nameID): - englishName = someName = None - for name in self.names: - if name.nameID != nameID: - continue - try: - unistr = name.toUnicode() - except UnicodeDecodeError: - continue - - someName = unistr - if (name.platformID, name.langID) in ((1, 0), (3, 0x409)): - englishName = unistr - break - if englishName: - return englishName - elif someName: - return someName - else: - return None - - def getFirstDebugName(self, nameIDs): - for nameID in nameIDs: - name = self.getDebugName(nameID) - if name is not None: - return name - return None - - def getBestFamilyName(self): - # 21 = WWS Family Name - # 16 = Typographic Family Name - # 1 = Family Name - return self.getFirstDebugName((21, 16, 1)) - - def getBestSubFamilyName(self): - # 22 = WWS SubFamily Name - # 17 = Typographic SubFamily Name - # 2 = SubFamily Name - return self.getFirstDebugName((22, 17, 2)) - - def getBestFullName(self): - # 4 = Full Name - # 6 = PostScript Name - for nameIDs in ((21, 22), (16, 17), (1, 2), (4, ), (6, )): - if len(nameIDs) == 2: - name_fam = self.getDebugName(nameIDs[0]) - name_subfam = self.getDebugName(nameIDs[1]) - if None in [name_fam, name_subfam]: - continue # if any is None, skip - name = f"{name_fam} {name_subfam}" - if name_subfam.lower() == 'regular': - name = f"{name_fam}" - return name - else: - name = self.getDebugName(nameIDs[0]) - if name is not None: - return name - return None - - def setName(self, string, nameID, platformID, platEncID, langID): - """ Set the 'string' for the name record identified by 'nameID', 'platformID', - 'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it - and append to the name table. - - 'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case, - it is assumed to be already encoded with the correct plaform-specific encoding - identified by the (platformID, platEncID, langID) triplet. A warning is issued - to prevent unexpected results. - """ - if not hasattr(self, 'names'): - self.names = [] - if not isinstance(string, str): - if isinstance(string, bytes): - log.warning( - "name string is bytes, ensure it's correctly encoded: %r", string) - else: - raise TypeError( - "expected unicode or bytes, found %s: %r" % ( - type(string).__name__, string)) - namerecord = self.getName(nameID, platformID, platEncID, langID) - if namerecord: - namerecord.string = string - else: - self.names.append(makeName(string, nameID, platformID, platEncID, langID)) - - def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None): - """Remove any name records identified by the given combination of 'nameID', - 'platformID', 'platEncID' and 'langID'. - """ - args = { - argName: argValue - for argName, argValue in ( - ("nameID", nameID), - ("platformID", platformID), - ("platEncID", platEncID), - ("langID", langID), - ) - if argValue is not None - } - if not args: - # no arguments, nothing to do - return - self.names = [ - rec for rec in self.names - if any( - argValue != getattr(rec, argName) - for argName, argValue in args.items() - ) - ] - - def _findUnusedNameID(self, minNameID=256): - """Finds an unused name id. - - The nameID is assigned in the range between 'minNameID' and 32767 (inclusive), - following the last nameID in the name table. - """ - names = getattr(self, 'names', []) - nameID = 1 + max([n.nameID for n in names] + [minNameID - 1]) - if nameID > 32767: - raise ValueError("nameID must be less than 32768") - return nameID - - def findMultilingualName(self, names, windows=True, mac=True, minNameID=0): - """Return the name ID of an existing multilingual name that - matches the 'names' dictionary, or None if not found. - - 'names' is a dictionary with the name in multiple languages, - such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}. - The keys can be arbitrary IETF BCP 47 language codes; - the values are Unicode strings. - - If 'windows' is True, the returned name ID is guaranteed - exist for all requested languages for platformID=3 and - platEncID=1. - If 'mac' is True, the returned name ID is guaranteed to exist - for all requested languages for platformID=1 and platEncID=0. - - The returned name ID will not be less than the 'minNameID' - argument. - """ - # Gather the set of requested - # (string, platformID, platEncID, langID) - # tuples - reqNameSet = set() - for lang, name in sorted(names.items()): - if windows: - windowsName = _makeWindowsName(name, None, lang) - if windowsName is not None: - reqNameSet.add((windowsName.string, - windowsName.platformID, - windowsName.platEncID, - windowsName.langID)) - if mac: - macName = _makeMacName(name, None, lang) - if macName is not None: - reqNameSet.add((macName.string, - macName.platformID, - macName.platEncID, - macName.langID)) - - # Collect matching name IDs - matchingNames = dict() - for name in self.names: - try: - key = (name.toUnicode(), name.platformID, - name.platEncID, name.langID) - except UnicodeDecodeError: - continue - if key in reqNameSet and name.nameID >= minNameID: - nameSet = matchingNames.setdefault(name.nameID, set()) - nameSet.add(key) - - # Return the first name ID that defines all requested strings - for nameID, nameSet in sorted(matchingNames.items()): - if nameSet == reqNameSet: - return nameID - - return None # not found - - def addMultilingualName(self, names, ttFont=None, nameID=None, - windows=True, mac=True, minNameID=0): - """Add a multilingual name, returning its name ID - - 'names' is a dictionary with the name in multiple languages, - such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}. - The keys can be arbitrary IETF BCP 47 language codes; - the values are Unicode strings. - - 'ttFont' is the TTFont to which the names are added, or None. - If present, the font's 'ltag' table can get populated - to store exotic language codes, which allows encoding - names that otherwise cannot get encoded at all. - - 'nameID' is the name ID to be used, or None to let the library - find an existing set of name records that match, or pick an - unused name ID. - - If 'windows' is True, a platformID=3 name record will be added. - If 'mac' is True, a platformID=1 name record will be added. - - If the 'nameID' argument is None, the created nameID will not - be less than the 'minNameID' argument. - """ - if not hasattr(self, 'names'): - self.names = [] - if nameID is None: - # Reuse nameID if possible - nameID = self.findMultilingualName( - names, windows=windows, mac=mac, minNameID=minNameID) - if nameID is not None: - return nameID - nameID = self._findUnusedNameID() - # TODO: Should minimize BCP 47 language codes. - # https://github.com/fonttools/fonttools/issues/930 - for lang, name in sorted(names.items()): - if windows: - windowsName = _makeWindowsName(name, nameID, lang) - if windowsName is not None: - self.names.append(windowsName) - else: - # We cannot not make a Windows name: make sure we add a - # Mac name as a fallback. This can happen for exotic - # BCP47 language tags that have no Windows language code. - mac = True - if mac: - macName = _makeMacName(name, nameID, lang, ttFont) - if macName is not None: - self.names.append(macName) - return nameID - - def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255): - """ Add a new name record containing 'string' for each (platformID, platEncID, - langID) tuple specified in the 'platforms' list. - - The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive), - following the last nameID in the name table. - If no 'platforms' are specified, two English name records are added, one for the - Macintosh (platformID=0), and one for the Windows platform (3). - - The 'string' must be a Unicode string, so it can be encoded with different, - platform-specific encodings. - - Return the new nameID. - """ - assert len(platforms) > 0, \ - "'platforms' must contain at least one (platformID, platEncID, langID) tuple" - if not hasattr(self, 'names'): - self.names = [] - if not isinstance(string, str): - raise TypeError( - "expected str, found %s: %r" % (type(string).__name__, string)) - nameID = self._findUnusedNameID(minNameID + 1) - for platformID, platEncID, langID in platforms: - self.names.append(makeName(string, nameID, platformID, platEncID, langID)) - return nameID + dependencies = ["ltag"] + + def decompile(self, data, ttFont): + format, n, stringOffset = struct.unpack(b">HHH", data[:6]) + expectedStringOffset = 6 + n * nameRecordSize + if stringOffset != expectedStringOffset: + log.error( + "'name' table stringOffset incorrect. Expected: %s; Actual: %s", + expectedStringOffset, + stringOffset, + ) + stringData = data[stringOffset:] + data = data[6:] + self.names = [] + for i in range(n): + if len(data) < 12: + log.error("skipping malformed name record #%d", i) + continue + name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord()) + name.string = stringData[name.offset : name.offset + name.length] + if name.offset + name.length > len(stringData): + log.error("skipping malformed name record #%d", i) + continue + assert len(name.string) == name.length + # if (name.platEncID, name.platformID) in ((0, 0), (1, 3)): + # if len(name.string) % 2: + # print "2-byte string doesn't have even length!" + # print name.__dict__ + del name.offset, name.length + self.names.append(name) + + def compile(self, ttFont): + if not hasattr(self, "names"): + # only happens when there are NO name table entries read + # from the TTX file + self.names = [] + names = self.names + names.sort() # sort according to the spec; see NameRecord.__lt__() + stringData = b"" + format = 0 + n = len(names) + stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat) + data = struct.pack(b">HHH", format, n, stringOffset) + lastoffset = 0 + done = {} # remember the data so we can reuse the "pointers" + for name in names: + string = name.toBytes() + if string in done: + name.offset, name.length = done[string] + else: + name.offset, name.length = done[string] = len(stringData), len(string) + stringData = bytesjoin([stringData, string]) + data = data + sstruct.pack(nameRecordFormat, name) + return data + stringData + + def toXML(self, writer, ttFont): + for name in self.names: + name.toXML(writer, ttFont) + + def fromXML(self, name, attrs, content, ttFont): + if name != "namerecord": + return # ignore unknown tags + if not hasattr(self, "names"): + self.names = [] + name = NameRecord() + self.names.append(name) + name.fromXML(name, attrs, content, ttFont) + + def getName(self, nameID, platformID, platEncID, langID=None): + for namerecord in self.names: + if ( + namerecord.nameID == nameID + and namerecord.platformID == platformID + and namerecord.platEncID == platEncID + ): + if langID is None or namerecord.langID == langID: + return namerecord + return None # not found + + def getDebugName(self, nameID): + englishName = someName = None + for name in self.names: + if name.nameID != nameID: + continue + try: + unistr = name.toUnicode() + except UnicodeDecodeError: + continue + + someName = unistr + if (name.platformID, name.langID) in ((1, 0), (3, 0x409)): + englishName = unistr + break + if englishName: + return englishName + elif someName: + return someName + else: + return None + + def getFirstDebugName(self, nameIDs): + for nameID in nameIDs: + name = self.getDebugName(nameID) + if name is not None: + return name + return None + + def getBestFamilyName(self): + # 21 = WWS Family Name + # 16 = Typographic Family Name + # 1 = Family Name + return self.getFirstDebugName((21, 16, 1)) + + def getBestSubFamilyName(self): + # 22 = WWS SubFamily Name + # 17 = Typographic SubFamily Name + # 2 = SubFamily Name + return self.getFirstDebugName((22, 17, 2)) + + def getBestFullName(self): + # 4 = Full Name + # 6 = PostScript Name + for nameIDs in ((21, 22), (16, 17), (1, 2), (4,), (6,)): + if len(nameIDs) == 2: + name_fam = self.getDebugName(nameIDs[0]) + name_subfam = self.getDebugName(nameIDs[1]) + if None in [name_fam, name_subfam]: + continue # if any is None, skip + name = f"{name_fam} {name_subfam}" + if name_subfam.lower() == "regular": + name = f"{name_fam}" + return name + else: + name = self.getDebugName(nameIDs[0]) + if name is not None: + return name + return None + + def setName(self, string, nameID, platformID, platEncID, langID): + """Set the 'string' for the name record identified by 'nameID', 'platformID', + 'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it + and append to the name table. + + 'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case, + it is assumed to be already encoded with the correct plaform-specific encoding + identified by the (platformID, platEncID, langID) triplet. A warning is issued + to prevent unexpected results. + """ + if not hasattr(self, "names"): + self.names = [] + if not isinstance(string, str): + if isinstance(string, bytes): + log.warning( + "name string is bytes, ensure it's correctly encoded: %r", string + ) + else: + raise TypeError( + "expected unicode or bytes, found %s: %r" + % (type(string).__name__, string) + ) + namerecord = self.getName(nameID, platformID, platEncID, langID) + if namerecord: + namerecord.string = string + else: + self.names.append(makeName(string, nameID, platformID, platEncID, langID)) + + def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None): + """Remove any name records identified by the given combination of 'nameID', + 'platformID', 'platEncID' and 'langID'. + """ + args = { + argName: argValue + for argName, argValue in ( + ("nameID", nameID), + ("platformID", platformID), + ("platEncID", platEncID), + ("langID", langID), + ) + if argValue is not None + } + if not args: + # no arguments, nothing to do + return + self.names = [ + rec + for rec in self.names + if any( + argValue != getattr(rec, argName) for argName, argValue in args.items() + ) + ] + + @staticmethod + def removeUnusedNames(ttFont): + """Remove any name records which are not in NameID range 0-255 and not utilized + within the font itself.""" + visitor = NameRecordVisitor() + visitor.visit(ttFont) + toDelete = set() + for record in ttFont["name"].names: + # Name IDs 26 to 255, inclusive, are reserved for future standard names. + # https://learn.microsoft.com/en-us/typography/opentype/spec/name#name-ids + if record.nameID < 256: + continue + if record.nameID not in visitor.seen: + toDelete.add(record.nameID) + + for nameID in toDelete: + ttFont["name"].removeNames(nameID) + return toDelete + + def _findUnusedNameID(self, minNameID=256): + """Finds an unused name id. + + The nameID is assigned in the range between 'minNameID' and 32767 (inclusive), + following the last nameID in the name table. + """ + names = getattr(self, "names", []) + nameID = 1 + max([n.nameID for n in names] + [minNameID - 1]) + if nameID > 32767: + raise ValueError("nameID must be less than 32768") + return nameID + + def findMultilingualName( + self, names, windows=True, mac=True, minNameID=0, ttFont=None + ): + """Return the name ID of an existing multilingual name that + matches the 'names' dictionary, or None if not found. + + 'names' is a dictionary with the name in multiple languages, + such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}. + The keys can be arbitrary IETF BCP 47 language codes; + the values are Unicode strings. + + If 'windows' is True, the returned name ID is guaranteed + exist for all requested languages for platformID=3 and + platEncID=1. + If 'mac' is True, the returned name ID is guaranteed to exist + for all requested languages for platformID=1 and platEncID=0. + + The returned name ID will not be less than the 'minNameID' + argument. + """ + # Gather the set of requested + # (string, platformID, platEncID, langID) + # tuples + reqNameSet = set() + for lang, name in sorted(names.items()): + if windows: + windowsName = _makeWindowsName(name, None, lang) + if windowsName is not None: + reqNameSet.add( + ( + windowsName.string, + windowsName.platformID, + windowsName.platEncID, + windowsName.langID, + ) + ) + if mac: + macName = _makeMacName(name, None, lang, ttFont) + if macName is not None: + reqNameSet.add( + ( + macName.string, + macName.platformID, + macName.platEncID, + macName.langID, + ) + ) + + # Collect matching name IDs + matchingNames = dict() + for name in self.names: + try: + key = (name.toUnicode(), name.platformID, name.platEncID, name.langID) + except UnicodeDecodeError: + continue + if key in reqNameSet and name.nameID >= minNameID: + nameSet = matchingNames.setdefault(name.nameID, set()) + nameSet.add(key) + + # Return the first name ID that defines all requested strings + for nameID, nameSet in sorted(matchingNames.items()): + if nameSet == reqNameSet: + return nameID + + return None # not found + + def addMultilingualName( + self, names, ttFont=None, nameID=None, windows=True, mac=True, minNameID=0 + ): + """Add a multilingual name, returning its name ID + + 'names' is a dictionary with the name in multiple languages, + such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}. + The keys can be arbitrary IETF BCP 47 language codes; + the values are Unicode strings. + + 'ttFont' is the TTFont to which the names are added, or None. + If present, the font's 'ltag' table can get populated + to store exotic language codes, which allows encoding + names that otherwise cannot get encoded at all. + + 'nameID' is the name ID to be used, or None to let the library + find an existing set of name records that match, or pick an + unused name ID. + + If 'windows' is True, a platformID=3 name record will be added. + If 'mac' is True, a platformID=1 name record will be added. + + If the 'nameID' argument is None, the created nameID will not + be less than the 'minNameID' argument. + """ + if not hasattr(self, "names"): + self.names = [] + if nameID is None: + # Reuse nameID if possible + nameID = self.findMultilingualName( + names, windows=windows, mac=mac, minNameID=minNameID, ttFont=ttFont + ) + if nameID is not None: + return nameID + nameID = self._findUnusedNameID() + # TODO: Should minimize BCP 47 language codes. + # https://github.com/fonttools/fonttools/issues/930 + for lang, name in sorted(names.items()): + if windows: + windowsName = _makeWindowsName(name, nameID, lang) + if windowsName is not None: + self.names.append(windowsName) + else: + # We cannot not make a Windows name: make sure we add a + # Mac name as a fallback. This can happen for exotic + # BCP47 language tags that have no Windows language code. + mac = True + if mac: + macName = _makeMacName(name, nameID, lang, ttFont) + if macName is not None: + self.names.append(macName) + return nameID + + def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255): + """Add a new name record containing 'string' for each (platformID, platEncID, + langID) tuple specified in the 'platforms' list. + + The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive), + following the last nameID in the name table. + If no 'platforms' are specified, two English name records are added, one for the + Macintosh (platformID=0), and one for the Windows platform (3). + + The 'string' must be a Unicode string, so it can be encoded with different, + platform-specific encodings. + + Return the new nameID. + """ + assert ( + len(platforms) > 0 + ), "'platforms' must contain at least one (platformID, platEncID, langID) tuple" + if not hasattr(self, "names"): + self.names = [] + if not isinstance(string, str): + raise TypeError( + "expected str, found %s: %r" % (type(string).__name__, string) + ) + nameID = self._findUnusedNameID(minNameID + 1) + for platformID, platEncID, langID in platforms: + self.names.append(makeName(string, nameID, platformID, platEncID, langID)) + return nameID def makeName(string, nameID, platformID, platEncID, langID): - name = NameRecord() - name.string, name.nameID, name.platformID, name.platEncID, name.langID = ( - string, nameID, platformID, platEncID, langID) - return name + name = NameRecord() + name.string, name.nameID, name.platformID, name.platEncID, name.langID = ( + string, + nameID, + platformID, + platEncID, + langID, + ) + return name def _makeWindowsName(name, nameID, language): - """Create a NameRecord for the Microsoft Windows platform - - 'language' is an arbitrary IETF BCP 47 language identifier such - as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows - does not support the desired language, the result will be None. - Future versions of fonttools might return a NameRecord for the - OpenType 'name' table format 1, but this is not implemented yet. - """ - langID = _WINDOWS_LANGUAGE_CODES.get(language.lower()) - if langID is not None: - return makeName(name, nameID, 3, 1, langID) - else: - log.warning("cannot add Windows name in language %s " - "because fonttools does not yet support " - "name table format 1" % language) - return None + """Create a NameRecord for the Microsoft Windows platform + + 'language' is an arbitrary IETF BCP 47 language identifier such + as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows + does not support the desired language, the result will be None. + Future versions of fonttools might return a NameRecord for the + OpenType 'name' table format 1, but this is not implemented yet. + """ + langID = _WINDOWS_LANGUAGE_CODES.get(language.lower()) + if langID is not None: + return makeName(name, nameID, 3, 1, langID) + else: + log.warning( + "cannot add Windows name in language %s " + "because fonttools does not yet support " + "name table format 1" % language + ) + return None def _makeMacName(name, nameID, language, font=None): - """Create a NameRecord for Apple platforms - - 'language' is an arbitrary IETF BCP 47 language identifier such - as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we - create a Macintosh NameRecord that is understood by old applications - (platform ID 1 and an old-style Macintosh language enum). If this - is not possible, we create a Unicode NameRecord (platform ID 0) - whose language points to the font’s 'ltag' table. The latter - can encode any string in any language, but legacy applications - might not recognize the format (in which case they will ignore - those names). - - 'font' should be the TTFont for which you want to create a name. - If 'font' is None, we only return NameRecords for legacy Macintosh; - in that case, the result will be None for names that need to - be encoded with an 'ltag' table. - - See the section “The language identifier” in Apple’s specification: - https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html - """ - macLang = _MAC_LANGUAGE_CODES.get(language.lower()) - macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang) - if macLang is not None and macScript is not None: - encoding = getEncoding(1, macScript, macLang, default="ascii") - # Check if we can actually encode this name. If we can't, - # for example because we have no support for the legacy - # encoding, or because the name string contains Unicode - # characters that the legacy encoding cannot represent, - # we fall back to encoding the name in Unicode and put - # the language tag into the ltag table. - try: - _ = tobytes(name, encoding, errors="strict") - return makeName(name, nameID, 1, macScript, macLang) - except UnicodeEncodeError: - pass - if font is not None: - ltag = font.tables.get("ltag") - if ltag is None: - ltag = font["ltag"] = newTable("ltag") - # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)” - # “The preferred platform-specific code for Unicode would be 3 or 4.” - # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html - return makeName(name, nameID, 0, 4, ltag.addTag(language)) - else: - log.warning("cannot store language %s into 'ltag' table " - "without having access to the TTFont object" % - language) - return None + """Create a NameRecord for Apple platforms + + 'language' is an arbitrary IETF BCP 47 language identifier such + as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we + create a Macintosh NameRecord that is understood by old applications + (platform ID 1 and an old-style Macintosh language enum). If this + is not possible, we create a Unicode NameRecord (platform ID 0) + whose language points to the font’s 'ltag' table. The latter + can encode any string in any language, but legacy applications + might not recognize the format (in which case they will ignore + those names). + + 'font' should be the TTFont for which you want to create a name. + If 'font' is None, we only return NameRecords for legacy Macintosh; + in that case, the result will be None for names that need to + be encoded with an 'ltag' table. + + See the section “The language identifier” in Apple’s specification: + https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html + """ + macLang = _MAC_LANGUAGE_CODES.get(language.lower()) + macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang) + if macLang is not None and macScript is not None: + encoding = getEncoding(1, macScript, macLang, default="ascii") + # Check if we can actually encode this name. If we can't, + # for example because we have no support for the legacy + # encoding, or because the name string contains Unicode + # characters that the legacy encoding cannot represent, + # we fall back to encoding the name in Unicode and put + # the language tag into the ltag table. + try: + _ = tobytes(name, encoding, errors="strict") + return makeName(name, nameID, 1, macScript, macLang) + except UnicodeEncodeError: + pass + if font is not None: + ltag = font.tables.get("ltag") + if ltag is None: + ltag = font["ltag"] = newTable("ltag") + # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)” + # “The preferred platform-specific code for Unicode would be 3 or 4.” + # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html + return makeName(name, nameID, 0, 4, ltag.addTag(language)) + else: + log.warning( + "cannot store language %s into 'ltag' table " + "without having access to the TTFont object" % language + ) + return None class NameRecord(object): - - def getEncoding(self, default='ascii'): - """Returns the Python encoding name for this name entry based on its platformID, - platEncID, and langID. If encoding for these values is not known, by default - 'ascii' is returned. That can be overriden by passing a value to the default - argument. - """ - return getEncoding(self.platformID, self.platEncID, self.langID, default) - - def encodingIsUnicodeCompatible(self): - return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1'] - - def __str__(self): - return self.toStr(errors='backslashreplace') - - def isUnicode(self): - return (self.platformID == 0 or - (self.platformID == 3 and self.platEncID in [0, 1, 10])) - - def toUnicode(self, errors='strict'): - """ - If self.string is a Unicode string, return it; otherwise try decoding the - bytes in self.string to a Unicode string using the encoding of this - entry as returned by self.getEncoding(); Note that self.getEncoding() - returns 'ascii' if the encoding is unknown to the library. - - Certain heuristics are performed to recover data from bytes that are - ill-formed in the chosen encoding, or that otherwise look misencoded - (mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE - but marked otherwise). If the bytes are ill-formed and the heuristics fail, - the error is handled according to the errors parameter to this function, which is - passed to the underlying decode() function; by default it throws a - UnicodeDecodeError exception. - - Note: The mentioned heuristics mean that roundtripping a font to XML and back - to binary might recover some misencoded data whereas just loading the font - and saving it back will not change them. - """ - def isascii(b): - return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D] - encoding = self.getEncoding() - string = self.string - - if isinstance(string, bytes) and encoding == 'utf_16_be' and len(string) % 2 == 1: - # Recover badly encoded UTF-16 strings that have an odd number of bytes: - # - If the last byte is zero, drop it. Otherwise, - # - If all the odd bytes are zero and all the even bytes are ASCII, - # prepend one zero byte. Otherwise, - # - If first byte is zero and all other bytes are ASCII, insert zero - # bytes between consecutive ASCII bytes. - # - # (Yes, I've seen all of these in the wild... sigh) - if byteord(string[-1]) == 0: - string = string[:-1] - elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)): - string = b'\0' + string - elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]): - string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:]) - - string = tostr(string, encoding=encoding, errors=errors) - - # If decoded strings still looks like UTF-16BE, it suggests a double-encoding. - # Fix it up. - if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)): - # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text, - # narrow it down. - string = ''.join(c for c in string[1::2]) - - return string - - def toBytes(self, errors='strict'): - """ If self.string is a bytes object, return it; otherwise try encoding - the Unicode string in self.string to bytes using the encoding of this - entry as returned by self.getEncoding(); Note that self.getEncoding() - returns 'ascii' if the encoding is unknown to the library. - - If the Unicode string cannot be encoded to bytes in the chosen encoding, - the error is handled according to the errors parameter to this function, - which is passed to the underlying encode() function; by default it throws a - UnicodeEncodeError exception. - """ - return tobytes(self.string, encoding=self.getEncoding(), errors=errors) - - toStr = toUnicode - - def toXML(self, writer, ttFont): - try: - unistr = self.toUnicode() - except UnicodeDecodeError: - unistr = None - attrs = [ - ("nameID", self.nameID), - ("platformID", self.platformID), - ("platEncID", self.platEncID), - ("langID", hex(self.langID)), - ] - - if unistr is None or not self.encodingIsUnicodeCompatible(): - attrs.append(("unicode", unistr is not None)) - - writer.begintag("namerecord", attrs) - writer.newline() - if unistr is not None: - writer.write(unistr) - else: - writer.write8bit(self.string) - writer.newline() - writer.endtag("namerecord") - writer.newline() - - def fromXML(self, name, attrs, content, ttFont): - self.nameID = safeEval(attrs["nameID"]) - self.platformID = safeEval(attrs["platformID"]) - self.platEncID = safeEval(attrs["platEncID"]) - self.langID = safeEval(attrs["langID"]) - s = strjoin(content).strip() - encoding = self.getEncoding() - if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")): - self.string = s.encode(encoding) - else: - # This is the inverse of write8bit... - self.string = s.encode("latin1") - - def __lt__(self, other): - if type(self) != type(other): - return NotImplemented - - try: - # implemented so that list.sort() sorts according to the spec. - selfTuple = ( - self.platformID, - self.platEncID, - self.langID, - self.nameID, - self.toBytes(), - ) - otherTuple = ( - other.platformID, - other.platEncID, - other.langID, - other.nameID, - other.toBytes(), - ) - return selfTuple < otherTuple - except (UnicodeEncodeError, AttributeError): - # This can only happen for - # 1) an object that is not a NameRecord, or - # 2) an unlikely incomplete NameRecord object which has not been - # fully populated, or - # 3) when all IDs are identical but the strings can't be encoded - # for their platform encoding. - # In all cases it is best to return NotImplemented. - return NotImplemented - - def __repr__(self): - return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % ( - self.nameID, self.platformID, self.langID) + def getEncoding(self, default="ascii"): + """Returns the Python encoding name for this name entry based on its platformID, + platEncID, and langID. If encoding for these values is not known, by default + 'ascii' is returned. That can be overriden by passing a value to the default + argument. + """ + return getEncoding(self.platformID, self.platEncID, self.langID, default) + + def encodingIsUnicodeCompatible(self): + return self.getEncoding(None) in ["utf_16_be", "ucs2be", "ascii", "latin1"] + + def __str__(self): + return self.toStr(errors="backslashreplace") + + def isUnicode(self): + return self.platformID == 0 or ( + self.platformID == 3 and self.platEncID in [0, 1, 10] + ) + + def toUnicode(self, errors="strict"): + """ + If self.string is a Unicode string, return it; otherwise try decoding the + bytes in self.string to a Unicode string using the encoding of this + entry as returned by self.getEncoding(); Note that self.getEncoding() + returns 'ascii' if the encoding is unknown to the library. + + Certain heuristics are performed to recover data from bytes that are + ill-formed in the chosen encoding, or that otherwise look misencoded + (mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE + but marked otherwise). If the bytes are ill-formed and the heuristics fail, + the error is handled according to the errors parameter to this function, which is + passed to the underlying decode() function; by default it throws a + UnicodeDecodeError exception. + + Note: The mentioned heuristics mean that roundtripping a font to XML and back + to binary might recover some misencoded data whereas just loading the font + and saving it back will not change them. + """ + + def isascii(b): + return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D] + + encoding = self.getEncoding() + string = self.string + + if ( + isinstance(string, bytes) + and encoding == "utf_16_be" + and len(string) % 2 == 1 + ): + # Recover badly encoded UTF-16 strings that have an odd number of bytes: + # - If the last byte is zero, drop it. Otherwise, + # - If all the odd bytes are zero and all the even bytes are ASCII, + # prepend one zero byte. Otherwise, + # - If first byte is zero and all other bytes are ASCII, insert zero + # bytes between consecutive ASCII bytes. + # + # (Yes, I've seen all of these in the wild... sigh) + if byteord(string[-1]) == 0: + string = string[:-1] + elif all( + byteord(b) == 0 if i % 2 else isascii(byteord(b)) + for i, b in enumerate(string) + ): + string = b"\0" + string + elif byteord(string[0]) == 0 and all( + isascii(byteord(b)) for b in string[1:] + ): + string = bytesjoin(b"\0" + bytechr(byteord(b)) for b in string[1:]) + + string = tostr(string, encoding=encoding, errors=errors) + + # If decoded strings still looks like UTF-16BE, it suggests a double-encoding. + # Fix it up. + if all( + ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i, c in enumerate(string) + ): + # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text, + # narrow it down. + string = "".join(c for c in string[1::2]) + + return string + + def toBytes(self, errors="strict"): + """If self.string is a bytes object, return it; otherwise try encoding + the Unicode string in self.string to bytes using the encoding of this + entry as returned by self.getEncoding(); Note that self.getEncoding() + returns 'ascii' if the encoding is unknown to the library. + + If the Unicode string cannot be encoded to bytes in the chosen encoding, + the error is handled according to the errors parameter to this function, + which is passed to the underlying encode() function; by default it throws a + UnicodeEncodeError exception. + """ + return tobytes(self.string, encoding=self.getEncoding(), errors=errors) + + toStr = toUnicode + + def toXML(self, writer, ttFont): + try: + unistr = self.toUnicode() + except UnicodeDecodeError: + unistr = None + attrs = [ + ("nameID", self.nameID), + ("platformID", self.platformID), + ("platEncID", self.platEncID), + ("langID", hex(self.langID)), + ] + + if unistr is None or not self.encodingIsUnicodeCompatible(): + attrs.append(("unicode", unistr is not None)) + + writer.begintag("namerecord", attrs) + writer.newline() + if unistr is not None: + writer.write(unistr) + else: + writer.write8bit(self.string) + writer.newline() + writer.endtag("namerecord") + writer.newline() + + def fromXML(self, name, attrs, content, ttFont): + self.nameID = safeEval(attrs["nameID"]) + self.platformID = safeEval(attrs["platformID"]) + self.platEncID = safeEval(attrs["platEncID"]) + self.langID = safeEval(attrs["langID"]) + s = strjoin(content).strip() + encoding = self.getEncoding() + if self.encodingIsUnicodeCompatible() or safeEval( + attrs.get("unicode", "False") + ): + self.string = s.encode(encoding) + else: + # This is the inverse of write8bit... + self.string = s.encode("latin1") + + def __lt__(self, other): + if type(self) != type(other): + return NotImplemented + + try: + selfTuple = ( + self.platformID, + self.platEncID, + self.langID, + self.nameID, + ) + otherTuple = ( + other.platformID, + other.platEncID, + other.langID, + other.nameID, + ) + except AttributeError: + # This can only happen for + # 1) an object that is not a NameRecord, or + # 2) an unlikely incomplete NameRecord object which has not been + # fully populated + return NotImplemented + + try: + # Include the actual NameRecord string in the comparison tuples + selfTuple = selfTuple + (self.toBytes(),) + otherTuple = otherTuple + (other.toBytes(),) + except UnicodeEncodeError as e: + # toBytes caused an encoding error in either of the two, so content + # to sorting based on IDs only + log.error("NameRecord sorting failed to encode: %s" % e) + + # Implemented so that list.sort() sorts according to the spec by using + # the order of the tuple items and their comparison + return selfTuple < otherTuple + + def __repr__(self): + return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % ( + self.nameID, + self.platformID, + self.langID, + ) # Windows language ID → IETF BCP-47 language tag @@ -604,183 +686,182 @@ class NameRecord(object): # http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html # http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry _WINDOWS_LANGUAGES = { - 0x0436: 'af', - 0x041C: 'sq', - 0x0484: 'gsw', - 0x045E: 'am', - 0x1401: 'ar-DZ', - 0x3C01: 'ar-BH', - 0x0C01: 'ar', - 0x0801: 'ar-IQ', - 0x2C01: 'ar-JO', - 0x3401: 'ar-KW', - 0x3001: 'ar-LB', - 0x1001: 'ar-LY', - 0x1801: 'ary', - 0x2001: 'ar-OM', - 0x4001: 'ar-QA', - 0x0401: 'ar-SA', - 0x2801: 'ar-SY', - 0x1C01: 'aeb', - 0x3801: 'ar-AE', - 0x2401: 'ar-YE', - 0x042B: 'hy', - 0x044D: 'as', - 0x082C: 'az-Cyrl', - 0x042C: 'az', - 0x046D: 'ba', - 0x042D: 'eu', - 0x0423: 'be', - 0x0845: 'bn', - 0x0445: 'bn-IN', - 0x201A: 'bs-Cyrl', - 0x141A: 'bs', - 0x047E: 'br', - 0x0402: 'bg', - 0x0403: 'ca', - 0x0C04: 'zh-HK', - 0x1404: 'zh-MO', - 0x0804: 'zh', - 0x1004: 'zh-SG', - 0x0404: 'zh-TW', - 0x0483: 'co', - 0x041A: 'hr', - 0x101A: 'hr-BA', - 0x0405: 'cs', - 0x0406: 'da', - 0x048C: 'prs', - 0x0465: 'dv', - 0x0813: 'nl-BE', - 0x0413: 'nl', - 0x0C09: 'en-AU', - 0x2809: 'en-BZ', - 0x1009: 'en-CA', - 0x2409: 'en-029', - 0x4009: 'en-IN', - 0x1809: 'en-IE', - 0x2009: 'en-JM', - 0x4409: 'en-MY', - 0x1409: 'en-NZ', - 0x3409: 'en-PH', - 0x4809: 'en-SG', - 0x1C09: 'en-ZA', - 0x2C09: 'en-TT', - 0x0809: 'en-GB', - 0x0409: 'en', - 0x3009: 'en-ZW', - 0x0425: 'et', - 0x0438: 'fo', - 0x0464: 'fil', - 0x040B: 'fi', - 0x080C: 'fr-BE', - 0x0C0C: 'fr-CA', - 0x040C: 'fr', - 0x140C: 'fr-LU', - 0x180C: 'fr-MC', - 0x100C: 'fr-CH', - 0x0462: 'fy', - 0x0456: 'gl', - 0x0437: 'ka', - 0x0C07: 'de-AT', - 0x0407: 'de', - 0x1407: 'de-LI', - 0x1007: 'de-LU', - 0x0807: 'de-CH', - 0x0408: 'el', - 0x046F: 'kl', - 0x0447: 'gu', - 0x0468: 'ha', - 0x040D: 'he', - 0x0439: 'hi', - 0x040E: 'hu', - 0x040F: 'is', - 0x0470: 'ig', - 0x0421: 'id', - 0x045D: 'iu', - 0x085D: 'iu-Latn', - 0x083C: 'ga', - 0x0434: 'xh', - 0x0435: 'zu', - 0x0410: 'it', - 0x0810: 'it-CH', - 0x0411: 'ja', - 0x044B: 'kn', - 0x043F: 'kk', - 0x0453: 'km', - 0x0486: 'quc', - 0x0487: 'rw', - 0x0441: 'sw', - 0x0457: 'kok', - 0x0412: 'ko', - 0x0440: 'ky', - 0x0454: 'lo', - 0x0426: 'lv', - 0x0427: 'lt', - 0x082E: 'dsb', - 0x046E: 'lb', - 0x042F: 'mk', - 0x083E: 'ms-BN', - 0x043E: 'ms', - 0x044C: 'ml', - 0x043A: 'mt', - 0x0481: 'mi', - 0x047A: 'arn', - 0x044E: 'mr', - 0x047C: 'moh', - 0x0450: 'mn', - 0x0850: 'mn-CN', - 0x0461: 'ne', - 0x0414: 'nb', - 0x0814: 'nn', - 0x0482: 'oc', - 0x0448: 'or', - 0x0463: 'ps', - 0x0415: 'pl', - 0x0416: 'pt', - 0x0816: 'pt-PT', - 0x0446: 'pa', - 0x046B: 'qu-BO', - 0x086B: 'qu-EC', - 0x0C6B: 'qu', - 0x0418: 'ro', - 0x0417: 'rm', - 0x0419: 'ru', - 0x243B: 'smn', - 0x103B: 'smj-NO', - 0x143B: 'smj', - 0x0C3B: 'se-FI', - 0x043B: 'se', - 0x083B: 'se-SE', - 0x203B: 'sms', - 0x183B: 'sma-NO', - 0x1C3B: 'sms', - 0x044F: 'sa', - 0x1C1A: 'sr-Cyrl-BA', - 0x0C1A: 'sr', - 0x181A: 'sr-Latn-BA', - 0x081A: 'sr-Latn', - 0x046C: 'nso', - 0x0432: 'tn', - 0x045B: 'si', - 0x041B: 'sk', - 0x0424: 'sl', - 0x2C0A: 'es-AR', - 0x400A: 'es-BO', - 0x340A: 'es-CL', - 0x240A: 'es-CO', - 0x140A: 'es-CR', - 0x1C0A: 'es-DO', - 0x300A: 'es-EC', - 0x440A: 'es-SV', - 0x100A: 'es-GT', - 0x480A: 'es-HN', - 0x080A: 'es-MX', - 0x4C0A: 'es-NI', - 0x180A: 'es-PA', - 0x3C0A: 'es-PY', - 0x280A: 'es-PE', - 0x500A: 'es-PR', - + 0x0436: "af", + 0x041C: "sq", + 0x0484: "gsw", + 0x045E: "am", + 0x1401: "ar-DZ", + 0x3C01: "ar-BH", + 0x0C01: "ar", + 0x0801: "ar-IQ", + 0x2C01: "ar-JO", + 0x3401: "ar-KW", + 0x3001: "ar-LB", + 0x1001: "ar-LY", + 0x1801: "ary", + 0x2001: "ar-OM", + 0x4001: "ar-QA", + 0x0401: "ar-SA", + 0x2801: "ar-SY", + 0x1C01: "aeb", + 0x3801: "ar-AE", + 0x2401: "ar-YE", + 0x042B: "hy", + 0x044D: "as", + 0x082C: "az-Cyrl", + 0x042C: "az", + 0x046D: "ba", + 0x042D: "eu", + 0x0423: "be", + 0x0845: "bn", + 0x0445: "bn-IN", + 0x201A: "bs-Cyrl", + 0x141A: "bs", + 0x047E: "br", + 0x0402: "bg", + 0x0403: "ca", + 0x0C04: "zh-HK", + 0x1404: "zh-MO", + 0x0804: "zh", + 0x1004: "zh-SG", + 0x0404: "zh-TW", + 0x0483: "co", + 0x041A: "hr", + 0x101A: "hr-BA", + 0x0405: "cs", + 0x0406: "da", + 0x048C: "prs", + 0x0465: "dv", + 0x0813: "nl-BE", + 0x0413: "nl", + 0x0C09: "en-AU", + 0x2809: "en-BZ", + 0x1009: "en-CA", + 0x2409: "en-029", + 0x4009: "en-IN", + 0x1809: "en-IE", + 0x2009: "en-JM", + 0x4409: "en-MY", + 0x1409: "en-NZ", + 0x3409: "en-PH", + 0x4809: "en-SG", + 0x1C09: "en-ZA", + 0x2C09: "en-TT", + 0x0809: "en-GB", + 0x0409: "en", + 0x3009: "en-ZW", + 0x0425: "et", + 0x0438: "fo", + 0x0464: "fil", + 0x040B: "fi", + 0x080C: "fr-BE", + 0x0C0C: "fr-CA", + 0x040C: "fr", + 0x140C: "fr-LU", + 0x180C: "fr-MC", + 0x100C: "fr-CH", + 0x0462: "fy", + 0x0456: "gl", + 0x0437: "ka", + 0x0C07: "de-AT", + 0x0407: "de", + 0x1407: "de-LI", + 0x1007: "de-LU", + 0x0807: "de-CH", + 0x0408: "el", + 0x046F: "kl", + 0x0447: "gu", + 0x0468: "ha", + 0x040D: "he", + 0x0439: "hi", + 0x040E: "hu", + 0x040F: "is", + 0x0470: "ig", + 0x0421: "id", + 0x045D: "iu", + 0x085D: "iu-Latn", + 0x083C: "ga", + 0x0434: "xh", + 0x0435: "zu", + 0x0410: "it", + 0x0810: "it-CH", + 0x0411: "ja", + 0x044B: "kn", + 0x043F: "kk", + 0x0453: "km", + 0x0486: "quc", + 0x0487: "rw", + 0x0441: "sw", + 0x0457: "kok", + 0x0412: "ko", + 0x0440: "ky", + 0x0454: "lo", + 0x0426: "lv", + 0x0427: "lt", + 0x082E: "dsb", + 0x046E: "lb", + 0x042F: "mk", + 0x083E: "ms-BN", + 0x043E: "ms", + 0x044C: "ml", + 0x043A: "mt", + 0x0481: "mi", + 0x047A: "arn", + 0x044E: "mr", + 0x047C: "moh", + 0x0450: "mn", + 0x0850: "mn-CN", + 0x0461: "ne", + 0x0414: "nb", + 0x0814: "nn", + 0x0482: "oc", + 0x0448: "or", + 0x0463: "ps", + 0x0415: "pl", + 0x0416: "pt", + 0x0816: "pt-PT", + 0x0446: "pa", + 0x046B: "qu-BO", + 0x086B: "qu-EC", + 0x0C6B: "qu", + 0x0418: "ro", + 0x0417: "rm", + 0x0419: "ru", + 0x243B: "smn", + 0x103B: "smj-NO", + 0x143B: "smj", + 0x0C3B: "se-FI", + 0x043B: "se", + 0x083B: "se-SE", + 0x203B: "sms", + 0x183B: "sma-NO", + 0x1C3B: "sms", + 0x044F: "sa", + 0x1C1A: "sr-Cyrl-BA", + 0x0C1A: "sr", + 0x181A: "sr-Latn-BA", + 0x081A: "sr-Latn", + 0x046C: "nso", + 0x0432: "tn", + 0x045B: "si", + 0x041B: "sk", + 0x0424: "sl", + 0x2C0A: "es-AR", + 0x400A: "es-BO", + 0x340A: "es-CL", + 0x240A: "es-CO", + 0x140A: "es-CR", + 0x1C0A: "es-DO", + 0x300A: "es-EC", + 0x440A: "es-SV", + 0x100A: "es-GT", + 0x480A: "es-HN", + 0x080A: "es-MX", + 0x4C0A: "es-NI", + 0x180A: "es-PA", + 0x3C0A: "es-PY", + 0x280A: "es-PE", + 0x500A: "es-PR", # Microsoft has defined two different language codes for # “Spanish with modern sorting” and “Spanish with traditional # sorting”. This makes sense for collation APIs, and it would be @@ -788,163 +869,164 @@ _WINDOWS_LANGUAGES = { # extensions (eg., “es-u-co-trad” is “Spanish with traditional # sorting”). However, for storing names in fonts, this distinction # does not make sense, so we use “es” in both cases. - 0x0C0A: 'es', - 0x040A: 'es', - - 0x540A: 'es-US', - 0x380A: 'es-UY', - 0x200A: 'es-VE', - 0x081D: 'sv-FI', - 0x041D: 'sv', - 0x045A: 'syr', - 0x0428: 'tg', - 0x085F: 'tzm', - 0x0449: 'ta', - 0x0444: 'tt', - 0x044A: 'te', - 0x041E: 'th', - 0x0451: 'bo', - 0x041F: 'tr', - 0x0442: 'tk', - 0x0480: 'ug', - 0x0422: 'uk', - 0x042E: 'hsb', - 0x0420: 'ur', - 0x0843: 'uz-Cyrl', - 0x0443: 'uz', - 0x042A: 'vi', - 0x0452: 'cy', - 0x0488: 'wo', - 0x0485: 'sah', - 0x0478: 'ii', - 0x046A: 'yo', + 0x0C0A: "es", + 0x040A: "es", + 0x540A: "es-US", + 0x380A: "es-UY", + 0x200A: "es-VE", + 0x081D: "sv-FI", + 0x041D: "sv", + 0x045A: "syr", + 0x0428: "tg", + 0x085F: "tzm", + 0x0449: "ta", + 0x0444: "tt", + 0x044A: "te", + 0x041E: "th", + 0x0451: "bo", + 0x041F: "tr", + 0x0442: "tk", + 0x0480: "ug", + 0x0422: "uk", + 0x042E: "hsb", + 0x0420: "ur", + 0x0843: "uz-Cyrl", + 0x0443: "uz", + 0x042A: "vi", + 0x0452: "cy", + 0x0488: "wo", + 0x0485: "sah", + 0x0478: "ii", + 0x046A: "yo", } _MAC_LANGUAGES = { - 0: 'en', - 1: 'fr', - 2: 'de', - 3: 'it', - 4: 'nl', - 5: 'sv', - 6: 'es', - 7: 'da', - 8: 'pt', - 9: 'no', - 10: 'he', - 11: 'ja', - 12: 'ar', - 13: 'fi', - 14: 'el', - 15: 'is', - 16: 'mt', - 17: 'tr', - 18: 'hr', - 19: 'zh-Hant', - 20: 'ur', - 21: 'hi', - 22: 'th', - 23: 'ko', - 24: 'lt', - 25: 'pl', - 26: 'hu', - 27: 'es', - 28: 'lv', - 29: 'se', - 30: 'fo', - 31: 'fa', - 32: 'ru', - 33: 'zh', - 34: 'nl-BE', - 35: 'ga', - 36: 'sq', - 37: 'ro', - 38: 'cz', - 39: 'sk', - 40: 'sl', - 41: 'yi', - 42: 'sr', - 43: 'mk', - 44: 'bg', - 45: 'uk', - 46: 'be', - 47: 'uz', - 48: 'kk', - 49: 'az-Cyrl', - 50: 'az-Arab', - 51: 'hy', - 52: 'ka', - 53: 'mo', - 54: 'ky', - 55: 'tg', - 56: 'tk', - 57: 'mn-CN', - 58: 'mn', - 59: 'ps', - 60: 'ks', - 61: 'ku', - 62: 'sd', - 63: 'bo', - 64: 'ne', - 65: 'sa', - 66: 'mr', - 67: 'bn', - 68: 'as', - 69: 'gu', - 70: 'pa', - 71: 'or', - 72: 'ml', - 73: 'kn', - 74: 'ta', - 75: 'te', - 76: 'si', - 77: 'my', - 78: 'km', - 79: 'lo', - 80: 'vi', - 81: 'id', - 82: 'tl', - 83: 'ms', - 84: 'ms-Arab', - 85: 'am', - 86: 'ti', - 87: 'om', - 88: 'so', - 89: 'sw', - 90: 'rw', - 91: 'rn', - 92: 'ny', - 93: 'mg', - 94: 'eo', - 128: 'cy', - 129: 'eu', - 130: 'ca', - 131: 'la', - 132: 'qu', - 133: 'gn', - 134: 'ay', - 135: 'tt', - 136: 'ug', - 137: 'dz', - 138: 'jv', - 139: 'su', - 140: 'gl', - 141: 'af', - 142: 'br', - 143: 'iu', - 144: 'gd', - 145: 'gv', - 146: 'ga', - 147: 'to', - 148: 'el-polyton', - 149: 'kl', - 150: 'az', - 151: 'nn', + 0: "en", + 1: "fr", + 2: "de", + 3: "it", + 4: "nl", + 5: "sv", + 6: "es", + 7: "da", + 8: "pt", + 9: "no", + 10: "he", + 11: "ja", + 12: "ar", + 13: "fi", + 14: "el", + 15: "is", + 16: "mt", + 17: "tr", + 18: "hr", + 19: "zh-Hant", + 20: "ur", + 21: "hi", + 22: "th", + 23: "ko", + 24: "lt", + 25: "pl", + 26: "hu", + 27: "es", + 28: "lv", + 29: "se", + 30: "fo", + 31: "fa", + 32: "ru", + 33: "zh", + 34: "nl-BE", + 35: "ga", + 36: "sq", + 37: "ro", + 38: "cz", + 39: "sk", + 40: "sl", + 41: "yi", + 42: "sr", + 43: "mk", + 44: "bg", + 45: "uk", + 46: "be", + 47: "uz", + 48: "kk", + 49: "az-Cyrl", + 50: "az-Arab", + 51: "hy", + 52: "ka", + 53: "mo", + 54: "ky", + 55: "tg", + 56: "tk", + 57: "mn-CN", + 58: "mn", + 59: "ps", + 60: "ks", + 61: "ku", + 62: "sd", + 63: "bo", + 64: "ne", + 65: "sa", + 66: "mr", + 67: "bn", + 68: "as", + 69: "gu", + 70: "pa", + 71: "or", + 72: "ml", + 73: "kn", + 74: "ta", + 75: "te", + 76: "si", + 77: "my", + 78: "km", + 79: "lo", + 80: "vi", + 81: "id", + 82: "tl", + 83: "ms", + 84: "ms-Arab", + 85: "am", + 86: "ti", + 87: "om", + 88: "so", + 89: "sw", + 90: "rw", + 91: "rn", + 92: "ny", + 93: "mg", + 94: "eo", + 128: "cy", + 129: "eu", + 130: "ca", + 131: "la", + 132: "qu", + 133: "gn", + 134: "ay", + 135: "tt", + 136: "ug", + 137: "dz", + 138: "jv", + 139: "su", + 140: "gl", + 141: "af", + 142: "br", + 143: "iu", + 144: "gd", + 145: "gv", + 146: "ga", + 147: "to", + 148: "el-polyton", + 149: "kl", + 150: "az", + 151: "nn", } -_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()} +_WINDOWS_LANGUAGE_CODES = { + lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items() +} _MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()} @@ -1079,5 +1161,68 @@ _MAC_LANGUAGE_TO_SCRIPT = { 148: 6, # langGreekAncient → smRoman 149: 0, # langGreenlandic → smRoman 150: 0, # langAzerbaijanRoman → smRoman - 151: 0, # langNynorsk → smRoman + 151: 0, # langNynorsk → smRoman } + + +class NameRecordVisitor(TTVisitor): + # Font tables that have NameIDs we need to collect. + TABLES = ("GSUB", "GPOS", "fvar", "CPAL", "STAT") + + def __init__(self): + self.seen = set() + + +@NameRecordVisitor.register_attrs( + ( + (otTables.FeatureParamsSize, ("SubfamilyID", "SubfamilyNameID")), + (otTables.FeatureParamsStylisticSet, ("UINameID",)), + ( + otTables.FeatureParamsCharacterVariants, + ( + "FeatUILabelNameID", + "FeatUITooltipTextNameID", + "SampleTextNameID", + "FirstParamUILabelNameID", + ), + ), + (otTables.STAT, ("ElidedFallbackNameID",)), + (otTables.AxisRecord, ("AxisNameID",)), + (otTables.AxisValue, ("ValueNameID",)), + (otTables.FeatureName, ("FeatureNameID",)), + (otTables.Setting, ("SettingNameID",)), + ) +) +def visit(visitor, obj, attr, value): + visitor.seen.add(value) + + +@NameRecordVisitor.register(ttLib.getTableClass("fvar")) +def visit(visitor, obj): + for inst in obj.instances: + if inst.postscriptNameID != 0xFFFF: + visitor.seen.add(inst.postscriptNameID) + visitor.seen.add(inst.subfamilyNameID) + + for axis in obj.axes: + visitor.seen.add(axis.axisNameID) + + +@NameRecordVisitor.register(ttLib.getTableClass("CPAL")) +def visit(visitor, obj): + if obj.version == 1: + visitor.seen.update(obj.paletteLabels) + visitor.seen.update(obj.paletteEntryLabels) + + +@NameRecordVisitor.register(ttLib.TTFont) +def visit(visitor, font, *args, **kwargs): + if hasattr(visitor, "font"): + return False + + visitor.font = font + for tag in visitor.TABLES: + if tag in font: + visitor.visit(font[tag], *args, **kwargs) + del visitor.font + return False |