Upgrade fonttools from 2.4 to 3.28.0
Tags: android-p-preview-5, android-o-mr1-iot-release-1.0.2, android-n-iot-release-smart-display-r2

1. Add METADATA.
2. Run tools/external_updater/updater.sh update fonttools

Test: m checkbuild
Change-Id: Iab9e8c5da04f4c06347a924b4cea04f743f274c3
author: Haibo Huang <hhb@google.com> 2018-07-03 17:43:11 -0700
committer: Haibo Huang <hhb@google.com> 2018-07-04 06:00:25 +0000
commit: 8b3c57bdcbbbd9cb1dc98546b567a138207b7638 (patch)
tree: af539e10dc3b2b42edd3197b34f1c8b5ee9a82a0 /Lib/fontTools/ttLib/tables/_n_a_m_e.py
parent: cc9a86e36194f2ef4456e14d98b810e41e4fa52d (diff)
download: fonttools-8b3c57bdcbbbd9cb1dc98546b567a138207b7638.tar.gz
1 files changed, 832 insertions, 36 deletions
diff --git a/Lib/fontTools/ttLib/tables/_n_a_m_e.py b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
index 53fde4d7..a30291cc 100644
--- a/Lib/fontTools/ttLib/tables/_n_a_m_e.py
+++ b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
@@ -1,9 +1,17 @@
+# -*- coding: utf-8 -*-
 from __future__ import print_function, division, absolute_import
+from __future__ import unicode_literals
 from fontTools.misc.py23 import *
 from fontTools.misc import sstruct
 from fontTools.misc.textTools import safeEval
+from fontTools.misc.encodingTools import getEncoding
+from fontTools.ttLib import newTable
 from . import DefaultTable
 import struct
+import logging
+
+
+log = logging.getLogger(__name__)
 
 nameRecordFormat = """
 		>	# big endian
@@ -19,22 +27,27 @@ nameRecordSize = sstruct.calcsize(nameRecordFormat)
 
 
 class table__n_a_m_e(DefaultTable.DefaultTable):
-	
+	dependencies = ["ltag"]
+
 	def decompile(self, data, ttFont):
-		format, n, stringOffset = struct.unpack(">HHH", data[:6])
+		format, n, stringOffset = struct.unpack(b">HHH", data[:6])
 		expectedStringOffset = 6 + n * nameRecordSize
 		if stringOffset != expectedStringOffset:
-			# XXX we need a warn function
-			print("Warning: 'name' table stringOffset incorrect. Expected: %s; Actual: %s" % (expectedStringOffset, stringOffset))
+			log.error(
+				"'name' table stringOffset incorrect. Expected: %s; Actual: %s",
+				expectedStringOffset, stringOffset)
 		stringData = data[stringOffset:]
 		data = data[6:]
 		self.names = []
 		for i in range(n):
 			if len(data) < 12:
-				# compensate for buggy font
-				break
+				log.error('skipping malformed name record #%d', i)
+				continue
 			name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
 			name.string = stringData[name.offset:name.offset+name.length]
+			if name.offset + name.length > len(stringData):
+				log.error('skipping malformed name record #%d', i)
+				continue
 			assert len(name.string) == name.length
 			#if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
 			#	if len(name.string) % 2:
@@ -42,33 +55,35 @@ class table__n_a_m_e(DefaultTable.DefaultTable):
 			#		print name.__dict__
 			del name.offset, name.length
 			self.names.append(name)
-	
+
 	def compile(self, ttFont):
 		if not hasattr(self, "names"):
 			# only happens when there are NO name table entries read
 			# from the TTX file
 			self.names = []
-		self.names.sort()  # sort according to the spec; see NameRecord.__lt__()
+		names = self.names
+		names.sort() # sort according to the spec; see NameRecord.__lt__()
 		stringData = b""
 		format = 0
-		n = len(self.names)
+		n = len(names)
 		stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
-		data = struct.pack(">HHH", format, n, stringOffset)
+		data = struct.pack(b">HHH", format, n, stringOffset)
 		lastoffset = 0
 		done = {}  # remember the data so we can reuse the "pointers"
-		for name in self.names:
-			if name.string in done:
-				name.offset, name.length = done[name.string]
+		for name in names:
+			string = name.toBytes()
+			if string in done:
+				name.offset, name.length = done[string]
 			else:
-				name.offset, name.length = done[name.string] = len(stringData), len(name.string)
-				stringData = stringData + name.string
+				name.offset, name.length = done[string] = len(stringData), len(string)
+				stringData = bytesjoin([stringData, string])
 			data = data + sstruct.pack(nameRecordFormat, name)
 		return data + stringData
-	
+
 	def toXML(self, writer, ttFont):
 		for name in self.names:
 			name.toXML(writer, ttFont)
-	
+
 	def fromXML(self, name, attrs, content, ttFont):
 		if name != "namerecord":
 			return # ignore unknown tags
@@ -77,56 +92,347 @@ class table__n_a_m_e(DefaultTable.DefaultTable):
 		name = NameRecord()
 		self.names.append(name)
 		name.fromXML(name, attrs, content, ttFont)
-	
+
 	def getName(self, nameID, platformID, platEncID, langID=None):
 		for namerecord in self.names:
-			if (	namerecord.nameID == nameID and 
-					namerecord.platformID == platformID and 
+			if (	namerecord.nameID == nameID and
+					namerecord.platformID == platformID and
 					namerecord.platEncID == platEncID):
 				if langID is None or namerecord.langID == langID:
 					return namerecord
 		return None # not found
 
+	def getDebugName(self, nameID):
+		englishName = someName = None
+		for name in self.names:
+			if name.nameID != nameID:
+				continue
+			try:
+				unistr = name.toUnicode()
+			except UnicodeDecodeError:
+				continue
+
+			someName = unistr
+			if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
+				englishName = unistr
+				break
+		if englishName:
+			return englishName
+		elif someName:
+			return someName
+		else:
+			return None
+
+	def setName(self, string, nameID, platformID, platEncID, langID):
+		""" Set the 'string' for the name record identified by 'nameID', 'platformID',
+		'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
+		and append to the name table.
+
+		'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
+		it is assumed to be already encoded with the correct platform-specific encoding
+		identified by the (platformID, platEncID, langID) triplet. A warning is issued
+		to prevent unexpected results.
+		"""
+		if not hasattr(self, 'names'):
+			self.names = []
+		if not isinstance(string, unicode):
+			if isinstance(string, bytes):
+				log.warning(
+					"name string is bytes, ensure it's correctly encoded: %r", string)
+			else:
+				raise TypeError(
+					"expected unicode or bytes, found %s: %r" % (
+						type(string).__name__, string))
+		namerecord = self.getName(nameID, platformID, platEncID, langID)
+		if namerecord:
+			namerecord.string = string
+		else:
+			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
+
+	def _findUnusedNameID(self, minNameID=256):
+		"""Finds an unused name id.
+
+		The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
+		following the last nameID in the name table.
+		"""
+		names = getattr(self, 'names', [])
+		nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
+		if nameID > 32767:
+			raise ValueError("nameID must be less than 32768")
+		return nameID
+
+	def addMultilingualName(self, names, ttFont=None, nameID=None):
+		"""Add a multilingual name, returning its name ID
+
+		'names' is a dictionary with the name in multiple languages,
+		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
+		The keys can be arbitrary IETF BCP 47 language codes;
+		the values are Unicode strings.
+
+		'ttFont' is the TTFont to which the names are added, or None.
+		If present, the font's 'ltag' table can get populated
+		to store exotic language codes, which allows encoding
+		names that otherwise cannot get encoded at all.
+
+		'nameID' is the name ID to be used, or None to let the library
+		pick an unused name ID.
+		"""
+		if not hasattr(self, 'names'):
+			self.names = []
+		if nameID is None:
+			nameID = self._findUnusedNameID()
+		# TODO: Should minimize BCP 47 language codes.
+		# https://github.com/fonttools/fonttools/issues/930
+		for lang, name in sorted(names.items()):
+			# Apple platforms have been recognizing Windows names
+			# since early OSX (~2001), so we only add names
+			# for the Macintosh platform when we cannot make
+			# a Windows name. This can happen for exotic BCP47
+			# language tags that have no Windows language code.
+			windowsName = _makeWindowsName(name, nameID, lang)
+			if windowsName is not None:
+				self.names.append(windowsName)
+			else:
+				macName = _makeMacName(name, nameID, lang, ttFont)
+				if macName is not None:
+					self.names.append(macName)
+		return nameID
+
+	def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
+		""" Add a new name record containing 'string' for each (platformID, platEncID,
+		langID) tuple specified in the 'platforms' list.
+
+		The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
+		following the last nameID in the name table.
+		If no 'platforms' are specified, two English name records are added, one for the
+		Macintosh (platformID=1), and one for the Windows platform (3).
+
+		The 'string' must be a Unicode string, so it can be encoded with different,
+		platform-specific encodings.
+
+		Return the new nameID.
+		"""
+		assert len(platforms) > 0, \
+			"'platforms' must contain at least one (platformID, platEncID, langID) tuple"
+		if not hasattr(self, 'names'):
+			self.names = []
+		if not isinstance(string, unicode):
+			raise TypeError(
+				"expected %s, found %s: %r" % (
+					unicode.__name__, type(string).__name__,string ))
+		nameID = self._findUnusedNameID(minNameID + 1)
+		for platformID, platEncID, langID in platforms:
+			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
+		return nameID
+
+
+def makeName(string, nameID, platformID, platEncID, langID):
+	name = NameRecord()
+	name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
+		string, nameID, platformID, platEncID, langID)
+	return name
+
+
+def _makeWindowsName(name, nameID, language):
+	"""Create a NameRecord for the Microsoft Windows platform
+
+	'language' is an arbitrary IETF BCP 47 language identifier such
+	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
+	does not support the desired language, the result will be None.
+	Future versions of fonttools might return a NameRecord for the
+	OpenType 'name' table format 1, but this is not implemented yet.
+	"""
+	langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
+	if langID is not None:
+		return makeName(name, nameID, 3, 1, langID)
+	else:
+		log.warning("cannot add Windows name in language %s "
+		            "because fonttools does not yet support "
+		            "name table format 1" % language)
+		return None
+
+
+def _makeMacName(name, nameID, language, font=None):
+	"""Create a NameRecord for Apple platforms
+
+	'language' is an arbitrary IETF BCP 47 language identifier such
+	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
+	create a Macintosh NameRecord that is understood by old applications
+	(platform ID 1 and an old-style Macintosh language enum). If this
+	is not possible, we create a Unicode NameRecord (platform ID 0)
+	whose language points to the font’s 'ltag' table. The latter
+	can encode any string in any language, but legacy applications
+	might not recognize the format (in which case they will ignore
+	those names).
+
+	'font' should be the TTFont for which you want to create a name.
+	If 'font' is None, we only return NameRecords for legacy Macintosh;
+	in that case, the result will be None for names that need to
+	be encoded with an 'ltag' table.
+
+	See the section “The language identifier” in Apple’s specification:
+	https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
+	"""
+	macLang = _MAC_LANGUAGE_CODES.get(language.lower())
+	macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
+	if macLang is not None and macScript is not None:
+		encoding = getEncoding(1, macScript, macLang, default="ascii")
+		# Check if we can actually encode this name. If we can't,
+		# for example because we have no support for the legacy
+		# encoding, or because the name string contains Unicode
+		# characters that the legacy encoding cannot represent,
+		# we fall back to encoding the name in Unicode and put
+		# the language tag into the ltag table.
+		try:
+			_ = tobytes(name, encoding, errors="strict")
+			return makeName(name, nameID, 1, macScript, macLang)
+		except UnicodeEncodeError:
+			pass
+	if font is not None:
+		ltag = font.tables.get("ltag")
+		if ltag is None:
+			ltag = font["ltag"] = newTable("ltag")
+		# 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
+		# “The preferred platform-specific code for Unicode would be 3 or 4.”
+		# https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
+		return makeName(name, nameID, 0, 4, ltag.addTag(language))
+	else:
+		log.warning("cannot store language %s into 'ltag' table "
+		            "without having access to the TTFont object" %
+		            language)
+		return None
+
 
 class NameRecord(object):
-	
+
+	def getEncoding(self, default='ascii'):
+		"""Returns the Python encoding name for this name entry based on its platformID,
+		platEncID, and langID.  If encoding for these values is not known, by default
+		'ascii' is returned.  That can be overridden by passing a value to the default
+		argument.
+		"""
+		return getEncoding(self.platformID, self.platEncID, self.langID, default)
+
+	def encodingIsUnicodeCompatible(self):
+		return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']
+
+	def __str__(self):
+		return self.toStr(errors='backslashreplace')
+
 	def isUnicode(self):
 		return (self.platformID == 0 or
 			(self.platformID == 3 and self.platEncID in [0, 1, 10]))
 
+	def toUnicode(self, errors='strict'):
+		"""
+		If self.string is a Unicode string, return it; otherwise try decoding the
+		bytes in self.string to a Unicode string using the encoding of this
+		entry as returned by self.getEncoding(); Note that  self.getEncoding()
+		returns 'ascii' if the encoding is unknown to the library.
+
+		Certain heuristics are performed to recover data from bytes that are
+		ill-formed in the chosen encoding, or that otherwise look misencoded
+		(mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
+		but marked otherwise).  If the bytes are ill-formed and the heuristics fail,
+		the error is handled according to the errors parameter to this function, which is
+		passed to the underlying decode() function; by default it throws a
+		UnicodeDecodeError exception.
+
+		Note: The mentioned heuristics mean that roundtripping a font to XML and back
+		to binary might recover some misencoded data whereas just loading the font
+		and saving it back will not change them.
+		"""
+		def isascii(b):
+			return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
+		encoding = self.getEncoding()
+		string = self.string
+
+		if encoding == 'utf_16_be' and len(string) % 2 == 1:
+			# Recover badly encoded UTF-16 strings that have an odd number of bytes:
+			# - If the last byte is zero, drop it.  Otherwise,
+			# - If all the odd bytes are zero and all the even bytes are ASCII,
+			#   prepend one zero byte.  Otherwise,
+			# - If first byte is zero and all other bytes are ASCII, insert zero
+			#   bytes between consecutive ASCII bytes.
+			#
+			# (Yes, I've seen all of these in the wild... sigh)
+			if byteord(string[-1]) == 0:
+				string = string[:-1]
+			elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)):
+				string = b'\0' + string
+			elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
+				string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:])
+
+		string = tounicode(string, encoding=encoding, errors=errors)
+
+		# If the decoded string still looks like UTF-16BE, it suggests a double-encoding.
+		# Fix it up.
+		if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)):
+			# If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
+			# narrow it down.
+			string = ''.join(c for c in string[1::2])
+
+		return string
+
+	def toBytes(self, errors='strict'):
+		""" If self.string is a bytes object, return it; otherwise try encoding
+		the Unicode string in self.string to bytes using the encoding of this
+		entry as returned by self.getEncoding(); Note that self.getEncoding()
+		returns 'ascii' if the encoding is unknown to the library.
+
+		If the Unicode string cannot be encoded to bytes in the chosen encoding,
+		the error is handled according to the errors parameter to this function,
+		which is passed to the underlying encode() function; by default it throws a
+		UnicodeEncodeError exception.
+		"""
+		return tobytes(self.string, encoding=self.getEncoding(), errors=errors)
+
+	def toStr(self, errors='strict'):
+		if str == bytes:
+			# python 2
+			return self.toBytes(errors)
+		else:
+			# python 3
+			return self.toUnicode(errors)
+
 	def toXML(self, writer, ttFont):
-		writer.begintag("namerecord", [
+		try:
+			unistr = self.toUnicode()
+		except UnicodeDecodeError:
+			unistr = None
+		attrs = [
 				("nameID", self.nameID),
 				("platformID", self.platformID),
 				("platEncID", self.platEncID),
 				("langID", hex(self.langID)),
-						])
+			]
+
+		if unistr is None or not self.encodingIsUnicodeCompatible():
+			attrs.append(("unicode", unistr is not None))
+
+		writer.begintag("namerecord", attrs)
 		writer.newline()
-		if self.isUnicode():
-			if len(self.string) % 2:
-				# no, shouldn't happen, but some of the Apple
-				# tools cause this anyway :-(
-				writer.write16bit(self.string + b"\0", strip=True)
-			else:
-				writer.write16bit(self.string, strip=True)
+		if unistr is not None:
+			writer.write(unistr)
 		else:
-			writer.write8bit(self.string, strip=True)
+			writer.write8bit(self.string)
 		writer.newline()
 		writer.endtag("namerecord")
 		writer.newline()
-	
+
 	def fromXML(self, name, attrs, content, ttFont):
 		self.nameID = safeEval(attrs["nameID"])
 		self.platformID = safeEval(attrs["platformID"])
 		self.platEncID = safeEval(attrs["platEncID"])
 		self.langID =  safeEval(attrs["langID"])
 		s = strjoin(content).strip()
-		if self.isUnicode():
-			self.string = s.encode("utf_16_be")
+		encoding = self.getEncoding()
+		if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
+			self.string = s.encode(encoding)
 		else:
 			# This is the inverse of write8bit...
 			self.string = s.encode("latin1")
-	
+
 	def __lt__(self, other):
 		if type(self) != type(other):
 			return NotImplemented
@@ -147,7 +453,497 @@ class NameRecord(object):
 			getattr(other, "string", None),
 		)
 		return selfTuple < otherTuple
-	
+
 	def __repr__(self):
 		return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
 				self.nameID, self.platformID, self.langID)
+
+
+# Windows language ID → IETF BCP-47 language tag
+#
+# While Microsoft indicates a region/country for all its language
+# IDs, we follow Unicode practice by omitting “most likely subtags”
+# as per Unicode CLDR. For example, English is simply “en” and not
+# “en-Latn” because according to Unicode, the default script
+# for English is Latin.
+#
+# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
+# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+_WINDOWS_LANGUAGES = {
+    0x0436: 'af',
+    0x041C: 'sq',
+    0x0484: 'gsw',
+    0x045E: 'am',
+    0x1401: 'ar-DZ',
+    0x3C01: 'ar-BH',
+    0x0C01: 'ar',
+    0x0801: 'ar-IQ',
+    0x2C01: 'ar-JO',
+    0x3401: 'ar-KW',
+    0x3001: 'ar-LB',
+    0x1001: 'ar-LY',
+    0x1801: 'ary',
+    0x2001: 'ar-OM',
+    0x4001: 'ar-QA',
+    0x0401: 'ar-SA',
+    0x2801: 'ar-SY',
+    0x1C01: 'aeb',
+    0x3801: 'ar-AE',
+    0x2401: 'ar-YE',
+    0x042B: 'hy',
+    0x044D: 'as',
+    0x082C: 'az-Cyrl',
+    0x042C: 'az',
+    0x046D: 'ba',
+    0x042D: 'eu',
+    0x0423: 'be',
+    0x0845: 'bn',
+    0x0445: 'bn-IN',
+    0x201A: 'bs-Cyrl',
+    0x141A: 'bs',
+    0x047E: 'br',
+    0x0402: 'bg',
+    0x0403: 'ca',
+    0x0C04: 'zh-HK',
+    0x1404: 'zh-MO',
+    0x0804: 'zh',
+    0x1004: 'zh-SG',
+    0x0404: 'zh-TW',
+    0x0483: 'co',
+    0x041A: 'hr',
+    0x101A: 'hr-BA',
+    0x0405: 'cs',
+    0x0406: 'da',
+    0x048C: 'prs',
+    0x0465: 'dv',
+    0x0813: 'nl-BE',
+    0x0413: 'nl',
+    0x0C09: 'en-AU',
+    0x2809: 'en-BZ',
+    0x1009: 'en-CA',
+    0x2409: 'en-029',
+    0x4009: 'en-IN',
+    0x1809: 'en-IE',
+    0x2009: 'en-JM',
+    0x4409: 'en-MY',
+    0x1409: 'en-NZ',
+    0x3409: 'en-PH',
+    0x4809: 'en-SG',
+    0x1C09: 'en-ZA',
+    0x2C09: 'en-TT',
+    0x0809: 'en-GB',
+    0x0409: 'en',
+    0x3009: 'en-ZW',
+    0x0425: 'et',
+    0x0438: 'fo',
+    0x0464: 'fil',
+    0x040B: 'fi',
+    0x080C: 'fr-BE',
+    0x0C0C: 'fr-CA',
+    0x040C: 'fr',
+    0x140C: 'fr-LU',
+    0x180C: 'fr-MC',
+    0x100C: 'fr-CH',
+    0x0462: 'fy',
+    0x0456: 'gl',
+    0x0437: 'ka',
+    0x0C07: 'de-AT',
+    0x0407: 'de',
+    0x1407: 'de-LI',
+    0x1007: 'de-LU',
+    0x0807: 'de-CH',
+    0x0408: 'el',
+    0x046F: 'kl',
+    0x0447: 'gu',
+    0x0468: 'ha',
+    0x040D: 'he',
+    0x0439: 'hi',
+    0x040E: 'hu',
+    0x040F: 'is',
+    0x0470: 'ig',
+    0x0421: 'id',
+    0x045D: 'iu',
+    0x085D: 'iu-Latn',
+    0x083C: 'ga',
+    0x0434: 'xh',
+    0x0435: 'zu',
+    0x0410: 'it',
+    0x0810: 'it-CH',
+    0x0411: 'ja',
+    0x044B: 'kn',
+    0x043F: 'kk',
+    0x0453: 'km',
+    0x0486: 'quc',
+    0x0487: 'rw',
+    0x0441: 'sw',
+    0x0457: 'kok',
+    0x0412: 'ko',
+    0x0440: 'ky',
+    0x0454: 'lo',
+    0x0426: 'lv',
+    0x0427: 'lt',
+    0x082E: 'dsb',
+    0x046E: 'lb',
+    0x042F: 'mk',
+    0x083E: 'ms-BN',
+    0x043E: 'ms',
+    0x044C: 'ml',
+    0x043A: 'mt',
+    0x0481: 'mi',
+    0x047A: 'arn',
+    0x044E: 'mr',
+    0x047C: 'moh',
+    0x0450: 'mn',
+    0x0850: 'mn-CN',
+    0x0461: 'ne',
+    0x0414: 'nb',
+    0x0814: 'nn',
+    0x0482: 'oc',
+    0x0448: 'or',
+    0x0463: 'ps',
+    0x0415: 'pl',
+    0x0416: 'pt',
+    0x0816: 'pt-PT',
+    0x0446: 'pa',
+    0x046B: 'qu-BO',
+    0x086B: 'qu-EC',
+    0x0C6B: 'qu',
+    0x0418: 'ro',
+    0x0417: 'rm',
+    0x0419: 'ru',
+    0x243B: 'smn',
+    0x103B: 'smj-NO',
+    0x143B: 'smj',
+    0x0C3B: 'se-FI',
+    0x043B: 'se',
+    0x083B: 'se-SE',
+    0x203B: 'sms',
+    0x183B: 'sma-NO',
+    0x1C3B: 'sms',
+    0x044F: 'sa',
+    0x1C1A: 'sr-Cyrl-BA',
+    0x0C1A: 'sr',
+    0x181A: 'sr-Latn-BA',
+    0x081A: 'sr-Latn',
+    0x046C: 'nso',
+    0x0432: 'tn',
+    0x045B: 'si',
+    0x041B: 'sk',
+    0x0424: 'sl',
+    0x2C0A: 'es-AR',
+    0x400A: 'es-BO',
+    0x340A: 'es-CL',
+    0x240A: 'es-CO',
+    0x140A: 'es-CR',
+    0x1C0A: 'es-DO',
+    0x300A: 'es-EC',
+    0x440A: 'es-SV',
+    0x100A: 'es-GT',
+    0x480A: 'es-HN',
+    0x080A: 'es-MX',
+    0x4C0A: 'es-NI',
+    0x180A: 'es-PA',
+    0x3C0A: 'es-PY',
+    0x280A: 'es-PE',
+    0x500A: 'es-PR',
+
+    # Microsoft has defined two different language codes for
+    # “Spanish with modern sorting” and “Spanish with traditional
+    # sorting”. This makes sense for collation APIs, and it would be
+    # possible to express this in BCP 47 language tags via Unicode
+    # extensions (eg., “es-u-co-trad” is “Spanish with traditional
+    # sorting”). However, for storing names in fonts, this distinction
+    # does not make sense, so we use “es” in both cases.
+    0x0C0A: 'es',
+    0x040A: 'es',
+
+    0x540A: 'es-US',
+    0x380A: 'es-UY',
+    0x200A: 'es-VE',
+    0x081D: 'sv-FI',
+    0x041D: 'sv',
+    0x045A: 'syr',
+    0x0428: 'tg',
+    0x085F: 'tzm',
+    0x0449: 'ta',
+    0x0444: 'tt',
+    0x044A: 'te',
+    0x041E: 'th',
+    0x0451: 'bo',
+    0x041F: 'tr',
+    0x0442: 'tk',
+    0x0480: 'ug',
+    0x0422: 'uk',
+    0x042E: 'hsb',
+    0x0420: 'ur',
+    0x0843: 'uz-Cyrl',
+    0x0443: 'uz',
+    0x042A: 'vi',
+    0x0452: 'cy',
+    0x0488: 'wo',
+    0x0485: 'sah',
+    0x0478: 'ii',
+    0x046A: 'yo',
+}
+
+
+_MAC_LANGUAGES = {
+    0: 'en',
+    1: 'fr',
+    2: 'de',
+    3: 'it',
+    4: 'nl',
+    5: 'sv',
+    6: 'es',
+    7: 'da',
+    8: 'pt',
+    9: 'no',
+    10: 'he',
+    11: 'ja',
+    12: 'ar',
+    13: 'fi',
+    14: 'el',
+    15: 'is',
+    16: 'mt',
+    17: 'tr',
+    18: 'hr',
+    19: 'zh-Hant',
+    20: 'ur',
+    21: 'hi',
+    22: 'th',
+    23: 'ko',
+    24: 'lt',
+    25: 'pl',
+    26: 'hu',
+    27: 'es',
+    28: 'lv',
+    29: 'se',
+    30: 'fo',
+    31: 'fa',
+    32: 'ru',
+    33: 'zh',
+    34: 'nl-BE',
+    35: 'ga',
+    36: 'sq',
+    37: 'ro',
+    38: 'cz',
+    39: 'sk',
+    40: 'sl',
+    41: 'yi',
+    42: 'sr',
+    43: 'mk',
+    44: 'bg',
+    45: 'uk',
+    46: 'be',
+    47: 'uz',
+    48: 'kk',
+    49: 'az-Cyrl',
+    50: 'az-Arab',
+    51: 'hy',
+    52: 'ka',
+    53: 'mo',
+    54: 'ky',
+    55: 'tg',
+    56: 'tk',
+    57: 'mn-CN',
+    58: 'mn',
+    59: 'ps',
+    60: 'ks',
+    61: 'ku',
+    62: 'sd',
+    63: 'bo',
+    64: 'ne',
+    65: 'sa',
+    66: 'mr',
+    67: 'bn',
+    68: 'as',
+    69: 'gu',
+    70: 'pa',
+    71: 'or',
+    72: 'ml',
+    73: 'kn',
+    74: 'ta',
+    75: 'te',
+    76: 'si',
+    77: 'my',
+    78: 'km',
+    79: 'lo',
+    80: 'vi',
+    81: 'id',
+    82: 'tl',
+    83: 'ms',
+    84: 'ms-Arab',
+    85: 'am',
+    86: 'ti',
+    87: 'om',
+    88: 'so',
+    89: 'sw',
+    90: 'rw',
+    91: 'rn',
+    92: 'ny',
+    93: 'mg',
+    94: 'eo',
+    128: 'cy',
+    129: 'eu',
+    130: 'ca',
+    131: 'la',
+    132: 'qu',
+    133: 'gn',
+    134: 'ay',
+    135: 'tt',
+    136: 'ug',
+    137: 'dz',
+    138: 'jv',
+    139: 'su',
+    140: 'gl',
+    141: 'af',
+    142: 'br',
+    143: 'iu',
+    144: 'gd',
+    145: 'gv',
+    146: 'ga',
+    147: 'to',
+    148: 'el-polyton',
+    149: 'kl',
+    150: 'az',
+    151: 'nn',
+}
+
+
+_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()}
+_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()}
+
+
+# MacOS language ID → MacOS script ID
+#
+# Note that the script ID is not sufficient to determine what encoding
+# to use in TrueType files. For some languages, MacOS used a modification
+# of a mainstream script. For example, an Icelandic name would be stored
+# with smRoman in the TrueType naming table, but the actual encoding
+# is a special Icelandic version of the normal Macintosh Roman encoding.
+# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
+# Syllables but MacOS had run out of available script codes, so this was
+# done as a (pretty radical) “modification” of Ethiopic.
+#
+# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
+_MAC_LANGUAGE_TO_SCRIPT = {
+    0: 0,  # langEnglish → smRoman
+    1: 0,  # langFrench → smRoman
+    2: 0,  # langGerman → smRoman
+    3: 0,  # langItalian → smRoman
+    4: 0,  # langDutch → smRoman
+    5: 0,  # langSwedish → smRoman
+    6: 0,  # langSpanish → smRoman
+    7: 0,  # langDanish → smRoman
+    8: 0,  # langPortuguese → smRoman
+    9: 0,  # langNorwegian → smRoman
+    10: 5,  # langHebrew → smHebrew
+    11: 1,  # langJapanese → smJapanese
+    12: 4,  # langArabic → smArabic
+    13: 0,  # langFinnish → smRoman
+    14: 6,  # langGreek → smGreek
+    15: 0,  # langIcelandic → smRoman (modified)
+    16: 0,  # langMaltese → smRoman
+    17: 0,  # langTurkish → smRoman (modified)
+    18: 0,  # langCroatian → smRoman (modified)
+    19: 2,  # langTradChinese → smTradChinese
+    20: 4,  # langUrdu → smArabic
+    21: 9,  # langHindi → smDevanagari
+    22: 21,  # langThai → smThai
+    23: 3,  # langKorean → smKorean
+    24: 29,  # langLithuanian → smCentralEuroRoman
+    25: 29,  # langPolish → smCentralEuroRoman
+    26: 29,  # langHungarian → smCentralEuroRoman
+    27: 29,  # langEstonian → smCentralEuroRoman
+    28: 29,  # langLatvian → smCentralEuroRoman
+    29: 0,  # langSami → smRoman
+    30: 0,  # langFaroese → smRoman (modified)
+    31: 4,  # langFarsi → smArabic (modified)
+    32: 7,  # langRussian → smCyrillic
+    33: 25,  # langSimpChinese → smSimpChinese
+    34: 0,  # langFlemish → smRoman
+    35: 0,  # langIrishGaelic → smRoman (modified)
+    36: 0,  # langAlbanian → smRoman
+    37: 0,  # langRomanian → smRoman (modified)
+    38: 29,  # langCzech → smCentralEuroRoman
+    39: 29,  # langSlovak → smCentralEuroRoman
+    40: 0,  # langSlovenian → smRoman (modified)
+    41: 5,  # langYiddish → smHebrew
+    42: 7,  # langSerbian → smCyrillic
+    43: 7,  # langMacedonian → smCyrillic
+    44: 7,  # langBulgarian → smCyrillic
+    45: 7,  # langUkrainian → smCyrillic (modified)
+    46: 7,  # langByelorussian → smCyrillic
+    47: 7,  # langUzbek → smCyrillic
+    48: 7,  # langKazakh → smCyrillic
+    49: 7,  # langAzerbaijani → smCyrillic
+    50: 4,  # langAzerbaijanAr → smArabic
+    51: 24,  # langArmenian → smArmenian
+    52: 23,  # langGeorgian → smGeorgian
+    53: 7,  # langMoldavian → smCyrillic
+    54: 7,  # langKirghiz → smCyrillic
+    55: 7,  # langTajiki → smCyrillic
+    56: 7,  # langTurkmen → smCyrillic
+    57: 27,  # langMongolian → smMongolian
+    58: 7,  # langMongolianCyr → smCyrillic
+    59: 4,  # langPashto → smArabic
+    60: 4,  # langKurdish → smArabic
+    61: 4,  # langKashmiri → smArabic
+    62: 4,  # langSindhi → smArabic
+    63: 26,  # langTibetan → smTibetan
+    64: 9,  # langNepali → smDevanagari
+    65: 9,  # langSanskrit → smDevanagari
+    66: 9,  # langMarathi → smDevanagari
+    67: 13,  # langBengali → smBengali
+    68: 13,  # langAssamese → smBengali
+    69: 11,  # langGujarati → smGujarati
+    70: 10,  # langPunjabi → smGurmukhi
+    71: 12,  # langOriya → smOriya
+    72: 17,  # langMalayalam → smMalayalam
+    73: 16,  # langKannada → smKannada
+    74: 14,  # langTamil → smTamil
+    75: 15,  # langTelugu → smTelugu
+    76: 18,  # langSinhalese → smSinhalese
+    77: 19,  # langBurmese → smBurmese
+    78: 20,  # langKhmer → smKhmer
+    79: 22,  # langLao → smLao
+    80: 30,  # langVietnamese → smVietnamese
+    81: 0,  # langIndonesian → smRoman
+    82: 0,  # langTagalog → smRoman
+    83: 0,  # langMalayRoman → smRoman
+    84: 4,  # langMalayArabic → smArabic
+    85: 28,  # langAmharic → smEthiopic
+    86: 28,  # langTigrinya → smEthiopic
+    87: 28,  # langOromo → smEthiopic
+    88: 0,  # langSomali → smRoman
+    89: 0,  # langSwahili → smRoman
+    90: 0,  # langKinyarwanda → smRoman
+    91: 0,  # langRundi → smRoman
+    92: 0,  # langNyanja → smRoman
+    93: 0,  # langMalagasy → smRoman
+    94: 0,  # langEsperanto → smRoman
+    128: 0,  # langWelsh → smRoman (modified)
+    129: 0,  # langBasque → smRoman
+    130: 0,  # langCatalan → smRoman
+    131: 0,  # langLatin → smRoman
+    132: 0,  # langQuechua → smRoman
+    133: 0,  # langGuarani → smRoman
+    134: 0,  # langAymara → smRoman
+    135: 7,  # langTatar → smCyrillic
+    136: 4,  # langUighur → smArabic
+    137: 26,  # langDzongkha → smTibetan
+    138: 0,  # langJavaneseRom → smRoman
+    139: 0,  # langSundaneseRom → smRoman
+    140: 0,  # langGalician → smRoman
+    141: 0,  # langAfrikaans → smRoman
+    142: 0,  # langBreton → smRoman (modified)
+    143: 28,  # langInuktitut → smEthiopic (modified)
+    144: 0,  # langScottishGaelic → smRoman (modified)
+    145: 0,  # langManxGaelic → smRoman (modified)
+    146: 0,  # langIrishGaelicScript → smRoman (modified)
+    147: 0,  # langTongan → smRoman
+    148: 6,  # langGreekAncient → smGreek
+    149: 0,  # langGreenlandic → smRoman
+    150: 0,  # langAzerbaijanRoman → smRoman
+    151: 0,   # langNynorsk → smRoman
+}
author	Haibo Huang <hhb@google.com>	2018-07-03 17:43:11 -0700
committer	Haibo Huang <hhb@google.com>	2018-07-04 06:00:25 +0000
commit	8b3c57bdcbbbd9cb1dc98546b567a138207b7638 (patch)
tree	af539e10dc3b2b42edd3197b34f1c8b5ee9a82a0 /Lib/fontTools/ttLib/tables/_n_a_m_e.py
parent	cc9a86e36194f2ef4456e14d98b810e41e4fa52d (diff)
download	fonttools-8b3c57bdcbbbd9cb1dc98546b567a138207b7638.tar.gz