1 files changed, 106 insertions, 23 deletions
diff --git a/Lib/fontTools/ttLib/tables/_c_m_a_p.py b/Lib/fontTools/ttLib/tables/_c_m_a_p.py
index a65a0c25..a31b5059 100644
--- a/Lib/fontTools/ttLib/tables/_c_m_a_p.py
+++ b/Lib/fontTools/ttLib/tables/_c_m_a_p.py
@@ -1,5 +1,4 @@
-from fontTools.misc.py23 import bytesjoin
-from fontTools.misc.textTools import safeEval, readHex
+from fontTools.misc.textTools import bytesjoin, safeEval, readHex
 from fontTools.misc.encodingTools import getEncoding
 from fontTools.ttLib import getSearchRange
 from fontTools.unicode import Unicode
@@ -15,21 +14,61 @@ log = logging.getLogger(__name__)
 
 def _make_map(font, chars, gids):
 	assert len(chars) == len(gids)
+	glyphNames = font.getGlyphNameMany(gids)
 	cmap = {}
-	glyphOrder = font.getGlyphOrder()
-	for char,gid in zip(chars,gids):
+	for char,gid,name in zip(chars,gids,glyphNames):
 		if gid == 0:
 			continue
-		try:
-			name = glyphOrder[gid]
-		except IndexError:
-			name = font.getGlyphName(gid)
 		cmap[char] = name
 	return cmap
 
 class table__c_m_a_p(DefaultTable.DefaultTable):
+	"""Character to Glyph Index Mapping Table
+
+	This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_
+	table, which maps between input characters (in Unicode or other system encodings)
+	and glyphs within the font. The ``cmap`` table contains one or more subtables
+	which determine the mapping of of characters to glyphs across different platforms
+	and encoding systems.
+
+	``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access
+	to the subtables, although it is normally easier to retrieve individual subtables
+	through the utility methods described below. To add new subtables to a font,
+	first determine the subtable format (if in doubt use format 4 for glyphs within
+	the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation
+	Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``,
+	and append them to the ``.tables`` list.
+
+	Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap``
+	attribute.
+
+	Example::
+
+		cmap4_0_3 = CmapSubtable.newSubtable(4)
+		cmap4_0_3.platformID = 0
+		cmap4_0_3.platEncID = 3
+		cmap4_0_3.language = 0
+		cmap4_0_3.cmap = { 0xC1: "Aacute" }
+
+		cmap = newTable("cmap")
+		cmap.tableVersion = 0
+		cmap.tables = [cmap4_0_3]
+	"""
 
 	def getcmap(self, platformID, platEncID):
+		"""Returns the first subtable which matches the given platform and encoding.
+
+		Args:
+			platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh
+				(deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows.
+			encodingID (int): Encoding ID. Interpretation depends on the platform ID.
+				See the OpenType specification for details.
+
+		Returns:
+			An object which is a subclass of :py:class:`CmapSubtable` if a matching
+			subtable is found within the font, or ``None`` otherwise.
+		"""
+
 		for subtable in self.tables:
 			if (subtable.platformID == platformID and
 					subtable.platEncID == platEncID):
@@ -37,13 +76,22 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
 		return None # not found
 
 	def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))):
-		"""Return the 'best' unicode cmap dictionary available in the font,
-		or None, if no unicode cmap subtable is available.
+		"""Returns the 'best' Unicode cmap dictionary available in the font
+		or ``None``, if no Unicode cmap subtable is available.
 
 		By default it will search for the following (platformID, platEncID)
-		pairs:
-			(3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0)
-		This can be customized via the cmapPreferences argument.
+		pairs in order::
+
+				(3, 10), # Windows Unicode full repertoire
+				(0, 6),  # Unicode full repertoire (format 13 subtable)
+				(0, 4),  # Unicode 2.0 full repertoire
+				(3, 1),  # Windows Unicode BMP
+				(0, 3),  # Unicode 2.0 BMP
+				(0, 2),  # Unicode ISO/IEC 10646
+				(0, 1),  # Unicode 1.1
+				(0, 0)   # Unicode 1.0
+
+		This order can be customized via the ``cmapPreferences`` argument.
 		"""
 		for platformID, platEncID in cmapPreferences:
 			cmapSubtable = self.getcmap(platformID, platEncID)
@@ -52,12 +100,20 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
 		return None  # None of the requested cmap subtables were found
 
 	def buildReversed(self):
-		"""Returns a reverse cmap such as {'one':{0x31}, 'A':{0x41,0x391}}.
+		"""Builds a reverse mapping dictionary
+
+		Iterates over all Unicode cmap tables and returns a dictionary mapping
+		glyphs to sets of codepoints, such as::
+
+			{
+				'one': {0x31}
+				'A': {0x41,0x391}
+			}
 
 		The values are sets of Unicode codepoints because
 		some fonts map different codepoints to the same glyph.
-		For example, U+0041 LATIN CAPITAL LETTER A and U+0391
-		GREEK CAPITAL LETTER ALPHA are sometimes the same glyph.
+		For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391
+		GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph.
 		"""
 		result = {}
 		for subtable in self.tables:
@@ -100,6 +156,12 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
 			else:
 				seenOffsets[offset] = i
 			tables.append(table)
+		if ttFont.lazy is False:  # Be lazy for None and True
+			self.ensureDecompiled()
+
+	def ensureDecompiled(self):
+		for st in self.tables:
+			st.ensureDecompiled()
 
 	def compile(self, ttFont):
 		self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
@@ -145,6 +207,16 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
 
 
 class CmapSubtable(object):
+	"""Base class for all cmap subtable formats.
+
+	Subclasses which handle the individual subtable formats are named
+	``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass`
+	to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a
+	new subtable object for a given format.
+
+	The object exposes a ``.cmap`` attribute, which contains a dictionary mapping
+	character codepoints to glyph names.
+	"""
 
 	@staticmethod
 	def getSubtableClass(format):
@@ -153,7 +225,8 @@ class CmapSubtable(object):
 
 	@staticmethod
 	def newSubtable(format):
-		"""Return a new instance of a subtable for format."""
+		"""Return a new instance of a subtable for the given format
+		."""
 		subtableClass = CmapSubtable.getSubtableClass(format)
 		return subtableClass(format)
 
@@ -161,6 +234,17 @@ class CmapSubtable(object):
 		self.format = format
 		self.data = None
 		self.ttFont = None
+		self.platformID = None  #: The platform ID of this subtable
+		self.platEncID = None   #: The encoding ID of this subtable (interpretation depends on ``platformID``)
+		self.language = None    #: The language ID of this subtable (Macintosh platform only)
+
+	def ensureDecompiled(self):
+		if self.data is None:
+			return
+		self.decompile(None, None) # use saved data.
+		self.data = None	# Once this table has been decompiled, make sure we don't
+							# just return the original data. Also avoids recursion when
+							# called with an attribute that the cmap subtable doesn't have.
 
 	def __getattr__(self, attr):
 		# allow lazy decompilation of subtables.
@@ -168,10 +252,7 @@ class CmapSubtable(object):
 			raise AttributeError(attr)
 		if self.data is None:
 			raise AttributeError(attr)
-		self.decompile(None, None) # use saved data.
-		self.data = None	# Once this table has been decompiled, make sure we don't
-							# just return the original data. Also avoids recursion when
-							# called with an attribute that the cmap subtable doesn't have.
+		self.ensureDecompiled()
 		return getattr(self, attr)
 
 	def decompileHeader(self, data, ttFont):
@@ -198,20 +279,22 @@ class CmapSubtable(object):
 	def getEncoding(self, default=None):
 		"""Returns the Python encoding name for this cmap subtable based on its platformID,
 		platEncID, and language.  If encoding for these values is not known, by default
-		None is returned.  That can be overriden by passing a value to the default
+		``None`` is returned.  That can be overridden by passing a value to the ``default``
 		argument.
 
 		Note that if you want to choose a "preferred" cmap subtable, most of the time
-		self.isUnicode() is what you want as that one only returns true for the modern,
+		``self.isUnicode()`` is what you want as that one only returns true for the modern,
 		commonly used, Unicode-compatible triplets, not the legacy ones.
 		"""
 		return getEncoding(self.platformID, self.platEncID, self.language, default)
 
 	def isUnicode(self):
+		"""Returns true if the characters are interpreted as Unicode codepoints."""
 		return (self.platformID == 0 or
 			(self.platformID == 3 and self.platEncID in [0, 1, 10]))
 
 	def isSymbol(self):
+		"""Returns true if the subtable is for the Symbol encoding (3,0)"""
 		return self.platformID == 3 and self.platEncID == 0
 
 	def _writeCodes(self, codes, writer):