aboutsummaryrefslogtreecommitdiff
path: root/Lib/fontTools/ttLib/tables/_c_m_a_p.py
blob: a31b5059c138c07153b6d18699aafb2d3fda65e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
from fontTools.misc.textTools import bytesjoin, safeEval, readHex
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import getSearchRange
from fontTools.unicode import Unicode
from . import DefaultTable
import sys
import struct
import array
import logging


log = logging.getLogger(__name__)


def _make_map(font, chars, gids):
	assert len(chars) == len(gids)
	glyphNames = font.getGlyphNameMany(gids)
	cmap = {}
	for char,gid,name in zip(chars,gids,glyphNames):
		if gid == 0:
			continue
		cmap[char] = name
	return cmap

class table__c_m_a_p(DefaultTable.DefaultTable):
	"""Character to Glyph Index Mapping Table

	This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_
	table, which maps between input characters (in Unicode or other system encodings)
	and glyphs within the font. The ``cmap`` table contains one or more subtables
	which determine the mapping of characters to glyphs across different platforms
	and encoding systems.

	``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access
	to the subtables, although it is normally easier to retrieve individual subtables
	through the utility methods described below. To add new subtables to a font,
	first determine the subtable format (if in doubt use format 4 for glyphs within
	the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation
	Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``,
	and append them to the ``.tables`` list.

	Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap``
	attribute.

	Example::

		cmap4_0_3 = CmapSubtable.newSubtable(4)
		cmap4_0_3.platformID = 0
		cmap4_0_3.platEncID = 3
		cmap4_0_3.language = 0
		cmap4_0_3.cmap = { 0xC1: "Aacute" }

		cmap = newTable("cmap")
		cmap.tableVersion = 0
		cmap.tables = [cmap4_0_3]
	"""

	def getcmap(self, platformID, platEncID):
		"""Returns the first subtable which matches the given platform and encoding.

		Args:
			platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh
				(deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows.
			platEncID (int): Encoding ID. Interpretation depends on the platform ID.
				See the OpenType specification for details.

		Returns:
			An object which is a subclass of :py:class:`CmapSubtable` if a matching
			subtable is found within the font, or ``None`` otherwise.
		"""

		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))):
		"""Returns the 'best' Unicode cmap dictionary available in the font
		or ``None``, if no Unicode cmap subtable is available.

		By default it will search for the following (platformID, platEncID)
		pairs in order::

				(3, 10), # Windows Unicode full repertoire
				(0, 6),  # Unicode full repertoire (format 13 subtable)
				(0, 4),  # Unicode 2.0 full repertoire
				(3, 1),  # Windows Unicode BMP
				(0, 3),  # Unicode 2.0 BMP
				(0, 2),  # Unicode ISO/IEC 10646
				(0, 1),  # Unicode 1.1
				(0, 0)   # Unicode 1.0

		This order can be customized via the ``cmapPreferences`` argument.
		"""
		for platformID, platEncID in cmapPreferences:
			cmapSubtable = self.getcmap(platformID, platEncID)
			if cmapSubtable is not None:
				return cmapSubtable.cmap
		return None  # None of the requested cmap subtables were found

	def buildReversed(self):
		"""Builds a reverse mapping dictionary

		Iterates over all Unicode cmap tables and returns a dictionary mapping
		glyphs to sets of codepoints, such as::

			{
				'one': {0x31},
				'A': {0x41,0x391}
			}

		The values are sets of Unicode codepoints because
		some fonts map different codepoints to the same glyph.
		For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391
		GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph.
		"""
		result = {}
		for subtable in self.tables:
			if subtable.isUnicode():
				for codepoint, name in subtable.cmap.items():
					result.setdefault(name, set()).add(codepoint)
		return result

	def decompile(self, data, ttFont):
		"""Parse binary ``cmap`` table data into a list of subtable objects.

		Reads the table version and the encoding records, creates one subtable
		per record and stores them in ``self.tables``. A record whose subtable
		reports a zero length is logged and skipped. Records that point at the
		same offset share a single ``cmap`` dictionary.

		Args:
			data (bytes): The raw ``cmap`` table data.
			ttFont: The font being read. When ``ttFont.lazy`` is ``False`` all
				subtables are fully decompiled before returning; otherwise only
				their headers are read here.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		# Maps a subtable offset to the index in ``tables`` of the first
		# subtable that was read from that offset.
		seenOffsets = {}
		for i in range(numSubTables):
			platformID, platEncID, offset = struct.unpack(
					">HHl", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			# Formats 8, 10, 12 and 13 carry a reserved word followed by a
			# 32-bit length; format 14 has a 32-bit length right after the format.
			if format in [8,10,12,13]:
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif format in [14]:
				format, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				log.error(
					"cmap subtable is reported as having zero length: platformID %s, "
					"platEncID %s, format %s offset %s. Skipping table.",
					platformID, platEncID, format, offset)
				continue
			table = CmapSubtable.newSubtable(format)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.data = None # Mark as decompiled
				table.cmap = tables[seenOffsets[offset]].cmap
			else:
				# Remember the position in ``tables``, not the record index ``i``:
				# the two drift apart as soon as a zero-length record is skipped.
				seenOffsets[offset] = len(tables)
			tables.append(table)
		if ttFont.lazy is False:  # Be lazy for None and True
			self.ensureDecompiled()

	def ensureDecompiled(self):
		"""Force full decompilation of every subtable in ``self.tables``."""
		for st in self.tables:
			st.ensureDecompiled()

	def compile(self, ttFont):
		"""Serialize the table: header, encoding records, then subtable data.

		``self.tables`` is sorted in place first. Subtables that share the same
		``cmap`` object, or that compile to identical bytes, are written once
		and referenced by several encoding records.

		Args:
			ttFont: The font being compiled; passed on to each subtable's
				``compile`` method.

		Returns:
			The binary ``cmap`` table data as bytes.
		"""
		self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = b""
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
					tableData = tableData + chunk
			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable to ``writer``."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Load one XML element: the table version or a ``cmap_format_N`` subtable.

		Elements with any other name are ignored. A subtable element is turned
		into a new subtable object, populated through its own ``fromXML`` and
		appended to ``self.tables`` (which is created on first use).
		"""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if not name.startswith("cmap_format_"):
			return
		if not hasattr(self, "tables"):
			self.tables = []
		# The subtable format number follows the "cmap_format_" prefix.
		format = safeEval(name[12:])
		table = CmapSubtable.newSubtable(format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)


class CmapSubtable(object):
	"""Base class for all cmap subtable formats.

	Subclasses which handle the individual subtable formats are named
	``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass`
	to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a
	new subtable object for a given format.

	The object exposes a ``.cmap`` attribute, which contains a dictionary mapping
	character codepoints to glyph names.
	"""

	@staticmethod
	def getSubtableClass(format):
		"""Return the subtable class for a format."""
		return cmap_classes.get(format, cmap_format_unknown)

	@staticmethod
	def newSubtable(format):
		"""Return a new instance of a subtable for the given format."""
		subtableClass = CmapSubtable.getSubtableClass(format)
		return subtableClass(format)

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None
		self.platformID = None  #: The platform ID of this subtable
		self.platEncID = None   #: The encoding ID of this subtable (interpretation depends on ``platformID``)
		self.language = None    #: The language ID of this subtable (Macintosh platform only)

	def ensureDecompiled(self):
		"""Decompile the saved raw data, if any, and then discard it."""
		if self.data is None:
			return
		self.decompile(None, None) # use saved data.
		# Once this table has been decompiled, make sure we don't just return
		# the original data. Also avoids recursion when called with an
		# attribute that the cmap subtable doesn't have.
		self.data = None

	def __getattr__(self, attr):
		"""Lazily decompile the subtable when a missing attribute is requested.

		Raises:
			AttributeError: For dunder names, when there is no pending raw data
				to decompile, or when the attribute is still missing after
				decompilation.
		"""
		# allow lazy decompilation of subtables.
		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		# Bypass __getattr__ when reading ``data``: on an instance where it was
		# never set (e.g. created without running __init__), ``self.data``
		# would re-enter this method and recurse without end.
		try:
			pendingData = object.__getattribute__(self, "data")
		except AttributeError:
			raise AttributeError(attr)
		if pendingData is None:
			raise AttributeError(attr)
		self.ensureDecompiled()
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read the 6-byte format/length/language header and keep the rest for later.

		Args:
			data (bytes): The complete subtable data, header included.
			ttFont: The font the subtable belongs to; stored on ``self.ttFont``.
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write the subtable as an element named after its class, one ``map`` per code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def getEncoding(self, default=None):
		"""Returns the Python encoding name for this cmap subtable based on its platformID,
		platEncID, and language.  If encoding for these values is not known, by default
		``None`` is returned.  That can be overridden by passing a value to the ``default``
		argument.

		Note that if you want to choose a "preferred" cmap subtable, most of the time
		``self.isUnicode()`` is what you want as that one only returns true for the modern,
		commonly used, Unicode-compatible triplets, not the legacy ones.
		"""
		return getEncoding(self.platformID, self.platEncID, self.language, default)

	def isUnicode(self):
		"""Returns true if the characters are interpreted as Unicode codepoints."""
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))

	def isSymbol(self):
		"""Returns true if the subtable is for the Symbol encoding (3,0)"""
		return self.platformID == 3 and self.platEncID == 0

	def _writeCodes(self, codes, writer):
		"""Write one ``map`` tag per ``(code, name)`` pair.

		For Unicode subtables each tag is followed by a comment taken from the
		``Unicode`` lookup for that code.
		"""
		isUnicode = self.isUnicode()
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def __lt__(self, other):
		"""Order subtables by (platformID, platEncID, language), then by format.

		Implemented so that ``list.sort()`` sorts according to the spec.
		Returns ``NotImplemented`` when ``other`` is not a ``CmapSubtable``.
		"""
		if not isinstance(other, CmapSubtable):
			return NotImplemented

		selfKey = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "language", None))
		otherKey = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "language", None))
		if selfKey != otherKey:
			return selfKey < otherKey

		# Same platform/encoding/language. Instance dicts cannot be ordered in
		# Python 3 (``dict < dict`` raises TypeError), so break the tie on the
		# subtable format when both are known and otherwise treat the two as
		# equal, which leaves their relative order to the (stable) sort.
		selfFormat = getattr(self, "format", None)
		otherFormat = getattr(other, "format", None)
		if selfFormat is None or otherFormat is None:
			return False
		return selfFormat < otherFormat


class cmap_format_0(CmapSubtable):
	"""Format 0 subtable: 256 one-byte glyph IDs, indexed by character code.

	The compiled subtable is always 262 bytes: the 6-byte header followed by
	one glyph ID byte for each of the character codes 0..255.
	"""

	def decompile(self, data, ttFont):
		"""Build ``self.cmap`` from the subtable data.

		Either both arguments are given (direct call; the header is parsed
		first) or both are ``None`` (lazy path; the data saved earlier by
		``decompileHeader`` is used).
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)
		# decompileHeader leaves everything after the header in self.data
		payload = self.data
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		glyphIDs = array.array("B")
		glyphIDs.frombytes(payload)
		# The position of a glyph ID in the array is its character code.
		codes = list(range(len(glyphIDs)))
		self.cmap = _make_map(self.ttFont, codes, glyphIDs)

	def compile(self, ttFont):
		"""Return the 262-byte binary form of the subtable.

		When raw data is still attached (the subtable was never decompiled),
		it is written back unchanged behind a fresh header.
		"""
		rawData = self.data
		if rawData:
			return struct.pack(">HHH", 0, 262, self.language) + rawData

		mapping = self.cmap
		assert set(mapping.keys()).issubset(range(256))
		lookupGlyphID = ttFont.getGlyphID
		# Character codes without an entry map to glyph 0.
		glyphIDList = []
		for code in range(256):
			if code in mapping:
				glyphIDList.append(lookupGlyphID(mapping[code]))
			else:
				glyphIDList.append(0)

		packedGlyphIDs = array.array("B", glyphIDList)
		compiled = struct.pack(">HHH", 0, 262, self.language) + packedGlyphIDs.tobytes()
		assert len(compiled) == 262
		return compiled

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language and the ``map`` child elements into ``self.cmap``."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		mapping = self.cmap
		for child in content:
			# Only tuples are elements; anything else in content is skipped.
			if isinstance(child, tuple):
				tag, childAttrs, _childContent = child
				if tag == "map":
					mapping[safeEval(childAttrs["code"])] = childAttrs["name"]


# struct layout of one format 2 subheader record:
# firstCode (H), entryCount (H), idDelta (h), idRangeOffset (H), big-endian.
subHeaderFormat = ">HHhH"
class SubHeader(object):
	"""Plain record for one cmap format 2 subheader and its glyph index sub-array."""

	def __init__(self):
		# The four header fields start out unset; they are filled in by the
		# code that reads or builds the subheader.
		self.firstCode = self.entryCount = self.idDelta = self.idRangeOffset = None
		# Every instance gets its own, initially empty, list.
		self.glyphIndexArray = []

class cmap_format_2(CmapSubtable):
	"""Format 2 subtable: mixed one-byte and two-byte character codes.

	The first byte of a character code selects a subheader through a 256-entry
	key array. Subheader 0 handles one-byte codes; every other subheader maps
	the second byte of a two-byte code into a slice of the glyph index array.
	"""

	def setIDDelta(self, subHeader):
		"""Pick ``idDelta`` for a subheader and rebase its glyph index array.

		After the call the smallest non-zero entry of
		``subHeader.glyphIndexArray`` is 1 and ``subHeader.idDelta`` holds the
		signed 16-bit value that restores the real glyph IDs via
		``(entry + idDelta) % 0x10000``. Zero entries (notdef) are left alone.
		A subheader without any non-zero entry keeps ``idDelta == 0``.

		Args:
			subHeader (SubHeader): The subheader to adjust in place.
		"""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		nonZeroGIs = [gid for gid in subHeader.glyphIndexArray if gid != 0]
		if not nonZeroGIs:
			return  # empty or all-notdef subrange: nothing to rebase
		minGI = min(nonZeroGIs)
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI - 1 does not fit in a short.
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid + idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if (minGI > 1):
			idDelta = minGI - 1
			if idDelta > 0x7FFF:
				# Use the negative value that is congruent mod 0x10000 so that
				# idDelta fits into the signed 16-bit field.
				idDelta -= 0x10000
			subHeader.idDelta = idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					# Wrap to 16 bits: with a negative idDelta the plain
					# difference exceeds 0xFFFF and could not be packed.
					subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000

	def decompile(self, data, ttFont):
		"""Build ``self.cmap`` from the subtable data.

		Either both arguments are given (direct call; the header is parsed
		first) or both are ``None`` (lazy path; the data saved earlier by
		``decompileHeader`` is used).
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		allKeys = array.array("H")
		allKeys.frombytes(data[:512])
		data = data[512:]
		if sys.byteorder != "big": allKeys.byteswap()
		# Keys are stored as byte offsets into the subheader array (8 bytes each).
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset counts from the idRangeOffset word itself, which is
			# the last two bytes of the subheader just read.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H")
			giList.frombytes(data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big": giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = b""
		cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.

		gids = list(cmap.values())
		charCodes = list(cmap.keys())
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of the subtable.

		When raw data is still attached (the subtable was never decompiled),
		it is written back unchanged behind a header built from the stored
		format, length and language. Otherwise the subheaders and glyph index
		array are built from ``self.cmap``.

		Glyph names unknown to the font are accepted when they have the form
		``gidNNN`` (virtual glyph IDs).

		Raises:
			KeyError: If a glyph name cannot be resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Any lookup/parse failure is reported as an unknown
							# glyph name; interrupts are no longer swallowed.
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		# Only codes with a non-zero gid count here, because the loop below skips the others;
		# an empty or all-notdef cmap therefore also gets the forced subheader.
		firstMappedCode = next(
			(code for code, gid in zip(charCodes, gids) if gid != 0), None)
		if firstMappedCode is None or firstMappedCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)

		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							oneByteCode = subHeader.firstCode + index
							subHeaderKeys[oneByteCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we added to the subheader array.
		self.setIDDelta(subHeader)

		# If that last subheader is subheader 0 (the cmap has no two-byte codes), its
		# one-byte codes still have to be pointed at it; the loop above only does this
		# when a later subheader follows.
		if lastFirstByte == 0:
			for index in range(subHeader.entryCount):
				subHeaderKeys[subHeader.firstCode + index] = 0

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in 	subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = bytesjoin(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language and the ``map`` child elements into ``self.cmap``."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			# Only tuples are elements; anything else in content is skipped.
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]


# struct format of the fixed-size leading part of a format 4 subtable: seven
# big-endian uint16 values. cmap_format_4.compile() packs them in the order
# format, length, language, segCountX2, searchRange, entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# Layout of the variable-size arrays that follow those seven fields:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array

def splitRange(startCode, endCode, cmap):
	"""Split a run of consecutive character codes into cmap4 segments.

	*cmap* maps every character code in ``startCode..endCode`` (inclusive)
	to a glyph ID. The range is divided into sub-ranges whose glyph IDs are
	consecutive and sub-ranges whose glyph IDs are not, keeping a split only
	when it is expected to make the stored data smaller. This is a heuristic,
	not a proven optimum.

	Returns ``(start, end)``: *end* lists the end code of every resulting
	segment, *start* lists the start code of every segment except the first
	(the caller already knows it is *startCode*), so
	``len(start) + 1 == len(end)``.
	"""
	if startCode == endCode:
		return [], [endCode]

	# Pass 1: collect the maximal runs of codes whose glyph IDs increase by
	# exactly one. runStart is None while we are outside such a run.
	runs = []
	runStart = None
	prevCode = startCode
	prevGID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		gid = cmap[code]
		if gid - 1 == prevGID:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevGID = gid
		prevCode = code
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Pass 2: drop the runs that would only make the data bigger.
	# A new segment costs 8 bytes; not using a new segment costs 2 bytes
	# per character.
	kept = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the run is the whole range: nothing to split
		if first == startCode or last == endCode:
			minLength = 4  # touching an edge costs one extra segment
		else:
			minLength = 8  # a run in the middle costs two extra segments
		if last - first + 1 > minLength:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Make sure the very first and very last codes are covered.
	if kept[0][0] != startCode:
		kept.insert(0, (startCode, kept[0][0] - 1))
	if kept[-1][1] != endCode:
		kept.append((kept[-1][1] + 1, endCode))

	# Pass 3: fill the gaps between kept runs -- those gaps are the segments
	# whose glyph IDs are _not_ consecutive.
	segments = [kept[0]]
	for seg in kept[1:]:
		gapStart = segments[-1][1] + 1
		if gapStart != seg[0]:
			segments.append((gapStart, seg[0] - 1))
		segments.append(seg)

	# Transform the segments into the startCode/endCode lists; the first
	# start code is omitted.
	start = [first for first, _ in segments[1:]]
	end = [last for _, last in segments]

	assert len(start) + 1 == len(end)
	return start, end


class cmap_format_4(CmapSubtable):
	"""cmap subtable format 4: 16-bit character codes stored as segments.

	``self.cmap`` maps character codes to glyph names. Each segment of
	contiguous character codes is stored either as a constant delta to the
	glyph ID or as an offset into a shared glyph index array.
	"""

	def decompile(self, data, ttFont):
		"""Decode the segment arrays and build ``self.cmap``.

		Either both *data* and *ttFont* are given (direct call; the header is
		parsed first through ``decompileHeader``) or both are ``None`` (the
		data previously stored in ``self.data`` is used). Mixing the two
		trips an assertion. ``self.data`` is reset to ``None`` once the raw
		bytes have been read.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		allCodes = array.array("H")
		allCodes.frombytes(data)
		self.data = data = None

		# The file stores big-endian values; array.array reads native order.
		if sys.byteorder != "big": allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			start = startCode[i]
			delta = idDelta[i]
			rangeOffset = idRangeOffset[i]
			# idRangeOffset[i] is a byte offset counted from the position of
			# idRangeOffset[i] itself. Converting it to an index into
			# glyphIndexArray means halving it and subtracting the
			# (segCount - i) offset entries that lie in between; subtracting
			# `start` as well lets us simply add the character code below.
			partial = rangeOffset // 2 - start + i - len(idRangeOffset)

			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			if rangeOffset == 0:
				# Glyph IDs follow directly from the code, modulo 65536.
				gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
			else:
				for charCode in rangeCharCodes:
					index = charCode + partial
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + delta
					else:
						glyphID = 0  # missing glyph
					gids.append(glyphID & 0xFFFF)

		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If ``self.data`` still holds raw, undecoded bytes they are returned
		unchanged behind a header rebuilt from the stored format, length and
		language. Otherwise the segments are computed from ``self.cmap``.

		Raises ``KeyError(name)`` when a glyph name cannot be resolved to a
		glyph ID, not even as a virtual ``gidNNN`` name.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		charCodes = list(self.cmap.keys())
		if not charCodes:
			# Empty mapping: only the mandatory closing segment is written.
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = [self.cmap[code] for code in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									gid = int(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								# Report every lookup/parse failure as a
								# missing glyph name, but let
								# KeyboardInterrupt/SystemExit through.
								raise KeyError(name)

						gids.append(gid)
			cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			start, end = splitRange(startCode[-1], lastCode, cmap)
			startCode.extend(start)
			endCode.extend(end)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# Build idDelta, idRangeOffset and glyphIndexArray for each segment.
		# (When the mapping is empty the loop below does not run, so `cmap`
		# is never read.)
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a delta (modulo 65536) is enough.
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# Byte offset from this idRangeOffset entry to the segment's
				# first glyph index: the remaining (len(endCode) - i) offset
				# entries plus everything already in glyphIndexArray.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Binary-search helper fields of the header.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

		# The single 0 between endCode and startCode is the reservedPad field.
		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaArray = array.array("H", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big": charCodeArray.byteswap()
		if sys.byteorder != "big": idDeltaArray.byteswap()
		if sys.byteorder != "big": restArray.byteswap()
		data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate this subtable from its parsed XML element.

		Stores the evaluated ``language`` attribute and adds a
		``character code -> glyph name`` entry to ``self.cmap`` for every
		``map`` child element. Unlike the other formats' readers, a child
		element with any other name trips an assertion.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]


class cmap_format_6(CmapSubtable):
	"""cmap subtable format 6: glyph IDs for one contiguous range of 16-bit codes."""

	def decompile(self, data, ttFont):
		"""Decode the glyph ID array and build ``self.cmap``.

		Either both *data* and *ttFont* are given (direct call; the header is
		parsed first through ``decompileHeader``) or both are ``None`` (the
		data previously stored in ``self.data`` is used).
		"""
		# Lazy decompilation through the subtable __getattr__ passes None for
		# both arguments; a direct caller must supply both.
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)

		payload = self.data  # decompileHeader stored everything after the header here
		firstCode, entryCount = struct.unpack(">HH", payload[:4])
		firstCode = int(firstCode)
		# Read exactly entryCount entries rather than everything that is left:
		# the remaining data is not always 2 * entryCount bytes long (seen in
		# Apple's Helvetica).
		glyphIDs = array.array("H")
		glyphIDs.frombytes(payload[4:4 + 2 * int(entryCount)])
		if sys.byteorder != "big":
			glyphIDs.byteswap()
		self.data = None

		codes = list(range(firstCode, firstCode + len(glyphIDs)))
		self.cmap = _make_map(self.ttFont, codes, glyphIDs)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		Raw bytes still held in ``self.data`` are re-emitted behind the stored
		header. Otherwise the range from the lowest to the highest mapped
		code is written, with glyph ID 0 for codes that are not mapped.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		mapping = self.cmap
		sortedCodes = sorted(mapping.keys())
		if not sortedCodes:
			# yes, there are empty cmap tables.
			firstCode = 0
			entryCount = 0
			body = b""
		else:
			firstCode = sortedCodes[0]
			entryCount = sortedCodes[-1] - firstCode + 1
			glyphIDs = []
			for code in range(firstCode, firstCode + entryCount):
				if code in mapping:
					glyphIDs.append(ttFont.getGlyphID(mapping[code]))
				else:
					glyphIDs.append(0)  # hole inside the range
			packed = array.array("H", glyphIDs)
			if sys.byteorder != "big":
				packed.byteswap()
			body = packed.tobytes()
		# 10 is the size of the five uint16 header fields packed here.
		header = struct.pack(">HHHHH",
				6, len(body) + 10, self.language, firstCode, entryCount)
		return header + body

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate this subtable from its parsed XML element.

		Stores the evaluated ``language`` attribute and adds a
		``character code -> glyph name`` entry to ``self.cmap`` for every
		``map`` child element; anything else in *content* is skipped.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		mapping = self.cmap

		for child in content:
			if not isinstance(child, tuple):
				continue
			childName, childAttrs, _childContent = child
			if childName == "map":
				mapping[safeEval(childAttrs["code"])] = childAttrs["name"]


class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of the group-based cmap formats 12 and 13.

	Both formats store ``(startCharCode, endCharCode, glyphID)`` groups of
	32-bit values. Subclasses provide ``_format_step``, ``_computeGIDs()``
	and ``_IsInSameRun()``, which define how glyph IDs progress in a group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header and keep the group data for later.

		Asserts that the data length, the group count and the length field
		agree with each other.
		"""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the groups and build ``self.cmap``.

		Either both *data* and *ttFont* are given (direct call; the header is
		parsed first) or both are ``None`` (the data previously stored in
		``self.data`` is used).
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] )
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(list(range(startCharCode, endCharCode +1)))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		Raw bytes still held in ``self.data`` are re-emitted behind the stored
		header. Otherwise the sorted character codes are merged into groups
		according to ``_IsInSameRun()``. An empty mapping yields a header
		with zero groups.

		Raises ``KeyError(name)`` when a glyph name cannot be resolved to a
		glyph ID, not even as a virtual ``gidNNN`` name.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		if not charCodes:
			# Nothing to group: write the 16-byte header alone instead of
			# failing on charCodes[0] further down.
			return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Report every lookup/parse failure as a missing
							# glyph name, but let KeyboardInterrupt/SystemExit
							# through.
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		charCodes.sort()
		startCharCode = charCodes[0]
		startGlyphID = cmap[startCharCode]
		# Seed the "previous" values so that the first code counts as a
		# continuation of its own run and no empty group is emitted.
		lastGlyphID = startGlyphID - self._format_step
		lastCharCode = startCharCode - 1
		nGroups = 0
		dataList = []
		for charCode in charCodes:
			glyphID = cmap[charCode]
			if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
				# The run ended with the previous code: flush it.
				dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
				startCharCode = charCode
				startGlyphID = glyphID
				nGroups = nGroups + 1
			lastGlyphID = glyphID
			lastCharCode = charCode
		# Flush the final run.
		dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = nGroups + 1
		data = bytesjoin(dataList)
		lengthSubtable = len(data) +16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the header fields as attributes and the mapping sorted by code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate this subtable from its parsed XML element.

		Restores the header fields from the attributes and adds a
		``character code -> glyph name`` entry to ``self.cmap`` for every
		``map`` child element; anything else in *content* is skipped.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _childContent = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]


class cmap_format_12(cmap_format_12_or_13):
	"""Format 12: glyph IDs increase by one along each group of codes."""

	# Glyph ID difference between neighbouring codes of one group.
	_format_step = 1

	def __init__(self, format=12):
		super().__init__(format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the *numberOfGlyphs* consecutive glyph IDs from *startingGlyph*."""
		return [startingGlyph + offset for offset in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether both the code and the glyph ID directly follow the previous pair."""
		followsCode = charCode == lastCharCode + 1
		followsGlyph = glyphID == lastGlyphID + 1
		return followsGlyph and followsCode


class cmap_format_13(cmap_format_12_or_13):
	"""Format 13: every code of a group maps to the same glyph ID."""

	# Glyph ID difference between neighbouring codes of one group.
	_format_step = 0

	def __init__(self, format=13):
		super().__init__(format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return *startingGlyph* repeated *numberOfGlyphs* times."""
		return [startingGlyph for _ in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether the code directly follows the previous one with an unchanged glyph ID."""
		followsCode = charCode == lastCharCode + 1
		sameGlyph = glyphID == lastGlyphID
		return sameGlyph and followsCode


def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian value (uint24) into an int."""
	# Pad to four bytes so that struct can read it as an unsigned long.
	return struct.unpack(">L", b"\0" + threeByteString)[0]

def cvtFromUVS(val):
	"""Encode *val* as a 3-byte big-endian value (uint24).

	Asserts that *val* fits in 24 bits.
	"""
	assert 0 <= val < 0x1000000
	# Pack as four bytes and drop the always-zero most significant byte.
	return struct.pack(">L", val)[1:]


class cmap_format_14(CmapSubtable):
	"""cmap subtable format 14: Unicode variation sequences.

	The mapping lives in ``self.uvsDict``: a dict keyed by variation selector
	whose values are lists of ``(baseUnicodeValue, glyphName)`` tuples, with
	``None`` as glyph name for "default" entries. ``self.cmap`` exists only
	so that generic clients do not fail; it stays empty.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining data for later."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Decode the variation selector records into ``self.uvsDict``.

		Either both *data* and *ttFont* are given (direct call; the header is
		parsed first) or both are ``None`` (the data previously stored in
		``self.data`` is used). ``self.data`` is reset to ``None`` afterwards,
		as in the other subtable formats, so that a later ``compile()``
		rebuilds the data from ``self.uvsDict``.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# Offsets count from the start of the subtable, but `data`
				# starts after the 10-byte header.
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt+1
					baseUVList = list(range(firstBaseUV, firstBaseUV+cnt))
					glyphList = [None]*cnt
					localUVList = zip(baseUVList, glyphList)
					try:
						uvsDict[varUVS].extend(localUVList)
					except KeyError:
						uvsDict[varUVS] = list(localUVList)

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append((uv, glyphName))
				try:
					uvsDict[varUVS].extend(localUVList)
				except KeyError:
					uvsDict[varUVS] = localUVList

		self.uvsDict = uvsDict
		# The raw bytes are fully decoded now; drop them so compile() does
		# not hand back stale data after uvsDict has been edited.
		self.data = data = None

	def toXML(self, writer, ttFont):
		"""Write one ``map`` element per (variation selector, base value) pair.

		Default entries (no glyph name) are written before non-default ones;
		within each kind the entries are ordered by base value.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				attrs = [("uv", hex(uv)), ("uvs", hex(uvs))]
				if gname is not None:
					attrs.append(("name", gname))
				writer.simpletag("map", attrs)
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate ``self.uvsDict`` from the parsed XML element.

		Every ``map`` child contributes a ``(uv, glyphName)`` entry under its
		``uvs`` key; a missing ``name`` attribute means a default mapping.
		Entries are appended to an already existing ``self.uvsDict``.
		"""
		self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bind the local name unconditionally: it is needed below even when
		# self.uvsDict already existed.
		uvsDict = self.uvsDict

		# For backwards compatibility reasons we accept "None" as an indicator
		# for "default mapping", unless the font actually has a glyph named
		# "None".
		_hasGlyphNamedNone = None

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			uvs = safeEval(attrs["uvs"])
			uv = safeEval(attrs["uv"])
			gname = attrs.get("name")
			if gname == "None":
				if _hasGlyphNamedNone is None:
					_hasGlyphNamedNone = "None" in ttFont.getGlyphOrder()
				if not _hasGlyphNamedNone:
					gname = None
			try:
				uvsDict[uvs].append((uv, gname))
			except KeyError:
				uvsDict[uvs] = [(uv, gname)]

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		Raw bytes still held in ``self.data`` are re-emitted behind the stored
		header. Otherwise the records are built from ``self.uvsDict``;
		``self.length`` and ``self.numVarSelectorRecords`` are updated.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords =[]
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = [entry for entry in entryList if entry[1] is None]
			if defList:
				defList = [entry[0] for entry in defList]
				defOVSOffset = offset
				defList.sort()

				# Run-length encode consecutive base values as
				# (startUnicodeValue, additionalCount) ranges.
				lastUV = defList[0]
				cnt = -1
				defRecs = []
				for defEntry in defList:
					cnt +=1
					# Close the range when the run is broken, or when
					# additionalCount would no longer fit in its uint8 field.
					if (lastUV+cnt) != defEntry or cnt > 255:
						rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
						lastUV = defEntry
						defRecs.append(rec)
						cnt = 0

				rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
				defRecs.append(rec)

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			ndefList = [entry for entry in entryList if entry[1] is not None]
			if ndefList:
				nonDefUVSOffset = offset
				ndefList.sort()
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					data.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

		return headerdata + data


class cmap_format_unknown(CmapSubtable):
	"""Subtable whose content is kept as raw bytes and never interpreted."""

	def toXML(self, writer, ttFont):
		"""Write the raw data as a hex dump inside a tag named after the format."""
		# The first 12 characters of the class name plus the format number.
		tagName = self.__class__.__name__[:12] + str(self.format)
		tagAttrs = [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				]
		writer.begintag(tagName, tagAttrs)
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(tagName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Restore the raw data from the hex dump; ``self.cmap`` stays empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store *data* untouched; no header fields are parsed."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Store the raw data when given; nothing is decoded.

		Either both *data* and *ttFont* are given or both are ``None``.
		"""
		# Lazy decompilation through the subtable __getattr__ passes None for
		# both arguments; a direct caller must supply both.
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)

	def compile(self, ttFont):
		"""Return the stored raw data, or ``None`` when there is none."""
		return self.data or None

# Maps a cmap subtable format number to the class that implements it.
# NOTE(review): formats missing here are presumably handled by
# cmap_format_unknown -- confirm against the code that reads this dict.
cmap_classes = {
		0: cmap_format_0,
		2: cmap_format_2,
		4: cmap_format_4,
		6: cmap_format_6,
		12: cmap_format_12,
		13: cmap_format_13,
		14: cmap_format_14,
}