diff options
Diffstat (limited to 'Lib/fontTools/agl.py')
-rw-r--r-- | Lib/fontTools/agl.py | 251 |
1 files changed, 126 insertions, 125 deletions
diff --git a/Lib/fontTools/agl.py b/Lib/fontTools/agl.py index cc286e42..d6994628 100644 --- a/Lib/fontTools/agl.py +++ b/Lib/fontTools/agl.py @@ -5059,174 +5059,175 @@ _aglfnText = """\ class AGLError(Exception): - pass + pass + LEGACY_AGL2UV = {} AGL2UV = {} UV2AGL = {} + def _builddicts(): - import re + import re + + lines = _aglText.splitlines() - lines = _aglText.splitlines() + parseAGL_RE = re.compile("([A-Za-z0-9]+);((?:[0-9A-F]{4})(?: (?:[0-9A-F]{4}))*)$") - parseAGL_RE = re.compile("([A-Za-z0-9]+);((?:[0-9A-F]{4})(?: (?:[0-9A-F]{4}))*)$") + for line in lines: + if not line or line[:1] == "#": + continue + m = parseAGL_RE.match(line) + if not m: + raise AGLError("syntax error in glyphlist.txt: %s" % repr(line[:20])) + unicodes = m.group(2) + assert len(unicodes) % 5 == 4 + unicodes = [int(unicode, 16) for unicode in unicodes.split()] + glyphName = tostr(m.group(1)) + LEGACY_AGL2UV[glyphName] = unicodes - for line in lines: - if not line or line[:1] == '#': - continue - m = parseAGL_RE.match(line) - if not m: - raise AGLError("syntax error in glyphlist.txt: %s" % repr(line[:20])) - unicodes = m.group(2) - assert len(unicodes) % 5 == 4 - unicodes = [int(unicode, 16) for unicode in unicodes.split()] - glyphName = tostr(m.group(1)) - LEGACY_AGL2UV[glyphName] = unicodes + lines = _aglfnText.splitlines() - lines = _aglfnText.splitlines() + parseAGLFN_RE = re.compile("([0-9A-F]{4});([A-Za-z0-9]+);.*?$") - parseAGLFN_RE = re.compile("([0-9A-F]{4});([A-Za-z0-9]+);.*?$") + for line in lines: + if not line or line[:1] == "#": + continue + m = parseAGLFN_RE.match(line) + if not m: + raise AGLError("syntax error in aglfn.txt: %s" % repr(line[:20])) + unicode = m.group(1) + assert len(unicode) == 4 + unicode = int(unicode, 16) + glyphName = tostr(m.group(2)) + AGL2UV[glyphName] = unicode + UV2AGL[unicode] = glyphName - for line in lines: - if not line or line[:1] == '#': - continue - m = parseAGLFN_RE.match(line) - if not m: - raise AGLError("syntax error in aglfn.txt: %s" % repr(line[:20])) - unicode = m.group(1) - assert len(unicode) == 4 - unicode = int(unicode, 16) - glyphName = tostr(m.group(2)) - AGL2UV[glyphName] = unicode - UV2AGL[unicode] = glyphName _builddicts() def toUnicode(glyph, isZapfDingbats=False): - """Convert glyph names to Unicode, such as ``'longs_t.oldstyle'`` --> ``u'ſt'`` + """Convert glyph names to Unicode, such as ``'longs_t.oldstyle'`` --> ``u'ſt'`` - If ``isZapfDingbats`` is ``True``, the implementation recognizes additional - glyph names (as required by the AGL specification). - """ - # https://github.com/adobe-type-tools/agl-specification#2-the-mapping - # - # 1. Drop all the characters from the glyph name starting with - # the first occurrence of a period (U+002E; FULL STOP), if any. - glyph = glyph.split(".", 1)[0] + If ``isZapfDingbats`` is ``True``, the implementation recognizes additional + glyph names (as required by the AGL specification). + """ + # https://github.com/adobe-type-tools/agl-specification#2-the-mapping + # + # 1. Drop all the characters from the glyph name starting with + # the first occurrence of a period (U+002E; FULL STOP), if any. + glyph = glyph.split(".", 1)[0] - # 2. Split the remaining string into a sequence of components, - # using underscore (U+005F; LOW LINE) as the delimiter. - components = glyph.split("_") + # 2. Split the remaining string into a sequence of components, + # using underscore (U+005F; LOW LINE) as the delimiter. + components = glyph.split("_") - # 3. Map each component to a character string according to the - # procedure below, and concatenate those strings; the result - # is the character string to which the glyph name is mapped. - result = [_glyphComponentToUnicode(c, isZapfDingbats) - for c in components] - return "".join(result) + # 3. Map each component to a character string according to the + # procedure below, and concatenate those strings; the result + # is the character string to which the glyph name is mapped. + result = [_glyphComponentToUnicode(c, isZapfDingbats) for c in components] + return "".join(result) def _glyphComponentToUnicode(component, isZapfDingbats): - # If the font is Zapf Dingbats (PostScript FontName: ZapfDingbats), - # and the component is in the ITC Zapf Dingbats Glyph List, then - # map it to the corresponding character in that list. - dingbat = _zapfDingbatsToUnicode(component) if isZapfDingbats else None - if dingbat: - return dingbat + # If the font is Zapf Dingbats (PostScript FontName: ZapfDingbats), + # and the component is in the ITC Zapf Dingbats Glyph List, then + # map it to the corresponding character in that list. + dingbat = _zapfDingbatsToUnicode(component) if isZapfDingbats else None + if dingbat: + return dingbat - # Otherwise, if the component is in AGL, then map it - # to the corresponding character in that list. - uchars = LEGACY_AGL2UV.get(component) - if uchars: - return "".join(map(chr, uchars)) + # Otherwise, if the component is in AGL, then map it + # to the corresponding character in that list. + uchars = LEGACY_AGL2UV.get(component) + if uchars: + return "".join(map(chr, uchars)) - # Otherwise, if the component is of the form "uni" (U+0075, - # U+006E, and U+0069) followed by a sequence of uppercase - # hexadecimal digits (0–9 and A–F, meaning U+0030 through - # U+0039 and U+0041 through U+0046), if the length of that - # sequence is a multiple of four, and if each group of four - # digits represents a value in the ranges 0000 through D7FF - # or E000 through FFFF, then interpret each as a Unicode scalar - # value and map the component to the string made of those - # scalar values. Note that the range and digit-length - # restrictions mean that the "uni" glyph name prefix can be - # used only with UVs in the Basic Multilingual Plane (BMP). - uni = _uniToUnicode(component) - if uni: - return uni + # Otherwise, if the component is of the form "uni" (U+0075, + # U+006E, and U+0069) followed by a sequence of uppercase + # hexadecimal digits (0–9 and A–F, meaning U+0030 through + # U+0039 and U+0041 through U+0046), if the length of that + # sequence is a multiple of four, and if each group of four + # digits represents a value in the ranges 0000 through D7FF + # or E000 through FFFF, then interpret each as a Unicode scalar + # value and map the component to the string made of those + # scalar values. Note that the range and digit-length + # restrictions mean that the "uni" glyph name prefix can be + # used only with UVs in the Basic Multilingual Plane (BMP). + uni = _uniToUnicode(component) + if uni: + return uni - # Otherwise, if the component is of the form "u" (U+0075) - # followed by a sequence of four to six uppercase hexadecimal - # digits (0–9 and A–F, meaning U+0030 through U+0039 and - # U+0041 through U+0046), and those digits represents a value - # in the ranges 0000 through D7FF or E000 through 10FFFF, then - # interpret it as a Unicode scalar value and map the component - # to the string made of this scalar value. - uni = _uToUnicode(component) - if uni: - return uni + # Otherwise, if the component is of the form "u" (U+0075) + # followed by a sequence of four to six uppercase hexadecimal + # digits (0–9 and A–F, meaning U+0030 through U+0039 and + # U+0041 through U+0046), and those digits represents a value + # in the ranges 0000 through D7FF or E000 through 10FFFF, then + # interpret it as a Unicode scalar value and map the component + # to the string made of this scalar value. + uni = _uToUnicode(component) + if uni: + return uni - # Otherwise, map the component to an empty string. - return '' + # Otherwise, map the component to an empty string. + return "" # https://github.com/adobe-type-tools/agl-aglfn/blob/master/zapfdingbats.txt _AGL_ZAPF_DINGBATS = ( - " ✁✂✄☎✆✝✞✟✠✡☛☞✌✍✎✏✑✒✓✔✕✖✗✘✙✚✛✜✢✣✤✥✦✧★✩✪✫✬✭✮✯✰✱✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀" - "❁❂❃❄❅❆❇❈❉❊❋●❍■❏❑▲▼◆❖ ◗❘❙❚❯❱❲❳❨❩❬❭❪❫❴❵❛❜❝❞❡❢❣❤✐❥❦❧♠♥♦♣ ✉✈✇" - "①②③④⑤⑥⑦⑧⑨⑩❶❷❸❹❺❻❼❽❾❿➀➁➂➃➄➅➆➇➈➉➊➋➌➍➎➏➐➑➒➓➔→➣↔" - "↕➙➛➜➝➞➟➠➡➢➤➥➦➧➨➩➫➭➯➲➳➵➸➺➻➼➽➾➚➪➶➹➘➴➷➬➮➱✃❐❒❮❰") + " ✁✂✄☎✆✝✞✟✠✡☛☞✌✍✎✏✑✒✓✔✕✖✗✘✙✚✛✜✢✣✤✥✦✧★✩✪✫✬✭✮✯✰✱✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀" + "❁❂❃❄❅❆❇❈❉❊❋●❍■❏❑▲▼◆❖ ◗❘❙❚❯❱❲❳❨❩❬❭❪❫❴❵❛❜❝❞❡❢❣❤✐❥❦❧♠♥♦♣ ✉✈✇" + "①②③④⑤⑥⑦⑧⑨⑩❶❷❸❹❺❻❼❽❾❿➀➁➂➃➄➅➆➇➈➉➊➋➌➍➎➏➐➑➒➓➔→➣↔" + "↕➙➛➜➝➞➟➠➡➢➤➥➦➧➨➩➫➭➯➲➳➵➸➺➻➼➽➾➚➪➶➹➘➴➷➬➮➱✃❐❒❮❰" +) def _zapfDingbatsToUnicode(glyph): - """Helper for toUnicode().""" - if len(glyph) < 2 or glyph[0] != 'a': - return None - try: - gid = int(glyph[1:]) - except ValueError: - return None - if gid < 0 or gid >= len(_AGL_ZAPF_DINGBATS): - return None - uchar = _AGL_ZAPF_DINGBATS[gid] - return uchar if uchar != ' ' else None + """Helper for toUnicode().""" + if len(glyph) < 2 or glyph[0] != "a": + return None + try: + gid = int(glyph[1:]) + except ValueError: + return None + if gid < 0 or gid >= len(_AGL_ZAPF_DINGBATS): + return None + uchar = _AGL_ZAPF_DINGBATS[gid] + return uchar if uchar != " " else None _re_uni = re.compile("^uni([0-9A-F]+)$") def _uniToUnicode(component): - """Helper for toUnicode() to handle "uniABCD" components.""" - match = _re_uni.match(component) - if match is None: - return None - digits = match.group(1) - if len(digits) % 4 != 0: - return None - chars = [int(digits[i : i + 4], 16) - for i in range(0, len(digits), 4)] - if any(c >= 0xD800 and c <= 0xDFFF for c in chars): - # The AGL specification explicitly excluded surrogate pairs. - return None - return ''.join([chr(c) for c in chars]) + """Helper for toUnicode() to handle "uniABCD" components.""" + match = _re_uni.match(component) + if match is None: + return None + digits = match.group(1) + if len(digits) % 4 != 0: + return None + chars = [int(digits[i : i + 4], 16) for i in range(0, len(digits), 4)] + if any(c >= 0xD800 and c <= 0xDFFF for c in chars): + # The AGL specification explicitly excluded surrogate pairs. + return None + return "".join([chr(c) for c in chars]) _re_u = re.compile("^u([0-9A-F]{4,6})$") def _uToUnicode(component): - """Helper for toUnicode() to handle "u1ABCD" components.""" - match = _re_u.match(component) - if match is None: - return None - digits = match.group(1) - try: - value = int(digits, 16) - except ValueError: - return None - if ((value >= 0x0000 and value <= 0xD7FF) or - (value >= 0xE000 and value <= 0x10FFFF)): - return chr(value) - return None + """Helper for toUnicode() to handle "u1ABCD" components.""" + match = _re_u.match(component) + if match is None: + return None + digits = match.group(1) + try: + value = int(digits, 16) + except ValueError: + return None + if (value >= 0x0000 and value <= 0xD7FF) or (value >= 0xE000 and value <= 0x10FFFF): + return chr(value) + return None |