diff options
Diffstat (limited to 'Lib/fontTools/encodings/codecs.py')
-rw-r--r-- | Lib/fontTools/encodings/codecs.py | 210 |
1 file changed, 113 insertions(+), 97 deletions(-)
diff --git a/Lib/fontTools/encodings/codecs.py b/Lib/fontTools/encodings/codecs.py index 3b1a8256..3ac0268d 100644 --- a/Lib/fontTools/encodings/codecs.py +++ b/Lib/fontTools/encodings/codecs.py @@ -4,116 +4,132 @@ but missing from Python. See https://github.com/fonttools/fonttools/issues/236 import codecs import encodings -class ExtendCodec(codecs.Codec): - def __init__(self, name, base_encoding, mapping): - self.name = name - self.base_encoding = base_encoding - self.mapping = mapping - self.reverse = {v:k for k,v in mapping.items()} - self.max_len = max(len(v) for v in mapping.values()) - self.info = codecs.CodecInfo(name=self.name, encode=self.encode, decode=self.decode) - codecs.register_error(name, self.error) +class ExtendCodec(codecs.Codec): + def __init__(self, name, base_encoding, mapping): + self.name = name + self.base_encoding = base_encoding + self.mapping = mapping + self.reverse = {v: k for k, v in mapping.items()} + self.max_len = max(len(v) for v in mapping.values()) + self.info = codecs.CodecInfo( + name=self.name, encode=self.encode, decode=self.decode + ) + codecs.register_error(name, self.error) - def _map(self, mapper, output_type, exc_type, input, errors): - base_error_handler = codecs.lookup_error(errors) - length = len(input) - out = output_type() - while input: - # first try to use self.error as the error handler - try: - part = mapper(input, self.base_encoding, errors=self.name) - out += part - break # All converted - except exc_type as e: - # else convert the correct part, handle error as requested and continue - out += mapper(input[:e.start], self.base_encoding, self.name) - replacement, pos = base_error_handler(e) - out += replacement - input = input[pos:] - return out, length + def _map(self, mapper, output_type, exc_type, input, errors): + base_error_handler = codecs.lookup_error(errors) + length = len(input) + out = output_type() + while input: + # first try to use self.error as the error handler + try: + part = mapper(input, 
self.base_encoding, errors=self.name) + out += part + break # All converted + except exc_type as e: + # else convert the correct part, handle error as requested and continue + out += mapper(input[: e.start], self.base_encoding, self.name) + replacement, pos = base_error_handler(e) + out += replacement + input = input[pos:] + return out, length - def encode(self, input, errors='strict'): - return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors) + def encode(self, input, errors="strict"): + return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors) - def decode(self, input, errors='strict'): - return self._map(codecs.decode, str, UnicodeDecodeError, input, errors) + def decode(self, input, errors="strict"): + return self._map(codecs.decode, str, UnicodeDecodeError, input, errors) - def error(self, e): - if isinstance(e, UnicodeDecodeError): - for end in range(e.start + 1, e.end + 1): - s = e.object[e.start:end] - if s in self.mapping: - return self.mapping[s], end - elif isinstance(e, UnicodeEncodeError): - for end in range(e.start + 1, e.start + self.max_len + 1): - s = e.object[e.start:end] - if s in self.reverse: - return self.reverse[s], end - e.encoding = self.name - raise e + def error(self, e): + if isinstance(e, UnicodeDecodeError): + for end in range(e.start + 1, e.end + 1): + s = e.object[e.start : end] + if s in self.mapping: + return self.mapping[s], end + elif isinstance(e, UnicodeEncodeError): + for end in range(e.start + 1, e.start + self.max_len + 1): + s = e.object[e.start : end] + if s in self.reverse: + return self.reverse[s], end + e.encoding = self.name + raise e _extended_encodings = { - "x_mac_japanese_ttx": ("shift_jis", { - b"\xFC": chr(0x007C), - b"\x7E": chr(0x007E), - b"\x80": chr(0x005C), - b"\xA0": chr(0x00A0), - b"\xFD": chr(0x00A9), - b"\xFE": chr(0x2122), - b"\xFF": chr(0x2026), - }), - "x_mac_trad_chinese_ttx": ("big5", { - b"\x80": chr(0x005C), - b"\xA0": chr(0x00A0), - b"\xFD": chr(0x00A9), - 
b"\xFE": chr(0x2122), - b"\xFF": chr(0x2026), - }), - "x_mac_korean_ttx": ("euc_kr", { - b"\x80": chr(0x00A0), - b"\x81": chr(0x20A9), - b"\x82": chr(0x2014), - b"\x83": chr(0x00A9), - b"\xFE": chr(0x2122), - b"\xFF": chr(0x2026), - }), - "x_mac_simp_chinese_ttx": ("gb2312", { - b"\x80": chr(0x00FC), - b"\xA0": chr(0x00A0), - b"\xFD": chr(0x00A9), - b"\xFE": chr(0x2122), - b"\xFF": chr(0x2026), - }), + "x_mac_japanese_ttx": ( + "shift_jis", + { + b"\xFC": chr(0x007C), + b"\x7E": chr(0x007E), + b"\x80": chr(0x005C), + b"\xA0": chr(0x00A0), + b"\xFD": chr(0x00A9), + b"\xFE": chr(0x2122), + b"\xFF": chr(0x2026), + }, + ), + "x_mac_trad_chinese_ttx": ( + "big5", + { + b"\x80": chr(0x005C), + b"\xA0": chr(0x00A0), + b"\xFD": chr(0x00A9), + b"\xFE": chr(0x2122), + b"\xFF": chr(0x2026), + }, + ), + "x_mac_korean_ttx": ( + "euc_kr", + { + b"\x80": chr(0x00A0), + b"\x81": chr(0x20A9), + b"\x82": chr(0x2014), + b"\x83": chr(0x00A9), + b"\xFE": chr(0x2122), + b"\xFF": chr(0x2026), + }, + ), + "x_mac_simp_chinese_ttx": ( + "gb2312", + { + b"\x80": chr(0x00FC), + b"\xA0": chr(0x00A0), + b"\xFD": chr(0x00A9), + b"\xFE": chr(0x2122), + b"\xFF": chr(0x2026), + }, + ), } _cache = {} + def search_function(name): - name = encodings.normalize_encoding(name) # Rather undocumented... - if name in _extended_encodings: - if name not in _cache: - base_encoding, mapping = _extended_encodings[name] - assert(name[-4:] == "_ttx") - # Python 2 didn't have any of the encodings that we are implementing - # in this file. Python 3 added aliases for the East Asian ones, mapping - # them "temporarily" to the same base encoding as us, with a comment - # suggesting that full implementation will appear some time later. - # As such, try the Python version of the x_mac_... first, if that is found, - # use *that* as our base encoding. This would make our encoding upgrade - # to the full encoding when and if Python finally implements that. 
- # http://bugs.python.org/issue24041 - base_encodings = [name[:-4], base_encoding] - for base_encoding in base_encodings: - try: - codecs.lookup(base_encoding) - except LookupError: - continue - _cache[name] = ExtendCodec(name, base_encoding, mapping) - break - return _cache[name].info + name = encodings.normalize_encoding(name) # Rather undocumented... + if name in _extended_encodings: + if name not in _cache: + base_encoding, mapping = _extended_encodings[name] + assert name[-4:] == "_ttx" + # Python 2 didn't have any of the encodings that we are implementing + # in this file. Python 3 added aliases for the East Asian ones, mapping + # them "temporarily" to the same base encoding as us, with a comment + # suggesting that full implementation will appear some time later. + # As such, try the Python version of the x_mac_... first, if that is found, + # use *that* as our base encoding. This would make our encoding upgrade + # to the full encoding when and if Python finally implements that. + # http://bugs.python.org/issue24041 + base_encodings = [name[:-4], base_encoding] + for base_encoding in base_encodings: + try: + codecs.lookup(base_encoding) + except LookupError: + continue + _cache[name] = ExtendCodec(name, base_encoding, mapping) + break + return _cache[name].info + + return None - return None codecs.register(search_function) |