summaryrefslogtreecommitdiff
path: root/lib/python2.7/encodings/idna.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/encodings/idna.py')
-rw-r--r--lib/python2.7/encodings/idna.py288
1 files changed, 0 insertions, 288 deletions
diff --git a/lib/python2.7/encodings/idna.py b/lib/python2.7/encodings/idna.py
deleted file mode 100644
index ea90d67..0000000
--- a/lib/python2.7/encodings/idna.py
+++ /dev/null
@@ -1,288 +0,0 @@
-# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
-
-import stringprep, re, codecs
-from unicodedata import ucd_3_2_0 as unicodedata
-
-# IDNA section 3.1
-dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
-
-# IDNA section 5
-ace_prefix = "xn--"
-uace_prefix = unicode(ace_prefix, "ascii")
-
-# This assumes query strings, so AllowUnassigned is true
-def nameprep(label):
- # Map
- newlabel = []
- for c in label:
- if stringprep.in_table_b1(c):
- # Map to nothing
- continue
- newlabel.append(stringprep.map_table_b2(c))
- label = u"".join(newlabel)
-
- # Normalize
- label = unicodedata.normalize("NFKC", label)
-
- # Prohibit
- for c in label:
- if stringprep.in_table_c12(c) or \
- stringprep.in_table_c22(c) or \
- stringprep.in_table_c3(c) or \
- stringprep.in_table_c4(c) or \
- stringprep.in_table_c5(c) or \
- stringprep.in_table_c6(c) or \
- stringprep.in_table_c7(c) or \
- stringprep.in_table_c8(c) or \
- stringprep.in_table_c9(c):
- raise UnicodeError("Invalid character %r" % c)
-
- # Check bidi
- RandAL = map(stringprep.in_table_d1, label)
- for c in RandAL:
- if c:
- # There is a RandAL char in the string. Must perform further
- # tests:
- # 1) The characters in section 5.8 MUST be prohibited.
- # This is table C.8, which was already checked
- # 2) If a string contains any RandALCat character, the string
- # MUST NOT contain any LCat character.
- if filter(stringprep.in_table_d2, label):
- raise UnicodeError("Violation of BIDI requirement 2")
-
- # 3) If a string contains any RandALCat character, a
- # RandALCat character MUST be the first character of the
- # string, and a RandALCat character MUST be the last
- # character of the string.
- if not RandAL[0] or not RandAL[-1]:
- raise UnicodeError("Violation of BIDI requirement 3")
-
- return label
-
-def ToASCII(label):
- try:
- # Step 1: try ASCII
- label = label.encode("ascii")
- except UnicodeError:
- pass
- else:
- # Skip to step 3: UseSTD3ASCIIRules is false, so
- # Skip to step 8.
- if 0 < len(label) < 64:
- return label
- raise UnicodeError("label empty or too long")
-
- # Step 2: nameprep
- label = nameprep(label)
-
- # Step 3: UseSTD3ASCIIRules is false
- # Step 4: try ASCII
- try:
- label = label.encode("ascii")
- except UnicodeError:
- pass
- else:
- # Skip to step 8.
- if 0 < len(label) < 64:
- return label
- raise UnicodeError("label empty or too long")
-
- # Step 5: Check ACE prefix
- if label.startswith(uace_prefix):
- raise UnicodeError("Label starts with ACE prefix")
-
- # Step 6: Encode with PUNYCODE
- label = label.encode("punycode")
-
- # Step 7: Prepend ACE prefix
- label = ace_prefix + label
-
- # Step 8: Check size
- if 0 < len(label) < 64:
- return label
- raise UnicodeError("label empty or too long")
-
-def ToUnicode(label):
- # Step 1: Check for ASCII
- if isinstance(label, str):
- pure_ascii = True
- else:
- try:
- label = label.encode("ascii")
- pure_ascii = True
- except UnicodeError:
- pure_ascii = False
- if not pure_ascii:
- # Step 2: Perform nameprep
- label = nameprep(label)
- # It doesn't say this, but apparently, it should be ASCII now
- try:
- label = label.encode("ascii")
- except UnicodeError:
- raise UnicodeError("Invalid character in IDN label")
- # Step 3: Check for ACE prefix
- if not label.startswith(ace_prefix):
- return unicode(label, "ascii")
-
- # Step 4: Remove ACE prefix
- label1 = label[len(ace_prefix):]
-
- # Step 5: Decode using PUNYCODE
- result = label1.decode("punycode")
-
- # Step 6: Apply ToASCII
- label2 = ToASCII(result)
-
- # Step 7: Compare the result of step 6 with the one of step 3
- # label2 will already be in lower case.
- if label.lower() != label2:
- raise UnicodeError("IDNA does not round-trip", label, label2)
-
- # Step 8: return the result of step 5
- return result
-
-### Codec APIs
-
-class Codec(codecs.Codec):
- def encode(self,input,errors='strict'):
-
- if errors != 'strict':
- # IDNA is quite clear that implementations must be strict
- raise UnicodeError("unsupported error handling "+errors)
-
- if not input:
- return "", 0
-
- result = []
- labels = dots.split(input)
- if labels and len(labels[-1])==0:
- trailing_dot = '.'
- del labels[-1]
- else:
- trailing_dot = ''
- for label in labels:
- result.append(ToASCII(label))
- # Join with U+002E
- return ".".join(result)+trailing_dot, len(input)
-
- def decode(self,input,errors='strict'):
-
- if errors != 'strict':
- raise UnicodeError("Unsupported error handling "+errors)
-
- if not input:
- return u"", 0
-
- # IDNA allows decoding to operate on Unicode strings, too.
- if isinstance(input, unicode):
- labels = dots.split(input)
- else:
- # Must be ASCII string
- input = str(input)
- unicode(input, "ascii")
- labels = input.split(".")
-
- if labels and len(labels[-1]) == 0:
- trailing_dot = u'.'
- del labels[-1]
- else:
- trailing_dot = u''
-
- result = []
- for label in labels:
- result.append(ToUnicode(label))
-
- return u".".join(result)+trailing_dot, len(input)
-
-class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
- def _buffer_encode(self, input, errors, final):
- if errors != 'strict':
- # IDNA is quite clear that implementations must be strict
- raise UnicodeError("unsupported error handling "+errors)
-
- if not input:
- return ("", 0)
-
- labels = dots.split(input)
- trailing_dot = u''
- if labels:
- if not labels[-1]:
- trailing_dot = '.'
- del labels[-1]
- elif not final:
- # Keep potentially unfinished label until the next call
- del labels[-1]
- if labels:
- trailing_dot = '.'
-
- result = []
- size = 0
- for label in labels:
- result.append(ToASCII(label))
- if size:
- size += 1
- size += len(label)
-
- # Join with U+002E
- result = ".".join(result) + trailing_dot
- size += len(trailing_dot)
- return (result, size)
-
-class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
- def _buffer_decode(self, input, errors, final):
- if errors != 'strict':
- raise UnicodeError("Unsupported error handling "+errors)
-
- if not input:
- return (u"", 0)
-
- # IDNA allows decoding to operate on Unicode strings, too.
- if isinstance(input, unicode):
- labels = dots.split(input)
- else:
- # Must be ASCII string
- input = str(input)
- unicode(input, "ascii")
- labels = input.split(".")
-
- trailing_dot = u''
- if labels:
- if not labels[-1]:
- trailing_dot = u'.'
- del labels[-1]
- elif not final:
- # Keep potentially unfinished label until the next call
- del labels[-1]
- if labels:
- trailing_dot = u'.'
-
- result = []
- size = 0
- for label in labels:
- result.append(ToUnicode(label))
- if size:
- size += 1
- size += len(label)
-
- result = u".".join(result) + trailing_dot
- size += len(trailing_dot)
- return (result, size)
-
-class StreamWriter(Codec,codecs.StreamWriter):
- pass
-
-class StreamReader(Codec,codecs.StreamReader):
- pass
-
-### encodings module API
-
-def getregentry():
- return codecs.CodecInfo(
- name='idna',
- encode=Codec().encode,
- decode=Codec().decode,
- incrementalencoder=IncrementalEncoder,
- incrementaldecoder=IncrementalDecoder,
- streamwriter=StreamWriter,
- streamreader=StreamReader,
- )