diff options
Diffstat (limited to 'lib/python2.7/test/test_pyexpat.py')
-rw-r--r-- | lib/python2.7/test/test_pyexpat.py | 657 |
1 files changed, 0 insertions, 657 deletions
diff --git a/lib/python2.7/test/test_pyexpat.py b/lib/python2.7/test/test_pyexpat.py deleted file mode 100644 index b911a20..0000000 --- a/lib/python2.7/test/test_pyexpat.py +++ /dev/null @@ -1,657 +0,0 @@ -# XXX TypeErrors on calling handlers, or on bad return values from a -# handler, are obscure and unhelpful. - -import StringIO, sys -import unittest - -from xml.parsers import expat - -from test import test_support -from test.test_support import sortdict, run_unittest - - -class SetAttributeTest(unittest.TestCase): - def setUp(self): - self.parser = expat.ParserCreate(namespace_separator='!') - self.set_get_pairs = [ - [0, 0], - [1, 1], - [2, 1], - [0, 0], - ] - - def test_returns_unicode(self): - for x, y in self.set_get_pairs: - self.parser.returns_unicode = x - self.assertEqual(self.parser.returns_unicode, y) - - def test_ordered_attributes(self): - for x, y in self.set_get_pairs: - self.parser.ordered_attributes = x - self.assertEqual(self.parser.ordered_attributes, y) - - def test_specified_attributes(self): - for x, y in self.set_get_pairs: - self.parser.specified_attributes = x - self.assertEqual(self.parser.specified_attributes, y) - - -data = '''\ -<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> -<?xml-stylesheet href="stylesheet.css"?> -<!-- comment data --> -<!DOCTYPE quotations SYSTEM "quotations.dtd" [ -<!ELEMENT root ANY> -<!NOTATION notation SYSTEM "notation.jpeg"> -<!ENTITY acirc "â"> -<!ENTITY external_entity SYSTEM "entity.file"> -<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> -%unparsed_entity; -]> - -<root attr1="value1" attr2="value2ὀ"> -<myns:subelement xmlns:myns="http://www.python.org/namespace"> - Contents of subelements -</myns:subelement> -<sub2><![CDATA[contents of CDATA section]]></sub2> -&external_entity; -</root> -''' - - -# Produce UTF-8 output -class ParseTest(unittest.TestCase): - class Outputter: - def __init__(self): - self.out = [] - - def StartElementHandler(self, name, attrs): - self.out.append('Start element: ' + repr(name) + ' ' + - sortdict(attrs)) - - def EndElementHandler(self, name): - self.out.append('End element: ' + repr(name)) - - def CharacterDataHandler(self, data): - data = data.strip() - if data: - self.out.append('Character data: ' + repr(data)) - - def ProcessingInstructionHandler(self, target, data): - self.out.append('PI: ' + repr(target) + ' ' + repr(data)) - - def StartNamespaceDeclHandler(self, prefix, uri): - self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri)) - - def EndNamespaceDeclHandler(self, prefix): - self.out.append('End of NS decl: ' + repr(prefix)) - - def StartCdataSectionHandler(self): - self.out.append('Start of CDATA section') - - def EndCdataSectionHandler(self): - self.out.append('End of CDATA section') - - def CommentHandler(self, text): - self.out.append('Comment: ' + repr(text)) - - def NotationDeclHandler(self, *args): - name, base, sysid, pubid = args - self.out.append('Notation declared: %s' %(args,)) - - def UnparsedEntityDeclHandler(self, *args): - entityName, base, systemId, publicId, notationName = args - self.out.append('Unparsed entity decl: %s' %(args,)) - - def NotStandaloneHandler(self, userData): - self.out.append('Not standalone') - return 1 - - def ExternalEntityRefHandler(self, *args): - context, base, sysId, pubId = args - self.out.append('External entity ref: %s' %(args[1:],)) - return 1 - - def DefaultHandler(self, userData): - pass - - def DefaultHandlerExpand(self, userData): - pass - - handler_names = [ - 'StartElementHandler', 'EndElementHandler', - 'CharacterDataHandler', 'ProcessingInstructionHandler', - 'UnparsedEntityDeclHandler', 'NotationDeclHandler', - 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', - 'CommentHandler', 'StartCdataSectionHandler', - 'EndCdataSectionHandler', - 'DefaultHandler', 'DefaultHandlerExpand', - #'NotStandaloneHandler', - 'ExternalEntityRefHandler' - ] - - def test_utf8(self): - - out = self.Outputter() - parser = expat.ParserCreate(namespace_separator='!') - for name in self.handler_names: - setattr(parser, name, getattr(out, name)) - parser.returns_unicode = 0 - parser.Parse(data, 1) - - # Verify output - op = out.out - self.assertEqual(op[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'') - self.assertEqual(op[1], "Comment: ' comment data '") - self.assertEqual(op[2], "Notation declared: ('notation', None, 'notation.jpeg', None)") - self.assertEqual(op[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')") - self.assertEqual(op[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}") - self.assertEqual(op[5], "NS decl: 'myns' 'http://www.python.org/namespace'") - self.assertEqual(op[6], "Start element: 'http://www.python.org/namespace!subelement' {}") - self.assertEqual(op[7], "Character data: 'Contents of subelements'") - self.assertEqual(op[8], "End element: 'http://www.python.org/namespace!subelement'") - self.assertEqual(op[9], "End of NS decl: 'myns'") - self.assertEqual(op[10], "Start element: 'sub2' {}") - self.assertEqual(op[11], 'Start of CDATA section') - self.assertEqual(op[12], "Character data: 'contents of CDATA section'") - self.assertEqual(op[13], 'End of CDATA section') - self.assertEqual(op[14], "End element: 'sub2'") - self.assertEqual(op[15], "External entity ref: (None, 'entity.file', None)") - self.assertEqual(op[16], "End element: 'root'") - - def test_unicode(self): - # Try the parse again, this time producing Unicode output - out = self.Outputter() - parser = expat.ParserCreate(namespace_separator='!') - parser.returns_unicode = 1 - for name in self.handler_names: - setattr(parser, name, getattr(out, name)) - - parser.Parse(data, 1) - - op = out.out - self.assertEqual(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'') - self.assertEqual(op[1], "Comment: u' comment data '") - self.assertEqual(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)") - self.assertEqual(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')") - self.assertEqual(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}") - self.assertEqual(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'") - self.assertEqual(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}") - self.assertEqual(op[7], "Character data: u'Contents of subelements'") - self.assertEqual(op[8], "End element: u'http://www.python.org/namespace!subelement'") - self.assertEqual(op[9], "End of NS decl: u'myns'") - self.assertEqual(op[10], "Start element: u'sub2' {}") - self.assertEqual(op[11], 'Start of CDATA section') - self.assertEqual(op[12], "Character data: u'contents of CDATA section'") - self.assertEqual(op[13], 'End of CDATA section') - self.assertEqual(op[14], "End element: u'sub2'") - self.assertEqual(op[15], "External entity ref: (None, u'entity.file', None)") - self.assertEqual(op[16], "End element: u'root'") - - def test_parse_file(self): - # Try parsing a file - out = self.Outputter() - parser = expat.ParserCreate(namespace_separator='!') - parser.returns_unicode = 1 - for name in self.handler_names: - setattr(parser, name, getattr(out, name)) - file = StringIO.StringIO(data) - - parser.ParseFile(file) - - op = out.out - self.assertEqual(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'') - self.assertEqual(op[1], "Comment: u' comment data '") - self.assertEqual(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)") - self.assertEqual(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')") - self.assertEqual(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}") - self.assertEqual(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'") - self.assertEqual(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}") - self.assertEqual(op[7], "Character data: u'Contents of subelements'") - self.assertEqual(op[8], "End element: u'http://www.python.org/namespace!subelement'") - self.assertEqual(op[9], "End of NS decl: u'myns'") - self.assertEqual(op[10], "Start element: u'sub2' {}") - self.assertEqual(op[11], 'Start of CDATA section') - self.assertEqual(op[12], "Character data: u'contents of CDATA section'") - self.assertEqual(op[13], 'End of CDATA section') - self.assertEqual(op[14], "End element: u'sub2'") - self.assertEqual(op[15], "External entity ref: (None, u'entity.file', None)") - self.assertEqual(op[16], "End element: u'root'") - - # Issue 4877: expat.ParseFile causes segfault on a closed file. - fp = open(test_support.TESTFN, 'wb') - try: - fp.close() - parser = expat.ParserCreate() - with self.assertRaises(ValueError): - parser.ParseFile(fp) - finally: - test_support.unlink(test_support.TESTFN) - - -class NamespaceSeparatorTest(unittest.TestCase): - def test_legal(self): - # Tests that make sure we get errors when the namespace_separator value - # is illegal, and that we don't for good values: - expat.ParserCreate() - expat.ParserCreate(namespace_separator=None) - expat.ParserCreate(namespace_separator=' ') - - def test_illegal(self): - try: - expat.ParserCreate(namespace_separator=42) - self.fail() - except TypeError, e: - self.assertEqual(str(e), - 'ParserCreate() argument 2 must be string or None, not int') - - try: - expat.ParserCreate(namespace_separator='too long') - self.fail() - except ValueError, e: - self.assertEqual(str(e), - 'namespace_separator must be at most one character, omitted, or None') - - def test_zero_length(self): - # ParserCreate() needs to accept a namespace_separator of zero length - # to satisfy the requirements of RDF applications that are required - # to simply glue together the namespace URI and the localname. Though - # considered a wart of the RDF specifications, it needs to be supported. - # - # See XML-SIG mailing list thread starting with - # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html - # - expat.ParserCreate(namespace_separator='') # too short - - -class InterningTest(unittest.TestCase): - def test(self): - # Test the interning machinery. - p = expat.ParserCreate() - L = [] - def collector(name, *args): - L.append(name) - p.StartElementHandler = collector - p.EndElementHandler = collector - p.Parse("<e> <e/> <e></e> </e>", 1) - tag = L[0] - self.assertEqual(len(L), 6) - for entry in L: - # L should have the same string repeated over and over. - self.assertTrue(tag is entry) - - -class BufferTextTest(unittest.TestCase): - def setUp(self): - self.stuff = [] - self.parser = expat.ParserCreate() - self.parser.buffer_text = 1 - self.parser.CharacterDataHandler = self.CharacterDataHandler - - def check(self, expected, label): - self.assertEqual(self.stuff, expected, - "%s\nstuff = %r\nexpected = %r" - % (label, self.stuff, map(unicode, expected))) - - def CharacterDataHandler(self, text): - self.stuff.append(text) - - def StartElementHandler(self, name, attrs): - self.stuff.append("<%s>" % name) - bt = attrs.get("buffer-text") - if bt == "yes": - self.parser.buffer_text = 1 - elif bt == "no": - self.parser.buffer_text = 0 - - def EndElementHandler(self, name): - self.stuff.append("</%s>" % name) - - def CommentHandler(self, data): - self.stuff.append("<!--%s-->" % data) - - def setHandlers(self, handlers=[]): - for name in handlers: - setattr(self.parser, name, getattr(self, name)) - - def test_default_to_disabled(self): - parser = expat.ParserCreate() - self.assertFalse(parser.buffer_text) - - def test_buffering_enabled(self): - # Make sure buffering is turned on - self.assertTrue(self.parser.buffer_text) - self.parser.Parse("<a>1<b/>2<c/>3</a>", 1) - self.assertEqual(self.stuff, ['123'], - "buffered text not properly collapsed") - - def test1(self): - # XXX This test exposes more detail of Expat's text chunking than we - # XXX like, but it tests what we need to concisely. - self.setHandlers(["StartElementHandler"]) - self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1) - self.assertEqual(self.stuff, - ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"], - "buffering control not reacting as expected") - - def test2(self): - self.parser.Parse("<a>1<b/><2><c/> \n 3</a>", 1) - self.assertEqual(self.stuff, ["1<2> \n 3"], - "buffered text not properly collapsed") - - def test3(self): - self.setHandlers(["StartElementHandler"]) - self.parser.Parse("<a>1<b/>2<c/>3</a>", 1) - self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"], - "buffered text not properly split") - - def test4(self): - self.setHandlers(["StartElementHandler", "EndElementHandler"]) - self.parser.CharacterDataHandler = None - self.parser.Parse("<a>1<b/>2<c/>3</a>", 1) - self.assertEqual(self.stuff, - ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"]) - - def test5(self): - self.setHandlers(["StartElementHandler", "EndElementHandler"]) - self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1) - self.assertEqual(self.stuff, - ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"]) - - def test6(self): - self.setHandlers(["CommentHandler", "EndElementHandler", - "StartElementHandler"]) - self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1) - self.assertEqual(self.stuff, - ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"], - "buffered text not properly split") - - def test7(self): - self.setHandlers(["CommentHandler", "EndElementHandler", - "StartElementHandler"]) - self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1) - self.assertEqual(self.stuff, - ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", - "<!--abc-->", "4", "<!--def-->", "5", "</a>"], - "buffered text not properly split") - - -# Test handling of exception from callback: -class HandlerExceptionTest(unittest.TestCase): - def StartElementHandler(self, name, attrs): - raise RuntimeError(name) - - def test(self): - parser = expat.ParserCreate() - parser.StartElementHandler = self.StartElementHandler - try: - parser.Parse("<a><b><c/></b></a>", 1) - self.fail() - except RuntimeError, e: - self.assertEqual(e.args[0], 'a', - "Expected RuntimeError for element 'a', but" + \ - " found %r" % e.args[0]) - - -# Test Current* members: -class PositionTest(unittest.TestCase): - def StartElementHandler(self, name, attrs): - self.check_pos('s') - - def EndElementHandler(self, name): - self.check_pos('e') - - def check_pos(self, event): - pos = (event, - self.parser.CurrentByteIndex, - self.parser.CurrentLineNumber, - self.parser.CurrentColumnNumber) - self.assertTrue(self.upto < len(self.expected_list), - 'too many parser events') - expected = self.expected_list[self.upto] - self.assertEqual(pos, expected, - 'Expected position %s, got position %s' %(pos, expected)) - self.upto += 1 - - def test(self): - self.parser = expat.ParserCreate() - self.parser.StartElementHandler = self.StartElementHandler - self.parser.EndElementHandler = self.EndElementHandler - self.upto = 0 - self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), - ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)] - - xml = '<a>\n <b>\n <c/>\n </b>\n</a>' - self.parser.Parse(xml, 1) - - -class sf1296433Test(unittest.TestCase): - def test_parse_only_xml_data(self): - # http://python.org/sf/1296433 - # - xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025) - # this one doesn't crash - #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000) - - class SpecificException(Exception): - pass - - def handler(text): - raise SpecificException - - parser = expat.ParserCreate() - parser.CharacterDataHandler = handler - - self.assertRaises(Exception, parser.Parse, xml) - -class ChardataBufferTest(unittest.TestCase): - """ - test setting of chardata buffer size - """ - - def test_1025_bytes(self): - self.assertEqual(self.small_buffer_test(1025), 2) - - def test_1000_bytes(self): - self.assertEqual(self.small_buffer_test(1000), 1) - - def test_wrong_size(self): - parser = expat.ParserCreate() - parser.buffer_text = 1 - def f(size): - parser.buffer_size = size - - self.assertRaises(TypeError, f, sys.maxint+1) - self.assertRaises(ValueError, f, -1) - self.assertRaises(ValueError, f, 0) - - def test_unchanged_size(self): - xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512)) - xml2 = 'a'*512 + '</s>' - parser = expat.ParserCreate() - parser.CharacterDataHandler = self.counting_handler - parser.buffer_size = 512 - parser.buffer_text = 1 - - # Feed 512 bytes of character data: the handler should be called - # once. - self.n = 0 - parser.Parse(xml1) - self.assertEqual(self.n, 1) - - # Reassign to buffer_size, but assign the same size. - parser.buffer_size = parser.buffer_size - self.assertEqual(self.n, 1) - - # Try parsing rest of the document - parser.Parse(xml2) - self.assertEqual(self.n, 2) - - - def test_disabling_buffer(self): - xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512) - xml2 = ('b' * 1024) - xml3 = "%s</a>" % ('c' * 1024) - parser = expat.ParserCreate() - parser.CharacterDataHandler = self.counting_handler - parser.buffer_text = 1 - parser.buffer_size = 1024 - self.assertEqual(parser.buffer_size, 1024) - - # Parse one chunk of XML - self.n = 0 - parser.Parse(xml1, 0) - self.assertEqual(parser.buffer_size, 1024) - self.assertEqual(self.n, 1) - - # Turn off buffering and parse the next chunk. - parser.buffer_text = 0 - self.assertFalse(parser.buffer_text) - self.assertEqual(parser.buffer_size, 1024) - for i in range(10): - parser.Parse(xml2, 0) - self.assertEqual(self.n, 11) - - parser.buffer_text = 1 - self.assertTrue(parser.buffer_text) - self.assertEqual(parser.buffer_size, 1024) - parser.Parse(xml3, 1) - self.assertEqual(self.n, 12) - - - - def make_document(self, bytes): - return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>') - - def counting_handler(self, text): - self.n += 1 - - def small_buffer_test(self, buffer_len): - xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len) - parser = expat.ParserCreate() - parser.CharacterDataHandler = self.counting_handler - parser.buffer_size = 1024 - parser.buffer_text = 1 - - self.n = 0 - parser.Parse(xml) - return self.n - - def test_change_size_1(self): - xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024) - xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025) - parser = expat.ParserCreate() - parser.CharacterDataHandler = self.counting_handler - parser.buffer_text = 1 - parser.buffer_size = 1024 - self.assertEqual(parser.buffer_size, 1024) - - self.n = 0 - parser.Parse(xml1, 0) - parser.buffer_size *= 2 - self.assertEqual(parser.buffer_size, 2048) - parser.Parse(xml2, 1) - self.assertEqual(self.n, 2) - - def test_change_size_2(self): - xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023) - xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025) - parser = expat.ParserCreate() - parser.CharacterDataHandler = self.counting_handler - parser.buffer_text = 1 - parser.buffer_size = 2048 - self.assertEqual(parser.buffer_size, 2048) - - self.n=0 - parser.Parse(xml1, 0) - parser.buffer_size //= 2 - self.assertEqual(parser.buffer_size, 1024) - parser.Parse(xml2, 1) - self.assertEqual(self.n, 4) - -class MalformedInputText(unittest.TestCase): - def test1(self): - xml = "\0\r\n" - parser = expat.ParserCreate() - try: - parser.Parse(xml, True) - self.fail() - except expat.ExpatError as e: - self.assertEqual(str(e), 'unclosed token: line 2, column 0') - - def test2(self): - xml = "<?xml version\xc2\x85='1.0'?>\r\n" - parser = expat.ParserCreate() - try: - parser.Parse(xml, True) - self.fail() - except expat.ExpatError as e: - self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14') - -class ForeignDTDTests(unittest.TestCase): - """ - Tests for the UseForeignDTD method of expat parser objects. - """ - def test_use_foreign_dtd(self): - """ - If UseForeignDTD is passed True and a document without an external - entity reference is parsed, ExternalEntityRefHandler is first called - with None for the public and system ids. - """ - handler_call_args = [] - def resolve_entity(context, base, system_id, public_id): - handler_call_args.append((public_id, system_id)) - return 1 - - parser = expat.ParserCreate() - parser.UseForeignDTD(True) - parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) - parser.ExternalEntityRefHandler = resolve_entity - parser.Parse("<?xml version='1.0'?><element/>") - self.assertEqual(handler_call_args, [(None, None)]) - - # test UseForeignDTD() is equal to UseForeignDTD(True) - handler_call_args[:] = [] - - parser = expat.ParserCreate() - parser.UseForeignDTD() - parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) - parser.ExternalEntityRefHandler = resolve_entity - parser.Parse("<?xml version='1.0'?><element/>") - self.assertEqual(handler_call_args, [(None, None)]) - - def test_ignore_use_foreign_dtd(self): - """ - If UseForeignDTD is passed True and a document with an external - entity reference is parsed, ExternalEntityRefHandler is called with - the public and system ids from the document. - """ - handler_call_args = [] - def resolve_entity(context, base, system_id, public_id): - handler_call_args.append((public_id, system_id)) - return 1 - - parser = expat.ParserCreate() - parser.UseForeignDTD(True) - parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) - parser.ExternalEntityRefHandler = resolve_entity - parser.Parse( - "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>") - self.assertEqual(handler_call_args, [("bar", "baz")]) - - -def test_main(): - run_unittest(SetAttributeTest, - ParseTest, - NamespaceSeparatorTest, - InterningTest, - BufferTextTest, - HandlerExceptionTest, - PositionTest, - sf1296433Test, - ChardataBufferTest, - MalformedInputText, - ForeignDTDTests) - -if __name__ == "__main__": - test_main() |