From 8249938e02a9bf6e7d99d640c8fa8295d0d93e84 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sun, 17 Aug 2025 16:43:41 +0300 Subject: [PATCH 1/3] Update xml from 3.13.7 --- Lib/test/test_xml_dom_xmlbuilder.py | 92 ++++ Lib/test/test_xml_etree.py | 716 ++++++++++++++++++++++------ Lib/test/test_xml_etree_c.py | 278 +++++++++++ Lib/xml/dom/expatbuilder.py | 5 +- Lib/xml/dom/minidom.py | 32 +- Lib/xml/dom/xmlbuilder.py | 12 +- Lib/xml/etree/ElementInclude.py | 13 +- Lib/xml/etree/ElementPath.py | 4 +- Lib/xml/etree/ElementTree.py | 91 ++-- Lib/xml/sax/__init__.py | 21 +- Lib/xml/sax/_exceptions.py | 4 - Lib/xml/sax/expatreader.py | 20 +- Lib/xml/sax/xmlreader.py | 4 +- stdlib/src/pyexpat.rs | 18 +- 14 files changed, 1056 insertions(+), 254 deletions(-) create mode 100644 Lib/test/test_xml_dom_xmlbuilder.py create mode 100644 Lib/test/test_xml_etree_c.py diff --git a/Lib/test/test_xml_dom_xmlbuilder.py b/Lib/test/test_xml_dom_xmlbuilder.py new file mode 100644 index 0000000000..5282e806e4 --- /dev/null +++ b/Lib/test/test_xml_dom_xmlbuilder.py @@ -0,0 +1,92 @@ +import io +import unittest +from http import client +from test.test_httplib import FakeSocket +from unittest import mock +from xml.dom import getDOMImplementation, minidom, xmlbuilder + +SMALL_SAMPLE = b""" + + +Introduction to XSL +
+

A. Namespace

+""" + + +class XMLBuilderTest(unittest.TestCase): + def test_entity_resolver(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + resolver = xmlbuilder.DOMEntityResolver() + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + source = resolver.resolveEntity(None, "http://example.com/2000/svg") + + self.assertIsInstance(source, xmlbuilder.DOMInputSource) + self.assertIsNone(source.publicId) + self.assertEqual(source.systemId, "http://example.com/2000/svg") + self.assertEqual(source.baseURI, "http://example.com/2000/") + self.assertEqual(source.encoding, "utf-8") + self.assertIs(source.byteStream, response) + + self.assertIsNone(source.characterStream) + self.assertIsNone(source.stringData) + + def test_builder(self): + imp = getDOMImplementation() + self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS) + + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + self.assertIsInstance(builder, xmlbuilder.DOMBuilder) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_parse_uri(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + + imp = getDOMImplementation() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + document = builder.parseURI("http://example.com/2000/svg") + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_parse_with_systemId(self): + response = io.BytesIO(SMALL_SAMPLE) + + with mock.patch("urllib.request.urlopen") as mock_open: + mock_open.return_value = response + + imp = getDOMImplementation() + source = imp.createDOMInputSource() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + source.systemId = "http://example.com/2000/svg" + document = builder.parse(source) + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 1a681d5a7c..59b5515529 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -13,13 +13,16 @@ import operator import os import pickle +import pyexpat import sys import textwrap import types import unittest +import unittest.mock as mock import warnings import weakref +from contextlib import nullcontext from functools import partial from itertools import product, islice from test import support @@ -120,6 +123,21 @@ """ +def is_python_implementation(): + assert ET is not None, "ET must be initialized" + assert pyET is not None, "pyET must be initialized" + return ET is pyET + + +def equal_wrapper(cls): + """Mock cls.__eq__ to check whether it has been called or not. + + The behaviour of cls.__eq__ (side-effects included) is left as is. + """ + eq = cls.__eq__ + return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq) + + def checkwarnings(*filters, quiet=False): def decorator(test): def newtest(*args, **kwargs): @@ -200,28 +218,36 @@ class ElementTreeTest(unittest.TestCase): def serialize_check(self, elem, expected): self.assertEqual(serialize(elem), expected) + def test_constructor(self): + # Test constructor behavior. + + with self.assertRaises(TypeError): + tree = ET.ElementTree("") + with self.assertRaises(TypeError): + tree = ET.ElementTree(ET.ElementTree()) + + def test_setroot(self): + # Test _setroot behavior. + + tree = ET.ElementTree() + element = ET.Element("tag") + tree._setroot(element) + self.assertEqual(tree.getroot().tag, "tag") + self.assertEqual(tree.getroot(), element) + + # Test behavior with an invalid root element + + tree = ET.ElementTree() + with self.assertRaises(TypeError): + tree._setroot("") + with self.assertRaises(TypeError): + tree._setroot(ET.ElementTree()) + with self.assertRaises(TypeError): + tree._setroot(None) + def test_interface(self): # Test element tree interface. - def check_string(string): - len(string) - for char in string: - self.assertEqual(len(char), 1, - msg="expected one-character string, got %r" % char) - new_string = string + "" - new_string = string + " " - string[:0] - - def check_mapping(mapping): - len(mapping) - keys = mapping.keys() - items = mapping.items() - for key in keys: - item = mapping[key] - mapping["key"] = "value" - self.assertEqual(mapping["key"], "value", - msg="expected value string, got %r" % mapping["key"]) - def check_element(element): self.assertTrue(ET.iselement(element), msg="not an element") direlem = dir(element) @@ -231,12 +257,12 @@ def check_element(element): self.assertIn(attr, direlem, msg='no %s visible by dir' % attr) - check_string(element.tag) - check_mapping(element.attrib) + self.assertIsInstance(element.tag, str) + self.assertIsInstance(element.attrib, dict) if element.text is not None: - check_string(element.text) + self.assertIsInstance(element.text, str) if element.tail is not None: - check_string(element.tail) + self.assertIsInstance(element.tail, str) for elem in element: check_element(elem) @@ -392,6 +418,7 @@ def test_path_cache(self): from xml.etree import ElementPath elem = ET.XML(SAMPLE_XML) + ElementPath._cache.clear() for i in range(10): ET.ElementTree(elem).find('./'+str(i)) cache_len_10 = len(ElementPath._cache) for i in range(10): ET.ElementTree(elem).find('./'+str(i)) @@ -572,7 +599,9 @@ def test_iterparse(self): iterparse = ET.iterparse context = iterparse(SIMPLE_XMLFILE) + self.assertIsNone(context.root) action, elem = next(context) + self.assertIsNone(context.root) self.assertEqual((action, elem.tag), ('end', 'element')) self.assertEqual([(action, elem.tag) for action, elem in context], [ ('end', 'element'), @@ -589,6 +618,17 @@ def test_iterparse(self): ('end', '{namespace}root'), ]) + with open(SIMPLE_XMLFILE, 'rb') as source: + context = iterparse(source) + action, elem = next(context) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in context], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(context.root.tag, 'root') + events = () context = iterparse(SIMPLE_XMLFILE, events) self.assertEqual([(action, elem.tag) for action, elem in context], []) @@ -680,12 +720,83 @@ def test_iterparse(self): # Not exhausting the iterator still closes the resource (bpo-43292) with warnings_helper.check_no_resource_warning(self): - it = iterparse(TESTFN) + it = iterparse(SIMPLE_XMLFILE) + del it + + with warnings_helper.check_no_resource_warning(self): + it = iterparse(SIMPLE_XMLFILE) + it.close() del it + with warnings_helper.check_no_resource_warning(self): + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + + with warnings_helper.check_no_resource_warning(self): + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + it.close() + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + with self.assertRaises(FileNotFoundError): iterparse("nonexistent") + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_iterparse_close(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + it = iterparse(SIMPLE_XMLFILE) + list(it) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + list(it) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + def test_writefile(self): elem = ET.Element("tag") elem.text = "text" @@ -1427,8 +1538,9 @@ def test_processinginstruction(self): def test_html_empty_elems_serialization(self): # issue 15970 # from http://www.w3.org/TR/html401/index/elements.html - for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR', - 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']: + for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME', + 'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM', + 'SOURCE', 'TRACK', 'WBR']: for elem in [element, element.lower()]: expected = '<%s>' % elem serialized = serialize(ET.XML('<%s />' % elem), method='html') @@ -1464,12 +1576,14 @@ def test_attlist_default(self): class XMLPullParserTest(unittest.TestCase): - def _feed(self, parser, data, chunk_size=None): + def _feed(self, parser, data, chunk_size=None, flush=False): if chunk_size is None: parser.feed(data) else: for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) + if flush: + parser.flush() def assert_events(self, parser, expected, max_events=None): self.assertEqual( @@ -1489,28 +1603,41 @@ def assert_event_tags(self, parser, expected, max_events=None): # TODO: RUSTPYTHON @unittest.expectedFailure - def test_simple_xml(self): - for chunk_size in (None, 1, 5): - with self.subTest(chunk_size=chunk_size): - parser = ET.XMLPullParser() - self.assert_event_tags(parser, []) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, []) - self._feed(parser, - "\n text\n", chunk_size) - self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, "texttail\n", chunk_size) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [ - ('end', 'element'), - ('end', 'empty-element'), - ]) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [('end', 'root')]) - self.assertIsNone(parser.close()) + def test_simple_xml(self, chunk_size=None, flush=False): + parser = ET.XMLPullParser() + self.assert_event_tags(parser, []) + self._feed(parser, "\n", chunk_size, flush) + self.assert_event_tags(parser, []) + self._feed(parser, + "\n text\n", chunk_size, flush) + self.assert_event_tags(parser, [('end', 'element')]) + self._feed(parser, "texttail\n", chunk_size, flush) + self._feed(parser, "\n", chunk_size, flush) + self.assert_event_tags(parser, [ + ('end', 'element'), + ('end', 'empty-element'), + ]) + self._feed(parser, "\n", chunk_size, flush) + self.assert_event_tags(parser, [('end', 'root')]) + self.assertIsNone(parser.close()) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_simple_xml_chunk_1(self): + self.test_simple_xml(chunk_size=1, flush=True) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_simple_xml_chunk_5(self): + self.test_simple_xml(chunk_size=5, flush=True) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_simple_xml_chunk_22(self): + self.test_simple_xml(chunk_size=22) # TODO: RUSTPYTHON @unittest.expectedFailure @@ -1726,6 +1853,60 @@ def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) + # TODO: RUSTPYTHON + @unittest.expectedFailure + @unittest.skipIf(pyexpat.version_info < (2, 6, 0), + f'Expat {pyexpat.version_info} does not ' + 'support reparse deferral') + def test_flush_reparse_deferral_enabled(self): + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + self.assert_event_tags(parser, []) # i.e. no elements started + if ET is pyET: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + if ET is pyET: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_flush_reparse_deferral_disabled(self): + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + if pyexpat.version_info >= (2, 6, 0): + if not ET is pyET: + self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled ' + 'methods not available in C') + parser._parser._parser.SetReparseDeferralEnabled(False) + self.assert_event_tags(parser, []) # i.e. no elements started + + if ET is pyET: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + if ET is pyET: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) # # xinclude tests (samples from appendix C of the xinclude specification) @@ -2434,6 +2615,22 @@ def test_39495_treebuilder_start(self): self.assertRaises(TypeError, ET.TreeBuilder().start, "tag") self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None) + def test_issue123213_correct_extend_exception(self): + # Does not hide the internal exception when extending the element + self.assertRaises(ZeroDivisionError, ET.Element('tag').extend, + (1/0 for i in range(2))) + + # Still raises the TypeError when extending with a non-iterable + self.assertRaises(TypeError, ET.Element('tag').extend, None) + + # Preserves the TypeError message when extending with a generator + def f(): + raise TypeError("mymessage") + + self.assertRaisesRegex( + TypeError, 'mymessage', + ET.Element('tag').extend, (f() for i in range(2))) + # -------------------------------------------------------------------- @@ -2468,35 +2665,6 @@ def test___init__(self): self.assertIsNot(element_foo.attrib, attrib) self.assertNotEqual(element_foo.attrib, attrib) - def test_copy(self): - # Only run this test if Element.copy() is defined. - if "copy" not in dir(ET.Element): - raise unittest.SkipTest("Element.copy() not present") - - element_foo = ET.Element("foo", { "zix": "wyp" }) - element_foo.append(ET.Element("bar", { "baz": "qix" })) - - with self.assertWarns(DeprecationWarning): - element_foo2 = element_foo.copy() - - # elements are not the same - self.assertIsNot(element_foo2, element_foo) - - # string attributes are equal - self.assertEqual(element_foo2.tag, element_foo.tag) - self.assertEqual(element_foo2.text, element_foo.text) - self.assertEqual(element_foo2.tail, element_foo.tail) - - # number of children is the same - self.assertEqual(len(element_foo2), len(element_foo)) - - # children are the same - for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): - self.assertIs(child1, child2) - - # attrib is a copy - self.assertEqual(element_foo2.attrib, element_foo.attrib) - def test___copy__(self): element_foo = ET.Element("foo", { "zix": "wyp" }) element_foo.append(ET.Element("bar", { "baz": "qix" })) @@ -2662,8 +2830,7 @@ def test_pickle_issue18997(self): 4 """ e1 = dumper.fromstring(XMLTEXT) - if hasattr(e1, '__getstate__'): - self.assertEqual(e1.__getstate__()['tag'], 'group') + self.assertEqual(e1.__getstate__()['tag'], 'group') e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree', dumper, loader, proto) self.assertEqual(e2.tag, 'group') @@ -2671,6 +2838,7 @@ def test_pickle_issue18997(self): class BadElementTest(ElementTestCase, unittest.TestCase): + def test_extend_mutable_list(self): class X: @property @@ -2709,20 +2877,170 @@ class Y(X, ET.Element): e = ET.Element('foo') e.extend(L) - @unittest.skip("TODO: RUSTPYTHON, hangs") - def test_remove_with_mutating(self): - class X(ET.Element): + def test_remove_with_clear_assume_missing(self): + # gh-126033: Check that a concurrent clear() for an assumed-to-be + # missing element does not make the interpreter crash. + self.do_test_remove_with_clear(raises=True) + + def test_remove_with_clear_assume_existing(self): + # gh-126033: Check that a concurrent clear() for an assumed-to-be + # existing element does not make the interpreter crash. + self.do_test_remove_with_clear(raises=False) + + def do_test_remove_with_clear(self, *, raises): + + # Until the discrepency between "del root[:]" and "root.clear()" is + # resolved, we need to keep two tests. Previously, using "del root[:]" + # did not crash with the reproducer of gh-126033 while "root.clear()" + # did. + + class E(ET.Element): + """Local class to be able to mock E.__eq__ for introspection.""" + + class X(E): def __eq__(self, o): - del e[:] - return False - e = ET.Element('foo') - e.extend([X('bar')]) - self.assertRaises(ValueError, e.remove, ET.Element('baz')) + del root[:] + return not raises - e = ET.Element('foo') - e.extend([ET.Element('bar')]) - self.assertRaises(ValueError, e.remove, X('baz')) + class Y(E): + def __eq__(self, o): + root.clear() + return not raises + if raises: + get_checker_context = lambda: self.assertRaises(ValueError) + else: + get_checker_context = nullcontext + + self.assertIs(E.__eq__, object.__eq__) + + for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]: + self.enterContext(self.subTest(side_effect=side_effect)) + + # test removing R() from [U()] + for R, U, description in [ + (E, Z, "remove missing E() from [Z()]"), + (Z, E, "remove missing Z() from [E()]"), + (Z, Z, "remove missing Z() from [Z()]"), + ]: + with self.subTest(description): + root = E('top') + root.extend([U('one')]) + with get_checker_context(): + root.remove(R('missing')) + + # test removing R() from [U(), V()] + cases = self.cases_for_remove_missing_with_mutations(E, Z) + for R, U, V, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), V('two')]) + with get_checker_context(): + root.remove(R('missing')) + + # Test removing root[0] from [Z()]. + # + # Since we call root.remove() with root[0], Z.__eq__() + # will not be called (we branch on the fast Py_EQ path). + with self.subTest("remove root[0] from [Z()]"): + root = E('top') + root.append(Z('rem')) + with equal_wrapper(E) as f, equal_wrapper(Z) as g: + root.remove(root[0]) + f.assert_not_called() + g.assert_not_called() + + # Test removing root[1] (of type R) from [U(), R()]. + is_special = is_python_implementation() and raises and Z is Y + if is_python_implementation() and raises and Z is Y: + # In pure Python, using root.clear() sets the children + # list to [] without calling list.clear(). + # + # For this reason, the call to root.remove() first + # checks root[0] and sets the children list to [] + # since either root[0] or root[1] is an evil element. + # + # Since checking root[1] still uses the old reference + # to the children list, PyObject_RichCompareBool() branches + # to the fast Py_EQ path and Y.__eq__() is called exactly + # once (when checking root[0]). + continue + else: + cases = self.cases_for_remove_existing_with_mutations(E, Z) + for R, U, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), R('rem')]) + with get_checker_context(): + root.remove(root[1]) + + def test_remove_with_mutate_root_assume_missing(self): + # gh-126033: Check that a concurrent mutation for an assumed-to-be + # missing element does not make the interpreter crash. + self.do_test_remove_with_mutate_root(raises=True) + + def test_remove_with_mutate_root_assume_existing(self): + # gh-126033: Check that a concurrent mutation for an assumed-to-be + # existing element does not make the interpreter crash. + self.do_test_remove_with_mutate_root(raises=False) + + def do_test_remove_with_mutate_root(self, *, raises): + E = ET.Element + + class Z(E): + def __eq__(self, o): + del root[0] + return not raises + + if raises: + get_checker_context = lambda: self.assertRaises(ValueError) + else: + get_checker_context = nullcontext + + # test removing R() from [U(), V()] + cases = self.cases_for_remove_missing_with_mutations(E, Z) + for R, U, V, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), V('two')]) + with get_checker_context(): + root.remove(R('missing')) + + # test removing root[1] (of type R) from [U(), R()] + cases = self.cases_for_remove_existing_with_mutations(E, Z) + for R, U, description in cases: + with self.subTest(description): + root = E('top') + root.extend([U('one'), R('rem')]) + with get_checker_context(): + root.remove(root[1]) + + def cases_for_remove_missing_with_mutations(self, E, Z): + # Cases for removing R() from [U(), V()]. + # The case U = V = R = E is not interesting as there is no mutation. + for U, V in [(E, Z), (Z, E), (Z, Z)]: + description = (f"remove missing {E.__name__}() from " + f"[{U.__name__}(), {V.__name__}()]") + yield E, U, V, description + + for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]: + description = (f"remove missing {Z.__name__}() from " + f"[{U.__name__}(), {V.__name__}()]") + yield Z, U, V, description + + def cases_for_remove_existing_with_mutations(self, E, Z): + # Cases for removing root[1] (of type R) from [U(), R()]. + # The case U = R = E is not interesting as there is no mutation. + for U, R, description in [ + (E, Z, "remove root[1] from [E(), Z()]"), + (Z, E, "remove root[1] from [Z(), E()]"), + (Z, Z, "remove root[1] from [Z(), Z()]"), + ]: + description = (f"remove root[1] (of type {R.__name__}) " + f"from [{U.__name__}(), {R.__name__}()]") + yield R, U, description + + @support.infinite_recursion(25) def test_recursive_repr(self): # Issue #25455 e = ET.Element('foo') @@ -2821,21 +3139,83 @@ def element_factory(x, y): del b gc_collect() + def test_deepcopy_clear(self): + # Prevent crashes when __deepcopy__() clears the children list. + # See https://github.com/python/cpython/issues/133009. + class X(ET.Element): + def __deepcopy__(self, memo): + root.clear() + return self -class MutatingElementPath(str): + root = ET.Element('a') + evil = X('x') + root.extend([evil, ET.Element('y')]) + if is_python_implementation(): + # Mutating a list over which we iterate raises an error. + self.assertRaises(RuntimeError, copy.deepcopy, root) + else: + c = copy.deepcopy(root) + # In the C implementation, we can still copy the evil element. + self.assertListEqual(list(c), [evil]) + + def test_deepcopy_grow(self): + # Prevent crashes when __deepcopy__() mutates the children list. + # See https://github.com/python/cpython/issues/133009. + a = ET.Element('a') + b = ET.Element('b') + c = ET.Element('c') + + class X(ET.Element): + def __deepcopy__(self, memo): + root.append(a) + root.append(b) + return self + + root = ET.Element('top') + evil1, evil2 = X('1'), X('2') + root.extend([evil1, c, evil2]) + children = list(copy.deepcopy(root)) + # mock deep copies + self.assertIs(children[0], evil1) + self.assertIs(children[2], evil2) + # true deep copies + self.assertEqual(children[1].tag, c.tag) + self.assertEqual([c.tag for c in children[3:]], + [a.tag, b.tag, a.tag, b.tag]) + + +class MutationDeleteElementPath(str): def __new__(cls, elem, *args): self = str.__new__(cls, *args) self.elem = elem return self + def __eq__(self, o): del self.elem[:] return True -MutatingElementPath.__hash__ = str.__hash__ + + __hash__ = str.__hash__ + + +class MutationClearElementPath(str): + def __new__(cls, elem, *args): + self = str.__new__(cls, *args) + self.elem = elem + return self + + def __eq__(self, o): + self.elem.clear() + return True + + __hash__ = str.__hash__ + class BadElementPath(str): def __eq__(self, o): raise 1/0 -BadElementPath.__hash__ = str.__hash__ + + __hash__ = str.__hash__ + class BadElementPathTest(ElementTestCase, unittest.TestCase): def setUp(self): @@ -2850,9 +3230,11 @@ def tearDown(self): super().tearDown() def test_find_with_mutating(self): - e = ET.Element('foo') - e.extend([ET.Element('bar')]) - e.find(MutatingElementPath(e, 'x')) + for cls in [MutationDeleteElementPath, MutationClearElementPath]: + with self.subTest(cls): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + e.find(cls(e, 'x')) def test_find_with_error(self): e = ET.Element('foo') @@ -2863,9 +3245,11 @@ def test_find_with_error(self): pass def test_findtext_with_mutating(self): - e = ET.Element('foo') - e.extend([ET.Element('bar')]) - e.findtext(MutatingElementPath(e, 'x')) + for cls in [MutationDeleteElementPath, MutationClearElementPath]: + with self.subTest(cls): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + e.findtext(cls(e, 'x')) def test_findtext_with_error(self): e = ET.Element('foo') @@ -2875,10 +3259,26 @@ def test_findtext_with_error(self): except ZeroDivisionError: pass + def test_findtext_with_falsey_text_attribute(self): + root_elem = ET.Element('foo') + sub_elem = ET.SubElement(root_elem, 'bar') + falsey = ["", 0, False, [], (), {}] + for val in falsey: + sub_elem.text = val + self.assertEqual(root_elem.findtext('./bar'), val) + + def test_findtext_with_none_text_attribute(self): + root_elem = ET.Element('foo') + sub_elem = ET.SubElement(root_elem, 'bar') + sub_elem.text = None + self.assertEqual(root_elem.findtext('./bar'), '') + def test_findall_with_mutating(self): - e = ET.Element('foo') - e.extend([ET.Element('bar')]) - e.findall(MutatingElementPath(e, 'x')) + for cls in [MutationDeleteElementPath, MutationClearElementPath]: + with self.subTest(cls): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + e.findall(cls(e, 'x')) def test_findall_with_error(self): e = ET.Element('foo') @@ -3233,8 +3633,7 @@ def test_basic(self): # With an explicit parser too (issue #9708) sourcefile = serialize(doc, to_string=False) parser = ET.XMLParser(target=ET.TreeBuilder()) - self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], - 'end') + self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 'end') tree = ET.ElementTree(None) self.assertRaises(AttributeError, tree.iter) @@ -3836,6 +4235,22 @@ def test_setslice_negative_steps(self): e[1::-sys.maxsize<<64] = [ET.Element('d')] self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3']) + def test_issue123213_setslice_exception(self): + e = ET.Element('tag') + # Does not hide the internal exception when assigning to the element + with self.assertRaises(ZeroDivisionError): + e[:1] = (1/0 for i in range(2)) + + # Still raises the TypeError when assigning with a non-iterable + with self.assertRaises(TypeError): + e[:1] = None + + # Preserve the original TypeError message when assigning. + def f(): + raise TypeError("mymessage") + + with self.assertRaisesRegex(TypeError, 'mymessage'): + e[:1] = (f() for i in range(2)) class IOTest(unittest.TestCase): # TODO: RUSTPYTHON @@ -4163,10 +4578,10 @@ def test_error_code(self): class KeywordArgsTest(unittest.TestCase): - # TODO: RUSTPYTHON - @unittest.expectedFailure # Test various issues with keyword arguments passed to ET.Element # constructor and methods + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_issue14818(self): x = ET.XML("foo") self.assertEqual(x.find('a', None), @@ -4201,12 +4616,11 @@ def test_issue14818(self): # -------------------------------------------------------------------- class NoAcceleratorTest(unittest.TestCase): - def setUp(self): - if not pyET: + @classmethod + def setUpClass(cls): + if ET is not pyET: raise unittest.SkipTest('only for the Python version') - # TODO: RUSTPYTHON - @unittest.expectedFailure # Test that the C accelerator was not imported for pyET def test_correct_import_pyET(self): # The type of methods defined in Python code is types.FunctionType, @@ -4215,6 +4629,27 @@ def test_correct_import_pyET(self): self.assertIsInstance(pyET.Element.__init__, types.FunctionType) self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) +# -------------------------------------------------------------------- + +class BoolTest(unittest.TestCase): + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_warning(self): + e = ET.fromstring('') + msg = ( + r"Testing an element's truth value will always return True in " + r"future versions. " + r"Use specific 'len\(elem\)' or 'elem is not None' test instead.") + with self.assertWarnsRegex(DeprecationWarning, msg): + result = bool(e) + # Emulate prior behavior for now + self.assertIs(result, False) + + # Element with children + ET.SubElement(e, 'b') + with self.assertWarnsRegex(DeprecationWarning, msg): + new_result = bool(e) + self.assertIs(new_result, True) # -------------------------------------------------------------------- @@ -4456,8 +4891,7 @@ def get_option(config, option_name, default=None): # -------------------------------------------------------------------- - -def test_main(module=None): +def setUpModule(module=None): # When invoked without a module, runs the Python ET tests by loading pyET. # Otherwise, uses the given module as the ET. global pyET @@ -4469,62 +4903,30 @@ def test_main(module=None): global ET ET = module - test_classes = [ - ModuleTest, - ElementSlicingTest, - BasicElementTest, - BadElementTest, - BadElementPathTest, - ElementTreeTest, - IOTest, - ParseErrorTest, - XIncludeTest, - ElementTreeTypeTest, - ElementFindTest, - ElementIterTest, - TreeBuilderTest, - XMLParserTest, - XMLPullParserTest, - BugsTest, - KeywordArgsTest, - C14NTest, - ] - - # These tests will only run for the pure-Python version that doesn't import - # _elementtree. We can't use skipUnless here, because pyET is filled in only - # after the module is loaded. - if pyET is not ET: - test_classes.extend([ - NoAcceleratorTest, - ]) + # don't interfere with subsequent tests + def cleanup(): + global ET, pyET + ET = pyET = None + unittest.addModuleCleanup(cleanup) # Provide default namespace mapping and path cache. from xml.etree import ElementPath nsmap = ET.register_namespace._namespace_map # Copy the default namespace mapping nsmap_copy = nsmap.copy() + unittest.addModuleCleanup(nsmap.update, nsmap_copy) + unittest.addModuleCleanup(nsmap.clear) + # Copy the path cache (should be empty) path_cache = ElementPath._cache + unittest.addModuleCleanup(setattr, ElementPath, "_cache", path_cache) ElementPath._cache = path_cache.copy() + # Align the Comment/PI factories. if hasattr(ET, '_set_factories'): old_factories = ET._set_factories(ET.Comment, ET.PI) - else: - old_factories = None - - try: - support.run_unittest(*test_classes) - finally: - from xml.etree import ElementPath - # Restore mapping and path cache - nsmap.clear() - nsmap.update(nsmap_copy) - ElementPath._cache = path_cache - if old_factories is not None: - ET._set_factories(*old_factories) - # don't interfere with subsequent tests - ET = pyET = None + unittest.addModuleCleanup(ET._set_factories, *old_factories) if __name__ == '__main__': - test_main() + unittest.main() diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py new file mode 100644 index 0000000000..3a0fc572f4 --- /dev/null +++ b/Lib/test/test_xml_etree_c.py @@ -0,0 +1,278 @@ +# xml.etree test for cElementTree +import io +import struct +from test import support +from test.support.import_helper import import_fresh_module +import types +import unittest + +cET = import_fresh_module('xml.etree.ElementTree', + fresh=['_elementtree']) +cET_alias = import_fresh_module('xml.etree.cElementTree', + fresh=['_elementtree', 'xml.etree'], + deprecated=True) + + +@unittest.skipUnless(cET, 'requires _elementtree') +class MiscTests(unittest.TestCase): + # Issue #8651. + @support.bigmemtest(size=support._2G + 100, memuse=1, dry_run=False) + def test_length_overflow(self, size): + data = b'x' * size + parser = cET.XMLParser() + try: + self.assertRaises(OverflowError, parser.feed, data) + finally: + data = None + + def test_del_attribute(self): + element = cET.Element('tag') + + element.tag = 'TAG' + with self.assertRaises(AttributeError): + del element.tag + self.assertEqual(element.tag, 'TAG') + + with self.assertRaises(AttributeError): + del element.text + self.assertIsNone(element.text) + element.text = 'TEXT' + with self.assertRaises(AttributeError): + del element.text + self.assertEqual(element.text, 'TEXT') + + with self.assertRaises(AttributeError): + del element.tail + self.assertIsNone(element.tail) + element.tail = 'TAIL' + with self.assertRaises(AttributeError): + del element.tail + self.assertEqual(element.tail, 'TAIL') + + with self.assertRaises(AttributeError): + del element.attrib + self.assertEqual(element.attrib, {}) + element.attrib = {'A': 'B', 'C': 'D'} + with self.assertRaises(AttributeError): + del element.attrib + self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) + + def test_trashcan(self): + # If this test fails, it will most likely die via segfault. + e = root = cET.Element('root') + for i in range(200000): + e = cET.SubElement(e, 'x') + del e + del root + support.gc_collect() + + def test_parser_ref_cycle(self): + # bpo-31499: xmlparser_dealloc() crashed with a segmentation fault when + # xmlparser_gc_clear() was called previously by the garbage collector, + # when the parser was part of a reference cycle. + + def parser_ref_cycle(): + parser = cET.XMLParser() + # Create a reference cycle using an exception to keep the frame + # alive, so the parser will be destroyed by the garbage collector + try: + raise ValueError + except ValueError as exc: + err = exc + + # Create a parser part of reference cycle + parser_ref_cycle() + # Trigger an explicit garbage collection to break the reference cycle + # and so destroy the parser + support.gc_collect() + + def test_bpo_31728(self): + # A crash or an assertion failure shouldn't happen, in case garbage + # collection triggers a call to clear() or a reading of text or tail, + # while a setter or clear() or __setstate__() is already running. + elem = cET.Element('elem') + class X: + def __del__(self): + elem.text + elem.tail + elem.clear() + + elem.text = X() + elem.clear() # shouldn't crash + + elem.tail = X() + elem.clear() # shouldn't crash + + elem.text = X() + elem.text = X() # shouldn't crash + elem.clear() + + elem.tail = X() + elem.tail = X() # shouldn't crash + elem.clear() + + elem.text = X() + elem.__setstate__({'tag': 42}) # shouldn't cause an assertion failure + elem.clear() + + elem.tail = X() + elem.__setstate__({'tag': 42}) # shouldn't cause an assertion failure + + @support.cpython_only + def test_uninitialized_parser(self): + # The interpreter shouldn't crash in case of calling methods or + # accessing attributes of uninitialized XMLParser objects. + parser = cET.XMLParser.__new__(cET.XMLParser) + self.assertRaises(ValueError, parser.close) + self.assertRaises(ValueError, parser.feed, 'foo') + class MockFile: + def read(*args): + return '' + self.assertRaises(ValueError, parser._parse_whole, MockFile()) + self.assertRaises(ValueError, parser._setevents, None) + self.assertIsNone(parser.entity) + self.assertIsNone(parser.target) + + def test_setstate_leaks(self): + # Test reference leaks + elem = cET.Element.__new__(cET.Element) + for i in range(100): + elem.__setstate__({'tag': 'foo', 'attrib': {'bar': 42}, + '_children': [cET.Element('child')], + 'text': 'text goes here', + 'tail': 'opposite of head'}) + + self.assertEqual(elem.tag, 'foo') + self.assertEqual(elem.text, 'text goes here') + self.assertEqual(elem.tail, 'opposite of head') + self.assertEqual(list(elem.attrib.items()), [('bar', 42)]) + self.assertEqual(len(elem), 1) + self.assertEqual(elem[0].tag, 'child') + + def test_iterparse_leaks(self): + # Test reference leaks in TreeBuilder (issue #35502). + # The test is written to be executed in the hunting reference leaks + # mode. + XML = '' + parser = cET.iterparse(io.StringIO(XML)) + next(parser) + del parser + support.gc_collect() + + def test_xmlpullparser_leaks(self): + # Test reference leaks in TreeBuilder (issue #35502). + # The test is written to be executed in the hunting reference leaks + # mode. + XML = '' + parser = cET.XMLPullParser() + parser.feed(XML) + del parser + support.gc_collect() + + def test_dict_disappearing_during_get_item(self): + # test fix for seg fault reported in issue 27946 + class X: + def __hash__(self): + e.attrib = {} # this frees e->extra->attrib + [{i: i} for i in range(1000)] # exhaust the dict keys cache + return 13 + + e = cET.Element("elem", {1: 2}) + r = e.get(X()) + self.assertIsNone(r) + + @support.cpython_only + def test_immutable_types(self): + root = cET.fromstring('') + dataset = ( + cET.Element, + cET.TreeBuilder, + cET.XMLParser, + type(root.iter()), + ) + for tp in dataset: + with self.subTest(tp=tp): + with self.assertRaisesRegex(TypeError, "immutable"): + tp.foo = 1 + + @support.cpython_only + def test_disallow_instantiation(self): + root = cET.fromstring('') + iter_type = type(root.iter()) + support.check_disallow_instantiation(self, iter_type) + + +@unittest.skipUnless(cET, 'requires _elementtree') +class TestAliasWorking(unittest.TestCase): + # Test that the cET alias module is alive + def test_alias_working(self): + e = cET_alias.Element('foo') + self.assertEqual(e.tag, 'foo') + + +@unittest.skipUnless(cET, 'requires _elementtree') +@support.cpython_only +class TestAcceleratorImported(unittest.TestCase): + # Test that the C accelerator was imported, as expected + def test_correct_import_cET(self): + # SubElement is a function so it retains _elementtree as its module. + self.assertEqual(cET.SubElement.__module__, '_elementtree') + + def test_correct_import_cET_alias(self): + self.assertEqual(cET_alias.SubElement.__module__, '_elementtree') + + def test_parser_comes_from_C(self): + # The type of methods defined in Python code is types.FunctionType, + # while the type of methods defined inside _elementtree is + # + self.assertNotIsInstance(cET.Element.__init__, types.FunctionType) + + +@unittest.skipUnless(cET, 'requires _elementtree') +@support.cpython_only +class SizeofTest(unittest.TestCase): + def setUp(self): + self.elementsize = support.calcobjsize('5P') + # extra + self.extra = struct.calcsize('PnnP4P') + + check_sizeof = support.check_sizeof + + def test_element(self): + e = cET.Element('a') + self.check_sizeof(e, self.elementsize) + + def test_element_with_attrib(self): + e = cET.Element('a', href='about:') + self.check_sizeof(e, self.elementsize + self.extra) + + def test_element_with_children(self): + e = cET.Element('a') + for i in range(5): + cET.SubElement(e, 'span') + # should have space for 8 children now + self.check_sizeof(e, self.elementsize + self.extra + + struct.calcsize('8P')) + + +def install_tests(): + # Test classes should have __module__ referring to this module. + from test import test_xml_etree + for name, base in vars(test_xml_etree).items(): + if isinstance(base, type) and issubclass(base, unittest.TestCase): + class Temp(base): + pass + Temp.__name__ = Temp.__qualname__ = name + Temp.__module__ = __name__ + assert name not in globals() + globals()[name] = Temp + +install_tests() + +def setUpModule(): + from test import test_xml_etree + test_xml_etree.setUpModule(module=cET) + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py index 199c22d0af..7dd667bf3f 100644 --- a/Lib/xml/dom/expatbuilder.py +++ b/Lib/xml/dom/expatbuilder.py @@ -200,10 +200,7 @@ def parseFile(self, file): parser = self.getParser() first_buffer = True try: - while 1: - buffer = file.read(16*1024) - if not buffer: - break + while buffer := file.read(16*1024): parser.Parse(buffer, False) if first_buffer and self.document.documentElement: self._setup_subset(buffer) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index d09ef5e7d0..db51f350ea 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -300,12 +300,28 @@ def _in_document(node): node = node.parentNode return False -def _write_data(writer, data): +def _write_data(writer, text, attr): "Writes datachars to writer." - if data: - data = data.replace("&", "&").replace("<", "<"). \ - replace("\"", """).replace(">", ">") - writer.write(data) + if not text: + return + # See the comments in ElementTree.py for behavior and + # implementation details. + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if attr: + if '"' in text: + text = text.replace('"', """) + if "\r" in text: + text = text.replace("\r", " ") + if "\n" in text: + text = text.replace("\n", " ") + if "\t" in text: + text = text.replace("\t", " ") + writer.write(text) def _get_elements_by_tagName_helper(parent, name, rc): for node in parent.childNodes: @@ -358,6 +374,8 @@ def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, self._name = qName self.namespaceURI = namespaceURI self._prefix = prefix + if localName is not None: + self._localName = localName self.childNodes = NodeList() # Add the single child node that represents the value of the attr @@ -881,7 +899,7 @@ def writexml(self, writer, indent="", addindent="", newl=""): for a_name in attrs.keys(): writer.write(" %s=\"" % a_name) - _write_data(writer, attrs[a_name].value) + _write_data(writer, attrs[a_name].value, True) writer.write("\"") if self.childNodes: writer.write(">") @@ -1110,7 +1128,7 @@ def splitText(self, offset): return newText def writexml(self, writer, indent="", addindent="", newl=""): - _write_data(writer, "%s%s%s" % (indent, self.data, newl)) + _write_data(writer, "%s%s%s" % (indent, self.data, newl), False) # DOM Level 3 (WD 9 April 2002) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index 8a20026349..a8852625a2 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -189,7 +189,7 @@ def parse(self, input): options.filter = self.filter options.errorHandler = self.errorHandler fp = input.byteStream - if fp is None and options.systemId: + if fp is None and input.systemId: import urllib.request fp = urllib.request.urlopen(input.systemId) return self._parse_bytestream(fp, options) @@ -247,10 +247,12 @@ def _create_opener(self): def _guess_media_encoding(self, source): info = source.byteStream.info() - if "Content-Type" in info: - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() + # import email.message + # assert isinstance(info, email.message.Message) + charset = info.get_param('charset') + if charset is not None: + return charset.lower() + return None class DOMInputSource(object): diff --git a/Lib/xml/etree/ElementInclude.py b/Lib/xml/etree/ElementInclude.py index 40a9b22292..986e6c3bbe 100644 --- a/Lib/xml/etree/ElementInclude.py +++ b/Lib/xml/etree/ElementInclude.py @@ -79,8 +79,8 @@ class LimitedRecursiveIncludeError(FatalIncludeError): # @param parse Parse mode. Either "xml" or "text". # @param encoding Optional text encoding (UTF-8 by default for "text"). # @return The expanded resource. If the parse mode is "xml", this -# is an ElementTree instance. If the parse mode is "text", this -# is a Unicode string. If the loader fails, it can return None +# is an Element instance. If the parse mode is "text", this +# is a string. If the loader fails, it can return None # or raise an OSError exception. # @throws OSError If the loader fails to load the resource. @@ -98,7 +98,7 @@ def default_loader(href, parse, encoding=None): ## # Expand XInclude directives. # -# @param elem Root element. +# @param elem Root Element or any ElementTree of a tree to be expanded # @param loader Optional resource loader. If omitted, it defaults # to {@link default_loader}. If given, it should be a callable # that implements the same interface as default_loader. @@ -106,12 +106,13 @@ def default_loader(href, parse, encoding=None): # relative include file references. # @param max_depth The maximum number of recursive inclusions. # Limited to reduce the risk of malicious content explosion. -# Pass a negative value to disable the limitation. +# Pass None to disable the limitation. # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded. # @throws FatalIncludeError If the function fails to include a given # resource, or if the tree contains malformed XInclude elements. -# @throws IOError If the function fails to load a given resource. -# @returns the node or its replacement if it was an XInclude node +# @throws OSError If the function fails to load a given resource. +# @throws ValueError If negative {@link max_depth} is passed. +# @returns None. Modifies tree pointed by {@link elem} def include(elem, loader=None, base_url=None, max_depth=DEFAULT_MAX_INCLUSION_DEPTH): diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index cd3c354d08..dc6bd28c03 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -416,6 +416,8 @@ def findall(elem, path, namespaces=None): def findtext(elem, path, default=None, namespaces=None): try: elem = next(iterfind(elem, path, namespaces)) - return elem.text or "" + if elem.text is None: + return "" + return elem.text except StopIteration: return default diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 2503d9ee76..9bb09ab540 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -99,6 +99,7 @@ import collections import collections.abc import contextlib +import weakref from . import ElementPath @@ -188,19 +189,6 @@ def makeelement(self, tag, attrib): """ return self.__class__(tag, attrib) - def copy(self): - """Return copy of current element. - - This creates a shallow copy. Subelements will be shared with the - original tree. - - """ - warnings.warn( - "elem.copy() is deprecated. Use copy.copy(elem) instead.", - DeprecationWarning - ) - return self.__copy__() - def __copy__(self): elem = self.makeelement(self.tag, self.attrib) elem.text = self.text @@ -213,9 +201,10 @@ def __len__(self): def __bool__(self): warnings.warn( - "The behavior of this method will change in future versions. " + "Testing an element's truth value will always return True in " + "future versions. " "Use specific 'len(elem)' or 'elem is not None' test instead.", - FutureWarning, stacklevel=2 + DeprecationWarning, stacklevel=2 ) return len(self._children) != 0 # emulate old behaviour, for now @@ -534,7 +523,9 @@ class ElementTree: """ def __init__(self, element=None, file=None): - # assert element is None or iselement(element) + if element is not None and not iselement(element): + raise TypeError('expected an Element, not %s' % + type(element).__name__) self._root = element # first node if file: self.parse(file) @@ -550,7 +541,9 @@ def _setroot(self, element): with the given element. Use with care! """ - # assert iselement(element) + if not iselement(element): + raise TypeError('expected an Element, not %s' + % type(element).__name__) self._root = element def parse(self, source, parser=None): @@ -579,10 +572,7 @@ def parse(self, source, parser=None): # it with chunks. self._root = parser._parse_whole(source) return self._root - while True: - data = source.read(65536) - if not data: - break + while data := source.read(65536): parser.feed(data) self._root = parser.close() return self._root @@ -719,6 +709,8 @@ def write(self, file_or_filename, of start/end tags """ + if self._root is None: + raise TypeError('ElementTree not initialized') if not method: method = "xml" elif method not in _serialize: @@ -911,13 +903,9 @@ def _serialize_xml(write, elem, qnames, namespaces, if elem.tail: write(_escape_cdata(elem.tail)) -HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", - "img", "input", "isindex", "link", "meta", "param") - -try: - HTML_EMPTY = set(HTML_EMPTY) -except NameError: - pass +HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr", + "img", "input", "isindex", "link", "meta", "param", "source", + "track", "wbr"} def _serialize_html(write, elem, qnames, namespaces, **kwargs): tag = elem.tag @@ -1242,13 +1230,14 @@ def iterparse(source, events=None, parser=None): # parser argument of iterparse is removed, this can be killed. pullparser = XMLPullParser(events=events, _parser=parser) - def iterator(source): + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + else: close_source = False + + def iterator(source): try: - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - yield None while True: yield from pullparser.read_events() # load event buffer @@ -1258,18 +1247,30 @@ def iterator(source): pullparser.feed(data) root = pullparser._close_and_return_root() yield from pullparser.read_events() - it.root = root + it = wr() + if it is not None: + it.root = root finally: if close_source: source.close() + gen = iterator(source) class IterParseIterator(collections.abc.Iterator): - __next__ = iterator(source).__next__ + __next__ = gen.__next__ + def close(self): + if close_source: + source.close() + gen.close() + + def __del__(self): + # TODO: Emit a ResourceWarning if it was not explicitly closed. + # (When the close() method will be supported in all maintained Python versions.) + if close_source: + source.close() + it = IterParseIterator() it.root = None - del iterator, IterParseIterator - - next(it) + wr = weakref.ref(it) return it @@ -1325,6 +1326,11 @@ def read_events(self): else: yield event + def flush(self): + if self._parser is None: + raise ValueError("flush() called after end of stream") + self._parser.flush() + def XML(text, parser=None): """Parse XML document from string constant. @@ -1731,6 +1737,15 @@ def close(self): del self.parser, self._parser del self.target, self._target + def flush(self): + was_enabled = self.parser.GetReparseDeferralEnabled() + try: + self.parser.SetReparseDeferralEnabled(False) + self.parser.Parse(b"", False) + except self._error as v: + self._raiseerror(v) + finally: + self.parser.SetReparseDeferralEnabled(was_enabled) # -------------------------------------------------------------------- # C14N 2.0 diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py index 17b75879eb..b657310207 100644 --- a/Lib/xml/sax/__init__.py +++ b/Lib/xml/sax/__init__.py @@ -60,11 +60,7 @@ def parseString(string, handler, errorHandler=ErrorHandler()): import os, sys if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ: default_parser_list = os.environ["PY_SAX_PARSER"].split(",") -del os - -_key = "python.xml.sax.parser" -if sys.platform[:4] == "java" and sys.registry.containsKey(_key): - default_parser_list = sys.registry.getProperty(_key).split(",") +del os, sys def make_parser(parser_list=()): @@ -93,15 +89,6 @@ def make_parser(parser_list=()): # --- Internal utility methods used by make_parser -if sys.platform[ : 4] == "java": - def _create_parser(parser_name): - from org.python.core import imp - drv_module = imp.importName(parser_name, 0, globals()) - return drv_module.create_parser() - -else: - def _create_parser(parser_name): - drv_module = __import__(parser_name,{},{},['create_parser']) - return drv_module.create_parser() - -del sys +def _create_parser(parser_name): + drv_module = __import__(parser_name,{},{},['create_parser']) + return drv_module.create_parser() diff --git a/Lib/xml/sax/_exceptions.py b/Lib/xml/sax/_exceptions.py index a9b2ba35c6..f292dc3a8e 100644 --- a/Lib/xml/sax/_exceptions.py +++ b/Lib/xml/sax/_exceptions.py @@ -1,8 +1,4 @@ """Different kinds of SAX Exceptions""" -import sys -if sys.platform[:4] == "java": - from java.lang import Exception -del sys # ===== SAXEXCEPTION ===== diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index e334ac9fea..ba3c1e9851 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -12,12 +12,6 @@ from xml.sax.handler import feature_string_interning from xml.sax.handler import property_xml_string, property_interning_dict -# xml.parsers.expat does not raise ImportError in Jython -import sys -if sys.platform[:4] == "java": - raise SAXReaderNotAvailable("expat not available in Java", None) -del sys - try: from xml.parsers import expat except ImportError: @@ -220,6 +214,20 @@ def feed(self, data, isFinal=False): # FIXME: when to invoke error()? self._err_handler.fatalError(exc) + def flush(self): + if self._parser is None: + return + + was_enabled = self._parser.GetReparseDeferralEnabled() + try: + self._parser.SetReparseDeferralEnabled(False) + self._parser.Parse(b"", False) + except expat.error as e: + exc = SAXParseException(expat.ErrorString(e.code), e, self) + self._err_handler.fatalError(exc) + finally: + self._parser.SetReparseDeferralEnabled(was_enabled) + def _close_source(self): source = self._source try: diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py index 716f228404..e906121d23 100644 --- a/Lib/xml/sax/xmlreader.py +++ b/Lib/xml/sax/xmlreader.py @@ -120,10 +120,8 @@ def parse(self, source): file = source.getCharacterStream() if file is None: file = source.getByteStream() - buffer = file.read(self._bufsize) - while buffer: + while buffer := file.read(self._bufsize): self.feed(buffer) - buffer = file.read(self._bufsize) self.close() def feed(self, data): diff --git a/stdlib/src/pyexpat.rs b/stdlib/src/pyexpat.rs index 033fa76c06..45e0328510 100644 --- a/stdlib/src/pyexpat.rs +++ b/stdlib/src/pyexpat.rs @@ -1,8 +1,4 @@ -/* Pyexpat builtin module -* -* -*/ - +/// Pyexpat builtin module use crate::vm::{PyRef, VirtualMachine, builtins::PyModule, extend_module}; pub fn make_module(vm: &VirtualMachine) -> PyRef { @@ -33,15 +29,25 @@ macro_rules! create_property { mod _pyexpat { use crate::vm::{ Context, Py, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, - builtins::{PyStr, PyStrRef, PyType}, + builtins::{PyInt, PyStr, PyStrRef, PyTupleRef, PyType}, function::ArgBytesLike, function::{IntoFuncArgs, OptionalArg}, }; use rustpython_common::lock::PyRwLock; use std::io::Cursor; use xml::reader::XmlEvent; + type MutableObject = PyRwLock; + #[pyattr] + pub fn version_info(vm: &VirtualMachine) -> PyTupleRef { + vm.ctx.new_tuple(vec![ + PyInt::from(2).into_pyobject(vm), + PyInt::from(7).into_pyobject(vm), + PyInt::from(1).into_pyobject(vm), + ]) + } + #[pyattr] #[pyclass(name = "xmlparser", module = false, traverse)] #[derive(Debug, PyPayload)] From 76b65a28c84a900e2c00711d17ed9fed10be1fc1 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Wed, 20 Aug 2025 13:16:22 +0300 Subject: [PATCH 2/3] Update stdlib/src/pyexpat.rs Co-authored-by: Jeong, YunWon <69878+youknowone@users.noreply.github.com> --- stdlib/src/pyexpat.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/stdlib/src/pyexpat.rs b/stdlib/src/pyexpat.rs index 45e0328510..0af0bc32b3 100644 --- a/stdlib/src/pyexpat.rs +++ b/stdlib/src/pyexpat.rs @@ -39,14 +39,8 @@ mod _pyexpat { type MutableObject = PyRwLock; - #[pyattr] - pub fn version_info(vm: &VirtualMachine) -> PyTupleRef { - vm.ctx.new_tuple(vec![ - PyInt::from(2).into_pyobject(vm), - PyInt::from(7).into_pyobject(vm), - PyInt::from(1).into_pyobject(vm), - ]) - } + #[pyattr(name = "version_info")] + pub const VERSION_INFO: (u32, u32, u32) = (2, 7, 1); #[pyattr] #[pyclass(name = "xmlparser", module = false, traverse)] From a4dbfa9f41d67cd430747fdb281b21a9602056e0 Mon Sep 17 00:00:00 2001 From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Wed, 20 Aug 2025 15:27:04 +0300 Subject: [PATCH 3/3] Fix clippy --- stdlib/src/pyexpat.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/src/pyexpat.rs b/stdlib/src/pyexpat.rs index 0af0bc32b3..871ba7d598 100644 --- a/stdlib/src/pyexpat.rs +++ b/stdlib/src/pyexpat.rs @@ -29,7 +29,7 @@ macro_rules! create_property { mod _pyexpat { use crate::vm::{ Context, Py, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, - builtins::{PyInt, PyStr, PyStrRef, PyTupleRef, PyType}, + builtins::{PyStr, PyStrRef, PyType}, function::ArgBytesLike, function::{IntoFuncArgs, OptionalArg}, };