From 8249938e02a9bf6e7d99d640c8fa8295d0d93e84 Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Sun, 17 Aug 2025 16:43:41 +0300
Subject: [PATCH 1/3] Update xml from 3.13.7
---
Lib/test/test_xml_dom_xmlbuilder.py | 92 ++++
Lib/test/test_xml_etree.py | 716 ++++++++++++++++++++++------
Lib/test/test_xml_etree_c.py | 278 +++++++++++
Lib/xml/dom/expatbuilder.py | 5 +-
Lib/xml/dom/minidom.py | 32 +-
Lib/xml/dom/xmlbuilder.py | 12 +-
Lib/xml/etree/ElementInclude.py | 13 +-
Lib/xml/etree/ElementPath.py | 4 +-
Lib/xml/etree/ElementTree.py | 91 ++--
Lib/xml/sax/__init__.py | 21 +-
Lib/xml/sax/_exceptions.py | 4 -
Lib/xml/sax/expatreader.py | 20 +-
Lib/xml/sax/xmlreader.py | 4 +-
stdlib/src/pyexpat.rs | 18 +-
14 files changed, 1056 insertions(+), 254 deletions(-)
create mode 100644 Lib/test/test_xml_dom_xmlbuilder.py
create mode 100644 Lib/test/test_xml_etree_c.py
diff --git a/Lib/test/test_xml_dom_xmlbuilder.py b/Lib/test/test_xml_dom_xmlbuilder.py
new file mode 100644
index 0000000000..5282e806e4
--- /dev/null
+++ b/Lib/test/test_xml_dom_xmlbuilder.py
@@ -0,0 +1,92 @@
+import io
+import unittest
+from http import client
+from test.test_httplib import FakeSocket
+from unittest import mock
+from xml.dom import getDOMImplementation, minidom, xmlbuilder
+
+SMALL_SAMPLE = b"""
+
+
+
Introduction to XSL
+
+A. Namespace
+"""
+
+
+class XMLBuilderTest(unittest.TestCase):
+ # Tests for xml.dom.xmlbuilder. HTTP access is replaced with canned
+ # responses installed through mock.patch.
+ def test_entity_resolver(self):
+ # resolveEntity() should return a DOMInputSource describing the
+ # resource handed back by the (mocked) opener.
+ # Canned response: status line, Content-Type header, then SMALL_SAMPLE.
+ body = (
+ b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
+ + SMALL_SAMPLE
+ )
+
+ sock = FakeSocket(body)
+ response = client.HTTPResponse(sock)
+ response.begin()
+ # Stand-in opener whose open() returns the prepared response.
+ attrs = {"open.return_value": response}
+ opener = mock.Mock(**attrs)
+
+ resolver = xmlbuilder.DOMEntityResolver()
+
+ with mock.patch("urllib.request.build_opener") as mock_build:
+ mock_build.return_value = opener
+ source = resolver.resolveEntity(None, "http://example.com/2000/svg")
+
+ self.assertIsInstance(source, xmlbuilder.DOMInputSource)
+ self.assertIsNone(source.publicId)
+ self.assertEqual(source.systemId, "http://example.com/2000/svg")
+ self.assertEqual(source.baseURI, "http://example.com/2000/")
+ self.assertEqual(source.encoding, "utf-8")
+ # The byte stream must be the very response object the opener returned.
+ self.assertIs(source.byteStream, response)
+
+ self.assertIsNone(source.characterStream)
+ self.assertIsNone(source.stringData)
+
+ def test_builder(self):
+ # The default DOM implementation is a DOMImplementationLS and can
+ # create a synchronous DOMBuilder.
+ imp = getDOMImplementation()
+ self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS)
+
+ builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+ self.assertIsInstance(builder, xmlbuilder.DOMBuilder)
+
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
+ def test_parse_uri(self):
+ # parseURI() should build a minidom.Document with a single child node
+ # from the canned response. Marked as an expected failure on RustPython.
+ body = (
+ b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
+ + SMALL_SAMPLE
+ )
+
+ sock = FakeSocket(body)
+ response = client.HTTPResponse(sock)
+ response.begin()
+ # Stand-in opener whose open() returns the prepared response.
+ attrs = {"open.return_value": response}
+ opener = mock.Mock(**attrs)
+
+ with mock.patch("urllib.request.build_opener") as mock_build:
+ mock_build.return_value = opener
+
+ imp = getDOMImplementation()
+ builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+ document = builder.parseURI("http://example.com/2000/svg")
+
+ self.assertIsInstance(document, minidom.Document)
+ self.assertEqual(len(document.childNodes), 1)
+
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
+ def test_parse_with_systemId(self):
+ # parse() is given an input source that only carries a systemId; the
+ # data comes from the patched urllib.request.urlopen, which returns an
+ # in-memory stream over SMALL_SAMPLE.
+ # Marked as an expected failure on RustPython.
+ response = io.BytesIO(SMALL_SAMPLE)
+
+ with mock.patch("urllib.request.urlopen") as mock_open:
+ mock_open.return_value = response
+
+ imp = getDOMImplementation()
+ source = imp.createDOMInputSource()
+ builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
+ source.systemId = "http://example.com/2000/svg"
+ document = builder.parse(source)
+
+ self.assertIsInstance(document, minidom.Document)
+ self.assertEqual(len(document.childNodes), 1)
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 1a681d5a7c..59b5515529 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -13,13 +13,16 @@
import operator
import os
import pickle
+import pyexpat
import sys
import textwrap
import types
import unittest
+import unittest.mock as mock
import warnings
import weakref
+from contextlib import nullcontext
from functools import partial
from itertools import product, islice
from test import support
@@ -120,6 +123,21 @@
"""
+def is_python_implementation():
+ # True when the module under test (global ET) is the same object as the
+ # Python ElementTree module (global pyET). Both globals must already be
+ # assigned; the asserts fail loudly if this is called too early.
+ assert ET is not None, "ET must be initialized"
+ assert pyET is not None, "pyET must be initialized"
+ return ET is pyET
+
+
+def equal_wrapper(cls):
+ """Mock cls.__eq__ to check whether it has been called or not.
+
+ The behaviour of cls.__eq__ (side-effects included) is left as is.
+ """
+ # The patcher itself is returned; callers use it as a context manager,
+ # e.g. "with equal_wrapper(E) as f: ...; f.assert_not_called()".
+ eq = cls.__eq__
+ # wraps=eq makes the mock delegate every call to the original __eq__.
+ return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
+
+
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@@ -200,28 +218,36 @@ class ElementTreeTest(unittest.TestCase):
def serialize_check(self, elem, expected):
self.assertEqual(serialize(elem), expected)
+ def test_constructor(self):
+ # Test constructor behavior.
+ # ElementTree() must raise TypeError for a root that is not an
+ # element: here a string and another ElementTree instance.
+
+ with self.assertRaises(TypeError):
+ tree = ET.ElementTree("")
+ with self.assertRaises(TypeError):
+ tree = ET.ElementTree(ET.ElementTree())
+
+ def test_setroot(self):
+ # Test _setroot behavior.
+
+ # A valid element becomes the root returned by getroot().
+ tree = ET.ElementTree()
+ element = ET.Element("tag")
+ tree._setroot(element)
+ self.assertEqual(tree.getroot().tag, "tag")
+ self.assertEqual(tree.getroot(), element)
+
+ # Test behavior with an invalid root element
+ # (a string, an ElementTree instance and None must all raise TypeError).
+
+ tree = ET.ElementTree()
+ with self.assertRaises(TypeError):
+ tree._setroot("")
+ with self.assertRaises(TypeError):
+ tree._setroot(ET.ElementTree())
+ with self.assertRaises(TypeError):
+ tree._setroot(None)
+
def test_interface(self):
# Test element tree interface.
- def check_string(string):
- len(string)
- for char in string:
- self.assertEqual(len(char), 1,
- msg="expected one-character string, got %r" % char)
- new_string = string + ""
- new_string = string + " "
- string[:0]
-
- def check_mapping(mapping):
- len(mapping)
- keys = mapping.keys()
- items = mapping.items()
- for key in keys:
- item = mapping[key]
- mapping["key"] = "value"
- self.assertEqual(mapping["key"], "value",
- msg="expected value string, got %r" % mapping["key"])
-
def check_element(element):
self.assertTrue(ET.iselement(element), msg="not an element")
direlem = dir(element)
@@ -231,12 +257,12 @@ def check_element(element):
self.assertIn(attr, direlem,
msg='no %s visible by dir' % attr)
- check_string(element.tag)
- check_mapping(element.attrib)
+ self.assertIsInstance(element.tag, str)
+ self.assertIsInstance(element.attrib, dict)
if element.text is not None:
- check_string(element.text)
+ self.assertIsInstance(element.text, str)
if element.tail is not None:
- check_string(element.tail)
+ self.assertIsInstance(element.tail, str)
for elem in element:
check_element(elem)
@@ -392,6 +418,7 @@ def test_path_cache(self):
from xml.etree import ElementPath
elem = ET.XML(SAMPLE_XML)
+ ElementPath._cache.clear()
for i in range(10): ET.ElementTree(elem).find('./'+str(i))
cache_len_10 = len(ElementPath._cache)
for i in range(10): ET.ElementTree(elem).find('./'+str(i))
@@ -572,7 +599,9 @@ def test_iterparse(self):
iterparse = ET.iterparse
context = iterparse(SIMPLE_XMLFILE)
+ self.assertIsNone(context.root)
action, elem = next(context)
+ self.assertIsNone(context.root)
self.assertEqual((action, elem.tag), ('end', 'element'))
self.assertEqual([(action, elem.tag) for action, elem in context], [
('end', 'element'),
@@ -589,6 +618,17 @@ def test_iterparse(self):
('end', '{namespace}root'),
])
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ context = iterparse(source)
+ action, elem = next(context)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ self.assertEqual([(action, elem.tag) for action, elem in context], [
+ ('end', 'element'),
+ ('end', 'empty-element'),
+ ('end', 'root'),
+ ])
+ self.assertEqual(context.root.tag, 'root')
+
events = ()
context = iterparse(SIMPLE_XMLFILE, events)
self.assertEqual([(action, elem.tag) for action, elem in context], [])
@@ -680,12 +720,83 @@ def test_iterparse(self):
# Not exhausting the iterator still closes the resource (bpo-43292)
with warnings_helper.check_no_resource_warning(self):
- it = iterparse(TESTFN)
+ it = iterparse(SIMPLE_XMLFILE)
+ del it
+
+ with warnings_helper.check_no_resource_warning(self):
+ it = iterparse(SIMPLE_XMLFILE)
+ it.close()
del it
+ with warnings_helper.check_no_resource_warning(self):
+ it = iterparse(SIMPLE_XMLFILE)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ del it, elem
+
+ with warnings_helper.check_no_resource_warning(self):
+ it = iterparse(SIMPLE_XMLFILE)
+ action, elem = next(it)
+ it.close()
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ del it, elem
+
with self.assertRaises(FileNotFoundError):
iterparse("nonexistent")
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
+ def test_iterparse_close(self):
+ # Checks close() on the iterparse iterator in three states (before any
+ # event, after one event, after exhaustion), each time for a source
+ # given by file name and for a caller-opened file object. In every case
+ # next() must raise StopIteration afterwards, a second close() must be
+ # harmless, and a caller-opened file must stay open.
+ iterparse = ET.iterparse
+
+ # State 1: closed before any event was read (source given by name).
+ it = iterparse(SIMPLE_XMLFILE)
+ it.close()
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ # State 1 again, with a file object owned by the caller.
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ it = iterparse(source)
+ it.close()
+ self.assertFalse(source.closed)
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ # State 2: closed after the first event was read (source given by name).
+ it = iterparse(SIMPLE_XMLFILE)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ it.close()
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ # State 2 again, with a file object owned by the caller.
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ it = iterparse(source)
+ action, elem = next(it)
+ self.assertEqual((action, elem.tag), ('end', 'element'))
+ it.close()
+ self.assertFalse(source.closed)
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ # State 3: closed after the iterator was exhausted (source given by name).
+ it = iterparse(SIMPLE_XMLFILE)
+ list(it)
+ it.close()
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
+ # State 3 again, with a file object owned by the caller.
+ with open(SIMPLE_XMLFILE, 'rb') as source:
+ it = iterparse(source)
+ list(it)
+ it.close()
+ self.assertFalse(source.closed)
+ with self.assertRaises(StopIteration):
+ next(it)
+ it.close() # idempotent
+
def test_writefile(self):
elem = ET.Element("tag")
elem.text = "text"
@@ -1427,8 +1538,9 @@ def test_processinginstruction(self):
def test_html_empty_elems_serialization(self):
# issue 15970
# from http://www.w3.org/TR/html401/index/elements.html
- for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
- 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
+ for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME',
+ 'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM',
+ 'SOURCE', 'TRACK', 'WBR']:
for elem in [element, element.lower()]:
expected = '<%s>' % elem
serialized = serialize(ET.XML('<%s />' % elem), method='html')
@@ -1464,12 +1576,14 @@ def test_attlist_default(self):
class XMLPullParserTest(unittest.TestCase):
- def _feed(self, parser, data, chunk_size=None):
+ def _feed(self, parser, data, chunk_size=None, flush=False):
if chunk_size is None:
parser.feed(data)
else:
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])
+ if flush:
+ parser.flush()
def assert_events(self, parser, expected, max_events=None):
self.assertEqual(
@@ -1489,28 +1603,41 @@ def assert_event_tags(self, parser, expected, max_events=None):
# TODO: RUSTPYTHON
@unittest.expectedFailure
- def test_simple_xml(self):
- for chunk_size in (None, 1, 5):
- with self.subTest(chunk_size=chunk_size):
- parser = ET.XMLPullParser()
- self.assert_event_tags(parser, [])
- self._feed(parser, "\n", chunk_size)
- self.assert_event_tags(parser, [])
- self._feed(parser,
- "\n text\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'element')])
- self._feed(parser, "texttail\n", chunk_size)
- self._feed(parser, "\n", chunk_size)
- self.assert_event_tags(parser, [
- ('end', 'element'),
- ('end', 'empty-element'),
- ])
- self._feed(parser, "\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'root')])
- self.assertIsNone(parser.close())
+ def test_simple_xml(self, chunk_size=None, flush=False):
+ parser = ET.XMLPullParser()
+ self.assert_event_tags(parser, [])
+ self._feed(parser, "\n", chunk_size, flush)
+ self.assert_event_tags(parser, [])
+ self._feed(parser,
+ "\n text\n", chunk_size, flush)
+ self.assert_event_tags(parser, [('end', 'element')])
+ self._feed(parser, "texttail\n", chunk_size, flush)
+ self._feed(parser, "\n", chunk_size, flush)
+ self.assert_event_tags(parser, [
+ ('end', 'element'),
+ ('end', 'empty-element'),
+ ])
+ self._feed(parser, "\n", chunk_size, flush)
+ self.assert_event_tags(parser, [('end', 'root')])
+ self.assertIsNone(parser.close())
+
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
+ def test_simple_xml_chunk_1(self):
+ # Re-runs test_simple_xml feeding the data one character per feed()
+ # call, with flush=True so that _feed() also calls parser.flush().
+ self.test_simple_xml(chunk_size=1, flush=True)
+
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
+ def test_simple_xml_chunk_5(self):
+ # Re-runs test_simple_xml feeding the data in 5-character slices,
+ # with flush=True so that _feed() also calls parser.flush().
+ self.test_simple_xml(chunk_size=5, flush=True)
+
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
+ def test_simple_xml_chunk_22(self):
+ # Re-runs test_simple_xml feeding the data in 22-character slices;
+ # flush keeps its default (False), so no explicit flush is requested.
+ self.test_simple_xml(chunk_size=22)
# TODO: RUSTPYTHON
@unittest.expectedFailure
@@ -1726,6 +1853,60 @@ def test_unknown_event(self):
with self.assertRaises(ValueError):
ET.XMLPullParser(events=('start', 'end', 'bogus'))
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    @unittest.skipIf(pyexpat.version_info < (2, 6, 0),
+                     f'Expat {pyexpat.version_info} does not '
+                     'support reparse deferral')
+    def test_flush_reparse_deferral_enabled(self):
+        # With reparse deferral left enabled, a start tag that arrives split
+        # across feed() calls produces no event until flush() is called, and
+        # flush() must leave deferral enabled.
+        parser = ET.XMLPullParser(events=('start', 'end'))
+
+        # Deliver the start tag of <doc> in two pieces.
+        for chunk in ("<doc", ">"):
+            parser.feed(chunk)
+
+        self.assert_event_tags(parser, [])  # i.e. no elements started
+        # The expat parser object is only reachable in the Python module.
+        if ET is pyET:
+            self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled())
+
+        parser.flush()
+
+        self.assert_event_tags(parser, [('start', 'doc')])
+        if ET is pyET:
+            self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled())
+
+        # Close the element so that the matching 'end' event is produced.
+        parser.feed("</doc>")
+        parser.close()
+
+        self.assert_event_tags(parser, [('end', 'doc')])
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_flush_reparse_deferral_disabled(self):
+        # With reparse deferral switched off, flush() still delivers the
+        # pending start event and must not switch deferral back on.
+        parser = ET.XMLPullParser(events=('start', 'end'))
+
+        # Deliver the start tag of <doc> in two pieces.
+        for chunk in ("<doc", ">"):
+            parser.feed(chunk)
+
+        # Deferral can only be toggled on Expat >= 2.6.0, and only through
+        # the Python module, which exposes the underlying expat parser.
+        if pyexpat.version_info >= (2, 6, 0):
+            if ET is not pyET:
+                self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled '
+                              'methods not available in C')
+            parser._parser._parser.SetReparseDeferralEnabled(False)
+            self.assert_event_tags(parser, [])  # i.e. no elements started
+
+        if ET is pyET:
+            self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())
+
+        parser.flush()
+
+        self.assert_event_tags(parser, [('start', 'doc')])
+        if ET is pyET:
+            self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())
+
+        # Close the element so that the matching 'end' event is produced.
+        parser.feed("</doc>")
+        parser.close()
+
+        self.assert_event_tags(parser, [('end', 'doc')])
#
# xinclude tests (samples from appendix C of the xinclude specification)
@@ -2434,6 +2615,22 @@ def test_39495_treebuilder_start(self):
self.assertRaises(TypeError, ET.TreeBuilder().start, "tag")
self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None)
+ def test_issue123213_correct_extend_exception(self):
+ # Element.extend() must let an exception raised while consuming its
+ # argument propagate unchanged (type and message).
+ # Does not hide the internal exception when extending the element
+ self.assertRaises(ZeroDivisionError, ET.Element('tag').extend,
+ (1/0 for i in range(2)))
+
+ # Still raises the TypeError when extending with a non-iterable
+ self.assertRaises(TypeError, ET.Element('tag').extend, None)
+
+ # Preserves the TypeError message when extending with a generator
+ def f():
+ raise TypeError("mymessage")
+
+ self.assertRaisesRegex(
+ TypeError, 'mymessage',
+ ET.Element('tag').extend, (f() for i in range(2)))
+
# --------------------------------------------------------------------
@@ -2468,35 +2665,6 @@ def test___init__(self):
self.assertIsNot(element_foo.attrib, attrib)
self.assertNotEqual(element_foo.attrib, attrib)
- def test_copy(self):
- # Only run this test if Element.copy() is defined.
- if "copy" not in dir(ET.Element):
- raise unittest.SkipTest("Element.copy() not present")
-
- element_foo = ET.Element("foo", { "zix": "wyp" })
- element_foo.append(ET.Element("bar", { "baz": "qix" }))
-
- with self.assertWarns(DeprecationWarning):
- element_foo2 = element_foo.copy()
-
- # elements are not the same
- self.assertIsNot(element_foo2, element_foo)
-
- # string attributes are equal
- self.assertEqual(element_foo2.tag, element_foo.tag)
- self.assertEqual(element_foo2.text, element_foo.text)
- self.assertEqual(element_foo2.tail, element_foo.tail)
-
- # number of children is the same
- self.assertEqual(len(element_foo2), len(element_foo))
-
- # children are the same
- for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
- self.assertIs(child1, child2)
-
- # attrib is a copy
- self.assertEqual(element_foo2.attrib, element_foo.attrib)
-
def test___copy__(self):
element_foo = ET.Element("foo", { "zix": "wyp" })
element_foo.append(ET.Element("bar", { "baz": "qix" }))
@@ -2662,8 +2830,7 @@ def test_pickle_issue18997(self):
4
"""
e1 = dumper.fromstring(XMLTEXT)
- if hasattr(e1, '__getstate__'):
- self.assertEqual(e1.__getstate__()['tag'], 'group')
+ self.assertEqual(e1.__getstate__()['tag'], 'group')
e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
dumper, loader, proto)
self.assertEqual(e2.tag, 'group')
@@ -2671,6 +2838,7 @@ def test_pickle_issue18997(self):
class BadElementTest(ElementTestCase, unittest.TestCase):
+
def test_extend_mutable_list(self):
class X:
@property
@@ -2709,20 +2877,170 @@ class Y(X, ET.Element):
e = ET.Element('foo')
e.extend(L)
- @unittest.skip("TODO: RUSTPYTHON, hangs")
- def test_remove_with_mutating(self):
- class X(ET.Element):
+ def test_remove_with_clear_assume_missing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ # raises=True: the helper's mutating __eq__ overrides return False and
+ # each remove() call is wrapped in assertRaises(ValueError).
+ self.do_test_remove_with_clear(raises=True)
+
+ def test_remove_with_clear_assume_existing(self):
+ # gh-126033: Check that a concurrent clear() for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ # raises=False: the helper's mutating __eq__ overrides return True and
+ # each remove() call runs under nullcontext (no exception expected).
+ self.do_test_remove_with_clear(raises=False)
+
+ def do_test_remove_with_clear(self, *, raises):
+
+ # Until the discrepancy between "del root[:]" and "root.clear()" is
+ # resolved, we need to keep two tests. Previously, using "del root[:]"
+ # did not crash with the reproducer of gh-126033 while "root.clear()"
+ # did.
+
+ class E(ET.Element):
+ """Local class to be able to mock E.__eq__ for introspection."""
+
+ class X(E):
def __eq__(self, o):
- del e[:]
- return False
- e = ET.Element('foo')
- e.extend([X('bar')])
- self.assertRaises(ValueError, e.remove, ET.Element('baz'))
+ del root[:]
+ return not raises
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- self.assertRaises(ValueError, e.remove, X('baz'))
+ class Y(E):
+ def __eq__(self, o):
+ root.clear()
+ return not raises
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ self.assertIs(E.__eq__, object.__eq__)
+
+ for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
+ self.enterContext(self.subTest(side_effect=side_effect))
+
+ # test removing R() from [U()]
+ for R, U, description in [
+ (E, Z, "remove missing E() from [Z()]"),
+ (Z, E, "remove missing Z() from [E()]"),
+ (Z, Z, "remove missing Z() from [Z()]"),
+ ]:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # Test removing root[0] from [Z()].
+ #
+ # Since we call root.remove() with root[0], Z.__eq__()
+ # will not be called (we branch on the fast Py_EQ path).
+ with self.subTest("remove root[0] from [Z()]"):
+ root = E('top')
+ root.append(Z('rem'))
+ with equal_wrapper(E) as f, equal_wrapper(Z) as g:
+ root.remove(root[0])
+ f.assert_not_called()
+ g.assert_not_called()
+
+ # Test removing root[1] (of type R) from [U(), R()].
+ is_special = is_python_implementation() and raises and Z is Y
+ if is_python_implementation() and raises and Z is Y:
+ # In pure Python, using root.clear() sets the children
+ # list to [] without calling list.clear().
+ #
+ # For this reason, the call to root.remove() first
+ # checks root[0] and sets the children list to []
+ # since either root[0] or root[1] is an evil element.
+ #
+ # Since checking root[1] still uses the old reference
+ # to the children list, PyObject_RichCompareBool() branches
+ # to the fast Py_EQ path and Y.__eq__() is called exactly
+ # once (when checking root[0]).
+ continue
+ else:
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def test_remove_with_mutate_root_assume_missing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # missing element does not make the interpreter crash.
+ # raises=True: the helper's mutating __eq__ returns False and each
+ # remove() call is wrapped in assertRaises(ValueError).
+ self.do_test_remove_with_mutate_root(raises=True)
+
+ def test_remove_with_mutate_root_assume_existing(self):
+ # gh-126033: Check that a concurrent mutation for an assumed-to-be
+ # existing element does not make the interpreter crash.
+ # raises=False: the helper's mutating __eq__ returns True and each
+ # remove() call runs under nullcontext (no exception expected).
+ self.do_test_remove_with_mutate_root(raises=False)
+
+ def do_test_remove_with_mutate_root(self, *, raises):
+ # Shared driver: calls root.remove() while an element's __eq__ deletes
+ # root[0] during the comparison.
+ # raises -- True: __eq__ returns False and ValueError is expected;
+ # False: __eq__ returns True and no exception is expected.
+ E = ET.Element
+
+ # Z is the "evil" element type. "root" is resolved when __eq__ runs,
+ # i.e. it is whichever root the current subTest below has built.
+ class Z(E):
+ def __eq__(self, o):
+ del root[0]
+ return not raises
+
+ # Factory for the context manager wrapped around each remove() call.
+ if raises:
+ get_checker_context = lambda: self.assertRaises(ValueError)
+ else:
+ get_checker_context = nullcontext
+
+ # test removing R() from [U(), V()]
+ cases = self.cases_for_remove_missing_with_mutations(E, Z)
+ for R, U, V, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), V('two')])
+ with get_checker_context():
+ root.remove(R('missing'))
+
+ # test removing root[1] (of type R) from [U(), R()]
+ cases = self.cases_for_remove_existing_with_mutations(E, Z)
+ for R, U, description in cases:
+ with self.subTest(description):
+ root = E('top')
+ root.extend([U('one'), R('rem')])
+ with get_checker_context():
+ root.remove(root[1])
+
+ def cases_for_remove_missing_with_mutations(self, E, Z):
+ # Generator of (R, U, V, description) tuples, where R is the type of
+ # the element passed to remove() and U, V are the types of the two
+ # children. The description is built from the classes' __name__.
+ # Cases for removing R() from [U(), V()].
+ # The case U = V = R = E is not interesting as there is no mutation.
+ # First group: R is E (three cases).
+ for U, V in [(E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {E.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield E, U, V, description
+
+ # Second group: R is Z (four cases, including U = V = E).
+ for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
+ description = (f"remove missing {Z.__name__}() from "
+ f"[{U.__name__}(), {V.__name__}()]")
+ yield Z, U, V, description
+
+    def cases_for_remove_existing_with_mutations(self, E, Z):
+        # Generator of (R, U, description) tuples, where U is the type of
+        # root[0] and R the type of root[1], the element being removed.
+        # Cases for removing root[1] (of type R) from [U(), R()].
+        # The case U = R = E is not interesting as there is no mutation.
+        for U, R in [(E, Z), (Z, E), (Z, Z)]:
+            # The description is derived from the classes' __name__ so that
+            # it always matches the types actually yielded.
+            description = (f"remove root[1] (of type {R.__name__}) "
+                           f"from [{U.__name__}(), {R.__name__}()]")
+            yield R, U, description
+
+ @support.infinite_recursion(25)
def test_recursive_repr(self):
# Issue #25455
e = ET.Element('foo')
@@ -2821,21 +3139,83 @@ def element_factory(x, y):
del b
gc_collect()
+ def test_deepcopy_clear(self):
+ # Prevent crashes when __deepcopy__() clears the children list.
+ # See https://github.com/python/cpython/issues/133009.
+ # X is a child type whose __deepcopy__ empties the parent ("root")
+ # and returns itself instead of a copy.
+ class X(ET.Element):
+ def __deepcopy__(self, memo):
+ root.clear()
+ return self
-class MutatingElementPath(str):
+ root = ET.Element('a')
+ evil = X('x')
+ root.extend([evil, ET.Element('y')])
+ # The expected outcome differs between the two implementations.
+ if is_python_implementation():
+ # Mutating a list over which we iterate raises an error.
+ self.assertRaises(RuntimeError, copy.deepcopy, root)
+ else:
+ c = copy.deepcopy(root)
+ # In the C implementation, we can still copy the evil element.
+ self.assertListEqual(list(c), [evil])
+
+ def test_deepcopy_grow(self):
+ # Prevent crashes when __deepcopy__() mutates the children list.
+ # See https://github.com/python/cpython/issues/133009.
+ a = ET.Element('a')
+ b = ET.Element('b')
+ c = ET.Element('c')
+
+ # X is a child type whose __deepcopy__ appends a and b to the parent
+ # ("root") and returns itself instead of a copy.
+ class X(ET.Element):
+ def __deepcopy__(self, memo):
+ root.append(a)
+ root.append(b)
+ return self
+
+ root = ET.Element('top')
+ evil1, evil2 = X('1'), X('2')
+ root.extend([evil1, c, evil2])
+ children = list(copy.deepcopy(root))
+ # mock deep copies
+ self.assertIs(children[0], evil1)
+ self.assertIs(children[2], evil2)
+ # true deep copies
+ self.assertEqual(children[1].tag, c.tag)
+ # Both evil children appended (a, b), so the copy ends with a, b, a, b.
+ self.assertEqual([c.tag for c in children[3:]],
+ [a.tag, b.tag, a.tag, b.tag])
+
+
+class MutationDeleteElementPath(str):
def __new__(cls, elem, *args):
self = str.__new__(cls, *args)
self.elem = elem
return self
+
def __eq__(self, o):
del self.elem[:]
return True
-MutatingElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
+
+class MutationClearElementPath(str):
+ # A str subclass used as a path argument: comparing it calls clear() on
+ # the element it was created with and then reports equality.
+ def __new__(cls, elem, *args):
+ # The remaining arguments build the str value; elem is kept on the
+ # instance for __eq__ to mutate.
+ self = str.__new__(cls, *args)
+ self.elem = elem
+ return self
+
+ def __eq__(self, o):
+ self.elem.clear()
+ return True
+
+ # Defining __eq__ would otherwise make the class unhashable.
+ __hash__ = str.__hash__
+
class BadElementPath(str):
def __eq__(self, o):
raise 1/0
-BadElementPath.__hash__ = str.__hash__
+
+ __hash__ = str.__hash__
+
class BadElementPathTest(ElementTestCase, unittest.TestCase):
def setUp(self):
@@ -2850,9 +3230,11 @@ def tearDown(self):
super().tearDown()
def test_find_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.find(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.find(cls(e, 'x'))
def test_find_with_error(self):
e = ET.Element('foo')
@@ -2863,9 +3245,11 @@ def test_find_with_error(self):
pass
def test_findtext_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findtext(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findtext(cls(e, 'x'))
def test_findtext_with_error(self):
e = ET.Element('foo')
@@ -2875,10 +3259,26 @@ def test_findtext_with_error(self):
except ZeroDivisionError:
pass
+ def test_findtext_with_falsey_text_attribute(self):
+ # findtext() must return a falsey (but not None) .text value as is,
+ # not replace it with another value.
+ root_elem = ET.Element('foo')
+ sub_elem = ET.SubElement(root_elem, 'bar')
+ falsey = ["", 0, False, [], (), {}]
+ for val in falsey:
+ sub_elem.text = val
+ self.assertEqual(root_elem.findtext('./bar'), val)
+
+ def test_findtext_with_none_text_attribute(self):
+ # For a matching element whose .text is None, findtext() is expected
+ # to return the empty string.
+ root_elem = ET.Element('foo')
+ sub_elem = ET.SubElement(root_elem, 'bar')
+ sub_elem.text = None
+ self.assertEqual(root_elem.findtext('./bar'), '')
+
def test_findall_with_mutating(self):
- e = ET.Element('foo')
- e.extend([ET.Element('bar')])
- e.findall(MutatingElementPath(e, 'x'))
+ for cls in [MutationDeleteElementPath, MutationClearElementPath]:
+ with self.subTest(cls):
+ e = ET.Element('foo')
+ e.extend([ET.Element('bar')])
+ e.findall(cls(e, 'x'))
def test_findall_with_error(self):
e = ET.Element('foo')
@@ -3233,8 +3633,7 @@ def test_basic(self):
# With an explicit parser too (issue #9708)
sourcefile = serialize(doc, to_string=False)
parser = ET.XMLParser(target=ET.TreeBuilder())
- self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0],
- 'end')
+ self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 'end')
tree = ET.ElementTree(None)
self.assertRaises(AttributeError, tree.iter)
@@ -3836,6 +4235,22 @@ def test_setslice_negative_steps(self):
e[1::-sys.maxsize<<64] = [ET.Element('d')]
self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
+ def test_issue123213_setslice_exception(self):
+ # Slice assignment on an Element must let an exception raised while
+ # consuming the assigned value propagate unchanged (type and message).
+ e = ET.Element('tag')
+ # Does not hide the internal exception when assigning to the element
+ with self.assertRaises(ZeroDivisionError):
+ e[:1] = (1/0 for i in range(2))
+
+ # Still raises the TypeError when assigning with a non-iterable
+ with self.assertRaises(TypeError):
+ e[:1] = None
+
+ # Preserve the original TypeError message when assigning.
+ def f():
+ raise TypeError("mymessage")
+
+ with self.assertRaisesRegex(TypeError, 'mymessage'):
+ e[:1] = (f() for i in range(2))
class IOTest(unittest.TestCase):
# TODO: RUSTPYTHON
@@ -4163,10 +4578,10 @@ def test_error_code(self):
class KeywordArgsTest(unittest.TestCase):
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
# Test various issues with keyword arguments passed to ET.Element
# constructor and methods
+ # TODO: RUSTPYTHON
+ @unittest.expectedFailure
def test_issue14818(self):
x = ET.XML("foo")
self.assertEqual(x.find('a', None),
@@ -4201,12 +4616,11 @@ def test_issue14818(self):
# --------------------------------------------------------------------
class NoAcceleratorTest(unittest.TestCase):
- def setUp(self):
- if not pyET:
+ @classmethod
+ def setUpClass(cls):
+ if ET is not pyET:
raise unittest.SkipTest('only for the Python version')
- # TODO: RUSTPYTHON
- @unittest.expectedFailure
# Test that the C accelerator was not imported for pyET
def test_correct_import_pyET(self):
# The type of methods defined in Python code is types.FunctionType,
@@ -4215,6 +4629,27 @@ def test_correct_import_pyET(self):
self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
+# --------------------------------------------------------------------
+
+class BoolTest(unittest.TestCase):
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_warning(self):
+        # bool(element) must emit a DeprecationWarning while still returning
+        # False for a childless element and True once a child is added.
+        # An empty string is not a parseable document, so start from a
+        # childless root element.
+        e = ET.fromstring('<a></a>')
+        msg = (
+            r"Testing an element's truth value will always return True in "
+            r"future versions. "
+            r"Use specific 'len\(elem\)' or 'elem is not None' test instead.")
+        with self.assertWarnsRegex(DeprecationWarning, msg):
+            result = bool(e)
+        # Emulate prior behavior for now
+        self.assertIs(result, False)
+
+        # Element with children
+        ET.SubElement(e, 'b')
+        with self.assertWarnsRegex(DeprecationWarning, msg):
+            new_result = bool(e)
+        self.assertIs(new_result, True)
# --------------------------------------------------------------------
@@ -4456,8 +4891,7 @@ def get_option(config, option_name, default=None):
# --------------------------------------------------------------------
-
-def test_main(module=None):
+def setUpModule(module=None):
# When invoked without a module, runs the Python ET tests by loading pyET.
# Otherwise, uses the given module as the ET.
global pyET
@@ -4469,62 +4903,30 @@ def test_main(module=None):
global ET
ET = module
- test_classes = [
- ModuleTest,
- ElementSlicingTest,
- BasicElementTest,
- BadElementTest,
- BadElementPathTest,
- ElementTreeTest,
- IOTest,
- ParseErrorTest,
- XIncludeTest,
- ElementTreeTypeTest,
- ElementFindTest,
- ElementIterTest,
- TreeBuilderTest,
- XMLParserTest,
- XMLPullParserTest,
- BugsTest,
- KeywordArgsTest,
- C14NTest,
- ]
-
- # These tests will only run for the pure-Python version that doesn't import
- # _elementtree. We can't use skipUnless here, because pyET is filled in only
- # after the module is loaded.
- if pyET is not ET:
- test_classes.extend([
- NoAcceleratorTest,
- ])
+ # don't interfere with subsequent tests
+ def cleanup():
+ global ET, pyET
+ ET = pyET = None
+ unittest.addModuleCleanup(cleanup)
# Provide default namespace mapping and path cache.
from xml.etree import ElementPath
nsmap = ET.register_namespace._namespace_map
# Copy the default namespace mapping
nsmap_copy = nsmap.copy()
+ unittest.addModuleCleanup(nsmap.update, nsmap_copy)
+ unittest.addModuleCleanup(nsmap.clear)
+
# Copy the path cache (should be empty)
path_cache = ElementPath._cache
+ unittest.addModuleCleanup(setattr, ElementPath, "_cache", path_cache)
ElementPath._cache = path_cache.copy()
+
# Align the Comment/PI factories.
if hasattr(ET, '_set_factories'):
old_factories = ET._set_factories(ET.Comment, ET.PI)
- else:
- old_factories = None
-
- try:
- support.run_unittest(*test_classes)
- finally:
- from xml.etree import ElementPath
- # Restore mapping and path cache
- nsmap.clear()
- nsmap.update(nsmap_copy)
- ElementPath._cache = path_cache
- if old_factories is not None:
- ET._set_factories(*old_factories)
- # don't interfere with subsequent tests
- ET = pyET = None
+ unittest.addModuleCleanup(ET._set_factories, *old_factories)
if __name__ == '__main__':
- test_main()
+ unittest.main()
diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py
new file mode 100644
index 0000000000..3a0fc572f4
--- /dev/null
+++ b/Lib/test/test_xml_etree_c.py
@@ -0,0 +1,278 @@
+# xml.etree test for cElementTree
+import io
+import struct
+from test import support
+from test.support.import_helper import import_fresh_module
+import types
+import unittest
+
+cET = import_fresh_module('xml.etree.ElementTree',
+ fresh=['_elementtree'])
+cET_alias = import_fresh_module('xml.etree.cElementTree',
+ fresh=['_elementtree', 'xml.etree'],
+ deprecated=True)
+
+
+@unittest.skipUnless(cET, 'requires _elementtree')
+class MiscTests(unittest.TestCase):
+    # Issue #8651.
+    @support.bigmemtest(size=support._2G + 100, memuse=1, dry_run=False)
+    def test_length_overflow(self, size):
+        data = b'x' * size
+        parser = cET.XMLParser()
+        try:
+            self.assertRaises(OverflowError, parser.feed, data)
+        finally:
+            data = None
+
+    def test_del_attribute(self):
+        element = cET.Element('tag')
+
+        element.tag = 'TAG'
+        with self.assertRaises(AttributeError):
+            del element.tag
+        self.assertEqual(element.tag, 'TAG')
+
+        with self.assertRaises(AttributeError):
+            del element.text
+        self.assertIsNone(element.text)
+        element.text = 'TEXT'
+        with self.assertRaises(AttributeError):
+            del element.text
+        self.assertEqual(element.text, 'TEXT')
+
+        with self.assertRaises(AttributeError):
+            del element.tail
+        self.assertIsNone(element.tail)
+        element.tail = 'TAIL'
+        with self.assertRaises(AttributeError):
+            del element.tail
+        self.assertEqual(element.tail, 'TAIL')
+
+        with self.assertRaises(AttributeError):
+            del element.attrib
+        self.assertEqual(element.attrib, {})
+        element.attrib = {'A': 'B', 'C': 'D'}
+        with self.assertRaises(AttributeError):
+            del element.attrib
+        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
+
+    def test_trashcan(self):
+        # If this test fails, it will most likely die via segfault.
+        e = root = cET.Element('root')
+        for i in range(200000):
+            e = cET.SubElement(e, 'x')
+        del e
+        del root
+        support.gc_collect()
+
+    def test_parser_ref_cycle(self):
+        # bpo-31499: xmlparser_dealloc() crashed with a segmentation fault when
+        # xmlparser_gc_clear() was called previously by the garbage collector,
+        # when the parser was part of a reference cycle.
+
+        def parser_ref_cycle():
+            parser = cET.XMLParser()
+            # Create a reference cycle using an exception to keep the frame
+            # alive, so the parser will be destroyed by the garbage collector
+            try:
+                raise ValueError
+            except ValueError as exc:
+                err = exc
+
+        # Create a parser part of reference cycle
+        parser_ref_cycle()
+        # Trigger an explicit garbage collection to break the reference cycle
+        # and so destroy the parser
+        support.gc_collect()
+
+    def test_bpo_31728(self):
+        # A crash or an assertion failure shouldn't happen, in case garbage
+        # collection triggers a call to clear() or a reading of text or tail,
+        # while a setter or clear() or __setstate__() is already running.
+        elem = cET.Element('elem')
+        class X:
+            def __del__(self):
+                elem.text
+                elem.tail
+                elem.clear()
+
+        elem.text = X()
+        elem.clear()  # shouldn't crash
+
+        elem.tail = X()
+        elem.clear()  # shouldn't crash
+
+        elem.text = X()
+        elem.text = X()  # shouldn't crash
+        elem.clear()
+
+        elem.tail = X()
+        elem.tail = X()  # shouldn't crash
+        elem.clear()
+
+        elem.text = X()
+        elem.__setstate__({'tag': 42})  # shouldn't cause an assertion failure
+        elem.clear()
+
+        elem.tail = X()
+        elem.__setstate__({'tag': 42})  # shouldn't cause an assertion failure
+
+    @support.cpython_only
+    def test_uninitialized_parser(self):
+        # The interpreter shouldn't crash in case of calling methods or
+        # accessing attributes of uninitialized XMLParser objects.
+        parser = cET.XMLParser.__new__(cET.XMLParser)
+        self.assertRaises(ValueError, parser.close)
+        self.assertRaises(ValueError, parser.feed, 'foo')
+        class MockFile:
+            def read(*args):
+                return ''
+        self.assertRaises(ValueError, parser._parse_whole, MockFile())
+        self.assertRaises(ValueError, parser._setevents, None)
+        self.assertIsNone(parser.entity)
+        self.assertIsNone(parser.target)
+
+    def test_setstate_leaks(self):
+        # Test reference leaks
+        elem = cET.Element.__new__(cET.Element)
+        for i in range(100):
+            elem.__setstate__({'tag': 'foo', 'attrib': {'bar': 42},
+                               '_children': [cET.Element('child')],
+                               'text': 'text goes here',
+                               'tail': 'opposite of head'})
+
+        self.assertEqual(elem.tag, 'foo')
+        self.assertEqual(elem.text, 'text goes here')
+        self.assertEqual(elem.tail, 'opposite of head')
+        self.assertEqual(list(elem.attrib.items()), [('bar', 42)])
+        self.assertEqual(len(elem), 1)
+        self.assertEqual(elem[0].tag, 'child')
+
+    def test_iterparse_leaks(self):
+        # Test reference leaks in TreeBuilder (issue #35502).
+        # The test is written to be executed in the hunting reference leaks
+        # mode.
+        XML = '<a></a><b></b>'
+        parser = cET.iterparse(io.StringIO(XML))
+        next(parser)
+        del parser
+        support.gc_collect()
+
+    def test_xmlpullparser_leaks(self):
+        # Test reference leaks in TreeBuilder (issue #35502).
+        # The test is written to be executed in the hunting reference leaks
+        # mode.
+        XML = '<a></a><b></b>'
+        parser = cET.XMLPullParser()
+        parser.feed(XML)
+        del parser
+        support.gc_collect()
+
+    def test_dict_disappearing_during_get_item(self):
+        # test fix for seg fault reported in issue 27946
+        class X:
+            def __hash__(self):
+                e.attrib = {}  # this frees e->extra->attrib
+                [{i: i} for i in range(1000)]  # exhaust the dict keys cache
+                return 13
+
+        e = cET.Element("elem", {1: 2})
+        r = e.get(X())
+        self.assertIsNone(r)
+
+    @support.cpython_only
+    def test_immutable_types(self):
+        root = cET.fromstring('<a></a>')
+        dataset = (
+            cET.Element,
+            cET.TreeBuilder,
+            cET.XMLParser,
+            type(root.iter()),
+        )
+        for tp in dataset:
+            with self.subTest(tp=tp):
+                with self.assertRaisesRegex(TypeError, "immutable"):
+                    tp.foo = 1
+
+    @support.cpython_only
+    def test_disallow_instantiation(self):
+        root = cET.fromstring('<a></a>')
+        iter_type = type(root.iter())
+        support.check_disallow_instantiation(self, iter_type)
+
+
+@unittest.skipUnless(cET, 'requires _elementtree')
+class TestAliasWorking(unittest.TestCase):
+    # Test that the cET alias module is alive
+    def test_alias_working(self):
+        e = cET_alias.Element('foo')
+        self.assertEqual(e.tag, 'foo')
+
+
+@unittest.skipUnless(cET, 'requires _elementtree')
+@support.cpython_only
+class TestAcceleratorImported(unittest.TestCase):
+    # Test that the C accelerator was imported, as expected
+    def test_correct_import_cET(self):
+        # SubElement is a function so it retains _elementtree as its module.
+        self.assertEqual(cET.SubElement.__module__, '_elementtree')
+
+    def test_correct_import_cET_alias(self):
+        self.assertEqual(cET_alias.SubElement.__module__, '_elementtree')
+
+    def test_parser_comes_from_C(self):
+        # The type of methods defined in Python code is types.FunctionType,
+        # while the type of methods defined inside _elementtree is
+        # <class 'wrapper_descriptor'>
+        self.assertNotIsInstance(cET.Element.__init__, types.FunctionType)
+
+
+@unittest.skipUnless(cET, 'requires _elementtree')
+@support.cpython_only
+class SizeofTest(unittest.TestCase):
+    def setUp(self):
+        self.elementsize = support.calcobjsize('5P')
+        # extra
+        self.extra = struct.calcsize('PnnP4P')
+
+    check_sizeof = support.check_sizeof
+
+    def test_element(self):
+        e = cET.Element('a')
+        self.check_sizeof(e, self.elementsize)
+
+    def test_element_with_attrib(self):
+        e = cET.Element('a', href='about:')
+        self.check_sizeof(e, self.elementsize + self.extra)
+
+    def test_element_with_children(self):
+        e = cET.Element('a')
+        for i in range(5):
+            cET.SubElement(e, 'span')
+        # should have space for 8 children now
+        self.check_sizeof(e, self.elementsize + self.extra +
+                             struct.calcsize('8P'))
+
+
+def install_tests():
+    # Test classes should have __module__ referring to this module.
+    from test import test_xml_etree
+    for name, base in vars(test_xml_etree).items():
+        if isinstance(base, type) and issubclass(base, unittest.TestCase):
+            class Temp(base):
+                pass
+            Temp.__name__ = Temp.__qualname__ = name
+            Temp.__module__ = __name__
+            assert name not in globals()  # a clash would silently replace an existing name in this module
+            globals()[name] = Temp
+
+install_tests()
+
+def setUpModule():
+    from test import test_xml_etree
+    test_xml_etree.setUpModule(module=cET)  # reuse the shared setup, with cET as the module under test
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py
index 199c22d0af..7dd667bf3f 100644
--- a/Lib/xml/dom/expatbuilder.py
+++ b/Lib/xml/dom/expatbuilder.py
@@ -200,10 +200,7 @@ def parseFile(self, file):
parser = self.getParser()
first_buffer = True
try:
- while 1:
- buffer = file.read(16*1024)
- if not buffer:
- break
+ while buffer := file.read(16*1024):
parser.Parse(buffer, False)
if first_buffer and self.document.documentElement:
self._setup_subset(buffer)
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index d09ef5e7d0..db51f350ea 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -300,12 +300,28 @@ def _in_document(node):
node = node.parentNode
return False
-def _write_data(writer, data):
+def _write_data(writer, text, attr):
     "Writes datachars to writer."
-    if data:
-        data = data.replace("&", "&amp;").replace("<", "&lt;"). \
-                    replace("\"", "&quot;").replace(">", "&gt;")
-        writer.write(data)
+    if not text:
+        return
+    # See the comments in ElementTree.py for behavior and
+    # implementation details.
+    if "&" in text:  # must run first so the entities added below are not escaped again
+        text = text.replace("&", "&amp;")
+    if "<" in text:
+        text = text.replace("<", "&lt;")
+    if ">" in text:
+        text = text.replace(">", "&gt;")
+    if attr:  # attribute values additionally escape the double quote, CR, LF and TAB
+        if '"' in text:
+            text = text.replace('"', "&quot;")
+        if "\r" in text:
+            text = text.replace("\r", "&#13;")
+        if "\n" in text:
+            text = text.replace("\n", "&#10;")
+        if "\t" in text:
+            text = text.replace("\t", "&#9;")
+    writer.write(text)
def _get_elements_by_tagName_helper(parent, name, rc):
for node in parent.childNodes:
@@ -358,6 +374,8 @@ def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
self._name = qName
self.namespaceURI = namespaceURI
self._prefix = prefix
+ if localName is not None:
+ self._localName = localName
self.childNodes = NodeList()
# Add the single child node that represents the value of the attr
@@ -881,7 +899,7 @@ def writexml(self, writer, indent="", addindent="", newl=""):
for a_name in attrs.keys():
writer.write(" %s=\"" % a_name)
- _write_data(writer, attrs[a_name].value)
+ _write_data(writer, attrs[a_name].value, True)
writer.write("\"")
if self.childNodes:
writer.write(">")
@@ -1110,7 +1128,7 @@ def splitText(self, offset):
return newText
def writexml(self, writer, indent="", addindent="", newl=""):
- _write_data(writer, "%s%s%s" % (indent, self.data, newl))
+ _write_data(writer, "%s%s%s" % (indent, self.data, newl), False)
# DOM Level 3 (WD 9 April 2002)
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py
index 8a20026349..a8852625a2 100644
--- a/Lib/xml/dom/xmlbuilder.py
+++ b/Lib/xml/dom/xmlbuilder.py
@@ -189,7 +189,7 @@ def parse(self, input):
options.filter = self.filter
options.errorHandler = self.errorHandler
fp = input.byteStream
- if fp is None and options.systemId:
+ if fp is None and input.systemId:
import urllib.request
fp = urllib.request.urlopen(input.systemId)
return self._parse_bytestream(fp, options)
@@ -247,10 +247,12 @@ def _create_opener(self):
def _guess_media_encoding(self, source):
info = source.byteStream.info()
- if "Content-Type" in info:
- for param in info.getplist():
- if param.startswith("charset="):
- return param.split("=", 1)[1].lower()
+ # import email.message
+ # assert isinstance(info, email.message.Message)
+ charset = info.get_param('charset')  # presumably the Content-Type charset parameter; relies on info being a Message as noted above - TODO confirm
+ if charset is not None:
+ return charset.lower()
+ return None  # no charset parameter was found
class DOMInputSource(object):
diff --git a/Lib/xml/etree/ElementInclude.py b/Lib/xml/etree/ElementInclude.py
index 40a9b22292..986e6c3bbe 100644
--- a/Lib/xml/etree/ElementInclude.py
+++ b/Lib/xml/etree/ElementInclude.py
@@ -79,8 +79,8 @@ class LimitedRecursiveIncludeError(FatalIncludeError):
# @param parse Parse mode. Either "xml" or "text".
# @param encoding Optional text encoding (UTF-8 by default for "text").
# @return The expanded resource. If the parse mode is "xml", this
-# is an ElementTree instance. If the parse mode is "text", this
-# is a Unicode string. If the loader fails, it can return None
+# is an Element instance. If the parse mode is "text", this
+# is a string. If the loader fails, it can return None
# or raise an OSError exception.
# @throws OSError If the loader fails to load the resource.
@@ -98,7 +98,7 @@ def default_loader(href, parse, encoding=None):
##
# Expand XInclude directives.
#
-# @param elem Root element.
+# @param elem Root Element or any ElementTree of a tree to be expanded
# @param loader Optional resource loader. If omitted, it defaults
# to {@link default_loader}. If given, it should be a callable
# that implements the same interface as default_loader.
@@ -106,12 +106,13 @@ def default_loader(href, parse, encoding=None):
# relative include file references.
# @param max_depth The maximum number of recursive inclusions.
# Limited to reduce the risk of malicious content explosion.
-# Pass a negative value to disable the limitation.
+# Pass None to disable the limitation.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
# resource, or if the tree contains malformed XInclude elements.
-# @throws IOError If the function fails to load a given resource.
-# @returns the node or its replacement if it was an XInclude node
+# @throws OSError If the function fails to load a given resource.
+# @throws ValueError If negative {@link max_depth} is passed.
+# @returns None. Modifies tree pointed by {@link elem}
def include(elem, loader=None, base_url=None,
max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index cd3c354d08..dc6bd28c03 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -416,6 +416,8 @@ def findall(elem, path, namespaces=None):
def findtext(elem, path, default=None, namespaces=None):
try:
elem = next(iterfind(elem, path, namespaces))
- return elem.text or ""
+ if elem.text is None:  # only a missing text is mapped to ""
+ return ""
+ return elem.text  # any other value, including a falsy one, is returned unchanged
except StopIteration:
return default
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 2503d9ee76..9bb09ab540 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -99,6 +99,7 @@
import collections
import collections.abc
import contextlib
+import weakref
from . import ElementPath
@@ -188,19 +189,6 @@ def makeelement(self, tag, attrib):
"""
return self.__class__(tag, attrib)
- def copy(self):
- """Return copy of current element.
-
- This creates a shallow copy. Subelements will be shared with the
- original tree.
-
- """
- warnings.warn(
- "elem.copy() is deprecated. Use copy.copy(elem) instead.",
- DeprecationWarning
- )
- return self.__copy__()
-
def __copy__(self):
elem = self.makeelement(self.tag, self.attrib)
elem.text = self.text
@@ -213,9 +201,10 @@ def __len__(self):
def __bool__(self):
warnings.warn(
- "The behavior of this method will change in future versions. "
+ "Testing an element's truth value will always return True in "
+ "future versions. "
"Use specific 'len(elem)' or 'elem is not None' test instead.",
- FutureWarning, stacklevel=2
+ DeprecationWarning, stacklevel=2  # stacklevel=2 attributes the warning to the code that tested the element
)
return len(self._children) != 0 # emulate old behaviour, for now
@@ -534,7 +523,9 @@ class ElementTree:
"""
def __init__(self, element=None, file=None):
- # assert element is None or iselement(element)
+ if element is not None and not iselement(element):  # None is accepted and leaves the tree without a root
+ raise TypeError('expected an Element, not %s' %
+ type(element).__name__)
self._root = element # first node
if file:
self.parse(file)
@@ -550,7 +541,9 @@ def _setroot(self, element):
with the given element. Use with care!
"""
- # assert iselement(element)
+ if not iselement(element):
+ raise TypeError('expected an Element, not %s'
+ % type(element).__name__)
self._root = element
def parse(self, source, parser=None):
@@ -579,10 +572,7 @@ def parse(self, source, parser=None):
# it with chunks.
self._root = parser._parse_whole(source)
return self._root
- while True:
- data = source.read(65536)
- if not data:
- break
+ while data := source.read(65536):
parser.feed(data)
self._root = parser.close()
return self._root
@@ -719,6 +709,8 @@ def write(self, file_or_filename,
of start/end tags
"""
+ if self._root is None:
+ raise TypeError('ElementTree not initialized')
if not method:
method = "xml"
elif method not in _serialize:
@@ -911,13 +903,9 @@ def _serialize_xml(write, elem, qnames, namespaces,
if elem.tail:
write(_escape_cdata(elem.tail))
-HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
- "img", "input", "isindex", "link", "meta", "param")
-
-try:
- HTML_EMPTY = set(HTML_EMPTY)
-except NameError:
- pass
+HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
+ "img", "input", "isindex", "link", "meta", "param", "source",
+ "track", "wbr"}  # NOTE(review): presumably the HTML elements serialized without an end tag - confirm in _serialize_html
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
tag = elem.tag
@@ -1242,13 +1230,14 @@ def iterparse(source, events=None, parser=None):
# parser argument of iterparse is removed, this can be killed.
pullparser = XMLPullParser(events=events, _parser=parser)
- def iterator(source):
+ if not hasattr(source, "read"):
+ source = open(source, "rb")
+ close_source = True
+ else:
close_source = False
+
+ def iterator(source):
try:
- if not hasattr(source, "read"):
- source = open(source, "rb")
- close_source = True
- yield None
while True:
yield from pullparser.read_events()
# load event buffer
@@ -1258,18 +1247,30 @@ def iterator(source):
pullparser.feed(data)
root = pullparser._close_and_return_root()
yield from pullparser.read_events()
- it.root = root
+ it = wr()
+ if it is not None:
+ it.root = root
finally:
if close_source:
source.close()
+ gen = iterator(source)
class IterParseIterator(collections.abc.Iterator):
- __next__ = iterator(source).__next__
+ __next__ = gen.__next__
+ def close(self):
+ if close_source:
+ source.close()
+ gen.close()
+
+ def __del__(self):
+ # TODO: Emit a ResourceWarning if it was not explicitly closed.
+ # (When the close() method will be supported in all maintained Python versions.)
+ if close_source:
+ source.close()
+
it = IterParseIterator()
it.root = None
- del iterator, IterParseIterator
-
- next(it)
+ wr = weakref.ref(it)
return it
@@ -1325,6 +1326,11 @@ def read_events(self):
else:
yield event
+    def flush(self):  # delegates to the wrapped parser's flush()
+        if self._parser is None:
+            raise ValueError("flush() called after end of stream")
+        self._parser.flush()
+
def XML(text, parser=None):
"""Parse XML document from string constant.
@@ -1731,6 +1737,15 @@ def close(self):
del self.parser, self._parser
del self.target, self._target
+    def flush(self):
+        was_enabled = self.parser.GetReparseDeferralEnabled()
+        try:
+            self.parser.SetReparseDeferralEnabled(False)  # deferral is switched off only for this call
+            self.parser.Parse(b"", False)  # empty, non-final chunk
+        except self._error as v:
+            self._raiseerror(v)
+        finally:
+            self.parser.SetReparseDeferralEnabled(was_enabled)  # restore the caller's setting, even on error
# --------------------------------------------------------------------
# C14N 2.0
diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py
index 17b75879eb..b657310207 100644
--- a/Lib/xml/sax/__init__.py
+++ b/Lib/xml/sax/__init__.py
@@ -60,11 +60,7 @@ def parseString(string, handler, errorHandler=ErrorHandler()):
import os, sys
if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ:
default_parser_list = os.environ["PY_SAX_PARSER"].split(",")
-del os
-
-_key = "python.xml.sax.parser"
-if sys.platform[:4] == "java" and sys.registry.containsKey(_key):
- default_parser_list = sys.registry.getProperty(_key).split(",")
+del os, sys
def make_parser(parser_list=()):
@@ -93,15 +89,6 @@ def make_parser(parser_list=()):
# --- Internal utility methods used by make_parser
-if sys.platform[ : 4] == "java":
- def _create_parser(parser_name):
- from org.python.core import imp
- drv_module = imp.importName(parser_name, 0, globals())
- return drv_module.create_parser()
-
-else:
- def _create_parser(parser_name):
- drv_module = __import__(parser_name,{},{},['create_parser'])
- return drv_module.create_parser()
-
-del sys
+def _create_parser(parser_name):
+    drv_module = __import__(parser_name,{},{},['create_parser'])  # non-empty fromlist makes __import__ return the named submodule, not the top-level package
+    return drv_module.create_parser()
diff --git a/Lib/xml/sax/_exceptions.py b/Lib/xml/sax/_exceptions.py
index a9b2ba35c6..f292dc3a8e 100644
--- a/Lib/xml/sax/_exceptions.py
+++ b/Lib/xml/sax/_exceptions.py
@@ -1,8 +1,4 @@
"""Different kinds of SAX Exceptions"""
-import sys
-if sys.platform[:4] == "java":
- from java.lang import Exception
-del sys
# ===== SAXEXCEPTION =====
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index e334ac9fea..ba3c1e9851 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -12,12 +12,6 @@
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict
-# xml.parsers.expat does not raise ImportError in Jython
-import sys
-if sys.platform[:4] == "java":
- raise SAXReaderNotAvailable("expat not available in Java", None)
-del sys
-
try:
from xml.parsers import expat
except ImportError:
@@ -220,6 +214,20 @@ def feed(self, data, isFinal=False):
# FIXME: when to invoke error()?
self._err_handler.fatalError(exc)
+    def flush(self):
+        if self._parser is None:  # no parser to flush
+            return
+
+        was_enabled = self._parser.GetReparseDeferralEnabled()
+        try:
+            self._parser.SetReparseDeferralEnabled(False)  # deferral is switched off only for this call
+            self._parser.Parse(b"", False)  # empty, non-final chunk
+        except expat.error as e:
+            exc = SAXParseException(expat.ErrorString(e.code), e, self)
+            self._err_handler.fatalError(exc)
+        finally:
+            self._parser.SetReparseDeferralEnabled(was_enabled)  # restore the previous setting, even on error
+
def _close_source(self):
source = self._source
try:
diff --git a/Lib/xml/sax/xmlreader.py b/Lib/xml/sax/xmlreader.py
index 716f228404..e906121d23 100644
--- a/Lib/xml/sax/xmlreader.py
+++ b/Lib/xml/sax/xmlreader.py
@@ -120,10 +120,8 @@ def parse(self, source):
file = source.getCharacterStream()
if file is None:
file = source.getByteStream()
- buffer = file.read(self._bufsize)
- while buffer:
+ while buffer := file.read(self._bufsize):
self.feed(buffer)
- buffer = file.read(self._bufsize)
self.close()
def feed(self, data):
diff --git a/stdlib/src/pyexpat.rs b/stdlib/src/pyexpat.rs
index 033fa76c06..45e0328510 100644
--- a/stdlib/src/pyexpat.rs
+++ b/stdlib/src/pyexpat.rs
@@ -1,8 +1,4 @@
-/* Pyexpat builtin module
-*
-*
-*/
-
+/// Pyexpat builtin module
use crate::vm::{PyRef, VirtualMachine, builtins::PyModule, extend_module};
pub fn make_module(vm: &VirtualMachine) -> PyRef {
@@ -33,15 +29,25 @@ macro_rules! create_property {
mod _pyexpat {
use crate::vm::{
Context, Py, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine,
- builtins::{PyStr, PyStrRef, PyType},
+ builtins::{PyInt, PyStr, PyStrRef, PyTupleRef, PyType},
function::ArgBytesLike,
function::{IntoFuncArgs, OptionalArg},
};
use rustpython_common::lock::PyRwLock;
use std::io::Cursor;
use xml::reader::XmlEvent;
+
type MutableObject = PyRwLock;
+ #[pyattr]
+ pub fn version_info(vm: &VirtualMachine) -> PyTupleRef {
+ vm.ctx.new_tuple(vec![
+ PyInt::from(2).into_pyobject(vm),
+ PyInt::from(7).into_pyobject(vm),
+ PyInt::from(1).into_pyobject(vm),
+ ])
+ }
+
#[pyattr]
#[pyclass(name = "xmlparser", module = false, traverse)]
#[derive(Debug, PyPayload)]
From 76b65a28c84a900e2c00711d17ed9fed10be1fc1 Mon Sep 17 00:00:00 2001
From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Wed, 20 Aug 2025 13:16:22 +0300
Subject: [PATCH 2/3] Update stdlib/src/pyexpat.rs
Co-authored-by: Jeong, YunWon <69878+youknowone@users.noreply.github.com>
---
stdlib/src/pyexpat.rs | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/stdlib/src/pyexpat.rs b/stdlib/src/pyexpat.rs
index 45e0328510..0af0bc32b3 100644
--- a/stdlib/src/pyexpat.rs
+++ b/stdlib/src/pyexpat.rs
@@ -39,14 +39,8 @@ mod _pyexpat {
type MutableObject = PyRwLock;
- #[pyattr]
- pub fn version_info(vm: &VirtualMachine) -> PyTupleRef {
- vm.ctx.new_tuple(vec![
- PyInt::from(2).into_pyobject(vm),
- PyInt::from(7).into_pyobject(vm),
- PyInt::from(1).into_pyobject(vm),
- ])
- }
+ #[pyattr(name = "version_info")]
+ pub const VERSION_INFO: (u32, u32, u32) = (2, 7, 1);
#[pyattr]
#[pyclass(name = "xmlparser", module = false, traverse)]
From a4dbfa9f41d67cd430747fdb281b21a9602056e0 Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Wed, 20 Aug 2025 15:27:04 +0300
Subject: [PATCH 3/3] Fix clippy
---
stdlib/src/pyexpat.rs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/stdlib/src/pyexpat.rs b/stdlib/src/pyexpat.rs
index 0af0bc32b3..871ba7d598 100644
--- a/stdlib/src/pyexpat.rs
+++ b/stdlib/src/pyexpat.rs
@@ -29,7 +29,7 @@ macro_rules! create_property {
mod _pyexpat {
use crate::vm::{
Context, Py, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine,
- builtins::{PyInt, PyStr, PyStrRef, PyTupleRef, PyType},
+ builtins::{PyStr, PyStrRef, PyType},
function::ArgBytesLike,
function::{IntoFuncArgs, OptionalArg},
};