aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjkriegshauser <jkriegshauser@gmail.com>2024-03-27 08:24:46 -0700
committerGitHub <noreply@github.com>2024-03-27 16:24:46 +0100
commit40d77b93672406d1c6e80760fec1321cb1b3473a (patch)
tree052d837e573ed69fe86f58a15a4503be2e0ea761
parentf7c7e72a1c3bfe2f5c791a73557f9b29d369c89b (diff)
downloadcpython3-upstream-3.9.tar.gz
[3.9] gh-116773: Fix overlapped memory corruption crash (GH-116774) (GH-117080)upstream-3.9
Co-authored-by: Ɓukasz Langa <lukasz@langa.pl>
-rw-r--r--Lib/asyncio/windows_events.py14
-rw-r--r--Lib/test/test_asyncio/test_windows_events.py50
-rw-r--r--Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst1
-rw-r--r--Modules/overlapped.c18
4 files changed, 71 insertions, 12 deletions
diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py
index da81ab435b..b31270e0e6 100644
--- a/Lib/asyncio/windows_events.py
+++ b/Lib/asyncio/windows_events.py
@@ -323,13 +323,13 @@ class ProactorEventLoop(proactor_events.BaseProactorEventLoop):
if self._self_reading_future is not None:
ov = self._self_reading_future._ov
self._self_reading_future.cancel()
- # self_reading_future was just cancelled so if it hasn't been
- # finished yet, it never will be (it's possible that it has
- # already finished and its callback is waiting in the queue,
- # where it could still happen if the event loop is restarted).
- # Unregister it otherwise IocpProactor.close will wait for it
- # forever
- if ov is not None:
+ # self_reading_future always uses IOCP, so even though it's
+ # been cancelled, we need to make sure that the IOCP message
+ # is received so that the kernel is not holding on to the
+ # memory, possibly causing memory corruption later. Only
+ # unregister it if IO is complete in all respects. Otherwise
+ # we need another _poll() later to complete the IO.
+ if ov is not None and not ov.pending:
self._proactor._unregister(ov)
self._self_reading_future = None
diff --git a/Lib/test/test_asyncio/test_windows_events.py b/Lib/test/test_asyncio/test_windows_events.py
index f276cd205a..a8939d5854 100644
--- a/Lib/test/test_asyncio/test_windows_events.py
+++ b/Lib/test/test_asyncio/test_windows_events.py
@@ -36,7 +36,23 @@ class UpperProto(asyncio.Protocol):
self.trans.close()
-class ProactorLoopCtrlC(test_utils.TestCase):
+class WindowsEventsTestCase(test_utils.TestCase):
+ def _unraisablehook(self, unraisable):
+ # Storing unraisable.object can resurrect an object which is being
+ # finalized. Storing unraisable.exc_value creates a reference cycle.
+ self._unraisable = unraisable
+ print(unraisable)
+
+ def setUp(self):
+ self._prev_unraisablehook = sys.unraisablehook
+ self._unraisable = None
+ sys.unraisablehook = self._unraisablehook
+
+ def tearDown(self):
+ sys.unraisablehook = self._prev_unraisablehook
+ self.assertIsNone(self._unraisable)
+
+class ProactorLoopCtrlC(WindowsEventsTestCase):
def test_ctrl_c(self):
@@ -58,7 +74,7 @@ class ProactorLoopCtrlC(test_utils.TestCase):
thread.join()
-class ProactorMultithreading(test_utils.TestCase):
+class ProactorMultithreading(WindowsEventsTestCase):
def test_run_from_nonmain_thread(self):
finished = False
@@ -79,7 +95,7 @@ class ProactorMultithreading(test_utils.TestCase):
self.assertTrue(finished)
-class ProactorTests(test_utils.TestCase):
+class ProactorTests(WindowsEventsTestCase):
def setUp(self):
super().setUp()
@@ -239,8 +255,32 @@ class ProactorTests(test_utils.TestCase):
self.close_loop(self.loop)
self.assertFalse(self.loop.call_exception_handler.called)
-
-class WinPolicyTests(test_utils.TestCase):
+ def test_loop_restart(self):
+ # We're fishing for the "RuntimeError: <_overlapped.Overlapped object at XXX>
+ # still has pending operation at deallocation, the process may crash" error
+ stop = threading.Event()
+ def threadMain():
+ while not stop.is_set():
+ self.loop.call_soon_threadsafe(lambda: None)
+ time.sleep(0.01)
+ thr = threading.Thread(target=threadMain)
+
+ # In 10 60-second runs of this test prior to the fix:
+ # time in seconds until failure: (none), 15.0, 6.4, (none), 7.6, 8.3, 1.7, 22.2, 23.5, 8.3
+ # 10 seconds had a 50% failure rate but longer would be more costly
+ end_time = time.time() + 10 # Run for 10 seconds
+ self.loop.call_soon(thr.start)
+ while not self._unraisable: # Stop if we got an unraisable exc
+ self.loop.stop()
+ self.loop.run_forever()
+ if time.time() >= end_time:
+ break
+
+ stop.set()
+ thr.join()
+
+
+class WinPolicyTests(WindowsEventsTestCase):
def test_selector_win_policy(self):
async def main():
diff --git a/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst b/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst
new file mode 100644
index 0000000000..8fc3fe8004
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst
@@ -0,0 +1 @@
+Fix instances of ``<_overlapped.Overlapped object at 0xXXX> still has pending operation at deallocation, the process may crash``.
diff --git a/Modules/overlapped.c b/Modules/overlapped.c
index cd7869fa8a..5f8a823473 100644
--- a/Modules/overlapped.c
+++ b/Modules/overlapped.c
@@ -624,6 +624,24 @@ Overlapped_dealloc(OverlappedObject *self)
if (!HasOverlappedIoCompleted(&self->overlapped) &&
self->type != TYPE_NOT_STARTED)
{
+ // NOTE: We should not get here, if we do then something is wrong in
+ // the IocpProactor or ProactorEventLoop. Since everything uses IOCP if
+ // the overlapped IO hasn't completed yet then we should not be
+ // deallocating!
+ //
+ // The problem is likely that this OverlappedObject was removed from
+ // the IocpProactor._cache before it was complete. The _cache holds a
+ // reference while IO is pending so that it does not get deallocated
+ // while the kernel has retained the OVERLAPPED structure.
+ //
+ // CancelIoEx (likely called from self.cancel()) may have successfully
+ // completed, but the OVERLAPPED is still in use until either
+ // HasOverlappedIoCompleted() is true or GetQueuedCompletionStatus has
+ // returned this OVERLAPPED object.
+ //
+ // NOTE: Waiting when IOCP is in use can hang indefinitely, but this
+ // CancelIoEx is superfluous in that self.cancel() was already called,
+ // so I've only ever seen this return FALSE with GLE=ERROR_NOT_FOUND
if (Py_CancelIoEx && Py_CancelIoEx(self->handle, &self->overlapped))
wait = TRUE;