bpo-38323: Fix rare MultiLoopChildWatcher hangs.

This changes asyncio.MultiLoopChildWatcher's attach_loop() method to call loop.add_signal_handler() instead of calling only signal.signal(). This should eliminate some rare hangs since loop.add_signal_handler() calls signal.set_wakeup_fd(). Without this, the main thread sometimes wasn't getting awakened if a signal occurred during an await.
python · cjerdonek · May 16, 2020 · May 17, 2020 · Oct 16, 2020 · Oct 16, 2020
commit 5d1013256c133b61587b6a80a0f9d509ac11d123
@@ -1036,7 +1036,9 @@ Unix signals
    The callback will be invoked by *loop*, along with other queued callbacks
    and runnable coroutines of that event loop. Unlike signal handlers
    registered using :func:`signal.signal`, a callback registered with this
-   function is allowed to interact with the event loop.
+   method is allowed to interact with the event loop. Using
+   :func:`signal.signal` instead of this method can also cause the event
+   loop not to awaken in rare situations when a signal is received.
 
    Raise :exc:`ValueError` if the signal number is invalid or uncatchable.
    Raise :exc:`RuntimeError` if there is a problem setting up the handler.

@@ -219,7 +219,7 @@ implementation used by the asyncio event loop:
 
    This implementation registers a :py:data:`SIGCHLD` signal handler on
    instantiation. That can break third-party code that installs a custom handler for
-   `SIGCHLD`.  signal).
+   the :py:data:`SIGCHLD` signal.
 
    The watcher avoids disrupting other code spawning processes
    by polling every process explicitly on a :py:data:`SIGCHLD` signal.
@@ -233,6 +233,17 @@ implementation used by the asyncio event loop:
 
    .. versionadded:: 3.8
 
+   .. method:: attach_loop(loop)
+
+      Registers the :py:data:`SIGCHLD` signal handler.  Like
+      :meth:`loop.add_signal_handler`, this method can only be invoked
+      from the main thread.
+
+      .. versionchanged:: 3.9
+
+        The method now calls :func:`signal.set_wakeup_fd` as part of the
+        handler initialization.
+
 .. class:: SafeChildWatcher
 
    This implementation uses active event loop from the main thread to handle

@@ -78,6 +78,8 @@ def _process_self_data(self, data):
     def add_signal_handler(self, sig, callback, *args):
         """Add a handler for a signal.  UNIX only.
 
+        This method can only be called from the main thread.
+
         Raise ValueError if the signal number is invalid or uncatchable.
         Raise RuntimeError if there is a problem setting up the handler.
         """
@@ -1232,10 +1234,15 @@ def close(self):
         self._callbacks.clear()
         if self._saved_sighandler is not None:
             handler = signal.getsignal(signal.SIGCHLD)
-            if handler != self._sig_chld:
+            # add_signal_handler() sets the handler to _sighandler_noop.
+            if handler != _sighandler_noop:
                 logger.warning("SIGCHLD handler was changed by outside code")
             else:
+                loop = self._loop
+                # This clears the wakeup file descriptor if necessary.
+                loop.remove_signal_handler(signal.SIGCHLD)
                 signal.signal(signal.SIGCHLD, self._saved_sighandler)
+
             self._saved_sighandler = None
 
     def __enter__(self):
@@ -1263,15 +1270,24 @@ def attach_loop(self, loop):
         # The reason to do it here is that attach_loop() is called from
         # unix policy only for the main thread.
         # Main thread is required for subscription on SIGCHLD signal
+        if loop is None or self._saved_sighandler is not None:
+            return
+
+        self._loop = loop
+        self._saved_sighandler = signal.getsignal(signal.SIGCHLD)
         if self._saved_sighandler is None:
-            self._saved_sighandler = signal.signal(signal.SIGCHLD, self._sig_chld)
-            if self._saved_sighandler is None:
-                logger.warning("Previous SIGCHLD handler was set by non-Python code, "
-                               "restore to default handler on watcher close.")
-                self._saved_sighandler = signal.SIG_DFL
+            logger.warning("Previous SIGCHLD handler was set by non-Python code, "
+                           "restore to default handler on watcher close.")
+            self._saved_sighandler = signal.SIG_DFL
 
-            # Set SA_RESTART to limit EINTR occurrences.
-            signal.siginterrupt(signal.SIGCHLD, False)
+        if self._callbacks:
+            warnings.warn(
+                'A loop is being detached '
+                'from a child watcher with pending handlers',
+                RuntimeWarning)
+
+        # This also sets up the wakeup file descriptor.
+        loop.add_signal_handler(signal.SIGCHLD, self._sig_chld)
 
     def _do_waitpid_all(self):
         for pid in list(self._callbacks):
@@ -1314,7 +1330,7 @@ def _do_waitpid(self, expected_pid):
                                  expected_pid, returncode)
                 loop.call_soon_threadsafe(callback, pid, returncode, *args)
 
-    def _sig_chld(self, signum, frame):
+    def _sig_chld(self, *args):
         try:
             self._do_waitpid_all()
         except (SystemExit, KeyboardInterrupt):

diff --git a/Lib/test/test_asyncio/test_subprocess.py b/Lib/test/test_asyncio/test_subprocess.py
@@ -672,12 +672,13 @@ def setUp(self):
             policy.set_child_watcher(watcher)
 
         def tearDown(self):
-            super().tearDown()
             policy = asyncio.get_event_loop_policy()
             watcher = policy.get_child_watcher()
             policy.set_child_watcher(None)
             watcher.attach_loop(None)
             watcher.close()
+            # Since setUp() does super().setUp() first, do tearDown() last.
+            super().tearDown()
 
     class SubprocessThreadedWatcherTests(SubprocessWatcherMixin,
                                          test_utils.TestCase):

diff --git a/Misc/NEWS.d/next/Library/2020-05-16-17-50-10.bpo-38323.Ar35np.rst b/Misc/NEWS.d/next/Library/2020-05-16-17-50-10.bpo-38323.Ar35np.rst
@@ -0,0 +1,2 @@
+Fix rare cases with ``MultiLoopChildWatcher`` where the event loop can
+fail to awaken in response to a :py:data:`SIGCHLD` signal.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Fix rare cases with ``MultiLoopChildWatcher`` where the event loop can
cjerdonek marked this conversation as resolved. Outdated Show resolved Hide resolved
		fail to awaken in response to a :py:data:`SIGCHLD` signal.