Merge pull request #5875 from kevin-bates/kernel-list-race-condition

Fix race condition with async kernel management
This commit is contained in:
Zachary Sailer 2020-12-23 12:06:07 -08:00 committed by GitHub
commit 63450082ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -294,7 +294,6 @@ class MappingKernelManager(MultiKernelManager):
kernel._activity_stream.close() kernel._activity_stream.close()
kernel._activity_stream = None kernel._activity_stream = None
self.stop_buffering(kernel_id) self.stop_buffering(kernel_id)
self._kernel_connections.pop(kernel_id, None)
# Decrease the metric of number of kernels # Decrease the metric of number of kernels
# running for the relevant kernel type by 1 # running for the relevant kernel type by 1
@ -302,7 +301,12 @@ class MappingKernelManager(MultiKernelManager):
type=self._kernels[kernel_id].kernel_name type=self._kernels[kernel_id].kernel_name
).dec() ).dec()
return self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart) self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart)
# Unlike its async sibling method in AsyncMappingKernelManager, removing the kernel_id
# from the connections dictionary isn't as problematic before the shutdown since the
# method is synchronous. However, we'll keep the relative call orders the same from
# a maintenance perspective.
self._kernel_connections.pop(kernel_id, None)
async def restart_kernel(self, kernel_id, now=False): async def restart_kernel(self, kernel_id, now=False):
"""Restart a kernel by kernel_id""" """Restart a kernel by kernel_id"""
@ -376,8 +380,11 @@ class MappingKernelManager(MultiKernelManager):
kernels = [] kernels = []
kernel_ids = self.pinned_superclass.list_kernel_ids(self) kernel_ids = self.pinned_superclass.list_kernel_ids(self)
for kernel_id in kernel_ids: for kernel_id in kernel_ids:
model = self.kernel_model(kernel_id) try:
kernels.append(model) model = self.kernel_model(kernel_id)
kernels.append(model)
except (web.HTTPError, KeyError):
pass # Probably due to a (now) non-existent kernel, continue building the list
return kernels return kernels
# override _check_kernel_id to raise 404 instead of KeyError # override _check_kernel_id to raise 404 instead of KeyError
@ -498,7 +505,6 @@ class AsyncMappingKernelManager(MappingKernelManager, AsyncMultiKernelManager):
kernel._activity_stream.close() kernel._activity_stream.close()
kernel._activity_stream = None kernel._activity_stream = None
self.stop_buffering(kernel_id) self.stop_buffering(kernel_id)
self._kernel_connections.pop(kernel_id, None)
# Decrease the metric of number of kernels # Decrease the metric of number of kernels
# running for the relevant kernel type by 1 # running for the relevant kernel type by 1
@ -506,4 +512,9 @@ class AsyncMappingKernelManager(MappingKernelManager, AsyncMultiKernelManager):
type=self._kernels[kernel_id].kernel_name type=self._kernels[kernel_id].kernel_name
).dec() ).dec()
return await self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart) await self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart)
# Remove kernel_id from the connections dictionary only after kernel has been shutdown,
# otherwise a race condition can occur since the shutdown may take a while - allowing
# list/fetch kernel operations to access _kernel_connections for a non-existent key
# (kernel_id) while "awaiting" the result of the shutdown.
self._kernel_connections.pop(kernel_id, None)