add first_beat delay to notebook heartbeats

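Previously, heartbeats started immediately, causing false heart failures on slow systems that can take a while to start kernel subprocesses. The first heartbeat is now delayed by a configurable `first_beat` interval (5 seconds by default).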

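Also adds a `flush` of the heartbeat stream at the start of the heartbeat callback (just like in IPython.parallel), so that beats which have already arrived but have not yet been processed because of server load are not mistaken for heart failures.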
This commit is contained in:
MinRK 2011-12-22 22:38:57 -08:00
parent 13fc7e5c41
commit 15ca2aaa20
2 changed files with 9 additions and 2 deletions

View File

@ -18,6 +18,7 @@ Authors:
import logging
import Cookie
import time
import uuid
from tornado import web
@ -412,6 +413,7 @@ class IOPubHandler(AuthenticatedZMQStreamHandler):
return
km = self.application.kernel_manager
self.time_to_dead = km.time_to_dead
self.first_beat = km.first_beat
kernel_id = self.kernel_id
try:
self.iopub_stream = km.create_iopub_stream(kernel_id)
@ -446,6 +448,7 @@ class IOPubHandler(AuthenticatedZMQStreamHandler):
self._kernel_alive = True
def ping_or_dead():
self.hb_stream.flush()
if self._kernel_alive:
self._kernel_alive = False
self.hb_stream.send(b'ping')
@ -461,8 +464,9 @@ class IOPubHandler(AuthenticatedZMQStreamHandler):
self._kernel_alive = True
self.hb_stream.on_recv(beat_received)
self._hb_periodic_callback = ioloop.PeriodicCallback(ping_or_dead, self.time_to_dead*1000)
self._hb_periodic_callback.start()
loop = ioloop.IOLoop.instance()
self._hb_periodic_callback = ioloop.PeriodicCallback(ping_or_dead, self.time_to_dead*1000, loop)
loop.add_timeout(time.time()+self.first_beat, self._hb_periodic_callback.start)
self._beating= True
def stop_hb(self):

View File

@ -195,7 +195,10 @@ class MappingKernelManager(MultiKernelManager):
kernel_argv = List(Unicode)
kernel_manager = Instance(KernelManager)
time_to_dead = Float(3.0, config=True, help="""Kernel heartbeat interval in seconds.""")
first_beat = Float(5.0, config=True, help="Delay (in seconds) before sending first heartbeat.")
max_msg_size = Integer(65536, config=True, help="""
The max raw message size accepted from the browser
over a WebSocket connection.