3 from __future__
import absolute_import
4 from __future__
import division
5 from __future__
import print_function
6 from __future__
import unicode_literals
17 Sometimes CUDA devices can get stuck ('deadlock'). In this case it is often 18 better just to kill the process automatically. Use this guard to set a 19 maximum timespan for a Python call, such as RunNet(). If it does not complete 20 in time, the process is killed. 23 with timeout_guard.CompleteInTimeOrDie(10.0): 30 def __init__(self, timeout_secs):
31 threading.Thread.__init__(self)
45 log = logging.getLogger(
"timeout_guard")
46 log.error(
"Call did not finish in time. Timeout:{}s PID: {}".format(
54 log.info(
"Prepared output, dumping threads. ")
56 print(
"-----After force------")
60 for threadId, stack
in sys._current_frames().items():
62 code.append(
"\n# ThreadID: %s" % threadId)
63 for filename, lineno, name, line
in traceback.extract_stack(stack):
64 code.append(
'File: "%s", line %d, in %s' % (filename, lineno, name))
66 code.append(
" %s" % (line.strip()))
68 print(
"\n".join(code))
69 log.error(
"Process did not terminate cleanly in 10 s, forcing")
72 forcet = threading.Thread(target=forcequit, args=())
76 print(
"-----Before forcing------")
80 for threadId, stack
in sys._current_frames().items():
81 code.append(
"\n# ThreadID: %s" % threadId)
82 for filename, lineno, name, line
in traceback.extract_stack(stack):
83 code.append(
'File: "%s", line %d, in %s' % (filename, lineno, name))
85 code.append(
" %s" % (line.strip()))
87 print(
"\n".join(code))
88 os.kill(os.getpid(), signal.SIGINT)
91 @contextlib.contextmanager
92 def CompleteInTimeOrDie(timeout_secs):
96 watcher.completed =
True 97 watcher.condition.acquire()
98 watcher.condition.notify()
99 watcher.condition.release()
104 Call this if you have a problem with the process getting stuck at shutdown. 105 It will kill the process if it does not terminate within timeout_secs seconds.
def EuthanizeIfNecessary(timeout_secs=120)