diff options
author | Luis Lozano <llozano@chromium.org> | 2015-09-30 11:36:27 -0700 |
---|---|---|
committer | chrome-bot <chrome-bot@chromium.org> | 2015-10-01 15:16:04 -0700 |
commit | 45b53c5fee4c044cebd74dbbbd6119fbd51554e7 (patch) | |
tree | c98938747c167dbfb15a8265e5897e6290669e95 | |
parent | 60a68a5680235ac1a101fb35561901a58298e8ef (diff) | |
download | toolchain-utils-45b53c5fee4c044cebd74dbbbd6119fbd51554e7.tar.gz |
Fixed some issues with signal handling and cleanup.
Fixed several issues related to signal handling and cleanup:
- Got rid of the extra process wrapper around crosperf. We now do an
"exec" to get rid of this extra process. This facilitated signal
handling.
- Found a better fix for the problem we had with ignoring
ctrl-c. Instead of creating a pipe for child process stdin, we now
create the child with setsid which has the side effect of creating the
child without a terminal associated to it (see man setsid).
- By creating the child with setsid, we also allow for easier killing of
the child and its descendants since it allows us to use killpg (see man
killpg).
- Handle SIGTERM by "converting" it into an exit call. This will make
sure the atexit routines are called and, furthermore, we can terminate
child process when you catch the "SystemExit" exception.
- With these changes, crosperf will send a SIGTERM to any cros_sdk,
test_that processes that it has started. Before, we would have left over
test_that processes that could interfere with a new test_that
invocation.
BUG=chromium:492826
BUG=chromium:470658
TEST=verified by hand signals are working. Ran toolchain suite.
Change-Id: Ibd09ff428a7b402ea2296ecf9b9a34aa5756c93f
Reviewed-on: https://chrome-internal-review.googlesource.com/232616
Commit-Ready: Luis Lozano <llozano@chromium.org>
Tested-by: Luis Lozano <llozano@chromium.org>
Reviewed-by: Yunlian Jiang <yunlian@google.com>
-rwxr-xr-x | crosperf/crosperf | 2 | ||||
-rwxr-xr-x | crosperf/crosperf.py | 12 | ||||
-rw-r--r-- | crosperf/experiment_runner.py | 19 | ||||
-rw-r--r-- | crosperf/suite_runner.py | 25 | ||||
-rw-r--r-- | utils/command_executer.py | 67 |
5 files changed, 73 insertions, 52 deletions
diff --git a/crosperf/crosperf b/crosperf/crosperf index 904a172a..a29dcbfa 100755 --- a/crosperf/crosperf +++ b/crosperf/crosperf @@ -1,2 +1,2 @@ #!/bin/bash -PYTHONPATH=$(dirname $0)/..:$PYTHONPATH python $(dirname $0)/crosperf.py "$@" +PYTHONPATH=$(dirname $0)/..:$PYTHONPATH exec python $(dirname $0)/crosperf.py "$@" diff --git a/crosperf/crosperf.py b/crosperf/crosperf.py index fc0098d2..e107ea48 100755 --- a/crosperf/crosperf.py +++ b/crosperf/crosperf.py @@ -7,6 +7,7 @@ import atexit import optparse import os +import signal import sys from experiment_runner import ExperimentRunner from experiment_runner import MockExperimentRunner @@ -58,6 +59,16 @@ def Cleanup(experiment): experiment.Cleanup() +def CallExitHandler(signum, _): + """Signal handler that transforms a signal into a call to exit. + + This is useful because functionality registered by "atexit" will + be called. It also means you can "catch" the signal by catching + the SystemExit exception. + """ + sys.exit(128 + signum) + + def Main(argv): parser = optparse.OptionParser(usage=Help().GetUsage(), description=Help().GetHelp(), @@ -104,6 +115,7 @@ def Main(argv): json_report = experiment_file.GetGlobalSettings().GetField("json_report") + signal.signal(signal.SIGTERM, CallExitHandler) atexit.register(Cleanup, experiment) if options.dry_run: diff --git a/crosperf/experiment_runner.py b/crosperf/experiment_runner.py index d7749b53..01324b04 100644 --- a/crosperf/experiment_runner.py +++ b/crosperf/experiment_runner.py @@ -180,6 +180,12 @@ class ExperimentRunner(object): self._terminated = True self.l.LogError("Ctrl-c pressed. Cleaning up...") experiment.Terminate() + raise + except SystemExit: + self._terminate = True + self.l.LogError("Unexpected exit. Cleaning up...") + experiment.Terminate() + raise finally: if not experiment.locks_dir: self._UnlockAllMachines(experiment) @@ -252,11 +258,14 @@ class ExperimentRunner(object): benchmark_run.result.CleanUp(benchmark_run.benchmark.rm_chroot_tmp) def Run(self): - self._Run(self._experiment) - self._PrintTable(self._experiment) - if not self._terminated: - self._StoreResults(self._experiment) - self._Email(self._experiment) + try: + self._Run(self._experiment) + finally: + # Always print the report at the end of the run. + self._PrintTable(self._experiment) + if not self._terminated: + self._StoreResults(self._experiment) + self._Email(self._experiment) class MockExperimentRunner(ExperimentRunner): diff --git a/crosperf/suite_runner.py b/crosperf/suite_runner.py index f9a2ba30..6531045f 100644 --- a/crosperf/suite_runner.py +++ b/crosperf/suite_runner.py @@ -154,10 +154,14 @@ class SuiteRunner(object): if self.log_level != "verbose": self._logger.LogOutput("Running test.") self._logger.LogOutput("CMD: %s" % command) + # Use --no-ns-pid so that cros_sdk does not create a different + # process namespace and we can kill process created easily by + # their process group. return self._ce.ChrootRunCommand(label.chromeos_root, command, True, - self._ct) + self._ct, + cros_sdk_options="--no-ns-pid") def RemoveTelemetryTempFile (self, machine, chromeos_root): filename = "telemetry@%s" % machine @@ -208,20 +212,23 @@ class SuiteRunner(object): profiler_args, machine)) - chrome_root_options = "" - chrome_root_options = (" --chrome_root={} --chrome_root_mount={} " - " FEATURES=\"-usersandbox\" " + # Use --no-ns-pid so that cros_sdk does not create a different + # process namespace and we can kill process created easily by their + # process group. + chrome_root_options = ("--no-ns-pid " + "--chrome_root={} --chrome_root_mount={} " + "FEATURES=\"-usersandbox\" " "CHROME_ROOT={}".format(label.chrome_src, CHROME_MOUNT_DIR, CHROME_MOUNT_DIR)) if self.log_level != "verbose": self._logger.LogOutput("Running test.") self._logger.LogOutput("CMD: %s" % cmd) - return self._ce.ChrootRunCommand (label.chromeos_root, - cmd, - return_output=True, - command_terminator=self._ct, - cros_sdk_options=chrome_root_options) + return self._ce.ChrootRunCommand(label.chromeos_root, + cmd, + return_output=True, + command_terminator=self._ct, + cros_sdk_options=chrome_root_options) def Telemetry_Run(self, machine, label, benchmark, profiler_args): diff --git a/utils/command_executer.py b/utils/command_executer.py index 08a5dc74..851791f8 100644 --- a/utils/command_executer.py +++ b/utils/command_executer.py @@ -8,6 +8,7 @@ import getpass import os import re import select +import signal import subprocess import tempfile import time @@ -55,11 +56,7 @@ class CommandExecuter: command_timeout=None, terminated_timeout=10, print_to_console=True): - """Run a command. - - Note: As this is written, the stdin for the process executed is - not associated with the stdin of the caller of this routine. - """ + """Run a command.""" cmd = str(cmd) @@ -84,13 +81,13 @@ class CommandExecuter: user = username + "@" cmd = "ssh -t -t %s%s -- '%s'" % (user, machine, cmd) - p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) - - # We explicitly disconect the client stdin from the command to - # execute by explicitly requesting a new pipe. Now, let's close it - # so that the executed process does not try to read from it. - p.stdin.close() + # We use setsid so that the child will have a different session id + # and we can easily kill the process group. This is also important + # because the child will be disassociated from the parent terminal. + # In this way the child cannot mess the parent's terminal. + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, shell=True, + preexec_fn=os.setsid) full_stdout = "" full_stderr = "" @@ -108,15 +105,12 @@ class CommandExecuter: while len(pipes): if command_terminator and command_terminator.IsTerminated(): - self.RunCommand("sudo kill -9 " + str(p.pid), - print_to_console=print_to_console) - wait = p.wait() + os.killpg(os.getpgid(p.pid), signal.SIGTERM) if self.logger: - self.logger.LogError("Command was terminated!", print_to_console) - if return_output: - return (p.wait, full_stdout, full_stderr) - else: - return wait + self.logger.LogError("Command received termination request. " + "Killed child process group.", + print_to_console) + break l=my_poll.poll(100) for (fd, evt) in l: @@ -144,21 +138,19 @@ class CommandExecuter: terminated_time = time.time() elif (terminated_timeout is not None and time.time() - terminated_time > terminated_timeout): - m = ("Timeout of %s seconds reached since process termination." - % terminated_timeout) if self.logger: - self.logger.LogWarning(m, print_to_console) + self.logger.LogWarning("Timeout of %s seconds reached since " + "process termination." + % terminated_timeout, print_to_console) break if (command_timeout is not None and time.time() - started_time > command_timeout): - m = ("Timeout of %s seconds reached since process started." - % command_timeout) + os.killpg(os.getpgid(p.pid), signal.SIGTERM) if self.logger: - self.logger.LogWarning(m, print_to_console) - self.RunCommand("kill %d || sudo kill %d || sudo kill -9 %d" % - (p.pid, p.pid, p.pid), - print_to_console=print_to_console) + self.logger.LogWarning("Timeout of %s seconds reached since process" + "started. Killed child process group." + % command_timeout, print_to_console) break if out == err == "": @@ -448,15 +440,16 @@ class CommandExecuter: self.logger.LogCmd(cmd) elif self.logger: self.logger.LogCmdToFileOnly(cmd) + + # We use setsid so that the child will have a different session id + # and we can easily kill the process group. This is also important + # because the child will be disassociated from the parent terminal. + # In this way the child cannot mess the parent's terminal. pobject = subprocess.Popen( cmd, cwd=cwd, bufsize=1024, env=env, shell=shell, - universal_newlines=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT if join_stderr else subprocess.PIPE) - - # We explicitly disconect the client stdin from the command to - # execute by explicitly requesting a new pipe. Now, let's close it - # so that the executed process does not try to read from it. - pobject.stdin.close() + universal_newlines=True, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT if join_stderr else subprocess.PIPE, + preexec_fn=os.setsid) # We provide a default line_consumer if line_consumer is None: @@ -484,7 +477,7 @@ class CommandExecuter: del handlermap[fd] if timeout is not None and (time.time() - start_time > timeout): - pobject.kill() + os.killpg(os.getpgid(pobject.pid), signal.SIGTERM) return pobject.wait() |