diff options
Diffstat (limited to 'catapult/common/py_utils/py_utils/webpagereplay_go_server.py')
-rw-r--r-- | catapult/common/py_utils/py_utils/webpagereplay_go_server.py | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/catapult/common/py_utils/py_utils/webpagereplay_go_server.py b/catapult/common/py_utils/py_utils/webpagereplay_go_server.py new file mode 100644 index 00000000..950e8adc --- /dev/null +++ b/catapult/common/py_utils/py_utils/webpagereplay_go_server.py @@ -0,0 +1,402 @@ +# Copyright 2017 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Start and stop Web Page Replay.""" + +import logging +import os +import re +import signal +import subprocess +import sys +import tempfile +import urllib + +import py_utils +from py_utils import atexit_with_log +from py_utils import binary_manager + + +_WPR_DIR = os.path.abspath(os.path.join( + py_utils.GetCatapultDir(), 'web_page_replay_go')) + +TELEMETRY_PROJECT_CONFIG = os.path.join( + py_utils.GetCatapultDir(), 'telemetry', 'telemetry', + 'binary_dependencies.json') + +CHROME_BINARY_CONFIG = os.path.join( + py_utils.GetCatapultDir(), 'common', 'py_utils', 'py_utils', + 'chrome_binaries.json') + +RECORD = '--record' +INJECT_SCRIPTS = '--inject_scripts=' + +class ReplayError(Exception): + """Catch-all exception for the module.""" + pass + + +class ReplayNotFoundError(ReplayError): + def __init__(self, label, path): + """ + Create a ReplayNotFoundError instance. + + Args: + + label: A string of label of this error. + path: A string of the path in this error. + + """ + super(ReplayNotFoundError, self).__init__() + self.args = (label, path) + + def __str__(self): + label, path = self.args + return 'Path does not exist for %s: %s' % (label, path) + + +class ReplayNotStartedError(ReplayError): + pass + + +class ReplayServer(object): + """Start and Stop Web Page Replay. + + Web Page Replay is a proxy that can record and "replay" web pages with + simulated network characteristics -- without having to edit the pages + by hand. With WPR, tests can use "real" web content, and catch + performance issues that may result from introducing network delays and + bandwidth throttling. + + This class could be used as a context manager. + + Example: + with ReplayServer(archive_path): + self.NavigateToURL(start_url) + self.WaitUntil(...) + """ + + _go_binary_path = None + + def __init__(self, archive_path, replay_host, http_port, https_port, + replay_options, binary_downloader=None): + """Initialize ReplayServer. + + Args: + archive_path: a path to a specific WPR archive. + replay_host: the hostname to serve traffic. + http_port: an integer port on which to serve HTTP traffic. May be zero + to let the OS choose an available port. + https_port: an integer port on which to serve HTTPS traffic. May be zero + to let the OS choose an available port. + replay_options: an iterable of options strings to forward to replay.py. + binary_downloader: a function to be used to fetch binary. May be None to + use py_utils.binary_manager.FetchPath as default downloader. + """ + self.archive_path = archive_path + self._replay_host = replay_host + self._started_ports = {} # a dict such as {'http': 80, 'https': 443} + + # A temporary path for storing stdout & stderr of the webpagereplay + # subprocess. + self._temp_log_file_path = None + + # Assign the downloader func and binary_manager + downloader = None + if binary_downloader: + downloader = binary_downloader + else: + configs = [CHROME_BINARY_CONFIG, TELEMETRY_PROJECT_CONFIG] + downloader = binary_manager.BinaryManager(configs).FetchPath + + self._cmd_line = self._GetCommandLine( + self._GetGoBinaryPath(downloader=downloader), http_port, https_port, + replay_options, archive_path) + + if RECORD in replay_options or 'record' in replay_options: + self._AssertPathExists('archive directory', + os.path.dirname(self.archive_path)) + elif not os.path.exists(self.archive_path): + self._AssertPathExists('archive file', self.archive_path) + + self.replay_process = None + + @classmethod + def _GetGoBinaryPath(cls, downloader): + if not cls._go_binary_path: + cls._go_binary_path = downloader( + 'wpr_go', py_utils.GetHostOsName(), py_utils.GetHostArchName()) + return cls._go_binary_path + + @classmethod + def SetGoBinaryPath(cls, go_binary_path): + """Overrides the _go_binary_path. + + This allows the server to use WPRGO files retrieved from somewhere + other than GCS, such as CIPD.""" + cls._go_binary_path = go_binary_path + + @property + def http_port(self): + return self._started_ports['http'] + + @property + def https_port(self): + return self._started_ports['https'] + + @staticmethod + def _GetCommandLine(go_binary_path, http_port, https_port, + options, archive_path): + """Set WPR command-line arguments. Can be overridden if needed. + + Keyword arguments: + + * go_binary_path: A string of the path to the wpr.go binary. + * http_port: A decimal of the port that handles http requests. + * https_port: A decimal of the port that handles https requests. + * options: A list of options, such as '--record', + '--inject_scripts', etc. + * archive_path: A string of the path to the archive file. + + """ + bad_options = [] + for option in options: + if option not in [RECORD, INJECT_SCRIPTS]: + bad_options.append(option) + if len(bad_options) > 0: + raise ValueError("Invalid replay options %s" % bad_options) + + cmd_line = [go_binary_path] + if RECORD in options: + cmd_line.append('record') + else: + cmd_line.append('replay') + key_file = os.path.join(_WPR_DIR, 'wpr_key.pem') + cert_file = os.path.join(_WPR_DIR, 'wpr_cert.pem') + inject_script = os.path.join(_WPR_DIR, 'deterministic.js') + cmd_line.extend([ + '--http_port=%s' % http_port, + '--https_port=%s' % https_port, + '--https_key_file=%s' % key_file, + '--https_cert_file=%s' % cert_file]) + if INJECT_SCRIPTS in options: + cmd_line.append(INJECT_SCRIPTS) + else: + cmd_line.append('--inject_scripts=%s' % inject_script) + cmd_line.append(archive_path) + return cmd_line + + def _AssertPathExists(self, label, path): + if not os.path.exists(path): + raise ReplayNotFoundError(label, path) + + def _OpenLogFile(self): + """Opens the log file for writing.""" + log_dir = os.path.dirname(self._temp_log_file_path) + if not os.path.isdir(log_dir): + os.makedirs(log_dir) + return open(self._temp_log_file_path, 'w') + + def _LogLines(self): + """Yields any log lines that have been writtent to disk.""" + if (not self._temp_log_file_path or + not os.path.isfile(self._temp_log_file_path)): + yield '(N/A)' + return + with open(self._temp_log_file_path) as f: + for line in f: + yield line + + def _IsStarted(self): + """Returns true if the server is up and running.""" + if not self._IsReplayProcessStarted(): + return False + + def HasIncompleteStartedPorts(): + return ('http' not in self._started_ports or + 'https' not in self._started_ports) + + if HasIncompleteStartedPorts(): + self._started_ports = self._ParseLogFilePorts(self._LogLines()) + if HasIncompleteStartedPorts(): + return False + + try: + # HTTPS may require SNI (which urllib does not speak), so only check + # that HTTP responds. + return self._UrlOpen('web-page-replay-generate-200').getcode() == 200 + except IOError: + return False + + @staticmethod + def _ParseLogFilePorts(log_lines): + """Returns the ports on which replay listens as reported in its log file. + + Only matches HTTP, HTTPS, and DNS. One call may return only some + of the ports depending on what has been written to the log file. + + Example log lines: + 2014-09-03 17:04:27,978 Starting server on http://:51673 + 2014-09-03 17:04:27,978 Starting server on https://:35270 + + Returns: + a dict with ports available in log_lines. For example, + {} # no ports found + {'http': 1234, 'https': 2345, 'dns': 3456} + """ + ports = {} + port_re = re.compile( + r'.*Starting server on ' + r'(?P<protocol>http|https)://' + r'(?P<host>[^:]*):' + r'(?P<port>\d+)') + for line in log_lines: + m = port_re.match(line.strip()) + if m: + protocol = m.group('protocol').lower() + ports[protocol] = int(m.group('port')) + return ports + + def StartServer(self): + """Start Web Page Replay and verify that it started. + + Returns: + A dictionary mapping the keys 'http', 'https', and (if used) 'dns' + to the respective ports of the replay server. + Raises: + ReplayNotStartedError: if Replay start-up fails. + """ + is_posix = sys.platform.startswith('linux') or sys.platform == 'darwin' + logging.info('Starting Web-Page-Replay: %s', self._cmd_line) + self._CreateTempLogFilePath() + with self._OpenLogFile() as log_fh: + self.replay_process = subprocess.Popen( + self._cmd_line, stdout=log_fh, stderr=subprocess.STDOUT, + preexec_fn=(_ResetInterruptHandler if is_posix else None)) + try: + # TODO(crbug.com/805418): consider changing this to wait with I/O timeout. + # The 120s timeout is based on past failures (e.g: crbug.com/812639). + py_utils.WaitFor(self._IsStarted, timeout=120) + logging.info('WPR ports: %s', self._started_ports) + atexit_with_log.Register(self.StopServer) + return dict(self._started_ports) + except Exception: + self.StopServer(logging.ERROR) + raise ReplayNotStartedError('Web Page Replay failed to start.') + + def _IsReplayProcessStarted(self): + if not self.replay_process: + return False + return self.replay_process and self.replay_process.poll() is None + + def StopServer(self, log_level=logging.DEBUG): + """Stop Web Page Replay. + + This also attempts to return stdout/stderr logs of wpr process if there is + any. If there is none, '(N/A)' string is returned (see _LogLines() + implementation). + """ + if self._IsReplayProcessStarted(): + self._StopReplayProcess() + self._CleanUpTempLogFilePath(log_level) + self._started_ports = {} + + def _StopReplayProcess(self): + if not self.replay_process: + return + logging.debug('Trying to stop Web-Page-Replay gracefully') + try: + if self._started_ports: + self._UrlOpen('web-page-replay-command-exit').close() + except IOError: + # IOError is possible because the server might exit without response. + pass + try: + py_utils.WaitFor(lambda: self.replay_process.poll() is not None, 10) + except py_utils.TimeoutException: + try: + # Use a SIGINT so that it can do graceful cleanup. + self.replay_process.send_signal(signal.SIGINT) + except Exception: # pylint: disable=broad-except + # On Windows, we are left with no other option than terminate(). + is_primary_nameserver_changed_by_replay = ( + self._replay_host == '127.0.0.1') + if is_primary_nameserver_changed_by_replay: + # Replay changes the DNS nameserver configuration so that DNS + # requests are resolved by replay's own DNS server. It resolves + # all DNS requests to it own IP address to it can server the + # HTTP and HTTPS requests. + # If the replay host is not '127.0.0.1', then replay skips the + # nameserver change because it assumes a different mechanism + # will be used to route DNS requests to replay's DNS server. + logging.warning( + 'Unable to stop Web-Page-Replay gracefully.\n' + 'Replay changed the DNS nameserver configuration to make replay ' + 'the primary nameserver. That might not be restored!') + self.replay_process.terminate() + self.replay_process.communicate() + finally: + self.replay_process = None + + def _CreateTempLogFilePath(self): + assert self._temp_log_file_path is None + handle, self._temp_log_file_path = tempfile.mkstemp() + os.close(handle) + + def _CleanUpTempLogFilePath(self, log_level): + if not self._temp_log_file_path: + return '' + if logging.getLogger('').isEnabledFor(log_level): + with open(self._temp_log_file_path, 'r') as f: + wpr_log_output = f.read() + logging.log(log_level, '\n'.join([ + '************************** WPR LOG *****************************', + wpr_log_output, + '************************** END OF WPR LOG **********************'])) + os.remove(self._temp_log_file_path) + self._temp_log_file_path = None + + def __enter__(self): + """Add support for with-statement.""" + self.StartServer() + return self + + def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb): + """Add support for with-statement.""" + self.StopServer() + + def _UrlOpen(self, url_path, protocol='http'): + """Open a Replay URL. + + For matching requests in the archive, Replay relies on the "Host:" header. + For Replay command URLs, the "Host:" header is not needed. + + Args: + url_path: WPR server request path. + protocol: 'http' or 'https' + Returns: + a file-like object from urllib.urlopen + """ + url = '%s://%s:%s/%s' % ( + protocol, self._replay_host, self._started_ports[protocol], url_path) + return urllib.urlopen(url, proxies={}) + +def _ResetInterruptHandler(): + """Reset the interrupt handler back to the default. + + The replay process is stopped gracefully by making an HTTP request + ('web-page-replay-command-exit'). The graceful exit is important for + restoring the DNS configuration. If the HTTP request fails, the fallback + is to send SIGINT to the process. + + On posix system, running this function before starting replay fixes a + bug that shows up when Telemetry is run as a background command from a + script. https://crbug.com/254572. + + Background: Signal masks on Linux are inherited from parent + processes. If anything invoking us accidentally masks SIGINT + (e.g. by putting a process in the background from a shell script), + sending a SIGINT to the child will fail to terminate it. + """ + signal.signal(signal.SIGINT, signal.SIG_DFL) |