aboutsummaryrefslogtreecommitdiff
path: root/catapult/common/py_utils/py_utils/webpagereplay_go_server.py
diff options
context:
space:
mode:
Diffstat (limited to 'catapult/common/py_utils/py_utils/webpagereplay_go_server.py')
-rw-r--r--catapult/common/py_utils/py_utils/webpagereplay_go_server.py402
1 files changed, 402 insertions, 0 deletions
diff --git a/catapult/common/py_utils/py_utils/webpagereplay_go_server.py b/catapult/common/py_utils/py_utils/webpagereplay_go_server.py
new file mode 100644
index 00000000..950e8adc
--- /dev/null
+++ b/catapult/common/py_utils/py_utils/webpagereplay_go_server.py
@@ -0,0 +1,402 @@
+# Copyright 2017 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Start and stop Web Page Replay."""
+
+import logging
+import os
+import re
+import signal
+import subprocess
+import sys
+import tempfile
+import urllib
+
+import py_utils
+from py_utils import atexit_with_log
+from py_utils import binary_manager
+
+
+_WPR_DIR = os.path.abspath(os.path.join(
+ py_utils.GetCatapultDir(), 'web_page_replay_go'))
+
+TELEMETRY_PROJECT_CONFIG = os.path.join(
+ py_utils.GetCatapultDir(), 'telemetry', 'telemetry',
+ 'binary_dependencies.json')
+
+CHROME_BINARY_CONFIG = os.path.join(
+ py_utils.GetCatapultDir(), 'common', 'py_utils', 'py_utils',
+ 'chrome_binaries.json')
+
+RECORD = '--record'
+INJECT_SCRIPTS = '--inject_scripts='
+
+class ReplayError(Exception):
+ """Catch-all exception for the module."""
+ pass
+
+
+class ReplayNotFoundError(ReplayError):
+ def __init__(self, label, path):
+ """
+ Create a ReplayNotFoundError instance.
+
+ Args:
+
+ label: A string of label of this error.
+ path: A string of the path in this error.
+
+ """
+ super(ReplayNotFoundError, self).__init__()
+ self.args = (label, path)
+
+ def __str__(self):
+ label, path = self.args
+ return 'Path does not exist for %s: %s' % (label, path)
+
+
+class ReplayNotStartedError(ReplayError):
+ pass
+
+
+class ReplayServer(object):
+ """Start and Stop Web Page Replay.
+
+ Web Page Replay is a proxy that can record and "replay" web pages with
+ simulated network characteristics -- without having to edit the pages
+ by hand. With WPR, tests can use "real" web content, and catch
+ performance issues that may result from introducing network delays and
+ bandwidth throttling.
+
+ This class could be used as a context manager.
+
+ Example:
+ with ReplayServer(archive_path):
+ self.NavigateToURL(start_url)
+ self.WaitUntil(...)
+ """
+
+ _go_binary_path = None
+
+ def __init__(self, archive_path, replay_host, http_port, https_port,
+ replay_options, binary_downloader=None):
+ """Initialize ReplayServer.
+
+ Args:
+ archive_path: a path to a specific WPR archive.
+ replay_host: the hostname to serve traffic.
+ http_port: an integer port on which to serve HTTP traffic. May be zero
+ to let the OS choose an available port.
+ https_port: an integer port on which to serve HTTPS traffic. May be zero
+ to let the OS choose an available port.
+ replay_options: an iterable of options strings to forward to replay.py.
+ binary_downloader: a function to be used to fetch binary. May be None to
+ use py_utils.binary_manager.FetchPath as default downloader.
+ """
+ self.archive_path = archive_path
+ self._replay_host = replay_host
+ self._started_ports = {} # a dict such as {'http': 80, 'https': 443}
+
+ # A temporary path for storing stdout & stderr of the webpagereplay
+ # subprocess.
+ self._temp_log_file_path = None
+
+ # Assign the downloader func and binary_manager
+ downloader = None
+ if binary_downloader:
+ downloader = binary_downloader
+ else:
+ configs = [CHROME_BINARY_CONFIG, TELEMETRY_PROJECT_CONFIG]
+ downloader = binary_manager.BinaryManager(configs).FetchPath
+
+ self._cmd_line = self._GetCommandLine(
+ self._GetGoBinaryPath(downloader=downloader), http_port, https_port,
+ replay_options, archive_path)
+
+ if RECORD in replay_options or 'record' in replay_options:
+ self._AssertPathExists('archive directory',
+ os.path.dirname(self.archive_path))
+ elif not os.path.exists(self.archive_path):
+ self._AssertPathExists('archive file', self.archive_path)
+
+ self.replay_process = None
+
+ @classmethod
+ def _GetGoBinaryPath(cls, downloader):
+ if not cls._go_binary_path:
+ cls._go_binary_path = downloader(
+ 'wpr_go', py_utils.GetHostOsName(), py_utils.GetHostArchName())
+ return cls._go_binary_path
+
+ @classmethod
+ def SetGoBinaryPath(cls, go_binary_path):
+ """Overrides the _go_binary_path.
+
+ This allows the server to use WPRGO files retrieved from somewhere
+ other than GCS, such as CIPD."""
+ cls._go_binary_path = go_binary_path
+
+ @property
+ def http_port(self):
+ return self._started_ports['http']
+
+ @property
+ def https_port(self):
+ return self._started_ports['https']
+
+ @staticmethod
+ def _GetCommandLine(go_binary_path, http_port, https_port,
+ options, archive_path):
+ """Set WPR command-line arguments. Can be overridden if needed.
+
+ Keyword arguments:
+
+ * go_binary_path: A string of the path to the wpr.go binary.
+ * http_port: A decimal of the port that handles http requests.
+ * https_port: A decimal of the port that handles https requests.
+ * options: A list of options, such as '--record',
+ '--inject_scripts', etc.
+ * archive_path: A string of the path to the archive file.
+
+ """
+ bad_options = []
+ for option in options:
+ if option not in [RECORD, INJECT_SCRIPTS]:
+ bad_options.append(option)
+ if len(bad_options) > 0:
+ raise ValueError("Invalid replay options %s" % bad_options)
+
+ cmd_line = [go_binary_path]
+ if RECORD in options:
+ cmd_line.append('record')
+ else:
+ cmd_line.append('replay')
+ key_file = os.path.join(_WPR_DIR, 'wpr_key.pem')
+ cert_file = os.path.join(_WPR_DIR, 'wpr_cert.pem')
+ inject_script = os.path.join(_WPR_DIR, 'deterministic.js')
+ cmd_line.extend([
+ '--http_port=%s' % http_port,
+ '--https_port=%s' % https_port,
+ '--https_key_file=%s' % key_file,
+ '--https_cert_file=%s' % cert_file])
+ if INJECT_SCRIPTS in options:
+ cmd_line.append(INJECT_SCRIPTS)
+ else:
+ cmd_line.append('--inject_scripts=%s' % inject_script)
+ cmd_line.append(archive_path)
+ return cmd_line
+
+ def _AssertPathExists(self, label, path):
+ if not os.path.exists(path):
+ raise ReplayNotFoundError(label, path)
+
+ def _OpenLogFile(self):
+ """Opens the log file for writing."""
+ log_dir = os.path.dirname(self._temp_log_file_path)
+ if not os.path.isdir(log_dir):
+ os.makedirs(log_dir)
+ return open(self._temp_log_file_path, 'w')
+
+ def _LogLines(self):
+ """Yields any log lines that have been writtent to disk."""
+ if (not self._temp_log_file_path or
+ not os.path.isfile(self._temp_log_file_path)):
+ yield '(N/A)'
+ return
+ with open(self._temp_log_file_path) as f:
+ for line in f:
+ yield line
+
+ def _IsStarted(self):
+ """Returns true if the server is up and running."""
+ if not self._IsReplayProcessStarted():
+ return False
+
+ def HasIncompleteStartedPorts():
+ return ('http' not in self._started_ports or
+ 'https' not in self._started_ports)
+
+ if HasIncompleteStartedPorts():
+ self._started_ports = self._ParseLogFilePorts(self._LogLines())
+ if HasIncompleteStartedPorts():
+ return False
+
+ try:
+ # HTTPS may require SNI (which urllib does not speak), so only check
+ # that HTTP responds.
+ return self._UrlOpen('web-page-replay-generate-200').getcode() == 200
+ except IOError:
+ return False
+
+ @staticmethod
+ def _ParseLogFilePorts(log_lines):
+ """Returns the ports on which replay listens as reported in its log file.
+
+ Only matches HTTP, HTTPS, and DNS. One call may return only some
+ of the ports depending on what has been written to the log file.
+
+ Example log lines:
+ 2014-09-03 17:04:27,978 Starting server on http://:51673
+ 2014-09-03 17:04:27,978 Starting server on https://:35270
+
+ Returns:
+ a dict with ports available in log_lines. For example,
+ {} # no ports found
+ {'http': 1234, 'https': 2345, 'dns': 3456}
+ """
+ ports = {}
+ port_re = re.compile(
+ r'.*Starting server on '
+ r'(?P<protocol>http|https)://'
+ r'(?P<host>[^:]*):'
+ r'(?P<port>\d+)')
+ for line in log_lines:
+ m = port_re.match(line.strip())
+ if m:
+ protocol = m.group('protocol').lower()
+ ports[protocol] = int(m.group('port'))
+ return ports
+
+ def StartServer(self):
+ """Start Web Page Replay and verify that it started.
+
+ Returns:
+ A dictionary mapping the keys 'http', 'https', and (if used) 'dns'
+ to the respective ports of the replay server.
+ Raises:
+ ReplayNotStartedError: if Replay start-up fails.
+ """
+ is_posix = sys.platform.startswith('linux') or sys.platform == 'darwin'
+ logging.info('Starting Web-Page-Replay: %s', self._cmd_line)
+ self._CreateTempLogFilePath()
+ with self._OpenLogFile() as log_fh:
+ self.replay_process = subprocess.Popen(
+ self._cmd_line, stdout=log_fh, stderr=subprocess.STDOUT,
+ preexec_fn=(_ResetInterruptHandler if is_posix else None))
+ try:
+ # TODO(crbug.com/805418): consider changing this to wait with I/O timeout.
+ # The 120s timeout is based on past failures (e.g: crbug.com/812639).
+ py_utils.WaitFor(self._IsStarted, timeout=120)
+ logging.info('WPR ports: %s', self._started_ports)
+ atexit_with_log.Register(self.StopServer)
+ return dict(self._started_ports)
+ except Exception:
+ self.StopServer(logging.ERROR)
+ raise ReplayNotStartedError('Web Page Replay failed to start.')
+
+ def _IsReplayProcessStarted(self):
+ if not self.replay_process:
+ return False
+ return self.replay_process and self.replay_process.poll() is None
+
+ def StopServer(self, log_level=logging.DEBUG):
+ """Stop Web Page Replay.
+
+ This also attempts to return stdout/stderr logs of wpr process if there is
+ any. If there is none, '(N/A)' string is returned (see _LogLines()
+ implementation).
+ """
+ if self._IsReplayProcessStarted():
+ self._StopReplayProcess()
+ self._CleanUpTempLogFilePath(log_level)
+ self._started_ports = {}
+
+ def _StopReplayProcess(self):
+ if not self.replay_process:
+ return
+ logging.debug('Trying to stop Web-Page-Replay gracefully')
+ try:
+ if self._started_ports:
+ self._UrlOpen('web-page-replay-command-exit').close()
+ except IOError:
+ # IOError is possible because the server might exit without response.
+ pass
+ try:
+ py_utils.WaitFor(lambda: self.replay_process.poll() is not None, 10)
+ except py_utils.TimeoutException:
+ try:
+ # Use a SIGINT so that it can do graceful cleanup.
+ self.replay_process.send_signal(signal.SIGINT)
+ except Exception: # pylint: disable=broad-except
+ # On Windows, we are left with no other option than terminate().
+ is_primary_nameserver_changed_by_replay = (
+ self._replay_host == '127.0.0.1')
+ if is_primary_nameserver_changed_by_replay:
+ # Replay changes the DNS nameserver configuration so that DNS
+ # requests are resolved by replay's own DNS server. It resolves
+ # all DNS requests to it own IP address to it can server the
+ # HTTP and HTTPS requests.
+ # If the replay host is not '127.0.0.1', then replay skips the
+ # nameserver change because it assumes a different mechanism
+ # will be used to route DNS requests to replay's DNS server.
+ logging.warning(
+ 'Unable to stop Web-Page-Replay gracefully.\n'
+ 'Replay changed the DNS nameserver configuration to make replay '
+ 'the primary nameserver. That might not be restored!')
+ self.replay_process.terminate()
+ self.replay_process.communicate()
+ finally:
+ self.replay_process = None
+
+ def _CreateTempLogFilePath(self):
+ assert self._temp_log_file_path is None
+ handle, self._temp_log_file_path = tempfile.mkstemp()
+ os.close(handle)
+
+ def _CleanUpTempLogFilePath(self, log_level):
+ if not self._temp_log_file_path:
+ return ''
+ if logging.getLogger('').isEnabledFor(log_level):
+ with open(self._temp_log_file_path, 'r') as f:
+ wpr_log_output = f.read()
+ logging.log(log_level, '\n'.join([
+ '************************** WPR LOG *****************************',
+ wpr_log_output,
+ '************************** END OF WPR LOG **********************']))
+ os.remove(self._temp_log_file_path)
+ self._temp_log_file_path = None
+
+ def __enter__(self):
+ """Add support for with-statement."""
+ self.StartServer()
+ return self
+
+ def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb):
+ """Add support for with-statement."""
+ self.StopServer()
+
+ def _UrlOpen(self, url_path, protocol='http'):
+ """Open a Replay URL.
+
+ For matching requests in the archive, Replay relies on the "Host:" header.
+ For Replay command URLs, the "Host:" header is not needed.
+
+ Args:
+ url_path: WPR server request path.
+ protocol: 'http' or 'https'
+ Returns:
+ a file-like object from urllib.urlopen
+ """
+ url = '%s://%s:%s/%s' % (
+ protocol, self._replay_host, self._started_ports[protocol], url_path)
+ return urllib.urlopen(url, proxies={})
+
+def _ResetInterruptHandler():
+ """Reset the interrupt handler back to the default.
+
+ The replay process is stopped gracefully by making an HTTP request
+ ('web-page-replay-command-exit'). The graceful exit is important for
+ restoring the DNS configuration. If the HTTP request fails, the fallback
+ is to send SIGINT to the process.
+
+ On posix system, running this function before starting replay fixes a
+ bug that shows up when Telemetry is run as a background command from a
+ script. https://crbug.com/254572.
+
+ Background: Signal masks on Linux are inherited from parent
+ processes. If anything invoking us accidentally masks SIGINT
+ (e.g. by putting a process in the background from a shell script),
+ sending a SIGINT to the child will fail to terminate it.
+ """
+ signal.signal(signal.SIGINT, signal.SIG_DFL)