diff options
author | Matthew Sartori <msartori@chromium.org> | 2015-06-04 10:39:41 -0700 |
---|---|---|
committer | ChromeOS Commit Bot <chromeos-commit-bot@chromium.org> | 2015-06-12 19:36:27 +0000 |
commit | e1bec9fce7a11b4066d42bd9a21692c14d6ae5a6 (patch) | |
tree | b780655862134d6a478c312524d22682921528a1 /mobmonitor | |
parent | a4375fc3df7af4fdb57b3b318f110c7355c45243 (diff) | |
download | chromite-e1bec9fce7a11b4066d42bd9a21692c14d6ae5a6.tar.gz |
mobmonitor: Mob* Monitor Checkfile Collection
This CL implements a recurring background task using
cherrypy plugins that will periodically collect checkfiles
from the specified directory.
BUG=chromium:490788
TEST=Unittests and tested collection on local machine.
Change-Id: Ia92a755e4712fd26fa9215b760ecb759b72df35b
Reviewed-on: https://chromium-review.googlesource.com/276241
Tested-by: Matthew Sartori <msartori@chromium.org>
Reviewed-by: Simran Basi <sbasi@chromium.org>
Commit-Queue: Matthew Sartori <msartori@chromium.org>
Diffstat (limited to 'mobmonitor')
-rw-r--r-- | mobmonitor/checkfile/__init__.py | 0 | ||||
-rw-r--r-- | mobmonitor/checkfile/manager.py | 182 | ||||
l--------- | mobmonitor/checkfile/manager_unittest | 1 | ||||
-rw-r--r-- | mobmonitor/checkfile/manager_unittest.py | 340 |
4 files changed, 523 insertions, 0 deletions
diff --git a/mobmonitor/checkfile/__init__.py b/mobmonitor/checkfile/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/mobmonitor/checkfile/__init__.py diff --git a/mobmonitor/checkfile/manager.py b/mobmonitor/checkfile/manager.py new file mode 100644 index 000000000..da403b57c --- /dev/null +++ b/mobmonitor/checkfile/manager.py @@ -0,0 +1,182 @@ +# Copyright 2015 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Store and manage Mob* Monitor checkfiles.""" + +from __future__ import print_function + +import cherrypy +import collections +import imp +import inspect +import os + +from cherrypy.process import plugins +from chromite.lib import cros_logging as logging + + +HEALTH_CHECK_METHODS = ['Check', 'Diagnose'] + +CHECKFILE_SERVICE = 'SERVICE' +CHECKFILE_DIR = '/etc/mobmonitor/checkfiles/' +CHECKFILE_ENDING = '_check.py' + +SERVICE_STATUS = collections.namedtuple('service_status', ['health_state', + 'description', + 'actions']) + + +class CollectionError(Exception): + """Raise when an error occurs during checkfile collection.""" + + +def IsHealthCheck(obj): + """A sanity check to see if a class implements the health check interface. + + Args: + obj: A Python object. + + Returns: + True if obj has callable 'Check' and 'Diagnose' attributes. + False otherwise. + """ + return all(callable(getattr(obj, m, None)) for m in HEALTH_CHECK_METHODS) + + +def ImportCheckfile(checkfile_path): + """Import the checkfile. + + Args: + checkfile_path: The path of the checkfile to import. + + Returns: + A tuple of the service name this checkfile is associated with, the + list of health checks in the module, and the checkfile's mtime. + + Raises: + SyntaxError may be raised by imp.load_source if the python file + specified by checkfile_path has errors.
+ """ + # Import the checkfile + modname = os.path.basename(os.path.splitext(checkfile_path)[0]) + check = imp.load_source(modname, checkfile_path) + + # Gather the service name and the health checks + service_name = None + healthchecks = [] + for name in dir(check): + obj = getattr(check, name) + if CHECKFILE_SERVICE == name: + service_name = obj + if inspect.isclass(obj) and IsHealthCheck(obj): + healthchecks.append(obj()) + + return service_name, healthchecks, os.path.getmtime(checkfile_path) + + +class CheckFileManager(object): + """Manage the health checks that are associated with each service.""" + + def __init__(self, collect_interval=3, checkdir=CHECKFILE_DIR): + if not os.path.exists(checkdir): + raise CollectionError('Check directory does not exist: %s' % checkdir) + + self.collect_interval = collect_interval + self.checkdir = checkdir + self.collect_monitor = None + + self.healthcheck_results = {} + self.service_checks = {} + self.service_states = {} + + def Update(self, service, healthchecks, mtime): + """Update the health checks that are associated with each service. + + Args: + service: The name of the service that the health check corresponds to. + healthchecks: A list of health check objects. + mtime: The time of latest modification of the health check module. + """ + # The update and callback procedure used here leverages the cherrypy + # Monitor plugin. When a file that was read during collection is modified, + # cherrypy detects the change and restarts the Monitor and main thread. + # Thus, we get on-the-fly check file change detection and we do not need + # to provide extra logic for purging existing health check objects. 
+ for healthcheck in healthchecks: + hcname = healthcheck.__class__.__name__ + self.service_checks.setdefault(service, {}) + + stored_mtime, _ = self.service_checks[service].get(hcname, (None, None)) + if stored_mtime is None or mtime > stored_mtime: + self.service_checks[service][hcname] = (mtime, healthcheck) + logging.info('Updated healthcheck "%s" for service "%s" at time "%s"' % + (hcname, service, mtime)) + + def CollectionCallback(self): + """Callback for cherrypy Monitor. Collect checkfiles from the checkdir.""" + # Collect the paths of each checkfile to import. + checkfile_paths = [] + for root, _dirs, files in os.walk(self.checkdir): + for file_ in files: + if file_.endswith(CHECKFILE_ENDING): + checkfile_paths.append(os.path.join(root, file_)) + + # Import each checkfile and update the check collection. + for path in checkfile_paths: + try: + service_name, health_checks, mtime = ImportCheckfile(path) + self.Update(service_name, health_checks, mtime) + # At least SyntaxError and NameError may be raised when attempting + # to import a bad check file. Catch general exceptions here so + # that unforeseen errors do not bring down the monitor. + except Exception, e: + logging.warning('Checkfile %s has errors: %s' % (path, e)) + + def StartCollection(self): + # The Monitor frequency is mis-named. It's the time between + # each callback execution. + self.collect_monitor = plugins.Monitor(cherrypy.engine, + self.CollectionCallback, + frequency=self.collect_interval) + self.collect_monitor.subscribe() + + # TODO (msartori): Implement crbug.com/490798. + def Execute(self): + """Execute all health checks and collect service state information.""" + + # TODO (msartori): Implement crbug.com/493318. + def GetServiceList(self): + """Return a list of the monitored services. + + Returns: + A list of the services for which we have checks defined. + """ + + # TODO (msartori): Implement crbug.com/493319.
+ def GetStatus(self, service): + """Query the current health state of the service. + + Args: + service: The name of the service whose health state is being queried. + + Returns: + A named tuple with the following fields: + health_state: A boolean, True if all checks passed, False if not. + description: A description of the error state. This is provided + by the 'Diagnose' method of health check classes. + actions: A list of actions that can be taken as defined by the health + check class. + """ + + # TODO (msartori): Implement crbug.com/493320. + def RepairService(self, service, action): + """Execute the repair action on the specified service. + + Args: + service: The name of the service to be repaired. + action: The name of the action to execute. + + Returns: + The same return value as GetStatus(service). + """ diff --git a/mobmonitor/checkfile/manager_unittest b/mobmonitor/checkfile/manager_unittest new file mode 120000 index 000000000..ef3e37b67 --- /dev/null +++ b/mobmonitor/checkfile/manager_unittest @@ -0,0 +1 @@ +../../scripts/wrapper.py
\ No newline at end of file diff --git a/mobmonitor/checkfile/manager_unittest.py b/mobmonitor/checkfile/manager_unittest.py new file mode 100644 index 000000000..457b58ed3 --- /dev/null +++ b/mobmonitor/checkfile/manager_unittest.py @@ -0,0 +1,340 @@ +# Copyright 2015 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Unittests for Mob* Monitor checkfile manager.""" + +from __future__ import print_function + +import mock +import imp +import os +import subprocess +import time +import threading + +from cherrypy.process import plugins +from chromite.lib import cros_test_lib +from chromite.lib import osutils +from chromite.mobmonitor.checkfile import manager + +# Test health check and related attributes +class TestHealthCheck(object): + """Test health check.""" + + def Check(self): + """Stub Check.""" + return 0 + + def Diagnose(self, _errcode): + """Stub Diagnose.""" + return ('Unknown Error.', []) + +TEST_SERVICE_NAME = 'test-service' +TEST_MTIME = 100 +CHECKDIR = '.' + +# Strings that are used to mock actual check modules. 
+CHECKFILE_MANY_SIMPLE = ''' +SERVICE = 'test-service' + +class MyHealthCheck2(object): + def Check(self): + return 0 + + def Diagnose(self, errcode): + return ('Unknown error.', []) + +class MyHealthCheck3(object): + def Check(self): + return 0 + + def Diagnose(self, errcode): + return ('Unknown error.', []) + +class MyHealthCheck4(object): + def Check(self): + return 0 + + def Diagnose(self, errcode): + return ('Unknown error.', []) +''' + +CHECKFILE_MANY_SIMPLE_ONE_BAD = ''' +SERVICE = 'test-service' + +class MyHealthCheck(object): + def Check(self): + return 0 + + def Diagnose(self, errcode): + return ('Unknown error.', []) + +class NotAHealthCheck(object): + def Diagnose(self, errcode): + return ('Unknown error.', []) + +class MyHealthCheck2(object): + def Check(self): + return 0 + + def Diagnose(self, errcode): + return ('Unknown error.', []) +''' + +NOT_A_CHECKFILE = ''' +class NotAHealthCheck(object): + def NotCheckNorDiagnose(self): + return -1 +''' + +ANOTHER_NOT_A_CHECKFILE = ''' +class AnotherNotAHealthCheck(object): + def AnotherNotCheckNorDiagnose(self): + return -2 +''' + + +class RunCommand(threading.Thread): + """Helper class for executing the Mob* Monitor with a timeout.""" + + def __init__(self, cmd, timeout): + threading.Thread.__init__(self) + self.cmd = cmd + self.timeout = timeout + self.p = None + + def run(self): + self.p = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + self.p.wait() + + def Stop(self): + self.join(self.timeout) + + if self.is_alive(): + self.p.terminate() + self.join() + + return self.p.stdout.read() + + +class CheckFileManagerHelperTest(cros_test_lib.MockTestCase): + """Unittests for CheckFileManager helper functions.""" + + def testIsHealthCheck(self): + """Test that IsHealthCheck properly asserts the health check interface.""" + + class NoAttrs(object): + """Test health check missing 'check' and 'diagnose' methods.""" + + class NoCheckAttr(object): + """Test health check missing 'check' 
method.""" + def Diagnose(self, errcode): + pass + + class NoDiagnoseAttr(object): + """Test health check missing 'diagnose' method.""" + def Check(self): + pass + + class GoodHealthCheck(object): + """Test health check that implements 'check' and 'diagnose' methods.""" + def Check(self): + pass + + def Diagnose(self, errcode): + pass + + self.assertFalse(manager.IsHealthCheck(NoAttrs())) + self.assertFalse(manager.IsHealthCheck(NoCheckAttr())) + self.assertFalse(manager.IsHealthCheck(NoDiagnoseAttr())) + self.assertTrue(manager.IsHealthCheck(GoodHealthCheck())) + + def testImportCheckFileAllHealthChecks(self): + """Test that health checks and service name are collected.""" + self.StartPatcher(mock.patch('os.path.splitext')) + os.path.splitext.return_value = '/path/to/test_check.py' + + self.StartPatcher(mock.patch('os.path.getmtime')) + os.path.getmtime.return_value = TEST_MTIME + + checkmodule = imp.new_module('test_check') + exec CHECKFILE_MANY_SIMPLE in checkmodule.__dict__ + self.StartPatcher(mock.patch('imp.load_source')) + imp.load_source.return_value = checkmodule + + service_name, healthchecks, mtime = manager.ImportCheckfile('/') + + self.assertEquals(service_name, 'test-service') + self.assertEquals(len(healthchecks), 3) + self.assertEquals(mtime, TEST_MTIME) + + def testImportCheckFileSomeHealthChecks(self): + """Test importing when not all classes are actually health checks.""" + self.StartPatcher(mock.patch('os.path.splitext')) + os.path.splitext.return_value = '/path/to/test_check.py' + + self.StartPatcher(mock.patch('os.path.getmtime')) + os.path.getmtime.return_value = TEST_MTIME + + checkmodule = imp.new_module('test_check') + exec CHECKFILE_MANY_SIMPLE_ONE_BAD in checkmodule.__dict__ + self.StartPatcher(mock.patch('imp.load_source')) + imp.load_source.return_value = checkmodule + + service_name, healthchecks, mtime = manager.ImportCheckfile('/') + + self.assertEquals(service_name, 'test-service') + self.assertEquals(len(healthchecks), 2) + 
self.assertEquals(mtime, TEST_MTIME) + + +class CheckFileManagerTest(cros_test_lib.MockTestCase): + """Unittests for CheckFileManager.""" + + def testCollectionCallback(self): + """Test the CollectionCallback.""" + self.StartPatcher(mock.patch('os.walk')) + os.walk.return_value = [['/checkdir/', [], ['test_check.py']]] + + myobj = TestHealthCheck() + manager.ImportCheckfile = mock.Mock( + return_value=[TEST_SERVICE_NAME, [myobj], 100]) + cfm = manager.CheckFileManager(checkdir=CHECKDIR) + cfm.CollectionCallback() + + manager.ImportCheckfile.assert_called_once_with('/checkdir/test_check.py') + + self.assertTrue(TEST_SERVICE_NAME in cfm.service_checks) + self.assertEquals(cfm.service_checks[TEST_SERVICE_NAME], + {myobj.__class__.__name__: (100, myobj)}) + + def testCollectionCallbackNoChecks(self): + """Test the CollectionCallback with no valid check files.""" + self.StartPatcher(mock.patch('os.walk')) + os.walk.return_value = [['/checkdir/', [], ['test.py']]] + + manager.ImportCheckfile = mock.Mock(return_value=None) + cfm = manager.CheckFileManager(checkdir=CHECKDIR) + cfm.CollectionCallback() + + self.assertFalse(manager.ImportCheckfile.called) + + self.assertFalse(TEST_SERVICE_NAME in cfm.service_checks) + + def testStartCollection(self): + """Test the StartCollection method.""" + plugins.Monitor = mock.Mock() + + cfm = manager.CheckFileManager(checkdir=CHECKDIR) + cfm.StartCollection() + + self.assertTrue(plugins.Monitor.called) + + def testUpdateExisting(self): + """Test update when a health check exists and is not stale.""" + cfm = manager.CheckFileManager(checkdir=CHECKDIR) + + myobj = TestHealthCheck() + + cfm.service_checks[TEST_SERVICE_NAME] = {myobj.__class__.__name__: + (TEST_MTIME, myobj)} + + myobj2 = TestHealthCheck() + cfm.Update(TEST_SERVICE_NAME, [myobj2], TEST_MTIME) + self.assertTrue(TEST_SERVICE_NAME in cfm.service_checks) + self.assertEquals(cfm.service_checks[TEST_SERVICE_NAME], + {myobj.__class__.__name__: (TEST_MTIME, myobj)}) + + + def 
testUpdateNonExisting(self): + """Test adding a new health check to the manager.""" + cfm = manager.CheckFileManager(checkdir=CHECKDIR) + cfm.service_checks = {} + + myobj = TestHealthCheck() + cfm.Update(TEST_SERVICE_NAME, [myobj], TEST_MTIME) + + self.assertTrue(TEST_SERVICE_NAME in cfm.service_checks) + self.assertEquals(cfm.service_checks[TEST_SERVICE_NAME], + {myobj.__class__.__name__: (TEST_MTIME, myobj)}) + + +class CheckFileModificationTest(cros_test_lib.MockTempDirTestCase): + """Unittests for checking when live changes are made to a checkfile.""" + + MOBMONITOR_BASENAME = 'chromite' + MOBMONITOR_REL_CMD = 'bin/mobmonitor' + CHECKFILE_REL_PATH = 'test_check.py' + NOTACHECK_REL_PATH = 'notacheck.py' + CHERRYPY_RESTART_STR = 'ENGINE Restarting because %(checkfile)s changed.' + CHECKFILE_MOD_ATTEMPTS = 3 + TIMEOUT_SEC = 5 + + def CreateFile(self, relpath, filestr): + """Create a file from a string in the temp dir.""" + abspath = os.path.join(self.checkdir, relpath) + osutils.WriteFile(abspath, filestr) + return abspath + + def RunCheckfileMod(self, expect_handler, modpath, modfilestr): + """Test Mob* Monitor restart behaviour with checkfile modification.""" + # Retry the test several times, each time with more relaxed timeouts, + # to try to control for flakiness as these testcases are dependent + # on cherrypy startup time and module change detection time. + for attempt in range(1, self.CHECKFILE_MOD_ATTEMPTS + 1): + # This target should appear in the output if a checkfile is changed. + target = self.CHERRYPY_RESTART_STR % {'checkfile': + os.path.join(self.checkdir, + modpath)} + + # Start the Mob* Monitor in a separate thread. The timeout + # is how long we will wait to join the thread/wait for output + # after we have modified the file. + mobmon = RunCommand(self.cmd, self.TIMEOUT_SEC * attempt) + mobmon.start() + + # Wait for the monitor to start up fully, then update the file. 
+ time.sleep(self.TIMEOUT_SEC * attempt) + self.checkfile = self.CreateFile(modpath, modfilestr) + + # Test whether the target is contained in output and if it + # matches the expectation. + if expect_handler(target in mobmon.Stop()): + return True + + # The test failed. + return False + + def setUp(self): + """Setup the check directory and the Mob* Monitor process.""" + # Create the test check directory and the test files. + self.checkdir = self.tempdir + self.checkfile = self.CreateFile(self.CHECKFILE_REL_PATH, + CHECKFILE_MANY_SIMPLE) + self.notacheck = self.CreateFile(self.NOTACHECK_REL_PATH, + NOT_A_CHECKFILE) + + # Setup the Mob* Monitor command. + path = os.path.abspath(__file__) + while os.path.basename(path) != self.MOBMONITOR_BASENAME: + path = os.path.dirname(path) + path = os.path.join(path, self.MOBMONITOR_REL_CMD) + self.cmd = [path, '-d', self.checkdir] + + def testModifyCheckfile(self): + """Test restart behaviour when modifying an imported checkfile.""" + expect_handler = lambda x: x == True + + self.assertTrue(self.RunCheckfileMod(expect_handler, + self.CHECKFILE_REL_PATH, + CHECKFILE_MANY_SIMPLE_ONE_BAD)) + + def testModifyNotACheckfile(self): + """Test that no restart occurs when a non-checkfile is modified.""" + expect_handler = lambda x: x == False + + self.assertTrue(self.RunCheckfileMod(expect_handler, + self.NOTACHECK_REL_PATH, + ANOTHER_NOT_A_CHECKFILE)) |