summaryrefslogtreecommitdiff
path: root/mobmonitor
diff options
context:
space:
mode:
authorMatthew Sartori <msartori@chromium.org>2015-06-04 10:39:41 -0700
committerChromeOS Commit Bot <chromeos-commit-bot@chromium.org>2015-06-12 19:36:27 +0000
commite1bec9fce7a11b4066d42bd9a21692c14d6ae5a6 (patch)
treeb780655862134d6a478c312524d22682921528a1 /mobmonitor
parenta4375fc3df7af4fdb57b3b318f110c7355c45243 (diff)
downloadchromite-e1bec9fce7a11b4066d42bd9a21692c14d6ae5a6.tar.gz
mobmonitor: Mob* Monitor Checkfile Collection
This CL implements a recurring background task using cherrypy plugins that will periodically collect checkfiles from the specified directory. BUG=chromium:490788 TEST=Unittests and tested collection on local machine. Change-Id: Ia92a755e4712fd26fa9215b760ecb759b72df35b Reviewed-on: https://chromium-review.googlesource.com/276241 Tested-by: Matthew Sartori <msartori@chromium.org> Reviewed-by: Simran Basi <sbasi@chromium.org> Commit-Queue: Matthew Sartori <msartori@chromium.org>
Diffstat (limited to 'mobmonitor')
-rw-r--r--mobmonitor/checkfile/__init__.py0
-rw-r--r--mobmonitor/checkfile/manager.py182
l---------mobmonitor/checkfile/manager_unittest1
-rw-r--r--mobmonitor/checkfile/manager_unittest.py340
4 files changed, 523 insertions, 0 deletions
diff --git a/mobmonitor/checkfile/__init__.py b/mobmonitor/checkfile/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/mobmonitor/checkfile/__init__.py
diff --git a/mobmonitor/checkfile/manager.py b/mobmonitor/checkfile/manager.py
new file mode 100644
index 000000000..da403b57c
--- /dev/null
+++ b/mobmonitor/checkfile/manager.py
@@ -0,0 +1,182 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Store and manage Mob* Monitor checkfiles."""
+
+from __future__ import print_function
+
+import cherrypy
+import collections
+import imp
+import inspect
+import os
+
+from cherrypy.process import plugins
+from chromite.lib import cros_logging as logging
+
+
+# Names that must be callable attributes of a class for it to be treated as
+# a health check (see IsHealthCheck).
+HEALTH_CHECK_METHODS = ['Check', 'Diagnose']
+
+# Name of the module-level attribute in a checkfile whose value
+# ImportCheckfile returns as the service name.
+CHECKFILE_SERVICE = 'SERVICE'
+# Default directory that CheckFileManager collects checkfiles from.
+CHECKFILE_DIR = '/etc/mobmonitor/checkfiles/'
+# Only files whose names end with this suffix are collected as checkfiles.
+CHECKFILE_ENDING = '_check.py'
+
+# Record type whose fields are described in CheckFileManager.GetStatus.
+SERVICE_STATUS = collections.namedtuple('service_status', ['health_state',
+ 'description',
+ 'actions'])
+
+
+class CollectionError(Exception):
+ """Raised when an error occurs during checkfile collection."""
+
+
+def IsHealthCheck(obj):
+  """Report whether obj exposes the health check interface.
+
+  Args:
+    obj: A Python object (a class or an instance).
+
+  Returns:
+    True if every name in HEALTH_CHECK_METHODS ('Check' and 'Diagnose') is a
+    callable attribute of obj. False otherwise.
+  """
+  for method_name in HEALTH_CHECK_METHODS:
+    # A missing attribute reads as None, which is not callable.
+    if not callable(getattr(obj, method_name, None)):
+      return False
+  return True
+
+
+def ImportCheckfile(checkfile_path):
+ """Import the checkfile and gather its service name and health checks.
+
+ Args:
+ checkfile_path: The path of the checkfile to import.
+
+ Returns:
+ A 3-tuple (service_name, healthchecks, mtime) where service_name is
+ the value of the module's SERVICE attribute (None if the module has
+ none), healthchecks is a list holding one instance of each class in
+ the module that passes IsHealthCheck, and mtime is the value of
+ os.path.getmtime(checkfile_path).
+
+ Raises:
+ SyntaxError may be raised by imp.load_source if the python file
+ specified by checkfile_path has errors.
+ """
+ # Import the checkfile
+ modname = os.path.basename(os.path.splitext(checkfile_path)[0])
+ check = imp.load_source(modname, checkfile_path)
+
+ # Gather the service name and the health checks
+ service_name = None
+ healthchecks = []
+ # dir() lists every module-level name, so classes that the checkfile
+ # merely imports are examined as well.
+ for name in dir(check):
+ obj = getattr(check, name)
+ if CHECKFILE_SERVICE == name:
+ service_name = obj
+ if inspect.isclass(obj) and IsHealthCheck(obj):
+ # Health check classes are instantiated with no arguments.
+ healthchecks.append(obj())
+
+ return service_name, healthchecks, os.path.getmtime(checkfile_path)
+
+
+class CheckFileManager(object):
+ """Manage the health checks that are associated with each service.
+
+ Attributes:
+ collect_interval: Passed to the cherrypy Monitor plugin as its
+ 'frequency', i.e. the time between checkfile collections
+ (presumably in seconds).
+ checkdir: The directory that is walked for checkfiles.
+ collect_monitor: The cherrypy Monitor plugin; None until
+ StartCollection has been called.
+ healthcheck_results: A dict, created empty; nothing in this class
+ fills it in yet.
+ service_checks: A dict mapping a service name to a dict that maps
+ each health check class name to a (mtime, healthcheck) tuple.
+ service_states: A dict, created empty; nothing in this class fills
+ it in yet.
+ """
+
+ def __init__(self, collect_interval=3, checkdir=CHECKFILE_DIR):
+ """Initialize the manager.
+
+ Args:
+ collect_interval: The time between checkfile collections.
+ checkdir: The directory to collect checkfiles from.
+
+ Raises:
+ CollectionError: If checkdir does not exist.
+ """
+ if not os.path.exists(checkdir):
+ raise CollectionError('Check directory does not exist: %s' % checkdir)
+
+ self.collect_interval = collect_interval
+ self.checkdir = checkdir
+ self.collect_monitor = None
+
+ self.healthcheck_results = {}
+ self.service_checks = {}
+ self.service_states = {}
+
+ def Update(self, service, healthchecks, mtime):
+ """Update the health checks that are associated with each service.
+
+ Args:
+ service: The name of the service that the health check corresponds to.
+ healthchecks: A list of health check objects.
+ mtime: The time of latest modification of the health check module.
+ """
+ # The update and callback procedure used here leverages the cherrypy
+ # Monitor plugin. When a file that was read during collection is modified,
+ # cherrypy detects the change and restarts the Monitor and main thread.
+ # Thus, we get on-the-fly check file change detection and we do not need
+ # to provide extra logic for purging existing health check objects.
+ for healthcheck in healthchecks:
+ # Checks are keyed by class name within a service.
+ hcname = healthcheck.__class__.__name__
+ self.service_checks.setdefault(service, {})
+
+ stored_mtime, _ = self.service_checks[service].get(hcname, (None, None))
+ # Store the check if it is new, or if this module is strictly newer
+ # than the one the stored check came from.
+ if stored_mtime is None or mtime > stored_mtime:
+ self.service_checks[service][hcname] = (mtime, healthcheck)
+ logging.info('Updated healthcheck "%s" for service "%s" at time "%s"' %
+ (hcname, service, mtime))
+
+ def CollectionCallback(self):
+ """Callback for cherrypy Monitor. Collect checkfiles from the checkdir."""
+ # Collect the paths of each checkfile to import.
+ checkfile_paths = []
+ for root, _dirs, files in os.walk(self.checkdir):
+ for file_ in files:
+ if file_.endswith(CHECKFILE_ENDING):
+ checkfile_paths.append(os.path.join(root, file_))
+
+ # Import each checkfile and update the check collection.
+ for path in checkfile_paths:
+ try:
+ service_name, health_checks, mtime = ImportCheckfile(path)
+ self.Update(service_name, health_checks, mtime)
+ # At least SyntaxError and NameError may be raised when attempting
+ # to import a bad check file. Catch general exceptions here so that
+ # unforeseen errors do not bring down the monitor.
+ # NOTE(review): 'except Exception, e' is Python 2-only syntax.
+ except Exception, e:
+ logging.warning('Checkfile %s has errors: %s' % (path, e))
+
+ def StartCollection(self):
+ """Create and subscribe the Monitor that runs CollectionCallback."""
+ # The Monitor frequency is mis-named. It's the time between
+ # each callback execution.
+ self.collect_monitor = plugins.Monitor(cherrypy.engine,
+ self.CollectionCallback,
+ frequency=self.collect_interval)
+ self.collect_monitor.subscribe()
+
+ # TODO (msartori): Implement crbug.com/490798.
+ def Execute(self):
+ """Execute all health checks and collect service state information.
+
+ Not implemented yet: the body is empty, so this returns None.
+ """
+
+ # TODO (msartori): Implement crbug.com/493318.
+ def GetServiceList(self):
+ """Return a list of the monitored services.
+
+ Not implemented yet: the body is empty, so this returns None.
+
+ Returns:
+ A list of the services for which we have checks defined.
+ """
+
+ # TODO (msartori): Implement crbug.com/493319.
+ def GetStatus(self, service):
+ """Query the current health state of the service.
+
+ Not implemented yet: the body is empty, so this returns None.
+
+ Args:
+ service: The name of service that we are querying the health state of.
+
+ Returns:
+ A named tuple with the following fields:
+ health_state: A boolean, True if all checks passed, False if not.
+ description: A description of the error state. This is provided
+ by the 'Diagnose' method of health check classes.
+ actions: A list of actions that can be taken as defined by the health
+ check class.
+ """
+
+ # TODO (msartori): Implement crbug.com/493320.
+ def RepairService(self, service, action):
+ """Execute the repair action on the specified service.
+
+ Not implemented yet: the body is empty, so this returns None.
+
+ Args:
+ service: The name of the service to be repaired.
+ action: The name of the action to execute.
+
+ Returns:
+ The same return value of GetStatus(service).
+ """
diff --git a/mobmonitor/checkfile/manager_unittest b/mobmonitor/checkfile/manager_unittest
new file mode 120000
index 000000000..ef3e37b67
--- /dev/null
+++ b/mobmonitor/checkfile/manager_unittest
@@ -0,0 +1 @@
+../../scripts/wrapper.py \ No newline at end of file
diff --git a/mobmonitor/checkfile/manager_unittest.py b/mobmonitor/checkfile/manager_unittest.py
new file mode 100644
index 000000000..457b58ed3
--- /dev/null
+++ b/mobmonitor/checkfile/manager_unittest.py
@@ -0,0 +1,340 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unittests for Mob* Monitor checkfile manager."""
+
+from __future__ import print_function
+
+import mock
+import imp
+import os
+import subprocess
+import time
+import threading
+
+from cherrypy.process import plugins
+from chromite.lib import cros_test_lib
+from chromite.lib import osutils
+from chromite.mobmonitor.checkfile import manager
+
+# Test health check and related attributes
+class TestHealthCheck(object):
+ """Minimal health check fixture providing both Check and Diagnose."""
+
+ def Check(self):
+ """Stub Check; always returns 0."""
+ return 0
+
+ def Diagnose(self, _errcode):
+ """Stub Diagnose; returns a fixed (description, actions) pair."""
+ return ('Unknown Error.', [])
+
+# Service name used by the tests; the fixture checkfile strings in this
+# file declare the same value as their SERVICE.
+TEST_SERVICE_NAME = 'test-service'
+# Fake module modification time handed to the manager in place of a real one.
+TEST_MTIME = 100
+# Passed as checkdir when constructing a CheckFileManager in the tests; the
+# constructor raises CollectionError if the directory does not exist.
+CHECKDIR = '.'
+
+# Strings that are used to mock actual check modules.
+
+# A checkfile that declares a service and three classes, each defining both
+# Check and Diagnose.
+CHECKFILE_MANY_SIMPLE = '''
+SERVICE = 'test-service'
+
+class MyHealthCheck2(object):
+ def Check(self):
+ return 0
+
+ def Diagnose(self, errcode):
+ return ('Unknown error.', [])
+
+class MyHealthCheck3(object):
+ def Check(self):
+ return 0
+
+ def Diagnose(self, errcode):
+ return ('Unknown error.', [])
+
+class MyHealthCheck4(object):
+ def Check(self):
+ return 0
+
+ def Diagnose(self, errcode):
+ return ('Unknown error.', [])
+'''
+
+# A checkfile with two complete health checks and one class that lacks a
+# Check method.
+CHECKFILE_MANY_SIMPLE_ONE_BAD = '''
+SERVICE = 'test-service'
+
+class MyHealthCheck(object):
+ def Check(self):
+ return 0
+
+ def Diagnose(self, errcode):
+ return ('Unknown error.', [])
+
+class NotAHealthCheck(object):
+ def Diagnose(self, errcode):
+ return ('Unknown error.', [])
+
+class MyHealthCheck2(object):
+ def Check(self):
+ return 0
+
+ def Diagnose(self, errcode):
+ return ('Unknown error.', [])
+'''
+
+# A module with no SERVICE and no class defining Check or Diagnose.
+NOT_A_CHECKFILE = '''
+class NotAHealthCheck(object):
+ def NotCheckNorDiagnose(self):
+ return -1
+'''
+
+# Replacement contents for the non-checkfile in the modification tests.
+ANOTHER_NOT_A_CHECKFILE = '''
+class AnotherNotAHealthCheck(object):
+ def AnotherNotCheckNorDiagnose(self):
+ return -2
+'''
+
+
+class RunCommand(threading.Thread):
+  """Helper class for executing the Mob* Monitor with a timeout.
+
+  The command runs in this background thread; Stop() waits for it, kills it
+  if it is still running, and hands back what it printed.
+
+  Attributes:
+    cmd: The argument list handed to subprocess.Popen.
+    timeout: How long, in seconds, Stop() waits before terminating the
+      command.
+    p: The subprocess.Popen object; None until run() has started the command.
+    output: The combined stdout/stderr of the command; None until the
+      command has exited.
+  """
+
+  def __init__(self, cmd, timeout):
+    threading.Thread.__init__(self)
+    self.cmd = cmd
+    self.timeout = timeout
+    self.p = None
+    self.output = None
+
+  def run(self):
+    """Start the command and block until it exits, draining its output."""
+    self.p = subprocess.Popen(self.cmd, stdout=subprocess.PIPE,
+                              stderr=subprocess.STDOUT)
+    # communicate() keeps reading the pipe while it waits. A bare wait() can
+    # deadlock once the child fills the OS pipe buffer, because nothing would
+    # read stdout until after the process had exited.
+    self.output, _ = self.p.communicate()
+
+  def Stop(self):
+    """Wait up to self.timeout for the command, then terminate it.
+
+    Returns:
+      Everything the command wrote to stdout and stderr, or None if the
+      command never ran to completion inside run().
+    """
+    self.join(self.timeout)
+
+    if self.is_alive():
+      # Still running after the timeout: kill the child so that run() can
+      # finish collecting its output, then wait for the thread to end.
+      self.p.terminate()
+      self.join()
+
+    return self.output
+
+
+class CheckFileManagerHelperTest(cros_test_lib.MockTestCase):
+ """Unittests for CheckFileManager helper functions."""
+
+ def testIsHealthCheck(self):
+ """Test that IsHealthCheck properly asserts the health check interface."""
+
+ class NoAttrs(object):
+ """Test health check missing 'Check' and 'Diagnose' methods."""
+
+ class NoCheckAttr(object):
+ """Test health check missing 'Check' method."""
+ def Diagnose(self, errcode):
+ pass
+
+ class NoDiagnoseAttr(object):
+ """Test health check missing 'Diagnose' method."""
+ def Check(self):
+ pass
+
+ class GoodHealthCheck(object):
+ """Test health check that implements 'Check' and 'Diagnose' methods."""
+ def Check(self):
+ pass
+
+ def Diagnose(self, errcode):
+ pass
+
+ # Only the class that provides both methods should be accepted.
+ self.assertFalse(manager.IsHealthCheck(NoAttrs()))
+ self.assertFalse(manager.IsHealthCheck(NoCheckAttr()))
+ self.assertFalse(manager.IsHealthCheck(NoDiagnoseAttr()))
+ self.assertTrue(manager.IsHealthCheck(GoodHealthCheck()))
+
+ def testImportCheckFileAllHealthChecks(self):
+ """Test that health checks and service name are collected."""
+ # NOTE(review): the real os.path.splitext returns a (root, ext) pair, but
+ # this mock returns a plain string, so the [0] index in ImportCheckfile
+ # yields '/'. The module name is unused here because imp.load_source is
+ # mocked as well.
+ self.StartPatcher(mock.patch('os.path.splitext'))
+ os.path.splitext.return_value = '/path/to/test_check.py'
+
+ self.StartPatcher(mock.patch('os.path.getmtime'))
+ os.path.getmtime.return_value = TEST_MTIME
+
+ # Build an in-memory module from the fixture source and have the mocked
+ # imp.load_source return it (the exec statement form is Python 2 syntax).
+ checkmodule = imp.new_module('test_check')
+ exec CHECKFILE_MANY_SIMPLE in checkmodule.__dict__
+ self.StartPatcher(mock.patch('imp.load_source'))
+ imp.load_source.return_value = checkmodule
+
+ service_name, healthchecks, mtime = manager.ImportCheckfile('/')
+
+ self.assertEquals(service_name, 'test-service')
+ self.assertEquals(len(healthchecks), 3)
+ self.assertEquals(mtime, TEST_MTIME)
+
+ def testImportCheckFileSomeHealthChecks(self):
+ """Test importing when not all classes are actually health checks."""
+ # NOTE(review): as in the test above, the splitext mock returns a string
+ # rather than a (root, ext) pair.
+ self.StartPatcher(mock.patch('os.path.splitext'))
+ os.path.splitext.return_value = '/path/to/test_check.py'
+
+ self.StartPatcher(mock.patch('os.path.getmtime'))
+ os.path.getmtime.return_value = TEST_MTIME
+
+ checkmodule = imp.new_module('test_check')
+ exec CHECKFILE_MANY_SIMPLE_ONE_BAD in checkmodule.__dict__
+ self.StartPatcher(mock.patch('imp.load_source'))
+ imp.load_source.return_value = checkmodule
+
+ service_name, healthchecks, mtime = manager.ImportCheckfile('/')
+
+ # The fixture's class without a Check method must not be collected.
+ self.assertEquals(service_name, 'test-service')
+ self.assertEquals(len(healthchecks), 2)
+ self.assertEquals(mtime, TEST_MTIME)
+
+
+class CheckFileManagerTest(cros_test_lib.MockTestCase):
+  """Unittests for CheckFileManager.
+
+  Module attributes are replaced only through self.StartPatcher, the same
+  way os.walk is mocked here. A bare assignment such as
+  'manager.ImportCheckfile = mock.Mock()' would leave the mock installed on
+  the module for every test that runs afterwards.
+  """
+
+  def testCollectionCallback(self):
+    """Test that CollectionCallback imports a checkfile and stores its checks."""
+    self.StartPatcher(mock.patch('os.walk'))
+    os.walk.return_value = [['/checkdir/', [], ['test_check.py']]]
+
+    myobj = TestHealthCheck()
+    self.StartPatcher(mock.patch.object(manager, 'ImportCheckfile'))
+    manager.ImportCheckfile.return_value = [TEST_SERVICE_NAME, [myobj],
+                                            TEST_MTIME]
+    cfm = manager.CheckFileManager(checkdir=CHECKDIR)
+    cfm.CollectionCallback()
+
+    manager.ImportCheckfile.assert_called_once_with('/checkdir/test_check.py')
+
+    self.assertTrue(TEST_SERVICE_NAME in cfm.service_checks)
+    self.assertEquals(cfm.service_checks[TEST_SERVICE_NAME],
+                      {myobj.__class__.__name__: (TEST_MTIME, myobj)})
+
+  def testCollectionCallbackNoChecks(self):
+    """Test the CollectionCallback with no valid check files."""
+    # 'test.py' does not end with the checkfile suffix, so it must be skipped.
+    self.StartPatcher(mock.patch('os.walk'))
+    os.walk.return_value = [['/checkdir/', [], ['test.py']]]
+
+    self.StartPatcher(mock.patch.object(manager, 'ImportCheckfile'))
+    manager.ImportCheckfile.return_value = None
+    cfm = manager.CheckFileManager(checkdir=CHECKDIR)
+    cfm.CollectionCallback()
+
+    self.assertFalse(manager.ImportCheckfile.called)
+
+    self.assertFalse(TEST_SERVICE_NAME in cfm.service_checks)
+
+  def testStartCollection(self):
+    """Test that StartCollection creates the cherrypy Monitor plugin."""
+    # manager looks up plugins.Monitor on this same module object at call
+    # time, so patching the attribute here is what StartCollection sees.
+    self.StartPatcher(mock.patch.object(plugins, 'Monitor'))
+
+    cfm = manager.CheckFileManager(checkdir=CHECKDIR)
+    cfm.StartCollection()
+
+    self.assertTrue(plugins.Monitor.called)
+
+  def testUpdateExisting(self):
+    """Test update when a health check exists and is not stale."""
+    cfm = manager.CheckFileManager(checkdir=CHECKDIR)
+
+    myobj = TestHealthCheck()
+
+    cfm.service_checks[TEST_SERVICE_NAME] = {myobj.__class__.__name__:
+                                             (TEST_MTIME, myobj)}
+
+    # Same mtime as the stored check, so the stored object must be kept.
+    # TestHealthCheck defines no __eq__, so the comparison below tells myobj
+    # and myobj2 apart by identity.
+    myobj2 = TestHealthCheck()
+    cfm.Update(TEST_SERVICE_NAME, [myobj2], TEST_MTIME)
+    self.assertTrue(TEST_SERVICE_NAME in cfm.service_checks)
+    self.assertEquals(cfm.service_checks[TEST_SERVICE_NAME],
+                      {myobj.__class__.__name__: (TEST_MTIME, myobj)})
+
+
+  def testUpdateNonExisting(self):
+    """Test adding a new health check to the manager."""
+    cfm = manager.CheckFileManager(checkdir=CHECKDIR)
+    cfm.service_checks = {}
+
+    myobj = TestHealthCheck()
+    cfm.Update(TEST_SERVICE_NAME, [myobj], TEST_MTIME)
+
+    self.assertTrue(TEST_SERVICE_NAME in cfm.service_checks)
+    self.assertEquals(cfm.service_checks[TEST_SERVICE_NAME],
+                      {myobj.__class__.__name__: (TEST_MTIME, myobj)})
+
+
+class CheckFileModificationTest(cros_test_lib.MockTempDirTestCase):
+ """Unittests for checking when live changes are made to a checkfile."""
+
+ # Name of the ancestor directory that setUp searches upwards for.
+ MOBMONITOR_BASENAME = 'chromite'
+ # Path of the monitor executable relative to that directory.
+ MOBMONITOR_REL_CMD = 'bin/mobmonitor'
+ # Files created in the check directory; only the first has the checkfile
+ # suffix.
+ CHECKFILE_REL_PATH = 'test_check.py'
+ NOTACHECK_REL_PATH = 'notacheck.py'
+ # Text searched for in the monitor's output to detect a restart.
+ CHERRYPY_RESTART_STR = 'ENGINE Restarting because %(checkfile)s changed.'
+ # Number of tries, each with a longer wait, before a test gives up.
+ CHECKFILE_MOD_ATTEMPTS = 3
+ # Base wait in seconds; multiplied by the attempt number.
+ TIMEOUT_SEC = 5
+
+ def CreateFile(self, relpath, filestr):
+ """Create a file from a string in the temp dir.
+
+ Args:
+ relpath: Path of the file relative to self.checkdir.
+ filestr: The contents to write.
+
+ Returns:
+ The path of the written file, i.e. self.checkdir joined with relpath.
+ """
+ abspath = os.path.join(self.checkdir, relpath)
+ osutils.WriteFile(abspath, filestr)
+ return abspath
+
+ def RunCheckfileMod(self, expect_handler, modpath, modfilestr):
+ """Test Mob* Monitor restart behaviour with checkfile modification.
+
+ Args:
+ expect_handler: A callable given a boolean, True if the restart
+ string was found in the monitor's output; it returns whether that
+ matches the expectation.
+ modpath: Path, relative to the check directory, of the file to rewrite
+ while the monitor is running.
+ modfilestr: The new contents for that file.
+
+ Returns:
+ True as soon as one attempt matches the expectation, False if none
+ of the CHECKFILE_MOD_ATTEMPTS attempts do.
+ """
+ # Retry the test several times, each time with more relaxed timeouts,
+ # to try to control for flakiness as these testcases are dependent
+ # on cherrypy startup time and module change detection time.
+ for attempt in range(1, self.CHECKFILE_MOD_ATTEMPTS + 1):
+ # This target should appear in the output if a checkfile is changed.
+ target = self.CHERRYPY_RESTART_STR % {'checkfile':
+ os.path.join(self.checkdir,
+ modpath)}
+
+ # Start the Mob* Monitor in a separate thread. The timeout
+ # is how long we will wait to join the thread/wait for output
+ # after we have modified the file.
+ mobmon = RunCommand(self.cmd, self.TIMEOUT_SEC * attempt)
+ mobmon.start()
+
+ # Wait for the monitor to start up fully, then update the file.
+ time.sleep(self.TIMEOUT_SEC * attempt)
+ self.checkfile = self.CreateFile(modpath, modfilestr)
+
+ # Test whether the target is contained in output and if it
+ # matches the expectation.
+ if expect_handler(target in mobmon.Stop()):
+ return True
+
+ # The test failed.
+ return False
+
+ def setUp(self):
+ """Setup the check directory and the Mob* Monitor process."""
+ # Create the test check directory and the test files.
+ self.checkdir = self.tempdir
+ self.checkfile = self.CreateFile(self.CHECKFILE_REL_PATH,
+ CHECKFILE_MANY_SIMPLE)
+ self.notacheck = self.CreateFile(self.NOTACHECK_REL_PATH,
+ NOT_A_CHECKFILE)
+
+ # Setup the Mob* Monitor command.
+ # Walk up from this file until the directory named MOBMONITOR_BASENAME
+ # is reached, then point at the monitor executable beneath it.
+ path = os.path.abspath(__file__)
+ while os.path.basename(path) != self.MOBMONITOR_BASENAME:
+ path = os.path.dirname(path)
+ path = os.path.join(path, self.MOBMONITOR_REL_CMD)
+ self.cmd = [path, '-d', self.checkdir]
+
+ def testModifyCheckfile(self):
+ """Test restart behaviour when modifying an imported checkfile."""
+ # Expect the restart string to be present in the output.
+ expect_handler = lambda x: x == True
+
+ self.assertTrue(self.RunCheckfileMod(expect_handler,
+ self.CHECKFILE_REL_PATH,
+ CHECKFILE_MANY_SIMPLE_ONE_BAD))
+
+ def testModifyNotACheckfile(self):
+ """Test that no restart occurs when a non-checkfile is modified."""
+ # Expect the restart string to be absent from the output.
+ expect_handler = lambda x: x == False
+
+ self.assertTrue(self.RunCheckfileMod(expect_handler,
+ self.NOTACHECK_REL_PATH,
+ ANOTHER_NOT_A_CHECKFILE))