From 8b9c0f140b48253cdbcc7c050f115c5e3bda6d88 Mon Sep 17 00:00:00 2001
From: Yuheng Long <yuhenglong@google.com>
Date: Tue, 16 Jul 2013 09:38:16 -0700
Subject: Added the base Generation base class.

Algorithm specific subclasses, e.g., Genetic Algorithm and Hill Climbing,
should extend this base class.

BUG=None
TEST=unit testing for the pipeline stage, pipeline workers and generation.

Change-Id: Icc79b02099521db74fed7c1017691e2a8719be23
Reviewed-on: https://gerrit-int.chromium.org/41081
Reviewed-by: Luis Lozano <llozano@chromium.org>
Reviewed-by: Simon Que <sque@google.com>
Tested-by: Yuheng Long <yuhenglong@google.com>
Commit-Queue: Yuheng Long <yuhenglong@google.com>
---
 bestflags/generation.py      | 110 ++++++++++++++++++++++++++++++++++++-------
 bestflags/generation_test.py |  83 ++++++++++++++++++++++++--------
 2 files changed, 155 insertions(+), 38 deletions(-)

(limited to 'bestflags')

diff --git a/bestflags/generation.py b/bestflags/generation.py
index 29be9685..1a0ad4c0 100644
--- a/bestflags/generation.py
+++ b/bestflags/generation.py
@@ -2,44 +2,118 @@
 
 Part of the Chrome build flags optimization.
 
-This module contains the core algorithm of producing the next generation of
-execution.
+This module contains the base generation class. This class contains the tasks of
+this current generation. The generation will not evolve to the next generation
+until all the tasks in this generation are done executing. It also contains a
+set of tasks that could potentially be used to generate the next generation,
+e.g., in the genetic algorithm, a set of good species will be kept to evolve to
+the successive generations. For the hill climbing algorithm example, the
+candidate_pool will contain a current task t being evaluated and the exe_pool
+will contains all the task t's neighbor.
 """
 
 __author__ = 'yuhenglong@google.com (Yuheng Long)'
 
 
+class NoneOverridingError(Exception):
+  """Define an Exception class for subclasses not overriding certain methods."""
+  pass
+
+
 class Generation(object):
   """A generation of a framework run.
 
-  This also contains the core implementation, reproducing new generations.
+  The base class of generation. Concrete subclasses, e.g., GAGeneration should
+  override the Next and Improve method to implement algorithm specific
+  applications.
   """
 
-  def __init__(self, pool):
+  def __init__(self, exe_pool, candidate_pool):
     """Set up the tasks set of this generation.
 
     Args:
-        pool: a set of tasks to be run
+      exe_pool: A set of tasks to be run.
+      candidate_pool: A set of tasks to be considered to be used to generate the
+        next generation.
     """
-    self._pool = pool
 
-  def Next(self):
-    """Calculate the next generation.
+    self._exe_pool = exe_pool
+    self._candidate_pool = candidate_pool
+
+    # Keeping the record of how many tasks are pending. Pending tasks are the
+    # ones that have been sent out to the next stage for execution but have not
+    # finished. A generation is not ready for the reproduction of the new
+    # generations until all its pending tasks have been executed.
+    self._pending = len(exe_pool)
+
+  def Pool(self):
+    """Return the task set of this generation."""
 
-    This is the core of the framework implementation.
+    return self._exe_pool
+
+  def Done(self):
+    """All the tasks in this generation are done.
 
     Returns:
-      A new generation.
+      True if all the tasks have been executed. That is the number of pending
+      task is 0.
     """
 
-  def Pool(self):
-    """Return the task set of this generation."""
-    pass
+    return self._pending == 0
+
+  def UpdateTask(self, task):
+    """Match a task t in this generation that is equal to the input task.
+
+    This method is called when the input task has just finished execution. This
+    method finds out whether there is a pending task t in the current generation
+    that is the same as the input task. Two tasks are the same if their flag
+    options are the same. A task is pending if it has not been performed.
+    If there is a pending task t that matches the input task, task t will be
+    substituted with the input task in this generation. In that case, the input
+    task, as well as its build and test results encapsulated in the task, will
+    be stored in the current generation. These results could be used to produce
+    the next generation.
+    If there is a match, the current generation will have one less pending task.
+    When there is no pending task, the generation can be used to produce the
+    next generation.
+    The caller of this function is responsible for not calling this method on
+    the same task more than once.
+
+    Args:
+      task: A task that has its results ready.
+    """
+
+    # If there is a match.
+    if task in self._exe_pool:
+      # Remove the place holder task in this generation and store the new input
+      # task and its result.
+      self._exe_pool.remove(task)
+      self._exe_pool.add(task)
+
+      # The current generation will have one less task to wait on.
+      self._pending -= 1
+
+      assert self._pending >= 0
 
   def Improve(self):
-    """True if this generation has improvement over its parent generation."""
-    pass
+    """True if this generation has improvement over its parent generation.
+
+    Raises:
+      NoneOverridingError: The subclass should override this method.
+    """
+    raise NoneOverridingError('Must be implemented by child class')
+
+  def Next(self):
+    """Calculate the next generation.
+
+    This is the core of the framework implementation. It must be overridden by
+    the concrete subclass to implement algorithm specific generations.
+
+    Returns:
+      A set of new generations.
+
+    Raises:
+      NoneOverridingError: The subclass should override this method.
+    """
 
-  def GetBest(self):
-    """Get the best flagset."""
-    return self._pool[0]
+    raise NoneOverridingError('Must be implemented by child class')
diff --git a/bestflags/generation_test.py b/bestflags/generation_test.py
index b9779255..a8bdf599 100644
--- a/bestflags/generation_test.py
+++ b/bestflags/generation_test.py
@@ -5,40 +5,83 @@ Part of the Chrome build flags optimization.
 
 __author__ = 'yuhenglong@google.com (Yuheng Long)'
 
+import random
 import unittest
 
-import generation
+from generation import Generation
+from mock_task import MockTask
 
 
-class GenerationTest(unittest.TestCase):
-  """This class test the Generation class.
+# Pick an integer at random.
+TESTSTAGE = -125
+
+# The number of tasks to be put in a generation to be tested.
+NUMTASKS = 20
+
+# The stride of permutation used to shuffle the input list of tasks. Should be
+# relatively prime with NUMTASKS.
+STRIDE = 7
+
 
-  A generation class should not produce a task that has been generated before.
-  The task returned as the best task should really be the best.
+class GenerationMockTask(MockTask):
+  """This class defines the mock task to test the Generation class.
 
-  Given two generations, if the second one has improved upon the first one,
-  the result method should return true and false otherwise.
+  The task instances will be inserted into a set. Therefore the hash and the
+  equal methods are overridden. The generation class considers the identifier to
+  set the cost of the task in a set, thus the identifier is used in the
+  overriding methods.
   """
 
-  def setUp(self):
-    pass
+  def __hash__(self):
+    return self._identifier
 
-  def testNext(self):
-    """"Test the next method.
+  def __eq__(self, other):
+    if isinstance(other, MockTask):
+      return self._identifier == other.GetIdentifier(self._stage)
+    return False
 
-    Call the next method n times and all the tasks in each generation should be
-    unique.
-    """
-    pass
 
-  def testImprove(self):
-    """"Test the improve method.
+class GenerationTest(unittest.TestCase):
+  """This class test the Generation class.
+
+  Given a set of tasks in the generation, if there is any task that is pending,
+  then the Done method will return false, and true otherwise.
+  """
+
+  def testDone(self):
+    """"Test the Done method.
 
-    If the successor generation has improvement upon the parent generation, the
-    result from the improve method should indicate so.
+    Produce a generation with a set of tasks. Set the cost of the task one by
+    one and verify that the Done method returns false before setting the cost
+    for all the tasks. After the costs of all the tasks are set, the Done method
+    should return true.
     """
 
-    pass
+    random.seed(0)
+
+    testing_tasks = range(NUMTASKS)
+
+    # The tasks for the generation to be tested.
+    generation_tasks = [GenerationMockTask(TESTSTAGE, t) for t in testing_tasks]
+
+    gen = Generation(set(generation_tasks), None)
+
+    # Permute the list.
+    permutation = [(t * STRIDE) % NUMTASKS for t in range(NUMTASKS)]
+    permuted_tasks = [testing_tasks[index] for index in permutation]
+
+    # The Done method of the Generation should return false before all the tasks
+    # in the permuted list are set.
+    for testing_task in permuted_tasks:
+      assert not gen.Done()
+
+      # Mark a task as done by calling the UpdateTask method of the generation.
+      # Send the generation the task as well as its results.
+      gen.UpdateTask(GenerationMockTask(TESTSTAGE, testing_task))
+
+    # The Done method should return true after all the tasks in the permuted
+    # list is set.
+    assert gen.Done()
 
 if __name__ == '__main__':
   unittest.main()
-- 
cgit v1.2.3