Add the Genetic Algorithm.

BUG=None TEST=unit testings for the pipeline stage, pipeline workers, generation, steering, task, flag, hill climbing and genetic algorithm. Change-Id: I2864d6a6859fff43bc2d3afb059c672c54bbe385 Reviewed-on: https://gerrit-int.chromium.org/42472 Reviewed-by: Simon Que <sque@google.com> Commit-Queue: Yuheng Long <yuhenglong@google.com> Tested-by: Yuheng Long <yuhenglong@google.com>
author: Yuheng Long <yuhenglong@google.com> 2013-08-07 08:48:05 -0700
committer: ChromeBot <chrome-bot@google.com> 2013-08-07 22:14:42 -0700
commit: 057ad5c77ef0ea44ee260718eb6d30afef2f7f83 (patch)
tree: 75dc92c063a32304259fc834382c9cf7fa314602 /bestflags
parent: 570b2ce931d40a335f3b02ed859b376040b5fb67 (diff)
download: toolchain-utils-057ad5c77ef0ea44ee260718eb6d30afef2f7f83.tar.gz
3 files changed, 375 insertions, 5 deletions
diff --git a/bestflags/generation.py b/bestflags/generation.py
index 0bc2f57c..8e1358ad 100644
--- a/bestflags/generation.py
+++ b/bestflags/generation.py
@@ -50,6 +50,11 @@ class Generation(object):
     # generations until all its pending tasks have been executed.
     self._pending = len(exe_pool)
 
+  def CandidatePool(self):
+    """Return the candidate tasks of this generation."""
+
+    return self._candidate_pool
+
   def Pool(self):
     """Return the task set of this generation."""
 
diff --git a/bestflags/genetic_algorithm.py b/bestflags/genetic_algorithm.py
new file mode 100644
index 00000000..56b1dcc1
--- /dev/null
+++ b/bestflags/genetic_algorithm.py
@@ -0,0 +1,295 @@
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""The hill genetic algorithm.
+
+Part of the Chrome build flags optimization.
+"""
+
+__author__ = 'yuhenglong@google.com (Yuheng Long)'
+
+import random
+
+import flags
+from flags import Flag
+from flags import FlagSet
+from generation import Generation
+from task import Task
+
+
+def CrossoverWith(first_flag, second_flag):
+  """Get a crossed over gene.
+
+  At present, this just picks either/or of these values.  However, it could be
+  implemented as an integer maskover effort, if required.
+
+  Args:
+    first_flag: The first gene (Flag) to cross over with.
+    second_flag: The second gene (Flag) to cross over with.
+
+  Returns:
+    A Flag that can be considered appropriately randomly blended between the
+    first and second input flag.
+  """
+
+  return first_flag if random.randint(0, 1) else second_flag
+
+
+def RandomMutate(specs, flag_set, mutation_rate):
+  """Randomly mutate the content of a task.
+
+  Args:
+    specs: A list of spec from which the flag set is created.
+    flag_set: The current flag set being mutated
+    mutation_rate: What fraction of genes to mutate.
+
+  Returns:
+    A Genetic Task constructed by randomly mutating the input flag set.
+  """
+
+  results_flags = []
+
+  for spec in specs:
+    # Randomly choose whether this flag should be mutated.
+    if random.randint(0, int(1 / mutation_rate)):
+      continue
+
+    # If the flag is not already in the flag set, it is added.
+    if spec not in flag_set:
+      results_flags.append(Flag(spec))
+      continue
+
+    # If the flag is already in the flag set, it is mutated.
+    result = flags.Search(spec)
+
+    # The value of a numeric flag will be changed, and a boolean flag will be
+    # dropped.
+    if not result:
+      continue
+
+    value = flag_set[spec].GetValue()
+
+    # Randomly select a nearby value of the current value of the flag.
+    rand_arr = [value]
+    if value + 1 < int(result.group('end')):
+      rand_arr.append(value + 1)
+
+    rand_arr.append(value - 1)
+    value = random.sample(rand_arr, 1)[0]
+
+    # If the value is smaller than the start of the spec, this flag will be
+    # dropped.
+    if value != int(result.group('start')) - 1:
+      results_flags.append(Flag(spec, value))
+
+  return GATask(FlagSet(results_flags))
+
+
+class GATask(Task):
+  def __init__(self, flag_set):
+    Task.__init__(self, flag_set)
+
+  def ReproduceWith(self, other, specs, mutation_rate):
+    """Reproduce with other FlagSet.
+
+    Args:
+      other: A FlagSet to reproduce with.
+      specs: A list of spec from which the flag set is created.
+      mutation_rate: one in mutation_rate flags will be mutated (replaced by a
+        random version of the same flag, instead of one from either of the
+        parents).  Set to 0 to disable mutation.
+
+    Returns:
+      A GA task made by mixing self with other.
+    """
+
+    # Get the flag dictionary.
+    father_flags = self.GetFlags().GetFlags()
+    mother_flags = other.GetFlags().GetFlags()
+
+    # Flags that are common in both parents and flags that belong to only one
+    # parent.
+    self_flags = []
+    other_flags = []
+    common_flags = []
+
+    # Find out flags that are common to both parent and flags that belong soly
+    # to one parent.
+    for self_flag in father_flags:
+      if self_flag in mother_flags:
+        common_flags.append(self_flag)
+      else:
+        self_flags.append(self_flag)
+
+    for other_flag in mother_flags:
+      if other_flag not in father_flags:
+        other_flags.append(other_flag)
+
+    # Randomly select flags that belong to only one parent.
+    output_flags = [father_flags[f] for f in self_flags if random.randint(0, 1)]
+    others = [mother_flags[f] for f in other_flags if random.randint(0, 1)]
+    output_flags.extend(others)
+    # Turn on flags that belong to both parent. Randomly choose the value of the
+    # flag from either parent.
+    for flag in common_flags:
+      output_flags.append(CrossoverWith(father_flags[flag], mother_flags[flag]))
+
+    # Mutate flags
+    if mutation_rate:
+      return RandomMutate(specs, FlagSet(output_flags), mutation_rate)
+
+    return GATask(FlagSet(output_flags))
+
+
+class GAGeneration(Generation):
+  """The Genetic Algorithm."""
+
+  # The value checks whether the algorithm has converged and arrives at a fixed
+  # point. If STOP_THRESHOLD of generations have not seen any performance
+  # improvement, the Genetic Algorithm stops.
+  STOP_THRESHOLD = None
+
+  # Number of tasks in each generation.
+  NUM_CHROMOSOMES = None
+
+  # The value checks whether the algorithm has converged and arrives at a fixed
+  # point. If NUM_TRIALS of trials have been attempted to generate a new task
+  # without a success, the Genetic Algorithm stops.
+  NUM_TRIALS = None
+
+  # The flags that can be used to generate new tasks.
+  SPECS = None
+
+  # What fraction of genes to mutate.
+  MUTATION_RATE = 0
+
+  @staticmethod
+  def InitMetaData(stop_threshold, num_chromosomes, num_trials, specs,
+                   mutation_rate):
+    """Set up the meta data for the Genetic Algorithm.
+
+    Args:
+      stop_threshold: The number of generations, upon which no performance has
+        seen, the Genetic Algorithm stops.
+      num_chromosomes: Number of tasks in each generation.
+      num_trials: The number of trials, upon which new task has been tried to
+        generated without success, the Genetic Algorithm stops.
+      specs: The flags that can be used to generate new tasks.
+      mutation_rate: What fraction of genes to mutate.
+    """
+
+    GAGeneration.STOP_THRESHOLD = stop_threshold
+    GAGeneration.NUM_CHROMOSOMES = num_chromosomes
+    GAGeneration.NUM_TRIALS = num_trials
+    GAGeneration.SPECS = specs
+    GAGeneration.MUTATION_RATE = mutation_rate
+
+  def __init__(self, tasks, parents, total_stucks):
+    """Set up the meta data for the Genetic Algorithm.
+
+    Args:
+      tasks: A set of tasks to be run.
+      parents: A set of tasks from which this new generation is produced. This
+        set also contains the best tasks generated so far.
+      total_stucks: The number of generations that have not seen improvement.
+        The Genetic Algorithm will stop once the total_stucks equals to
+        NUM_TRIALS defined in the GAGeneration class.
+    """
+
+    Generation.__init__(self, tasks, parents)
+    self._total_stucks = total_stucks
+
+  def Improve(self):
+    """True if this generation has improvement upon its parent generation."""
+
+    tasks = self.Pool()
+    parents = self.CandidatePool()
+
+    # The first generate does not have parents.
+    if not parents:
+      return True
+
+    # Found out whether a task has improvement upon the best task in the
+    # parent generation.
+    best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0]
+    best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0]
+
+    # At least one task has improvement.
+    if best_current.Improve(best_parent):
+      self._total_stucks = 0
+      return True
+
+    # If STOP_THRESHOLD of generations have no improvement, the algorithm stops.
+    if self._total_stucks >= GAGeneration.STOP_THRESHOLD:
+      return False
+
+    self._total_stucks += 1
+    return True
+
+  def Next(self, cache):
+    """Calculate the next generation.
+
+    Generate a new generation from the a set of tasks. This set contains the
+      best set seen so far and the tasks executed in the parent generation.
+
+    Args:
+      cache: A set of tasks that have been generated before.
+
+    Returns:
+      A set of new generations.
+    """
+
+    target_len = GAGeneration.NUM_CHROMOSOMES
+    specs = GAGeneration.SPECS
+    mutation_rate = GAGeneration.MUTATION_RATE
+
+    # Collect a set of size target_len of tasks. This set will be used to
+    # produce a new generation of tasks.
+    gen_tasks = [task for task in self.Pool()]
+
+    parents = self.CandidatePool()
+    if parents:
+      gen_tasks.extend(parents)
+
+    # A set of tasks that are the best. This set will be used as the parent
+    # generation to produce the next generation.
+    sort_func = lambda task: task.GetTestResult()
+    retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len]
+
+    child_pool = set()
+    for father in retained_tasks:
+      num_trials = 0
+      # Try num_trials times to produce a new child.
+      while num_trials < GAGeneration.NUM_TRIALS:
+        # Randomly select another parent.
+        mother = random.choice(retained_tasks)
+        # Cross over.
+        child = mother.ReproduceWith(father, specs, mutation_rate)
+        if child not in child_pool and child not in cache:
+          child_pool.add(child)
+          break
+        else:
+          num_trials += 1
+
+    num_trials = 0
+
+    while len(child_pool) < target_len and num_trials < GAGeneration.NUM_TRIALS:
+      for keep_task in retained_tasks:
+        # Mutation.
+        child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate)
+        if child not in child_pool and child not in cache:
+          child_pool.add(child)
+          if len(child_pool) >= target_len:
+            break
+        else:
+          num_trials += 1
+
+    # If NUM_TRIALS of tries have been attempted without generating a set of new
+    # tasks, the algorithm stops.
+    if num_trials >= GAGeneration.NUM_TRIALS:
+      return []
+
+    assert len(child_pool) == target_len
+
+    return [GAGeneration(child_pool, set(retained_tasks), self._total_stucks)]
diff --git a/bestflags/testing_batch.py b/bestflags/testing_batch.py
index 8ec2cd9d..ad1ab4b7 100644
--- a/bestflags/testing_batch.py
+++ b/bestflags/testing_batch.py
@@ -19,6 +19,8 @@ import unittest
 import flags
 from flags import Flag
 from flags import FlagSet
+from genetic_algorithm import GAGeneration
+from genetic_algorithm import GATask
 from hill_climb_best_neighbor import HillClimbingBestBranch
 import pipeline_process
 from steering import Steering
@@ -33,6 +35,12 @@ NUM_FLAGS = 5
 # The value range of the flags.
 FLAG_RANGES = 10
 
+# The following variables are meta data for the Genetic Algorithm.
+STOP_THRESHOLD = 20
+NUM_CHROMOSOMES = 10
+NUM_TRIALS = 20
+MUTATION_RATE = 0.03
+
 
 def _GenerateRandomRasks(specs):
   """Generate a task that has random values.
@@ -92,6 +100,35 @@ def _GenerateNoFlagTask():
   return set([Task(FlagSet([]))])
 
 
+def _GenerateRandomGATasks(specs, num_tasks, num_trials):
+  """Generate a set of tasks for the Genetic Algorithm.
+
+  Args:
+    specs: A list of spec from which the flag set is created.
+    num_tasks: number of tasks that should be generated.
+    num_trials: the maximum number of tries should be attempted to generate the
+      set of tasks.
+
+  Returns:
+    A set of randomly generated tasks.
+  """
+
+  tasks = set([])
+
+  total_trials = 0
+  while len(tasks) < num_tasks and total_trials < num_trials:
+    new_flag = FlagSet([Flag(spec) for spec in specs if random.randint(0, 1)])
+    new_task = GATask(new_flag)
+
+    if new_task in tasks:
+      total_trials += 1
+    else:
+      tasks.add(new_task)
+      total_trials = 0
+
+  return tasks
+
+
 def _ComputeCost(cost_func, specs, flag_set):
   """Compute the mock cost of the flag_set using the input cost function.
 
@@ -148,7 +185,9 @@ def _TestAlgorithm(cost_func, specs, generations, best_result):
     specs: All the specs that are used in the algorithm. This is used to check
       whether certain flag is disabled in the flag_set dictionary.
     generations: The initial generations to be evaluated.
-    best_result: The expected best result of the algorithm.
+    best_result: The expected best result of the algorithm. If best_result is
+      -1, the algorithm may or may not return the best value. Therefore, no
+      assertion will be inserted.
   """
 
   # Set up the utilities to test the framework.
@@ -184,16 +223,19 @@ def _TestAlgorithm(cost_func, specs, generations, best_result):
     output_queue.put(task)
 
   pp_steer.join()
-  assert best_result == result
+
+  # Only do this test when best_result is not -1.
+  if best_result != -1:
+    assert best_result == result
 
 
 class FlagAlgorithms(unittest.TestCase):
   """This class test the FlagSet class."""
 
   def testBestHillClimb(self):
-    """Test the equal method of the Class FlagSet.
+    """Test the best hill climb algorithm.
 
-    Two FlagSet instances are equal if all their flags are equal.
+    Test whether it finds the best results as expected.
     """
 
     # Initiate the build/test command and the log directory.
@@ -217,7 +259,7 @@ class FlagAlgorithms(unittest.TestCase):
     # This test uses a cost function that is the negative of the previous cost
     # function. Therefore, the best result should be found in task with all the
     # flags enabled.
-    cost_function = '-sum(values[0:len(values)])'
+    cost_function = 'sys.maxint - sum(values[0:len(values)])'
     all_flags = list(generation_tasks)[0].GetFlags()
     cost = _ComputeCost(cost_function, specs, all_flags)
 
@@ -230,6 +272,34 @@ class FlagAlgorithms(unittest.TestCase):
     # 0, i.e., when all the flags are disabled.
     _TestAlgorithm(cost_function, specs, generations, cost)
 
+  def testGeneticAlgorithm(self):
+    """Test the Genetic Algorithm.
+
+    Do a function testing here and see how well it scales.
+    """
+
+    # Initiate the build/test command and the log directory.
+    Task.InitLogCommand(None, None, 'output')
+
+    # Generate the testing specs.
+    mock_test_file = 'scale_mock_test'
+    _GenerateTestFlags(NUM_FLAGS, FLAG_RANGES, mock_test_file)
+    specs = flags.ReadConf(mock_test_file)
+
+    # Initiate the build/test command and the log directory.
+    GAGeneration.InitMetaData(STOP_THRESHOLD, NUM_CHROMOSOMES, NUM_TRIALS,
+                              specs, MUTATION_RATE)
+
+    # Generate the initial generations.
+    generation_tasks = _GenerateRandomGATasks(specs, NUM_CHROMOSOMES,
+                                              NUM_TRIALS)
+    generations = [GAGeneration(generation_tasks, set([]), 0)]
+
+    # Test the algorithm.
+    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, -1)
+    cost_func = 'sys.maxint - sum(values[0:len(values)])'
+    _TestAlgorithm(cost_func, specs, generations, -1)
+
 
 if __name__ == '__main__':
   unittest.main()
author	Yuheng Long <yuhenglong@google.com>	2013-08-07 08:48:05 -0700
committer	ChromeBot <chrome-bot@google.com>	2013-08-07 22:14:42 -0700
commit	057ad5c77ef0ea44ee260718eb6d30afef2f7f83 (patch)
tree	75dc92c063a32304259fc834382c9cf7fa314602 /bestflags
parent	570b2ce931d40a335f3b02ed859b376040b5fb67 (diff)
download	toolchain-utils-057ad5c77ef0ea44ee260718eb6d30afef2f7f83.tar.gz