Diffstat (limited to 'bestflags')
25 files changed, 3565 insertions, 0 deletions
diff --git a/bestflags/README b/bestflags/README new file mode 100644 index 00000000..d9fc5ba6 --- /dev/null +++ b/bestflags/README @@ -0,0 +1,21 @@ +There is a vast set of compiler flags that can be used to build Chrome for +ChromeOS. This option space has not been explored before. This directory +provides an infrastructure to build Chrome with certain flag combinations, test +it, gather results and prepare a fresh batch of flags to repeat the process. The +infrastructure supports plug-in modules that implement algorithms for searching +in the N-Dimensional space of compiler flag combinations. + +Currently, three different algorithms are built, namely genetic algorithm, hill +climbing and negative flag iterative elimination. The module 'testing_batch.py' +contains the testing of these algorithms. + +To run the script, type in python testing_batch.py. + +For further information about the project, please refer to the design document +at: + +https://docs.google.com/a/google.com/document/d/19iE9rhszTWjISBpKJ3qK8uBCoUjs0o4etWDRkyEeUOw/ + +There is also a presentation slide available at: + +https://docs.google.com/a/google.com/presentation/d/13rS9jALXffbP48YsF0-bsqovrVBfgzEud4e-XpavOdA/edit#slide=id.gf880fcd4_180 diff --git a/bestflags/example_algorithms.py b/bestflags/example_algorithms.py new file mode 100644 index 00000000..9775d491 --- /dev/null +++ b/bestflags/example_algorithms.py @@ -0,0 +1,196 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""An example main file running the algorithms. + +Part of the Chrome build flags optimization. + +An example use of the framework. It parses the input json configuration file. +Then it initiates the variables of the generation. Finally, it sets up the +processes for different modules and runs the experiment. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import json +import multiprocessing +from optparse import OptionParser +import sys + +import flags +from genetic_algorithm import GAGeneration +from pipeline_process import PipelineProcess +import pipeline_worker +from steering import Steering +from task import BUILD_STAGE +from task import Task +from task import TEST_STAGE +import testing_batch + +parser = OptionParser() + +parser.add_option('-f', + '--file', + dest='filename', + help='configuration file FILE input', + metavar='FILE') + +# The meta data for the genetic algorithm. +BUILD_CMD = 'BUILD_CMD' +TEST_CMD = 'TEST_CMD' +OUTPUT = 'OUTPUT' +DEFAULT_OUTPUT = 'output' +CONF = 'CONF' +DEFAULT_CONF = 'conf' +NUM_BUILDER = 'NUM_BUILDER' +DEFAULT_NUM_BUILDER = 1 +NUM_TESTER = 'NUM_TESTER' +DEFAULT_NUM_TESTER = 1 +STOP_THRESHOLD = 'STOP_THRESHOLD' +DEFAULT_STOP_THRESHOLD = 1 +NUM_CHROMOSOMES = 'NUM_CHROMOSOMES' +DEFAULT_NUM_CHROMOSOMES = 20 +NUM_TRIALS = 'NUM_TRIALS' +DEFAULT_NUM_TRIALS = 20 +MUTATION_RATE = 'MUTATION_RATE' +DEFAULT_MUTATION_RATE = 0.01 + + +def _ProcessGA(meta_data): + """Set up the meta data for the genetic algorithm. + + Args: + meta_data: the meta data for the genetic algorithm. 
+ """ + assert BUILD_CMD in meta_data + build_cmd = meta_data[BUILD_CMD] + + assert TEST_CMD in meta_data + test_cmd = meta_data[TEST_CMD] + + if OUTPUT not in meta_data: + output_file = DEFAULT_OUTPUT + else: + output_file = meta_data[OUTPUT] + + if CONF not in meta_data: + conf_file = DEFAULT_CONF + else: + conf_file = meta_data[CONF] + + if NUM_BUILDER not in meta_data: + num_builders = DEFAULT_NUM_BUILDER + else: + num_builders = meta_data[NUM_BUILDER] + + if NUM_TESTER not in meta_data: + num_testers = DEFAULT_NUM_TESTER + else: + num_testers = meta_data[NUM_TESTER] + + if STOP_THRESHOLD not in meta_data: + stop_threshold = DEFAULT_STOP_THRESHOLD + else: + stop_threshold = meta_data[STOP_THRESHOLD] + + if NUM_CHROMOSOMES not in meta_data: + num_chromosomes = DEFAULT_NUM_CHROMOSOMES + else: + num_chromosomes = meta_data[NUM_CHROMOSOMES] + + if NUM_TRIALS not in meta_data: + num_trials = DEFAULT_NUM_TRIALS + else: + num_trials = meta_data[NUM_TRIALS] + + if MUTATION_RATE not in meta_data: + mutation_rate = DEFAULT_MUTATION_RATE + else: + mutation_rate = meta_data[MUTATION_RATE] + + specs = flags.ReadConf(conf_file) + + # Initiate the build/test command and the log directory. + Task.InitLogCommand(build_cmd, test_cmd, output_file) + + # Initiate the build/test command and the log directory. + GAGeneration.InitMetaData(stop_threshold, num_chromosomes, num_trials, specs, + mutation_rate) + + # Generate the initial generations. + generation_tasks = testing_batch.GenerateRandomGATasks(specs, num_chromosomes, + num_trials) + generations = [GAGeneration(generation_tasks, set([]), 0)] + + # Execute the experiment. + _StartExperiment(num_builders, num_testers, generations) + + +def _ParseJson(file_name): + """Parse the input json file. + + Parse the input json file and call the proper function to perform the + algorithms. + + Args: + file_name: the input json file name. + """ + + experiments = json.load(open(file_name)) + + for experiment in experiments: + if experiment == 'GA': + # An GA experiment + _ProcessGA(experiments[experiment]) + + +def _StartExperiment(num_builders, num_testers, generations): + """Set up the experiment environment and execute the framework. + + Args: + num_builders: number of concurrent builders. + num_testers: number of concurrent testers. + generations: the initial generation for the framework. + """ + + manager = multiprocessing.Manager() + + # The queue between the steering algorithm and the builder. + steering_build = manager.Queue() + # The queue between the builder and the tester. + build_test = manager.Queue() + # The queue between the tester and the steering algorithm. + test_steering = manager.Queue() + + # Set up the processes for the builder, tester and steering algorithm module. + build_process = PipelineProcess(num_builders, 'builder', {}, BUILD_STAGE, + steering_build, pipeline_worker.Helper, + pipeline_worker.Worker, build_test) + + test_process = PipelineProcess(num_testers, 'tester', {}, TEST_STAGE, + build_test, pipeline_worker.Helper, + pipeline_worker.Worker, test_steering) + + steer_process = multiprocessing.Process( + target=Steering, + args=(set([]), generations, test_steering, steering_build)) + + # Start the processes. + build_process.start() + test_process.start() + steer_process.start() + + # Wait for the processes to finish. 
+  build_process.join()
+  test_process.join()
+  steer_process.join()
+
+
+def main(argv):
+  (options, _) = parser.parse_args(argv)
+  assert options.filename
+  _ParseJson(options.filename)
+
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/bestflags/examples/omnetpp/README b/bestflags/examples/omnetpp/README
new file mode 100644
index 00000000..eba522fe
--- /dev/null
+++ b/bestflags/examples/omnetpp/README
@@ -0,0 +1,23 @@
+This directory contains the omnetpp example from the SPEC2006 benchmark.
+
+It also contains the json configuration file which includes the meta data
+information needed to run the experiment.
+
+This directory contains a build file build_omnetpp which is used by the build
+module of the framework to compile the application.
+This directory contains a test file test_omnetpp which is used by the test
+module of the framework to benchmark the compiled application.
+This directory contains a conf file which lists the set of optimization flags
+the experiment will try.
+
+To use this directory, first give the files executable permission:
+  chmod a+x build_omnetpp
+  chmod a+x test_omnetpp
+
+Copy the SPEC2006 benchmark into this directory.
+
+To run, invoke example_algorithms.py in the parent directory:
+  python example_algorithms.py --file=examples/omnetpp/example.json
+
+For help:
+  python example_algorithms.py --help
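The conf file in this directory lists one flag specification per line. A boolean flag is given by its name, and a numeric flag may use the [start-end] range syntax that flags.py expands into a concrete value; the leading '-' is added later by the build script. A hypothetical conf mixing both kinds (the flag names here are only illustrative) could look like:

  ftree-vectorize
  finline-limit=[50-1000]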
\ No newline at end of file diff --git a/bestflags/examples/omnetpp/build_omnetpp b/bestflags/examples/omnetpp/build_omnetpp new file mode 100755 index 00000000..35e9ec13 --- /dev/null +++ b/bestflags/examples/omnetpp/build_omnetpp @@ -0,0 +1,69 @@ +#!/bin/bash -x + +cd examples/omnetpp/cpu2006-redhat-ia32 + +# Contains the optimization flags. +flags='' + +# The index of the parameter. +i=1 + +# Indicate whether it is parsing the gcc param. +in_gcc_param=false + +for parameter in "$@" + do + # The last parameter is the file name. + if [ "$i" == "$#" ]; then + file=$parameter + break + fi + + # The param is not a flag, it combines with the flag that comes right after. + # For example, --param max-inline-insns-single + if [ "$parameter" == "-param" ]; then + in_gcc_param=true + flags+=-$parameter' ' + let i++ + continue + fi + + # In in_gcc_param section, this flag follows the key word '--param'. + if [ $in_gcc_param == true ]; then + flags+=$parameter' ' + let i++ + in_gcc_param=false + continue + fi + + # Normal flags. + flags+=-$parameter' ' + let i++ + done + +# Change the configuration file. +content=$(sed s/amd64-m64-gcc41-kk/test$file/ config/linux64-amd64-pgi.cfg) +echo "$content" | sed s/-O2/-O1\ "$flags"/ >config/linux64-amd64-pgi$file.cfg +. ./shrc +/usr/bin/time -o temp$file runspec --config linux64-amd64-pgi$file -D --action=build 471.omnetpp + +state=$? + +outfile="./benchspec/CPU2006/471.omnetpp/run/build_base_test$file.0000/omnetpp" + +if [ $state -eq 0 ];then + user_time=$(cat build_timer$file | grep "user" | cut -d "u" -f 1) + output_file="$file" + + checksum=$(readelf -x .text $outfile | md5sum | cut -d " " -f 1) + file_size=$(ls -l $outfile | cut -d " " -f 5) + text_section=$(readelf -SW $outfile | grep ".text") + size_hex=$(echo $text_section | sed "s/\s\{1,\}/\ /g" | cut -d ' ' -f 6) + size=$(echo $size_hex | ( echo "ibase=16" ; tr '[:lower:]' '[:upper:]') | bc) + + echo $checksum $user_time $output_file $file_size $size +else + echo "error" "error" "error" "error" "error" +fi + +return $state
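On success, build_omnetpp prints a single line of five whitespace-separated fields: the md5 checksum of the binary's .text section, the user build time, the task label, the file size, and the decoded .text section size; on failure it prints five "error" fields instead. A made-up line, purely to illustrate the shape the framework's build stage presumably parses, would be:

  9e107d9d372bb6826bd81d3542a419d6 95.37 42 1382400 917504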
\ No newline at end of file diff --git a/bestflags/examples/omnetpp/conf b/bestflags/examples/omnetpp/conf new file mode 100644 index 00000000..c7059e12 --- /dev/null +++ b/bestflags/examples/omnetpp/conf @@ -0,0 +1,2 @@ +fgcse-after-reload +ftree-vectorize diff --git a/bestflags/examples/omnetpp/example.json b/bestflags/examples/omnetpp/example.json new file mode 100644 index 00000000..dda6dd11 --- /dev/null +++ b/bestflags/examples/omnetpp/example.json @@ -0,0 +1,24 @@ +{ +"GA":{ + +"BUILD_CMD": "./examples/omnetpp/build_omnetpp", + +"TEST_CMD": "./examples/omnetpp/test_omnetpp", + +"OUTPUT": "output", + +"CONF": "examples/omnetpp/conf", + +"NUM_BUILDER": 3, + +"NUM_TESTER": 2, + +"STOP_THRESHOLD": 20, + +"NUM_CHROMOSOMES": 10, + +"NUM_TRIALS": 20, + +"MUTATION_RATE": 0.03 +} +}
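In the configuration above, only BUILD_CMD and TEST_CMD are required; _ProcessGA in example_algorithms.py asserts their presence and falls back to the DEFAULT_* constants for every other key (for example, CONF defaults to a file named 'conf' in the working directory). A stripped-down but still valid configuration could therefore be as small as:

{
"GA":{

"BUILD_CMD": "./examples/omnetpp/build_omnetpp",

"TEST_CMD": "./examples/omnetpp/test_omnetpp"
}
}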
\ No newline at end of file diff --git a/bestflags/examples/omnetpp/test_omnetpp b/bestflags/examples/omnetpp/test_omnetpp new file mode 100755 index 00000000..aeedb634 --- /dev/null +++ b/bestflags/examples/omnetpp/test_omnetpp @@ -0,0 +1,24 @@ +#!/bin/bash -ux + +cd /usr/local/google/home/yuhenglong/Desktop/spec2006/cpu2006-redhat-ia32/ +cd benchspec/CPU2006/471.omnetpp/run/build_base_test$1.0000 + +(time ./omnetpp$1 ../../data/train/input/omnetpp.ini) 1>log-file 2>time.txt + +state=$? + +if [ $state -eq 0 ];then + diff ../../data/train/output/omnetpp.sca.result omnetpp.sca + state=$? + if [ $state -eq 0 ];then + time=$(cat time.txt | grep real | cut -f2 -s | cut -d 's' -f 1) + time=$(echo $time | awk -Fm '{ print ($1 * 60) + $2 }') + echo $time + else + echo "error" + fi +else + echo "error" +fi + +return $state
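test_omnetpp extracts the 'real' line from the time output and converts it to seconds before echoing it as the benchmark result. For illustration, assuming the run reported real 0m42.30s, the stripped value 0m42.30 is reduced by the awk step as follows:

  echo "0m42.30" | awk -Fm '{ print ($1 * 60) + $2 }'   # prints 42.3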
\ No newline at end of file diff --git a/bestflags/flags.py b/bestflags/flags.py new file mode 100644 index 00000000..b316421e --- /dev/null +++ b/bestflags/flags.py @@ -0,0 +1,197 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Manage bundles of flags used for the optimizing of ChromeOS. + +Part of the Chrome build flags optimization. + +The content of this module is adapted from the Trakhelp JVM project. This module +contains the basic Class Flag and the Class FlagSet. The core abstractions are: + +The class Flag, which takes a domain specific language describing how to fill +the flags with values. + +The class FlagSet, which contains a number of flags and can create new FlagSets +by mixing with other FlagSets. + +The Flag DSL works by replacing value ranges in [x-y] with numbers in the range +x through y. + +Examples: + "foo[0-9]bar" will expand to e.g. "foo5bar". +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import random +import re + +# +# This matches a [...] group in the internal representation for a flag +# specification, and is used in "filling out" flags - placing values inside +# the flag_spec. The internal flag_spec format is like "foo[0]", with +# values filled out like 5; this would be transformed by +# FormattedForUse() into "foo5". +_FLAG_FILLOUT_VALUE_RE = re.compile(r'\[([^\]]*)\]') + +# This matches a numeric flag flag=[start-end]. +rx = re.compile(r'\[(?P<start>\d+)-(?P<end>\d+)\]') + + +# Search the numeric flag pattern. +def Search(spec): + return rx.search(spec) + + +class NoSuchFileError(Exception): + """Define an Exception class for user providing invalid input file.""" + pass + + +def ReadConf(file_name): + """Parse the configuration file. + + The configuration contains one flag specification in each line. + + Args: + file_name: The name of the configuration file. + + Returns: + A list of specs in the configuration file. + + Raises: + NoSuchFileError: The caller should provide a valid configuration file. + """ + + with open(file_name, 'r') as input_file: + lines = input_file.readlines() + + return sorted([line.strip() for line in lines if line.strip()]) + + raise NoSuchFileError() + + +class Flag(object): + """A class representing a particular command line flag argument. + + The Flag consists of two parts: The spec and the value. + The spec is a definition of the following form: a string with escaped + sequences of the form [<start>-<end>] where start and end is an positive + integer for a fillable value. + + An example of a spec is "foo[0-9]". + There are two kinds of flags, boolean flag and numeric flags. Boolean flags + can either be turned on or off, which numeric flags can have different + positive integer values. For example, -finline-limit=[1-1000] is a numeric + flag and -ftree-vectorize is a boolean flag. + + A (boolean/numeric) flag is not turned on if it is not selected in the + FlagSet. + """ + + def __init__(self, spec, value=-1): + self._spec = spec + + # If the value is not specified, generate a random value to use. + if value == -1: + # If creating a boolean flag, the value will be 0. + value = 0 + + # Parse the spec's expression for the flag value's numeric range. + numeric_flag_match = Search(spec) + + # If this is a numeric flag, a value is chosen within start and end, start + # inclusive and end exclusive. 
+ if numeric_flag_match: + start = int(numeric_flag_match.group('start')) + end = int(numeric_flag_match.group('end')) + + assert start < end + value = random.randint(start, end) + + self._value = value + + def __eq__(self, other): + if isinstance(other, Flag): + return self._spec == other.GetSpec() and self._value == other.GetValue() + return False + + def __hash__(self): + return hash(self._spec) + self._value + + def GetValue(self): + """Get the value for this flag. + + Returns: + The value. + """ + + return self._value + + def GetSpec(self): + """Get the spec for this flag. + + Returns: + The spec. + """ + + return self._spec + + def FormattedForUse(self): + """Calculate the combination of flag_spec and values. + + For e.g. the flag_spec 'foo[0-9]' and the value equals to 5, this will + return 'foo5'. The filled out version of the flag is the text string you use + when you actually want to pass the flag to some binary. + + Returns: + A string that represent the filled out flag, e.g. the flag with the + FlagSpec '-X[0-9]Y' and value equals to 5 would return '-X5Y'. + """ + + return _FLAG_FILLOUT_VALUE_RE.sub(str(self._value), self._spec) + + +class FlagSet(object): + """A dictionary of Flag objects. + + The flags dictionary stores the spec and flag pair. + """ + + def __init__(self, flag_array): + # Store the flags as a dictionary mapping of spec -> flag object + self._flags = dict([(flag.GetSpec(), flag) for flag in flag_array]) + + def __eq__(self, other): + return isinstance(other, FlagSet) and self._flags == other.GetFlags() + + def __hash__(self): + return sum([hash(flag) for flag in self._flags.values()]) + + def __getitem__(self, flag_spec): + """Get flag with a particular flag_spec. + + Args: + flag_spec: The flag_spec to find. + + Returns: + A flag. + """ + + return self._flags[flag_spec] + + def __contains__(self, flag_spec): + return self._flags.has_key(flag_spec) + + def GetFlags(self): + return self._flags + + def FormattedForUse(self): + """Format this for use in an application. + + Returns: + A list of flags, sorted alphabetically and filled in with the values + for each flag. + """ + + return sorted([f.FormattedForUse() for f in self._flags.values()]) diff --git a/bestflags/flags_test.py b/bestflags/flags_test.py new file mode 100644 index 00000000..dbbea77c --- /dev/null +++ b/bestflags/flags_test.py @@ -0,0 +1,190 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Unit tests for the classes in module 'flags'. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import random +import sys +import unittest + +from flags import Flag +from flags import FlagSet + +# The number of tests to test. +NUM_TESTS = 20 + + +class FlagTest(unittest.TestCase): + """This class tests the Flag class.""" + + def testInit(self): + """The value generated should fall within start and end of the spec. + + If the value is not specified, the value generated should fall within start + and end of the spec. + """ + + for _ in range(NUM_TESTS): + start = random.randint(1, sys.maxint - 1) + end = random.randint(start + 1, sys.maxint) + + spec = 'flag=[%s-%s]' % (start, end) + + test_flag = Flag(spec) + + value = test_flag.GetValue() + + # If the value is not specified when the flag is constructed, a random + # value is chosen. This value should fall within start and end of the + # spec. 
+ assert start <= value and value < end + + def testEqual(self): + """Test the equal operator (==) of the flag. + + Two flags are equal if and only if their spec and value are equal. + """ + + tests = range(NUM_TESTS) + + # Two tasks having the same spec and value should be equivalent. + for test in tests: + assert Flag(str(test), test) == Flag(str(test), test) + + # Two tasks having different flag set should be different. + for test in tests: + flag = Flag(str(test), test) + other_flag_sets = [other for other in tests if test != other] + for other_test in other_flag_sets: + assert flag != Flag(str(other_test), other_test) + + def testFormattedForUse(self): + """Test the FormattedForUse method of the flag. + + The FormattedForUse replaces the string within the [] with the actual value. + """ + + for _ in range(NUM_TESTS): + start = random.randint(1, sys.maxint - 1) + end = random.randint(start + 1, sys.maxint) + value = random.randint(start, end - 1) + + spec = 'flag=[%s-%s]' % (start, end) + + test_flag = Flag(spec, value) + + # For numeric flag, the FormattedForUse replaces the string within the [] + # with the actual value. + test_value = test_flag.FormattedForUse() + actual_value = 'flag=%s' % value + + assert test_value == actual_value + + for _ in range(NUM_TESTS): + value = random.randint(1, sys.maxint - 1) + + test_flag = Flag('flag', value) + + # For boolean flag, the FormattedForUse returns the spec. + test_value = test_flag.FormattedForUse() + actual_value = 'flag' + assert test_value == actual_value + + +class FlagSetTest(unittest.TestCase): + """This class test the FlagSet class.""" + + def testEqual(self): + """Test the equal method of the Class FlagSet. + + Two FlagSet instances are equal if all their flags are equal. + """ + + flag_names = range(NUM_TESTS) + + # Two flag sets having the same flags should be equivalent. + for flag_name in flag_names: + spec = '%s' % flag_name + + assert FlagSet([Flag(spec)]) == FlagSet([Flag(spec)]) + + # Two flag sets having different flags should be different. + for flag_name in flag_names: + spec = '%s' % flag_name + flag_set = FlagSet([Flag(spec)]) + other_flag_sets = [other for other in flag_names if flag_name != other] + for other_name in other_flag_sets: + other_spec = '%s' % other_name + assert flag_set != FlagSet([Flag(other_spec)]) + + def testGetItem(self): + """Test the get item method of the Class FlagSet. + + The flag set is also indexed by the specs. The flag set should return the + appropriate flag given the spec. + """ + + tests = range(NUM_TESTS) + + specs = [str(spec) for spec in tests] + flag_array = [Flag(spec) for spec in specs] + + flag_set = FlagSet(flag_array) + + # Created a dictionary of spec and flag, the flag set should return the flag + # the same as this dictionary. + spec_flag = dict(zip(specs, flag_array)) + + for spec in spec_flag: + assert flag_set[spec] == spec_flag[spec] + + def testContain(self): + """Test the contain method of the Class FlagSet. + + The flag set is also indexed by the specs. The flag set should return true + for spec if it contains a flag containing spec. + """ + + true_tests = range(NUM_TESTS) + false_tests = range(NUM_TESTS, NUM_TESTS * 2) + + specs = [str(spec) for spec in true_tests] + + flag_set = FlagSet([Flag(spec) for spec in specs]) + + for spec in specs: + assert spec in flag_set + + for spec in false_tests: + assert spec not in flag_set + + def testFormattedForUse(self): + """Test the FormattedForUse method of the Class FlagSet. 
+ + The output should be a sorted list of strings. + """ + + flag_names = range(NUM_TESTS) + flag_names.reverse() + flags = [] + result = [] + + # Construct the flag set. + for flag_name in flag_names: + spec = '%s' % flag_name + flags.append(Flag(spec)) + result.append(spec) + + flag_set = FlagSet(flags) + + # The results string should be sorted. + assert sorted(result) == flag_set.FormattedForUse() + + +if __name__ == '__main__': + unittest.main() diff --git a/bestflags/flags_util.py b/bestflags/flags_util.py new file mode 100644 index 00000000..20be57fb --- /dev/null +++ b/bestflags/flags_util.py @@ -0,0 +1,95 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Utility functions to explore the neighbor flags. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import flags +from flags import Flag + + +def ClimbNext(flags_dict, climb_spec): + """Get the flags that are different from |flags_dict| by |climb_spec|. + + Given a set of flags, |flags_dict|, return a new set of flags that are + adjacent along the flag spec |climb_spec|. + + An example flags_dict is {foo=[1-9]:foo=5, bar=[1-5]:bar=2} and climb_spec is + bar=[1-5]. This method changes the flag that contains the spec bar=[1-5]. The + results are its neighbors dictionaries, i.e., {foo=[1-9]:foo=5, + bar=[1-5]:bar=1} and {foo=[1-9]:foo=5, bar=[1-5]:bar=3}. + + Args: + flags_dict: The dictionary containing the original flags whose neighbors are + to be explored. + climb_spec: The spec in the flags_dict is to be changed. The spec is a + definition in the little language, a string with escaped sequences of the + form [<start>-<end>] where start and end is an positive integer for a + fillable value. An example of a spec is "foo[0-9]". + + Returns: + List of dictionaries of neighbor flags. + """ + + # This method searches for a pattern [start-end] in the spec. If the spec + # contains this pattern, it is a numeric flag. Otherwise it is a boolean flag. + # For example, -finline-limit=[1-1000] is a numeric flag and -falign-jumps is + # a boolean flag. + numeric_flag_match = flags.Search(climb_spec) + + # If the flags do not contain the spec. + if climb_spec not in flags_dict: + results = flags_dict.copy() + + if numeric_flag_match: + # Numeric flags. + results[climb_spec] = Flag(climb_spec, + int(numeric_flag_match.group('start'))) + else: + # Boolean flags. + results[climb_spec] = Flag(climb_spec) + + return [results] + + # The flags contain the spec. + if not numeric_flag_match: + # Boolean flags. + results = flags_dict.copy() + + # Turn off the flag. A flag is turned off if it is not presented in the + # flags_dict. + del results[climb_spec] + return [results] + + # Numeric flags. + flag = flags_dict[climb_spec] + + # The value of the flag having spec. + value = flag.GetValue() + results = [] + + if value + 1 < int(numeric_flag_match.group('end')): + # If the value is not the end value, explore the value that is 1 larger than + # the current value. + neighbor = flags_dict.copy() + neighbor[climb_spec] = Flag(climb_spec, value + 1) + results.append(neighbor) + + if value > int(numeric_flag_match.group('start')): + # If the value is not the start value, explore the value that is 1 lesser + # than the current value. 
+ neighbor = flags_dict.copy() + neighbor[climb_spec] = Flag(climb_spec, value - 1) + results.append(neighbor) + else: + # Delete the value, i.e., turn off the flag. A flag is turned off if it is + # not presented in the flags_dict. + neighbor = flags_dict.copy() + del neighbor[climb_spec] + results.append(neighbor) + + return results diff --git a/bestflags/generation.py b/bestflags/generation.py new file mode 100644 index 00000000..67c379f5 --- /dev/null +++ b/bestflags/generation.py @@ -0,0 +1,139 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""A generation of a set of tasks. + +Part of the Chrome build flags optimization. + +This module contains the base generation class. This class contains the tasks of +this current generation. The generation will not evolve to the next generation +until all the tasks in this generation are done executing. It also contains a +set of tasks that could potentially be used to generate the next generation, +e.g., in the genetic algorithm, a set of good species will be kept to evolve to +the successive generations. For the hill climbing algorithm example, the +candidate_pool will contain a current task t being evaluated and the exe_set +will contains all the task t's neighbor. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + + +class NoneOverridingError(Exception): + """Define an Exception class for subclasses not overriding certain methods.""" + pass + + +class Generation(object): + """A generation of a framework run. + + The base class of generation. Concrete subclasses, e.g., GAGeneration should + override the Next and IsImproved method to implement algorithm specific + applications. + """ + + def __init__(self, exe_set, candidate_pool): + """Set up the tasks set of this generation. + + Args: + exe_set: A set of tasks to be run. + candidate_pool: A set of tasks to be considered to be used to generate the + next generation. + """ + + self._exe_set = exe_set + self._candidate_pool = candidate_pool + + # Keeping the record of how many tasks are pending. Pending tasks are the + # ones that have been sent out to the next stage for execution but have not + # finished. A generation is not ready for the reproduction of the new + # generations until all its pending tasks have been executed. + self._pending = len(exe_set) + + def CandidatePool(self): + """Return the candidate tasks of this generation.""" + + return self._candidate_pool + + def Pool(self): + """Return the task set of this generation.""" + + return self._exe_set + + def Done(self): + """All the tasks in this generation are done. + + Returns: + True if all the tasks have been executed. That is the number of pending + task is 0. + """ + + return self._pending == 0 + + def UpdateTask(self, task): + """Match a task t in this generation that is equal to the input task. + + This method is called when the input task has just finished execution. This + method finds out whether there is a pending task t in the current generation + that is the same as the input task. Two tasks are the same if their flag + options are the same. A task is pending if it has not been performed. + If there is a pending task t that matches the input task, task t will be + substituted with the input task in this generation. In that case, the input + task, as well as its build and test results encapsulated in the task, will + be stored in the current generation. 
These results could be used to produce + the next generation. + If there is a match, the current generation will have one less pending task. + When there is no pending task, the generation can be used to produce the + next generation. + The caller of this function is responsible for not calling this method on + the same task more than once. + + Args: + task: A task that has its results ready. + + Returns: + Whether the input task belongs to this generation. + """ + + # If there is a match, the input task belongs to this generation. + if task not in self._exe_set: + return False + + # Remove the place holder task in this generation and store the new input + # task and its result. + self._exe_set.remove(task) + self._exe_set.add(task) + + # The current generation will have one less task to wait on. + self._pending -= 1 + + assert self._pending >= 0 + + return True + + def IsImproved(self): + """True if this generation has improvement upon its parent generation. + + Raises: + NoneOverridingError: The subclass should override this method. + """ + raise NoneOverridingError('Must be implemented by child class') + + def Next(self, _): + """Calculate the next generation. + + This is the core of the framework implementation. It must be overridden by + the concrete subclass to implement algorithm specific generations. + + Args: + _: A set of tasks that have been generated before. The overridden method + in the subclasses can use this so as not to generate task that has been + generated before. + + Returns: + A set of new generations. + + Raises: + NoneOverridingError: The subclass should override this method. + """ + + raise NoneOverridingError('Must be implemented by child class') diff --git a/bestflags/generation_test.py b/bestflags/generation_test.py new file mode 100644 index 00000000..2e042d49 --- /dev/null +++ b/bestflags/generation_test.py @@ -0,0 +1,72 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Generation unittest. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import random +import unittest + +from generation import Generation +from mock_task import IdentifierMockTask + +# Pick an integer at random. +TEST_STAGE = -125 + +# The number of tasks to be put in a generation to be tested. +NUM_TASKS = 20 + +# The stride of permutation used to shuffle the input list of tasks. Should be +# relatively prime with NUM_TASKS. +STRIDE = 7 + + +class GenerationTest(unittest.TestCase): + """This class test the Generation class. + + Given a set of tasks in the generation, if there is any task that is pending, + then the Done method will return false, and true otherwise. + """ + + def testDone(self): + """"Test the Done method. + + Produce a generation with a set of tasks. Set the cost of the task one by + one and verify that the Done method returns false before setting the cost + for all the tasks. After the costs of all the tasks are set, the Done method + should return true. + """ + + random.seed(0) + + testing_tasks = range(NUM_TASKS) + + # The tasks for the generation to be tested. + tasks = [IdentifierMockTask(TEST_STAGE, t) for t in testing_tasks] + + gen = Generation(set(tasks), None) + + # Permute the list. 
+ permutation = [(t * STRIDE) % NUM_TASKS for t in range(NUM_TASKS)] + permuted_tasks = [testing_tasks[index] for index in permutation] + + # The Done method of the Generation should return false before all the tasks + # in the permuted list are set. + for testing_task in permuted_tasks: + assert not gen.Done() + + # Mark a task as done by calling the UpdateTask method of the generation. + # Send the generation the task as well as its results. + gen.UpdateTask(IdentifierMockTask(TEST_STAGE, testing_task)) + + # The Done method should return true after all the tasks in the permuted + # list is set. + assert gen.Done() + + +if __name__ == '__main__': + unittest.main() diff --git a/bestflags/genetic_algorithm.py b/bestflags/genetic_algorithm.py new file mode 100644 index 00000000..deb83f12 --- /dev/null +++ b/bestflags/genetic_algorithm.py @@ -0,0 +1,295 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""The hill genetic algorithm. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import random + +import flags +from flags import Flag +from flags import FlagSet +from generation import Generation +from task import Task + + +def CrossoverWith(first_flag, second_flag): + """Get a crossed over gene. + + At present, this just picks either/or of these values. However, it could be + implemented as an integer maskover effort, if required. + + Args: + first_flag: The first gene (Flag) to cross over with. + second_flag: The second gene (Flag) to cross over with. + + Returns: + A Flag that can be considered appropriately randomly blended between the + first and second input flag. + """ + + return first_flag if random.randint(0, 1) else second_flag + + +def RandomMutate(specs, flag_set, mutation_rate): + """Randomly mutate the content of a task. + + Args: + specs: A list of spec from which the flag set is created. + flag_set: The current flag set being mutated + mutation_rate: What fraction of genes to mutate. + + Returns: + A Genetic Task constructed by randomly mutating the input flag set. + """ + + results_flags = [] + + for spec in specs: + # Randomly choose whether this flag should be mutated. + if random.randint(0, int(1 / mutation_rate)): + continue + + # If the flag is not already in the flag set, it is added. + if spec not in flag_set: + results_flags.append(Flag(spec)) + continue + + # If the flag is already in the flag set, it is mutated. + numeric_flag_match = flags.Search(spec) + + # The value of a numeric flag will be changed, and a boolean flag will be + # dropped. + if not numeric_flag_match: + continue + + value = flag_set[spec].GetValue() + + # Randomly select a nearby value of the current value of the flag. + rand_arr = [value] + if value + 1 < int(numeric_flag_match.group('end')): + rand_arr.append(value + 1) + + rand_arr.append(value - 1) + value = random.sample(rand_arr, 1)[0] + + # If the value is smaller than the start of the spec, this flag will be + # dropped. + if value != int(numeric_flag_match.group('start')) - 1: + results_flags.append(Flag(spec, value)) + + return GATask(FlagSet(results_flags)) + + +class GATask(Task): + + def __init__(self, flag_set): + Task.__init__(self, flag_set) + + def ReproduceWith(self, other, specs, mutation_rate): + """Reproduce with other FlagSet. + + Args: + other: A FlagSet to reproduce with. + specs: A list of spec from which the flag set is created. 
+ mutation_rate: one in mutation_rate flags will be mutated (replaced by a + random version of the same flag, instead of one from either of the + parents). Set to 0 to disable mutation. + + Returns: + A GA task made by mixing self with other. + """ + + # Get the flag dictionary. + father_flags = self.GetFlags().GetFlags() + mother_flags = other.GetFlags().GetFlags() + + # Flags that are common in both parents and flags that belong to only one + # parent. + self_flags = [] + other_flags = [] + common_flags = [] + + # Find out flags that are common to both parent and flags that belong soly + # to one parent. + for self_flag in father_flags: + if self_flag in mother_flags: + common_flags.append(self_flag) + else: + self_flags.append(self_flag) + + for other_flag in mother_flags: + if other_flag not in father_flags: + other_flags.append(other_flag) + + # Randomly select flags that belong to only one parent. + output_flags = [father_flags[f] for f in self_flags if random.randint(0, 1)] + others = [mother_flags[f] for f in other_flags if random.randint(0, 1)] + output_flags.extend(others) + # Turn on flags that belong to both parent. Randomly choose the value of the + # flag from either parent. + for flag in common_flags: + output_flags.append(CrossoverWith(father_flags[flag], mother_flags[flag])) + + # Mutate flags + if mutation_rate: + return RandomMutate(specs, FlagSet(output_flags), mutation_rate) + + return GATask(FlagSet(output_flags)) + + +class GAGeneration(Generation): + """The Genetic Algorithm.""" + + # The value checks whether the algorithm has converged and arrives at a fixed + # point. If STOP_THRESHOLD of generations have not seen any performance + # improvement, the Genetic Algorithm stops. + STOP_THRESHOLD = None + + # Number of tasks in each generation. + NUM_CHROMOSOMES = None + + # The value checks whether the algorithm has converged and arrives at a fixed + # point. If NUM_TRIALS of trials have been attempted to generate a new task + # without a success, the Genetic Algorithm stops. + NUM_TRIALS = None + + # The flags that can be used to generate new tasks. + SPECS = None + + # What fraction of genes to mutate. + MUTATION_RATE = 0 + + @staticmethod + def InitMetaData(stop_threshold, num_chromosomes, num_trials, specs, + mutation_rate): + """Set up the meta data for the Genetic Algorithm. + + Args: + stop_threshold: The number of generations, upon which no performance has + seen, the Genetic Algorithm stops. + num_chromosomes: Number of tasks in each generation. + num_trials: The number of trials, upon which new task has been tried to + generated without success, the Genetic Algorithm stops. + specs: The flags that can be used to generate new tasks. + mutation_rate: What fraction of genes to mutate. + """ + + GAGeneration.STOP_THRESHOLD = stop_threshold + GAGeneration.NUM_CHROMOSOMES = num_chromosomes + GAGeneration.NUM_TRIALS = num_trials + GAGeneration.SPECS = specs + GAGeneration.MUTATION_RATE = mutation_rate + + def __init__(self, tasks, parents, total_stucks): + """Set up the meta data for the Genetic Algorithm. + + Args: + tasks: A set of tasks to be run. + parents: A set of tasks from which this new generation is produced. This + set also contains the best tasks generated so far. + total_stucks: The number of generations that have not seen improvement. + The Genetic Algorithm will stop once the total_stucks equals to + NUM_TRIALS defined in the GAGeneration class. 
+ """ + + Generation.__init__(self, tasks, parents) + self._total_stucks = total_stucks + + def IsImproved(self): + """True if this generation has improvement upon its parent generation.""" + + tasks = self.Pool() + parents = self.CandidatePool() + + # The first generate does not have parents. + if not parents: + return True + + # Found out whether a task has improvement upon the best task in the + # parent generation. + best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0] + best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0] + + # At least one task has improvement. + if best_current.IsImproved(best_parent): + self._total_stucks = 0 + return True + + # If STOP_THRESHOLD of generations have no improvement, the algorithm stops. + if self._total_stucks >= GAGeneration.STOP_THRESHOLD: + return False + + self._total_stucks += 1 + return True + + def Next(self, cache): + """Calculate the next generation. + + Generate a new generation from the a set of tasks. This set contains the + best set seen so far and the tasks executed in the parent generation. + + Args: + cache: A set of tasks that have been generated before. + + Returns: + A set of new generations. + """ + + target_len = GAGeneration.NUM_CHROMOSOMES + specs = GAGeneration.SPECS + mutation_rate = GAGeneration.MUTATION_RATE + + # Collect a set of size target_len of tasks. This set will be used to + # produce a new generation of tasks. + gen_tasks = [task for task in self.Pool()] + + parents = self.CandidatePool() + if parents: + gen_tasks.extend(parents) + + # A set of tasks that are the best. This set will be used as the parent + # generation to produce the next generation. + sort_func = lambda task: task.GetTestResult() + retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len] + + child_pool = set() + for father in retained_tasks: + num_trials = 0 + # Try num_trials times to produce a new child. + while num_trials < GAGeneration.NUM_TRIALS: + # Randomly select another parent. + mother = random.choice(retained_tasks) + # Cross over. + child = mother.ReproduceWith(father, specs, mutation_rate) + if child not in child_pool and child not in cache: + child_pool.add(child) + break + else: + num_trials += 1 + + num_trials = 0 + + while len(child_pool) < target_len and num_trials < GAGeneration.NUM_TRIALS: + for keep_task in retained_tasks: + # Mutation. + child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate) + if child not in child_pool and child not in cache: + child_pool.add(child) + if len(child_pool) >= target_len: + break + else: + num_trials += 1 + + # If NUM_TRIALS of tries have been attempted without generating a set of new + # tasks, the algorithm stops. + if num_trials >= GAGeneration.NUM_TRIALS: + return [] + + assert len(child_pool) == target_len + + return [GAGeneration(child_pool, set(retained_tasks), self._total_stucks)] diff --git a/bestflags/hill_climb_best_neighbor.py b/bestflags/hill_climb_best_neighbor.py new file mode 100644 index 00000000..7bb5a7ff --- /dev/null +++ b/bestflags/hill_climb_best_neighbor.py @@ -0,0 +1,107 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""A variation of the hill climbing algorithm. + +Part of the Chrome build flags optimization. + +This algorithm explores all the neighbors of the current task. If at least one +neighbor gives better performance than the current task, it explores the best +neighbor. 
+""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +from flags import FlagSet +import flags_util +from generation import Generation +from task import Task + + +class HillClimbingBestBranch(Generation): + """A variation of the hill climbing algorithm. + + Given a task, it explores all its neighbors. Pick the best neighbor for the + next iteration. + """ + + def __init__(self, exe_set, parents, specs): + """Set up the tasks set of this generation. + + Args: + exe_set: A set of tasks to be run. + parents: A set of tasks to be used to check whether their neighbors have + improved upon them. + specs: A list of specs to explore. The spec specifies the flags that can + be changed to find neighbors of a task. + """ + + Generation.__init__(self, exe_set, parents) + self._specs = specs + + # This variable will be used, by the Next method, to generate the tasks for + # the next iteration. This self._next_task contains the best task in the + # current iteration and it will be set by the IsImproved method. The tasks + # of the next iteration are the neighbor of self._next_task. + self._next_task = None + + def IsImproved(self): + """True if this generation has improvement over its parent generation. + + If this generation improves upon the previous generation, this method finds + out the best task in this generation and sets it to _next_task for the + method Next to use. + + Returns: + True if the best neighbor improves upon the parent task. + """ + + # Find the best neighbor. + best_task = None + for task in self._exe_set: + if not best_task or task.IsImproved(best_task): + best_task = task + + if not best_task: + return False + + # The first generation may not have parent generation. + parents = list(self._candidate_pool) + if parents: + assert len(parents) == 1 + self._next_task = best_task + # If the best neighbor improves upon the parent task. + return best_task.IsImproved(parents[0]) + + self._next_task = best_task + return True + + def Next(self, cache): + """Calculate the next generation. + + The best neighbor b of the current task is the parent of the next + generation. The neighbors of b will be the set of tasks to be evaluated + next. + + Args: + cache: A set of tasks that have been generated before. + + Returns: + A set of new generations. + """ + + # The best neighbor. + current_task = self._next_task + flag_set = current_task.GetFlags() + + # The neighbors of the best neighbor. + children_tasks = set([]) + for spec in self._specs: + for next_flag in flags_util.ClimbNext(flag_set.GetFlags(), spec): + new_task = Task(FlagSet(next_flag.values())) + + if new_task not in cache: + children_tasks.add(new_task) + + return [HillClimbingBestBranch(children_tasks, set([current_task]), + self._specs)] diff --git a/bestflags/iterative_elimination.py b/bestflags/iterative_elimination.py new file mode 100644 index 00000000..2f4c41d1 --- /dev/null +++ b/bestflags/iterative_elimination.py @@ -0,0 +1,177 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Iterative flags elimination. + +Part of the Chrome build flags optimization. + +This module implements the flag iterative elimination algorithm (IE) adopted +from the paper +Z. Pan et al. Fast and Effective Orchestration of Compiler Optimizations for +Automatic Performance Tuning. + +IE begins with the base line that turns on all the optimizations flags and +setting the numeric flags to their highest values. 
IE turns off the one boolean +flag or lower the value of a numeric flag with the most negative effect from the +baseline. This process repeats with all remaining flags, until none of them +causes performance degradation. The complexity of IE is O(n^2). + +For example, -fstrict-aliasing and -ftree-vectorize. The base line is +b=[-fstrict-aliasing, -ftree-vectorize]. The two tasks in the first iteration +are t0=[-fstrict-aliasing] and t1=[-ftree-vectorize]. The algorithm compares b +with t0 and t1, respectively, and see whether setting the numeric flag with a +lower value or removing the boolean flag -fstrict-aliasing produce a better +fitness value. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import flags +from generation import Generation +import task + + +def _DecreaseFlag(flags_dict, spec): + """Decrease the value of the flag that has the specification spec. + + If the flag that contains the spec is a boolean flag, it is eliminated. + Otherwise the flag is a numeric flag, its value will be reduced by one. + + Args: + flags_dict: The dictionary containing the original flags whose neighbors are + to be explored. + spec: The spec in the flags_dict is to be changed. + + Returns: + Dictionary of neighbor flag that is only different from the original + dictionary by the spec. + """ + + # The specification must be held by one of the flags. + assert spec in flags_dict + + # The results this method returns. + results = flags_dict.copy() + + # This method searches for a pattern [start-end] in the spec. If the spec + # contains this pattern, it is a numeric flag. Otherwise it is a boolean flag. + # For example, -finline-limit=[1-1000] is a numeric flag and -falign-jumps is + # a boolean flag. + numeric_flag_match = flags.Search(spec) + + if numeric_flag_match: + # numeric flag + val = results[spec].GetValue() + + # If the value of the flag is the lower boundary of the specification, this + # flag will be turned off. Because it already contains the lowest value and + # can not be decreased any more. + if val == int(numeric_flag_match.group('start')): + # Turn off the flag. A flag is turned off if it is not presented in the + # flags_dict. + del results[spec] + else: + results[spec] = flags.Flag(spec, val - 1) + else: + # Turn off the flag. A flag is turned off if it is not presented in the + # flags_dict. + del results[spec] + + return results + + +class IterativeEliminationGeneration(Generation): + """The negative flag iterative elimination algorithm.""" + + def __init__(self, exe_set, parent_task): + """Set up the base line parent task. + + The parent task is the base line against which the new tasks are compared. + The new tasks are only different from the base line from one flag f by + either turning this flag f off, or lower the flag value by 1. + If a new task is better than the base line, one flag is identified that + gives degradation. The flag that give the worst degradation will be removed + or lower the value by 1 in the base in each iteration. + + Args: + exe_set: A set of tasks to be run. Each one only differs from the + parent_task by one flag. + parent_task: The base line task, against which the new tasks in exe_set + are compared. + """ + + Generation.__init__(self, exe_set, None) + self._parent_task = parent_task + + def IsImproved(self): + """Whether any new task has improvement upon the parent task.""" + + parent = self._parent_task + # Whether there is any new task that has improvement over the parent base + # line task. 
+ for curr in [curr for curr in self.Pool() if curr != parent]: + if curr.IsImproved(parent): + return True + + return False + + def Next(self, cache): + """Find out the flag that gives the worst degradation. + + Found out the flag that gives the worst degradation. Turn that flag off from + the base line and use the new base line for the new generation. + + Args: + cache: A set of tasks that have been generated before. + + Returns: + A set of new generations. + """ + parent_task = self._parent_task + + # Find out the task that gives the worst degradation. + worst_task = parent_task + + for curr in [curr for curr in self.Pool() if curr != parent_task]: + # The method IsImproved, which is supposed to be called before, ensures + # that there is at least a task that improves upon the parent_task. + if curr.IsImproved(worst_task): + worst_task = curr + + assert worst_task != parent_task + + # The flags_set of the worst task. + work_flags_set = worst_task.GetFlags().GetFlags() + + results = set([]) + + # If the flags_set contains no flag, i.e., all the flags have been + # eliminated, the algorithm stops. + if not work_flags_set: + return [] + + # Turn of the remaining flags one by one for the next generation. + for spec in work_flags_set: + flag_set = flags.FlagSet(_DecreaseFlag(work_flags_set, spec).values()) + new_task = task.Task(flag_set) + if new_task not in cache: + results.add(new_task) + + return [IterativeEliminationGeneration(results, worst_task)] + + +class IterativeEliminationFirstGeneration(IterativeEliminationGeneration): + """The first iteration of the iterative elimination algorithm. + + The first iteration also evaluates the base line task. The base line tasks in + the subsequent iterations have been evaluated. Therefore, + IterativeEliminationGeneration does not include the base line task in the + execution set. + """ + + def IsImproved(self): + # Find out the base line task in the execution set. + parent = next(task for task in self.Pool() if task == self._parent_task) + self._parent_task = parent + + return IterativeEliminationGeneration.IsImproved(self) diff --git a/bestflags/mock_task.py b/bestflags/mock_task.py new file mode 100644 index 00000000..6de2b35c --- /dev/null +++ b/bestflags/mock_task.py @@ -0,0 +1,92 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""This module defines the common mock tasks used by various unit tests. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +# Pick an integer at random. +POISONPILL = 975 + + +class MockTask(object): + """This class emulates an actual task. + + It does not do the actual work, but simply returns the result as given when + this task is constructed. + """ + + def __init__(self, stage, identifier, cost=0): + """Set up the results for this task. + + Args: + stage: the stage of this test is in. + identifier: the identifier of this task. + cost: the mock cost of this task. + + The _cost field stored the cost. Once this task is performed, i.e., by + calling the work method or by setting the result from other task, the + _cost field will have this cost. The stage field verifies that the module + being tested and the unitest are in the same stage. If the unitest does + not care about cost of this task, the cost parameter should be leaved + blank. 
+ """ + + self._identifier = identifier + self._cost = cost + self._stage = stage + + # Indicate that this method has not been performed yet. + self._performed = False + + def __eq__(self, other): + if isinstance(other, MockTask): + return (self._identifier == other.GetIdentifier(self._stage) and + self._cost == other.GetResult(self._stage)) + return False + + def GetIdentifier(self, stage): + assert stage == self._stage + return self._identifier + + def SetResult(self, stage, cost): + assert stage == self._stage + self._cost = cost + self._performed = True + + def Work(self, stage): + assert stage == self._stage + self._performed = True + + def GetResult(self, stage): + assert stage == self._stage + return self._cost + + def Done(self, stage): + """Indicates whether the task has been performed.""" + + assert stage == self._stage + return self._performed + + def LogSteeringCost(self): + pass + + +class IdentifierMockTask(MockTask): + """This class defines the mock task that does not consider the cost. + + The task instances will be inserted into a set. Therefore the hash and the + equal methods are overridden. The unittests that compares identities of the + tasks for equality can use this mock task instead of the base mock tack. + """ + + def __hash__(self): + return self._identifier + + def __eq__(self, other): + if isinstance(other, MockTask): + return self._identifier == other.GetIdentifier(self._stage) + return False diff --git a/bestflags/pipeline_process.py b/bestflags/pipeline_process.py new file mode 100644 index 00000000..31f5f21f --- /dev/null +++ b/bestflags/pipeline_process.py @@ -0,0 +1,123 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Pipeline process that encapsulates the actual content. + +Part of the Chrome build flags optimization. + +The actual stages include the builder and the executor. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import multiprocessing + +# Pick an integer at random. +POISONPILL = 975 + + +class PipelineProcess(multiprocessing.Process): + """A process that encapsulates the actual content pipeline stage. + + The actual pipeline stage can be the builder or the tester. This process + continuously pull tasks from the queue until a poison pill is received. + Once a job is received, it will hand it to the actual stage for processing. + + Each pipeline stage contains three modules. + The first module continuously pulls task from the input queue. It searches the + cache to check whether the task has encountered before. If so, duplicate + computation can be avoided. + The second module consists of a pool of workers that do the actual work, e.g., + the worker will compile the source code and get the image in the builder + pipeline stage. + The third module is a helper that put the result cost to the cost field of the + duplicate tasks. For example, if two tasks are equivalent, only one task, say + t1 will be executed and the other task, say t2 will not be executed. The third + mode gets the result from t1, when it is available and set the cost of t2 to + be the same as that of t1. + """ + + def __init__(self, num_processes, name, cache, stage, task_queue, helper, + worker, result_queue): + """Set up input/output queue and the actual method to be called. + + Args: + num_processes: Number of helpers subprocessors this stage has. + name: The name of this stage. + cache: The computed tasks encountered before. 
+ stage: An int value that specifies the stage for this pipeline stage, for + example, build stage or test stage. This value will be used to retrieve + the keys in different stage. I.e., the flags set is the key in build + stage and the checksum is the key in the test stage. The key is used to + detect duplicates. + task_queue: The input task queue for this pipeline stage. + helper: The method hosted by the helper module to fill up the cost of the + duplicate tasks. + worker: The method hosted by the worker pools to do the actual work, e.g., + compile the image. + result_queue: The output task queue for this pipeline stage. + """ + + multiprocessing.Process.__init__(self) + + self._name = name + self._task_queue = task_queue + self._result_queue = result_queue + + self._helper = helper + self._worker = worker + + self._cache = cache + self._stage = stage + self._num_processes = num_processes + + # the queues used by the modules for communication + manager = multiprocessing.Manager() + self._helper_queue = manager.Queue() + self._work_queue = manager.Queue() + + def run(self): + """Busy pulling the next task from the queue for execution. + + Once a job is pulled, this stage invokes the actual stage method and submits + the result to the next pipeline stage. + + The process will terminate on receiving the poison pill from previous stage. + """ + + # the worker pool + work_pool = multiprocessing.Pool(self._num_processes) + + # the helper process + helper_process = multiprocessing.Process( + target=self._helper, + args=(self._stage, self._cache, self._helper_queue, self._work_queue, + self._result_queue)) + helper_process.start() + mycache = self._cache.keys() + + while True: + task = self._task_queue.get() + if task == POISONPILL: + # Poison pill means shutdown + self._result_queue.put(POISONPILL) + break + + task_key = task.GetIdentifier(self._stage) + if task_key in mycache: + # The task has been encountered before. It will be sent to the helper + # module for further processing. + self._helper_queue.put(task) + else: + # Let the workers do the actual work. + work_pool.apply_async( + self._worker, + args=(self._stage, task, self._work_queue, self._result_queue)) + mycache.append(task_key) + + # Shutdown the workers pool and the helper process. + work_pool.close() + work_pool.join() + + self._helper_queue.put(POISONPILL) + helper_process.join() diff --git a/bestflags/pipeline_process_test.py b/bestflags/pipeline_process_test.py new file mode 100644 index 00000000..b9d84067 --- /dev/null +++ b/bestflags/pipeline_process_test.py @@ -0,0 +1,89 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Pipeline Process unittest. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import multiprocessing +import unittest + +from mock_task import MockTask +import pipeline_process + +# Pick an integer at random. +ERROR = -334 +# Pick an integer at random. +TEST_STAGE = -8 + + +def MockHelper(stage, done_dict, helper_queue, _, result_queue): + """This method echos input to the output.""" + + assert stage == TEST_STAGE + while True: + if not helper_queue.empty(): + task = helper_queue.get() + if task == pipeline_process.POISONPILL: + # Poison pill means shutdown + break + + if task in done_dict: + # verify that it does not get duplicate "1"s in the test. 
+ result_queue.put(ERROR) + else: + result_queue.put(('helper', task.GetIdentifier(TEST_STAGE))) + + +def MockWorker(stage, task, _, result_queue): + assert stage == TEST_STAGE + result_queue.put(('worker', task.GetIdentifier(TEST_STAGE))) + + +class PipelineProcessTest(unittest.TestCase): + """This class test the PipelineProcess. + + All the task inserted into the input queue should be taken out and hand to the + actual pipeline handler, except for the POISON_PILL. All these task should + also be passed to the next pipeline stage via the output queue. + """ + + def testRun(self): + """Test the run method. + + Ensure that all the tasks inserted into the queue are properly handled. + """ + + manager = multiprocessing.Manager() + inp = manager.Queue() + output = manager.Queue() + + process = pipeline_process.PipelineProcess( + 2, 'testing', {}, TEST_STAGE, inp, MockHelper, MockWorker, output) + + process.start() + inp.put(MockTask(TEST_STAGE, 1)) + inp.put(MockTask(TEST_STAGE, 1)) + inp.put(MockTask(TEST_STAGE, 2)) + inp.put(pipeline_process.POISONPILL) + process.join() + + # All tasks are processed once and only once. + result = [('worker', 1), ('helper', 1), ('worker', 2), + pipeline_process.POISONPILL] + while result: + task = output.get() + + # One "1"s is passed to the worker and one to the helper. + self.assertNotEqual(task, ERROR) + + # The messages received should be exactly the same as the result. + self.assertTrue(task in result) + result.remove(task) + + +if __name__ == '__main__': + unittest.main() diff --git a/bestflags/pipeline_worker.py b/bestflags/pipeline_worker.py new file mode 100644 index 00000000..e21ec2c8 --- /dev/null +++ b/bestflags/pipeline_worker.py @@ -0,0 +1,142 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""The pipeline_worker functions of the build and test stage of the framework. + +Part of the Chrome build flags optimization. + +This module defines the helper and the worker. If there are duplicate tasks, for +example t1 and t2, needs to be built/tested, one of them, for example t1, will +be built/tested and the helper waits for the result of t1 and set the results of +the other task, t2 here, to be the same as that of t1. Setting the result of t2 +to be the same as t1 is referred to as resolving the result of t2. +The worker invokes the work method of the tasks that are not duplicate. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import pipeline_process + + +def Helper(stage, done_dict, helper_queue, completed_queue, result_queue): + """Helper that filters duplicate tasks. + + This method Continuously pulls duplicate tasks from the helper_queue. The + duplicate tasks need not be compiled/tested. This method also pulls completed + tasks from the worker queue and let the results of the duplicate tasks be the + same as their corresponding finished task. + + Args: + stage: The current stage of the pipeline, for example, build stage or test + stage. + done_dict: A dictionary of tasks that are done. The key of the dictionary is + the identifier of the task. The value of the dictionary is the results of + performing the corresponding task. + helper_queue: A queue of duplicate tasks whose results need to be resolved. + This is a communication channel between the pipeline_process and this + helper process. + completed_queue: A queue of tasks that have been built/tested. 
The results + of these tasks are needed to resolve the results of the duplicate tasks. + This is the communication channel between the workers and this helper + process. + result_queue: After the results of the duplicate tasks have been resolved, + the duplicate tasks will be sent to the next stage via this queue. + """ + + # The list of duplicate tasks, the results of which need to be resolved. + waiting_list = [] + + while True: + # Pull duplicate task from the helper queue. + if not helper_queue.empty(): + task = helper_queue.get() + + if task == pipeline_process.POISONPILL: + # Poison pill means no more duplicate task from the helper queue. + break + + # The task has not been performed before. + assert not task.Done(stage) + + # The identifier of this task. + identifier = task.GetIdentifier(stage) + + # If a duplicate task comes before the corresponding resolved results from + # the completed_queue, it will be put in the waiting list. If the result + # arrives before the duplicate task, the duplicate task will be resolved + # right away. + if identifier in done_dict: + # This task has been encountered before and the result is available. The + # result can be resolved right away. + task.SetResult(stage, done_dict[identifier]) + result_queue.put(task) + else: + waiting_list.append(task) + + # Check and get completed tasks from completed_queue. + GetResultFromCompletedQueue(stage, completed_queue, done_dict, waiting_list, + result_queue) + + # Wait to resolve the results of the remaining duplicate tasks. + while waiting_list: + GetResultFromCompletedQueue(stage, completed_queue, done_dict, waiting_list, + result_queue) + + +def GetResultFromCompletedQueue(stage, completed_queue, done_dict, waiting_list, + result_queue): + """Pull results from the completed queue and resolves duplicate tasks. + + Args: + stage: The current stage of the pipeline, for example, build stage or test + stage. + completed_queue: A queue of tasks that have been performed. The results of + these tasks are needed to resolve the results of the duplicate tasks. This + is the communication channel between the workers and this method. + done_dict: A dictionary of tasks that are done. The key of the dictionary is + the optimization flags of the task. The value of the dictionary is the + compilation results of the corresponding task. + waiting_list: The list of duplicate tasks, the results of which need to be + resolved. + result_queue: After the results of the duplicate tasks have been resolved, + the duplicate tasks will be sent to the next stage via this queue. + + This helper method tries to pull a completed task from the completed queue. + If it gets a task from the queue, it resolves the results of all the relevant + duplicate tasks in the waiting list. Relevant tasks are the tasks that have + the same flags as the currently received results from the completed_queue. + """ + # Pull completed task from the worker queue. + if not completed_queue.empty(): + (identifier, result) = completed_queue.get() + done_dict[identifier] = result + + tasks = [t for t in waiting_list if t.GetIdentifier(stage) == identifier] + for duplicate_task in tasks: + duplicate_task.SetResult(stage, result) + result_queue.put(duplicate_task) + waiting_list.remove(duplicate_task) + + +def Worker(stage, task, helper_queue, result_queue): + """Worker that performs the task. + + This method calls the work method of the input task and distribute the result + to the helper and the next stage. 
+ + Args: + stage: The current stage of the pipeline, for example, build stage or test + stage. + task: Input task that needs to be performed. + helper_queue: Queue that holds the completed tasks and the results. This is + the communication channel between the worker and the helper. + result_queue: Queue that holds the completed tasks and the results. This is + the communication channel between the worker and the next stage. + """ + + # The task has not been completed before. + assert not task.Done(stage) + + task.Work(stage) + helper_queue.put((task.GetIdentifier(stage), task.GetResult(stage))) + result_queue.put(task) diff --git a/bestflags/pipeline_worker_test.py b/bestflags/pipeline_worker_test.py new file mode 100644 index 00000000..e3de5e12 --- /dev/null +++ b/bestflags/pipeline_worker_test.py @@ -0,0 +1,128 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Unittest for the pipeline_worker functions in the build/test stage. + +Part of the Chrome build flags optimization. + +This module tests the helper method and the worker method. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import multiprocessing +import random +import sys +import unittest + +from mock_task import MockTask +import pipeline_process +import pipeline_worker + +# Pick an integer at random. +TEST_STAGE = -3 + + +def MockTaskCostGenerator(): + """Calls a random number generator and returns a negative number.""" + return random.randint(-sys.maxint - 1, -1) + + +class PipelineWorkerTest(unittest.TestCase): + """This class tests the pipeline_worker functions. + + Given the same identifier, the cost should result the same from the + pipeline_worker functions. + """ + + def testHelper(self): + """"Test the helper. + + Call the helper method twice, and test the results. The results should be + the same, i.e., the cost should be the same. + """ + + # Set up the input, helper and output queue for the helper method. + manager = multiprocessing.Manager() + helper_queue = manager.Queue() + result_queue = manager.Queue() + completed_queue = manager.Queue() + + # Set up the helper process that holds the helper method. + helper_process = multiprocessing.Process( + target=pipeline_worker.Helper, + args=(TEST_STAGE, {}, helper_queue, completed_queue, result_queue)) + helper_process.start() + + # A dictionary defines the mock result to the helper. + mock_result = {1: 1995, 2: 59, 9: 1027} + + # Test if there is a task that is done before, whether the duplicate task + # will have the same result. Here, two different scenarios are tested. That + # is the mock results are added to the completed_queue before and after the + # corresponding mock tasks being added to the input queue. + completed_queue.put((9, mock_result[9])) + + # The output of the helper should contain all the following tasks. + results = [1, 1, 2, 9] + + # Testing the correctness of having tasks having the same identifier, here + # 1. + for result in results: + helper_queue.put(MockTask(TEST_STAGE, result, MockTaskCostGenerator())) + + completed_queue.put((2, mock_result[2])) + completed_queue.put((1, mock_result[1])) + + # Signal there is no more duplicate task. 
+ helper_queue.put(pipeline_process.POISONPILL) + helper_process.join() + + while results: + task = result_queue.get() + identifier = task.GetIdentifier(TEST_STAGE) + self.assertTrue(identifier in results) + if identifier in mock_result: + self.assertTrue(task.GetResult(TEST_STAGE), mock_result[identifier]) + results.remove(identifier) + + def testWorker(self): + """"Test the worker method. + + The worker should process all the input tasks and output the tasks to the + helper and result queue. + """ + + manager = multiprocessing.Manager() + result_queue = manager.Queue() + completed_queue = manager.Queue() + + # A dictionary defines the mock tasks and their corresponding results. + mock_work_tasks = {1: 86, 2: 788} + + mock_tasks = [] + + for flag, cost in mock_work_tasks.iteritems(): + mock_tasks.append(MockTask(TEST_STAGE, flag, cost)) + + # Submit the mock tasks to the worker. + for mock_task in mock_tasks: + pipeline_worker.Worker(TEST_STAGE, mock_task, completed_queue, + result_queue) + + # The tasks, from the output queue, should be the same as the input and + # should be performed. + for task in mock_tasks: + output = result_queue.get() + self.assertEqual(output, task) + self.assertTrue(output.Done(TEST_STAGE)) + + # The tasks, from the completed_queue, should be defined in the + # mock_work_tasks dictionary. + for flag, cost in mock_work_tasks.iteritems(): + helper_input = completed_queue.get() + self.assertEqual(helper_input, (flag, cost)) + + +if __name__ == '__main__': + unittest.main() diff --git a/bestflags/steering.py b/bestflags/steering.py new file mode 100644 index 00000000..320f7c37 --- /dev/null +++ b/bestflags/steering.py @@ -0,0 +1,116 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""The framework stage that produces the next generation of tasks to run. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import pipeline_process + + +def Steering(cache, generations, input_queue, result_queue): + """The core method template that produces the next generation of tasks to run. + + This method waits for the results of the tasks from the previous generation. + Upon the arrival of all these results, the method uses them to generate the + next generation of tasks. + + The main logic of producing the next generation from previous generation is + application specific. For example, in the genetic algorithm, a task is + produced by combining two parents tasks, while in the hill climbing algorithm, + a task is generated by its immediate neighbor. The method 'Next' is overridden + in the concrete subclasses of the class Generation to produce the next + application-specific generation. The steering method invokes the 'Next' + method, produces the next generation and submits the tasks in this generation + to the next stage, e.g., the build/compilation stage. + + Args: + cache: It stores the experiments that have been conducted before. Used to + avoid duplicate works. + generations: The initial generations of tasks to be run. + input_queue: The input results from the last stage of the framework. These + results will trigger new iteration of the algorithm. + result_queue: The output task queue for this pipeline stage. The new tasks + generated by the steering algorithm will be sent to the next stage via + this queue. + """ + + # Generations that have pending tasks to be executed. 
Pending tasks are those + # whose results are not ready. The tasks that have their results ready are + # referenced to as ready tasks. Once there is no pending generation, the + # algorithm terminates. + waiting = generations + + # Record how many initial tasks there are. If there is no task at all, the + # algorithm can terminate right away. + num_tasks = 0 + + # Submit all the tasks in the initial generations to the next stage of the + # framework. The next stage can be the build/compilation stage. + for generation in generations: + # Only send the task that has not been performed before to the next stage. + for task in [task for task in generation.Pool() if task not in cache]: + result_queue.put(task) + cache.add(task) + num_tasks += 1 + + # If there is no task to be executed at all, the algorithm returns right away. + if not num_tasks: + # Inform the next stage that there will be no more task. + result_queue.put(pipeline_process.POISONPILL) + return + + # The algorithm is done if there is no pending generation. A generation is + # pending if it has pending task. + while waiting: + # Busy-waiting for the next task. + if input_queue.empty(): + continue + + # If there is a task whose result is ready from the last stage of the + # feedback loop, there will be one less pending task. + + task = input_queue.get() + + # Store the result of this ready task. Intermediate results can be used to + # generate report for final result or be used to reboot from a crash from + # the failure of any module of the framework. + task.LogSteeringCost() + + # Find out which pending generation this ready task belongs to. This pending + # generation will have one less pending task. The "next" expression iterates + # the generations in waiting until the first generation whose UpdateTask + # method returns true. + generation = next(gen for gen in waiting if gen.UpdateTask(task)) + + # If there is still any pending task, do nothing. + if not generation.Done(): + continue + + # All the tasks in the generation are finished. The generation is ready to + # produce the next generation. + waiting.remove(generation) + + # Check whether a generation should generate the next generation. + # A generation may not generate the next generation, e.g., because a + # fixpoint has been reached, there has not been any improvement for a few + # generations or a local maxima is reached. + if not generation.IsImproved(): + continue + + for new_generation in generation.Next(cache): + # Make sure that each generation should contain at least one task. + assert new_generation.Pool() + waiting.append(new_generation) + + # Send the tasks of the new generations to the next stage for execution. + for new_task in new_generation.Pool(): + result_queue.put(new_task) + cache.add(new_task) + + # Steering algorithm is finished and it informs the next stage that there will + # be no more task. + result_queue.put(pipeline_process.POISONPILL) diff --git a/bestflags/steering_test.py b/bestflags/steering_test.py new file mode 100644 index 00000000..c96e362f --- /dev/null +++ b/bestflags/steering_test.py @@ -0,0 +1,170 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Steering stage unittest. + +Part of the Chrome build flags optimization. 
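The steering loop above only touches a generation through a small set of methods: Pool, UpdateTask, Done, IsImproved and Next. The stub below is a minimal sketch of that contract inferred from those call sites (it is not the project's Generation base class); the MockGeneration in the unit test that follows plays the same role.

class MinimalGeneration(object):
  """Sketch of the interface the Steering loop exercises (illustration only)."""

  def __init__(self, tasks):
    self._tasks = set(tasks)    # tasks submitted to the build/test stages
    self._pending = set(tasks)  # tasks whose results are not ready yet

  def Pool(self):
    return self._tasks

  def UpdateTask(self, task):
    # Record a finished task; True if it belongs to this generation.
    if task in self._pending:
      self._pending.remove(task)
      return True
    return False

  def Done(self):
    # A generation is done once none of its tasks is pending.
    return not self._pending

  def IsImproved(self):
    # Whether it is worth producing another generation.
    return True

  def Next(self, cache):
    # Produce the follow-up generations; none here, so the loop drains.
    return []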
+""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import multiprocessing +import unittest + +from generation import Generation +from mock_task import IdentifierMockTask +import pipeline_process +import steering + +# Pick an integer at random. +STEERING_TEST_STAGE = -8 + +# The number of generations to be used to do the testing. +NUMBER_OF_GENERATIONS = 20 + +# The number of tasks to be put in each generation to be tested. +NUMBER_OF_TASKS = 20 + +# The stride of permutation used to shuffle the input list of tasks. Should be +# relatively prime with NUMBER_OF_TASKS. +STRIDE = 7 + + +class MockGeneration(Generation): + """This class emulates an actual generation. + + It will output the next_generations when the method Next is called. The + next_generations is initiated when the MockGeneration instance is constructed. + """ + + def __init__(self, tasks, next_generations): + """Set up the next generations for this task. + + Args: + tasks: A set of tasks to be run. + next_generations: A list of generations as the next generation of the + current generation. + """ + Generation.__init__(self, tasks, None) + self._next_generations = next_generations + + def Next(self, _): + return self._next_generations + + def IsImproved(self): + if self._next_generations: + return True + return False + + +class SteeringTest(unittest.TestCase): + """This class test the steering method. + + The steering algorithm should return if there is no new task in the initial + generation. The steering algorithm should send all the tasks to the next stage + and should terminate once there is no pending generation. A generation is + pending if it contains pending task. A task is pending if its (test) result + is not ready. + """ + + def testSteering(self): + """Test that the steering algorithm processes all the tasks properly. + + Test that the steering algorithm sends all the tasks to the next stage. Test + that the steering algorithm terminates once all the tasks have been + processed, i.e., the results for the tasks are all ready. + """ + + # A list of generations used to test the steering stage. + generations = [] + + task_index = 0 + previous_generations = None + + # Generate a sequence of generations to be tested. Each generation will + # output the next generation in reverse order of the list when the "Next" + # method is called. + for _ in range(NUMBER_OF_GENERATIONS): + # Use a consecutive sequence of numbers as identifiers for the set of + # tasks put into a generation. + test_ranges = range(task_index, task_index + NUMBER_OF_TASKS) + tasks = [IdentifierMockTask(STEERING_TEST_STAGE, t) for t in test_ranges] + steering_tasks = set(tasks) + + # Let the previous generation as the offspring generation of the current + # generation. + current_generation = MockGeneration(steering_tasks, previous_generations) + generations.insert(0, current_generation) + previous_generations = [current_generation] + + task_index += NUMBER_OF_TASKS + + # If there is no generation at all, the unittest returns right away. + if not current_generation: + return + + # Set up the input and result queue for the steering method. + manager = multiprocessing.Manager() + input_queue = manager.Queue() + result_queue = manager.Queue() + + steering_process = multiprocessing.Process( + target=steering.Steering, + args=(set(), [current_generation], input_queue, result_queue)) + steering_process.start() + + # Test that each generation is processed properly. I.e., the generations are + # processed in order. 
+ while generations: + generation = generations.pop(0) + tasks = [task for task in generation.Pool()] + + # Test that all the tasks are processed once and only once. + while tasks: + task = result_queue.get() + + assert task in tasks + tasks.remove(task) + + input_queue.put(task) + + task = result_queue.get() + + # Test that the steering algorithm returns properly after processing all + # the generations. + assert task == pipeline_process.POISONPILL + + steering_process.join() + + def testCache(self): + """The steering algorithm returns immediately if there is no new tasks. + + If all the new tasks have been cached before, the steering algorithm does + not have to execute these tasks again and thus can terminate right away. + """ + + # Put a set of tasks in the cache and add this set to initial generation. + test_ranges = range(NUMBER_OF_TASKS) + tasks = [IdentifierMockTask(STEERING_TEST_STAGE, t) for t in test_ranges] + steering_tasks = set(tasks) + + current_generation = MockGeneration(steering_tasks, None) + + # Set up the input and result queue for the steering method. + manager = multiprocessing.Manager() + input_queue = manager.Queue() + result_queue = manager.Queue() + + steering_process = multiprocessing.Process( + target=steering.Steering, + args=(steering_tasks, [current_generation], input_queue, result_queue)) + + steering_process.start() + + # Test that the steering method returns right away. + assert result_queue.get() == pipeline_process.POISONPILL + steering_process.join() + + +if __name__ == '__main__': + unittest.main() diff --git a/bestflags/task.py b/bestflags/task.py new file mode 100644 index 00000000..f055fc75 --- /dev/null +++ b/bestflags/task.py @@ -0,0 +1,450 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""A reproducing entity. + +Part of the Chrome build flags optimization. + +The Task class is used by different modules. Each module fills in the +corresponding information into a Task instance. Class Task contains the bit set +representing the flags selection. The builder module is responsible for filling +the image and the checksum field of a Task. The executor module will put the +execution output to the execution field. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import os +import subprocess +import sys +from uuid import uuid4 + +BUILD_STAGE = 1 +TEST_STAGE = 2 + +# Message indicating that the build or test failed. +ERROR_STRING = 'error' + +# The maximum number of tries a build can have. Some compilations may fail due +# to unexpected environment circumstance. This variable defines how many tries +# the build should attempt before giving up. +BUILD_TRIES = 3 + +# The maximum number of tries a test can have. Some tests may fail due to +# unexpected environment circumstance. This variable defines how many tries the +# test should attempt before giving up. +TEST_TRIES = 3 + + +# Create the file/directory if it does not already exist. +def _CreateDirectory(file_name): + directory = os.path.dirname(file_name) + if not os.path.exists(directory): + os.makedirs(directory) + + +class Task(object): + """A single reproducing entity. + + A single test of performance with a particular set of flags. It records the + flag set, the image, the check sum of the image and the cost. + """ + + # The command that will be used in the build stage to compile the tasks. 
+ BUILD_COMMAND = None + # The command that will be used in the test stage to test the tasks. + TEST_COMMAND = None + # The directory to log the compilation and test results. + LOG_DIRECTORY = None + + @staticmethod + def InitLogCommand(build_command, test_command, log_directory): + """Set up the build and test command for the task and the log directory. + + This framework is generic. It lets the client specify application specific + compile and test methods by passing different build_command and + test_command. + + Args: + build_command: The command that will be used in the build stage to compile + this task. + test_command: The command that will be used in the test stage to test this + task. + log_directory: The directory to log the compilation and test results. + """ + + Task.BUILD_COMMAND = build_command + Task.TEST_COMMAND = test_command + Task.LOG_DIRECTORY = log_directory + + def __init__(self, flag_set): + """Set up the optimization flag selection for this task. + + Args: + flag_set: The optimization flag set that is encapsulated by this task. + """ + + self._flag_set = flag_set + + # A unique identifier that distinguishes this task from other tasks. + self._task_identifier = uuid4() + + self._log_path = (Task.LOG_DIRECTORY, self._task_identifier) + + # Initiate the hash value. The hash value is used so as not to recompute it + # every time the hash method is called. + self._hash_value = None + + # Indicate that the task has not been compiled/tested. + self._build_cost = None + self._exe_cost = None + self._checksum = None + self._image = None + self._file_length = None + self._text_length = None + + def __eq__(self, other): + """Test whether two tasks are equal. + + Two tasks are equal if their flag_set are equal. + + Args: + other: The other task with which this task is tested equality. + Returns: + True if the encapsulated flag sets are equal. + """ + if isinstance(other, Task): + return self.GetFlags() == other.GetFlags() + return False + + def __hash__(self): + if self._hash_value is None: + # Cache the hash value of the flags, so as not to recompute them. + self._hash_value = hash(self._flag_set) + return self._hash_value + + def GetIdentifier(self, stage): + """Get the identifier of the task in the stage. + + The flag set uniquely identifies a task in the build stage. The checksum of + the image of the task uniquely identifies the task in the test stage. + + Args: + stage: The stage (build/test) in which this method is called. + Returns: + Return the flag set in build stage and return the checksum in test stage. + """ + + # Define the dictionary for different stage function lookup. + get_identifier_functions = {BUILD_STAGE: self.FormattedFlags, + TEST_STAGE: self.__GetCheckSum} + + assert stage in get_identifier_functions + return get_identifier_functions[stage]() + + def GetResult(self, stage): + """Get the performance results of the task in the stage. + + Args: + stage: The stage (build/test) in which this method is called. + Returns: + Performance results. + """ + + # Define the dictionary for different stage function lookup. + get_result_functions = {BUILD_STAGE: self.__GetBuildResult, + TEST_STAGE: self.GetTestResult} + + assert stage in get_result_functions + + return get_result_functions[stage]() + + def SetResult(self, stage, result): + """Set the performance results of the task in the stage. + + This method is called by the pipeling_worker to set the results for + duplicated tasks. + + Args: + stage: The stage (build/test) in which this method is called. 
+ result: The performance results of the stage. + """ + + # Define the dictionary for different stage function lookup. + set_result_functions = {BUILD_STAGE: self.__SetBuildResult, + TEST_STAGE: self.__SetTestResult} + + assert stage in set_result_functions + + set_result_functions[stage](result) + + def Done(self, stage): + """Check whether the stage is done. + + Args: + stage: The stage to be checked, build or test. + Returns: + True if the stage is done. + """ + + # Define the dictionary for different result string lookup. + done_string = {BUILD_STAGE: self._build_cost, TEST_STAGE: self._exe_cost} + + assert stage in done_string + + return done_string[stage] is not None + + def Work(self, stage): + """Perform the task. + + Args: + stage: The stage in which the task is performed, compile or test. + """ + + # Define the dictionary for different stage function lookup. + work_functions = {BUILD_STAGE: self.__Compile, TEST_STAGE: self.__Test} + + assert stage in work_functions + + work_functions[stage]() + + def FormattedFlags(self): + """Format the optimization flag set of this task. + + Returns: + The formatted optimization flag set that is encapsulated by this task. + """ + return str(self._flag_set.FormattedForUse()) + + def GetFlags(self): + """Get the optimization flag set of this task. + + Returns: + The optimization flag set that is encapsulated by this task. + """ + + return self._flag_set + + def __GetCheckSum(self): + """Get the compilation image checksum of this task. + + Returns: + The compilation image checksum of this task. + """ + + # The checksum should be computed before this method is called. + assert self._checksum is not None + return self._checksum + + def __Compile(self): + """Run a compile. + + This method compile an image using the present flags, get the image, + test the existent of the image and gathers monitoring information, and sets + the internal cost (fitness) for this set of flags. + """ + + # Format the flags as a string as input to compile command. The unique + # identifier is passed to the compile command. If concurrent processes are + # used to compile different tasks, these processes can use the identifier to + # write to different file. + flags = self._flag_set.FormattedForUse() + command = '%s %s %s' % (Task.BUILD_COMMAND, ' '.join(flags), + self._task_identifier) + + # Try BUILD_TRIES number of times before confirming that the build fails. + for _ in range(BUILD_TRIES): + try: + # Execute the command and get the execution status/results. + p = subprocess.Popen(command.split(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (out, err) = p.communicate() + + if out: + out = out.strip() + if out != ERROR_STRING: + # Each build results contains the checksum of the result image, the + # performance cost of the build, the compilation image, the length + # of the build, and the length of the text section of the build. + (checksum, cost, image, file_length, text_length) = out.split() + # Build successfully. + break + + # Build failed. + cost = ERROR_STRING + except _: + # If there is exception getting the cost information of the build, the + # build failed. + cost = ERROR_STRING + + # Convert the build cost from String to integer. The build cost is used to + # compare a task with another task. Set the build cost of the failing task + # to the max integer. The for loop will keep trying until either there is a + # success or BUILD_TRIES number of tries have been conducted. 
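The retry loop above defines the whole contract a build command has to meet: print the single word 'error' on failure, or five whitespace-separated fields (checksum, cost, image, file length, text length) on success. The hypothetical stand-in below, useful for dry-running the framework, is an illustration only; the script name and field values are invented. A matching stand-in for the test command would print either a single cost value or 'error'.

#!/usr/bin/env python
# fake_build.py -- hypothetical stand-in for Task.BUILD_COMMAND (illustration only).
# Invoked by the framework as: <BUILD_COMMAND> <flag> ... <task_identifier>
import hashlib
import sys

args = sys.argv[1:]
if not args:
  # The framework treats the single word 'error' as a failed build.
  print 'error'
  sys.exit(0)

# Derive a deterministic checksum from the flags, so identical flag sets get
# the same identifier in the test stage.
checksum = hashlib.md5(' '.join(args[:-1])).hexdigest()
cost = len(args) - 1                  # pretend build cost: number of flags
image = '/tmp/image-%s' % args[-1]    # pretend image path, keyed by identifier
file_length = 1000
text_length = 500

# Five whitespace-separated fields, exactly what __Compile splits apart.
print '%s %s %s %s %s' % (checksum, cost, image, file_length, text_length)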
+ self._build_cost = sys.maxint if cost == ERROR_STRING else float(cost) + + self._checksum = checksum + self._file_length = file_length + self._text_length = text_length + self._image = image + + self.__LogBuildCost(err) + + def __Test(self): + """__Test the task against benchmark(s) using the input test command.""" + + # Ensure that the task is compiled before being tested. + assert self._image is not None + + # If the task does not compile, no need to test. + if self._image == ERROR_STRING: + self._exe_cost = ERROR_STRING + return + + # The unique identifier is passed to the test command. If concurrent + # processes are used to compile different tasks, these processes can use the + # identifier to write to different file. + command = '%s %s %s' % (Task.TEST_COMMAND, self._image, + self._task_identifier) + + # Try TEST_TRIES number of times before confirming that the build fails. + for _ in range(TEST_TRIES): + try: + p = subprocess.Popen(command.split(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (out, err) = p.communicate() + + if out: + out = out.strip() + if out != ERROR_STRING: + # The test results contains the performance cost of the test. + cost = out + # Test successfully. + break + + # Test failed. + cost = ERROR_STRING + except _: + # If there is exception getting the cost information of the test, the + # test failed. The for loop will keep trying until either there is a + # success or TEST_TRIES number of tries have been conducted. + cost = ERROR_STRING + + self._exe_cost = sys.maxint if (cost == ERROR_STRING) else float(cost) + + self.__LogTestCost(err) + + def __SetBuildResult(self, (checksum, build_cost, image, file_length, + text_length)): + self._checksum = checksum + self._build_cost = build_cost + self._image = image + self._file_length = file_length + self._text_length = text_length + + def __GetBuildResult(self): + return (self._checksum, self._build_cost, self._image, self._file_length, + self._text_length) + + def GetTestResult(self): + return self._exe_cost + + def __SetTestResult(self, exe_cost): + self._exe_cost = exe_cost + + def LogSteeringCost(self): + """Log the performance results for the task. + + This method is called by the steering stage and this method writes the + results out to a file. The results include the build and the test results. + """ + + steering_log = '%s/%s/steering.txt' % self._log_path + + _CreateDirectory(steering_log) + + with open(steering_log, 'w') as out_file: + # Include the build and the test results. + steering_result = (self._flag_set, self._checksum, self._build_cost, + self._image, self._file_length, self._text_length, + self._exe_cost) + + # Write out the result in the comma-separated format (CSV). + out_file.write('%s,%s,%s,%s,%s,%s,%s\n' % steering_result) + + def __LogBuildCost(self, log): + """Log the build results for the task. + + The build results include the compilation time of the build, the result + image, the checksum, the file length and the text length of the image. + The file length of the image includes the length of the file of the image. + The text length only includes the length of the text section of the image. + + Args: + log: The build log of this task. + """ + + build_result_log = '%s/%s/build.txt' % self._log_path + + _CreateDirectory(build_result_log) + + with open(build_result_log, 'w') as out_file: + build_result = (self._flag_set, self._build_cost, self._image, + self._checksum, self._file_length, self._text_length) + + # Write out the result in the comma-separated format (CSV). 
+ out_file.write('%s,%s,%s,%s,%s,%s\n' % build_result) + + # The build information about running the build. + build_run_log = '%s/%s/build_log.txt' % self._log_path + _CreateDirectory(build_run_log) + + with open(build_run_log, 'w') as out_log_file: + # Write out the execution information. + out_log_file.write('%s' % log) + + def __LogTestCost(self, log): + """Log the test results for the task. + + The test results include the runtime execution time of the test. + + Args: + log: The test log of this task. + """ + + test_log = '%s/%s/test.txt' % self._log_path + + _CreateDirectory(test_log) + + with open(test_log, 'w') as out_file: + test_result = (self._flag_set, self._checksum, self._exe_cost) + + # Write out the result in the comma-separated format (CSV). + out_file.write('%s,%s,%s\n' % test_result) + + # The execution information about running the test. + test_run_log = '%s/%s/test_log.txt' % self._log_path + + _CreateDirectory(test_run_log) + + with open(test_run_log, 'w') as out_log_file: + # Append the test log information. + out_log_file.write('%s' % log) + + def IsImproved(self, other): + """Compare the current task with another task. + + Args: + other: The other task against which the current task is compared. + + Returns: + True if this task has improvement upon the other task. + """ + + # The execution costs must have been initiated. + assert self._exe_cost is not None + assert other.GetTestResult() is not None + + return self._exe_cost < other.GetTestResult() diff --git a/bestflags/task_test.py b/bestflags/task_test.py new file mode 100644 index 00000000..68a7bf78 --- /dev/null +++ b/bestflags/task_test.py @@ -0,0 +1,174 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Task unittest. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import random +import sys +import unittest + +import task +from task import Task + +# The number of flags be tested. +NUM_FLAGS = 20 + +# The random build result values used to test get set result method. +RANDOM_BUILD_RESULT = 100 + +# The random test result values used to test get set result method. +RANDOM_TESTRESULT = 100 + + +class MockFlagSet(object): + """This class emulates a set of flags. + + It returns the flags and hash value, when the FormattedForUse method and the + __hash__ method is called, respectively. These values are initialized when the + MockFlagSet instance is constructed. + """ + + def __init__(self, flags=0, hash_value=-1): + self._flags = flags + self._hash_value = hash_value + + def __eq__(self, other): + assert isinstance(other, MockFlagSet) + return self._flags == other.FormattedForUse() + + def FormattedForUse(self): + return self._flags + + def __hash__(self): + return self._hash_value + + def GetHash(self): + return self._hash_value + + +class TaskTest(unittest.TestCase): + """This class test the Task class.""" + + def testEqual(self): + """Test the equal method of the task. + + Two tasks are equal if and only if their encapsulated flag_sets are equal. + """ + + flags = range(NUM_FLAGS) + + # Two tasks having the same flag set should be equivalent. + flag_sets = [MockFlagSet(flag) for flag in flags] + for flag_set in flag_sets: + assert Task(flag_set) == Task(flag_set) + + # Two tasks having different flag set should be different. 
+ for flag_set in flag_sets: + test_task = Task(flag_set) + other_flag_sets = [flags for flags in flag_sets if flags != flag_set] + for flag_set1 in other_flag_sets: + assert test_task != Task(flag_set1) + + def testHash(self): + """Test the hash method of the task. + + Two tasks are equal if and only if their encapsulated flag_sets are equal. + """ + + # Random identifier that is not relevant in this test. + identifier = random.randint(-sys.maxint - 1, -1) + + flag_sets = [MockFlagSet(identifier, value) for value in range(NUM_FLAGS)] + for flag_set in flag_sets: + # The hash of a task is the same as the hash of its flag set. + hash_task = Task(flag_set) + hash_value = hash(hash_task) + assert hash_value == flag_set.GetHash() + + # The hash of a task does not change. + assert hash_value == hash(hash_task) + + def testGetIdentifier(self): + """Test the get identifier method of the task. + + The get identifier method should returns the flag set in the build stage. + """ + + flag_sets = [MockFlagSet(flag) for flag in range(NUM_FLAGS)] + for flag_set in flag_sets: + identifier_task = Task(flag_set) + + identifier = identifier_task.GetIdentifier(task.BUILD_STAGE) + + # The task formats the flag set into a string. + assert identifier == str(flag_set.FormattedForUse()) + + def testGetSetResult(self): + """Test the get and set result methods of the task. + + The get result method should return the same results as were set. + """ + + flag_sets = [MockFlagSet(flag) for flag in range(NUM_FLAGS)] + for flag_set in flag_sets: + result_task = Task(flag_set) + + # The get result method should return the same results as were set, in + # build stage. Currently, the build result is a 5-element tuple containing + # the checksum of the result image, the performance cost of the build, the + # compilation image, the length of the build, and the length of the text + # section of the build. + result = tuple([random.randint(0, RANDOM_BUILD_RESULT) for _ in range(5)]) + result_task.SetResult(task.BUILD_STAGE, result) + assert result == result_task.GetResult(task.BUILD_STAGE) + + # The checksum is the identifier of the test stage. + identifier = result_task.GetIdentifier(task.TEST_STAGE) + # The first element of the result tuple is the checksum. + assert identifier == result[0] + + # The get result method should return the same results as were set, in + # test stage. + random_test_result = random.randint(0, RANDOM_TESTRESULT) + result_task.SetResult(task.TEST_STAGE, random_test_result) + test_result = result_task.GetResult(task.TEST_STAGE) + assert test_result == random_test_result + + def testDone(self): + """Test the done methods of the task. + + The done method should return false is the task has not perform and return + true after the task is finished. + """ + + flags = range(NUM_FLAGS) + + flag_sets = [MockFlagSet(flag) for flag in flags] + for flag_set in flag_sets: + work_task = Task(flag_set) + + # The task has not been compiled nor tested. + assert not work_task.Done(task.TEST_STAGE) + assert not work_task.Done(task.BUILD_STAGE) + + # After the task has been compiled, it should indicate finished in BUILD + # stage. + result = tuple([random.randint(0, RANDOM_BUILD_RESULT) for _ in range(5)]) + work_task.SetResult(task.BUILD_STAGE, result) + assert not work_task.Done(task.TEST_STAGE) + assert work_task.Done(task.BUILD_STAGE) + + # After the task has been tested, it should indicate finished in TEST + # stage. 
+ work_task.SetResult(task.TEST_STAGE, random.randint(0, RANDOM_TESTRESULT)) + assert work_task.Done(task.TEST_STAGE) + assert work_task.Done(task.BUILD_STAGE) + + +if __name__ == '__main__': + unittest.main() diff --git a/bestflags/testing_batch.py b/bestflags/testing_batch.py new file mode 100644 index 00000000..ffe19448 --- /dev/null +++ b/bestflags/testing_batch.py @@ -0,0 +1,450 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Hill climbing unitest. + +Part of the Chrome build flags optimization. + +Test the best branching hill climbing algorithms, genetic algorithm and +iterative elimination algorithm. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import multiprocessing +import random +import sys +import unittest + +import flags +from flags import Flag +from flags import FlagSet +from genetic_algorithm import GAGeneration +from genetic_algorithm import GATask +from hill_climb_best_neighbor import HillClimbingBestBranch +from iterative_elimination import IterativeEliminationFirstGeneration +import pipeline_process +from steering import Steering +from task import BUILD_STAGE +from task import Task +from task import TEST_STAGE + +# The number of flags be tested. +NUM_FLAGS = 5 + +# The value range of the flags. +FLAG_RANGES = 10 + +# The following variables are meta data for the Genetic Algorithm. +STOP_THRESHOLD = 20 +NUM_CHROMOSOMES = 10 +NUM_TRIALS = 20 +MUTATION_RATE = 0.03 + + +def _GenerateRandomRasks(specs): + """Generate a task that has random values. + + Args: + specs: A list of spec from which the flag set is created. + + Returns: + A set containing a task that has random values. + """ + + flag_set = [] + + for spec in specs: + numeric_flag_match = flags.Search(spec) + if numeric_flag_match: + # Numeric flags. + start = int(numeric_flag_match.group('start')) + end = int(numeric_flag_match.group('end')) + + value = random.randint(start - 1, end - 1) + if value != start - 1: + # If the value falls in the range, this flag is enabled. + flag_set.append(Flag(spec, value)) + else: + # Boolean flags. + if random.randint(0, 1): + flag_set.append(Flag(spec)) + + return set([Task(FlagSet(flag_set))]) + + +def _GenerateAllFlagsTasks(specs): + """Generate a task that all the flags are enable. + + All the boolean flags in the specs will be enabled and all the numeric flag + with have the largest legal value. + + Args: + specs: A list of spec from which the flag set is created. + + Returns: + A set containing a task that has all flags enabled. + """ + + flag_set = [] + + for spec in specs: + numeric_flag_match = flags.Search(spec) + + if numeric_flag_match: + value = (int(numeric_flag_match.group('end')) - 1) + else: + value = -1 + flag_set.append(Flag(spec, value)) + + return set([Task(FlagSet(flag_set))]) + + +def _GenerateNoFlagTask(): + return set([Task(FlagSet([]))]) + + +def GenerateRandomGATasks(specs, num_tasks, num_trials): + """Generate a set of tasks for the Genetic Algorithm. + + Args: + specs: A list of spec from which the flag set is created. + num_tasks: number of tasks that should be generated. + num_trials: the maximum number of tries should be attempted to generate the + set of tasks. + + Returns: + A set of randomly generated tasks. 
+ """ + + tasks = set([]) + + total_trials = 0 + while len(tasks) < num_tasks and total_trials < num_trials: + new_flag = FlagSet([Flag(spec) for spec in specs if random.randint(0, 1)]) + new_task = GATask(new_flag) + + if new_task in tasks: + total_trials += 1 + else: + tasks.add(new_task) + total_trials = 0 + + return tasks + + +def _GenerateInitialFlags(specs, spec): + """Generate the flag_set of a task in the flag elimination algorithm. + + Set the value of all the flags to the largest value, except for the flag that + contains spec. + + For example, if the specs are [-finline-limit=[1-1000], -fstrict-aliasing] and + the spec is -finline-limit=[1-1000], then the result is + [-finline-limit=[1-1000]:-finline-limit=998, + -fstrict-aliasing:-fstrict-aliasing] + + Args: + specs: an array of specifications from which the result flag_set is created. + The flag_set contains one and only one flag that contain the specification + spec. + spec: The flag containing this spec should have a value that is smaller than + the highest value the flag can have. + + Returns: + An array of flags, each of which contains one spec in specs. All the values + of the flags are the largest values in specs, expect the one that contains + spec. + """ + + flag_set = [] + for other_spec in specs: + numeric_flag_match = flags.Search(other_spec) + # Found the spec in the array specs. + if other_spec == spec: + # Numeric flag will have a value that is smaller than the largest value + # and Boolean flag will be deleted. + if numeric_flag_match: + end = int(numeric_flag_match.group('end')) + flag_set.append(flags.Flag(other_spec, end - 2)) + + continue + + # other_spec != spec + if numeric_flag_match: + # numeric flag + end = int(numeric_flag_match.group('end')) + flag_set.append(flags.Flag(other_spec, end - 1)) + continue + + # boolean flag + flag_set.append(flags.Flag(other_spec)) + + return flag_set + + +def _GenerateAllIterativeEliminationTasks(specs): + """Generate the initial tasks for the negative flag elimination algorithm. + + Generate the base line task that turns on all the boolean flags and sets the + value to be the largest value for the numeric flag. + + For example, if the specs are [-finline-limit=[1-1000], -fstrict-aliasing], + the base line is [-finline-limit=[1-1000]:-finline-limit=999, + -fstrict-aliasing:-fstrict-aliasing] + + Generate a set of task, each turns off one of the flag or sets a value that is + smaller than the largest value for the flag. + + Args: + specs: an array of specifications from which the result flag_set is created. + + Returns: + An array containing one generation of the initial tasks for the negative + flag elimination algorithm. + """ + + # The set of tasks to be generated. + results = set([]) + flag_set = [] + + for spec in specs: + numeric_flag_match = flags.Search(spec) + if numeric_flag_match: + # Numeric flag. + end_value = int(numeric_flag_match.group('end')) + flag_set.append(flags.Flag(spec, end_value - 1)) + continue + + # Boolean flag. + flag_set.append(flags.Flag(spec)) + + # The base line task that set all the flags to their largest values. + parent_task = Task(flags.FlagSet(flag_set)) + results.add(parent_task) + + for spec in specs: + results.add(Task(flags.FlagSet(_GenerateInitialFlags(specs, spec)))) + + return [IterativeEliminationFirstGeneration(results, parent_task)] + + +def _ComputeCost(cost_func, specs, flag_set): + """Compute the mock cost of the flag_set using the input cost function. 
+ + All the boolean flags in the specs will be enabled and all the numeric flag + with have the largest legal value. + + Args: + cost_func: The cost function which is used to compute the mock cost of a + dictionary of flags. + specs: All the specs that are used in the algorithm. This is used to check + whether certain flag is disabled in the flag_set dictionary. + flag_set: a dictionary of the spec and flag pairs. + + Returns: + The mock cost of the input dictionary of the flags. + """ + + values = [] + + for spec in specs: + # If a flag is enabled, its value is added. Otherwise a padding 0 is added. + values.append(flag_set[spec].GetValue() if spec in flag_set else 0) + + # The cost function string can use the values array. + return eval(cost_func) + + +def _GenerateTestFlags(num_flags, upper_bound, file_name): + """Generate a set of mock flags and write it to a configuration file. + + Generate a set of mock flags + + Args: + num_flags: Number of numeric flags to be generated. + upper_bound: The value of the upper bound of the range. + file_name: The configuration file name into which the mock flags are put. + """ + + with open(file_name, 'w') as output_file: + num_flags = int(num_flags) + upper_bound = int(upper_bound) + for i in range(num_flags): + output_file.write('%s=[1-%d]\n' % (i, upper_bound)) + + +def _TestAlgorithm(cost_func, specs, generations, best_result): + """Test the best result the algorithm should return. + + Set up the framework, run the input algorithm and verify the result. + + Args: + cost_func: The cost function which is used to compute the mock cost of a + dictionary of flags. + specs: All the specs that are used in the algorithm. This is used to check + whether certain flag is disabled in the flag_set dictionary. + generations: The initial generations to be evaluated. + best_result: The expected best result of the algorithm. If best_result is + -1, the algorithm may or may not return the best value. Therefore, no + assertion will be inserted. + """ + + # Set up the utilities to test the framework. + manager = multiprocessing.Manager() + input_queue = manager.Queue() + output_queue = manager.Queue() + pp_steer = multiprocessing.Process( + target=Steering, + args=(set(), generations, output_queue, input_queue)) + pp_steer.start() + + # The best result of the algorithm so far. + result = sys.maxint + + while True: + task = input_queue.get() + + # POISONPILL signal the ends of the algorithm. + if task == pipeline_process.POISONPILL: + break + + task.SetResult(BUILD_STAGE, (0, 0, 0, 0, 0)) + + # Compute the mock cost for the task. + task_result = _ComputeCost(cost_func, specs, task.GetFlags()) + task.SetResult(TEST_STAGE, task_result) + + # If the mock result of the current task is the best so far, set this + # result to be the best result. + if task_result < result: + result = task_result + + output_queue.put(task) + + pp_steer.join() + + # Only do this test when best_result is not -1. + if best_result != -1: + assert best_result == result + + +class MockAlgorithmsTest(unittest.TestCase): + """This class mock tests different steering algorithms. + + The steering algorithms are responsible for generating the next set of tasks + to run in each iteration. This class does a functional testing on the + algorithms. It mocks out the computation of the fitness function from the + build and test phases by letting the user define the fitness function. 
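To make the mock cost convention concrete: with NUM_FLAGS = 5 and FLAG_RANGES = 10, _GenerateTestFlags writes a spec file whose lines read 0=[1-10] through 4=[1-10], and _ComputeCost evaluates the cost string against a values list holding one entry per spec (the flag's value if it is enabled, 0 otherwise). The snippet below rehearses that evaluation with the flag container replaced by a plain dict of made-up values; it is an illustration, not project code.

# Stripped-down rehearsal of the mock cost evaluation (illustration only).
specs = ['0=[1-10]', '1=[1-10]', '2=[1-10]', '3=[1-10]', '4=[1-10]']

# Pretend flags '0' and '3' are enabled with values 7 and 2; the rest are off.
enabled = {'0=[1-10]': 7, '3=[1-10]': 2}

values = [enabled.get(spec, 0) for spec in specs]   # [7, 0, 0, 2, 0]

cost_func = 'sum(values[0:len(values)])'
assert eval(cost_func) == 9

# The lowest possible cost for this function is 0: every flag disabled, which
# is exactly the optimum the hill climbing and elimination tests expect.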
+ """ + + def _GenerateFlagSpecifications(self): + """Generate the testing specifications.""" + + mock_test_file = 'scale_mock_test' + _GenerateTestFlags(NUM_FLAGS, FLAG_RANGES, mock_test_file) + return flags.ReadConf(mock_test_file) + + def testBestHillClimb(self): + """Test the best hill climb algorithm. + + Test whether it finds the best results as expected. + """ + + # Initiate the build/test command and the log directory. + Task.InitLogCommand(None, None, 'output') + + # Generate the testing specs. + specs = self._GenerateFlagSpecifications() + + # Generate the initial generations for a test whose cost function is the + # summation of the values of all the flags. + generation_tasks = _GenerateAllFlagsTasks(specs) + generations = [HillClimbingBestBranch(generation_tasks, set([]), specs)] + + # Test the algorithm. The cost function is the summation of all the values + # of all the flags. Therefore, the best value is supposed to be 0, i.e., + # when all the flags are disabled. + _TestAlgorithm('sum(values[0:len(values)])', specs, generations, 0) + + # This test uses a cost function that is the negative of the previous cost + # function. Therefore, the best result should be found in task with all the + # flags enabled. + cost_function = 'sys.maxint - sum(values[0:len(values)])' + all_flags = list(generation_tasks)[0].GetFlags() + cost = _ComputeCost(cost_function, specs, all_flags) + + # Generate the initial generations. + generation_tasks = _GenerateNoFlagTask() + generations = [HillClimbingBestBranch(generation_tasks, set([]), specs)] + + # Test the algorithm. The cost function is negative of the summation of all + # the values of all the flags. Therefore, the best value is supposed to be + # 0, i.e., when all the flags are disabled. + _TestAlgorithm(cost_function, specs, generations, cost) + + def testGeneticAlgorithm(self): + """Test the Genetic Algorithm. + + Do a functional testing here and see how well it scales. + """ + + # Initiate the build/test command and the log directory. + Task.InitLogCommand(None, None, 'output') + + # Generate the testing specs. + specs = self._GenerateFlagSpecifications() + # Initiate the build/test command and the log directory. + GAGeneration.InitMetaData(STOP_THRESHOLD, NUM_CHROMOSOMES, NUM_TRIALS, + specs, MUTATION_RATE) + + # Generate the initial generations. + generation_tasks = GenerateRandomGATasks(specs, NUM_CHROMOSOMES, NUM_TRIALS) + generations = [GAGeneration(generation_tasks, set([]), 0)] + + # Test the algorithm. + _TestAlgorithm('sum(values[0:len(values)])', specs, generations, -1) + cost_func = 'sys.maxint - sum(values[0:len(values)])' + _TestAlgorithm(cost_func, specs, generations, -1) + + def testIterativeElimination(self): + """Test the iterative elimination algorithm. + + Test whether it finds the best results as expected. + """ + + # Initiate the build/test command and the log directory. + Task.InitLogCommand(None, None, 'output') + + # Generate the testing specs. + specs = self._GenerateFlagSpecifications() + + # Generate the initial generations. The generation contains the base line + # task that turns on all the flags and tasks that each turn off one of the + # flags. + generations = _GenerateAllIterativeEliminationTasks(specs) + + # Test the algorithm. The cost function is the summation of all the values + # of all the flags. Therefore, the best value is supposed to be 0, i.e., + # when all the flags are disabled. 
+    _TestAlgorithm('sum(values[0:len(values)])', specs, generations, 0)
+
+    # This test uses a cost function that is the negative of the previous cost
+    # function. Therefore, the best result should be found in the task with all
+    # the flags enabled.
+    all_flags_tasks = _GenerateAllFlagsTasks(specs)
+    cost_function = 'sys.maxint - sum(values[0:len(values)])'
+    # Compute the cost of the task that turns on all the flags.
+    all_flags = list(all_flags_tasks)[0].GetFlags()
+    cost = _ComputeCost(cost_function, specs, all_flags)
+
+    # Test the algorithm. The cost function is the negative of the summation of
+    # all the values of all the flags. Therefore, the best value is the cost
+    # computed above, i.e., when all the flags are enabled.
+    # The concrete type of the generation decides how the next generation will
+    # be generated.
+    _TestAlgorithm(cost_function, specs, generations, cost)
+
+
+if __name__ == '__main__':
+  unittest.main()
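The elimination test above drives the real generation classes through the full pipeline. As a closing illustration, the standalone sketch below (a simplification in which a plain set of strings stands in for FlagSet) walks through the same idea at a small scale: start from the base line with every flag on, drop whichever single flag removal lowers the cost the most, and repeat until no removal helps.

# Standalone sketch of negative flag iterative elimination (illustration only).
def _MockCost(enabled_flags):
  # Pretend '-fbad' hurts by 10 and every other enabled flag helps by 1.
  return 10 * ('-fbad' in enabled_flags) - len(enabled_flags - set(['-fbad']))

def _Eliminate(all_flags, cost):
  base = set(all_flags)
  base_cost = cost(base)
  while base:
    # Evaluate every candidate that turns off exactly one remaining flag.
    candidates = [(cost(base - set([f])), base - set([f])) for f in base]
    best_cost, best_set = min(candidates, key=lambda c: c[0])
    if best_cost >= base_cost:
      # No single removal improves on the base line, so the algorithm stops.
      return base, base_cost
    base, base_cost = best_set, best_cost
  return base, base_cost

best_flags, best_cost = _Eliminate(['-fa', '-fb', '-fbad'], _MockCost)
assert best_flags == set(['-fa', '-fb']) and best_cost == -2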