diff options
author | Yuheng Long <yuhenglong@google.com> | 2013-08-07 08:48:05 -0700 |
---|---|---|
committer | ChromeBot <chrome-bot@google.com> | 2013-08-07 22:14:42 -0700 |
commit | 057ad5c77ef0ea44ee260718eb6d30afef2f7f83 (patch) | |
tree | 75dc92c063a32304259fc834382c9cf7fa314602 /bestflags | |
parent | 570b2ce931d40a335f3b02ed859b376040b5fb67 (diff) | |
download | toolchain-utils-057ad5c77ef0ea44ee260718eb6d30afef2f7f83.tar.gz |
Add the Genetic Algorithm.
BUG=None
TEST=unit testings for the pipeline stage, pipeline workers, generation,
steering, task, flag, hill climbing and genetic algorithm.
Change-Id: I2864d6a6859fff43bc2d3afb059c672c54bbe385
Reviewed-on: https://gerrit-int.chromium.org/42472
Reviewed-by: Simon Que <sque@google.com>
Commit-Queue: Yuheng Long <yuhenglong@google.com>
Tested-by: Yuheng Long <yuhenglong@google.com>
Diffstat (limited to 'bestflags')
-rw-r--r-- | bestflags/generation.py | 5 | ||||
-rw-r--r-- | bestflags/genetic_algorithm.py | 295 | ||||
-rw-r--r-- | bestflags/testing_batch.py | 80 |
3 files changed, 375 insertions, 5 deletions
diff --git a/bestflags/generation.py b/bestflags/generation.py index 0bc2f57c..8e1358ad 100644 --- a/bestflags/generation.py +++ b/bestflags/generation.py @@ -50,6 +50,11 @@ class Generation(object): # generations until all its pending tasks have been executed. self._pending = len(exe_pool) + def CandidatePool(self): + """Return the candidate tasks of this generation.""" + + return self._candidate_pool + def Pool(self): """Return the task set of this generation.""" diff --git a/bestflags/genetic_algorithm.py b/bestflags/genetic_algorithm.py new file mode 100644 index 00000000..56b1dcc1 --- /dev/null +++ b/bestflags/genetic_algorithm.py @@ -0,0 +1,295 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""The hill genetic algorithm. + +Part of the Chrome build flags optimization. +""" + +__author__ = 'yuhenglong@google.com (Yuheng Long)' + +import random + +import flags +from flags import Flag +from flags import FlagSet +from generation import Generation +from task import Task + + +def CrossoverWith(first_flag, second_flag): + """Get a crossed over gene. + + At present, this just picks either/or of these values. However, it could be + implemented as an integer maskover effort, if required. + + Args: + first_flag: The first gene (Flag) to cross over with. + second_flag: The second gene (Flag) to cross over with. + + Returns: + A Flag that can be considered appropriately randomly blended between the + first and second input flag. + """ + + return first_flag if random.randint(0, 1) else second_flag + + +def RandomMutate(specs, flag_set, mutation_rate): + """Randomly mutate the content of a task. + + Args: + specs: A list of spec from which the flag set is created. + flag_set: The current flag set being mutated + mutation_rate: What fraction of genes to mutate. + + Returns: + A Genetic Task constructed by randomly mutating the input flag set. + """ + + results_flags = [] + + for spec in specs: + # Randomly choose whether this flag should be mutated. + if random.randint(0, int(1 / mutation_rate)): + continue + + # If the flag is not already in the flag set, it is added. + if spec not in flag_set: + results_flags.append(Flag(spec)) + continue + + # If the flag is already in the flag set, it is mutated. + result = flags.Search(spec) + + # The value of a numeric flag will be changed, and a boolean flag will be + # dropped. + if not result: + continue + + value = flag_set[spec].GetValue() + + # Randomly select a nearby value of the current value of the flag. + rand_arr = [value] + if value + 1 < int(result.group('end')): + rand_arr.append(value + 1) + + rand_arr.append(value - 1) + value = random.sample(rand_arr, 1)[0] + + # If the value is smaller than the start of the spec, this flag will be + # dropped. + if value != int(result.group('start')) - 1: + results_flags.append(Flag(spec, value)) + + return GATask(FlagSet(results_flags)) + + +class GATask(Task): + def __init__(self, flag_set): + Task.__init__(self, flag_set) + + def ReproduceWith(self, other, specs, mutation_rate): + """Reproduce with other FlagSet. + + Args: + other: A FlagSet to reproduce with. + specs: A list of spec from which the flag set is created. + mutation_rate: one in mutation_rate flags will be mutated (replaced by a + random version of the same flag, instead of one from either of the + parents). Set to 0 to disable mutation. + + Returns: + A GA task made by mixing self with other. + """ + + # Get the flag dictionary. + father_flags = self.GetFlags().GetFlags() + mother_flags = other.GetFlags().GetFlags() + + # Flags that are common in both parents and flags that belong to only one + # parent. + self_flags = [] + other_flags = [] + common_flags = [] + + # Find out flags that are common to both parent and flags that belong soly + # to one parent. + for self_flag in father_flags: + if self_flag in mother_flags: + common_flags.append(self_flag) + else: + self_flags.append(self_flag) + + for other_flag in mother_flags: + if other_flag not in father_flags: + other_flags.append(other_flag) + + # Randomly select flags that belong to only one parent. + output_flags = [father_flags[f] for f in self_flags if random.randint(0, 1)] + others = [mother_flags[f] for f in other_flags if random.randint(0, 1)] + output_flags.extend(others) + # Turn on flags that belong to both parent. Randomly choose the value of the + # flag from either parent. + for flag in common_flags: + output_flags.append(CrossoverWith(father_flags[flag], mother_flags[flag])) + + # Mutate flags + if mutation_rate: + return RandomMutate(specs, FlagSet(output_flags), mutation_rate) + + return GATask(FlagSet(output_flags)) + + +class GAGeneration(Generation): + """The Genetic Algorithm.""" + + # The value checks whether the algorithm has converged and arrives at a fixed + # point. If STOP_THRESHOLD of generations have not seen any performance + # improvement, the Genetic Algorithm stops. + STOP_THRESHOLD = None + + # Number of tasks in each generation. + NUM_CHROMOSOMES = None + + # The value checks whether the algorithm has converged and arrives at a fixed + # point. If NUM_TRIALS of trials have been attempted to generate a new task + # without a success, the Genetic Algorithm stops. + NUM_TRIALS = None + + # The flags that can be used to generate new tasks. + SPECS = None + + # What fraction of genes to mutate. + MUTATION_RATE = 0 + + @staticmethod + def InitMetaData(stop_threshold, num_chromosomes, num_trials, specs, + mutation_rate): + """Set up the meta data for the Genetic Algorithm. + + Args: + stop_threshold: The number of generations, upon which no performance has + seen, the Genetic Algorithm stops. + num_chromosomes: Number of tasks in each generation. + num_trials: The number of trials, upon which new task has been tried to + generated without success, the Genetic Algorithm stops. + specs: The flags that can be used to generate new tasks. + mutation_rate: What fraction of genes to mutate. + """ + + GAGeneration.STOP_THRESHOLD = stop_threshold + GAGeneration.NUM_CHROMOSOMES = num_chromosomes + GAGeneration.NUM_TRIALS = num_trials + GAGeneration.SPECS = specs + GAGeneration.MUTATION_RATE = mutation_rate + + def __init__(self, tasks, parents, total_stucks): + """Set up the meta data for the Genetic Algorithm. + + Args: + tasks: A set of tasks to be run. + parents: A set of tasks from which this new generation is produced. This + set also contains the best tasks generated so far. + total_stucks: The number of generations that have not seen improvement. + The Genetic Algorithm will stop once the total_stucks equals to + NUM_TRIALS defined in the GAGeneration class. + """ + + Generation.__init__(self, tasks, parents) + self._total_stucks = total_stucks + + def Improve(self): + """True if this generation has improvement upon its parent generation.""" + + tasks = self.Pool() + parents = self.CandidatePool() + + # The first generate does not have parents. + if not parents: + return True + + # Found out whether a task has improvement upon the best task in the + # parent generation. + best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0] + best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0] + + # At least one task has improvement. + if best_current.Improve(best_parent): + self._total_stucks = 0 + return True + + # If STOP_THRESHOLD of generations have no improvement, the algorithm stops. + if self._total_stucks >= GAGeneration.STOP_THRESHOLD: + return False + + self._total_stucks += 1 + return True + + def Next(self, cache): + """Calculate the next generation. + + Generate a new generation from the a set of tasks. This set contains the + best set seen so far and the tasks executed in the parent generation. + + Args: + cache: A set of tasks that have been generated before. + + Returns: + A set of new generations. + """ + + target_len = GAGeneration.NUM_CHROMOSOMES + specs = GAGeneration.SPECS + mutation_rate = GAGeneration.MUTATION_RATE + + # Collect a set of size target_len of tasks. This set will be used to + # produce a new generation of tasks. + gen_tasks = [task for task in self.Pool()] + + parents = self.CandidatePool() + if parents: + gen_tasks.extend(parents) + + # A set of tasks that are the best. This set will be used as the parent + # generation to produce the next generation. + sort_func = lambda task: task.GetTestResult() + retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len] + + child_pool = set() + for father in retained_tasks: + num_trials = 0 + # Try num_trials times to produce a new child. + while num_trials < GAGeneration.NUM_TRIALS: + # Randomly select another parent. + mother = random.choice(retained_tasks) + # Cross over. + child = mother.ReproduceWith(father, specs, mutation_rate) + if child not in child_pool and child not in cache: + child_pool.add(child) + break + else: + num_trials += 1 + + num_trials = 0 + + while len(child_pool) < target_len and num_trials < GAGeneration.NUM_TRIALS: + for keep_task in retained_tasks: + # Mutation. + child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate) + if child not in child_pool and child not in cache: + child_pool.add(child) + if len(child_pool) >= target_len: + break + else: + num_trials += 1 + + # If NUM_TRIALS of tries have been attempted without generating a set of new + # tasks, the algorithm stops. + if num_trials >= GAGeneration.NUM_TRIALS: + return [] + + assert len(child_pool) == target_len + + return [GAGeneration(child_pool, set(retained_tasks), self._total_stucks)] diff --git a/bestflags/testing_batch.py b/bestflags/testing_batch.py index 8ec2cd9d..ad1ab4b7 100644 --- a/bestflags/testing_batch.py +++ b/bestflags/testing_batch.py @@ -19,6 +19,8 @@ import unittest import flags from flags import Flag from flags import FlagSet +from genetic_algorithm import GAGeneration +from genetic_algorithm import GATask from hill_climb_best_neighbor import HillClimbingBestBranch import pipeline_process from steering import Steering @@ -33,6 +35,12 @@ NUM_FLAGS = 5 # The value range of the flags. FLAG_RANGES = 10 +# The following variables are meta data for the Genetic Algorithm. +STOP_THRESHOLD = 20 +NUM_CHROMOSOMES = 10 +NUM_TRIALS = 20 +MUTATION_RATE = 0.03 + def _GenerateRandomRasks(specs): """Generate a task that has random values. @@ -92,6 +100,35 @@ def _GenerateNoFlagTask(): return set([Task(FlagSet([]))]) +def _GenerateRandomGATasks(specs, num_tasks, num_trials): + """Generate a set of tasks for the Genetic Algorithm. + + Args: + specs: A list of spec from which the flag set is created. + num_tasks: number of tasks that should be generated. + num_trials: the maximum number of tries should be attempted to generate the + set of tasks. + + Returns: + A set of randomly generated tasks. + """ + + tasks = set([]) + + total_trials = 0 + while len(tasks) < num_tasks and total_trials < num_trials: + new_flag = FlagSet([Flag(spec) for spec in specs if random.randint(0, 1)]) + new_task = GATask(new_flag) + + if new_task in tasks: + total_trials += 1 + else: + tasks.add(new_task) + total_trials = 0 + + return tasks + + def _ComputeCost(cost_func, specs, flag_set): """Compute the mock cost of the flag_set using the input cost function. @@ -148,7 +185,9 @@ def _TestAlgorithm(cost_func, specs, generations, best_result): specs: All the specs that are used in the algorithm. This is used to check whether certain flag is disabled in the flag_set dictionary. generations: The initial generations to be evaluated. - best_result: The expected best result of the algorithm. + best_result: The expected best result of the algorithm. If best_result is + -1, the algorithm may or may not return the best value. Therefore, no + assertion will be inserted. """ # Set up the utilities to test the framework. @@ -184,16 +223,19 @@ def _TestAlgorithm(cost_func, specs, generations, best_result): output_queue.put(task) pp_steer.join() - assert best_result == result + + # Only do this test when best_result is not -1. + if best_result != -1: + assert best_result == result class FlagAlgorithms(unittest.TestCase): """This class test the FlagSet class.""" def testBestHillClimb(self): - """Test the equal method of the Class FlagSet. + """Test the best hill climb algorithm. - Two FlagSet instances are equal if all their flags are equal. + Test whether it finds the best results as expected. """ # Initiate the build/test command and the log directory. @@ -217,7 +259,7 @@ class FlagAlgorithms(unittest.TestCase): # This test uses a cost function that is the negative of the previous cost # function. Therefore, the best result should be found in task with all the # flags enabled. - cost_function = '-sum(values[0:len(values)])' + cost_function = 'sys.maxint - sum(values[0:len(values)])' all_flags = list(generation_tasks)[0].GetFlags() cost = _ComputeCost(cost_function, specs, all_flags) @@ -230,6 +272,34 @@ class FlagAlgorithms(unittest.TestCase): # 0, i.e., when all the flags are disabled. _TestAlgorithm(cost_function, specs, generations, cost) + def testGeneticAlgorithm(self): + """Test the Genetic Algorithm. + + Do a function testing here and see how well it scales. + """ + + # Initiate the build/test command and the log directory. + Task.InitLogCommand(None, None, 'output') + + # Generate the testing specs. + mock_test_file = 'scale_mock_test' + _GenerateTestFlags(NUM_FLAGS, FLAG_RANGES, mock_test_file) + specs = flags.ReadConf(mock_test_file) + + # Initiate the build/test command and the log directory. + GAGeneration.InitMetaData(STOP_THRESHOLD, NUM_CHROMOSOMES, NUM_TRIALS, + specs, MUTATION_RATE) + + # Generate the initial generations. + generation_tasks = _GenerateRandomGATasks(specs, NUM_CHROMOSOMES, + NUM_TRIALS) + generations = [GAGeneration(generation_tasks, set([]), 0)] + + # Test the algorithm. + _TestAlgorithm('sum(values[0:len(values)])', specs, generations, -1) + cost_func = 'sys.maxint - sum(values[0:len(values)])' + _TestAlgorithm(cost_func, specs, generations, -1) + if __name__ == '__main__': unittest.main() |