diff options
Diffstat (limited to 'fdo_scripts/summarize_hot_blocks.py')
-rw-r--r-- | fdo_scripts/summarize_hot_blocks.py | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/fdo_scripts/summarize_hot_blocks.py b/fdo_scripts/summarize_hot_blocks.py new file mode 100644 index 00000000..c19f5477 --- /dev/null +++ b/fdo_scripts/summarize_hot_blocks.py @@ -0,0 +1,184 @@ +#!/usr/bin/python2.6 +# +# Copyright 2011 Google Inc. All Rights Reserved. + +"""Summarize hottest basic blocks found while doing a ChromeOS FDO build. + +Here is an example execution: + + summarize_hot_blocks.py + --data_dir=~/chromeos/chroot/var/cache/chromeos-chrome/ --cutoff=10000 + --output_dir=/home/x/y + +With the cutoff, it will ignore any basic blocks that have a count less +than what is specified (in this example 10000) +The script looks inside the directory (this is typically a directory where +the object files are generated) for files with *.profile and *.optimized +suffixes. To get these, the following flags were added to the compiler +invokation within vanilla_vs_fdo.py in the profile-use phase. + + "-fdump-tree-optimized-blocks-lineno " + "-fdump-ipa-profile-blocks-lineno " + +Here is an example of the *.profile and *.optimized files contents: + +# BLOCK 7 freq:3901 count:60342, starting at line 92 +# PRED: 6 [39.0%] count:60342 (true,exec) + [url_canon_internal.cc : 92:28] MEM[(const char * *)source_6(D) + 16B] = D.28080_17; + [url_canon_internal.cc : 93:41] MEM[(struct Component *)parsed_4(D) + 16B] = MEM[(const struct Component &)repl_1(D) + 80]; +# SUCC: 8 [100.0%] count:60342 (fallthru,exec) +# BLOCK 8 freq:10000 count:154667, starting at line 321 +# PRED: 7 [100.0%] count:60342 (fallthru,exec) 6 [61.0%] count:94325 (false,exec) + [url_canon_internal.cc : 321:51] # DEBUG D#10 => [googleurl/src/url_canon_internal.cc : 321] &parsed_4(D)->host + +this script finds the blocks with highest count and shows the first line +of each block so that it is easy to identify the origin of the basic block. + +""" + +__author__ = "llozano@google.com (Luis Lozano)" + +import optparse +import os +import re +import shutil +import sys +import tempfile + +from utils import command_executer + + +# Given a line, check if it has a block count and return it. +# Return -1 if there is no match +def GetBlockCount(line): + match_obj = re.match(".*# BLOCK \d+ .*count:(\d+)", line) + if match_obj: + return int(match_obj.group(1)) + else: + return -1 + + +class Collector(object): + def __init__(self, data_dir, cutoff, output_dir, tempdir): + self._data_dir = data_dir + self._cutoff = cutoff + self._output_dir = output_dir + self._tempdir = tempdir + self._ce = command_executer.GetCommandExecuter() + + def CollectFileList(self, file_exp, list_file): + command = ("find %s -type f -name '%s' > %s" % + (self._data_dir, file_exp, + os.path.join(self._tempdir, list_file))) + ret = self._ce.RunCommand(command) + if ret: + raise Exception("Failed: %s" % command) + + def SummarizeLines(self, data_file): + sum_lines = [] + search_lno = False + for line in data_file: + count = GetBlockCount(line) + if count != -1: + if count >= self._cutoff: + search_lno = True + sum_line = line.strip() + sum_count = count + # look for a line that starts with line number information + elif search_lno and re.match("^\s*\[.*: \d*:\d*]", line): + search_lno = False + sum_lines.append("%d:%s: %s %s" % + (sum_count, data_file.name, sum_line, line)) + return sum_lines + + # Look for blocks in the data file that have a count larger than the cutoff + # and generate a sorted summary file of the hottest blocks. + def SummarizeFile(self, data_file, sum_file): + with open(data_file, "r") as f: + sum_lines = self.SummarizeLines(f) + + # sort reverse the list in place by the block count number + sum_lines.sort(key=GetBlockCount, reverse=True) + + with open(sum_file, "w") as sf: + sf.write("".join(sum_lines)) + + print "Generated file Summary: ", sum_file + + # Find hottest blocks in the list of files, generate a sorted summary for + # each file and then do a sorted merge of all the summaries. + def SummarizeList(self, list_file, summary_file): + with open(os.path.join(self._tempdir, list_file)) as f: + sort_list = [] + for file_name in f: + file_name = file_name.strip() + sum_file = "%s.sum" % file_name + sort_list.append("%s%s" % (sum_file, chr(0))) + self.SummarizeFile(file_name, sum_file) + + tmp_list_file = os.path.join(self._tempdir, "file_list.dat") + with open(tmp_list_file, "w") as file_list_file: + for x in sort_list: + file_list_file.write(x) + + merge_command = ("sort -nr -t: -k1 --merge --files0-from=%s > %s " % + (tmp_list_file, summary_file)) + + ret = self._ce.RunCommand(merge_command) + if ret: + raise Exception("Failed: %s" % merge_command) + print "Generated general summary: ", summary_file + + def SummarizePreOptimized(self, summary_file): + self.CollectFileList("*.profile", "chrome.profile.list") + self.SummarizeList("chrome.profile.list", + os.path.join(self._output_dir, summary_file)) + + def SummarizeOptimized(self, summary_file): + self.CollectFileList("*.optimized", "chrome.optimized.list") + self.SummarizeList("chrome.optimized.list", + os.path.join(self._output_dir, summary_file)) + + +def Main(argv): + command_executer.InitCommandExecuter() + usage = ("usage: %prog --data_dir=<dir> --cutoff=<value> " + "--output_dir=<dir> [--keep_tmp]") + parser = optparse.OptionParser(usage=usage) + parser.add_option("--data_dir", + dest="data_dir", + help=("directory where the FDO (*.profile and " + "*.optimized) files are located")) + parser.add_option("--cutoff", + dest="cutoff", + help="Minimum count to consider for each basic block") + parser.add_option("--output_dir", + dest="output_dir", + help=("directory where summary data will be generated" + "(pre_optimized.txt, optimized.txt)")) + parser.add_option("--keep_tmp", + action="store_true", + dest="keep_tmp", + default=False, + help=("Keep directory with temporary files" + "(for debugging purposes)")) + options = parser.parse_args(argv)[0] + if not all((options.data_dir, options.cutoff, options.output_dir)): + parser.print_help() + sys.exit(1) + + tempdir = tempfile.mkdtemp() + + co = Collector(options.data_dir, int(options.cutoff), options.output_dir, + tempdir) + co.SummarizePreOptimized("pre_optimized.txt") + co.SummarizeOptimized("optimized.txt") + + if not options.keep_tmp: + shutil.rmtree(tempdir, ignore_errors=True) + + return 0 + +if __name__ == "__main__": + retval = Main(sys.argv) + sys.exit(retval) |