diff options
Diffstat (limited to 'deprecated/mem_tests/clean_data.py')
-rwxr-xr-x | deprecated/mem_tests/clean_data.py | 29 |
1 files changed, 29 insertions, 0 deletions
#!/usr/bin/python2
# Origin: deprecated/mem_tests/clean_data.py (added as a new file, mode 100755).
"""Cleans output from other scripts to eliminate duplicates.

When frequently sampling data, we see that records occasionally will contain
the same timestamp (due to perf recording twice in the same second).

This removes all of the duplicate timestamps for every record; when several
records share a timestamp, the last one read is the one that is kept. Order
with respect to timestamps is not preserved. Also, the assumption is that the
log file is a csv with the first value in each row being the time in seconds
from a standard time.

The cleaned records are appended to 'clean2.csv' in the current directory.
"""

import argparse

# Destination for the cleaned records. It is opened in append mode, so
# repeated runs accumulate output rather than overwrite it.
OUTPUT_FILENAME = 'clean2.csv'


def dedupe_lines(lines):
    """Return one record per distinct timestamp from an iterable of csv lines.

    Args:
        lines: Iterable of csv text lines whose first comma-separated field
            is an integer timestamp.

    Returns:
        A list holding, for each distinct timestamp, the last line seen with
        that timestamp. Every returned line ends with a newline. The order of
        the list is not guaranteed.

    Raises:
        ValueError: If a non-blank line does not start with an integer field.
    """
    latest_by_time = {}
    for line in lines:
        # A blank line (e.g. a trailing empty line) carries no record, and
        # int('') would otherwise abort the whole run.
        if not line.strip():
            continue
        # A final line without a terminator would be glued to whichever
        # record happens to be written after it, so normalise it here.
        if not line.endswith('\n'):
            line += '\n'
        timestamp = int(line.split(',')[0])
        # Later records overwrite earlier ones with the same timestamp.
        latest_by_time[timestamp] = line
    return list(latest_by_time.values())


def clean_file(input_path, output_path=OUTPUT_FILENAME):
    """Append the de-duplicated records of input_path to output_path.

    Args:
        input_path: Path of the csv log to clean.
        output_path: Path of the file the cleaned records are appended to.
    """
    # The input is fully parsed before the output is touched, so a malformed
    # log does not leave a partially written output behind.
    with open(input_path) as source:
        records = dedupe_lines(source)
    with open(output_path, 'a') as sink:
        sink.writelines(records)


def main():
    """Parse the command line and clean the file named on it."""
    parser = argparse.ArgumentParser()
    parser.add_argument('filename')
    args = parser.parse_args()
    clean_file(args.filename)


if __name__ == '__main__':
    main()