aboutsummaryrefslogtreecommitdiff
path: root/deprecated/mem_tests/clean_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'deprecated/mem_tests/clean_data.py')
-rwxr-xr-xdeprecated/mem_tests/clean_data.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/deprecated/mem_tests/clean_data.py b/deprecated/mem_tests/clean_data.py
new file mode 100755
index 00000000..1433ba41
--- /dev/null
+++ b/deprecated/mem_tests/clean_data.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python2
+"""Cleans output from other scripts to eliminate duplicates.
+
+When frequently sampling data, we see that records occasionally will contain
+the same timestamp (due to perf recording twice in the same second).
+
+This removes records with duplicate timestamps, keeping the last one seen.
+Order with respect to timestamps is not preserved. The log file is assumed to
+be a csv whose first value in each row is the integer time in seconds from a
+standard time. Cleaned rows are appended to clean2.csv.
+
+"""
+
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('filename')
+args = parser.parse_args()
+
+# Keyed by integer timestamp, so a later record replaces an earlier duplicate.
+records = {}
+with open(args.filename) as log_file:
+    for line in log_file:
+        if line.strip():  # a blank line has no timestamp to parse
+            records[int(line.split(',')[0])] = line
+
+# Append mode: repeated runs accumulate into clean2.csv; 'with' closes it.
+with open('clean2.csv', 'a') as output_file:
+    output_file.writelines(records.values())