#!/usr/bin/env python # # Copyright (C) 2016 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # """Analyze block trace""" import collections import os import re import string import sys RE_BLOCK = r'.+\s+(block[a-z_]+):\s+' RE_BLOCK_BIO_QUEUE = r'.+\s+([0-9]+\.[0-9]+):\s+block_bio_queue:\s+([0-9]+)\,([0-9]+)\s+([RW]\S*)\s+([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)' # dev_num = major * MULTIPLIER + minor DEV_MAJOR_MULTIPLIER = 1000 # dm access is remapped to disk access. So account differently DM_MAJOR = 253 class RwEvent: def __init__(self, block_num, start_time, size): self.block_num = block_num self.start_time = start_time self.size = size self.latency = 0 def set_latency(self, latency): self.latency = latency def get_string_pos(strings, string_to_find): for i, s in enumerate(strings): if s == string_to_find: return i return -1 class Trace: def __init__(self, process): self.process = process self.reads = [] #(start time, RwEvent) self.writes = [] #(start time, RwEvent) self.recent_reads = {} # K: self.total_latency = 0 self.total_reads = 0 self.total_writes = 0 self.total_dm_reads = {} #K: devnum, V: blocks self.total_dm_writes = {} self.re_block_queue = re.compile(RE_BLOCK_BIO_QUEUE) self.processes = set() if process[-1] == '*': print "Process name starts with", process[:-1] self.process_name_is_prefix = True else: print "Process name", process self.process_name_is_prefix = False def parse_bio_queue(self, l): match = self.re_block_queue.match(l) if not match: return start_time = int(float(match.group(1))*1000000) #us major = int(match.group(2)) minor = int(match.group(3)) operation = match.group(4) block_num = int(match.group(5)) size = int(match.group(6)) process = match.group(7) if self.process_name_is_prefix: if not process.startswith(self.process[:-1]): return self.processes.add(process) else: if process != self.process: return if major == DM_MAJOR: devNum = major * DEV_MAJOR_MULTIPLIER + minor; if operation[0] == 'R': if devNum not in self.total_dm_reads: self.total_dm_reads[devNum] = 0 self.total_dm_reads[devNum] += size elif operation[0] == 'W': if devNum not in self.total_dm_writes: self.total_dm_writes[devNum] = 0 self.total_dm_writes[devNum] += size return event = RwEvent(block_num, start_time, size) if operation[0] == 'R': self.reads.append((start_time, event)) self.recent_reads[block_num] = event self.total_reads += size elif operation[0] == 'W': self.writes.append((start_time, event)) self.total_writes += size def parse_rq_complete(self, l): words = string.split(l) cmd_pos = get_string_pos(words, "block_rq_complete:") if cmd_pos == -1: cmd_pos = get_string_pos(words, "block_bio_complete:") block_num = int(words[-4]) event = self.recent_reads.get(block_num) if not event: return operation = words[cmd_pos + 2] if not operation.startswith("R"): return end_time = int(float(words[cmd_pos - 1][:-1])*1000000) #us latency = end_time - event.start_time if latency > 20000: print "very bad latency:", latency, l print "start time,", event.start_time event.set_latency(latency) del self.recent_reads[block_num] self.total_latency += latency def parse_block_trace(self, l, match): try: cmd = match.group(1) if cmd == "block_bio_queue": self.parse_bio_queue(l) elif cmd == "block_rq_complete" or cmd == "block_bio_complete": self.parse_rq_complete(l) except ValueError: print "cannot parse:", l raise def dump(self): if self.process_name_is_prefix: print "Processes:", self.processes print "total read blocks,", self.total_reads print "total write blocks,", self.total_writes if len(self.reads) > 0: total_read_time = self.reads[-1][0] + self.reads[-1][1].latency - self.reads[0][0] else: total_read_time = 0 print "Total DM R" for dev,size in self.total_dm_reads.items(): print dev, size print "Total DM W" for dev,size in self.total_dm_writes.items(): print dev, size print "total read time,",total_read_time read_size_histogram = {} latency_per_read_size = {} for (time, event) in self.reads: if not read_size_histogram.get(event.size): read_size_histogram[event.size] = 0 if not latency_per_read_size.get(event.size): latency_per_read_size[event.size] = [ 0, 0] # num events, total latency read_size_histogram[event.size] = read_size_histogram[event.size] + 1 latency_sum = latency_per_read_size[event.size] latency_sum[0] += 1 latency_sum[1] += event.latency read_size_histogram = collections.OrderedDict(sorted(read_size_histogram.items())) print "read histogram" for k,v in read_size_histogram.iteritems(): print k, ',', v print "latency per read size" latency_per_read_size = collections.OrderedDict(sorted(latency_per_read_size.items())) for k,v in latency_per_read_size.iteritems(): if v[0] != 0: print k, ',', v[1] / v[0], v[0], v[1] def main(argv): if (len(argv) < 3): print "check_io_trace.py processname filename" return keyword = argv[1] filename = argv[2] trace = Trace(keyword) prog = re.compile(RE_BLOCK) with open(filename) as f: for l in f: result = prog.match(l) if result: trace.parse_block_trace(l, result) trace.dump() if __name__ == '__main__': main(sys.argv)