Diffstat (limited to 'tools')
-rw-r--r-- | tools/fio.service                 |  10
-rwxr-xr-x | tools/fiologparser.py             | 221
-rwxr-xr-x | tools/genfio                      |   8
-rw-r--r-- | tools/hist/.gitignore             |   3
-rwxr-xr-x | tools/hist/fiologparser_hist.py   | 388
-rw-r--r-- | tools/hist/fiologparser_hist.py.1 | 201
-rwxr-xr-x | tools/hist/half-bins.py           |  38
-rwxr-xr-x | tools/plot/fio2gnuplot            |  18
-rw-r--r-- | tools/plot/graph2D.gpm            |  37
-rw-r--r-- | tools/plot/graph3D.gpm            |  33
-rw-r--r-- | tools/plot/math.gpm               |  25
11 files changed, 956 insertions, 26 deletions
diff --git a/tools/fio.service b/tools/fio.service
new file mode 100644
index 00000000..21de0b7a
--- /dev/null
+++ b/tools/fio.service
@@ -0,0 +1,10 @@
+[Unit]
+
+Description=flexible I/O tester server
+After=network.target
+
+[Service]
+
+Type=simple
+PIDFile=/run/fio.pid
+ExecStart=/usr/bin/fio --server
diff --git a/tools/fiologparser.py b/tools/fiologparser.py
new file mode 100755
index 00000000..5a95009e
--- /dev/null
+++ b/tools/fiologparser.py
@@ -0,0 +1,221 @@
+#!/usr/bin/python
+#
+# fiologparser.py
+#
+# This tool lets you parse multiple fio log files and look at interval
+# statistics even when samples are non-uniform. For instance:
+#
+# fiologparser.py -s *bw*
+#
+# to see per-interval sums for all bandwidth logs or:
+#
+# fiologparser.py -a *clat*
+#
+# to see per-interval average completion latency.
+
+import argparse
+import math
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in milliseconds.')
+    parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
+    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
+    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
+                        help='print all stats for each interval.')
+    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
+    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
+    parser.add_argument("FILE", help="fio log output files to parse", nargs="+")
+    args = parser.parse_args()
+
+    return args
+
+def get_ftime(series):
+    ftime = 0
+    for ts in series:
+        if ftime == 0 or ts.last.end < ftime:
+            ftime = ts.last.end
+    return ftime
+
+def print_full(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
+        start += ctx.interval
+        end += ctx.interval
+
+def print_sums(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        print("%s, %0.3f" % (end, sum(results)))
+        start += ctx.interval
+        end += ctx.interval
+
+def print_averages(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        print("%s, %0.3f" % (end, float(sum(results))/len(results)))
+        start += ctx.interval
+        end += ctx.interval
+
+# FIXME: this routine is computationally inefficient
+# and has O(N^2) behavior
+# it would be better to make one pass through samples
+# to segment them into a series of time intervals, and
+# then compute stats on each time interval instead.
+# to debug this routine, use
+#   # sort -n -t ',' -k 2 small.log
+# on your input.
+
+def my_extend( vlist, val ):
+    vlist.extend(val)
+    return vlist
+
+array_collapser = lambda vlist, val: my_extend(vlist, val)
+
+def print_all_stats(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
+    while (start < ftime):  # for each time interval
+        end = ftime if ftime < end else end
+        sample_arrays = [ s.get_samples(start, end) for s in series ]
+        samplevalue_arrays = []
+        for sample_array in sample_arrays:
+            samplevalue_arrays.append(
+                [ sample.value for sample in sample_array ] )
+        # collapse list of lists of sample values into list of sample values
+        samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
+        # compute all stats and print them
+        mymin = min(samplevalues)
+        myavg = sum(samplevalues) / float(len(samplevalues))
+        mymedian = median(samplevalues)
+        my90th = percentile(samplevalues, 0.90)
+        my95th = percentile(samplevalues, 0.95)
+        my99th = percentile(samplevalues, 0.99)
+        mymax = max(samplevalues)
+        print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
+            start, len(samplevalues),
+            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
+
+        # advance to next interval
+        start += ctx.interval
+        end += ctx.interval
+
+def median(values):
+    s=sorted(values)
+    return float(s[(len(s)-1)/2]+s[(len(s)/2)])/2
+
+def percentile(values, p):
+    s = sorted(values)
+    k = (len(s)-1) * p
+    f = math.floor(k)
+    c = math.ceil(k)
+    if f == c:
+        return s[int(k)]
+    return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
+
+def print_default(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+    averages = []
+    weights = []
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        averages.append(sum(results))
+        weights.append(end-start)
+        start += ctx.interval
+        end += ctx.interval
+
+    total = 0
+    for i in range(0, len(averages)):
+        total += averages[i]*weights[i]
+    print('%0.3f' % (total/sum(weights)))
+
+class TimeSeries(object):
+    def __init__(self, ctx, fn):
+        self.ctx = ctx
+        self.last = None
+        self.samples = []
+        self.read_data(fn)
+
+    def read_data(self, fn):
+        f = open(fn, 'r')
+        p_time = 0
+        for line in f:
+            (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
+            self.add_sample(p_time, int(time), int(value))
+            p_time = int(time)
+
+    def add_sample(self, start, end, value):
+        sample = Sample(ctx, start, end, value)
+        if not self.last or self.last.end < end:
+            self.last = sample
+        self.samples.append(sample)
+
+    def get_samples(self, start, end):
+        sample_list = []
+        for s in self.samples:
+            if s.start >= start and s.end <= end:
+                sample_list.append(s)
+        return sample_list
+
+    def get_value(self, start, end):
+        value = 0
+        for sample in self.samples:
+            value += sample.get_contribution(start, end)
+        return value
+
+class Sample(object):
+    def __init__(self, ctx, start, end, value):
+        self.ctx = ctx
+        self.start = start
+        self.end = end
+        self.value = value
+
+    def get_contribution(self, start, end):
+        # short circuit if not within the bound
+        if (end < self.start or start > self.end):
+            return 0
+
+        sbound = self.start if start < self.start else start
+        ebound = self.end if end > self.end else end
+        ratio = float(ebound-sbound) / (end-start)
+        return self.value*ratio/ctx.divisor
+
+
+if __name__ == '__main__':
+    ctx = parse_args()
+    series = []
+    for fn in ctx.FILE:
+        series.append(TimeSeries(ctx, fn))
+    if ctx.sum:
+        print_sums(ctx, series)
+    elif ctx.average:
+        print_averages(ctx, series)
+    elif ctx.full:
+        print_full(ctx, series)
+    elif ctx.allstats:
+        print_all_stats(ctx, series)
+    else:
+        print_default(ctx, series)
+
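
The per-interval weighting fiologparser.py applies is easiest to see in isolation. A minimal sketch of what Sample.get_contribution() computes, with invented sample and interval values (not part of the patch):

    # A sample spanning [900, 1100) with value 400, weighted into the
    # interval [0, 1000): it overlaps the last 100 of the interval's
    # 1000 time units, so it contributes 400 * 100/1000 = 40.
    s_start, s_end, value = 900, 1100, 400
    i_start, i_end = 0, 1000
    sbound = max(s_start, i_start)   # 900
    ebound = min(s_end, i_end)       # 1000
    print(value * float(ebound - sbound) / (i_end - i_start))  # 40.0
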
diff --git a/tools/genfio b/tools/genfio
index 4d32d130..68004520 100755
--- a/tools/genfio
+++ b/tools/genfio
@@ -54,6 +54,8 @@ show_help() {
 	Default is $IODEPTH
 -d disk1[,disk2,disk3,..] : Run the tests on the selected disks
 	Separated each disk with a comma
+-z filesize : Specify the working file size, if you are passing filepaths to -d
+	Disabled by default
 -r seconds : Time in seconds per benchmark
 	0 means till the end of the device
 	Default is $RUNTIME seconds
@@ -203,7 +205,7 @@ esac
 }
 
 parse_cmdline() {
-while getopts "hacpsd:b:r:m:x:D:A:B:" opt; do
+while getopts "hacpsd:b:r:m:x:z:D:A:B:" opt; do
   case $opt in
     h)
       show_help
@@ -260,6 +262,10 @@ while getopts "hacpsd:b:r:m:x:D:A:B:" opt; do
     A)
       echo "exec_postrun=$OPTARG" >> $TEMPLATE
       ;;
+    z)
+      FSIZE=$OPTARG
+      echo "size=$FSIZE" >> $TEMPLATE
+      ;;
     \?)
       echo "Invalid option: -$OPTARG" >&2
       ;;
diff --git a/tools/hist/.gitignore b/tools/hist/.gitignore
new file mode 100644
index 00000000..4f875dac
--- /dev/null
+++ b/tools/hist/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*.ipynb
+.ipynb_checkpoints
diff --git a/tools/hist/fiologparser_hist.py b/tools/hist/fiologparser_hist.py
new file mode 100755
index 00000000..ead5e543
--- /dev/null
+++ b/tools/hist/fiologparser_hist.py
@@ -0,0 +1,388 @@
+#!/usr/bin/env python2.7
+"""
+    Utility for converting *_clat_hist* files generated by fio into latency statistics.
+
+    Example usage:
+
+        $ fiologparser_hist.py *_clat_hist*
+        end-time, samples, min, avg, median, 90%, 95%, 99%, max
+        1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
+        2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
+        4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+        ...
+
+    @author Karl Cronburg <karl.cronburg@gmail.com>
+"""
+import os
+import sys
+import pandas
+import numpy as np
+
+err = sys.stderr.write
+
+def weighted_percentile(percs, vs, ws):
+    """ Use linear interpolation to calculate the weighted percentile.
+
+        Value and weight arrays are first sorted by value. The cumulative
+        distribution function (cdf) is then computed, after which np.interp
+        finds the two values closest to our desired weighted percentile(s)
+        and linearly interpolates them.
+
+        percs  :: List of percentiles we want to calculate
+        vs     :: Array of values we are computing the percentile of
+        ws     :: Array of weights for our corresponding values
+        return :: Array of percentiles
+    """
+    idx = np.argsort(vs)
+    vs, ws = vs[idx], ws[idx]  # weights and values sorted by value
+    cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
+    return np.interp(percs, cdf, vs)  # linear interpolation
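
weighted_percentile() above is self-contained and easy to check by hand. A minimal sketch with invented values, assuming the function has been pasted into a Python session:

    import numpy as np
    vs = np.array([10.0, 20.0, 30.0])  # bin latency values, already sorted
    ws = np.array([ 1.0,  1.0,  2.0])  # histogram counts used as weights
    # cdf works out to [12.5, 37.5, 75.0], so the weighted median falls
    # one third of the way between 20 and 30:
    print(weighted_percentile([50], vs, ws))  # -> [ 23.33333333]
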
+
+def weights(start_ts, end_ts, start, end):
+    """ Calculate weights based on fraction of sample falling in the
+        given interval [start,end]. Weights computed using vector / array
+        computation instead of for-loops.
+
+        Note that samples with zero time length are effectively ignored
+        (we set their weight to zero).
+
+        start_ts :: Array of start times for a set of samples
+        end_ts   :: Array of end times for a set of samples
+        start    :: int
+        end      :: int
+        return   :: Array of weights
+    """
+    sbounds = np.maximum(start_ts, start).astype(float)
+    ebounds = np.minimum(end_ts, end).astype(float)
+    ws = (ebounds - sbounds) / (end_ts - start_ts)
+    if np.any(np.isnan(ws)):
+        err("WARNING: zero-length sample(s) detected. Log file corrupt"
+            " / bad time values? Ignoring these samples.\n")
+    ws[np.where(np.isnan(ws))] = 0.0
+    return ws
+
+def weighted_average(vs, ws):
+    return np.sum(vs * ws) / np.sum(ws)
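
Likewise for weights(), again with invented numbers:

    import numpy as np
    start_ts = np.array([   0.0,  500.0])
    end_ts   = np.array([1000.0, 1500.0])
    # Against the interval [0, 1000): the first sample lies entirely inside
    # (weight 1.0); only the first half of the second sample does (0.5).
    print(weights(start_ts, end_ts, 0, 1000))  # -> [ 1.   0.5]
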
+ """ + + # Multiply the index by the power of 2 coarseness to get the bin + # bin index with a max of 1536 bins (FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h) + stride = 1 << coarseness + idx = idx * stride + lower = _plat_idx_to_val(idx, edge=0.0) + upper = _plat_idx_to_val(idx + stride, edge=1.0) + return lower + (upper - lower) * edge + +def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx): + ps = weighted_percentile(percs, vs, ws) + + avg = weighted_average(vs, ws) + values = [mn, avg] + list(ps) + [mx] + row = [end, ss_cnt] + map(lambda x: float(x) / ctx.divisor, values) + fmt = "%d, %d, %d, " + fmt_float_list(ctx, 5) + ", %d" + print (fmt % tuple(row)) + +def update_extreme(val, fncn, new_val): + """ Calculate min / max in the presence of None values """ + if val is None: return new_val + else: return fncn(val, new_val) + +# See beginning of main() for how bin_vals are computed +bin_vals = [] +lower_bin_vals = [] # lower edge of each bin +upper_bin_vals = [] # upper edge of each bin + +def process_interval(ctx, samples, iStart, iEnd): + """ Construct the weighted histogram for the given interval by scanning + through all the histograms and figuring out which of their bins have + samples with latencies which overlap with the given interval + [iStart,iEnd]. + """ + + times, files, hists = samples[:,0], samples[:,1], samples[:,2:] + iHist = np.zeros(__HIST_COLUMNS) + ss_cnt = 0 # number of samples affecting this interval + mn_bin_val, mx_bin_val = None, None + + for end_time,file,hist in zip(times,files,hists): + + # Only look at bins of the current histogram sample which + # started before the end of the current time interval [start,end] + start_times = (end_time - 0.5 * ctx.interval) - bin_vals / 1000.0 + idx = np.where(start_times < iEnd) + s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx] + + # Increment current interval histogram by weighted values of future histogram: + ws = hs * weights(s_ts, end_time, iStart, iEnd) + iHist[idx] += ws + + # Update total number of samples affecting current interval histogram: + ss_cnt += np.sum(hs) + + # Update min and max bin values seen if necessary: + idx = np.where(hs != 0)[0] + if idx.size > 0: + mn_bin_val = update_extreme(mn_bin_val, min, l_bvs[max(0, idx[0] - 1)]) + mx_bin_val = update_extreme(mx_bin_val, max, u_bvs[min(len(hs) - 1, idx[-1] + 1)]) + + if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val) + +def guess_max_from_bins(ctx, hist_cols): + """ Try to guess the GROUP_NR from given # of histogram + columns seen in an input file """ + max_coarse = 8 + if ctx.group_nr < 19 or ctx.group_nr > 26: + bins = [ctx.group_nr * (1 << 6)] + else: + bins = [1216,1280,1344,1408,1472,1536,1600,1664] + coarses = range(max_coarse + 1) + fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else -10, coarses)) + + arr = np.transpose(list(map(fncn, bins))) + idx = np.where(arr == hist_cols) + if len(idx[1]) == 0: + table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array',' ') + err("Unable to determine bin values from input clat_hist files. Namely \n" + "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) + + "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. 
\n" + "This number needs to be equal to one of the following numbers:\n\n" + + table + "\n\n" + "Possible reasons and corresponding solutions:\n" + " - Input file(s) does not contain histograms.\n" + " - You recompiled fio with a different GROUP_NR. If so please specify this\n" + " new GROUP_NR on the command line with --group_nr\n") + exit(1) + return bins[idx[1][0]] + +def main(ctx): + + if ctx.job_file: + try: + from configparser import SafeConfigParser, NoOptionError + except ImportError: + from ConfigParser import SafeConfigParser, NoOptionError + + cp = SafeConfigParser(allow_no_value=True) + with open(ctx.job_file, 'r') as fp: + cp.readfp(fp) + + if ctx.interval is None: + # Auto detect --interval value + for s in cp.sections(): + try: + hist_msec = cp.get(s, 'log_hist_msec') + if hist_msec is not None: + ctx.interval = int(hist_msec) + except NoOptionError: + pass + + if ctx.interval is None: + ctx.interval = 1000 + + # Automatically detect how many columns are in the input files, + # calculate the corresponding 'coarseness' parameter used to generate + # those files, and calculate the appropriate bin latency values: + with open(ctx.FILE[0], 'r') as fp: + global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS + __TOTAL_COLUMNS = len(fp.readline().split(',')) + __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS + + max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS) + coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS)) + bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float) + lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float) + upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float) + + fps = [open(f, 'r') for f in ctx.FILE] + gen = histogram_generator(ctx, fps, ctx.buff_size) + + print(', '.join(columns)) + + try: + start, end = 0, ctx.interval + arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1)) + more_data = True + while more_data or len(arr) > 0: + + # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval. + while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end: + try: + new_arr = next(gen) + except StopIteration: + more_data = False + break + arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0) + arr = arr.astype(int) + + if arr.size > 0: + # Jump immediately to the start of the input, rounding + # down to the nearest multiple of the interval (useful when --log_unix_epoch + # was used to create these histograms): + if start == 0 and arr[0][0] - ctx.max_latency > end: + start = arr[0][0] - ctx.max_latency + start = start - (start % ctx.interval) + end = start + ctx.interval + + process_interval(ctx, arr, start, end) + + # Update arr to throw away samples we no longer need - samples which + # end before the start of the next interval, i.e. 
+
+def main(ctx):
+
+    if ctx.job_file:
+        try:
+            from configparser import SafeConfigParser, NoOptionError
+        except ImportError:
+            from ConfigParser import SafeConfigParser, NoOptionError
+
+        cp = SafeConfigParser(allow_no_value=True)
+        with open(ctx.job_file, 'r') as fp:
+            cp.readfp(fp)
+
+        if ctx.interval is None:
+            # Auto detect --interval value
+            for s in cp.sections():
+                try:
+                    hist_msec = cp.get(s, 'log_hist_msec')
+                    if hist_msec is not None:
+                        ctx.interval = int(hist_msec)
+                except NoOptionError:
+                    pass
+
+    if ctx.interval is None:
+        ctx.interval = 1000
+
+    # Automatically detect how many columns are in the input files,
+    # calculate the corresponding 'coarseness' parameter used to generate
+    # those files, and calculate the appropriate bin latency values:
+    with open(ctx.FILE[0], 'r') as fp:
+        global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
+        __TOTAL_COLUMNS = len(fp.readline().split(','))
+        __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
+
+        max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
+        coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
+        bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float)
+        lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float)
+        upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float)
+
+    fps = [open(f, 'r') for f in ctx.FILE]
+    gen = histogram_generator(ctx, fps, ctx.buff_size)
+
+    print(', '.join(columns))
+
+    try:
+        start, end = 0, ctx.interval
+        arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1))
+        more_data = True
+        while more_data or len(arr) > 0:
+
+            # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval.
+            while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end:
+                try:
+                    new_arr = next(gen)
+                except StopIteration:
+                    more_data = False
+                    break
+                arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0)
+            arr = arr.astype(int)
+
+            if arr.size > 0:
+                # Jump immediately to the start of the input, rounding
+                # down to the nearest multiple of the interval (useful when --log_unix_epoch
+                # was used to create these histograms):
+                if start == 0 and arr[0][0] - ctx.max_latency > end:
+                    start = arr[0][0] - ctx.max_latency
+                    start = start - (start % ctx.interval)
+                    end = start + ctx.interval
+
+                process_interval(ctx, arr, start, end)
+
+                # Update arr to throw away samples we no longer need - samples which
+                # end before the start of the next interval, i.e. the end of the
+                # current interval:
+                idx = np.where(arr[:,0] > end)
+                arr = arr[idx]
+
+            start += ctx.interval
+            end = start + ctx.interval
+    finally:
+        map(lambda f: f.close(), fps)
+
+
+if __name__ == '__main__':
+    import argparse
+    p = argparse.ArgumentParser()
+    arg = p.add_argument
+    arg("FILE", help='space separated list of latency log filenames', nargs='+')
+    arg('--buff_size',
+        default=10000,
+        type=int,
+        help='number of samples to buffer into numpy at a time')
+
+    arg('--max_latency',
+        default=20,
+        type=float,
+        help='number of seconds of data to process at a time')
+
+    arg('-i', '--interval',
+        type=int,
+        help='interval width (ms), default 1000 ms')
+
+    arg('-d', '--divisor',
+        required=False,
+        type=int,
+        default=1,
+        help='divide the results by this value.')
+
+    arg('--decimals',
+        default=3,
+        type=int,
+        help='number of decimal places to print floats to')
+
+    arg('--warn',
+        dest='warn',
+        action='store_true',
+        default=False,
+        help='print warning messages to stderr')
+
+    arg('--group_nr',
+        default=19,
+        type=int,
+        help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')
+
+    arg('--job-file',
+        default=None,
+        type=str,
+        help='Optional argument pointing to the job file used to create the '
+             'given histogram files. Useful for auto-detecting --log_hist_msec and '
+             '--log_unix_epoch (in fio) values.')
+
+    main(p.parse_args())
+
diff --git a/tools/hist/fiologparser_hist.py.1 b/tools/hist/fiologparser_hist.py.1
new file mode 100644
index 00000000..ed22c747
--- /dev/null
+++ b/tools/hist/fiologparser_hist.py.1
@@ -0,0 +1,201 @@
+.TH fiologparser_hist.py 1 "August 18, 2016"
+.SH NAME
+fiologparser_hist.py \- Calculate statistics from fio histograms
+.SH SYNOPSIS
+.B fiologparser_hist.py
+[\fIoptions\fR] [clat_hist_files]...
+.SH DESCRIPTION
+.B fiologparser_hist.py
+is a utility for converting *_clat_hist* files
+generated by fio into a CSV of latency statistics including minimum,
+average, and maximum latency, and 50th, 90th, 95th, and 99th percentiles.
+.SH EXAMPLES
+.PP
+.nf
+$ fiologparser_hist.py *_clat_hist*
+end-time, samples, min, avg, median, 90%, 95%, 99%, max
+1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
+2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
+4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+...
+.fi
+.PP
+
+.SH OPTIONS
+.TP
+.BR \-\-help
+Print these options.
+.TP
+.BR \-\-buff_size \fR=\fPint
+Number of samples to buffer into numpy at a time. Default is 10,000.
+This can be adjusted to help performance.
+.TP
+.BR \-\-max_latency \fR=\fPint
+Number of seconds of data to process at a time. Defaults to 20 seconds,
+in order to handle the 17 second upper bound on latency in histograms
+reported by fio. This should be increased if fio has been
+run with a larger maximum latency. Lowering this when a lower maximum
+latency is known can improve performance. See NOTES for more details.
+.TP
+.BR \-i ", " \-\-interval \fR=\fPint
+Interval at which statistics are reported. Defaults to 1000 ms. This
+should be set to no less than the value of \fBlog_hist_msec\fR as given
+to fio.
+.TP
+.BR \-d ", " \-\-divisor \fR=\fPint
+Divide statistics by this value. Defaults to 1. Useful if you want to
+convert latencies from milliseconds to seconds (\fBdivisor\fR=\fP1000\fR).
+.TP
+.BR \-\-warn
+Enables warning messages printed to stderr, useful for debugging.
+.TP
+.BR \-\-group_nr \fR=\fPint
+Set this to the value of \fIFIO_IO_U_PLAT_GROUP_NR\fR as defined in
+\fPstat.h\fR if fio has been recompiled. Defaults to 19, the
+current value used in fio. See NOTES for more details.
+
+.SH NOTES
+end-times are calculated to be uniform increments of the \fB\-\-interval\fR value given,
+regardless of when histogram samples are reported. Of note:
+
+.RS
+Intervals with no samples are omitted. In the example above this means
+"no statistics from 2 to 3 seconds" and "39 samples influenced the statistics
+of the interval from 3 to 4 seconds".
+.LP
+Intervals with a single sample will have the same value for all statistics.
+.RE
+
+.PP
+The number of samples is unweighted, corresponding to the total number of samples
+which have any effect whatsoever on the interval.
+
+Min statistics are computed using the value of the lower boundary of the first bin
+(in increasing bin order) with non-zero samples in it. Similarly for max,
+we take the upper boundary of the last bin with non-zero samples in it.
+This is semantically identical to taking the 0th and 100th percentiles with a
+50% bin-width buffer (because percentiles are computed using mid-points of
+the bins). This enforces the following nice properties:
+
+.RS
+min <= 50th <= 90th <= 95th <= 99th <= max
+.LP
+min and max are strict lower and upper bounds on the actual
+min / max seen by fio (and reported in *_clat.* with averaging turned off).
+.RE
+
+.PP
+Average statistics use a standard weighted arithmetic mean.
+
+Percentile statistics are computed using the weighted percentile method as
+described here: \fIhttps://en.wikipedia.org/wiki/Percentile#Weighted_percentile\fR.
+See the weights() method for details on how weights are computed for individual
+samples. In process_interval() we further multiply by the height of each bin
+to get weighted histograms.
+
+We convert files given on the command line, assumed to be fio histogram files.
+An individual histogram file can contain the
+histograms for multiple different r/w directions (notably when \fB\-\-rw\fR=\fPrandrw\fR). This
+is accounted for by tracking each r/w direction separately. In the statistics
+reported we ultimately merge *all* histograms (regardless of r/w direction).
+
+The value of *_GROUP_NR in \fIstat.h\fR (and *_BITS) determines how many latency bins
+fio outputs when histogramming is enabled. Namely for the current default of
+GROUP_NR=19, we get 1,216 bins with a maximum latency of approximately 17
+seconds. For certain applications this may not be sufficient. With GROUP_NR=24
+we have 1,536 bins, giving us a maximum latency of 541 seconds (~ 9 minutes). If
+you expect your application to experience latencies greater than 17 seconds,
+you will need to recompile fio with a larger GROUP_NR, e.g. with:
+
+.RS
+.PP
+.nf
+sed -i.bak 's/^#define FIO_IO_U_PLAT_GROUP_NR 19/#define FIO_IO_U_PLAT_GROUP_NR 24/g' stat.h
+make fio
+.fi
+.PP
+.RE
+
+.PP
+Quick reference table for the max latency corresponding to a sampling of
+values for GROUP_NR:
+
+.RS
+.PP
+.nf
+GROUP_NR | # bins | max latency bin value
+19       |   1216 |        16.9 sec
+20       |   1280 |        33.8 sec
+21       |   1344 |        67.6 sec
+22       |   1408 |  2 min, 15 sec
+23       |   1472 |  4 min, 32 sec
+24       |   1536 |  9 min, 4 sec
+25       |   1600 | 18 min, 8 sec
+26       |   1664 | 36 min, 16 sec
+.fi
+.PP
+.RE
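
The max-latency column above can be reproduced approximately from _plat_idx_to_val() in fiologparser_hist.py — a sketch assuming that function is in scope and that bin values are microseconds, as they are for fio completion latencies (the table's entries read slightly higher, apparently from a different bin-edge convention):

    def max_latency_sec(group_nr, plat_bits=6):
        nbins = group_nr * (1 << plat_bits)  # e.g. 19 * 64 == 1216 bins
        # upper edge of the last bin, converted from usec to seconds:
        return _plat_idx_to_val(nbins - 1, edge=1.0) / 1e6

    print(max_latency_sec(19))  # -> 16.777216 (~17 sec)
    print(max_latency_sec(24))  # -> 536.870912 (~9 min)
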
+
+.PP
+At present this program automatically detects the number of histogram bins in
+the log files, and adjusts the bin latency values accordingly. In particular if
+you use the \fB\-\-log_hist_coarseness\fR parameter of fio, you get output files with
+a number of bins according to the following table (note that the first
+row is identical to the table above):
+
+.RS
+.PP
+.nf
+coarse \\ GROUP_NR
+        19     20     21     22     23     24     25     26
+     -------------------------------------------------------
+  0  [[1216,  1280,  1344,  1408,  1472,  1536,  1600,  1664],
+  1   [ 608,   640,   672,   704,   736,   768,   800,   832],
+  2   [ 304,   320,   336,   352,   368,   384,   400,   416],
+  3   [ 152,   160,   168,   176,   184,   192,   200,   208],
+  4   [  76,    80,    84,    88,    92,    96,   100,   104],
+  5   [  38,    40,    42,    44,    46,    48,    50,    52],
+  6   [  19,    20,    21,    22,    23,    24,    25,    26],
+  7   [ N/A,    10,   N/A,    11,   N/A,    12,   N/A,    13],
+  8   [ N/A,     5,   N/A,   N/A,   N/A,     6,   N/A,   N/A]]
+.fi
+.PP
+.RE
+
+.PP
+For other values of GROUP_NR and coarseness, this table can be computed like this:
+
+.RS
+.PP
+.nf
+import numpy as np
+bins = [1216,1280,1344,1408,1472,1536,1600,1664]
+max_coarse = 8
+fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else np.nan, range(max_coarse + 1)))
+np.transpose(list(map(fncn, bins)))
+.fi
+.PP
+.RE
+
+.PP
+If you have not adjusted GROUP_NR for your (high latency) application, then you
+will see the percentiles computed by this tool max out at the max latency bin
+value as in the first table above, and in this plot (where GROUP_NR=19 and thus we see
+a max latency of ~16.7 seconds in the red line):
+
+.RS
+\fIhttps://www.cronburg.com/fio/max_latency_bin_value_bug.png
+.RE
+
+.PP
+Motivation for, design decisions, and the implementation process are
+described in further detail here:
+
+.RS
+\fIhttps://www.cronburg.com/fio/cloud-latency-problem-measurement/
+.RE
+
+.SH AUTHOR
+.B fiologparser_hist.py
+and this manual page were written by Karl Cronburg <karl.cronburg@gmail.com>.
+.SH "REPORTING BUGS"
+Report bugs to the \fBfio\fR mailing list <fio@vger.kernel.org>.
diff --git a/tools/hist/half-bins.py b/tools/hist/half-bins.py
new file mode 100755
index 00000000..d592af00
--- /dev/null
+++ b/tools/hist/half-bins.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python2.7
+""" Cut the number of bins in half in fio histogram output. Example usage:
+
+        $ half-bins.py -c 2 output_clat_hist.1.log > smaller_clat_hist.1.log
+
+    Which merges e.g. bins [0 .. 3], [4 .. 7], ..., [1212 .. 1215] resulting in
+    304 = 1216 / (2**2) merged bins per histogram sample.
+
+    @author Karl Cronburg <karl.cronburg@gmail.com>
+"""
+import sys
+
+def main(ctx):
+    stride = 1 << ctx.coarseness
+    with open(ctx.FILENAME, 'r') as fp:
+        for line in fp.readlines():
+            vals = line.split(', ')
+            sys.stdout.write("%s, %s, %s, " % tuple(vals[:3]))
+
+            hist = list(map(int, vals[3:]))
+            for i in range(0, len(hist) - stride, stride):
+                sys.stdout.write("%d, " % sum(hist[i : i + stride],))
+            sys.stdout.write("%d\n" % sum(hist[len(hist) - stride:]))
+
+if __name__ == '__main__':
+    import argparse
+    p = argparse.ArgumentParser()
+    arg = p.add_argument
+    arg( 'FILENAME', help='clat_hist file for which we will reduce'
+        ' (by half or more) the number of bins.')
+    arg('-c', '--coarseness',
+        default=1,
+        type=int,
+        help='number of times to reduce number of bins by half, '
+             'e.g. coarseness of 4 merges each 2^4 = 16 consecutive '
+             'bins.')
+    main(p.parse_args())
+
diff --git a/tools/plot/fio2gnuplot b/tools/plot/fio2gnuplot
index 2d64a6ea..a703ae33 100755
--- a/tools/plot/fio2gnuplot
+++ b/tools/plot/fio2gnuplot
@@ -31,7 +31,7 @@ def find_file(path, pattern):
 	fio_data_file=[]
 	# For all the local files
 	for file in os.listdir(path):
-		# If the file math the regexp
+		# If the file matches the glob
 		if fnmatch.fnmatch(file, pattern):
 			# Let's consider this file
 			fio_data_file.append(file)
@@ -361,7 +361,7 @@ def print_help():
    print 'fio2gnuplot -ghbiodvk -t <title> -o <outputfile> -p <pattern> -G <type> -m <time> -M <time>'
    print
    print '-h --help                           : Print this help'
-   print '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files'
+   print '-p <pattern> or --pattern <pattern> : A glob pattern to select fio input files'
    print '-b or --bandwidth                   : A predefined pattern for selecting *_bw.log files'
    print '-i or --iops                        : A predefined pattern for selecting *_iops.log files'
    print '-g or --gnuplot                     : Render gnuplot traces before exiting'
@@ -458,7 +458,15 @@ def main(argv):
 		fio_data_file=find_file('.',pattern)
 		if len(fio_data_file) == 0:
 			print "No log file found with pattern %s!" % pattern
-			sys.exit(1)
+			# Try numjob log file format if per_numjob_logs=1
+			if (pattern == '*_bw.log'):
+				fio_data_file=find_file('.','*_bw.*.log')
+			if (pattern == '*_iops.log'):
+				fio_data_file=find_file('.','*_iops.*.log')
+			if len(fio_data_file) == 0:
+				sys.exit(1)
+			else:
+				print "Using log file per job format instead"
 		else:
 			print "%d files Selected with pattern '%s'" % (len(fio_data_file), pattern)
@@ -479,7 +487,7 @@ def main(argv):
 			#We need to adjust the output filename regarding the pattern required by the user
 			if (pattern_set_by_user == True):
 				gnuplot_output_filename=pattern
-				# As we do have some regexp in the pattern, let's make this simpliest
+				# As we do have some glob in the pattern, let's make this simplest
 				# We do remove the simpliest parts of the expression to get a clear file name
 				gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
 				gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
@@ -488,6 +496,8 @@ def main(argv):
 			# Insure that we don't have any starting or trailing dash to the filename
 			gnuplot_output_filename = gnuplot_output_filename[:-1] if gnuplot_output_filename.endswith('-') else gnuplot_output_filename
 			gnuplot_output_filename = gnuplot_output_filename[1:] if gnuplot_output_filename.startswith('-') else gnuplot_output_filename
+			if (gnuplot_output_filename == ''):
+				gnuplot_output_filename='default'
 
 	if parse_global==True:
 		parse_global_files(fio_data_file, global_search)
diff --git a/tools/plot/graph2D.gpm b/tools/plot/graph2D.gpm
index 5cd6ff35..769b754a 100644
--- a/tools/plot/graph2D.gpm
+++ b/tools/plot/graph2D.gpm
@@ -1,9 +1,30 @@
 # This Gnuplot file has been generated by eNovance
 
-set title '$0'
+needed_args = 8
+if (exists("ARGC") && ARGC >= needed_args) \
+    found_args = 1; \
+else if (strlen("$$#") < 3 && "$#" >= needed_args) \
+    found_args = 1; \
+    ARG1 = "$0"; \
+    ARG2 = "$1"; \
+    ARG3 = "$2"; \
+    ARG4 = "$3"; \
+    ARG5 = "$4"; \
+    ARG6 = "$5"; \
+    ARG7 = "$6"; \
+    ARG8 = "$7"; \
+else \
+    found_args = 0; \
+    print "Aborting: could not find all arguments"; \
+    exit
+
+avg_num = ARG8 + 0
+avg_str = sprintf("%g", avg_num)
+
+set title ARG1
 
 set terminal png size 1280,1024
-set output '$3.png'
+set output ARG4 . '.png'
 #set terminal x11
 
 #Preparing Axes
@@ -12,7 +33,7 @@
 set ytics axis out auto
 #set data style lines
 set key top left reverse
 set xlabel "Time (Seconds)"
-set ylabel '$4'
+set ylabel ARG5
 set xrange [0:]
 set yrange [0:]
 
@@ -22,13 +43,13 @@
 set style line 100 lt 7 lw 0.5
 set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"
 
-plot '$1' using 2:3 with linespoints title '$2', $7 w l ls 1 ti 'Global average value ($7)'
+plot ARG2 using 2:3 with linespoints title ARG3, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'
 
-set output '$5.png'
-plot '$1' using 2:3 smooth csplines title '$2', $7 w l ls 1 ti 'Global average value ($7)'
+set output ARG6 . '.png'
+plot ARG2 using 2:3 smooth csplines title ARG3, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'
 
-set output '$6.png'
-plot '$1' using 2:3 smooth bezier title '$2', $7 w l ls 1 ti 'Global average value ($7)'
+set output ARG7 . '.png'
+plot ARG2 using 2:3 smooth bezier title ARG3, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'
 
 #pause -1
 #The End
diff --git a/tools/plot/graph3D.gpm b/tools/plot/graph3D.gpm
index 93f7a4da..ac2cdf6c 100644
--- a/tools/plot/graph3D.gpm
+++ b/tools/plot/graph3D.gpm
@@ -1,9 +1,24 @@
 # This Gnuplot file has been generated by eNovance
 
-set title '$0'
+needed_args = 5
+if (exists("ARGC") && ARGC >= needed_args) \
+    found_args = 1; \
+else if (strlen("$$#") < 3 && "$#" >= needed_args) \
+    found_args = 1; \
+    ARG1 = "$0"; \
+    ARG2 = "$1"; \
+    ARG3 = "$2"; \
+    ARG4 = "$3"; \
+    ARG5 = "$4"; \
+else \
+    found_args = 0; \
+    print "Aborting: could not find all arguments"; \
+    exit
+
+set title ARG1
 
 set terminal png size 1280,1024
-set output '$3.png'
+set output ARG4 . '.png'
 #set terminal x11
 
 #3D Config
 set isosamples 30
@@ -19,7 +34,7 @@
 set grid back
 set key top left reverse
 set ylabel "Disk"
 set xlabel "Time (Seconds)"
-set zlabel '$4'
+set zlabel ARG5
 set cbrange [0:]
 set zrange [0:]
 
@@ -35,7 +50,7 @@
 set multiplot
 
 #Top Left View
 set size 0.5,0.5
 set view 64,216
 set origin 0,0.5
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Top Right View
 set size 0.5,0.5
@@ -43,7 +58,7 @@
 set origin 0.5,0.5
 set view 90,0
 set pm3d at s solid hidden3d 100 scansbackward
 set pm3d depthorder
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Bottom Right View
 set size 0.5,0.5
@@ -51,13 +66,13 @@
 set origin 0.5,0
 set view 63,161
 set pm3d at s solid hidden3d 100 scansbackward
 set pm3d depthorder
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Bottom Left View
 set size 0.5,0.5
 set origin 0,0
 set pm3d map
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Unsetting multiplotting
 unset multiplot
@@ -66,7 +81,7 @@
 
 #Preparing 3D Interactive view
 set mouse
 set terminal png size 1024,768
-set output '$3-3D.png'
+set output ARG4 . '-3D.png'
 #set term x11
 
 set view 64,216
@@ -74,7 +89,7 @@
 set origin 0,0
 set size 1,1
 set pm3d at bs solid hidden3d 100 scansbackward
 set pm3d depthorder
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #pause -1
 #The End
diff --git a/tools/plot/math.gpm b/tools/plot/math.gpm
index a01f5a0d..0a2aff56 100644
--- a/tools/plot/math.gpm
+++ b/tools/plot/math.gpm
@@ -1,15 +1,32 @@
 # This Gnuplot file has been generated by eNovance
 
+if (exists("ARGC") && ARGC > 5) \
+    found_args = 1; \
+else if (strlen("$$#") < 3 && "$#" > 5) \
+    found_args = 1; \
+    ARG1 = "$0"; \
+    ARG2 = "$1"; \
+    ARG3 = "$2"; \
+    ARG4 = "$3"; \
+    ARG5 = "$4"; \
+    ARG6 = "$5"; \
+else \
+    found_args = 0; \
+    print "Aborting: could not find all arguments"; \
+    exit
 
-set title '$0'
+avg_num = ARG6 + 0
+avg_str = sprintf("%g", avg_num)
+
+set title ARG1
 
 set terminal png size 1280,1024
-set output '$3.png'
+set output ARG4 . '.png'
 set palette rgbformulae 7,5,15
 set style line 100 lt 7 lw 0.5
 set style fill transparent solid 0.9 noborder
 set auto x
-set ylabel '$4'
+set ylabel ARG5
 set xlabel "Disk"
 set yrange [0:]
 set style data histogram
@@ -22,4 +39,4 @@
 set xtics axis out
 set xtic rotate by 45 scale 0 font ",8" autojustify
 set xtics offset 0,-1 border -5,1,5
 set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"
-plot '$1' using 2:xtic(1) ti col, $5 w l ls 1 ti 'Global average value ($5)'
+plot ARG2 using 2:xtic(1) ti col, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'