Diffstat (limited to 'tools')
-rw-r--r-- | tools/fio.service                 |  10
-rwxr-xr-x | tools/fiologparser.py             | 221
-rwxr-xr-x | tools/genfio                      |   8
-rw-r--r-- | tools/hist/.gitignore             |   3
-rwxr-xr-x | tools/hist/fiologparser_hist.py   | 388
-rw-r--r-- | tools/hist/fiologparser_hist.py.1 | 201
-rwxr-xr-x | tools/hist/half-bins.py           |  38
-rwxr-xr-x | tools/plot/fio2gnuplot            |  18
-rw-r--r-- | tools/plot/graph2D.gpm            |  37
-rw-r--r-- | tools/plot/graph3D.gpm            |  33
-rw-r--r-- | tools/plot/math.gpm               |  25
11 files changed, 956 insertions, 26 deletions
diff --git a/tools/fio.service b/tools/fio.service
new file mode 100644
index 00000000..21de0b7a
--- /dev/null
+++ b/tools/fio.service
@@ -0,0 +1,10 @@
+[Unit]
+
+Description=flexible I/O tester server
+After=network.target
+
+[Service]
+
+Type=simple
+PIDFile=/run/fio.pid
+ExecStart=/usr/bin/fio --server
diff --git a/tools/fiologparser.py b/tools/fiologparser.py
new file mode 100755
index 00000000..5a95009e
--- /dev/null
+++ b/tools/fiologparser.py
@@ -0,0 +1,221 @@
+#!/usr/bin/python
+#
+# fiologparser.py
+#
+# This tool lets you parse multiple fio log files and look at interval
+# statistics even when samples are non-uniform. For instance:
+#
+# fiologparser.py -s *bw*
+#
+# to see per-interval sums for all bandwidth logs or:
+#
+# fiologparser.py -a *clat*
+#
+# to see per-interval average completion latency.
+
+import argparse
+import math
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in milliseconds.')
+    parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
+    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
+    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
+                        help='print all stats for each interval.')
+    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
+    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
+    parser.add_argument("FILE", help="fio log output files to parse", nargs="+")
+    args = parser.parse_args()
+
+    return args
+
+def get_ftime(series):
+    ftime = 0
+    for ts in series:
+        if ftime == 0 or ts.last.end < ftime:
+            ftime = ts.last.end
+    return ftime
+
+def print_full(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
+        start += ctx.interval
+        end += ctx.interval
+
+def print_sums(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        print("%s, %0.3f" % (end, sum(results)))
+        start += ctx.interval
+        end += ctx.interval
+
+def print_averages(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        print("%s, %0.3f" % (end, float(sum(results))/len(results)))
+        start += ctx.interval
+        end += ctx.interval
+
+# FIXME: this routine is computationally inefficient
+# and has O(N^2) behavior
+# it would be better to make one pass through samples
+# to segment them into a series of time intervals, and
+# then compute stats on each time interval instead.
+# to debug this routine, use
+#   # sort -n -t ',' -k 2 small.log
+# on your input.
+
+def my_extend( vlist, val ):
+    vlist.extend(val)
+    return vlist
+
+array_collapser = lambda vlist, val: my_extend(vlist, val)
+
+def print_all_stats(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
+    while (start < ftime):  # for each time interval
+        end = ftime if ftime < end else end
+        sample_arrays = [ s.get_samples(start, end) for s in series ]
+        samplevalue_arrays = []
+        for sample_array in sample_arrays:
+            samplevalue_arrays.append(
+                [ sample.value for sample in sample_array ] )
+        # collapse list of lists of sample values into list of sample values
+        samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
+        # compute all stats and print them
+        mymin = min(samplevalues)
+        myavg = sum(samplevalues) / float(len(samplevalues))
+        mymedian = median(samplevalues)
+        my90th = percentile(samplevalues, 0.90)
+        my95th = percentile(samplevalues, 0.95)
+        my99th = percentile(samplevalues, 0.99)
+        mymax = max(samplevalues)
+        print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
+            start, len(samplevalues),
+            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
+
+        # advance to next interval
+        start += ctx.interval
+        end += ctx.interval
+
+def median(values):
+    s=sorted(values)
+    return float(s[(len(s)-1)/2]+s[(len(s)/2)])/2
+
+def percentile(values, p):
+    s = sorted(values)
+    k = (len(s)-1) * p
+    f = math.floor(k)
+    c = math.ceil(k)
+    if f == c:
+        return s[int(k)]
+    return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
+
+def print_default(ctx, series):
+    ftime = get_ftime(series)
+    start = 0
+    end = ctx.interval
+    averages = []
+    weights = []
+
+    while (start < ftime):
+        end = ftime if ftime < end else end
+        results = [ts.get_value(start, end) for ts in series]
+        averages.append(sum(results))
+        weights.append(end-start)
+        start += ctx.interval
+        end += ctx.interval
+
+    total = 0
+    for i in range(0, len(averages)):
+        total += averages[i]*weights[i]
+    print('%0.3f' % (total/sum(weights)))
+
+class TimeSeries(object):
+    def __init__(self, ctx, fn):
+        self.ctx = ctx
+        self.last = None
+        self.samples = []
+        self.read_data(fn)
+
+    def read_data(self, fn):
+        f = open(fn, 'r')
+        p_time = 0
+        for line in f:
+            (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
+            self.add_sample(p_time, int(time), int(value))
+            p_time = int(time)
+
+    def add_sample(self, start, end, value):
+        sample = Sample(ctx, start, end, value)
+        if not self.last or self.last.end < end:
+            self.last = sample
+        self.samples.append(sample)
+
+    def get_samples(self, start, end):
+        sample_list = []
+        for s in self.samples:
+            if s.start >= start and s.end <= end:
+                sample_list.append(s)
+        return sample_list
+
+    def get_value(self, start, end):
+        value = 0
+        for sample in self.samples:
+            value += sample.get_contribution(start, end)
+        return value
+
+class Sample(object):
+    def __init__(self, ctx, start, end, value):
+        self.ctx = ctx
+        self.start = start
+        self.end = end
+        self.value = value
+
+    def get_contribution(self, start, end):
+        # short circuit if not within the bound
+        if (end < self.start or start > self.end):
+            return 0
+
+        sbound = self.start if start < self.start else start
+        ebound = self.end if end > self.end else end
+        ratio = float(ebound-sbound) / (end-start)
+        return self.value*ratio/ctx.divisor
+
+
+if __name__ == '__main__':
+    ctx = parse_args()
+    series = []
+    for fn in ctx.FILE:
+        series.append(TimeSeries(ctx, fn))
+    if ctx.sum:
+        print_sums(ctx, series)
+    elif ctx.average:
+        print_averages(ctx, series)
+    elif ctx.full:
+        print_full(ctx, series)
+    elif ctx.allstats:
+        print_all_stats(ctx, series)
+    else:
+        print_default(ctx, series)
+
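
The per-interval weighting fiologparser.py applies is easiest to see in isolation. A minimal sketch of what Sample.get_contribution() computes, with invented sample and interval values (not part of the patch):

    # A sample spanning [900, 1100) with value 400, weighted into the
    # interval [0, 1000): it overlaps the last 100 of the interval's
    # 1000 time units, so it contributes 400 * 100/1000 = 40.
    s_start, s_end, value = 900, 1100, 400
    i_start, i_end = 0, 1000
    sbound = max(s_start, i_start)   # 900
    ebound = min(s_end, i_end)       # 1000
    print(value * float(ebound - sbound) / (i_end - i_start))  # 40.0
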
diff --git a/tools/genfio b/tools/genfio
index 4d32d130..68004520 100755
--- a/tools/genfio
+++ b/tools/genfio
@@ -54,6 +54,8 @@ show_help() {
 	Default is $IODEPTH
 -d disk1[,disk2,disk3,..] : Run the tests on the selected disks
 	Separated each disk with a comma
+-z filesize : Specify the working file size, if you are passing filepaths to -d
+	Disabled by default
 -r seconds : Time in seconds per benchmark
 	0 means till the end of the device
 	Default is $RUNTIME seconds
@@ -203,7 +205,7 @@ esac
 }
 
 parse_cmdline() {
-while getopts "hacpsd:b:r:m:x:D:A:B:" opt; do
+while getopts "hacpsd:b:r:m:x:z:D:A:B:" opt; do
   case $opt in
     h)
       show_help
@@ -260,6 +262,10 @@ while getopts "hacpsd:b:r:m:x:D:A:B:" opt; do
     A)
       echo "exec_postrun=$OPTARG" >> $TEMPLATE
       ;;
+    z)
+      FSIZE=$OPTARG
+      echo "size=$FSIZE" >> $TEMPLATE
+      ;;
     \?)
       echo "Invalid option: -$OPTARG" >&2
       ;;
diff --git a/tools/hist/.gitignore b/tools/hist/.gitignore
new file mode 100644
index 00000000..4f875dac
--- /dev/null
+++ b/tools/hist/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*.ipynb
+.ipynb_checkpoints
diff --git a/tools/hist/fiologparser_hist.py b/tools/hist/fiologparser_hist.py
new file mode 100755
index 00000000..ead5e543
--- /dev/null
+++ b/tools/hist/fiologparser_hist.py
@@ -0,0 +1,388 @@
+#!/usr/bin/env python2.7
+"""
+    Utility for converting *_clat_hist* files generated by fio into latency statistics.
+
+    Example usage:
+
+        $ fiologparser_hist.py *_clat_hist*
+        end-time, samples, min, avg, median, 90%, 95%, 99%, max
+        1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
+        2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
+        4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+        ...
+
+    @author Karl Cronburg <karl.cronburg@gmail.com>
+"""
+import os
+import sys
+import pandas
+import numpy as np
+
+err = sys.stderr.write
+
+def weighted_percentile(percs, vs, ws):
+    """ Use linear interpolation to calculate the weighted percentile.
+
+        Value and weight arrays are first sorted by value. The cumulative
+        distribution function (cdf) is then computed, after which np.interp
+        finds the two values closest to our desired weighted percentile(s)
+        and linearly interpolates them.
+
+        percs  :: List of percentiles we want to calculate
+        vs     :: Array of values we are computing the percentile of
+        ws     :: Array of weights for our corresponding values
+        return :: Array of percentiles
+    """
+    idx = np.argsort(vs)
+    vs, ws = vs[idx], ws[idx]  # weights and values sorted by value
+    cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
+    return np.interp(percs, cdf, vs)  # linear interpolation
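
weighted_percentile() above is self-contained and easy to check by hand. A minimal sketch with invented values, assuming the function has been pasted into a Python session:

    import numpy as np
    vs = np.array([10.0, 20.0, 30.0])  # bin latency values, already sorted
    ws = np.array([ 1.0,  1.0,  2.0])  # histogram counts used as weights
    # cdf works out to [12.5, 37.5, 75.0], so the weighted median falls
    # one third of the way between 20 and 30:
    print(weighted_percentile([50], vs, ws))  # -> [ 23.33333333]
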
+
+def weights(start_ts, end_ts, start, end):
+    """ Calculate weights based on fraction of sample falling in the
+        given interval [start,end]. Weights computed using vector / array
+        computation instead of for-loops.
+
+        Note that samples with zero time length are effectively ignored
+        (we set their weight to zero).
+
+        start_ts :: Array of start times for a set of samples
+        end_ts   :: Array of end times for a set of samples
+        start    :: int
+        end      :: int
+        return   :: Array of weights
+    """
+    sbounds = np.maximum(start_ts, start).astype(float)
+    ebounds = np.minimum(end_ts, end).astype(float)
+    ws = (ebounds - sbounds) / (end_ts - start_ts)
+    if np.any(np.isnan(ws)):
+        err("WARNING: zero-length sample(s) detected. Log file corrupt"
+            " / bad time values? Ignoring these samples.\n")
+    ws[np.where(np.isnan(ws))] = 0.0
+    return ws
+
+def weighted_average(vs, ws):
+    return np.sum(vs * ws) / np.sum(ws)
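
Likewise for weights(), again with invented numbers:

    import numpy as np
    start_ts = np.array([   0.0,  500.0])
    end_ts   = np.array([1000.0, 1500.0])
    # Against the interval [0, 1000): the first sample lies entirely inside
    # (weight 1.0); only the first half of the second sample does (0.5).
    print(weights(start_ts, end_ts, 0, 1000))  # -> [ 1.   0.5]
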
+ """ + + # Multiply the index by the power of 2 coarseness to get the bin + # bin index with a max of 1536 bins (FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h) + stride = 1 << coarseness + idx = idx * stride + lower = _plat_idx_to_val(idx, edge=0.0) + upper = _plat_idx_to_val(idx + stride, edge=1.0) + return lower + (upper - lower) * edge + +def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx): + ps = weighted_percentile(percs, vs, ws) + + avg = weighted_average(vs, ws) + values = [mn, avg] + list(ps) + [mx] + row = [end, ss_cnt] + map(lambda x: float(x) / ctx.divisor, values) + fmt = "%d, %d, %d, " + fmt_float_list(ctx, 5) + ", %d" + print (fmt % tuple(row)) + +def update_extreme(val, fncn, new_val): + """ Calculate min / max in the presence of None values """ + if val is None: return new_val + else: return fncn(val, new_val) + +# See beginning of main() for how bin_vals are computed +bin_vals = [] +lower_bin_vals = [] # lower edge of each bin +upper_bin_vals = [] # upper edge of each bin + +def process_interval(ctx, samples, iStart, iEnd): + """ Construct the weighted histogram for the given interval by scanning + through all the histograms and figuring out which of their bins have + samples with latencies which overlap with the given interval + [iStart,iEnd]. + """ + + times, files, hists = samples[:,0], samples[:,1], samples[:,2:] + iHist = np.zeros(__HIST_COLUMNS) + ss_cnt = 0 # number of samples affecting this interval + mn_bin_val, mx_bin_val = None, None + + for end_time,file,hist in zip(times,files,hists): + + # Only look at bins of the current histogram sample which + # started before the end of the current time interval [start,end] + start_times = (end_time - 0.5 * ctx.interval) - bin_vals / 1000.0 + idx = np.where(start_times < iEnd) + s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx] + + # Increment current interval histogram by weighted values of future histogram: + ws = hs * weights(s_ts, end_time, iStart, iEnd) + iHist[idx] += ws + + # Update total number of samples affecting current interval histogram: + ss_cnt += np.sum(hs) + + # Update min and max bin values seen if necessary: + idx = np.where(hs != 0)[0] + if idx.size > 0: + mn_bin_val = update_extreme(mn_bin_val, min, l_bvs[max(0, idx[0] - 1)]) + mx_bin_val = update_extreme(mx_bin_val, max, u_bvs[min(len(hs) - 1, idx[-1] + 1)]) + + if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val) + +def guess_max_from_bins(ctx, hist_cols): + """ Try to guess the GROUP_NR from given # of histogram + columns seen in an input file """ + max_coarse = 8 + if ctx.group_nr < 19 or ctx.group_nr > 26: + bins = [ctx.group_nr * (1 << 6)] + else: + bins = [1216,1280,1344,1408,1472,1536,1600,1664] + coarses = range(max_coarse + 1) + fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else -10, coarses)) + + arr = np.transpose(list(map(fncn, bins))) + idx = np.where(arr == hist_cols) + if len(idx[1]) == 0: + table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array',' ') + err("Unable to determine bin values from input clat_hist files. Namely \n" + "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) + + "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. 
\n" + "This number needs to be equal to one of the following numbers:\n\n" + + table + "\n\n" + "Possible reasons and corresponding solutions:\n" + " - Input file(s) does not contain histograms.\n" + " - You recompiled fio with a different GROUP_NR. If so please specify this\n" + " new GROUP_NR on the command line with --group_nr\n") + exit(1) + return bins[idx[1][0]] + +def main(ctx): + + if ctx.job_file: + try: + from configparser import SafeConfigParser, NoOptionError + except ImportError: + from ConfigParser import SafeConfigParser, NoOptionError + + cp = SafeConfigParser(allow_no_value=True) + with open(ctx.job_file, 'r') as fp: + cp.readfp(fp) + + if ctx.interval is None: + # Auto detect --interval value + for s in cp.sections(): + try: + hist_msec = cp.get(s, 'log_hist_msec') + if hist_msec is not None: + ctx.interval = int(hist_msec) + except NoOptionError: + pass + + if ctx.interval is None: + ctx.interval = 1000 + + # Automatically detect how many columns are in the input files, + # calculate the corresponding 'coarseness' parameter used to generate + # those files, and calculate the appropriate bin latency values: + with open(ctx.FILE[0], 'r') as fp: + global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS + __TOTAL_COLUMNS = len(fp.readline().split(',')) + __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS + + max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS) + coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS)) + bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float) + lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float) + upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float) + + fps = [open(f, 'r') for f in ctx.FILE] + gen = histogram_generator(ctx, fps, ctx.buff_size) + + print(', '.join(columns)) + + try: + start, end = 0, ctx.interval + arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1)) + more_data = True + while more_data or len(arr) > 0: + + # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval. + while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end: + try: + new_arr = next(gen) + except StopIteration: + more_data = False + break + arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0) + arr = arr.astype(int) + + if arr.size > 0: + # Jump immediately to the start of the input, rounding + # down to the nearest multiple of the interval (useful when --log_unix_epoch + # was used to create these histograms): + if start == 0 and arr[0][0] - ctx.max_latency > end: + start = arr[0][0] - ctx.max_latency + start = start - (start % ctx.interval) + end = start + ctx.interval + + process_interval(ctx, arr, start, end) + + # Update arr to throw away samples we no longer need - samples which + # end before the start of the next interval, i.e. 
+
+def main(ctx):
+
+    if ctx.job_file:
+        try:
+            from configparser import SafeConfigParser, NoOptionError
+        except ImportError:
+            from ConfigParser import SafeConfigParser, NoOptionError
+
+        cp = SafeConfigParser(allow_no_value=True)
+        with open(ctx.job_file, 'r') as fp:
+            cp.readfp(fp)
+
+        if ctx.interval is None:
+            # Auto detect --interval value
+            for s in cp.sections():
+                try:
+                    hist_msec = cp.get(s, 'log_hist_msec')
+                    if hist_msec is not None:
+                        ctx.interval = int(hist_msec)
+                except NoOptionError:
+                    pass
+
+    if ctx.interval is None:
+        ctx.interval = 1000
+
+    # Automatically detect how many columns are in the input files,
+    # calculate the corresponding 'coarseness' parameter used to generate
+    # those files, and calculate the appropriate bin latency values:
+    with open(ctx.FILE[0], 'r') as fp:
+        global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
+        __TOTAL_COLUMNS = len(fp.readline().split(','))
+        __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
+
+        max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
+        coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
+        bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float)
+        lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float)
+        upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float)
+
+    fps = [open(f, 'r') for f in ctx.FILE]
+    gen = histogram_generator(ctx, fps, ctx.buff_size)
+
+    print(', '.join(columns))
+
+    try:
+        start, end = 0, ctx.interval
+        arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1))
+        more_data = True
+        while more_data or len(arr) > 0:
+
+            # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval.
+            while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end:
+                try:
+                    new_arr = next(gen)
+                except StopIteration:
+                    more_data = False
+                    break
+                arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0)
+            arr = arr.astype(int)
+
+            if arr.size > 0:
+                # Jump immediately to the start of the input, rounding
+                # down to the nearest multiple of the interval (useful when --log_unix_epoch
+                # was used to create these histograms):
+                if start == 0 and arr[0][0] - ctx.max_latency > end:
+                    start = arr[0][0] - ctx.max_latency
+                    start = start - (start % ctx.interval)
+                    end = start + ctx.interval
+
+                process_interval(ctx, arr, start, end)
+
+                # Update arr to throw away samples we no longer need - samples which
+                # end before the start of the next interval, i.e. the end of the
+                # current interval:
+                idx = np.where(arr[:,0] > end)
+                arr = arr[idx]
+
+            start += ctx.interval
+            end = start + ctx.interval
+    finally:
+        map(lambda f: f.close(), fps)
+
+
+if __name__ == '__main__':
+    import argparse
+    p = argparse.ArgumentParser()
+    arg = p.add_argument
+    arg("FILE", help='space separated list of latency log filenames', nargs='+')
+    arg('--buff_size',
+        default=10000,
+        type=int,
+        help='number of samples to buffer into numpy at a time')
+
+    arg('--max_latency',
+        default=20,
+        type=float,
+        help='number of seconds of data to process at a time')
+
+    arg('-i', '--interval',
+        type=int,
+        help='interval width (ms), default 1000 ms')
+
+    arg('-d', '--divisor',
+        required=False,
+        type=int,
+        default=1,
+        help='divide the results by this value.')
+
+    arg('--decimals',
+        default=3,
+        type=int,
+        help='number of decimal places to print floats to')
+
+    arg('--warn',
+        dest='warn',
+        action='store_true',
+        default=False,
+        help='print warning messages to stderr')
+
+    arg('--group_nr',
+        default=19,
+        type=int,
+        help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')
+
+    arg('--job-file',
+        default=None,
+        type=str,
+        help='Optional argument pointing to the job file used to create the '
+             'given histogram files. Useful for auto-detecting --log_hist_msec and '
+             '--log_unix_epoch (in fio) values.')
+
+    main(p.parse_args())
+
diff --git a/tools/hist/fiologparser_hist.py.1 b/tools/hist/fiologparser_hist.py.1
new file mode 100644
index 00000000..ed22c747
--- /dev/null
+++ b/tools/hist/fiologparser_hist.py.1
@@ -0,0 +1,201 @@
+.TH fiologparser_hist.py 1 "August 18, 2016"
+.SH NAME
+fiologparser_hist.py \- Calculate statistics from fio histograms
+.SH SYNOPSIS
+.B fiologparser_hist.py
+[\fIoptions\fR] [clat_hist_files]...
+.SH DESCRIPTION
+.B fiologparser_hist.py
+is a utility for converting *_clat_hist* files
+generated by fio into a CSV of latency statistics including minimum,
+average, and maximum latency, and 50th, 90th, 95th, and 99th percentiles.
+.SH EXAMPLES
+.PP
+.nf
+$ fiologparser_hist.py *_clat_hist*
+end-time, samples, min, avg, median, 90%, 95%, 99%, max
+1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
+2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
+4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+...
+.fi
+.PP
+
+.SH OPTIONS
+.TP
+.BR \-\-help
+Print these options.
+.TP
+.BR \-\-buff_size \fR=\fPint
+Number of samples to buffer into numpy at a time. Default is 10,000.
+This can be adjusted to help performance.
+.TP
+.BR \-\-max_latency \fR=\fPint
+Number of seconds of data to process at a time. Defaults to 20 seconds,
+in order to handle the 17 second upper bound on latency in histograms
+reported by fio. This should be increased if fio has been
+run with a larger maximum latency. Lowering this when a lower maximum
+latency is known can improve performance. See NOTES for more details.
+.TP
+.BR \-i ", " \-\-interval \fR=\fPint
+Interval at which statistics are reported. Defaults to 1000 ms. This
+should be set to no less than the value of \fBlog_hist_msec\fR as given
+to fio.
+.TP
+.BR \-d ", " \-\-divisor \fR=\fPint
+Divide statistics by this value. Defaults to 1. Useful if you want to
+convert latencies from milliseconds to seconds (\fBdivisor\fR=\fP1000\fR).
+.TP
+.BR \-\-warn
+Enables warning messages printed to stderr, useful for debugging.
+.TP
+.BR \-\-group_nr \fR=\fPint
+Set this to the value of \fIFIO_IO_U_PLAT_GROUP_NR\fR as defined in
+\fPstat.h\fR if fio has been recompiled. Defaults to 19, the
+current value used in fio. See NOTES for more details.
+
+.SH NOTES
+end-times are calculated to be uniform increments of the \fB\-\-interval\fR value given,
+regardless of when histogram samples are reported. Of note:
+
+.RS
+Intervals with no samples are omitted. In the example above this means
+"no statistics from 2 to 3 seconds" and "39 samples influenced the statistics
+of the interval from 3 to 4 seconds".
+.LP
+Intervals with a single sample will have the same value for all statistics.
+.RE
+
+.PP
+The number of samples is unweighted, corresponding to the total number of samples
+which have any effect whatsoever on the interval.
+
+Min statistics are computed using the value of the lower boundary of the first bin
+(in increasing bin order) with non-zero samples in it. Similarly for max,
+we take the upper boundary of the last bin with non-zero samples in it.
+This is semantically identical to taking the 0th and 100th percentiles with a
+50% bin-width buffer (because percentiles are computed using mid-points of
+the bins). This enforces the following nice properties:
+
+.RS
+min <= 50th <= 90th <= 95th <= 99th <= max
+.LP
+min and max are strict lower and upper bounds on the actual
+min / max seen by fio (and reported in *_clat.* with averaging turned off).
+.RE
+
+.PP
+Average statistics use a standard weighted arithmetic mean.
+
+Percentile statistics are computed using the weighted percentile method as
+described here: \fIhttps://en.wikipedia.org/wiki/Percentile#Weighted_percentile\fR.
+See the weights() method for details on how weights are computed for individual
+samples. In process_interval() we further multiply by the height of each bin
+to get weighted histograms.
+
+We convert files given on the command line, assumed to be fio histogram files.
+An individual histogram file can contain the
+histograms for multiple different r/w directions (notably when \fB\-\-rw\fR=\fPrandrw\fR). This
+is accounted for by tracking each r/w direction separately. In the statistics
+reported we ultimately merge *all* histograms (regardless of r/w direction).
+
+The value of *_GROUP_NR in \fIstat.h\fR (and *_BITS) determines how many latency bins
+fio outputs when histogramming is enabled. Namely for the current default of
+GROUP_NR=19, we get 1,216 bins with a maximum latency of approximately 17
+seconds. For certain applications this may not be sufficient. With GROUP_NR=24
+we have 1,536 bins, giving us a maximum latency of 541 seconds (~ 9 minutes). If
+you expect your application to experience latencies greater than 17 seconds,
+you will need to recompile fio with a larger GROUP_NR, e.g. with:
+
+.RS
+.PP
+.nf
+sed -i.bak 's/^#define FIO_IO_U_PLAT_GROUP_NR 19/#define FIO_IO_U_PLAT_GROUP_NR 24/g' stat.h
+make fio
+.fi
+.PP
+.RE
+
+.PP
+Quick reference table for the max latency corresponding to a sampling of
+values for GROUP_NR:
+
+.RS
+.PP
+.nf
+GROUP_NR | # bins | max latency bin value
+19       |   1216 |        16.9 sec
+20       |   1280 |        33.8 sec
+21       |   1344 |        67.6 sec
+22       |   1408 |  2 min, 15 sec
+23       |   1472 |  4 min, 32 sec
+24       |   1536 |  9 min, 4 sec
+25       |   1600 | 18 min, 8 sec
+26       |   1664 | 36 min, 16 sec
+.fi
+.PP
+.RE
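
The max-latency column above can be reproduced approximately from _plat_idx_to_val() in fiologparser_hist.py — a sketch assuming that function is in scope and that bin values are microseconds, as they are for fio completion latencies (the table's entries read slightly higher, apparently from a different bin-edge convention):

    def max_latency_sec(group_nr, plat_bits=6):
        nbins = group_nr * (1 << plat_bits)  # e.g. 19 * 64 == 1216 bins
        # upper edge of the last bin, converted from usec to seconds:
        return _plat_idx_to_val(nbins - 1, edge=1.0) / 1e6

    print(max_latency_sec(19))  # -> 16.777216 (~17 sec)
    print(max_latency_sec(24))  # -> 536.870912 (~9 min)
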
+
+.PP
+At present this program automatically detects the number of histogram bins in
+the log files, and adjusts the bin latency values accordingly. In particular if
+you use the \fB\-\-log_hist_coarseness\fR parameter of fio, you get output files with
+a number of bins according to the following table (note that the first
+row is identical to the table above):
+
+.RS
+.PP
+.nf
+coarse \\ GROUP_NR
+        19     20     21     22     23     24     25     26
+     -------------------------------------------------------
+  0  [[1216,  1280,  1344,  1408,  1472,  1536,  1600,  1664],
+  1   [ 608,   640,   672,   704,   736,   768,   800,   832],
+  2   [ 304,   320,   336,   352,   368,   384,   400,   416],
+  3   [ 152,   160,   168,   176,   184,   192,   200,   208],
+  4   [  76,    80,    84,    88,    92,    96,   100,   104],
+  5   [  38,    40,    42,    44,    46,    48,    50,    52],
+  6   [  19,    20,    21,    22,    23,    24,    25,    26],
+  7   [ N/A,    10,   N/A,    11,   N/A,    12,   N/A,    13],
+  8   [ N/A,     5,   N/A,   N/A,   N/A,     6,   N/A,   N/A]]
+.fi
+.PP
+.RE
+
+.PP
+For other values of GROUP_NR and coarseness, this table can be computed like this:
+
+.RS
+.PP
+.nf
+import numpy as np
+bins = [1216,1280,1344,1408,1472,1536,1600,1664]
+max_coarse = 8
+fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else np.nan, range(max_coarse + 1)))
+np.transpose(list(map(fncn, bins)))
+.fi
+.PP
+.RE
+
+.PP
+If you have not adjusted GROUP_NR for your (high latency) application, then you
+will see the percentiles computed by this tool max out at the max latency bin
+value as in the first table above, and in this plot (where GROUP_NR=19 and thus we see
+a max latency of ~16.7 seconds in the red line):
+
+.RS
+\fIhttps://www.cronburg.com/fio/max_latency_bin_value_bug.png
+.RE
+
+.PP
+Motivation for, design decisions, and the implementation process are
+described in further detail here:
+
+.RS
+\fIhttps://www.cronburg.com/fio/cloud-latency-problem-measurement/
+.RE
+
+.SH AUTHOR
+.B fiologparser_hist.py
+and this manual page were written by Karl Cronburg <karl.cronburg@gmail.com>.
+.SH "REPORTING BUGS"
+Report bugs to the \fBfio\fR mailing list <fio@vger.kernel.org>.
diff --git a/tools/hist/half-bins.py b/tools/hist/half-bins.py
new file mode 100755
index 00000000..d592af00
--- /dev/null
+++ b/tools/hist/half-bins.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python2.7
+""" Cut the number of bins in half in fio histogram output. Example usage:
+
+        $ half-bins.py -c 2 output_clat_hist.1.log > smaller_clat_hist.1.log
+
+    Which merges e.g. bins [0 .. 3], [4 .. 7], ..., [1212 .. 1215] resulting in
+    304 = 1216 / (2**2) merged bins per histogram sample.
+
+    @author Karl Cronburg <karl.cronburg@gmail.com>
+"""
+import sys
+
+def main(ctx):
+    stride = 1 << ctx.coarseness
+    with open(ctx.FILENAME, 'r') as fp:
+        for line in fp.readlines():
+            vals = line.split(', ')
+            sys.stdout.write("%s, %s, %s, " % tuple(vals[:3]))
+
+            hist = list(map(int, vals[3:]))
+            for i in range(0, len(hist) - stride, stride):
+                sys.stdout.write("%d, " % sum(hist[i : i + stride],))
+            sys.stdout.write("%d\n" % sum(hist[len(hist) - stride:]))
+
+if __name__ == '__main__':
+    import argparse
+    p = argparse.ArgumentParser()
+    arg = p.add_argument
+    arg( 'FILENAME', help='clat_hist file for which we will reduce'
+        ' (by half or more) the number of bins.')
+    arg('-c', '--coarseness',
+        default=1,
+        type=int,
+        help='number of times to reduce number of bins by half, '
+             'e.g. coarseness of 4 merges each 2^4 = 16 consecutive '
+             'bins.')
+    main(p.parse_args())
+
diff --git a/tools/plot/fio2gnuplot b/tools/plot/fio2gnuplot
index 2d64a6ea..a703ae33 100755
--- a/tools/plot/fio2gnuplot
+++ b/tools/plot/fio2gnuplot
@@ -31,7 +31,7 @@ def find_file(path, pattern):
 	fio_data_file=[]
 	# For all the local files
 	for file in os.listdir(path):
-		# If the file math the regexp
+		# If the file matches the glob
 		if fnmatch.fnmatch(file, pattern):
 			# Let's consider this file
 			fio_data_file.append(file)
@@ -361,7 +361,7 @@ def print_help():
    print 'fio2gnuplot -ghbiodvk -t <title> -o <outputfile> -p <pattern> -G <type> -m <time> -M <time>'
    print
    print '-h --help                           : Print this help'
-   print '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files'
+   print '-p <pattern> or --pattern <pattern> : A glob pattern to select fio input files'
    print '-b or --bandwidth                   : A predefined pattern for selecting *_bw.log files'
    print '-i or --iops                        : A predefined pattern for selecting *_iops.log files'
    print '-g or --gnuplot                     : Render gnuplot traces before exiting'
@@ -458,7 +458,15 @@ def main(argv):
 		fio_data_file=find_file('.',pattern)
 		if len(fio_data_file) == 0:
 			print "No log file found with pattern %s!" % pattern
-			sys.exit(1)
+			# Try numjob log file format if per_numjob_logs=1
+			if (pattern == '*_bw.log'):
+				fio_data_file=find_file('.','*_bw.*.log')
+			if (pattern == '*_iops.log'):
+				fio_data_file=find_file('.','*_iops.*.log')
+			if len(fio_data_file) == 0:
+				sys.exit(1)
+			else:
+				print "Using log file per job format instead"
 		else:
 			print "%d files Selected with pattern '%s'" % (len(fio_data_file), pattern)
@@ -479,7 +487,7 @@ def main(argv):
 			#We need to adjust the output filename regarding the pattern required by the user
 			if (pattern_set_by_user == True):
 				gnuplot_output_filename=pattern
-				# As we do have some regexp in the pattern, let's make this simpliest
+				# As we do have some glob in the pattern, let's make this simplest
 				# We do remove the simpliest parts of the expression to get a clear file name
 				gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
 				gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
@@ -488,6 +496,8 @@ def main(argv):
 			# Insure that we don't have any starting or trailing dash to the filename
 			gnuplot_output_filename = gnuplot_output_filename[:-1] if gnuplot_output_filename.endswith('-') else gnuplot_output_filename
 			gnuplot_output_filename = gnuplot_output_filename[1:] if gnuplot_output_filename.startswith('-') else gnuplot_output_filename
+			if (gnuplot_output_filename == ''):
+				gnuplot_output_filename='default'
 
 	if parse_global==True:
 		parse_global_files(fio_data_file, global_search)
diff --git a/tools/plot/graph2D.gpm b/tools/plot/graph2D.gpm
index 5cd6ff35..769b754a 100644
--- a/tools/plot/graph2D.gpm
+++ b/tools/plot/graph2D.gpm
@@ -1,9 +1,30 @@
 # This Gnuplot file has been generated by eNovance
 
-set title '$0'
+needed_args = 8
+if (exists("ARGC") && ARGC >= needed_args) \
+    found_args = 1; \
+else if (strlen("$$#") < 3 && "$#" >= needed_args) \
+    found_args = 1; \
+    ARG1 = "$0"; \
+    ARG2 = "$1"; \
+    ARG3 = "$2"; \
+    ARG4 = "$3"; \
+    ARG5 = "$4"; \
+    ARG6 = "$5"; \
+    ARG7 = "$6"; \
+    ARG8 = "$7"; \
+else \
+    found_args = 0; \
+    print "Aborting: could not find all arguments"; \
+    exit
+
+avg_num = ARG8 + 0
+avg_str = sprintf("%g", avg_num)
+
+set title ARG1
 
 set terminal png size 1280,1024
-set output '$3.png'
+set output ARG4 . '.png'
 #set terminal x11
 
 #Preparing Axes
@@ -12,7 +33,7 @@
 set ytics axis out auto
 #set data style lines
 set key top left reverse
 set xlabel "Time (Seconds)"
-set ylabel '$4'
+set ylabel ARG5
 set xrange [0:]
 set yrange [0:]
 
@@ -22,13 +43,13 @@
 set style line 100 lt 7 lw 0.5
 set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"
 
-plot '$1' using 2:3 with linespoints title '$2', $7 w l ls 1 ti 'Global average value ($7)'
+plot ARG2 using 2:3 with linespoints title ARG3, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'
 
-set output '$5.png'
-plot '$1' using 2:3 smooth csplines title '$2', $7 w l ls 1 ti 'Global average value ($7)'
+set output ARG6 . '.png'
+plot ARG2 using 2:3 smooth csplines title ARG3, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'
 
-set output '$6.png'
-plot '$1' using 2:3 smooth bezier title '$2', $7 w l ls 1 ti 'Global average value ($7)'
+set output ARG7 . '.png'
+plot ARG2 using 2:3 smooth bezier title ARG3, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'
 
 #pause -1
 #The End
diff --git a/tools/plot/graph3D.gpm b/tools/plot/graph3D.gpm
index 93f7a4da..ac2cdf6c 100644
--- a/tools/plot/graph3D.gpm
+++ b/tools/plot/graph3D.gpm
@@ -1,9 +1,24 @@
 # This Gnuplot file has been generated by eNovance
 
-set title '$0'
+needed_args = 5
+if (exists("ARGC") && ARGC >= needed_args) \
+    found_args = 1; \
+else if (strlen("$$#") < 3 && "$#" >= needed_args) \
+    found_args = 1; \
+    ARG1 = "$0"; \
+    ARG2 = "$1"; \
+    ARG3 = "$2"; \
+    ARG4 = "$3"; \
+    ARG5 = "$4"; \
+else \
+    found_args = 0; \
+    print "Aborting: could not find all arguments"; \
+    exit
+
+set title ARG1
 
 set terminal png size 1280,1024
-set output '$3.png'
+set output ARG4 . '.png'
 #set terminal x11
 
 #3D Config
 set isosamples 30
@@ -19,7 +34,7 @@
 set grid back
 set key top left reverse
 set ylabel "Disk"
 set xlabel "Time (Seconds)"
-set zlabel '$4'
+set zlabel ARG5
 set cbrange [0:]
 set zrange [0:]
 
@@ -35,7 +50,7 @@
 set multiplot
 
 #Top Left View
 set size 0.5,0.5
 set view 64,216
 set origin 0,0.5
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Top Right View
 set size 0.5,0.5
@@ -43,7 +58,7 @@
 set origin 0.5,0.5
 set view 90,0
 set pm3d at s solid hidden3d 100 scansbackward
 set pm3d depthorder
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Bottom Right View
 set size 0.5,0.5
@@ -51,13 +66,13 @@
 set origin 0.5,0
 set view 63,161
 set pm3d at s solid hidden3d 100 scansbackward
 set pm3d depthorder
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Bottom Left View
 set size 0.5,0.5
 set origin 0,0
 set pm3d map
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #Unsetting multiplotting
 unset multiplot
@@ -66,7 +81,7 @@
 
 #Preparing 3D Interactive view
 set mouse
 set terminal png size 1024,768
-set output '$3-3D.png'
+set output ARG4 . '-3D.png'
 #set term x11
 
 set view 64,216
@@ -74,7 +89,7 @@
 set origin 0,0
 set size 1,1
 set pm3d at bs solid hidden3d 100 scansbackward
 set pm3d depthorder
-splot '$1' using 2:1:3 with linespoints title '$2'
+splot ARG2 using 2:1:3 with linespoints title ARG3
 
 #pause -1
 #The End
diff --git a/tools/plot/math.gpm b/tools/plot/math.gpm
index a01f5a0d..0a2aff56 100644
--- a/tools/plot/math.gpm
+++ b/tools/plot/math.gpm
@@ -1,15 +1,32 @@
 # This Gnuplot file has been generated by eNovance
 
+if (exists("ARGC") && ARGC > 5) \
+    found_args = 1; \
+else if (strlen("$$#") < 3 && "$#" > 5) \
+    found_args = 1; \
+    ARG1 = "$0"; \
+    ARG2 = "$1"; \
+    ARG3 = "$2"; \
+    ARG4 = "$3"; \
+    ARG5 = "$4"; \
+    ARG6 = "$5"; \
+else \
+    found_args = 0; \
+    print "Aborting: could not find all arguments"; \
+    exit
 
-set title '$0'
+avg_num = ARG6 + 0
+avg_str = sprintf("%g", avg_num)
+
+set title ARG1
 
 set terminal png size 1280,1024
-set output '$3.png'
+set output ARG4 . '.png'
 set palette rgbformulae 7,5,15
 set style line 100 lt 7 lw 0.5
 set style fill transparent solid 0.9 noborder
 set auto x
-set ylabel '$4'
+set ylabel ARG5
 set xlabel "Disk"
 set yrange [0:]
 set style data histogram
@@ -22,4 +39,4 @@
 set xtics axis out
 set xtic rotate by 45 scale 0 font ",8" autojustify
 set xtics offset 0,-1 border -5,1,5
 set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"
-plot '$1' using 2:xtic(1) ti col, $5 w l ls 1 ti 'Global average value ($5)'
+plot ARG2 using 2:xtic(1) ti col, avg_num w l ls 1 ti 'Global average value (' . avg_str . ')'