diff options
Diffstat (limited to 'src/analysis/mod.rs')
-rwxr-xr-x | src/analysis/mod.rs | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/src/analysis/mod.rs b/src/analysis/mod.rs new file mode 100755 index 0000000..caa948d --- /dev/null +++ b/src/analysis/mod.rs @@ -0,0 +1,358 @@ +use std::path::Path; + +use crate::stats::bivariate::regression::Slope; +use crate::stats::bivariate::Data; +use crate::stats::univariate::outliers::tukey; +use crate::stats::univariate::Sample; +use crate::stats::{Distribution, Tails}; + +use crate::benchmark::BenchmarkConfig; +use crate::connection::OutgoingMessage; +use crate::estimate::{ + build_estimates, ConfidenceInterval, Distributions, Estimate, Estimates, PointEstimates, +}; +use crate::fs; +use crate::measurement::Measurement; +use crate::report::{BenchmarkId, ReportContext}; +use crate::routine::Routine; +use crate::{Baseline, Criterion, SavedSample, Throughput}; + +macro_rules! elapsed { + ($msg:expr, $block:expr) => {{ + let start = ::std::time::Instant::now(); + let out = $block; + let elapsed = &start.elapsed(); + + info!( + "{} took {}", + $msg, + crate::format::time(crate::DurationExt::to_nanos(elapsed) as f64) + ); + + out + }}; +} + +mod compare; + +// Common analysis procedure +pub(crate) fn common<M: Measurement, T: ?Sized>( + id: &BenchmarkId, + routine: &mut dyn Routine<M, T>, + config: &BenchmarkConfig, + criterion: &Criterion<M>, + report_context: &ReportContext, + parameter: &T, + throughput: Option<Throughput>, +) { + criterion.report.benchmark_start(id, report_context); + + if let Baseline::Compare = criterion.baseline { + if !base_dir_exists( + id, + &criterion.baseline_directory, + &criterion.output_directory, + ) { + panic!(format!( + "Baseline '{base}' must exist before comparison is allowed; try --save-baseline {base}", + base=criterion.baseline_directory, + )); + } + } + + let (sampling_mode, iters, times); + if let Some(baseline) = &criterion.load_baseline { + let mut sample_path = criterion.output_directory.clone(); + sample_path.push(id.as_directory_name()); + sample_path.push(baseline); + sample_path.push("sample.json"); + let loaded = fs::load::<SavedSample, _>(&sample_path); + + match loaded { + Err(err) => panic!( + "Baseline '{base}' must exist before it can be loaded; try --save-baseline {base}. Error: {err}", + base = baseline, err = err + ), + Ok(samples) => { + sampling_mode = samples.sampling_mode; + iters = samples.iters.into_boxed_slice(); + times = samples.times.into_boxed_slice(); + } + } + } else { + let sample = routine.sample( + &criterion.measurement, + id, + config, + criterion, + report_context, + parameter, + ); + sampling_mode = sample.0; + iters = sample.1; + times = sample.2; + + if let Some(conn) = &criterion.connection { + conn.send(&OutgoingMessage::MeasurementComplete { + id: id.into(), + iters: &iters, + times: ×, + plot_config: (&report_context.plot_config).into(), + sampling_method: sampling_mode.into(), + benchmark_config: config.into(), + }) + .unwrap(); + + conn.serve_value_formatter(criterion.measurement.formatter()) + .unwrap(); + } + } + + criterion.report.analysis(id, report_context); + + let avg_times = iters + .iter() + .zip(times.iter()) + .map(|(&iters, &elapsed)| elapsed / iters) + .collect::<Vec<f64>>(); + let avg_times = Sample::new(&avg_times); + + if criterion.connection.is_none() && criterion.load_baseline.is_none() { + log_if_err!({ + let mut new_dir = criterion.output_directory.clone(); + new_dir.push(id.as_directory_name()); + new_dir.push("new"); + fs::mkdirp(&new_dir) + }); + } + + let data = Data::new(&iters, ×); + let labeled_sample = tukey::classify(avg_times); + if criterion.connection.is_none() { + log_if_err!({ + let mut tukey_file = criterion.output_directory.to_owned(); + tukey_file.push(id.as_directory_name()); + tukey_file.push("new"); + tukey_file.push("tukey.json"); + fs::save(&labeled_sample.fences(), &tukey_file) + }); + } + let (mut distributions, mut estimates) = estimates(avg_times, config); + if sampling_mode.is_linear() { + let (distribution, slope) = regression(&data, config); + + estimates.slope = Some(slope); + distributions.slope = Some(distribution); + } + + if criterion.connection.is_none() && criterion.load_baseline.is_none() { + log_if_err!({ + let mut sample_file = criterion.output_directory.clone(); + sample_file.push(id.as_directory_name()); + sample_file.push("new"); + sample_file.push("sample.json"); + fs::save( + &SavedSample { + sampling_mode, + iters: data.x().as_ref().to_vec(), + times: data.y().as_ref().to_vec(), + }, + &sample_file, + ) + }); + log_if_err!({ + let mut estimates_file = criterion.output_directory.clone(); + estimates_file.push(id.as_directory_name()); + estimates_file.push("new"); + estimates_file.push("estimates.json"); + fs::save(&estimates, &estimates_file) + }); + } + + let compare_data = if base_dir_exists( + id, + &criterion.baseline_directory, + &criterion.output_directory, + ) { + let result = compare::common(id, avg_times, config, criterion); + match result { + Ok(( + t_value, + t_distribution, + relative_estimates, + relative_distributions, + base_iter_counts, + base_sample_times, + base_avg_times, + base_estimates, + )) => { + let p_value = t_distribution.p_value(t_value, &Tails::Two); + Some(crate::report::ComparisonData { + p_value, + t_distribution, + t_value, + relative_estimates, + relative_distributions, + significance_threshold: config.significance_level, + noise_threshold: config.noise_threshold, + base_iter_counts, + base_sample_times, + base_avg_times, + base_estimates, + }) + } + Err(e) => { + crate::error::log_error(&e); + None + } + } + } else { + None + }; + + let measurement_data = crate::report::MeasurementData { + data: Data::new(&*iters, &*times), + avg_times: labeled_sample, + absolute_estimates: estimates, + distributions, + comparison: compare_data, + throughput, + }; + + criterion.report.measurement_complete( + id, + report_context, + &measurement_data, + criterion.measurement.formatter(), + ); + + if criterion.connection.is_none() && criterion.load_baseline.is_none() { + log_if_err!({ + let mut benchmark_file = criterion.output_directory.clone(); + benchmark_file.push(id.as_directory_name()); + benchmark_file.push("new"); + benchmark_file.push("benchmark.json"); + fs::save(&id, &benchmark_file) + }); + } + + if criterion.connection.is_none() { + if let Baseline::Save = criterion.baseline { + copy_new_dir_to_base( + id.as_directory_name(), + &criterion.baseline_directory, + &criterion.output_directory, + ); + } + } +} + +fn base_dir_exists(id: &BenchmarkId, baseline: &str, output_directory: &Path) -> bool { + let mut base_dir = output_directory.to_owned(); + base_dir.push(id.as_directory_name()); + base_dir.push(baseline); + base_dir.exists() +} + +// Performs a simple linear regression on the sample +fn regression( + data: &Data<'_, f64, f64>, + config: &BenchmarkConfig, +) -> (Distribution<f64>, Estimate) { + let cl = config.confidence_level; + + let distribution = elapsed!( + "Bootstrapped linear regression", + data.bootstrap(config.nresamples, |d| (Slope::fit(&d).0,)) + ) + .0; + + let point = Slope::fit(&data); + let (lb, ub) = distribution.confidence_interval(config.confidence_level); + let se = distribution.std_dev(None); + + ( + distribution, + Estimate { + confidence_interval: ConfidenceInterval { + confidence_level: cl, + lower_bound: lb, + upper_bound: ub, + }, + point_estimate: point.0, + standard_error: se, + }, + ) +} + +// Estimates the statistics of the population from the sample +fn estimates(avg_times: &Sample<f64>, config: &BenchmarkConfig) -> (Distributions, Estimates) { + fn stats(sample: &Sample<f64>) -> (f64, f64, f64, f64) { + let mean = sample.mean(); + let std_dev = sample.std_dev(Some(mean)); + let median = sample.percentiles().median(); + let mad = sample.median_abs_dev(Some(median)); + + (mean, std_dev, median, mad) + } + + let cl = config.confidence_level; + let nresamples = config.nresamples; + + let (mean, std_dev, median, mad) = stats(avg_times); + let points = PointEstimates { + mean, + median, + std_dev, + median_abs_dev: mad, + }; + + let (dist_mean, dist_stddev, dist_median, dist_mad) = elapsed!( + "Bootstrapping the absolute statistics.", + avg_times.bootstrap(nresamples, stats) + ); + + let distributions = Distributions { + mean: dist_mean, + slope: None, + median: dist_median, + median_abs_dev: dist_mad, + std_dev: dist_stddev, + }; + + let estimates = build_estimates(&distributions, &points, cl); + + (distributions, estimates) +} + +fn copy_new_dir_to_base(id: &str, baseline: &str, output_directory: &Path) { + let root_dir = Path::new(output_directory).join(id); + let base_dir = root_dir.join(baseline); + let new_dir = root_dir.join("new"); + + if !new_dir.exists() { + return; + }; + if !base_dir.exists() { + try_else_return!(fs::mkdirp(&base_dir)); + } + + // TODO: consider using walkdir or similar to generically copy. + try_else_return!(fs::cp( + &new_dir.join("estimates.json"), + &base_dir.join("estimates.json") + )); + try_else_return!(fs::cp( + &new_dir.join("sample.json"), + &base_dir.join("sample.json") + )); + try_else_return!(fs::cp( + &new_dir.join("tukey.json"), + &base_dir.join("tukey.json") + )); + try_else_return!(fs::cp( + &new_dir.join("benchmark.json"), + &base_dir.join("benchmark.json") + )); + try_else_return!(fs::cp(&new_dir.join("raw.csv"), &base_dir.join("raw.csv"))); +} |