aboutsummaryrefslogtreecommitdiff
path: root/cros_utils/tabulator.py
diff options
context:
space:
mode:
Diffstat (limited to 'cros_utils/tabulator.py')
-rw-r--r--cros_utils/tabulator.py61
1 files changed, 55 insertions, 6 deletions
diff --git a/cros_utils/tabulator.py b/cros_utils/tabulator.py
index d079ea22..2cfd5d35 100644
--- a/cros_utils/tabulator.py
+++ b/cros_utils/tabulator.py
@@ -67,13 +67,44 @@ import getpass
import math
import statistics
import sys
+from typing import Tuple, Union
from cros_utils import misc
from cros_utils.email_sender import EmailSender
+import numpy as np
-# TODO(crbug.com/980719): Drop scipy in the future.
-# pylint: disable=import-error
-import scipy
+
+def _ttest_ind(
+ sample: Union[np.ndarray, list], baseline: Union[np.ndarray, list]
+) -> Tuple[float, float]:
+ """Independent, two-sided student's T test.
+
+ Reimplementation of scipy.stats.ttest_ind.
+ """
+ if isinstance(sample, list):
+ sample = np.asarray(sample)
+ if isinstance(baseline, list):
+ baseline = np.asarray(baseline)
+ diff = np.mean(sample) - np.mean(baseline)
+ diff_stderr = np.sqrt(sample.var(ddof=1) + baseline.var(ddof=1))
+ t_value = np.mean(diff) / (diff_stderr / np.sqrt(len(sample)))
+ samples = _sample_student_t(len(sample), 1000)
+ # Assuming two-sided student's t
+ if t_value < 0:
+ # Lower tail
+ return t_value, 2 * np.sum(samples < t_value) / len(samples)
+ # Upper tail
+ return t_value, 2 * np.sum(samples > t_value) / len(samples)
+
+
+def _sample_student_t(
+ dof: float, num_samples: int
+) -> np.ndarray:
+ # In theory this probably should be memoized. However,
+ # that's a lot of data points to store in memory for
+ # the lifetime of the program?
+ sample_generator = np.random.default_rng()
+ return sample_generator.standard_t(dof, num_samples)
def _AllFloat(values):
@@ -141,6 +172,15 @@ class TableGenerator(object):
values = _StripNone(values)
if _AllFloat(values):
values = _GetFloats(values)
+ values = [
+ float(v)
+ for v in values
+ if isinstance(v, float)
+ or isinstance(v, int)
+ or v.lower() in ("nan", "inf")
+ ]
+ if not values:
+ return float("nan")
return max(values)
def _GetLowestValue(self, key):
@@ -152,6 +192,15 @@ class TableGenerator(object):
values = _StripNone(values)
if _AllFloat(values):
values = _GetFloats(values)
+ values = [
+ float(v)
+ for v in values
+ if isinstance(v, float)
+ or isinstance(v, int)
+ or v.lower() in ("nan", "inf")
+ ]
+ if not values:
+ return float("nan")
return min(values)
def _SortKeys(self, keys):
@@ -415,7 +464,7 @@ class SamplesTableGenerator(TableGenerator):
# weighted_samples we added up.
one_dict = {}
if run:
- one_dict[u"weighted_samples"] = [run, u"samples"]
+ one_dict["weighted_samples"] = [run, "samples"]
one_dict["retval"] = 0
else:
one_dict["retval"] = 1
@@ -682,7 +731,7 @@ class PValueResult(ComparisonResult):
if len(values) < 2 or len(baseline_values) < 2:
cell.value = float("nan")
return
- _, cell.value = scipy.stats.ttest_ind(values, baseline_values)
+ _, cell.value = _ttest_ind(values, baseline_values)
def _ComputeString(self, cell, values, baseline_values):
return float("nan")
@@ -940,7 +989,7 @@ class StorageFormat(Format):
current += 1
if current:
- divisor = base ** current
+ divisor = base**current
cell.string_value = "%1.1f%s" % (
(v / divisor),
suffices[current - 1],