aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--crosperf/autotest_runner.py4
-rw-r--r--crosperf/benchmark.py5
-rw-r--r--crosperf/benchmark_run.py31
-rw-r--r--crosperf/experiment.py8
-rw-r--r--crosperf/experiment_factory.py13
-rw-r--r--crosperf/experiment_file.py6
-rw-r--r--crosperf/experiment_files/aes_perf3
-rw-r--r--crosperf/experiment_files/bloat_perf3
-rw-r--r--crosperf/experiment_files/morejs_perf3
-rw-r--r--crosperf/experiment_files/page_cycler_perf3
-rw-r--r--crosperf/experiment_files/toolchain5
-rw-r--r--crosperf/experiment_runner.py16
-rw-r--r--crosperf/machine_manager.py121
-rw-r--r--crosperf/results_cache.py43
-rw-r--r--crosperf/results_columns.py152
-rw-r--r--crosperf/results_organizer.py42
-rw-r--r--crosperf/results_report.py292
-rw-r--r--crosperf/results_sorter.py4
-rw-r--r--crosperf/settings_factory.py31
-rw-r--r--crosperf/table.py84
-rwxr-xr-ximage_chromeos.py19
-rw-r--r--utils/colortrans.py376
-rw-r--r--utils/command_executer.py27
-rw-r--r--utils/constants.py10
-rw-r--r--utils/file_utils.py18
-rw-r--r--utils/logger.py76
-rw-r--r--utils/misc.py70
-rw-r--r--utils/misc_test.py30
-rw-r--r--utils/stats.py4528
-rw-r--r--utils/tabulator.py967
-rw-r--r--utils/tabulator_test.py96
31 files changed, 6565 insertions, 521 deletions
diff --git a/crosperf/autotest_runner.py b/crosperf/autotest_runner.py
index 80fb2a24..5611b655 100644
--- a/crosperf/autotest_runner.py
+++ b/crosperf/autotest_runner.py
@@ -15,11 +15,15 @@ class AutotestRunner(object):
def Run(self, machine_name, chromeos_root, board, autotest_name,
autotest_args):
+ """Run the run_remote_test."""
options = ""
if board:
options += " --board=%s" % board
if autotest_args:
options += " %s" % autotest_args
+ command = "rm -rf /usr/local/autotest/results/*"
+ self._ce.CrosRunCommand(command, machine=machine_name, username="root",
+ chromeos_root=chromeos_root)
command = ("./run_remote_tests.sh --remote=%s %s %s" %
(machine_name, options, autotest_name))
return self._ce.ChrootRunCommand(chromeos_root, command, True, self._ct)
diff --git a/crosperf/benchmark.py b/crosperf/benchmark.py
index fa12d934..a75bd8e3 100644
--- a/crosperf/benchmark.py
+++ b/crosperf/benchmark.py
@@ -14,11 +14,10 @@ class Benchmark(object):
"""
def __init__(self, name, autotest_name, autotest_args, iterations,
- outlier_range, profile_counters, profile_type):
+ outlier_range, perf_args):
self.name = name
self.autotest_name = autotest_name
self.autotest_args = autotest_args
self.iterations = iterations
self.outlier_range = outlier_range
- self.profile_counters = profile_counters
- self.profile_type = profile_type
+ self.perf_args = perf_args
diff --git a/crosperf/benchmark_run.py b/crosperf/benchmark_run.py
index c20b24e0..7579b6c2 100644
--- a/crosperf/benchmark_run.py
+++ b/crosperf/benchmark_run.py
@@ -8,12 +8,12 @@ import re
import threading
import time
import traceback
-from results_cache import Result
-from utils import logger
-from utils import command_executer
+
from autotest_runner import AutotestRunner
+from results_cache import Result
from results_cache import ResultsCache
-
+from utils import command_executer
+from utils import logger
STATUS_FAILED = "FAILED"
STATUS_SUCCEEDED = "SUCCEEDED"
@@ -26,7 +26,7 @@ STATUS_PENDING = "PENDING"
class BenchmarkRun(threading.Thread):
def __init__(self, name, benchmark_name, autotest_name, autotest_args,
label_name, chromeos_root, chromeos_image, board, iteration,
- cache_conditions, outlier_range, profile_counters, profile_type,
+ cache_conditions, outlier_range, perf_args,
machine_manager,
logger_to_use):
threading.Thread.__init__(self)
@@ -45,8 +45,7 @@ class BenchmarkRun(threading.Thread):
self.status = STATUS_PENDING
self.run_completed = False
self.outlier_range = outlier_range
- self.profile_counters = profile_counters
- self.profile_type = profile_type
+ self.perf_args = perf_args
self.machine_manager = machine_manager
self.cache = ResultsCache()
self.autotest_runner = AutotestRunner(self._logger)
@@ -68,10 +67,11 @@ class BenchmarkRun(threading.Thread):
self.autotest_name,
self.iteration,
self.autotest_args,
- self.machine_manager.GetMachines()[0].name,
+ self.machine_manager,
self.board,
self.cache_conditions,
- self._logger)
+ self._logger,
+ )
self.result = self.cache.ReadResult()
self.cache_hit = (self.result is not None)
@@ -134,13 +134,12 @@ class BenchmarkRun(threading.Thread):
return machine
def _GetExtraAutotestArgs(self):
- if self.profile_type:
- if self.profile_type == "record":
- perf_args = "record -a -e %s" % ",".join(self.profile_counters)
- elif self.profile_type == "stat":
- perf_args = "stat -a"
- else:
- raise Exception("profile_type must be either record or stat")
+ if self.perf_args:
+ perf_args_list = self.perf_args.split(" ")
+ perf_args_list = [perf_args_list[0]] + ["-a"] + perf_args_list[1:]
+ perf_args = " ".join(perf_args_list)
+ if not perf_args_list[0] in ["record", "stat"]:
+ raise Exception("perf_args must start with either record or stat")
extra_autotest_args = ["--profiler=custom_perf",
("--profiler_args='perf_options=\"%s\"'" %
perf_args)]
diff --git a/crosperf/experiment.py b/crosperf/experiment.py
index 3f68f8be..7b48344c 100644
--- a/crosperf/experiment.py
+++ b/crosperf/experiment.py
@@ -18,7 +18,7 @@ class Experiment(object):
def __init__(self, name, remote, rerun_if_failed, working_directory,
chromeos_root, cache_conditions, labels, benchmarks,
- experiment_file):
+ experiment_file, email_to):
self.name = name
self.rerun_if_failed = rerun_if_failed
self.working_directory = working_directory
@@ -26,6 +26,7 @@ class Experiment(object):
self.chromeos_root = chromeos_root
self.cache_conditions = cache_conditions
self.experiment_file = experiment_file
+ self.email_to = email_to
self.results_directory = os.path.join(self.working_directory,
self.name + "_results")
@@ -48,6 +49,8 @@ class Experiment(object):
for machine in remote:
self.machine_manager.AddMachine(machine)
+ self.machine_manager.ComputeCommonCheckSum()
+ self.machine_manager.ComputeCommonCheckSumString()
self.start_time = None
self.benchmark_runs = self._GenerateBenchmarkRuns()
@@ -76,8 +79,7 @@ class Experiment(object):
iteration,
self.cache_conditions,
benchmark.outlier_range,
- benchmark.profile_counters,
- benchmark.profile_type,
+ benchmark.perf_args,
self.machine_manager,
logger_to_use)
diff --git a/crosperf/experiment_factory.py b/crosperf/experiment_factory.py
index d3c717ae..5c21179e 100644
--- a/crosperf/experiment_factory.py
+++ b/crosperf/experiment_factory.py
@@ -33,7 +33,7 @@ class ExperimentFactory(object):
if global_settings.GetField("rerun"):
cache_conditions.append(CacheConditions.FALSE)
if global_settings.GetField("exact_remote"):
- cache_conditions.append(CacheConditions.REMOTES_MATCH)
+ cache_conditions.append(CacheConditions.MACHINES_MATCH)
# Construct benchmarks.
benchmarks = []
@@ -46,11 +46,9 @@ class ExperimentFactory(object):
autotest_args = benchmark_settings.GetField("autotest_args")
iterations = benchmark_settings.GetField("iterations")
outlier_range = benchmark_settings.GetField("outlier_range")
- profile_counters = benchmark_settings.GetField("profile_counters")
- profile_type = benchmark_settings.GetField("profile_type")
+ perf_args = benchmark_settings.GetField("perf_args")
benchmark = Benchmark(benchmark_name, autotest_name, autotest_args,
- iterations, outlier_range, profile_counters,
- profile_type)
+ iterations, outlier_range, perf_args)
benchmarks.append(benchmark)
# Construct labels.
@@ -64,9 +62,12 @@ class ExperimentFactory(object):
label = Label(label_name, image, chromeos_root, board)
labels.append(label)
+ email = global_settings.GetField("email")
+
experiment = Experiment(experiment_name, remote, rerun_if_failed,
working_directory, chromeos_root,
cache_conditions, labels, benchmarks,
- experiment_file.Canonicalize())
+ experiment_file.Canonicalize(),
+ email)
return experiment
diff --git a/crosperf/experiment_file.py b/crosperf/experiment_file.py
index bde2a4d7..fc0f16ab 100644
--- a/crosperf/experiment_file.py
+++ b/crosperf/experiment_file.py
@@ -4,6 +4,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+import os.path
import re
from settings import Settings
from settings_factory import SettingsFactory
@@ -143,6 +144,11 @@ class ExperimentFile(object):
field = settings.fields[field_name]
if field.assigned:
res += "\t%s: %s\n" % (field.name, field.GetString())
+ if field.name == "chromeos_image":
+ real_file = (os.path.realpath
+ (os.path.expanduser(field.GetString())))
+ if real_file != field.GetString():
+ res += "\t#actual_image: %s\n" % real_file
res += "}\n\n"
return res
diff --git a/crosperf/experiment_files/aes_perf b/crosperf/experiment_files/aes_perf
index b298e91c..0c54ccbd 100644
--- a/crosperf/experiment_files/aes_perf
+++ b/crosperf/experiment_files/aes_perf
@@ -1,8 +1,7 @@
# This experiment just runs a short autotest which measures the performance of
# aes encryption. In addition, it profiles
-profile_type: record
-profile_counters: instructions cycles
+profile_args: record -e cycles -e instructions
benchmark: platform_AesThroughput {
}
diff --git a/crosperf/experiment_files/bloat_perf b/crosperf/experiment_files/bloat_perf
index a95d2cbf..f8258ee1 100644
--- a/crosperf/experiment_files/bloat_perf
+++ b/crosperf/experiment_files/bloat_perf
@@ -1,5 +1,4 @@
-profile_type: record
-profile_counters: cycles instructions
+perf_args: record -e cycles
benchmark: bloat {
autotest_name: desktopui_PyAutoPerfTests
diff --git a/crosperf/experiment_files/morejs_perf b/crosperf/experiment_files/morejs_perf
index d7ab45bb..a02f15f5 100644
--- a/crosperf/experiment_files/morejs_perf
+++ b/crosperf/experiment_files/morejs_perf
@@ -1,5 +1,4 @@
-profile_type: record
-profile_counters: cycles instructions
+perf_args: record -e cycles
benchmark: morejs {
autotest_name: desktopui_PyAutoPerfTests
diff --git a/crosperf/experiment_files/page_cycler_perf b/crosperf/experiment_files/page_cycler_perf
index 866fb751..7f5e7118 100644
--- a/crosperf/experiment_files/page_cycler_perf
+++ b/crosperf/experiment_files/page_cycler_perf
@@ -1,7 +1,6 @@
# This experiment profiles all page cyclers.
-profile_type: record
-profile_counters: cycles
+perf_args: record -e cycles
benchmark: morejs {
autotest_name: desktopui_PyAutoPerfTests
diff --git a/crosperf/experiment_files/toolchain b/crosperf/experiment_files/toolchain
index c6790505..9156998b 100644
--- a/crosperf/experiment_files/toolchain
+++ b/crosperf/experiment_files/toolchain
@@ -5,11 +5,12 @@ benchmark: bvt {
}
benchmark: suite_Smoke {
- auotest_name: suite:smoke
+ autotest_name: suite:smoke
}
benchmark: PyAutoPerfTests {
}
-benchmark: AndroidBench {
+benchmark: BootPerfServer {
+ autotest_name: ^server/site_tests/platform_BootPerfServer/control$
}
diff --git a/crosperf/experiment_runner.py b/crosperf/experiment_runner.py
index c5cd1ada..4219c435 100644
--- a/crosperf/experiment_runner.py
+++ b/crosperf/experiment_runner.py
@@ -53,7 +53,7 @@ class ExperimentRunner(object):
if not benchmark_run.cache_hit:
send_mail = True
break
- if not send_mail:
+ if not send_mail and not experiment.email_to:
return
label_names = []
@@ -61,11 +61,12 @@ class ExperimentRunner(object):
label_names.append(label.name)
subject = "%s: %s" % (experiment.name, " vs. ".join(label_names))
- text_report = TextResultsReport(experiment).GetReport()
+ text_report = TextResultsReport(experiment, True).GetReport()
text_report = "<pre style='font-size: 13px'>%s</pre>" % text_report
html_report = HTMLResultsReport(experiment).GetReport()
attachment = EmailSender.Attachment("report.html", html_report)
- EmailSender().SendEmail([getpass.getuser()],
+ email_to = [getpass.getuser()] + experiment.email_to
+ EmailSender().SendEmail(email_to,
subject,
text_report,
attachments=[attachment],
@@ -89,10 +90,11 @@ class ExperimentRunner(object):
self.l.LogOutput("Storing results of each benchmark run.")
for benchmark_run in experiment.benchmark_runs:
- benchmark_run_name = filter(str.isalnum, benchmark_run.name)
- benchmark_run_path = os.path.join(results_directory,
- benchmark_run_name)
- benchmark_run.result.CopyResultsTo(benchmark_run_path)
+ if benchmark_run.result:
+ benchmark_run_name = filter(str.isalnum, benchmark_run.name)
+ benchmark_run_path = os.path.join(results_directory,
+ benchmark_run_name)
+ benchmark_run.result.CopyResultsTo(benchmark_run_path)
def Run(self):
self._Run(self._experiment)
diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py
index eb0b7539..8562e929 100644
--- a/crosperf/machine_manager.py
+++ b/crosperf/machine_manager.py
@@ -1,9 +1,12 @@
+import hashlib
+import image_chromeos
+import lock_machine
+import math
+import os.path
import sys
import threading
import time
from image_checksummer import ImageChecksummer
-import image_chromeos
-import lock_machine
from utils import command_executer
from utils import logger
from utils.file_utils import FileUtils
@@ -12,13 +15,90 @@ CHECKSUM_FILE = "/usr/local/osimage_checksum_file"
class CrosMachine(object):
- def __init__(self, name):
+ def __init__(self, name, chromeos_root):
self.name = name
self.image = None
self.checksum = None
self.locked = False
self.released_time = time.time()
self.autotest_run = None
+ self.chromeos_root = chromeos_root
+ self._GetMemoryInfo()
+ self._GetCPUInfo()
+ self._ComputeMachineChecksumString()
+ self._ComputeMachineChecksum()
+
+ def _ParseMemoryInfo(self):
+ line = self.meminfo.splitlines()[0]
+ usable_kbytes = int(line.split()[1])
+ # This code is from src/third_party/autotest/files/client/bin/base_utils.py
+ # usable_kbytes is system's usable DRAM in kbytes,
+ # as reported by memtotal() from device /proc/meminfo memtotal
+ # after Linux deducts 1.5% to 9.5% for system table overhead
+ # Undo the unknown actual deduction by rounding up
+ # to next small multiple of a big power-of-two
+ # eg 12GB - 5.1% gets rounded back up to 12GB
+ mindeduct = 0.005 # 0.5 percent
+ maxdeduct = 0.095 # 9.5 percent
+ # deduction range 1.5% .. 9.5% supports physical mem sizes
+ # 6GB .. 12GB in steps of .5GB
+ # 12GB .. 24GB in steps of 1 GB
+ # 24GB .. 48GB in steps of 2 GB ...
+ # Finer granularity in physical mem sizes would require
+ # tighter spread between min and max possible deductions
+
+ # increase mem size by at least min deduction, without rounding
+ min_kbytes = int(usable_kbytes / (1.0 - mindeduct))
+ # increase mem size further by 2**n rounding, by 0..roundKb or more
+ round_kbytes = int(usable_kbytes / (1.0 - maxdeduct)) - min_kbytes
+ # find least binary roundup 2**n that covers worst-cast roundKb
+ mod2n = 1 << int(math.ceil(math.log(round_kbytes, 2)))
+ # have round_kbytes <= mod2n < round_kbytes*2
+ # round min_kbytes up to next multiple of mod2n
+ phys_kbytes = min_kbytes + mod2n - 1
+ phys_kbytes -= phys_kbytes % mod2n # clear low bits
+ self.phys_kbytes = phys_kbytes
+
+ def _GetMemoryInfo(self):
+ #TODO yunlian: when the machine in rebooting, it will not return
+ #meminfo, the assert does not catch it either
+ ce = command_executer.GetCommandExecuter()
+ command = "cat /proc/meminfo"
+ ret, self.meminfo, _ = ce.CrosRunCommand(
+ command, return_output=True,
+ machine=self.name, username="root", chromeos_root=self.chromeos_root)
+ assert ret == 0, "Could not get meminfo from machine: %s" % self.name
+ if ret == 0:
+ self._ParseMemoryInfo()
+
+ #cpuinfo format is different across architecture
+ #need to find a better way to parse it.
+ def _ParseCPUInfo(self,cpuinfo):
+ return 0
+
+ def _GetCPUInfo(self):
+ ce = command_executer.GetCommandExecuter()
+ command = "cat /proc/cpuinfo"
+ ret, self.cpuinfo, _ = ce.CrosRunCommand(
+ command, return_output=True,
+ machine=self.name, username="root", chromeos_root=self.chromeos_root)
+ assert ret == 0, "Could not get cpuinfo from machine: %s" % self.name
+ if ret == 0:
+ self._ParseCPUInfo(self.cpuinfo)
+
+ def _ComputeMachineChecksumString(self):
+ self.checksum_string = ""
+ exclude_lines_list = ["MHz", "BogoMIPS", "bogomips"]
+ for line in self.cpuinfo.splitlines():
+ if not any([e in line for e in exclude_lines_list]):
+ self.checksum_string += line
+ self.checksum_string += " " + str(self.phys_kbytes)
+
+ def _ComputeMachineChecksum(self):
+ if self.checksum_string:
+ self.machine_checksum = hashlib.md5(self.checksum_string).hexdigest()
+ else:
+ self.machine_checksum = ""
def __str__(self):
l = []
@@ -38,7 +118,10 @@ class MachineManager(object):
self.image_lock = threading.Lock()
self.num_reimages = 0
self.chromeos_root = None
- self.no_lock = False
+ if os.path.isdir(lock_machine.FileLock.LOCKS_DIR):
+ self.no_lock = False
+ else:
+ self.no_lock = True
self.initialized = False
self.chromeos_root = chromeos_root
@@ -69,6 +152,20 @@ class MachineManager(object):
return retval
+ def ComputeCommonCheckSum(self):
+ self.machine_checksum = ""
+ for machine in self.GetMachines():
+ if machine.machine_checksum:
+ self.machine_checksum = machine.machine_checksum
+ break
+
+ def ComputeCommonCheckSumString(self):
+ self.machine_checksum_string = ""
+ for machine in self.GetMachines():
+ if machine.checksum_string:
+ self.machine_checksum_string = machine.checksum_string
+ break
+
def _TryToLockMachine(self, cros_machine):
with self._lock:
assert cros_machine, "Machine can't be None"
@@ -96,7 +193,14 @@ class MachineManager(object):
with self._lock:
for m in self._all_machines:
assert m.name != machine_name, "Tried to double-add %s" % machine_name
- self._all_machines.append(CrosMachine(machine_name))
+ cm = CrosMachine(machine_name, self.chromeos_root)
+ assert cm.machine_checksum, ("Could not find checksum for machine %s" %
+ machine_name)
+ self._all_machines.append(cm)
+
+ def AreAllMachineSame(self):
+ checksums = [m.machine_checksum for m in self.GetMachines()]
+ return len(set(checksums)) == 1
def AcquireMachine(self, chromeos_image):
image_checksum = ImageChecksummer().Checksum(chromeos_image)
@@ -109,12 +213,15 @@ class MachineManager(object):
for m in self._all_machines:
m.released_time = time.time()
+ if not self.AreAllMachineSame():
+ logger.GetLogger().LogFatal("-- not all the machine are identical")
if not self._machines:
machine_names = []
for machine in self._all_machines:
machine_names.append(machine.name)
- raise Exception("Could not acquire any of the following machines: '%s'"
- % ", ".join(machine_names))
+ logger.GetLogger().LogFatal("Could not acquire any of the"
+ "following machines: '%s'"
+ % ", ".join(machine_names))
### for m in self._machines:
### if (m.locked and time.time() - m.released_time < 10 and
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py
index 6fdca550..1c33e720 100644
--- a/crosperf/results_cache.py
+++ b/crosperf/results_cache.py
@@ -10,14 +10,15 @@ import pickle
import re
import tempfile
-from image_checksummer import ImageChecksummer
from utils import command_executer
from utils import logger
from utils import misc
+from image_checksummer import ImageChecksummer
SCRATCH_DIR = "/home/%s/cros_scratch" % getpass.getuser()
RESULTS_FILE = "results.txt"
+MACHINE_FILE = "machine.txt"
AUTOTEST_TARBALL = "autotest.tbz2"
PERF_RESULTS_FILE = "perf-results.txt"
@@ -65,7 +66,7 @@ class Result(object):
[ret, out, err] = self._ce.RunCommand(command, return_output=True)
keyvals_dict = {}
for line in out.splitlines():
- tokens = line.split(",")
+ tokens = re.split("=|,", line)
key = tokens[-2]
if key.startswith(self.results_dir):
key = key[len(self.results_dir) + 1:]
@@ -109,6 +110,7 @@ class Result(object):
chroot_perf_report_file = misc.GetInsideChrootPath(self._chromeos_root,
perf_report_file)
command = ("/usr/sbin/perf report "
+ "-n "
"--symfs /build/%s "
"--vmlinux /build/%s/usr/lib/debug/boot/vmlinux "
"--kallsyms /build/%s/boot/System.map-* "
@@ -186,6 +188,7 @@ class Result(object):
self.perf_data_files = self._GetPerfDataFiles()
self.perf_report_files = self._GetPerfReportFiles()
self._ProcessResults()
+ self.CleanUp()
def CleanUp(self):
if self._temp_dir:
@@ -193,7 +196,7 @@ class Result(object):
self._ce.RunCommand(command)
- def StoreToCacheDir(self, cache_dir):
+ def StoreToCacheDir(self, cache_dir, machine_manager):
# Create the dir if it doesn't exist.
command = "mkdir -p %s" % cache_dir
ret = self._ce.RunCommand(command)
@@ -206,10 +209,19 @@ class Result(object):
pickle.dump(self.retval, f)
tarball = os.path.join(cache_dir, AUTOTEST_TARBALL)
- command = ("cd %s && tar cjf %s ." % (self.results_dir, tarball))
+ command = ("cd %s && "
+ "tar "
+ "--exclude=var/spool "
+ "--exclude=var/log "
+ "-cjf %s ." % (self.results_dir, tarball))
ret = self._ce.RunCommand(command)
if ret:
raise Exception("Couldn't store autotest output directory.")
+ # Store machine info.
+ # TODO(asharif): Make machine_manager a singleton, and don't pass it into
+ # this function.
+ with open(os.path.join(cache_dir, MACHINE_FILE), "w") as f:
+ f.write(machine_manager.machine_checksum_string)
@classmethod
def CreateFromRun(cls, logger, chromeos_root, board, out, err, retval):
@@ -232,8 +244,9 @@ class CacheConditions(object):
# Cache hit only if the result file exists.
CACHE_FILE_EXISTS = 0
- # Cache hit if the ip address of the cached result and the new run match.
- REMOTES_MATCH = 1
+ # Cache hit if the checksum of cpuinfo and totalmem of
+ # the cached result and the new run match.
+ MACHINES_MATCH = 1
# Cache hit if the image checksum of the cached result and the new run match.
CHECKSUMS_MATCH = 2
@@ -253,18 +266,19 @@ class ResultsCache(object):
is exactly stored (value). The value generation is handled by the Results
class.
"""
- CACHE_VERSION = 3
+ CACHE_VERSION = 5
+
def Init(self, chromeos_image, chromeos_root, autotest_name, iteration,
- autotest_args, remote, board, cache_conditions,
+ autotest_args, machine_manager, board, cache_conditions,
logger_to_use):
self.chromeos_image = chromeos_image
self.chromeos_root = chromeos_root
self.autotest_name = autotest_name
self.iteration = iteration
self.autotest_args = autotest_args,
- self.remote = remote
self.board = board
self.cache_conditions = cache_conditions
+ self.machine_manager = machine_manager
self._logger = logger_to_use
self._ce = command_executer.GetCommandExecuter(self._logger)
@@ -291,10 +305,10 @@ class ResultsCache(object):
return cache_path
def _GetCacheKeyList(self, read):
- if read and CacheConditions.REMOTES_MATCH not in self.cache_conditions:
- remote = "*"
+ if read and CacheConditions.MACHINES_MATCH not in self.cache_conditions:
+ machine_checksum = "*"
else:
- remote = self.remote
+ machine_checksum = self.machine_manager.machine_checksum
if read and CacheConditions.CHECKSUMS_MATCH not in self.cache_conditions:
checksum = "*"
else:
@@ -307,12 +321,11 @@ class ResultsCache(object):
autotest_args_checksum = hashlib.md5(
"".join(self.autotest_args)).hexdigest()
-
return (image_path_checksum,
self.autotest_name, str(self.iteration),
autotest_args_checksum,
checksum,
- remote,
+ machine_checksum,
str(self.CACHE_VERSION))
def ReadResult(self):
@@ -342,7 +355,7 @@ class ResultsCache(object):
def StoreResult(self, result):
cache_dir = self._GetCacheDirForWrite()
- result.StoreToCacheDir(cache_dir)
+ result.StoreToCacheDir(cache_dir, self.machine_manager)
class MockResultsCache(object):
diff --git a/crosperf/results_columns.py b/crosperf/results_columns.py
deleted file mode 100644
index 09e97d0b..00000000
--- a/crosperf/results_columns.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2011 Google Inc. All Rights Reserved.
-
-import math
-
-
-class Column(object):
- def __init__(self, name):
- self.name = name
-
- def _ContainsString(self, results):
- for result in results:
- if isinstance(result, str):
- return True
- return False
-
- def _StripNone(self, results):
- res = []
- for result in results:
- if result is not None:
- res.append(result)
- return res
-
-
-class MinColumn(Column):
- def Compute(self, results, baseline_results):
- if self._ContainsString(results):
- return "-"
- results = self._StripNone(results)
- if not results:
- return "-"
- return min(results)
-
-
-class MaxColumn(Column):
- def Compute(self, results, baseline_results):
- if self._ContainsString(results):
- return "-"
- results = self._StripNone(results)
- if not results:
- return "-"
- return max(results)
-
-
-class MeanColumn(Column):
- def Compute(self, results, baseline_results):
- all_pass = True
- all_fail = True
- if self._ContainsString(results):
- for result in results:
- if result != "PASSED":
- all_pass = False
- if result != "FAILED":
- all_fail = False
-
- if all_pass:
- return "ALL PASS"
- elif all_fail:
- return "ALL FAIL"
- else:
- return "-"
-
- results = self._StripNone(results)
- if not results:
- return "-"
- return float(sum(results)) / len(results)
-
-
-class StandardDeviationColumn(Column):
- def __init__(self, name):
- super(StandardDeviationColumn, self).__init__(name)
-
- def Compute(self, results, baseline_results):
- if self._ContainsString(results):
- return "-"
-
- results = self._StripNone(results)
- if not results:
- return "-"
- n = len(results)
- average = sum(results) / n
- total = 0
- for result in results:
- total += (result - average) ** 2
-
- return math.sqrt(total / n)
-
-
-class RatioColumn(Column):
- def __init__(self, name):
- super(RatioColumn, self).__init__(name)
-
- def Compute(self, results, baseline_results):
- if self._ContainsString(results) or self._ContainsString(baseline_results):
- return "-"
-
- results = self._StripNone(results)
- baseline_results = self._StripNone(baseline_results)
- if not results or not baseline_results:
- return "-"
- result_mean = sum(results) / len(results)
- baseline_mean = sum(baseline_results) / len(baseline_results)
-
- if not baseline_mean:
- return "-"
-
- return result_mean / baseline_mean
-
-
-class DeltaColumn(Column):
- def __init__(self, name):
- super(DeltaColumn, self).__init__(name)
-
- def Compute(self, results, baseline_results):
- if self._ContainsString(results) or self._ContainsString(baseline_results):
- return "-"
-
- results = self._StripNone(results)
- baseline_results = self._StripNone(baseline_results)
- if not results or not baseline_results:
- return "-"
- result_mean = sum(results) / len(results)
- baseline_mean = sum(baseline_results) / len(baseline_results)
-
- if not baseline_mean:
- return "-"
-
- res = 100 * (result_mean - baseline_mean) / baseline_mean
- return res
-
-
-class IterationsCompleteColumn(Column):
- def __init__(self, name):
- super(IterationsCompleteColumn, self).__init__(name)
-
- def Compute(self, results, baseline_results):
- return len(self._StripNone(results))
-
-
-class IterationColumn(Column):
- def __init__(self, name, iteration):
- super(IterationColumn, self).__init__(name)
- self.iteration = iteration
-
- def Compute(self, results, baseline_results):
- if self.iteration > len(results):
- return ""
- res = results[self.iteration - 1]
- if not res:
- return "-"
- return res
diff --git a/crosperf/results_organizer.py b/crosperf/results_organizer.py
new file mode 100644
index 00000000..0071387b
--- /dev/null
+++ b/crosperf/results_organizer.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+
+# Copyright 2012 Google Inc. All Rights Reserved.
+
+
+class ResultOrganizer(object):
+ """Create a dict from benchmark_runs.
+
+ The structure of the output dict is as follows:
+ {"benchmark_1":[
+ [{"key1":"v1", "key2":"v2"},{"key1":"v1", "key2","v2"}]
+ #one label
+ []
+ #the other label
+ ]
+ "benchmark_2":
+ [
+ ]}.
+ """
+
+ def __init__(self, benchmark_runs, labels):
+ self.result = {}
+ self.labels = []
+ for label in labels:
+ self.labels.append(label.name)
+ for benchmark_run in benchmark_runs:
+ benchmark_name = benchmark_run.benchmark_name
+ if benchmark_name not in self.result:
+ self.result[benchmark_name] = []
+ while len(self.result[benchmark_name]) < len(labels):
+ self.result[benchmark_name].append([])
+ label_index = self.labels.index(benchmark_run.label_name)
+ cur_table = self.result[benchmark_name][label_index]
+ index = benchmark_run.iteration - 1
+ while index >= len(cur_table):
+ cur_table.append({})
+ cur_dict = cur_table[index]
+ if not benchmark_run.result:
+ continue
+ for autotest_key in benchmark_run.result.keyvals:
+ result_value = benchmark_run.result.keyvals[autotest_key]
+ cur_dict[autotest_key] = result_value
diff --git a/crosperf/results_report.py b/crosperf/results_report.py
index 0cd46ed4..b591370a 100644
--- a/crosperf/results_report.py
+++ b/crosperf/results_report.py
@@ -2,20 +2,14 @@
# Copyright 2011 Google Inc. All Rights Reserved.
+import math
from column_chart import ColumnChart
-from results_columns import IterationColumn
-from results_columns import IterationsCompleteColumn
-from results_columns import MaxColumn
-from results_columns import MeanColumn
-from results_columns import MinColumn
-from results_columns import RatioColumn
-from results_columns import StandardDeviationColumn
from results_sorter import ResultSorter
-from table import Table
-
+from results_organizer import ResultOrganizer
+from utils.tabulator import *
class ResultsReport(object):
- DELTA_COLUMN_NAME = "Change"
+ MAX_COLOR_CODE = 255
def __init__(self, experiment):
self.experiment = experiment
@@ -32,85 +26,106 @@ class ResultsReport(object):
labels[benchmark_run.label_name].append(benchmark_run)
return labels
- def GetFullTable(self):
- full_columns = []
- max_iterations = 0
- for benchmark in self.benchmarks:
- if benchmark.iterations > max_iterations:
- max_iterations = benchmark.iterations
-
- for i in range(1, max_iterations + 1):
- full_columns.append(IterationColumn(str(i), i))
-
- full_columns.append(IterationsCompleteColumn("Completed"))
- full_columns.append(MinColumn("Min"))
- full_columns.append(MaxColumn("Max"))
- full_columns.append(MeanColumn("Avg"))
- full_columns.append(StandardDeviationColumn("Std Dev"))
- full_columns.append(RatioColumn(self.DELTA_COLUMN_NAME))
- return self._GetTable(self.labels, self.benchmarks, self.benchmark_runs,
- full_columns)
-
- def GetSummaryTable(self):
- summary_columns = [MeanColumn("Average"),
- RatioColumn(self.DELTA_COLUMN_NAME)]
- return self._GetTable(self.labels, self.benchmarks, self.benchmark_runs,
- summary_columns)
-
- def _GetTable(self, labels, benchmarks, benchmark_runs, columns):
- table = Table("box-table-a")
- label_headings = [Table.Cell("", hidden=True, colspan=2, header=True)]
- for label in labels:
- colspan = len(columns)
- if label.name == self.baseline.name:
- colspan -= 1
- label_headings.append(Table.Cell(label.name, colspan=colspan,
- header=True))
-
- table.AddRow(label_headings)
-
- column_headings = [Table.Cell("Autotest Key", header=True),
- Table.Cell("Iterations", header=True)]
- for label in labels:
- for column in columns:
- if (label.name == self.baseline.name and
- column.name == self.DELTA_COLUMN_NAME):
- continue
- column_headings.append(Table.Cell(column.name, header=True))
-
- table.AddRow(column_headings)
-
- sorter = ResultSorter(benchmark_runs)
-
- for benchmark in benchmarks:
- table.AddRow([Table.Cell(benchmark.name)])
- autotest_keys = sorter.GetAutotestKeys(benchmark.name)
- for autotest_key in autotest_keys:
- row = [Table.Cell(autotest_key),
- Table.Cell(benchmark.iterations)]
- for label in labels:
- for column in columns:
- if (label.name == self.baseline.name and
- column.name == self.DELTA_COLUMN_NAME):
- continue
- results = sorter.GetResults(benchmark.name,
- autotest_key, label.name)
- baseline_results = sorter.GetResults(benchmark.name,
- autotest_key,
- self.baseline.name)
- value = column.Compute(results, baseline_results)
- if isinstance(value, float):
- value_string = "%.2f" % value
- else:
- value_string = value
-
- row.append(Table.Cell(value_string))
-
- table.AddRow(row)
-
- return table
-
-
+ def GetFullTables(self):
+ columns = [Column(NonEmptyCountResult(),
+ Format(),
+ "Completed"),
+ Column(RawResult(),
+ Format()),
+ Column(MinResult(),
+ Format()),
+ Column(MaxResult(),
+ Format()),
+ Column(AmeanResult(),
+ Format()),
+ Column(StdResult(),
+ Format())
+ ]
+ return self._GetTables(self.labels, self.benchmark_runs, columns)
+
+ def GetSummaryTables(self):
+ columns = [Column(AmeanResult(),
+ Format()),
+ Column(StdResult(),
+ Format(), "StdDev"),
+ Column(CoeffVarResult(),
+ CoeffVarFormat(), "Mean/StdDev"),
+ Column(GmeanRatioResult(),
+ RatioFormat(), "GmeanSpeedup"),
+ Column(GmeanRatioResult(),
+ ColorBoxFormat(), " "),
+ Column(StatsSignificant(),
+ Format(), "p-value")
+ ]
+ return self._GetTables(self.labels, self.benchmark_runs, columns)
+
+ def _ParseColumn(self, columns, iteration):
+ new_column = []
+ for column in columns:
+ if column.result.__class__.__name__ != "RawResult":
+ #TODO(asharif): tabulator should support full table natively.
+ new_column.append(column)
+ else:
+ for i in range(iteration):
+ cc = Column(LiteralResult(i), Format(), str(i+1))
+ new_column.append(cc)
+ return new_column
+
+ def _AreAllRunsEmpty(self, runs):
+ for label in runs:
+ for dictionary in label:
+ if dictionary:
+ return False
+ return True
+
+ def _GetTables(self, labels, benchmark_runs, columns):
+ tables = []
+ ro = ResultOrganizer(benchmark_runs, labels)
+ result = ro.result
+ label_name = ro.labels
+ for item in result:
+ runs = result[item]
+ for benchmark in self.benchmarks:
+ if benchmark.name == item:
+ break
+ benchmark_info = ("Benchmark: {0}; Iterations: {1}"
+ .format(benchmark.name, benchmark.iterations))
+ cell = Cell()
+ cell.string_value = benchmark_info
+ ben_table = [[cell]]
+
+ if self._AreAllRunsEmpty(runs):
+ cell = Cell()
+ cell.string_value = ("This benchmark contains no result."
+ " Is the benchmark name valid?")
+ cell_table = [[cell]]
+ else:
+ tg = TableGenerator(runs, label_name)
+ table = tg.GetTable()
+ parsed_columns = self._ParseColumn(columns, benchmark.iterations)
+ tf = TableFormatter(table, parsed_columns)
+ cell_table = tf.GetCellTable()
+ tables.append(ben_table)
+ tables.append(cell_table)
+ return tables
+
+ def PrintTables(self, tables, out_to):
+ output = ""
+ for table in tables:
+ if out_to == "HTML":
+ tp = TablePrinter(table, TablePrinter.HTML)
+ elif out_to == "PLAIN":
+ tp = TablePrinter(table, TablePrinter.PLAIN)
+ elif out_to == "CONSOLE":
+ tp = TablePrinter(table, TablePrinter.CONSOLE)
+ elif out_to == "TSV":
+ tp = TablePrinter(table, TablePrinter.TSV)
+ elif out_to == "EMAIL":
+ tp = TablePrinter(table, TablePrinter.EMAIL)
+ else:
+ pass
+ output += tp.Print()
+ return output
class TextResultsReport(ResultsReport):
TEXT = """
===========================================
@@ -118,50 +133,36 @@ Results report for: '%s'
===========================================
-------------------------------------------
-Benchmark Run Status
--------------------------------------------
-%s
-
-Number re-images: %s
-
--------------------------------------------
Summary
-------------------------------------------
%s
-------------------------------------------
-Full Table
--------------------------------------------
-%s
-
--------------------------------------------
Experiment File
-------------------------------------------
%s
===========================================
"""
- def __init__(self, experiment):
+ def __init__(self, experiment, email=False):
super(TextResultsReport, self).__init__(experiment)
-
- def GetStatusTable(self):
- status_table = Table("status")
- for benchmark_run in self.benchmark_runs:
- status_table.AddRow([Table.Cell(benchmark_run.name),
- Table.Cell(benchmark_run.status),
- Table.Cell(benchmark_run.failure_reason)])
- return status_table
+ self.email = email
def GetReport(self):
+ summary_table = self.GetSummaryTables()
+ full_table = self.GetFullTables()
+ if not self.email:
+ return self.TEXT % (self.experiment.name,
+ self.PrintTables(summary_table, "CONSOLE"),
+ self.experiment.experiment_file)
+
return self.TEXT % (self.experiment.name,
- self.GetStatusTable().ToText(),
- self.experiment.machine_manager.num_reimages,
- self.GetSummaryTable().ToText(80),
- self.GetFullTable().ToText(80),
+ self.PrintTables(summary_table, "EMAIL"),
self.experiment.experiment_file)
class HTMLResultsReport(ResultsReport):
+
HTML = """
<html>
<head>
@@ -303,36 +304,49 @@ pre {
def GetReport(self):
chart_javascript = ""
- charts = self._GetCharts(self.labels, self.benchmarks, self.benchmark_runs)
+ charts = self._GetCharts(self.labels, self.benchmark_runs)
for chart in charts:
chart_javascript += chart.GetJavascript()
chart_divs = ""
for chart in charts:
chart_divs += chart.GetDiv()
- summary_table = self.GetSummaryTable()
- full_table = self.GetFullTable()
+ summary_table = self.GetSummaryTables()
+ full_table = self.GetFullTables()
return self.HTML % (chart_javascript,
- summary_table.ToHTML(),
- summary_table.ToText(),
- summary_table.ToTSV(),
+ self.PrintTables(summary_table, "HTML"),
+ self.PrintTables(summary_table, "PLAIN"),
+ self.PrintTables(summary_table, "TSV"),
self._GetTabMenuHTML("summary"),
chart_divs,
- full_table.ToHTML(),
- full_table.ToText(),
- full_table.ToTSV(),
+ self.PrintTables(full_table, "HTML"),
+ self.PrintTables(full_table, "PLAIN"),
+ self.PrintTables(full_table, "TSV"),
self._GetTabMenuHTML("full"),
self.experiment.experiment_file)
- def _GetCharts(self, labels, benchmarks, benchmark_runs):
+ def _GetCharts(self, labels, benchmark_runs):
charts = []
- sorter = ResultSorter(benchmark_runs)
-
- for benchmark in benchmarks:
- autotest_keys = sorter.GetAutotestKeys(benchmark.name)
-
- for autotest_key in autotest_keys:
- title = "%s: %s" % (benchmark.name, autotest_key.replace("/", " "))
+ ro = ResultOrganizer(benchmark_runs, labels)
+ result = ro.result
+ for item in result:
+ runs = result[item]
+ tg = TableGenerator(runs, ro.labels)
+ table = tg.GetTable()
+ columns = [Column(AmeanResult(),
+ Format()),
+ Column(MinResult(),
+ Format()),
+ Column(MaxResult(),
+ Format())
+ ]
+ tf = TableFormatter(table, columns)
+ data_table = tf.GetCellTable()
+
+ for i in range(2, len(data_table)):
+ cur_row_data = data_table[i]
+ autotest_key = cur_row_data[0].string_value
+ title = "{0}: {1}".format(item, autotest_key.replace("/", ""))
chart = ColumnChart(title, 300, 200)
chart.AddColumn("Label", "string")
chart.AddColumn("Average", "number")
@@ -340,17 +354,15 @@ pre {
chart.AddColumn("Max", "number")
chart.AddSeries("Min", "line", "black")
chart.AddSeries("Max", "line", "black")
-
- for label in labels:
- res = sorter.GetResults(benchmark.name, autotest_key, label.name)
- avg_val = MeanColumn("").Compute(res, None)
- min_val = MinColumn("").Compute(res, None)
- max_val = MaxColumn("").Compute(res, None)
- chart.AddRow([label.name, avg_val, min_val, max_val])
- if isinstance(avg_val, str):
+ cur_index = 1
+ for label in ro.labels:
+ chart.AddRow([label, cur_row_data[cur_index].value,
+ cur_row_data[cur_index + 1].value,
+ cur_row_data[cur_index + 2].value])
+ if isinstance(cur_row_data[cur_index].value, str):
chart = None
break
-
+ cur_index += 3
if chart:
charts.append(chart)
return charts
diff --git a/crosperf/results_sorter.py b/crosperf/results_sorter.py
index 0567ef7b..985a91fb 100644
--- a/crosperf/results_sorter.py
+++ b/crosperf/results_sorter.py
@@ -9,6 +9,8 @@ class ResultSorter(object):
for benchmark_run in benchmark_runs:
benchmark_name = benchmark_run.benchmark_name
label_name = benchmark_run.label_name
+ if not benchmark_run.result:
+ continue
for autotest_key in benchmark_run.result.keyvals:
result_tuple = (benchmark_name, autotest_key, label_name)
if result_tuple not in self.table:
@@ -32,6 +34,8 @@ class ResultSorter(object):
benchmark_name = benchmark_run.benchmark_name
if benchmark_name not in self.autotest_keys:
self.autotest_keys[benchmark_name] = {}
+ if not benchmark_run.result:
+ continue
for autotest_key in benchmark_run.result.keyvals:
self.autotest_keys[benchmark_name][autotest_key] = True
diff --git a/crosperf/settings_factory.py b/crosperf/settings_factory.py
index 95ceb0fd..782f0dd3 100644
--- a/crosperf/settings_factory.py
+++ b/crosperf/settings_factory.py
@@ -26,15 +26,11 @@ class BenchmarkSettings(Settings):
self.AddField(FloatField("outlier_range", default=0.2,
description="The percentage of highest/lowest "
"values to omit when computing the average."))
- self.AddField(ListField("profile_counters",
- default=["cycles"],
- description="A list of profile counters to "
- "collect."))
- self.AddField(EnumField("profile_type",
- description="The type of profile to collect. "
- "Either 'stat', 'record' or ''.",
- options=["stat", "record", ""],
- default=""))
+ self.AddField(TextField("perf_args", default="",
+ description="The optional profile command. It "
+ "enables perf commands to record perforamance "
+ "related counters. It must start with perf "
+ "command record or stat followed by arguments."))
class LabelSettings(Settings):
@@ -66,10 +62,12 @@ class GlobalSettings(Settings):
self.AddField(BooleanField("rerun_if_failed", description="Whether to "
"re-run failed autotest runs or not.",
default=False))
+ self.AddField(ListField("email", description="Space-seperated"
+ "list of email addresses to send email to."))
self.AddField(BooleanField("rerun", description="Whether to ignore the "
"cache and for autotests to be re-run.",
default=False))
- self.AddField(BooleanField("exact_remote", default=False,
+ self.AddField(BooleanField("exact_remote", default=True,
description="Ensure cached runs are run on the "
"same device that is specified as a remote."))
self.AddField(IntegerField("iterations", default=1,
@@ -80,14 +78,11 @@ class GlobalSettings(Settings):
"contains a src/scripts directory. Defaults to "
"the chromeos checkout which contains the "
"chromeos_image."))
- self.AddField(ListField("profile_counters",
- default=["cycles"],
- description="A list of profile counters to "
- "collect."))
- self.AddField(EnumField("profile_type",
- description="The type of profile to collect. "
- "Either 'stat', 'record' or ''.",
- options=["stat", "record", ""]))
+ self.AddField(TextField("perf_args", default="",
+ description="The optional profile command. It "
+ "enables perf commands to record perforamance "
+ "related counters. It must start with perf "
+ "command record or stat followed by arguments."))
class SettingsFactory(object):
diff --git a/crosperf/table.py b/crosperf/table.py
deleted file mode 100644
index 84eb21ae..00000000
--- a/crosperf/table.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2011 Google Inc. All Rights Reserved.
-
-import math
-
-
-class Table(object):
- class Cell(object):
- def __init__(self, value, colspan=1, hidden=False, header=False):
- self.value = value
- self.colspan = colspan
- self.hidden = hidden
- self.header = header
-
- def __init__(self, table_id):
- self.table_id = table_id
- self.rows = []
-
- def AddRow(self, row):
- self.rows.append(row)
-
- def ToHTML(self):
- res = "<table id='%s'>\n" % self.table_id
- for row in self.rows:
- res += "<tr>"
- for cell in row:
- if cell.header:
- tag = "th"
- else:
- tag = "td"
- cell_class = ""
- if cell.hidden:
- cell_class = "class='hidden'"
- res += "<%s colspan='%s' %s>%s</%s>" % (tag, cell.colspan, cell_class,
- cell.value, tag)
- res += "</tr>\n"
- res += "</table>"
- return res
-
- def ToText(self, max_column_width=None):
- col_spacing = 2
- max_widths = []
- for row in self.rows:
- column = 0
- for cell in row:
- text_width = len(str(cell.value))
- per_column_width = int(math.ceil(float(text_width) / cell.colspan))
- if max_column_width:
- per_column_width = min(max_column_width, per_column_width)
- for i in range(column, column + cell.colspan):
- while i >= len(max_widths):
- max_widths.append(0)
- max_widths[i] = max(per_column_width, max_widths[i])
- column += cell.colspan
-
- res = ""
- for row in self.rows:
- column = 0
- for cell in row:
- val = str(cell.value)
- if max_column_width:
- if len(val) > max_column_width:
- val = val[:2] + ".." + val[len(val) - (max_column_width - 4):]
- res += val
- space_to_use = (sum(max_widths[column:column + cell.colspan]) +
- (cell.colspan * col_spacing))
- whitespace_length = space_to_use - len(val)
- res += " " * whitespace_length
- # Add space b/w columns
- column += cell.colspan
- res += "\n"
- return res
-
- def ToTSV(self):
- res = ""
- column = 0
- for row in self.rows:
- for cell in row:
- res += str(cell.value).replace("\t", " ")
- for _ in range(column, column + cell.colspan):
- res += "\t"
- res += "\n"
- return res
diff --git a/image_chromeos.py b/image_chromeos.py
index f1497fb9..380a94f7 100755
--- a/image_chromeos.py
+++ b/image_chromeos.py
@@ -9,10 +9,12 @@ This script images a remote ChromeOS device with a specific image."
__author__ = "asharif@google.com (Ahmad Sharif)"
+import fcntl
import filecmp
import glob
import optparse
import os
+import re
import shutil
import sys
import tempfile
@@ -22,15 +24,25 @@ from utils import misc
from utils.file_utils import FileUtils
checksum_file = "/usr/local/osimage_checksum_file"
-
+lock_file = "/tmp/lock_image_chromeos"
def Usage(parser, message):
print "ERROR: " + message
parser.print_help()
sys.exit(0)
+
def Main(argv):
"""Build ChromeOS."""
+ #Get lock for the host
+ f = open(lock_file, "w+a")
+ try:
+ fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
+ except IOError:
+ f.close()
+ print ("You can not run two instances of image_chromes at the same time."
+ "\nTry again. Exiting ....")
+ exit(0)
# Common initializations
cmd_executer = command_executer.GetCommandExecuter()
l = logger.GetLogger()
@@ -150,6 +162,8 @@ def Main(argv):
else:
l.LogOutput("Checksums match. Skipping reimage")
+ fcntl.lockf(f, fcntl.LOCK_UN)
+ f.close()
return retval
@@ -219,7 +233,8 @@ def IsImageModdedForTest(chromeos_root, image):
stateful_mp = tempfile.mkdtemp()
MountImage(chromeos_root, image, rootfs_mp, stateful_mp)
lsb_release_file = os.path.join(rootfs_mp, "etc/lsb-release")
- is_test_image = "Test Build" in open(lsb_release_file).read()
+ lsb_release_contents = open(lsb_release_file).read()
+ is_test_image = re.search("test", lsb_release_contents, re.IGNORECASE)
MountImage(chromeos_root, image, rootfs_mp, stateful_mp, unmount=True)
return is_test_image
diff --git a/utils/colortrans.py b/utils/colortrans.py
new file mode 100644
index 00000000..37e91572
--- /dev/null
+++ b/utils/colortrans.py
@@ -0,0 +1,376 @@
+#! /usr/bin/env python
+
+""" Convert values between RGB hex codes and xterm-256 color codes.
+
+Nice long listing of all 256 colors and their codes. Useful for
+developing console color themes, or even script output schemes.
+
+Resources:
+* http://en.wikipedia.org/wiki/8-bit_color
+* http://en.wikipedia.org/wiki/ANSI_escape_code
+* /usr/share/X11/rgb.txt
+
+I'm not sure where this script was inspired from. I think I must have
+written it from scratch, though it's been several years now.
+"""
+
+__author__ = 'Micah Elliott http://MicahElliott.com'
+__version__ = '0.1'
+__copyright__ = 'Copyright (C) 2011 Micah Elliott. All rights reserved.'
+__license__ = 'WTFPL http://sam.zoy.org/wtfpl/'
+
+#---------------------------------------------------------------------
+
+import sys, re
+
+CLUT = [ # color look-up table
+# 8-bit, RGB hex
+
+ # Primary 3-bit (8 colors). Unique representation!
+ ('00', '000000'),
+ ('01', '800000'),
+ ('02', '008000'),
+ ('03', '808000'),
+ ('04', '000080'),
+ ('05', '800080'),
+ ('06', '008080'),
+ ('07', 'c0c0c0'),
+
+ # Equivalent "bright" versions of original 8 colors.
+ ('08', '808080'),
+ ('09', 'ff0000'),
+ ('10', '00ff00'),
+ ('11', 'ffff00'),
+ ('12', '0000ff'),
+ ('13', 'ff00ff'),
+ ('14', '00ffff'),
+ ('15', 'ffffff'),
+
+ # Strictly ascending.
+ ('16', '000000'),
+ ('17', '00005f'),
+ ('18', '000087'),
+ ('19', '0000af'),
+ ('20', '0000d7'),
+ ('21', '0000ff'),
+ ('22', '005f00'),
+ ('23', '005f5f'),
+ ('24', '005f87'),
+ ('25', '005faf'),
+ ('26', '005fd7'),
+ ('27', '005fff'),
+ ('28', '008700'),
+ ('29', '00875f'),
+ ('30', '008787'),
+ ('31', '0087af'),
+ ('32', '0087d7'),
+ ('33', '0087ff'),
+ ('34', '00af00'),
+ ('35', '00af5f'),
+ ('36', '00af87'),
+ ('37', '00afaf'),
+ ('38', '00afd7'),
+ ('39', '00afff'),
+ ('40', '00d700'),
+ ('41', '00d75f'),
+ ('42', '00d787'),
+ ('43', '00d7af'),
+ ('44', '00d7d7'),
+ ('45', '00d7ff'),
+ ('46', '00ff00'),
+ ('47', '00ff5f'),
+ ('48', '00ff87'),
+ ('49', '00ffaf'),
+ ('50', '00ffd7'),
+ ('51', '00ffff'),
+ ('52', '5f0000'),
+ ('53', '5f005f'),
+ ('54', '5f0087'),
+ ('55', '5f00af'),
+ ('56', '5f00d7'),
+ ('57', '5f00ff'),
+ ('58', '5f5f00'),
+ ('59', '5f5f5f'),
+ ('60', '5f5f87'),
+ ('61', '5f5faf'),
+ ('62', '5f5fd7'),
+ ('63', '5f5fff'),
+ ('64', '5f8700'),
+ ('65', '5f875f'),
+ ('66', '5f8787'),
+ ('67', '5f87af'),
+ ('68', '5f87d7'),
+ ('69', '5f87ff'),
+ ('70', '5faf00'),
+ ('71', '5faf5f'),
+ ('72', '5faf87'),
+ ('73', '5fafaf'),
+ ('74', '5fafd7'),
+ ('75', '5fafff'),
+ ('76', '5fd700'),
+ ('77', '5fd75f'),
+ ('78', '5fd787'),
+ ('79', '5fd7af'),
+ ('80', '5fd7d7'),
+ ('81', '5fd7ff'),
+ ('82', '5fff00'),
+ ('83', '5fff5f'),
+ ('84', '5fff87'),
+ ('85', '5fffaf'),
+ ('86', '5fffd7'),
+ ('87', '5fffff'),
+ ('88', '870000'),
+ ('89', '87005f'),
+ ('90', '870087'),
+ ('91', '8700af'),
+ ('92', '8700d7'),
+ ('93', '8700ff'),
+ ('94', '875f00'),
+ ('95', '875f5f'),
+ ('96', '875f87'),
+ ('97', '875faf'),
+ ('98', '875fd7'),
+ ('99', '875fff'),
+ ('100', '878700'),
+ ('101', '87875f'),
+ ('102', '878787'),
+ ('103', '8787af'),
+ ('104', '8787d7'),
+ ('105', '8787ff'),
+ ('106', '87af00'),
+ ('107', '87af5f'),
+ ('108', '87af87'),
+ ('109', '87afaf'),
+ ('110', '87afd7'),
+ ('111', '87afff'),
+ ('112', '87d700'),
+ ('113', '87d75f'),
+ ('114', '87d787'),
+ ('115', '87d7af'),
+ ('116', '87d7d7'),
+ ('117', '87d7ff'),
+ ('118', '87ff00'),
+ ('119', '87ff5f'),
+ ('120', '87ff87'),
+ ('121', '87ffaf'),
+ ('122', '87ffd7'),
+ ('123', '87ffff'),
+ ('124', 'af0000'),
+ ('125', 'af005f'),
+ ('126', 'af0087'),
+ ('127', 'af00af'),
+ ('128', 'af00d7'),
+ ('129', 'af00ff'),
+ ('130', 'af5f00'),
+ ('131', 'af5f5f'),
+ ('132', 'af5f87'),
+ ('133', 'af5faf'),
+ ('134', 'af5fd7'),
+ ('135', 'af5fff'),
+ ('136', 'af8700'),
+ ('137', 'af875f'),
+ ('138', 'af8787'),
+ ('139', 'af87af'),
+ ('140', 'af87d7'),
+ ('141', 'af87ff'),
+ ('142', 'afaf00'),
+ ('143', 'afaf5f'),
+ ('144', 'afaf87'),
+ ('145', 'afafaf'),
+ ('146', 'afafd7'),
+ ('147', 'afafff'),
+ ('148', 'afd700'),
+ ('149', 'afd75f'),
+ ('150', 'afd787'),
+ ('151', 'afd7af'),
+ ('152', 'afd7d7'),
+ ('153', 'afd7ff'),
+ ('154', 'afff00'),
+ ('155', 'afff5f'),
+ ('156', 'afff87'),
+ ('157', 'afffaf'),
+ ('158', 'afffd7'),
+ ('159', 'afffff'),
+ ('160', 'd70000'),
+ ('161', 'd7005f'),
+ ('162', 'd70087'),
+ ('163', 'd700af'),
+ ('164', 'd700d7'),
+ ('165', 'd700ff'),
+ ('166', 'd75f00'),
+ ('167', 'd75f5f'),
+ ('168', 'd75f87'),
+ ('169', 'd75faf'),
+ ('170', 'd75fd7'),
+ ('171', 'd75fff'),
+ ('172', 'd78700'),
+ ('173', 'd7875f'),
+ ('174', 'd78787'),
+ ('175', 'd787af'),
+ ('176', 'd787d7'),
+ ('177', 'd787ff'),
+ ('178', 'd7af00'),
+ ('179', 'd7af5f'),
+ ('180', 'd7af87'),
+ ('181', 'd7afaf'),
+ ('182', 'd7afd7'),
+ ('183', 'd7afff'),
+ ('184', 'd7d700'),
+ ('185', 'd7d75f'),
+ ('186', 'd7d787'),
+ ('187', 'd7d7af'),
+ ('188', 'd7d7d7'),
+ ('189', 'd7d7ff'),
+ ('190', 'd7ff00'),
+ ('191', 'd7ff5f'),
+ ('192', 'd7ff87'),
+ ('193', 'd7ffaf'),
+ ('194', 'd7ffd7'),
+ ('195', 'd7ffff'),
+ ('196', 'ff0000'),
+ ('197', 'ff005f'),
+ ('198', 'ff0087'),
+ ('199', 'ff00af'),
+ ('200', 'ff00d7'),
+ ('201', 'ff00ff'),
+ ('202', 'ff5f00'),
+ ('203', 'ff5f5f'),
+ ('204', 'ff5f87'),
+ ('205', 'ff5faf'),
+ ('206', 'ff5fd7'),
+ ('207', 'ff5fff'),
+ ('208', 'ff8700'),
+ ('209', 'ff875f'),
+ ('210', 'ff8787'),
+ ('211', 'ff87af'),
+ ('212', 'ff87d7'),
+ ('213', 'ff87ff'),
+ ('214', 'ffaf00'),
+ ('215', 'ffaf5f'),
+ ('216', 'ffaf87'),
+ ('217', 'ffafaf'),
+ ('218', 'ffafd7'),
+ ('219', 'ffafff'),
+ ('220', 'ffd700'),
+ ('221', 'ffd75f'),
+ ('222', 'ffd787'),
+ ('223', 'ffd7af'),
+ ('224', 'ffd7d7'),
+ ('225', 'ffd7ff'),
+ ('226', 'ffff00'),
+ ('227', 'ffff5f'),
+ ('228', 'ffff87'),
+ ('229', 'ffffaf'),
+ ('230', 'ffffd7'),
+ ('231', 'ffffff'),
+
+ # Gray-scale range.
+ ('232', '080808'),
+ ('233', '121212'),
+ ('234', '1c1c1c'),
+ ('235', '262626'),
+ ('236', '303030'),
+ ('237', '3a3a3a'),
+ ('238', '444444'),
+ ('239', '4e4e4e'),
+ ('240', '585858'),
+ ('241', '626262'),
+ ('242', '6c6c6c'),
+ ('243', '767676'),
+ ('244', '808080'),
+ ('245', '8a8a8a'),
+ ('246', '949494'),
+ ('247', '9e9e9e'),
+ ('248', 'a8a8a8'),
+ ('249', 'b2b2b2'),
+ ('250', 'bcbcbc'),
+ ('251', 'c6c6c6'),
+ ('252', 'd0d0d0'),
+ ('253', 'dadada'),
+ ('254', 'e4e4e4'),
+ ('255', 'eeeeee'),
+]
+
+def _str2hex(hexstr):
+ return int(hexstr, 16)
+
+def _strip_hash(rgb):
+ # Strip leading `#` if exists.
+ if rgb.startswith('#'):
+ rgb = rgb.lstrip('#')
+ return rgb
+
+def _create_dicts():
+ short2rgb_dict = dict(CLUT)
+ rgb2short_dict = {}
+ for k, v in short2rgb_dict.items():
+ rgb2short_dict[v] = k
+ return rgb2short_dict, short2rgb_dict
+
+def short2rgb(short):
+ return SHORT2RGB_DICT[short]
+
+def print_all():
+ """ Print all 256 xterm color codes.
+ """
+ for short, rgb in CLUT:
+ sys.stdout.write('\033[48;5;%sm%s:%s' % (short, short, rgb))
+ sys.stdout.write("\033[0m ")
+ sys.stdout.write('\033[38;5;%sm%s:%s' % (short, short, rgb))
+ sys.stdout.write("\033[0m\n")
+ print "Printed all codes."
+ print "You can translate a hex or 0-255 code by providing an argument."
+
+def rgb2short(rgb):
+ """ Find the closest xterm-256 approximation to the given RGB value.
+ @param rgb: Hex code representing an RGB value, eg, 'abcdef'
+ @returns: String between 0 and 255, compatible with xterm.
+ >>> rgb2short('123456')
+ ('23', '005f5f')
+ >>> rgb2short('ffffff')
+ ('231', 'ffffff')
+ >>> rgb2short('0DADD6') # vimeo logo
+ ('38', '00afd7')
+ """
+ rgb = _strip_hash(rgb)
+ incs = (0x00, 0x5f, 0x87, 0xaf, 0xd7, 0xff)
+ # Break 6-char RGB code into 3 integer vals.
+ parts = [ int(h, 16) for h in re.split(r'(..)(..)(..)', rgb)[1:4] ]
+ res = []
+ for part in parts:
+ i = 0
+ while i < len(incs)-1:
+ s, b = incs[i], incs[i+1] # smaller, bigger
+ if s <= part <= b:
+ s1 = abs(s - part)
+ b1 = abs(b - part)
+ if s1 < b1: closest = s
+ else: closest = b
+ res.append(closest)
+ break
+ i += 1
+ #print '***', res
+ res = ''.join([ ('%02.x' % i) for i in res ])
+ equiv = RGB2SHORT_DICT[ res ]
+ #print '***', res, equiv
+ return equiv, res
+
+RGB2SHORT_DICT, SHORT2RGB_DICT = _create_dicts()
+
+#---------------------------------------------------------------------
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
+ if len(sys.argv) == 1:
+ print_all()
+ raise SystemExit
+ arg = sys.argv[1]
+ if len(arg) < 4 and int(arg) < 256:
+ rgb = short2rgb(arg)
+ sys.stdout.write('xterm color \033[38;5;%sm%s\033[0m -> RGB exact \033[38;5;%sm%s\033[0m' % (arg, arg, arg, rgb))
+ sys.stdout.write("\033[0m\n")
+ else:
+ short, rgb = rgb2short(arg)
+ sys.stdout.write('RGB %s -> xterm color approx \033[38;5;%sm%s (%s)' % (arg, short, short, rgb))
+ sys.stdout.write("\033[0m\n")
diff --git a/utils/command_executer.py b/utils/command_executer.py
index c9a19ff9..54c9c355 100644
--- a/utils/command_executer.py
+++ b/utils/command_executer.py
@@ -42,14 +42,15 @@ class CommandExecuter:
def RunCommand(self, cmd, return_output=False, machine=None,
username=None, command_terminator=None,
command_timeout=None,
- terminated_timeout=10):
+ terminated_timeout=10,
+ print_to_console=True):
"""Run a command."""
cmd = str(cmd)
- self.logger.LogCmd(cmd, machine, username)
+ self.logger.LogCmd(cmd, machine, username, print_to_console)
if command_terminator and command_terminator.IsTerminated():
- self.logger.LogError("Command was terminated!")
+ self.logger.LogError("Command was terminated!", print_to_console)
if return_output:
return [1, "", ""]
else:
@@ -81,7 +82,7 @@ class CommandExecuter:
if command_terminator and command_terminator.IsTerminated():
self.RunCommand("sudo kill -9 " + str(p.pid))
wait = p.wait()
- self.logger.LogError("Command was terminated!")
+ self.logger.LogError("Command was terminated!", print_to_console)
if return_output:
return (p.wait, full_stdout, full_stderr)
else:
@@ -91,14 +92,14 @@ class CommandExecuter:
out = os.read(p.stdout.fileno(), 16384)
if return_output:
full_stdout += out
- self.logger.LogCommandOutput(out)
+ self.logger.LogCommandOutput(out, print_to_console)
if out == "":
pipes.remove(p.stdout)
if fd == p.stderr:
err = os.read(p.stderr.fileno(), 16384)
if return_output:
full_stderr += err
- self.logger.LogCommandError(err)
+ self.logger.LogCommandError(err, print_to_console)
if err == "":
pipes.remove(p.stderr)
@@ -109,14 +110,14 @@ class CommandExecuter:
time.time() - terminated_time > terminated_timeout):
m = ("Timeout of %s seconds reached since process termination."
% terminated_timeout)
- self.logger.LogWarning(m)
+ self.logger.LogWarning(m, print_to_console)
break
if (command_timeout is not None and
time.time() - started_time > command_timeout):
m = ("Timeout of %s seconds reached since process started."
% command_timeout)
- self.logger.LogWarning(m)
+ self.logger.LogWarning(m, print_to_console)
self.RunCommand("kill %d || sudo kill %d || sudo kill -9 %d" %
(p.pid, p.pid, p.pid))
break
@@ -161,9 +162,10 @@ class CommandExecuter:
def CrosRunCommand(self, cmd, return_output=False, machine=None,
username=None, command_terminator=None, chromeos_root=None,
command_timeout=None,
- terminated_timeout=10):
+ terminated_timeout=10,
+ print_to_console=True):
"""Run a command on a chromeos box"""
- self.logger.LogCmd(cmd)
+ self.logger.LogCmd(cmd, print_to_console)
self.logger.LogFatalIf(not machine, "No machine provided!")
self.logger.LogFatalIf(not chromeos_root, "chromeos_root not given!")
chromeos_root = os.path.expanduser(chromeos_root)
@@ -197,8 +199,9 @@ class CommandExecuter:
def ChrootRunCommand(self, chromeos_root, command, return_output=False,
command_terminator=None, command_timeout=None,
- terminated_timeout=10):
- self.logger.LogCmd(command)
+ terminated_timeout=10,
+ print_to_console=True):
+ self.logger.LogCmd(command, print_to_console)
handle, command_file = tempfile.mkstemp(dir=os.path.join(chromeos_root,
"src/scripts"),
diff --git a/utils/constants.py b/utils/constants.py
new file mode 100644
index 00000000..33875d66
--- /dev/null
+++ b/utils/constants.py
@@ -0,0 +1,10 @@
+#!/usr/bin/python2.6
+#
+# Copyright 2010 Google Inc. All Rights Reserved.
+
+"""Generic constants used accross modules.
+"""
+
+__author__ = "shenhan@google.com (Han Shen)"
+
+mounted_toolchain_root='/usr/local/toolchain_root'
diff --git a/utils/file_utils.py b/utils/file_utils.py
index 987e88ad..24809333 100644
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@@ -6,6 +6,7 @@ import errno
import hashlib
import os
import shutil
+import command_executer
class FileUtils(object):
@@ -28,21 +29,18 @@ class FileUtils(object):
return cls._instance
def Md5File(self, filename, block_size=2 ** 10):
- md5 = hashlib.md5()
+ command = "md5sum %s" % filename
+ ce = command_executer.GetCommandExecuter()
+ ret, out, err = ce.RunCommand(command, return_output=True)
+ if ret:
+ raise Exception("Could not run md5sum on: %s" % filename)
- with open(filename) as f:
- while True:
- data = f.read(block_size)
- if not data:
- break
- md5.update(data)
-
- return md5.hexdigest()
+ return out.strip().split()[0]
def CanonicalizeChromeOSRoot(self, chromeos_root):
chromeos_root = os.path.expanduser(chromeos_root)
if os.path.isfile(os.path.join(chromeos_root,
- "src/scripts/enter_chroot.sh")):
+ "src/scripts/run_remote_tests.sh")):
return chromeos_root
else:
return None
diff --git a/utils/logger.py b/utils/logger.py
index 1ef996eb..a8f0a6f0 100644
--- a/utils/logger.py
+++ b/utils/logger.py
@@ -56,13 +56,21 @@ class Logger(object):
# self._AddSuffix(basename, suffix))
return suffix
+ def _CreateLogFileHandle(self, name):
+ fd = None
+ try:
+ fd = open(name, "w")
+ except IOError:
+ print "Warning: could not open %s for writing." % name
+ return fd
+
def _CreateLogFileHandles(self, basename):
suffix = self._FindSuffix(basename)
suffixed_basename = self._AddSuffix(basename, suffix)
- self.cmdfd = open("%s.cmd" % suffixed_basename, "w", 0755)
- self.stdout = open("%s.out" % suffixed_basename, "w")
- self.stderr = open("%s.err" % suffixed_basename, "w")
+ self.cmdfd = self._CreateLogFileHandle("%s.cmd" % suffixed_basename)
+ self.stdout = self._CreateLogFileHandle("%s.out" % suffixed_basename)
+ self.stderr = self._CreateLogFileHandle("%s.err" % suffixed_basename)
self._CreateLogFileSymlinks(basename, suffixed_basename)
@@ -75,40 +83,58 @@ class Logger(object):
if os.path.exists(dest_file):
os.remove(dest_file)
os.symlink(src_file, dest_file)
- except IOError as ex:
- self.LogFatal(str(ex))
+ except Exception as ex:
+ print "Exception while creating symlinks: %s" % str(ex)
def _WriteTo(self, fd, msg, flush):
- fd.write(msg)
- if flush:
- fd.flush()
+ if fd:
+ fd.write(msg)
+ if flush:
+ fd.flush()
def _LogMsg(self, file_fd, term_fd, msg, flush=True):
- self._WriteTo(file_fd, msg, flush)
+ if file_fd:
+ self._WriteTo(file_fd, msg, flush)
if self.print_console:
self._WriteTo(term_fd, msg, flush)
- def LogCmd(self, cmd, machine="", user=None):
+ def _GetStdout(self, print_to_console):
+ if print_to_console:
+ return sys.stdout
+ return None
+
+ def _GetStderr(self, print_to_console):
+ if print_to_console:
+ return sys.stderr
+ return None
+
+ def LogCmd(self, cmd, machine="", user=None, print_to_console=True):
if user:
host = "%s@%s" % (user, machine)
else:
host = machine
- self._LogMsg(self.cmdfd, sys.stdout, "CMD (%s): %s\n" % (host, cmd))
+ self._LogMsg(self.cmdfd, self._GetStdout(print_to_console),
+ "CMD (%s): %s\n" % (host, cmd))
- def LogFatal(self, msg):
- self._LogMsg(self.stderr, sys.stderr, "FATAL: %s\n" % msg)
- self._LogMsg(self.stderr, sys.stderr, "\n".join(traceback.format_stack()))
+ def LogFatal(self, msg, print_to_console=True):
+ self._LogMsg(self.stderr, self._GetStderr(print_to_console),
+ "FATAL: %s\n" % msg)
+ self._LogMsg(self.stderr, self._GetStderr(print_to_console),
+ "\n".join(traceback.format_stack()))
sys.exit(1)
- def LogError(self, msg):
- self._LogMsg(self.stderr, sys.stderr, "ERROR: %s\n" % msg)
+ def LogError(self, msg, print_to_console=True):
+ self._LogMsg(self.stderr, self._GetStderr(print_to_console),
+ "ERROR: %s\n" % msg)
- def LogWarning(self, msg):
- self._LogMsg(self.stderr, sys.stderr, "WARNING: %s\n" % msg)
+ def LogWarning(self, msg, print_to_console=True):
+ self._LogMsg(self.stderr, self._GetStderr(print_to_console),
+ "WARNING: %s\n" % msg)
- def LogOutput(self, msg):
- self._LogMsg(self.stdout, sys.stdout, "OUTPUT: %s\n" % msg)
+ def LogOutput(self, msg, print_to_console=True):
+ self._LogMsg(self.stdout, self._GetStdout(print_to_console),
+ "OUTPUT: %s\n" % msg)
def LogFatalIf(self, condition, msg):
if condition:
@@ -122,11 +148,13 @@ class Logger(object):
if condition:
self.LogWarning(msg)
- def LogCommandOutput(self, msg):
- self._LogMsg(self.stdout, sys.stdout, msg, flush=False)
+ def LogCommandOutput(self, msg, print_to_console=True):
+ self._LogMsg(self.stdout, self._GetStdout(print_to_console),
+ msg, flush=False)
- def LogCommandError(self, msg):
- self._LogMsg(self.stderr, sys.stderr, msg, flush=False)
+ def LogCommandError(self, msg, print_to_console=True):
+ self._LogMsg(self.stderr, self._GetStderr(print_to_console),
+ msg, flush=False)
def Flush(self):
self.cmdfd.flush()
diff --git a/utils/misc.py b/utils/misc.py
index 43ef09dc..cb9acbfa 100644
--- a/utils/misc.py
+++ b/utils/misc.py
@@ -6,14 +6,34 @@
__author__ = "asharif@google.com (Ahmad Sharif)"
-import hashlib
+from contextlib import contextmanager
import os
import re
-import stat
+
import command_executer
import logger
-import tempfile
-from contextlib import contextmanager
+
+
+def GetChromeOSVersionFromLSBVersion(lsb_version):
+ ce = command_executer.GetCommandExecuter()
+ command = "git ls-remote http://git.chromium.org/chromiumos/manifest.git"
+ ret, out, _ = ce.RunCommand(command, return_output=True,
+ print_to_console=False)
+ assert ret == 0, "Command %s failed" % command
+ lower = []
+ for line in out.splitlines():
+ mo = re.search("refs/heads/release-R(\d+)-(\d+)\.B", line)
+ if mo:
+ revision = int(mo.group(1))
+ build = int(mo.group(2))
+ lsb_build = int(lsb_version.split(".")[0])
+ if lsb_build > build:
+ lower.append(revision)
+ lower = sorted(lower)
+ if lower:
+ return "R%d-%s" % (lower[-1] + 1, lsb_version)
+ else:
+ return "Unknown"
def ApplySubs(string, *substitutions):
@@ -27,9 +47,11 @@ def UnitToNumber(string, base=1000):
"mega": base**2,
"giga": base**3}
string = string.lower()
- mo = re.search("(\d*)(.+)", string)
+ mo = re.search("(\d*)(.+)?", string)
number = mo.group(1)
unit = mo.group(2)
+ if not unit:
+ return float(number)
for k, v in unit_dict.items():
if k.startswith(unit):
return float(number) * v
@@ -42,8 +64,8 @@ def GetFilenameFromString(string):
return ApplySubs(string,
("/", "__"),
("\s", "_"),
- ("=", ""),
- ("\"", ""))
+ ("[\^\$=\"\\\?]", ""),
+ )
def GetRoot(scr_name):
@@ -154,14 +176,21 @@ def GetCtargetFromBoard(board, chromeos_root):
"../platform/dev/toolchain_utils.sh; get_ctarget_from_board %s" %
base_board)
ce = command_executer.GetCommandExecuter()
- ret, out, err = ce.ChrootRunCommand(chromeos_root,
- command,
- return_output=True)
+ ret, out, _ = ce.ChrootRunCommand(chromeos_root,
+ command,
+ return_output=True)
if ret != 0:
raise ValueError("Board %s is invalid!" % board)
+ # Remove ANSI escape sequences.
+ out = StripANSIEscapeSequences(out)
return out.strip()
+def StripANSIEscapeSequences(string):
+ string = re.sub("\x1b\[[0-9]*[a-zA-Z]", "", string)
+ return string
+
+
def GetChromeSrcDir():
return "var/cache/distfiles/target/chrome-src/src"
@@ -170,11 +199,30 @@ def GetEnvStringFromDict(env_dict):
return " ".join(["%s=\"%s\"" % var for var in env_dict.items()])
+def MergeEnvStringWithDict(env_string, env_dict, prepend=True):
+ if not env_string.strip():
+ return GetEnvStringFromDict(env_dict)
+ override_env_list = []
+ ce = command_executer.GetCommandExecuter()
+ for k, v in env_dict.items():
+ v = v.strip("\"'")
+ if prepend:
+ new_env = "%s=\"%s $%s\"" % (k, v, k)
+ else:
+ new_env = "%s=\"$%s %s\"" % (k, k, v)
+ command = "; ".join([env_string, new_env, "echo $%s" % k])
+ ret, out, _ = ce.RunCommand(command, return_output=True)
+ override_env_list.append("%s=%r" % (k, out.strip()))
+ ret = env_string + " " + " ".join(override_env_list)
+ return ret.strip()
+
+
def GetAllImages(chromeos_root, board):
ce = command_executer.GetCommandExecuter()
command = ("find %s/src/build/images/%s -name chromiumos_test_image.bin" %
(chromeos_root, board))
- ret, out, err = ce.RunCommand(command, return_output=True)
+ ret, out, _ = ce.RunCommand(command, return_output=True)
+ assert ret == 0, "Could not run command: %s" % command
return out.splitlines()
diff --git a/utils/misc_test.py b/utils/misc_test.py
index 62742e3b..e234332f 100644
--- a/utils/misc_test.py
+++ b/utils/misc_test.py
@@ -14,9 +14,37 @@ import misc
class UtilsTest(unittest.TestCase):
def testGetFilenameFromString(self):
- string = 'a /b=c"d'
+ string = 'a /b=c"d^$?\\'
filename = misc.GetFilenameFromString(string)
self.assertTrue(filename == 'a___bcd')
+ def testPrependMergeEnv(self):
+ var = 'USE'
+ use_flags = 'hello 123'
+ added_use_flags = 'bla bla'
+ env_string = '%s=%r' % (var, use_flags)
+ new_env_string = misc.MergeEnvStringWithDict(env_string,
+ {var: added_use_flags})
+ expected_new_env = '%s=%r' % (var, ' '.join([added_use_flags, use_flags]))
+ self.assertTrue(new_env_string == ' '.join([env_string, expected_new_env]))
+
+ def testGetChromeOSVersionFromLSBVersion(self):
+ versions_dict = {"2630.0.0": "22",
+ "2030.0.0": "19"}
+ f = misc.GetChromeOSVersionFromLSBVersion
+ for k, v in versions_dict.items():
+ self.assertTrue(f(k) == "R%s-%s" % (v, k))
+
+ def testPostpendMergeEnv(self):
+ var = 'USE'
+ use_flags = 'hello 123'
+ added_use_flags = 'bla bla'
+ env_string = '%s=%r' % (var, use_flags)
+ new_env_string = misc.MergeEnvStringWithDict(env_string,
+ {var: added_use_flags},
+ False)
+ expected_new_env = '%s=%r' % (var, ' '.join([use_flags, added_use_flags]))
+ self.assertTrue(new_env_string == ' '.join([env_string, expected_new_env]))
+
if __name__ == '__main__':
unittest.main()
diff --git a/utils/stats.py b/utils/stats.py
new file mode 100644
index 00000000..aceed824
--- /dev/null
+++ b/utils/stats.py
@@ -0,0 +1,4528 @@
+# Copyright (c) 1999-2008 Gary Strangman; All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# Comments and/or additions are welcome (send e-mail to:
+# strang@nmr.mgh.harvard.edu).
+#
+"""
+stats.py module
+
+(Requires pstat.py module.)
+
+#################################################
+####### Written by: Gary Strangman ###########
+####### Last modified: Oct 31, 2008 ###########
+#################################################
+
+A collection of basic statistical functions for python. The function
+names appear below.
+
+IMPORTANT: There are really *3* sets of functions. The first set has an 'l'
+prefix, which can be used with list or tuple arguments. The second set has
+an 'a' prefix, which can accept NumPy array arguments. These latter
+functions are defined only when NumPy is available on the system. The third
+type has NO prefix (i.e., has the name that appears below). Functions of
+this set are members of a "Dispatch" class, c/o David Ascher. This class
+allows different functions to be called depending on the type of the passed
+arguments. Thus, stats.mean is a member of the Dispatch class and
+stats.mean(range(20)) will call stats.lmean(range(20)) while
+stats.mean(Numeric.arange(20)) will call stats.amean(Numeric.arange(20)).
+This is a handy way to keep consistent function names when different
+argument types require different functions to be called. Having
+implementated the Dispatch class, however, means that to get info on
+a given function, you must use the REAL function name ... that is
+"print stats.lmean.__doc__" or "print stats.amean.__doc__" work fine,
+while "print stats.mean.__doc__" will print the doc for the Dispatch
+class. NUMPY FUNCTIONS ('a' prefix) generally have more argument options
+but should otherwise be consistent with the corresponding list functions.
+
+Disclaimers: The function list is obviously incomplete and, worse, the
+functions are not optimized. All functions have been tested (some more
+so than others), but they are far from bulletproof. Thus, as with any
+free software, no warranty or guarantee is expressed or implied. :-) A
+few extra functions that don't appear in the list below can be found by
+interested treasure-hunters. These functions don't necessarily have
+both list and array versions but were deemed useful
+
+CENTRAL TENDENCY: geometricmean
+ harmonicmean
+ mean
+ median
+ medianscore
+ mode
+
+MOMENTS: moment
+ variation
+ skew
+ kurtosis
+ skewtest (for Numpy arrays only)
+ kurtosistest (for Numpy arrays only)
+ normaltest (for Numpy arrays only)
+
+ALTERED VERSIONS: tmean (for Numpy arrays only)
+ tvar (for Numpy arrays only)
+ tmin (for Numpy arrays only)
+ tmax (for Numpy arrays only)
+ tstdev (for Numpy arrays only)
+ tsem (for Numpy arrays only)
+ describe
+
+FREQUENCY STATS: itemfreq
+ scoreatpercentile
+ percentileofscore
+ histogram
+ cumfreq
+ relfreq
+
+VARIABILITY: obrientransform
+ samplevar
+ samplestdev
+ signaltonoise (for Numpy arrays only)
+ var
+ stdev
+ sterr
+ sem
+ z
+ zs
+ zmap (for Numpy arrays only)
+
+TRIMMING FCNS: threshold (for Numpy arrays only)
+ trimboth
+ trim1
+ round (round all vals to 'n' decimals; Numpy only)
+
+CORRELATION FCNS: covariance (for Numpy arrays only)
+ correlation (for Numpy arrays only)
+ paired
+ pearsonr
+ spearmanr
+ pointbiserialr
+ kendalltau
+ linregress
+
+INFERENTIAL STATS: ttest_1samp
+ ttest_ind
+ ttest_rel
+ chisquare
+ ks_2samp
+ mannwhitneyu
+ ranksums
+ wilcoxont
+ kruskalwallish
+ friedmanchisquare
+
+PROBABILITY CALCS: chisqprob
+ erfcc
+ zprob
+ ksprob
+ fprob
+ betacf
+ gammln
+ betai
+
+ANOVA FUNCTIONS: F_oneway
+ F_value
+
+SUPPORT FUNCTIONS: writecc
+ incr
+ sign (for Numpy arrays only)
+ sum
+ cumsum
+ ss
+ summult
+ sumdiffsquared
+ square_of_sums
+ shellsort
+ rankdata
+ outputpairedstats
+ findwithin
+"""
+## CHANGE LOG:
+## ===========
+## 09-07-21 ... added capability for getting the 'proportion' out of l/amannwhitneyu (but comment-disabled)
+## 08-10-31 ... fixed import LinearAlgebra bug before glm fcns
+## 07-11-26 ... conversion for numpy started
+## 07-05-16 ... added Lin's Concordance Correlation Coefficient (alincc) and acov
+## 05-08-21 ... added "Dice's coefficient"
+## 04-10-26 ... added ap2t(), an ugly fcn for converting p-vals to T-vals
+## 04-04-03 ... added amasslinregress() function to do regression on N-D arrays
+## 03-01-03 ... CHANGED VERSION TO 0.6
+## fixed atsem() to properly handle limits=None case
+## improved histogram and median functions (estbinwidth) and
+## fixed atvar() function (wrong answers for neg numbers?!?)
+## 02-11-19 ... fixed attest_ind and attest_rel for div-by-zero Overflows
+## 02-05-10 ... fixed lchisqprob indentation (failed when df=even)
+## 00-12-28 ... removed aanova() to separate module, fixed licensing to
+## match Python License, fixed doc string & imports
+## 00-04-13 ... pulled all "global" statements, except from aanova()
+## added/fixed lots of documentation, removed io.py dependency
+## changed to version 0.5
+## 99-11-13 ... added asign() function
+## 99-11-01 ... changed version to 0.4 ... enough incremental changes now
+## 99-10-25 ... added acovariance and acorrelation functions
+## 99-10-10 ... fixed askew/akurtosis to avoid divide-by-zero errors
+## added aglm function (crude, but will be improved)
+## 99-10-04 ... upgraded acumsum, ass, asummult, asamplevar, avar, etc. to
+## all handle lists of 'dimension's and keepdims
+## REMOVED ar0, ar2, ar3, ar4 and replaced them with around
+## reinserted fixes for abetai to avoid math overflows
+## 99-09-05 ... rewrote achisqprob/aerfcc/aksprob/afprob/abetacf/abetai to
+## handle multi-dimensional arrays (whew!)
+## 99-08-30 ... fixed l/amoment, l/askew, l/akurtosis per D'Agostino (1990)
+## added anormaltest per same reference
+## re-wrote azprob to calc arrays of probs all at once
+## 99-08-22 ... edited attest_ind printing section so arrays could be rounded
+## 99-08-19 ... fixed amean and aharmonicmean for non-error(!) overflow on
+## short/byte arrays (mean of #s btw 100-300 = -150??)
+## 99-08-09 ... fixed asum so that the None case works for Byte arrays
+## 99-08-08 ... fixed 7/3 'improvement' to handle t-calcs on N-D arrays
+## 99-07-03 ... improved attest_ind, attest_rel (zero-division errortrap)
+## 99-06-24 ... fixed bug(?) in attest_ind (n1=a.shape[0])
+## 04/11/99 ... added asignaltonoise, athreshold functions, changed all
+## max/min in array section to N.maximum/N.minimum,
+## fixed square_of_sums to prevent integer overflow
+## 04/10/99 ... !!! Changed function name ... sumsquared ==> square_of_sums
+## 03/18/99 ... Added ar0, ar2, ar3 and ar4 rounding functions
+## 02/28/99 ... Fixed aobrientransform to return an array rather than a list
+## 01/15/99 ... Essentially ceased updating list-versions of functions (!!!)
+## 01/13/99 ... CHANGED TO VERSION 0.3
+## fixed bug in a/lmannwhitneyu p-value calculation
+## 12/31/98 ... fixed variable-name bug in ldescribe
+## 12/19/98 ... fixed bug in findwithin (fcns needed pstat. prefix)
+## 12/16/98 ... changed amedianscore to return float (not array) for 1 score
+## 12/14/98 ... added atmin and atmax functions
+## removed umath from import line (not needed)
+## l/ageometricmean modified to reduce chance of overflows (take
+## nth root first, then multiply)
+## 12/07/98 ... added __version__variable (now 0.2)
+## removed all 'stats.' from anova() fcn
+## 12/06/98 ... changed those functions (except shellsort) that altered
+## arguments in-place ... cumsum, ranksort, ...
+## updated (and fixed some) doc-strings
+## 12/01/98 ... added anova() function (requires NumPy)
+## incorporated Dispatch class
+## 11/12/98 ... added functionality to amean, aharmonicmean, ageometricmean
+## added 'asum' function (added functionality to N.add.reduce)
+## fixed both moment and amoment (two errors)
+## changed name of skewness and askewness to skew and askew
+## fixed (a)histogram (which sometimes counted points <lowerlimit)
+
+import pstat # required 3rd party module
+import math, string, copy # required python modules
+from types import *
+
+__version__ = 0.6
+
+############# DISPATCH CODE ##############
+
+
+class Dispatch:
+ """
+The Dispatch class, care of David Ascher, allows different functions to
+be called depending on the argument types. This way, there can be one
+function name regardless of the argument type. To access function doc
+in stats.py module, prefix the function with an 'l' or 'a' for list or
+array arguments, respectively. That is, print stats.lmean.__doc__ or
+print stats.amean.__doc__ or whatever.
+"""
+
+ def __init__(self, *tuples):
+ self._dispatch = {}
+ for func, types in tuples:
+ for t in types:
+ if t in self._dispatch.keys():
+ raise ValueError, "can't have two dispatches on "+str(t)
+ self._dispatch[t] = func
+ self._types = self._dispatch.keys()
+
+ def __call__(self, arg1, *args, **kw):
+ if type(arg1) not in self._types:
+ raise TypeError, "don't know how to dispatch %s arguments" % type(arg1)
+ return apply(self._dispatch[type(arg1)], (arg1,) + args, kw)
+
+
+##########################################################################
+######################## LIST-BASED FUNCTIONS ########################
+##########################################################################
+
+### Define these regardless
+
+####################################
+####### CENTRAL TENDENCY #########
+####################################
+
+def lgeometricmean (inlist):
+ """
+Calculates the geometric mean of the values in the passed list.
+That is: n-th root of (x1 * x2 * ... * xn). Assumes a '1D' list.
+
+Usage: lgeometricmean(inlist)
+"""
+ mult = 1.0
+ one_over_n = 1.0/len(inlist)
+ for item in inlist:
+ mult = mult * pow(item,one_over_n)
+ return mult
+
+
+def lharmonicmean (inlist):
+ """
+Calculates the harmonic mean of the values in the passed list.
+That is: n / (1/x1 + 1/x2 + ... + 1/xn). Assumes a '1D' list.
+
+Usage: lharmonicmean(inlist)
+"""
+ sum = 0
+ for item in inlist:
+ sum = sum + 1.0/item
+ return len(inlist) / sum
+
+
+def lmean (inlist):
+ """
+Returns the arithematic mean of the values in the passed list.
+Assumes a '1D' list, but will function on the 1st dim of an array(!).
+
+Usage: lmean(inlist)
+"""
+ sum = 0
+ for item in inlist:
+ sum = sum + item
+ return sum/float(len(inlist))
+
+
+def lmedian (inlist,numbins=1000):
+ """
+Returns the computed median value of a list of numbers, given the
+number of bins to use for the histogram (more bins brings the computed value
+closer to the median score, default number of bins = 1000). See G.W.
+Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics.
+
+Usage: lmedian (inlist, numbins=1000)
+"""
+ (hist, smallest, binsize, extras) = histogram(inlist,numbins,[min(inlist),max(inlist)]) # make histog
+ cumhist = cumsum(hist) # make cumulative histogram
+ for i in range(len(cumhist)): # get 1st(!) index holding 50%ile score
+ if cumhist[i]>=len(inlist)/2.0:
+ cfbin = i
+ break
+ LRL = smallest + binsize*cfbin # get lower read limit of that bin
+ cfbelow = cumhist[cfbin-1]
+ freq = float(hist[cfbin]) # frequency IN the 50%ile bin
+ median = LRL + ((len(inlist)/2.0 - cfbelow)/float(freq))*binsize # median formula
+ return median
+
+
+def lmedianscore (inlist):
+ """
+Returns the 'middle' score of the passed list. If there is an even
+number of scores, the mean of the 2 middle scores is returned.
+
+Usage: lmedianscore(inlist)
+"""
+
+ newlist = copy.deepcopy(inlist)
+ newlist.sort()
+ if len(newlist) % 2 == 0: # if even number of scores, average middle 2
+ index = len(newlist)/2 # integer division correct
+ median = float(newlist[index] + newlist[index-1]) /2
+ else:
+ index = len(newlist)/2 # int divsion gives mid value when count from 0
+ median = newlist[index]
+ return median
+
+
+def lmode(inlist):
+ """
+Returns a list of the modal (most common) score(s) in the passed
+list. If there is more than one such score, all are returned. The
+bin-count for the mode(s) is also returned.
+
+Usage: lmode(inlist)
+Returns: bin-count for mode(s), a list of modal value(s)
+"""
+
+ scores = pstat.unique(inlist)
+ scores.sort()
+ freq = []
+ for item in scores:
+ freq.append(inlist.count(item))
+ maxfreq = max(freq)
+ mode = []
+ stillmore = 1
+ while stillmore:
+ try:
+ indx = freq.index(maxfreq)
+ mode.append(scores[indx])
+ del freq[indx]
+ del scores[indx]
+ except ValueError:
+ stillmore=0
+ return maxfreq, mode
+
+
+####################################
+############ MOMENTS #############
+####################################
+
+def lmoment(inlist,moment=1):
+ """
+Calculates the nth moment about the mean for a sample (defaults to
+the 1st moment). Used to calculate coefficients of skewness and kurtosis.
+
+Usage: lmoment(inlist,moment=1)
+Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r)
+"""
+ if moment == 1:
+ return 0.0
+ else:
+ mn = mean(inlist)
+ n = len(inlist)
+ s = 0
+ for x in inlist:
+ s = s + (x-mn)**moment
+ return s/float(n)
+
+
+def lvariation(inlist):
+ """
+Returns the coefficient of variation, as defined in CRC Standard
+Probability and Statistics, p.6.
+
+Usage: lvariation(inlist)
+"""
+ return 100.0*samplestdev(inlist)/float(mean(inlist))
+
+
+def lskew(inlist):
+ """
+Returns the skewness of a distribution, as defined in Numerical
+Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.)
+
+Usage: lskew(inlist)
+"""
+ return moment(inlist,3)/pow(moment(inlist,2),1.5)
+
+
+def lkurtosis(inlist):
+ """
+Returns the kurtosis of a distribution, as defined in Numerical
+Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.)
+
+Usage: lkurtosis(inlist)
+"""
+ return moment(inlist,4)/pow(moment(inlist,2),2.0)
+
+
+def ldescribe(inlist):
+ """
+Returns some descriptive statistics of the passed list (assumed to be 1D).
+
+Usage: ldescribe(inlist)
+Returns: n, mean, standard deviation, skew, kurtosis
+"""
+ n = len(inlist)
+ mm = (min(inlist),max(inlist))
+ m = mean(inlist)
+ sd = stdev(inlist)
+ sk = skew(inlist)
+ kurt = kurtosis(inlist)
+ return n, mm, m, sd, sk, kurt
+
+
+####################################
+####### FREQUENCY STATS ##########
+####################################
+
+def litemfreq(inlist):
+ """
+Returns a list of pairs. Each pair consists of one of the scores in inlist
+and it's frequency count. Assumes a 1D list is passed.
+
+Usage: litemfreq(inlist)
+Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)
+"""
+ scores = pstat.unique(inlist)
+ scores.sort()
+ freq = []
+ for item in scores:
+ freq.append(inlist.count(item))
+ return pstat.abut(scores, freq)
+
+
+def lscoreatpercentile (inlist, percent):
+ """
+Returns the score at a given percentile relative to the distribution
+given by inlist.
+
+Usage: lscoreatpercentile(inlist,percent)
+"""
+ if percent > 1:
+ print "\nDividing percent>1 by 100 in lscoreatpercentile().\n"
+ percent = percent / 100.0
+ targetcf = percent*len(inlist)
+ h, lrl, binsize, extras = histogram(inlist)
+ cumhist = cumsum(copy.deepcopy(h))
+ for i in range(len(cumhist)):
+ if cumhist[i] >= targetcf:
+ break
+ score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i)
+ return score
+
+
+def lpercentileofscore (inlist, score,histbins=10,defaultlimits=None):
+ """
+Returns the percentile value of a score relative to the distribution
+given by inlist. Formula depends on the values used to histogram the data(!).
+
+Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None)
+"""
+
+ h, lrl, binsize, extras = histogram(inlist,histbins,defaultlimits)
+ cumhist = cumsum(copy.deepcopy(h))
+ i = int((score - lrl)/float(binsize))
+ pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inlist)) * 100
+ return pct
+
+
+def lhistogram (inlist,numbins=10,defaultreallimits=None,printextras=0):
+ """
+Returns (i) a list of histogram bin counts, (ii) the smallest value
+of the histogram binning, and (iii) the bin width (the last 2 are not
+necessarily integers). Default number of bins is 10. If no sequence object
+is given for defaultreallimits, the routine picks (usually non-pretty) bins
+spanning all the numbers in the inlist.
+
+Usage: lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0)
+Returns: list of bin values, lowerreallimit, binsize, extrapoints
+"""
+ if (defaultreallimits <> None):
+ if type(defaultreallimits) not in [ListType,TupleType] or len(defaultreallimits)==1: # only one limit given, assumed to be lower one & upper is calc'd
+ lowerreallimit = defaultreallimits
+ upperreallimit = 1.000001 * max(inlist)
+ else: # assume both limits given
+ lowerreallimit = defaultreallimits[0]
+ upperreallimit = defaultreallimits[1]
+ binsize = (upperreallimit-lowerreallimit)/float(numbins)
+ else: # no limits given for histogram, both must be calc'd
+ estbinwidth=(max(inlist)-min(inlist))/float(numbins) +1e-6 #1=>cover all
+ binsize = ((max(inlist)-min(inlist)+estbinwidth))/float(numbins)
+ lowerreallimit = min(inlist) - binsize/2 #lower real limit,1st bin
+ bins = [0]*(numbins)
+ extrapoints = 0
+ for num in inlist:
+ try:
+ if (num-lowerreallimit) < 0:
+ extrapoints = extrapoints + 1
+ else:
+ bintoincrement = int((num-lowerreallimit)/float(binsize))
+ bins[bintoincrement] = bins[bintoincrement] + 1
+ except:
+ extrapoints = extrapoints + 1
+ if (extrapoints > 0 and printextras == 1):
+ print '\nPoints outside given histogram range =',extrapoints
+ return (bins, lowerreallimit, binsize, extrapoints)
+
+
+def lcumfreq(inlist,numbins=10,defaultreallimits=None):
+ """
+Returns a cumulative frequency histogram, using the histogram function.
+
+Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None)
+Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h,l,b,e = histogram(inlist,numbins,defaultreallimits)
+ cumhist = cumsum(copy.deepcopy(h))
+ return cumhist,l,b,e
+
+
+def lrelfreq(inlist,numbins=10,defaultreallimits=None):
+ """
+Returns a relative frequency histogram, using the histogram function.
+
+Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None)
+Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h,l,b,e = histogram(inlist,numbins,defaultreallimits)
+ for i in range(len(h)):
+ h[i] = h[i]/float(len(inlist))
+ return h,l,b,e
+
+
+####################################
+##### VARIABILITY FUNCTIONS ######
+####################################
+
+def lobrientransform(*args):
+ """
+Computes a transform on input data (any number of columns). Used to
+test for homogeneity of variance prior to running one-way stats. From
+Maxwell and Delaney, p.112.
+
+Usage: lobrientransform(*args)
+Returns: transformed data for use in an ANOVA
+"""
+ TINY = 1e-10
+ k = len(args)
+ n = [0.0]*k
+ v = [0.0]*k
+ m = [0.0]*k
+ nargs = []
+ for i in range(k):
+ nargs.append(copy.deepcopy(args[i]))
+ n[i] = float(len(nargs[i]))
+ v[i] = var(nargs[i])
+ m[i] = mean(nargs[i])
+ for j in range(k):
+ for i in range(n[j]):
+ t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2
+ t2 = 0.5*v[j]*(n[j]-1.0)
+ t3 = (n[j]-1.0)*(n[j]-2.0)
+ nargs[j][i] = (t1-t2) / float(t3)
+ check = 1
+ for j in range(k):
+ if v[j] - mean(nargs[j]) > TINY:
+ check = 0
+ if check <> 1:
+ raise ValueError, 'Problem in obrientransform.'
+ else:
+ return nargs
+
+
+def lsamplevar (inlist):
+ """
+Returns the variance of the values in the passed list using
+N for the denominator (i.e., DESCRIBES the sample variance only).
+
+Usage: lsamplevar(inlist)
+"""
+ n = len(inlist)
+ mn = mean(inlist)
+ deviations = []
+ for item in inlist:
+ deviations.append(item-mn)
+ return ss(deviations)/float(n)
+
+
+def lsamplestdev (inlist):
+ """
+Returns the standard deviation of the values in the passed list using
+N for the denominator (i.e., DESCRIBES the sample stdev only).
+
+Usage: lsamplestdev(inlist)
+"""
+ return math.sqrt(samplevar(inlist))
+
+
+def lcov (x,y, keepdims=0):
+ """
+Returns the estimated covariance of the values in the passed
+array (i.e., N-1). Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions). Set keepdims=1 to return an array with the
+same number of dimensions as inarray.
+
+Usage: lcov(x,y,keepdims=0)
+"""
+
+ n = len(x)
+ xmn = mean(x)
+ ymn = mean(y)
+ xdeviations = [0]*len(x)
+ ydeviations = [0]*len(y)
+ for i in range(len(x)):
+ xdeviations[i] = x[i] - xmn
+ ydeviations[i] = y[i] - ymn
+ ss = 0.0
+ for i in range(len(xdeviations)):
+ ss = ss + xdeviations[i]*ydeviations[i]
+ return ss/float(n-1)
+
+
+def lvar (inlist):
+ """
+Returns the variance of the values in the passed list using N-1
+for the denominator (i.e., for estimating population variance).
+
+Usage: lvar(inlist)
+"""
+ n = len(inlist)
+ mn = mean(inlist)
+ deviations = [0]*len(inlist)
+ for i in range(len(inlist)):
+ deviations[i] = inlist[i] - mn
+ return ss(deviations)/float(n-1)
+
+
+def lstdev (inlist):
+ """
+Returns the standard deviation of the values in the passed list
+using N-1 in the denominator (i.e., to estimate population stdev).
+
+Usage: lstdev(inlist)
+"""
+ return math.sqrt(var(inlist))
+
+
+def lsterr(inlist):
+ """
+Returns the standard error of the values in the passed list using N-1
+in the denominator (i.e., to estimate population standard error).
+
+Usage: lsterr(inlist)
+"""
+ return stdev(inlist) / float(math.sqrt(len(inlist)))
+
+
+def lsem (inlist):
+ """
+Returns the estimated standard error of the mean (sx-bar) of the
+values in the passed list. sem = stdev / sqrt(n)
+
+Usage: lsem(inlist)
+"""
+ sd = stdev(inlist)
+ n = len(inlist)
+ return sd/math.sqrt(n)
+
+
+def lz (inlist, score):
+ """
+Returns the z-score for a given input score, given that score and the
+list from which that score came. Not appropriate for population calculations.
+
+Usage: lz(inlist, score)
+"""
+ z = (score-mean(inlist))/samplestdev(inlist)
+ return z
+
+
+def lzs (inlist):
+ """
+Returns a list of z-scores, one for each score in the passed list.
+
+Usage: lzs(inlist)
+"""
+ zscores = []
+ for item in inlist:
+ zscores.append(z(inlist,item))
+ return zscores
+
+
+####################################
+####### TRIMMING FUNCTIONS #######
+####################################
+
+def ltrimboth (l,proportiontocut):
+ """
+Slices off the passed proportion of items from BOTH ends of the passed
+list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost'
+10% of scores. Assumes list is sorted by magnitude. Slices off LESS if
+proportion results in a non-integer slice index (i.e., conservatively
+slices off proportiontocut).
+
+Usage: ltrimboth (l,proportiontocut)
+Returns: trimmed version of list l
+"""
+ lowercut = int(proportiontocut*len(l))
+ uppercut = len(l) - lowercut
+ return l[lowercut:uppercut]
+
+
+def ltrim1 (l,proportiontocut,tail='right'):
+ """
+Slices off the passed proportion of items from ONE end of the passed
+list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost'
+10% of scores). Slices off LESS if proportion results in a non-integer
+slice index (i.e., conservatively slices off proportiontocut).
+
+Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left'
+Returns: trimmed version of list l
+"""
+ if tail == 'right':
+ lowercut = 0
+ uppercut = len(l) - int(proportiontocut*len(l))
+ elif tail == 'left':
+ lowercut = int(proportiontocut*len(l))
+ uppercut = len(l)
+ return l[lowercut:uppercut]
+
+
+####################################
+##### CORRELATION FUNCTIONS ######
+####################################
+
+def lpaired(x,y):
+ """
+Interactively determines the type of data and then runs the
+appropriated statistic for paired group data.
+
+Usage: lpaired(x,y)
+Returns: appropriate statistic name, value, and probability
+"""
+ samples = ''
+ while samples not in ['i','r','I','R','c','C']:
+ print '\nIndependent or related samples, or correlation (i,r,c): ',
+ samples = raw_input()
+
+ if samples in ['i','I','r','R']:
+ print '\nComparing variances ...',
+# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
+ r = obrientransform(x,y)
+ f,p = F_oneway(pstat.colex(r,0),pstat.colex(r,1))
+ if p<0.05:
+ vartype='unequal, p='+str(round(p,4))
+ else:
+ vartype='equal'
+ print vartype
+ if samples in ['i','I']:
+ if vartype[0]=='e':
+ t,p = ttest_ind(x,y,0)
+ print '\nIndependent samples t-test: ', round(t,4),round(p,4)
+ else:
+ if len(x)>20 or len(y)>20:
+ z,p = ranksums(x,y)
+ print '\nRank Sums test (NONparametric, n>20): ', round(z,4),round(p,4)
+ else:
+ u,p = mannwhitneyu(x,y)
+ print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u,4),round(p,4)
+
+ else: # RELATED SAMPLES
+ if vartype[0]=='e':
+ t,p = ttest_rel(x,y,0)
+ print '\nRelated samples t-test: ', round(t,4),round(p,4)
+ else:
+ t,p = ranksums(x,y)
+ print '\nWilcoxon T-test (NONparametric): ', round(t,4),round(p,4)
+ else: # CORRELATION ANALYSIS
+ corrtype = ''
+ while corrtype not in ['c','C','r','R','d','D']:
+ print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
+ corrtype = raw_input()
+ if corrtype in ['c','C']:
+ m,b,r,p,see = linregress(x,y)
+ print '\nLinear regression for continuous variables ...'
+ lol = [['Slope','Intercept','r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]]
+ pstat.printcc(lol)
+ elif corrtype in ['r','R']:
+ r,p = spearmanr(x,y)
+ print '\nCorrelation for ranked variables ...'
+ print "Spearman's r: ",round(r,4),round(p,4)
+ else: # DICHOTOMOUS
+ r,p = pointbiserialr(x,y)
+ print '\nAssuming x contains a dichotomous variable ...'
+ print 'Point Biserial r: ',round(r,4),round(p,4)
+ print '\n\n'
+ return None
+
+
+def lpearsonr(x,y):
+ """
+Calculates a Pearson correlation coefficient and the associated
+probability value. Taken from Heiman's Basic Statistics for the Behav.
+Sci (2nd), p.195.
+
+Usage: lpearsonr(x,y) where x and y are equal-length lists
+Returns: Pearson's r value, two-tailed p-value
+"""
+ TINY = 1.0e-30
+ if len(x) <> len(y):
+ raise ValueError, 'Input values not paired in pearsonr. Aborting.'
+ n = len(x)
+ x = map(float,x)
+ y = map(float,y)
+ xmean = mean(x)
+ ymean = mean(y)
+ r_num = n*(summult(x,y)) - sum(x)*sum(y)
+ r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y)))
+ r = (r_num / r_den) # denominator already a float
+ df = n-2
+ t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
+ prob = betai(0.5*df,0.5,df/float(df+t*t))
+ return r, prob
+
+
+def llincc(x,y):
+ """
+Calculates Lin's concordance correlation coefficient.
+
+Usage: alincc(x,y) where x, y are equal-length arrays
+Returns: Lin's CC
+"""
+ covar = lcov(x,y)*(len(x)-1)/float(len(x)) # correct denom to n
+ xvar = lvar(x)*(len(x)-1)/float(len(x)) # correct denom to n
+ yvar = lvar(y)*(len(y)-1)/float(len(y)) # correct denom to n
+ lincc = (2 * covar) / ((xvar+yvar) +((amean(x)-amean(y))**2))
+ return lincc
+
+
+def lspearmanr(x,y):
+ """
+Calculates a Spearman rank-order correlation coefficient. Taken
+from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.
+
+Usage: lspearmanr(x,y) where x and y are equal-length lists
+Returns: Spearman's r, two-tailed p-value
+"""
+ TINY = 1e-30
+ if len(x) <> len(y):
+ raise ValueError, 'Input values not paired in spearmanr. Aborting.'
+ n = len(x)
+ rankx = rankdata(x)
+ ranky = rankdata(y)
+ dsq = sumdiffsquared(rankx,ranky)
+ rs = 1 - 6*dsq / float(n*(n**2-1))
+ t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs)))
+ df = n-2
+ probrs = betai(0.5*df,0.5,df/(df+t*t)) # t already a float
+# probability values for rs are from part 2 of the spearman function in
+# Numerical Recipies, p.510. They are close to tables, but not exact. (?)
+ return rs, probrs
+
+
+def lpointbiserialr(x,y):
+ """
+Calculates a point-biserial correlation coefficient and the associated
+probability value. Taken from Heiman's Basic Statistics for the Behav.
+Sci (1st), p.194.
+
+Usage: lpointbiserialr(x,y) where x,y are equal-length lists
+Returns: Point-biserial r, two-tailed p-value
+"""
+ TINY = 1e-30
+ if len(x) <> len(y):
+ raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.'
+ data = pstat.abut(x,y)
+ categories = pstat.unique(x)
+ if len(categories) <> 2:
+ raise ValueError, "Exactly 2 categories required for pointbiserialr()."
+ else: # there are 2 categories, continue
+ codemap = pstat.abut(categories,range(2))
+ recoded = pstat.recode(data,codemap,0)
+ x = pstat.linexand(data,0,categories[0])
+ y = pstat.linexand(data,0,categories[1])
+ xmean = mean(pstat.colex(x,1))
+ ymean = mean(pstat.colex(y,1))
+ n = len(data)
+ adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n)))
+ rpb = (ymean - xmean)/samplestdev(pstat.colex(data,1))*adjust
+ df = n-2
+ t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))
+ prob = betai(0.5*df,0.5,df/(df+t*t)) # t already a float
+ return rpb, prob
+
+
+def lkendalltau(x,y):
+ """
+Calculates Kendall's tau ... correlation of ordinal data. Adapted
+from function kendl1 in Numerical Recipies. Needs good test-routine.@@@
+
+Usage: lkendalltau(x,y)
+Returns: Kendall's tau, two-tailed p-value
+"""
+ n1 = 0
+ n2 = 0
+ iss = 0
+ for j in range(len(x)-1):
+ for k in range(j,len(y)):
+ a1 = x[j] - x[k]
+ a2 = y[j] - y[k]
+ aa = a1 * a2
+ if (aa): # neither list has a tie
+ n1 = n1 + 1
+ n2 = n2 + 1
+ if aa > 0:
+ iss = iss + 1
+ else:
+ iss = iss -1
+ else:
+ if (a1):
+ n1 = n1 + 1
+ else:
+ n2 = n2 + 1
+ tau = iss / math.sqrt(n1*n2)
+ svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1))
+ z = tau / math.sqrt(svar)
+ prob = erfcc(abs(z)/1.4142136)
+ return tau, prob
+
+
+def llinregress(x,y):
+ """
+Calculates a regression line on x,y pairs.
+
+Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates
+Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
+"""
+ TINY = 1.0e-20
+ if len(x) <> len(y):
+ raise ValueError, 'Input values not paired in linregress. Aborting.'
+ n = len(x)
+ x = map(float,x)
+ y = map(float,y)
+ xmean = mean(x)
+ ymean = mean(y)
+ r_num = float(n*(summult(x,y)) - sum(x)*sum(y))
+ r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y)))
+ r = r_num / r_den
+ z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY))
+ df = n-2
+ t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
+ prob = betai(0.5*df,0.5,df/(df+t*t))
+ slope = r_num / float(n*ss(x) - square_of_sums(x))
+ intercept = ymean - slope*xmean
+ sterrest = math.sqrt(1-r*r)*samplestdev(y)
+ return slope, intercept, r, prob, sterrest
+
+
+####################################
+##### INFERENTIAL STATISTICS #####
+####################################
+
+def lttest_1samp(a,popmean,printit=0,name='Sample',writemode='a'):
+ """
+Calculates the t-obtained for the independent samples T-test on ONE group
+of scores a, given a population mean. If printit=1, results are printed
+to the screen. If printit='filename', the results are output to 'filename'
+using the given writemode (default=append). Returns t-value, and prob.
+
+Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ x = mean(a)
+ v = var(a)
+ n = len(a)
+ df = n-1
+ svar = ((n-1)*v)/float(df)
+ t = (x-popmean)/math.sqrt(svar*(1.0/n))
+ prob = betai(0.5*df,0.5,float(df)/(df+t*t))
+
+ if printit <> 0:
+ statname = 'Single-sample T-test.'
+ outputpairedstats(printit,writemode,
+ 'Population','--',popmean,0,0,0,
+ name,n,x,v,min(a),max(a),
+ statname,t,prob)
+ return t,prob
+
+
+def lttest_ind (a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO INDEPENDENT samples of
+scores a, and b. From Numerical Recipies, p.483. If printit=1, results
+are printed to the screen. If printit='filename', the results are output
+to 'filename' using the given writemode (default=append). Returns t-value,
+and prob.
+
+Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ x1 = mean(a)
+ x2 = mean(b)
+ v1 = stdev(a)**2
+ v2 = stdev(b)**2
+ n1 = len(a)
+ n2 = len(b)
+ df = n1+n2-2
+ svar = ((n1-1)*v1+(n2-1)*v2)/float(df)
+ if not svar:
+ svar = 1.0e-26
+ t = (x1-x2)/math.sqrt(svar*(1.0/n1 + 1.0/n2))
+ prob = betai(0.5*df,0.5,df/(df+t*t))
+
+ if printit <> 0:
+ statname = 'Independent samples T-test.'
+ outputpairedstats(printit,writemode,
+ name1,n1,x1,v1,min(a),max(a),
+ name2,n2,x2,v2,min(b),max(b),
+ statname,t,prob)
+ return t,prob
+
+
+def lttest_rel (a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO RELATED samples of scores,
+a and b. From Numerical Recipies, p.483. If printit=1, results are
+printed to the screen. If printit='filename', the results are output to
+'filename' using the given writemode (default=append). Returns t-value,
+and prob.
+
+Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ if len(a)<>len(b):
+ raise ValueError, 'Unequal length lists in ttest_rel.'
+ x1 = mean(a)
+ x2 = mean(b)
+ v1 = var(a)
+ v2 = var(b)
+ n = len(a)
+ cov = 0
+ for i in range(len(a)):
+ cov = cov + (a[i]-x1) * (b[i]-x2)
+ df = n-1
+ cov = cov / float(df)
+ sd = math.sqrt((v1+v2 - 2.0*cov)/float(n))
+ t = (x1-x2)/sd
+ prob = betai(0.5*df,0.5,df/(df+t*t))
+
+ if printit <> 0:
+ statname = 'Related samples T-test.'
+ outputpairedstats(printit,writemode,
+ name1,n,x1,v1,min(a),max(a),
+ name2,n,x2,v2,min(b),max(b),
+ statname,t,prob)
+ return t, prob
+
+
+def lchisquare(f_obs,f_exp=None):
+ """
+Calculates a one-way chi square for list of observed frequencies and returns
+the result. If no expected frequencies are given, the total N is assumed to
+be equally distributed across all groups.
+
+Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq.
+Returns: chisquare-statistic, associated p-value
+"""
+ k = len(f_obs) # number of groups
+ if f_exp == None:
+ f_exp = [sum(f_obs)/float(k)] * len(f_obs) # create k bins with = freq.
+ chisq = 0
+ for i in range(len(f_obs)):
+ chisq = chisq + (f_obs[i]-f_exp[i])**2 / float(f_exp[i])
+ return chisq, chisqprob(chisq, k-1)
+
+
+def lks_2samp (data1,data2):
+ """
+Computes the Kolmogorov-Smirnof statistic on 2 samples. From
+Numerical Recipies in C, page 493.
+
+Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions
+Returns: KS D-value, associated p-value
+"""
+ j1 = 0
+ j2 = 0
+ fn1 = 0.0
+ fn2 = 0.0
+ n1 = len(data1)
+ n2 = len(data2)
+ en1 = n1
+ en2 = n2
+ d = 0.0
+ data1.sort()
+ data2.sort()
+ while j1 < n1 and j2 < n2:
+ d1=data1[j1]
+ d2=data2[j2]
+ if d1 <= d2:
+ fn1 = (j1)/float(en1)
+ j1 = j1 + 1
+ if d2 <= d1:
+ fn2 = (j2)/float(en2)
+ j2 = j2 + 1
+ dt = (fn2-fn1)
+ if math.fabs(dt) > math.fabs(d):
+ d = dt
+ try:
+ en = math.sqrt(en1*en2/float(en1+en2))
+ prob = ksprob((en+0.12+0.11/en)*abs(d))
+ except:
+ prob = 1.0
+ return d, prob
+
+
+def lmannwhitneyu(x,y):
+ """
+Calculates a Mann-Whitney U statistic on the provided scores and
+returns the result. Use only when the n in each condition is < 20 and
+you have 2 independent samples of ranks. NOTE: Mann-Whitney U is
+significant if the u-obtained is LESS THAN or equal to the critical
+value of U found in the tables. Equivalent to Kruskal-Wallis H with
+just 2 groups.
+
+Usage: lmannwhitneyu(data)
+Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))
+"""
+ n1 = len(x)
+ n2 = len(y)
+ ranked = rankdata(x+y)
+ rankx = ranked[0:n1] # get the x-ranks
+ ranky = ranked[n1:] # the rest are y-ranks
+ u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx) # calc U for x
+ u2 = n1*n2 - u1 # remainder is U for y
+ bigu = max(u1,u2)
+ smallu = min(u1,u2)
+ proportion = bigu/float(n1*n2)
+ T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores
+ if T == 0:
+ raise ValueError, 'All numbers are identical in lmannwhitneyu'
+ sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0)
+ z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc
+ return smallu, 1.0 - zprob(z) #, proportion
+
+
+def ltiecorrect(rankvals):
+ """
+Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See
+Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences.
+New York: McGraw-Hill. Code adapted from |Stat rankind.c code.
+
+Usage: ltiecorrect(rankvals)
+Returns: T correction factor for U or H
+"""
+ sorted,posn = shellsort(rankvals)
+ n = len(sorted)
+ T = 0.0
+ i = 0
+ while (i<n-1):
+ if sorted[i] == sorted[i+1]:
+ nties = 1
+ while (i<n-1) and (sorted[i] == sorted[i+1]):
+ nties = nties +1
+ i = i +1
+ T = T + nties**3 - nties
+ i = i+1
+ T = T / float(n**3-n)
+ return 1.0 - T
+
+
+def lranksums(x,y):
+ """
+Calculates the rank sums statistic on the provided scores and
+returns the result. Use only when the n in each condition is > 20 and you
+have 2 independent samples of ranks.
+
+Usage: lranksums(x,y)
+Returns: a z-statistic, two-tailed p-value
+"""
+ n1 = len(x)
+ n2 = len(y)
+ alldata = x+y
+ ranked = rankdata(alldata)
+ x = ranked[:n1]
+ y = ranked[n1:]
+ s = sum(x)
+ expected = n1*(n1+n2+1) / 2.0
+ z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0)
+ prob = 2*(1.0 -zprob(abs(z)))
+ return z, prob
+
+
+def lwilcoxont(x,y):
+ """
+Calculates the Wilcoxon T-test for related samples and returns the
+result. A non-parametric T-test.
+
+Usage: lwilcoxont(x,y)
+Returns: a t-statistic, two-tail probability estimate
+"""
+ if len(x) <> len(y):
+ raise ValueError, 'Unequal N in wilcoxont. Aborting.'
+ d=[]
+ for i in range(len(x)):
+ diff = x[i] - y[i]
+ if diff <> 0:
+ d.append(diff)
+ count = len(d)
+ absd = map(abs,d)
+ absranked = rankdata(absd)
+ r_plus = 0.0
+ r_minus = 0.0
+ for i in range(len(absd)):
+ if d[i] < 0:
+ r_minus = r_minus + absranked[i]
+ else:
+ r_plus = r_plus + absranked[i]
+ wt = min(r_plus, r_minus)
+ mn = count * (count+1) * 0.25
+ se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0)
+ z = math.fabs(wt-mn) / se
+ prob = 2*(1.0 -zprob(abs(z)))
+ return wt, prob
+
+
+def lkruskalwallish(*args):
+ """
+The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more
+groups, requiring at least 5 subjects in each group. This function
+calculates the Kruskal-Wallis H-test for 3 or more independent samples
+and returns the result.
+
+Usage: lkruskalwallish(*args)
+Returns: H-statistic (corrected for ties), associated p-value
+"""
+ args = list(args)
+ n = [0]*len(args)
+ all = []
+ n = map(len,args)
+ for i in range(len(args)):
+ all = all + args[i]
+ ranked = rankdata(all)
+ T = tiecorrect(ranked)
+ for i in range(len(args)):
+ args[i] = ranked[0:n[i]]
+ del ranked[0:n[i]]
+ rsums = []
+ for i in range(len(args)):
+ rsums.append(sum(args[i])**2)
+ rsums[i] = rsums[i] / float(n[i])
+ ssbn = sum(rsums)
+ totaln = sum(n)
+ h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1)
+ df = len(args) - 1
+ if T == 0:
+ raise ValueError, 'All numbers are identical in lkruskalwallish'
+ h = h / float(T)
+ return h, chisqprob(h,df)
+
+
+def lfriedmanchisquare(*args):
+ """
+Friedman Chi-Square is a non-parametric, one-way within-subjects
+ANOVA. This function calculates the Friedman Chi-square test for repeated
+measures and returns the result, along with the associated probability
+value. It assumes 3 or more repeated measures. Only 3 levels requires a
+minimum of 10 subjects in the study. Four levels requires 5 subjects per
+level(??).
+
+Usage: lfriedmanchisquare(*args)
+Returns: chi-square statistic, associated p-value
+"""
+ k = len(args)
+ if k < 3:
+ raise ValueError, 'Less than 3 levels. Friedman test not appropriate.'
+ n = len(args[0])
+ data = apply(pstat.abut,tuple(args))
+ for i in range(len(data)):
+ data[i] = rankdata(data[i])
+ ssbn = 0
+ for i in range(k):
+ ssbn = ssbn + sum(args[i])**2
+ chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
+ return chisq, chisqprob(chisq,k-1)
+
+
+####################################
+#### PROBABILITY CALCULATIONS ####
+####################################
+
+def lchisqprob(chisq,df):
+ """
+Returns the (1-tailed) probability value associated with the provided
+chi-square value and df. Adapted from chisq.c in Gary Perlman's |Stat.
+
+Usage: lchisqprob(chisq,df)
+"""
+ BIG = 20.0
+ def ex(x):
+ BIG = 20.0
+ if x < -BIG:
+ return 0.0
+ else:
+ return math.exp(x)
+
+ if chisq <=0 or df < 1:
+ return 1.0
+ a = 0.5 * chisq
+ if df%2 == 0:
+ even = 1
+ else:
+ even = 0
+ if df > 1:
+ y = ex(-a)
+ if even:
+ s = y
+ else:
+ s = 2.0 * zprob(-math.sqrt(chisq))
+ if (df > 2):
+ chisq = 0.5 * (df - 1.0)
+ if even:
+ z = 1.0
+ else:
+ z = 0.5
+ if a > BIG:
+ if even:
+ e = 0.0
+ else:
+ e = math.log(math.sqrt(math.pi))
+ c = math.log(a)
+ while (z <= chisq):
+ e = math.log(z) + e
+ s = s + ex(c*z-a-e)
+ z = z + 1.0
+ return s
+ else:
+ if even:
+ e = 1.0
+ else:
+ e = 1.0 / math.sqrt(math.pi) / math.sqrt(a)
+ c = 0.0
+ while (z <= chisq):
+ e = e * (a/float(z))
+ c = c + e
+ z = z + 1.0
+ return (c*y+s)
+ else:
+ return s
+
+
+def lerfcc(x):
+ """
+Returns the complementary error function erfc(x) with fractional
+error everywhere less than 1.2e-7. Adapted from Numerical Recipies.
+
+Usage: lerfcc(x)
+"""
+ z = abs(x)
+ t = 1.0 / (1.0+0.5*z)
+ ans = t * math.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277)))))))))
+ if x >= 0:
+ return ans
+ else:
+ return 2.0 - ans
+
+
+def lzprob(z):
+ """
+Returns the area under the normal curve 'to the left of' the given z value.
+Thus,
+ for z<0, zprob(z) = 1-tail probability
+ for z>0, 1.0-zprob(z) = 1-tail probability
+ for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability
+Adapted from z.c in Gary Perlman's |Stat.
+
+Usage: lzprob(z)
+"""
+ Z_MAX = 6.0 # maximum meaningful z-value
+ if z == 0.0:
+ x = 0.0
+ else:
+ y = 0.5 * math.fabs(z)
+ if y >= (Z_MAX*0.5):
+ x = 1.0
+ elif (y < 1.0):
+ w = y*y
+ x = ((((((((0.000124818987 * w
+ -0.001075204047) * w +0.005198775019) * w
+ -0.019198292004) * w +0.059054035642) * w
+ -0.151968751364) * w +0.319152932694) * w
+ -0.531923007300) * w +0.797884560593) * y * 2.0
+ else:
+ y = y - 2.0
+ x = (((((((((((((-0.000045255659 * y
+ +0.000152529290) * y -0.000019538132) * y
+ -0.000676904986) * y +0.001390604284) * y
+ -0.000794620820) * y -0.002034254874) * y
+ +0.006549791214) * y -0.010557625006) * y
+ +0.011630447319) * y -0.009279453341) * y
+ +0.005353579108) * y -0.002141268741) * y
+ +0.000535310849) * y +0.999936657524
+ if z > 0.0:
+ prob = ((x+1.0)*0.5)
+ else:
+ prob = ((1.0-x)*0.5)
+ return prob
+
+
+def lksprob(alam):
+ """
+Computes a Kolmolgorov-Smirnov t-test significance level. Adapted from
+Numerical Recipies.
+
+Usage: lksprob(alam)
+"""
+ fac = 2.0
+ sum = 0.0
+ termbf = 0.0
+ a2 = -2.0*alam*alam
+ for j in range(1,201):
+ term = fac*math.exp(a2*j*j)
+ sum = sum + term
+ if math.fabs(term) <= (0.001*termbf) or math.fabs(term) < (1.0e-8*sum):
+ return sum
+ fac = -fac
+ termbf = math.fabs(term)
+ return 1.0 # Get here only if fails to converge; was 0.0!!
+
+
+def lfprob (dfnum, dfden, F):
+ """
+Returns the (1-tailed) significance level (p-value) of an F
+statistic given the degrees of freedom for the numerator (dfR-dfF) and
+the degrees of freedom for the denominator (dfF).
+
+Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn
+"""
+ p = betai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F))
+ return p
+
+
+def lbetacf(a,b,x):
+ """
+This function evaluates the continued fraction form of the incomplete
+Beta function, betai. (Adapted from: Numerical Recipies in C.)
+
+Usage: lbetacf(a,b,x)
+"""
+ ITMAX = 200
+ EPS = 3.0e-7
+
+ bm = az = am = 1.0
+ qab = a+b
+ qap = a+1.0
+ qam = a-1.0
+ bz = 1.0-qab*x/qap
+ for i in range(ITMAX+1):
+ em = float(i+1)
+ tem = em + em
+ d = em*(b-em)*x/((qam+tem)*(a+tem))
+ ap = az + d*am
+ bp = bz+d*bm
+ d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem))
+ app = ap+d*az
+ bpp = bp+d*bz
+ aold = az
+ am = ap/bpp
+ bm = bp/bpp
+ az = app/bpp
+ bz = 1.0
+ if (abs(az-aold)<(EPS*abs(az))):
+ return az
+ print 'a or b too big, or ITMAX too small in Betacf.'
+
+
+def lgammln(xx):
+ """
+Returns the gamma function of xx.
+ Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.
+(Adapted from: Numerical Recipies in C.)
+
+Usage: lgammln(xx)
+"""
+
+ coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516,
+ 0.120858003e-2, -0.536382e-5]
+ x = xx - 1.0
+ tmp = x + 5.5
+ tmp = tmp - (x+0.5)*math.log(tmp)
+ ser = 1.0
+ for j in range(len(coeff)):
+ x = x + 1
+ ser = ser + coeff[j]/x
+ return -tmp + math.log(2.50662827465*ser)
+
+
+def lbetai(a,b,x):
+ """
+Returns the incomplete beta function:
+
+ I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)
+
+where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
+function of a. The continued fraction formulation is implemented here,
+using the betacf function. (Adapted from: Numerical Recipies in C.)
+
+Usage: lbetai(a,b,x)
+"""
+ if (x<0.0 or x>1.0):
+ raise ValueError, 'Bad x in lbetai'
+ if (x==0.0 or x==1.0):
+ bt = 0.0
+ else:
+ bt = math.exp(gammln(a+b)-gammln(a)-gammln(b)+a*math.log(x)+b*
+ math.log(1.0-x))
+ if (x<(a+1.0)/(a+b+2.0)):
+ return bt*betacf(a,b,x)/float(a)
+ else:
+ return 1.0-bt*betacf(b,a,1.0-x)/float(b)
+
+
+####################################
+####### ANOVA CALCULATIONS #######
+####################################
+
+def lF_oneway(*lists):
+ """
+Performs a 1-way ANOVA, returning an F-value and probability given
+any number of groups. From Heiman, pp.394-7.
+
+Usage: F_oneway(*lists) where *lists is any number of lists, one per
+ treatment group
+Returns: F value, one-tailed p-value
+"""
+ a = len(lists) # ANOVA on 'a' groups, each in it's own list
+ means = [0]*a
+ vars = [0]*a
+ ns = [0]*a
+ alldata = []
+ tmp = map(N.array,lists)
+ means = map(amean,tmp)
+ vars = map(avar,tmp)
+ ns = map(len,lists)
+ for i in range(len(lists)):
+ alldata = alldata + lists[i]
+ alldata = N.array(alldata)
+ bign = len(alldata)
+ sstot = ass(alldata)-(asquare_of_sums(alldata)/float(bign))
+ ssbn = 0
+ for list in lists:
+ ssbn = ssbn + asquare_of_sums(N.array(list))/float(len(list))
+ ssbn = ssbn - (asquare_of_sums(alldata)/float(bign))
+ sswn = sstot-ssbn
+ dfbn = a-1
+ dfwn = bign - a
+ msb = ssbn/float(dfbn)
+ msw = sswn/float(dfwn)
+ f = msb/msw
+ prob = fprob(dfbn,dfwn,f)
+ return f, prob
+
+
+def lF_value (ER,EF,dfnum,dfden):
+ """
+Returns an F-statistic given the following:
+ ER = error associated with the null hypothesis (the Restricted model)
+ EF = error associated with the alternate hypothesis (the Full model)
+ dfR-dfF = degrees of freedom of the numerator
+ dfF = degrees of freedom associated with the denominator/Full model
+
+Usage: lF_value(ER,EF,dfnum,dfden)
+"""
+ return ((ER-EF)/float(dfnum) / (EF/float(dfden)))
+
+
+####################################
+######## SUPPORT FUNCTIONS #######
+####################################
+
+def writecc (listoflists,file,writetype='w',extra=2):
+ """
+Writes a list of lists to a file in columns, customized by the max
+size of items within the columns (max size of items in col, +2 characters)
+to specified file. File-overwrite is the default.
+
+Usage: writecc (listoflists,file,writetype='w',extra=2)
+Returns: None
+"""
+ if type(listoflists[0]) not in [ListType,TupleType]:
+ listoflists = [listoflists]
+ outfile = open(file,writetype)
+ rowstokill = []
+ list2print = copy.deepcopy(listoflists)
+ for i in range(len(listoflists)):
+ if listoflists[i] == ['\n'] or listoflists[i]=='\n' or listoflists[i]=='dashes':
+ rowstokill = rowstokill + [i]
+ rowstokill.reverse()
+ for row in rowstokill:
+ del list2print[row]
+ maxsize = [0]*len(list2print[0])
+ for col in range(len(list2print[0])):
+ items = pstat.colex(list2print,col)
+ items = map(pstat.makestr,items)
+ maxsize[col] = max(map(len,items)) + extra
+ for row in listoflists:
+ if row == ['\n'] or row == '\n':
+ outfile.write('\n')
+ elif row == ['dashes'] or row == 'dashes':
+ dashes = [0]*len(maxsize)
+ for j in range(len(maxsize)):
+ dashes[j] = '-'*(maxsize[j]-2)
+ outfile.write(pstat.lineincustcols(dashes,maxsize))
+ else:
+ outfile.write(pstat.lineincustcols(row,maxsize))
+ outfile.write('\n')
+ outfile.close()
+ return None
+
+
+def lincr(l,cap): # to increment a list up to a max-list of 'cap'
+ """
+Simulate a counting system from an n-dimensional list.
+
+Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n
+Returns: next set of values for list l, OR -1 (if overflow)
+"""
+ l[0] = l[0] + 1 # e.g., [0,0,0] --> [2,4,3] (=cap)
+ for i in range(len(l)):
+ if l[i] > cap[i] and i < len(l)-1: # if carryover AND not done
+ l[i] = 0
+ l[i+1] = l[i+1] + 1
+ elif l[i] > cap[i] and i == len(l)-1: # overflow past last column, must be finished
+ l = -1
+ return l
+
+
+def lsum (inlist):
+ """
+Returns the sum of the items in the passed list.
+
+Usage: lsum(inlist)
+"""
+ s = 0
+ for item in inlist:
+ s = s + item
+ return s
+
+
+def lcumsum (inlist):
+ """
+Returns a list consisting of the cumulative sum of the items in the
+passed list.
+
+Usage: lcumsum(inlist)
+"""
+ newlist = copy.deepcopy(inlist)
+ for i in range(1,len(newlist)):
+ newlist[i] = newlist[i] + newlist[i-1]
+ return newlist
+
+
+def lss(inlist):
+ """
+Squares each value in the passed list, adds up these squares and
+returns the result.
+
+Usage: lss(inlist)
+"""
+ ss = 0
+ for item in inlist:
+ ss = ss + item*item
+ return ss
+
+
+def lsummult (list1,list2):
+ """
+Multiplies elements in list1 and list2, element by element, and
+returns the sum of all resulting multiplications. Must provide equal
+length lists.
+
+Usage: lsummult(list1,list2)
+"""
+ if len(list1) <> len(list2):
+ raise ValueError, "Lists not equal length in summult."
+ s = 0
+ for item1,item2 in pstat.abut(list1,list2):
+ s = s + item1*item2
+ return s
+
+
+def lsumdiffsquared(x,y):
+ """
+Takes pairwise differences of the values in lists x and y, squares
+these differences, and returns the sum of these squares.
+
+Usage: lsumdiffsquared(x,y)
+Returns: sum[(x[i]-y[i])**2]
+"""
+ sds = 0
+ for i in range(len(x)):
+ sds = sds + (x[i]-y[i])**2
+ return sds
+
+
+def lsquare_of_sums(inlist):
+ """
+Adds the values in the passed list, squares the sum, and returns
+the result.
+
+Usage: lsquare_of_sums(inlist)
+Returns: sum(inlist[i])**2
+"""
+ s = sum(inlist)
+ return float(s)*s
+
+
+def lshellsort(inlist):
+ """
+Shellsort algorithm. Sorts a 1D-list.
+
+Usage: lshellsort(inlist)
+Returns: sorted-inlist, sorting-index-vector (for original list)
+"""
+ n = len(inlist)
+ svec = copy.deepcopy(inlist)
+ ivec = range(n)
+ gap = n/2 # integer division needed
+ while gap >0:
+ for i in range(gap,n):
+ for j in range(i-gap,-1,-gap):
+ while j>=0 and svec[j]>svec[j+gap]:
+ temp = svec[j]
+ svec[j] = svec[j+gap]
+ svec[j+gap] = temp
+ itemp = ivec[j]
+ ivec[j] = ivec[j+gap]
+ ivec[j+gap] = itemp
+ gap = gap / 2 # integer division needed
+# svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]]
+ return svec, ivec
+
+
+def lrankdata(inlist):
+ """
+Ranks the data in inlist, dealing with ties appropritely. Assumes
+a 1D inlist. Adapted from Gary Perlman's |Stat ranksort.
+
+Usage: lrankdata(inlist)
+Returns: a list of length equal to inlist, containing rank scores
+"""
+ n = len(inlist)
+ svec, ivec = shellsort(inlist)
+ sumranks = 0
+ dupcount = 0
+ newlist = [0]*n
+ for i in range(n):
+ sumranks = sumranks + i
+ dupcount = dupcount + 1
+ if i==n-1 or svec[i] <> svec[i+1]:
+ averank = sumranks / float(dupcount) + 1
+ for j in range(i-dupcount+1,i+1):
+ newlist[ivec[j]] = averank
+ sumranks = 0
+ dupcount = 0
+ return newlist
+
+
+def outputpairedstats(fname,writemode,name1,n1,m1,se1,min1,max1,name2,n2,m2,se2,min2,max2,statname,stat,prob):
+ """
+Prints or write to a file stats for two groups, using the name, n,
+mean, sterr, min and max for each group, as well as the statistic name,
+its value, and the associated p-value.
+
+Usage: outputpairedstats(fname,writemode,
+ name1,n1,mean1,stderr1,min1,max1,
+ name2,n2,mean2,stderr2,min2,max2,
+ statname,stat,prob)
+Returns: None
+"""
+ suffix = '' # for *s after the p-value
+ try:
+ x = prob.shape
+ prob = prob[0]
+ except:
+ pass
+ if prob < 0.001: suffix = ' ***'
+ elif prob < 0.01: suffix = ' **'
+ elif prob < 0.05: suffix = ' *'
+ title = [['Name','N','Mean','SD','Min','Max']]
+ lofl = title+[[name1,n1,round(m1,3),round(math.sqrt(se1),3),min1,max1],
+ [name2,n2,round(m2,3),round(math.sqrt(se2),3),min2,max2]]
+ if type(fname)<>StringType or len(fname)==0:
+ print
+ print statname
+ print
+ pstat.printcc(lofl)
+ print
+ try:
+ if stat.shape == ():
+ stat = stat[0]
+ if prob.shape == ():
+ prob = prob[0]
+ except:
+ pass
+ print 'Test statistic = ',round(stat,3),' p = ',round(prob,3),suffix
+ print
+ else:
+ file = open(fname,writemode)
+ file.write('\n'+statname+'\n\n')
+ file.close()
+ writecc(lofl,fname,'a')
+ file = open(fname,'a')
+ try:
+ if stat.shape == ():
+ stat = stat[0]
+ if prob.shape == ():
+ prob = prob[0]
+ except:
+ pass
+ file.write(pstat.list2string(['\nTest statistic = ',round(stat,4),' p = ',round(prob,4),suffix,'\n\n']))
+ file.close()
+ return None
+
+
+def lfindwithin (data):
+ """
+Returns an integer representing a binary vector, where 1=within-
+subject factor, 0=between. Input equals the entire data 2D list (i.e.,
+column 0=random factor, column -1=measured values (those two are skipped).
+Note: input data is in |Stat format ... a list of lists ("2D list") with
+one row per measured value, first column=subject identifier, last column=
+score, one in-between column per factor (these columns contain level
+designations on each factor). See also stats.anova.__doc__.
+
+Usage: lfindwithin(data) data in |Stat format
+"""
+
+ numfact = len(data[0])-1
+ withinvec = 0
+ for col in range(1,numfact):
+ examplelevel = pstat.unique(pstat.colex(data,col))[0]
+ rows = pstat.linexand(data,col,examplelevel) # get 1 level of this factor
+ factsubjs = pstat.unique(pstat.colex(rows,0))
+ allsubjs = pstat.unique(pstat.colex(data,0))
+ if len(factsubjs) == len(allsubjs): # fewer Ss than scores on this factor?
+ withinvec = withinvec + (1 << col)
+ return withinvec
+
+
+#########################################################
+#########################################################
+####### DISPATCH LISTS AND TUPLES TO ABOVE FCNS #########
+#########################################################
+#########################################################
+
+## CENTRAL TENDENCY:
+geometricmean = Dispatch ( (lgeometricmean, (ListType, TupleType)), )
+harmonicmean = Dispatch ( (lharmonicmean, (ListType, TupleType)), )
+mean = Dispatch ( (lmean, (ListType, TupleType)), )
+median = Dispatch ( (lmedian, (ListType, TupleType)), )
+medianscore = Dispatch ( (lmedianscore, (ListType, TupleType)), )
+mode = Dispatch ( (lmode, (ListType, TupleType)), )
+
+## MOMENTS:
+moment = Dispatch ( (lmoment, (ListType, TupleType)), )
+variation = Dispatch ( (lvariation, (ListType, TupleType)), )
+skew = Dispatch ( (lskew, (ListType, TupleType)), )
+kurtosis = Dispatch ( (lkurtosis, (ListType, TupleType)), )
+describe = Dispatch ( (ldescribe, (ListType, TupleType)), )
+
+## FREQUENCY STATISTICS:
+itemfreq = Dispatch ( (litemfreq, (ListType, TupleType)), )
+scoreatpercentile = Dispatch ( (lscoreatpercentile, (ListType, TupleType)), )
+percentileofscore = Dispatch ( (lpercentileofscore, (ListType, TupleType)), )
+histogram = Dispatch ( (lhistogram, (ListType, TupleType)), )
+cumfreq = Dispatch ( (lcumfreq, (ListType, TupleType)), )
+relfreq = Dispatch ( (lrelfreq, (ListType, TupleType)), )
+
+## VARIABILITY:
+obrientransform = Dispatch ( (lobrientransform, (ListType, TupleType)), )
+samplevar = Dispatch ( (lsamplevar, (ListType, TupleType)), )
+samplestdev = Dispatch ( (lsamplestdev, (ListType, TupleType)), )
+var = Dispatch ( (lvar, (ListType, TupleType)), )
+stdev = Dispatch ( (lstdev, (ListType, TupleType)), )
+sterr = Dispatch ( (lsterr, (ListType, TupleType)), )
+sem = Dispatch ( (lsem, (ListType, TupleType)), )
+z = Dispatch ( (lz, (ListType, TupleType)), )
+zs = Dispatch ( (lzs, (ListType, TupleType)), )
+
+## TRIMMING FCNS:
+trimboth = Dispatch ( (ltrimboth, (ListType, TupleType)), )
+trim1 = Dispatch ( (ltrim1, (ListType, TupleType)), )
+
+## CORRELATION FCNS:
+paired = Dispatch ( (lpaired, (ListType, TupleType)), )
+pearsonr = Dispatch ( (lpearsonr, (ListType, TupleType)), )
+spearmanr = Dispatch ( (lspearmanr, (ListType, TupleType)), )
+pointbiserialr = Dispatch ( (lpointbiserialr, (ListType, TupleType)), )
+kendalltau = Dispatch ( (lkendalltau, (ListType, TupleType)), )
+linregress = Dispatch ( (llinregress, (ListType, TupleType)), )
+
+## INFERENTIAL STATS:
+ttest_1samp = Dispatch ( (lttest_1samp, (ListType, TupleType)), )
+ttest_ind = Dispatch ( (lttest_ind, (ListType, TupleType)), )
+ttest_rel = Dispatch ( (lttest_rel, (ListType, TupleType)), )
+chisquare = Dispatch ( (lchisquare, (ListType, TupleType)), )
+ks_2samp = Dispatch ( (lks_2samp, (ListType, TupleType)), )
+mannwhitneyu = Dispatch ( (lmannwhitneyu, (ListType, TupleType)), )
+ranksums = Dispatch ( (lranksums, (ListType, TupleType)), )
+tiecorrect = Dispatch ( (ltiecorrect, (ListType, TupleType)), )
+wilcoxont = Dispatch ( (lwilcoxont, (ListType, TupleType)), )
+kruskalwallish = Dispatch ( (lkruskalwallish, (ListType, TupleType)), )
+friedmanchisquare = Dispatch ( (lfriedmanchisquare, (ListType, TupleType)), )
+
+## PROBABILITY CALCS:
+chisqprob = Dispatch ( (lchisqprob, (IntType, FloatType)), )
+zprob = Dispatch ( (lzprob, (IntType, FloatType)), )
+ksprob = Dispatch ( (lksprob, (IntType, FloatType)), )
+fprob = Dispatch ( (lfprob, (IntType, FloatType)), )
+betacf = Dispatch ( (lbetacf, (IntType, FloatType)), )
+betai = Dispatch ( (lbetai, (IntType, FloatType)), )
+erfcc = Dispatch ( (lerfcc, (IntType, FloatType)), )
+gammln = Dispatch ( (lgammln, (IntType, FloatType)), )
+
+## ANOVA FUNCTIONS:
+F_oneway = Dispatch ( (lF_oneway, (ListType, TupleType)), )
+F_value = Dispatch ( (lF_value, (ListType, TupleType)), )
+
+## SUPPORT FUNCTIONS:
+incr = Dispatch ( (lincr, (ListType, TupleType)), )
+sum = Dispatch ( (lsum, (ListType, TupleType)), )
+cumsum = Dispatch ( (lcumsum, (ListType, TupleType)), )
+ss = Dispatch ( (lss, (ListType, TupleType)), )
+summult = Dispatch ( (lsummult, (ListType, TupleType)), )
+square_of_sums = Dispatch ( (lsquare_of_sums, (ListType, TupleType)), )
+sumdiffsquared = Dispatch ( (lsumdiffsquared, (ListType, TupleType)), )
+shellsort = Dispatch ( (lshellsort, (ListType, TupleType)), )
+rankdata = Dispatch ( (lrankdata, (ListType, TupleType)), )
+findwithin = Dispatch ( (lfindwithin, (ListType, TupleType)), )
+
+
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+
+try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE
+ import numpy as N
+ import numpy.linalg as LA
+
+
+#####################################
+######## ACENTRAL TENDENCY ########
+#####################################
+
+ def ageometricmean (inarray,dimension=None,keepdims=0):
+ """
+Calculates the geometric mean of the values in the passed array.
+That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in
+the passed array. Use dimension=None to flatten array first. REMEMBER: if
+dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
+if dimension is a sequence, it collapses over all specified dimensions. If
+keepdims is set to 1, the resulting array will have as many dimensions as
+inarray, with only 1 'level' per dim that was collapsed over.
+
+Usage: ageometricmean(inarray,dimension=None,keepdims=0)
+Returns: geometric mean computed over dim(s) listed in dimension
+"""
+ inarray = N.array(inarray,N.float_)
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ size = len(inarray)
+ mult = N.power(inarray,1.0/size)
+ mult = N.multiply.reduce(mult)
+ elif type(dimension) in [IntType,FloatType]:
+ size = inarray.shape[dimension]
+ mult = N.power(inarray,1.0/size)
+ mult = N.multiply.reduce(mult,dimension)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ shp[dimension] = 1
+ sum = N.reshape(sum,shp)
+ else: # must be a SEQUENCE of dims to average over
+ dims = list(dimension)
+ dims.sort()
+ dims.reverse()
+ size = N.array(N.multiply.reduce(N.take(inarray.shape,dims)),N.float_)
+ mult = N.power(inarray,1.0/size)
+ for dim in dims:
+ mult = N.multiply.reduce(mult,dim)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ for dim in dims:
+ shp[dim] = 1
+ mult = N.reshape(mult,shp)
+ return mult
+
+
+ def aharmonicmean (inarray,dimension=None,keepdims=0):
+ """
+Calculates the harmonic mean of the values in the passed array.
+That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in
+the passed array. Use dimension=None to flatten array first. REMEMBER: if
+dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
+if dimension is a sequence, it collapses over all specified dimensions. If
+keepdims is set to 1, the resulting array will have as many dimensions as
+inarray, with only 1 'level' per dim that was collapsed over.
+
+Usage: aharmonicmean(inarray,dimension=None,keepdims=0)
+Returns: harmonic mean computed over dim(s) in dimension
+"""
+ inarray = inarray.astype(N.float_)
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ size = len(inarray)
+ s = N.add.reduce(1.0 / inarray)
+ elif type(dimension) in [IntType,FloatType]:
+ size = float(inarray.shape[dimension])
+ s = N.add.reduce(1.0/inarray, dimension)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ shp[dimension] = 1
+ s = N.reshape(s,shp)
+ else: # must be a SEQUENCE of dims to average over
+ dims = list(dimension)
+ dims.sort()
+ nondims = []
+ for i in range(len(inarray.shape)):
+ if i not in dims:
+ nondims.append(i)
+ tinarray = N.transpose(inarray,nondims+dims) # put keep-dims first
+ idx = [0] *len(nondims)
+ if idx == []:
+ size = len(N.ravel(inarray))
+ s = asum(1.0 / inarray)
+ if keepdims == 1:
+ s = N.reshape([s],N.ones(len(inarray.shape)))
+ else:
+ idx[0] = -1
+ loopcap = N.array(tinarray.shape[0:len(nondims)]) -1
+ s = N.zeros(loopcap+1,N.float_)
+ while incr(idx,loopcap) <> -1:
+ s[idx] = asum(1.0/tinarray[idx])
+ size = N.multiply.reduce(N.take(inarray.shape,dims))
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ for dim in dims:
+ shp[dim] = 1
+ s = N.reshape(s,shp)
+ return size / s
+
+
+ def amean (inarray,dimension=None,keepdims=0):
+ """
+Calculates the arithmatic mean of the values in the passed array.
+That is: 1/n * (x1 + x2 + ... + xn). Defaults to ALL values in the
+passed array. Use dimension=None to flatten array first. REMEMBER: if
+dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
+if dimension is a sequence, it collapses over all specified dimensions. If
+keepdims is set to 1, the resulting array will have as many dimensions as
+inarray, with only 1 'level' per dim that was collapsed over.
+
+Usage: amean(inarray,dimension=None,keepdims=0)
+Returns: arithematic mean calculated over dim(s) in dimension
+"""
+ if inarray.dtype in [N.int_, N.short,N.ubyte]:
+ inarray = inarray.astype(N.float_)
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ sum = N.add.reduce(inarray)
+ denom = float(len(inarray))
+ elif type(dimension) in [IntType,FloatType]:
+ sum = asum(inarray,dimension)
+ denom = float(inarray.shape[dimension])
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ shp[dimension] = 1
+ sum = N.reshape(sum,shp)
+ else: # must be a TUPLE of dims to average over
+ dims = list(dimension)
+ dims.sort()
+ dims.reverse()
+ sum = inarray *1.0
+ for dim in dims:
+ sum = N.add.reduce(sum,dim)
+ denom = N.array(N.multiply.reduce(N.take(inarray.shape,dims)),N.float_)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ for dim in dims:
+ shp[dim] = 1
+ sum = N.reshape(sum,shp)
+ return sum/denom
+
+
+ def amedian (inarray,numbins=1000):
+ """
+Calculates the COMPUTED median value of an array of numbers, given the
+number of bins to use for the histogram (more bins approaches finding the
+precise median value of the array; default number of bins = 1000). From
+G.W. Heiman's Basic Stats, or CRC Probability & Statistics.
+NOTE: THIS ROUTINE ALWAYS uses the entire passed array (flattens it first).
+
+Usage: amedian(inarray,numbins=1000)
+Returns: median calculated over ALL values in inarray
+"""
+ inarray = N.ravel(inarray)
+ (hist, smallest, binsize, extras) = ahistogram(inarray,numbins,[min(inarray),max(inarray)])
+ cumhist = N.cumsum(hist) # make cumulative histogram
+ otherbins = N.greater_equal(cumhist,len(inarray)/2.0)
+ otherbins = list(otherbins) # list of 0/1s, 1s start at median bin
+ cfbin = otherbins.index(1) # get 1st(!) index holding 50%ile score
+ LRL = smallest + binsize*cfbin # get lower read limit of that bin
+ cfbelow = N.add.reduce(hist[0:cfbin]) # cum. freq. below bin
+ freq = hist[cfbin] # frequency IN the 50%ile bin
+ median = LRL + ((len(inarray)/2.0-cfbelow)/float(freq))*binsize # MEDIAN
+ return median
+
+
+ def amedianscore (inarray,dimension=None):
+ """
+Returns the 'middle' score of the passed array. If there is an even
+number of scores, the mean of the 2 middle scores is returned. Can function
+with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can
+be None, to pre-flatten the array, or else dimension must equal 0).
+
+Usage: amedianscore(inarray,dimension=None)
+Returns: 'middle' score of the array, or the mean of the 2 middle scores
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ inarray = N.sort(inarray,dimension)
+ if inarray.shape[dimension] % 2 == 0: # if even number of elements
+ indx = inarray.shape[dimension]/2 # integer division correct
+ median = N.asarray(inarray[indx]+inarray[indx-1]) / 2.0
+ else:
+ indx = inarray.shape[dimension] / 2 # integer division correct
+ median = N.take(inarray,[indx],dimension)
+ if median.shape == (1,):
+ median = median[0]
+ return median
+
+
+ def amode(a, dimension=None):
+ """
+Returns an array of the modal (most common) score in the passed array.
+If there is more than one such score, ONLY THE FIRST is returned.
+The bin-count for the modal values is also returned. Operates on whole
+array (dimension=None), or on a given dimension.
+
+Usage: amode(a, dimension=None)
+Returns: array of bin-counts for mode(s), array of corresponding modal values
+"""
+
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ scores = pstat.aunique(N.ravel(a)) # get ALL unique values
+ testshape = list(a.shape)
+ testshape[dimension] = 1
+ oldmostfreq = N.zeros(testshape)
+ oldcounts = N.zeros(testshape)
+ for score in scores:
+ template = N.equal(a,score)
+ counts = asum(template,dimension,1)
+ mostfrequent = N.where(counts>oldcounts,score,oldmostfreq)
+ oldcounts = N.where(counts>oldcounts,counts,oldcounts)
+ oldmostfreq = mostfrequent
+ return oldcounts, mostfrequent
+
+
+ def atmean(a,limits=None,inclusive=(1,1)):
+ """
+Returns the arithmetic mean of all values in an array, ignoring values
+strictly outside the sequence passed to 'limits'. Note: either limit
+in the sequence, or the value of limits itself, can be set to None. The
+inclusive list/tuple determines whether the lower and upper limiting bounds
+(respectively) are open/exclusive (0) or closed/inclusive (1).
+
+Usage: atmean(a,limits=None,inclusive=(1,1))
+"""
+ if a.dtype in [N.int_, N.short,N.ubyte]:
+ a = a.astype(N.float_)
+ if limits == None:
+ return mean(a)
+ assert type(limits) in [ListType,TupleType,N.ndarray], "Wrong type for limits in atmean"
+ if inclusive[0]: lowerfcn = N.greater_equal
+ else: lowerfcn = N.greater
+ if inclusive[1]: upperfcn = N.less_equal
+ else: upperfcn = N.less
+ if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(N.ravel(a)):
+ raise ValueError, "No array values within given limits (atmean)."
+ elif limits[0]==None and limits[1]<>None:
+ mask = upperfcn(a,limits[1])
+ elif limits[0]<>None and limits[1]==None:
+ mask = lowerfcn(a,limits[0])
+ elif limits[0]<>None and limits[1]<>None:
+ mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1])
+ s = float(N.add.reduce(N.ravel(a*mask)))
+ n = float(N.add.reduce(N.ravel(mask)))
+ return s/n
+
+
+ def atvar(a,limits=None,inclusive=(1,1)):
+ """
+Returns the sample variance of values in an array, (i.e., using N-1),
+ignoring values strictly outside the sequence passed to 'limits'.
+Note: either limit in the sequence, or the value of limits itself,
+can be set to None. The inclusive list/tuple determines whether the lower
+and upper limiting bounds (respectively) are open/exclusive (0) or
+closed/inclusive (1). ASSUMES A FLAT ARRAY (OR ELSE PREFLATTENS).
+
+Usage: atvar(a,limits=None,inclusive=(1,1))
+"""
+ a = a.astype(N.float_)
+ if limits == None or limits == [None,None]:
+ return avar(a)
+ assert type(limits) in [ListType,TupleType,N.ndarray], "Wrong type for limits in atvar"
+ if inclusive[0]: lowerfcn = N.greater_equal
+ else: lowerfcn = N.greater
+ if inclusive[1]: upperfcn = N.less_equal
+ else: upperfcn = N.less
+ if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(N.ravel(a)):
+ raise ValueError, "No array values within given limits (atvar)."
+ elif limits[0]==None and limits[1]<>None:
+ mask = upperfcn(a,limits[1])
+ elif limits[0]<>None and limits[1]==None:
+ mask = lowerfcn(a,limits[0])
+ elif limits[0]<>None and limits[1]<>None:
+ mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1])
+
+ a = N.compress(mask,a) # squish out excluded values
+ return avar(a)
+
+
+ def atmin(a,lowerlimit=None,dimension=None,inclusive=1):
+ """
+Returns the minimum value of a, along dimension, including only values less
+than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None,
+all values in the array are used.
+
+Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1)
+"""
+ if inclusive: lowerfcn = N.greater
+ else: lowerfcn = N.greater_equal
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if lowerlimit == None:
+ lowerlimit = N.minimum.reduce(N.ravel(a))-11
+ biggest = N.maximum.reduce(N.ravel(a))
+ ta = N.where(lowerfcn(a,lowerlimit),a,biggest)
+ return N.minimum.reduce(ta,dimension)
+
+
+ def atmax(a,upperlimit,dimension=None,inclusive=1):
+ """
+Returns the maximum value of a, along dimension, including only values greater
+than (or equal to, if inclusive=1) upperlimit. If the limit is set to None,
+a limit larger than the max value in the array is used.
+
+Usage: atmax(a,upperlimit,dimension=None,inclusive=1)
+"""
+ if inclusive: upperfcn = N.less
+ else: upperfcn = N.less_equal
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if upperlimit == None:
+ upperlimit = N.maximum.reduce(N.ravel(a))+1
+ smallest = N.minimum.reduce(N.ravel(a))
+ ta = N.where(upperfcn(a,upperlimit),a,smallest)
+ return N.maximum.reduce(ta,dimension)
+
+
+ def atstdev(a,limits=None,inclusive=(1,1)):
+ """
+Returns the standard deviation of all values in an array, ignoring values
+strictly outside the sequence passed to 'limits'. Note: either limit
+in the sequence, or the value of limits itself, can be set to None. The
+inclusive list/tuple determines whether the lower and upper limiting bounds
+(respectively) are open/exclusive (0) or closed/inclusive (1).
+
+Usage: atstdev(a,limits=None,inclusive=(1,1))
+"""
+ return N.sqrt(tvar(a,limits,inclusive))
+
+
+ def atsem(a,limits=None,inclusive=(1,1)):
+ """
+Returns the standard error of the mean for the values in an array,
+(i.e., using N for the denominator), ignoring values strictly outside
+the sequence passed to 'limits'. Note: either limit in the sequence,
+or the value of limits itself, can be set to None. The inclusive list/tuple
+determines whether the lower and upper limiting bounds (respectively) are
+open/exclusive (0) or closed/inclusive (1).
+
+Usage: atsem(a,limits=None,inclusive=(1,1))
+"""
+ sd = tstdev(a,limits,inclusive)
+ if limits == None or limits == [None,None]:
+ n = float(len(N.ravel(a)))
+ limits = [min(a)-1, max(a)+1]
+ assert type(limits) in [ListType,TupleType,N.ndarray], "Wrong type for limits in atsem"
+ if inclusive[0]: lowerfcn = N.greater_equal
+ else: lowerfcn = N.greater
+ if inclusive[1]: upperfcn = N.less_equal
+ else: upperfcn = N.less
+ if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(N.ravel(a)):
+ raise ValueError, "No array values within given limits (atsem)."
+ elif limits[0]==None and limits[1]<>None:
+ mask = upperfcn(a,limits[1])
+ elif limits[0]<>None and limits[1]==None:
+ mask = lowerfcn(a,limits[0])
+ elif limits[0]<>None and limits[1]<>None:
+ mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1])
+ term1 = N.add.reduce(N.ravel(a*a*mask))
+ n = float(N.add.reduce(N.ravel(mask)))
+ return sd/math.sqrt(n)
+
+
+#####################################
+############ AMOMENTS #############
+#####################################
+
+ def amoment(a,moment=1,dimension=None):
+ """
+Calculates the nth moment about the mean for a sample (defaults to the
+1st moment). Generally used to calculate coefficients of skewness and
+kurtosis. Dimension can equal None (ravel array first), an integer
+(the dimension over which to operate), or a sequence (operate over
+multiple dimensions).
+
+Usage: amoment(a,moment=1,dimension=None)
+Returns: appropriate moment along given dimension
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if moment == 1:
+ return 0.0
+ else:
+ mn = amean(a,dimension,1) # 1=keepdims
+ s = N.power((a-mn),moment)
+ return amean(s,dimension)
+
+
+ def avariation(a,dimension=None):
+ """
+Returns the coefficient of variation, as defined in CRC Standard
+Probability and Statistics, p.6. Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions).
+
+Usage: avariation(a,dimension=None)
+"""
+ return 100.0*asamplestdev(a,dimension)/amean(a,dimension)
+
+
+ def askew(a,dimension=None):
+ """
+Returns the skewness of a distribution (normal ==> 0.0; >0 means extra
+weight in left tail). Use askewtest() to see if it's close enough.
+Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions).
+
+Usage: askew(a, dimension=None)
+Returns: skew of vals in a along dimension, returning ZERO where all vals equal
+"""
+ denom = N.power(amoment(a,2,dimension),1.5)
+ zero = N.equal(denom,0)
+ if type(denom) == N.ndarray and asum(zero) <> 0:
+ print "Number of zeros in askew: ",asum(zero)
+ denom = denom + zero # prevent divide-by-zero
+ return N.where(zero, 0, amoment(a,3,dimension)/denom)
+
+
+ def akurtosis(a,dimension=None):
+ """
+Returns the kurtosis of a distribution (normal ==> 3.0; >3 means
+heavier in the tails, and usually more peaked). Use akurtosistest()
+to see if it's close enough. Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions).
+
+Usage: akurtosis(a,dimension=None)
+Returns: kurtosis of values in a along dimension, and ZERO where all vals equal
+"""
+ denom = N.power(amoment(a,2,dimension),2)
+ zero = N.equal(denom,0)
+ if type(denom) == N.ndarray and asum(zero) <> 0:
+ print "Number of zeros in akurtosis: ",asum(zero)
+ denom = denom + zero # prevent divide-by-zero
+ return N.where(zero,0,amoment(a,4,dimension)/denom)
+
+
+ def adescribe(inarray,dimension=None):
+ """
+Returns several descriptive statistics of the passed array. Dimension
+can equal None (ravel array first), an integer (the dimension over
+which to operate), or a sequence (operate over multiple dimensions).
+
+Usage: adescribe(inarray,dimension=None)
+Returns: n, (min,max), mean, standard deviation, skew, kurtosis
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ n = inarray.shape[dimension]
+ mm = (N.minimum.reduce(inarray),N.maximum.reduce(inarray))
+ m = amean(inarray,dimension)
+ sd = astdev(inarray,dimension)
+ skew = askew(inarray,dimension)
+ kurt = akurtosis(inarray,dimension)
+ return n, mm, m, sd, skew, kurt
+
+
+#####################################
+######## NORMALITY TESTS ##########
+#####################################
+
+ def askewtest(a,dimension=None):
+ """
+Tests whether the skew is significantly different from a normal
+distribution. Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions).
+
+Usage: askewtest(a,dimension=None)
+Returns: z-score and 2-tail z-probability
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ b2 = askew(a,dimension)
+ n = float(a.shape[dimension])
+ y = b2 * N.sqrt(((n+1)*(n+3)) / (6.0*(n-2)) )
+ beta2 = ( 3.0*(n*n+27*n-70)*(n+1)*(n+3) ) / ( (n-2.0)*(n+5)*(n+7)*(n+9) )
+ W2 = -1 + N.sqrt(2*(beta2-1))
+ delta = 1/N.sqrt(N.log(N.sqrt(W2)))
+ alpha = N.sqrt(2/(W2-1))
+ y = N.where(y==0,1,y)
+ Z = delta*N.log(y/alpha + N.sqrt((y/alpha)**2+1))
+ return Z, (1.0-zprob(Z))*2
+
+
+ def akurtosistest(a,dimension=None):
+ """
+Tests whether a dataset has normal kurtosis (i.e.,
+kurtosis=3(n-1)/(n+1)) Valid only for n>20. Dimension can equal None
+(ravel array first), an integer (the dimension over which to operate),
+or a sequence (operate over multiple dimensions).
+
+Usage: akurtosistest(a,dimension=None)
+Returns: z-score and 2-tail z-probability, returns 0 for bad pixels
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ n = float(a.shape[dimension])
+ if n<20:
+ print "akurtosistest only valid for n>=20 ... continuing anyway, n=",n
+ b2 = akurtosis(a,dimension)
+ E = 3.0*(n-1) /(n+1)
+ varb2 = 24.0*n*(n-2)*(n-3) / ((n+1)*(n+1)*(n+3)*(n+5))
+ x = (b2-E)/N.sqrt(varb2)
+ sqrtbeta1 = 6.0*(n*n-5*n+2)/((n+7)*(n+9)) * N.sqrt((6.0*(n+3)*(n+5))/
+ (n*(n-2)*(n-3)))
+ A = 6.0 + 8.0/sqrtbeta1 *(2.0/sqrtbeta1 + N.sqrt(1+4.0/(sqrtbeta1**2)))
+ term1 = 1 -2/(9.0*A)
+ denom = 1 +x*N.sqrt(2/(A-4.0))
+ denom = N.where(N.less(denom,0), 99, denom)
+ term2 = N.where(N.equal(denom,0), term1, N.power((1-2.0/A)/denom,1/3.0))
+ Z = ( term1 - term2 ) / N.sqrt(2/(9.0*A))
+ Z = N.where(N.equal(denom,99), 0, Z)
+ return Z, (1.0-zprob(Z))*2
+
+
+ def anormaltest(a,dimension=None):
+ """
+Tests whether skew and/OR kurtosis of dataset differs from normal
+curve. Can operate over multiple dimensions. Dimension can equal
+None (ravel array first), an integer (the dimension over which to
+operate), or a sequence (operate over multiple dimensions).
+
+Usage: anormaltest(a,dimension=None)
+Returns: z-score and 2-tail probability
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ s,p = askewtest(a,dimension)
+ k,p = akurtosistest(a,dimension)
+ k2 = N.power(s,2) + N.power(k,2)
+ return k2, achisqprob(k2,2)
+
+
+#####################################
+###### AFREQUENCY FUNCTIONS #######
+#####################################
+
+ def aitemfreq(a):
+ """
+Returns a 2D array of item frequencies. Column 1 contains item values,
+column 2 contains their respective counts. Assumes a 1D array is passed.
+@@@sorting OK?
+
+Usage: aitemfreq(a)
+Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)
+"""
+ scores = pstat.aunique(a)
+ scores = N.sort(scores)
+ freq = N.zeros(len(scores))
+ for i in range(len(scores)):
+ freq[i] = N.add.reduce(N.equal(a,scores[i]))
+ return N.array(pstat.aabut(scores, freq))
+
+
+ def ascoreatpercentile (inarray, percent):
+ """
+Usage: ascoreatpercentile(inarray,percent) 0<percent<100
+Returns: score at given percentile, relative to inarray distribution
+"""
+ percent = percent / 100.0
+ targetcf = percent*len(inarray)
+ h, lrl, binsize, extras = histogram(inarray)
+ cumhist = cumsum(h*1)
+ for i in range(len(cumhist)):
+ if cumhist[i] >= targetcf:
+ break
+ score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i)
+ return score
+
+
+ def apercentileofscore (inarray,score,histbins=10,defaultlimits=None):
+ """
+Note: result of this function depends on the values used to histogram
+the data(!).
+
+Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None)
+Returns: percentile-position of score (0-100) relative to inarray
+"""
+ h, lrl, binsize, extras = histogram(inarray,histbins,defaultlimits)
+ cumhist = cumsum(h*1)
+ i = int((score - lrl)/float(binsize))
+ pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inarray)) * 100
+ return pct
+
+
+ def ahistogram (inarray,numbins=10,defaultlimits=None,printextras=1):
+ """
+Returns (i) an array of histogram bin counts, (ii) the smallest value
+of the histogram binning, and (iii) the bin width (the last 2 are not
+necessarily integers). Default number of bins is 10. Defaultlimits
+can be None (the routine picks bins spanning all the numbers in the
+inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the
+following: array of bin values, lowerreallimit, binsize, extrapoints.
+
+Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1)
+Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range)
+"""
+ inarray = N.ravel(inarray) # flatten any >1D arrays
+ if (defaultlimits <> None):
+ lowerreallimit = defaultlimits[0]
+ upperreallimit = defaultlimits[1]
+ binsize = (upperreallimit-lowerreallimit) / float(numbins)
+ else:
+ Min = N.minimum.reduce(inarray)
+ Max = N.maximum.reduce(inarray)
+ estbinwidth = float(Max - Min)/float(numbins) + 1e-6
+ binsize = (Max-Min+estbinwidth)/float(numbins)
+ lowerreallimit = Min - binsize/2.0 #lower real limit,1st bin
+ bins = N.zeros(numbins)
+ extrapoints = 0
+ for num in inarray:
+ try:
+ if (num-lowerreallimit) < 0:
+ extrapoints = extrapoints + 1
+ else:
+ bintoincrement = int((num-lowerreallimit) / float(binsize))
+ bins[bintoincrement] = bins[bintoincrement] + 1
+ except: # point outside lower/upper limits
+ extrapoints = extrapoints + 1
+ if (extrapoints > 0 and printextras == 1):
+ print '\nPoints outside given histogram range =',extrapoints
+ return (bins, lowerreallimit, binsize, extrapoints)
+
+
+ def acumfreq(a,numbins=10,defaultreallimits=None):
+ """
+Returns a cumulative frequency histogram, using the histogram function.
+Defaultreallimits can be None (use all data), or a 2-sequence containing
+lower and upper limits on values to include.
+
+Usage: acumfreq(a,numbins=10,defaultreallimits=None)
+Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h,l,b,e = histogram(a,numbins,defaultreallimits)
+ cumhist = cumsum(h*1)
+ return cumhist,l,b,e
+
+
+ def arelfreq(a,numbins=10,defaultreallimits=None):
+ """
+Returns a relative frequency histogram, using the histogram function.
+Defaultreallimits can be None (use all data), or a 2-sequence containing
+lower and upper limits on values to include.
+
+Usage: arelfreq(a,numbins=10,defaultreallimits=None)
+Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h,l,b,e = histogram(a,numbins,defaultreallimits)
+ h = N.array(h/float(a.shape[0]))
+ return h,l,b,e
+
+
+#####################################
+###### AVARIABILITY FUNCTIONS #####
+#####################################
+
+ def aobrientransform(*args):
+ """
+Computes a transform on input data (any number of columns). Used to
+test for homogeneity of variance prior to running one-way stats. Each
+array in *args is one level of a factor. If an F_oneway() run on the
+transformed data and found significant, variances are unequal. From
+Maxwell and Delaney, p.112.
+
+Usage: aobrientransform(*args) *args = 1D arrays, one per level of factor
+Returns: transformed data for use in an ANOVA
+"""
+ TINY = 1e-10
+ k = len(args)
+ n = N.zeros(k,N.float_)
+ v = N.zeros(k,N.float_)
+ m = N.zeros(k,N.float_)
+ nargs = []
+ for i in range(k):
+ nargs.append(args[i].astype(N.float_))
+ n[i] = float(len(nargs[i]))
+ v[i] = var(nargs[i])
+ m[i] = mean(nargs[i])
+ for j in range(k):
+ for i in range(n[j]):
+ t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2
+ t2 = 0.5*v[j]*(n[j]-1.0)
+ t3 = (n[j]-1.0)*(n[j]-2.0)
+ nargs[j][i] = (t1-t2) / float(t3)
+ check = 1
+ for j in range(k):
+ if v[j] - mean(nargs[j]) > TINY:
+ check = 0
+ if check <> 1:
+ raise ValueError, 'Lack of convergence in obrientransform.'
+ else:
+ return N.array(nargs)
+
+
+ def asamplevar (inarray,dimension=None,keepdims=0):
+ """
+Returns the sample standard deviation of the values in the passed
+array (i.e., using N). Dimension can equal None (ravel array first),
+an integer (the dimension over which to operate), or a sequence
+(operate over multiple dimensions). Set keepdims=1 to return an array
+with the same number of dimensions as inarray.
+
+Usage: asamplevar(inarray,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ if dimension == 1:
+ mn = amean(inarray,dimension)[:,N.NewAxis]
+ else:
+ mn = amean(inarray,dimension,keepdims=1)
+ deviations = inarray - mn
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n*inarray.shape[d]
+ else:
+ n = inarray.shape[dimension]
+ svar = ass(deviations,dimension,keepdims) / float(n)
+ return svar
+
+
+ def asamplestdev (inarray, dimension=None, keepdims=0):
+ """
+Returns the sample standard deviation of the values in the passed
+array (i.e., using N). Dimension can equal None (ravel array first),
+an integer (the dimension over which to operate), or a sequence
+(operate over multiple dimensions). Set keepdims=1 to return an array
+with the same number of dimensions as inarray.
+
+Usage: asamplestdev(inarray,dimension=None,keepdims=0)
+"""
+ return N.sqrt(asamplevar(inarray,dimension,keepdims))
+
+
+ def asignaltonoise(instack,dimension=0):
+ """
+Calculates signal-to-noise. Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions).
+
+Usage: asignaltonoise(instack,dimension=0):
+Returns: array containing the value of (mean/stdev) along dimension,
+ or 0 when stdev=0
+"""
+ m = mean(instack,dimension)
+ sd = stdev(instack,dimension)
+ return N.where(sd==0,0,m/sd)
+
+
+ def acov (x,y, dimension=None,keepdims=0):
+ """
+Returns the estimated covariance of the values in the passed
+array (i.e., N-1). Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions). Set keepdims=1 to return an array with the
+same number of dimensions as inarray.
+
+Usage: acov(x,y,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ x = N.ravel(x)
+ y = N.ravel(y)
+ dimension = 0
+ xmn = amean(x,dimension,1) # keepdims
+ xdeviations = x - xmn
+ ymn = amean(y,dimension,1) # keepdims
+ ydeviations = y - ymn
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n*x.shape[d]
+ else:
+ n = x.shape[dimension]
+ covar = N.sum(xdeviations*ydeviations)/float(n-1)
+ return covar
+
+
+ def avar (inarray, dimension=None,keepdims=0):
+ """
+Returns the estimated population variance of the values in the passed
+array (i.e., N-1). Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions). Set keepdims=1 to return an array with the
+same number of dimensions as inarray.
+
+Usage: avar(inarray,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ mn = amean(inarray,dimension,1)
+ deviations = inarray - mn
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n*inarray.shape[d]
+ else:
+ n = inarray.shape[dimension]
+ var = ass(deviations,dimension,keepdims)/float(n-1)
+ return var
+
+
+ def astdev (inarray, dimension=None, keepdims=0):
+ """
+Returns the estimated population standard deviation of the values in
+the passed array (i.e., N-1). Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions). Set keepdims=1 to return
+an array with the same number of dimensions as inarray.
+
+Usage: astdev(inarray,dimension=None,keepdims=0)
+"""
+ return N.sqrt(avar(inarray,dimension,keepdims))
+
+
+ def asterr (inarray, dimension=None, keepdims=0):
+ """
+Returns the estimated population standard error of the values in the
+passed array (i.e., N-1). Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions). Set keepdims=1 to return
+an array with the same number of dimensions as inarray.
+
+Usage: asterr(inarray,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ return astdev(inarray,dimension,keepdims) / float(N.sqrt(inarray.shape[dimension]))
+
+
+ def asem (inarray, dimension=None, keepdims=0):
+ """
+Returns the standard error of the mean (i.e., using N) of the values
+in the passed array. Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions). Set keepdims=1 to return an array with the
+same number of dimensions as inarray.
+
+Usage: asem(inarray,dimension=None, keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n*inarray.shape[d]
+ else:
+ n = inarray.shape[dimension]
+ s = asamplestdev(inarray,dimension,keepdims) / N.sqrt(n-1)
+ return s
+
+
+ def az (a, score):
+ """
+Returns the z-score of a given input score, given thearray from which
+that score came. Not appropriate for population calculations, nor for
+arrays > 1D.
+
+Usage: az(a, score)
+"""
+ z = (score-amean(a)) / asamplestdev(a)
+ return z
+
+
+ def azs (a):
+ """
+Returns a 1D array of z-scores, one for each score in the passed array,
+computed relative to the passed array.
+
+Usage: azs(a)
+"""
+ zscores = []
+ for item in a:
+ zscores.append(z(a,item))
+ return N.array(zscores)
+
+
+ def azmap (scores, compare, dimension=0):
+ """
+Returns an array of z-scores the shape of scores (e.g., [x,y]), compared to
+array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0
+of the compare array.
+
+Usage: azs(scores, compare, dimension=0)
+"""
+ mns = amean(compare,dimension)
+ sstd = asamplestdev(compare,0)
+ return (scores - mns) / sstd
+
+
+#####################################
+####### ATRIMMING FUNCTIONS #######
+#####################################
+
+## deleted around() as it's in numpy now
+
+ def athreshold(a,threshmin=None,threshmax=None,newval=0):
+ """
+Like Numeric.clip() except that values <threshmid or >threshmax are replaced
+by newval instead of by threshmin/threshmax (respectively).
+
+Usage: athreshold(a,threshmin=None,threshmax=None,newval=0)
+Returns: a, with values <threshmin or >threshmax replaced with newval
+"""
+ mask = N.zeros(a.shape)
+ if threshmin <> None:
+ mask = mask + N.where(a<threshmin,1,0)
+ if threshmax <> None:
+ mask = mask + N.where(a>threshmax,1,0)
+ mask = N.clip(mask,0,1)
+ return N.where(mask,newval,a)
+
+
+ def atrimboth (a,proportiontocut):
+ """
+Slices off the passed proportion of items from BOTH ends of the passed
+array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND
+'rightmost' 10% of scores. You must pre-sort the array if you want
+"proper" trimming. Slices off LESS if proportion results in a
+non-integer slice index (i.e., conservatively slices off
+proportiontocut).
+
+Usage: atrimboth (a,proportiontocut)
+Returns: trimmed version of array a
+"""
+ lowercut = int(proportiontocut*len(a))
+ uppercut = len(a) - lowercut
+ return a[lowercut:uppercut]
+
+
+ def atrim1 (a,proportiontocut,tail='right'):
+ """
+Slices off the passed proportion of items from ONE end of the passed
+array (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost'
+10% of scores). Slices off LESS if proportion results in a non-integer
+slice index (i.e., conservatively slices off proportiontocut).
+
+Usage: atrim1(a,proportiontocut,tail='right') or set tail='left'
+Returns: trimmed version of array a
+"""
+ if string.lower(tail) == 'right':
+ lowercut = 0
+ uppercut = len(a) - int(proportiontocut*len(a))
+ elif string.lower(tail) == 'left':
+ lowercut = int(proportiontocut*len(a))
+ uppercut = len(a)
+ return a[lowercut:uppercut]
+
+
+#####################################
+##### ACORRELATION FUNCTIONS ######
+#####################################
+
+ def acovariance(X):
+ """
+Computes the covariance matrix of a matrix X. Requires a 2D matrix input.
+
+Usage: acovariance(X)
+Returns: covariance matrix of X
+"""
+ if len(X.shape) <> 2:
+ raise TypeError, "acovariance requires 2D matrices"
+ n = X.shape[0]
+ mX = amean(X,0)
+ return N.dot(N.transpose(X),X) / float(n) - N.multiply.outer(mX,mX)
+
+
+ def acorrelation(X):
+ """
+Computes the correlation matrix of a matrix X. Requires a 2D matrix input.
+
+Usage: acorrelation(X)
+Returns: correlation matrix of X
+"""
+ C = acovariance(X)
+ V = N.diagonal(C)
+ return C / N.sqrt(N.multiply.outer(V,V))
+
+
+ def apaired(x,y):
+ """
+Interactively determines the type of data in x and y, and then runs the
+appropriated statistic for paired group data.
+
+Usage: apaired(x,y) x,y = the two arrays of values to be compared
+Returns: appropriate statistic name, value, and probability
+"""
+ samples = ''
+ while samples not in ['i','r','I','R','c','C']:
+ print '\nIndependent or related samples, or correlation (i,r,c): ',
+ samples = raw_input()
+
+ if samples in ['i','I','r','R']:
+ print '\nComparing variances ...',
+# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
+ r = obrientransform(x,y)
+ f,p = F_oneway(pstat.colex(r,0),pstat.colex(r,1))
+ if p<0.05:
+ vartype='unequal, p='+str(round(p,4))
+ else:
+ vartype='equal'
+ print vartype
+ if samples in ['i','I']:
+ if vartype[0]=='e':
+ t,p = ttest_ind(x,y,None,0)
+ print '\nIndependent samples t-test: ', round(t,4),round(p,4)
+ else:
+ if len(x)>20 or len(y)>20:
+ z,p = ranksums(x,y)
+ print '\nRank Sums test (NONparametric, n>20): ', round(z,4),round(p,4)
+ else:
+ u,p = mannwhitneyu(x,y)
+ print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u,4),round(p,4)
+
+ else: # RELATED SAMPLES
+ if vartype[0]=='e':
+ t,p = ttest_rel(x,y,0)
+ print '\nRelated samples t-test: ', round(t,4),round(p,4)
+ else:
+ t,p = ranksums(x,y)
+ print '\nWilcoxon T-test (NONparametric): ', round(t,4),round(p,4)
+ else: # CORRELATION ANALYSIS
+ corrtype = ''
+ while corrtype not in ['c','C','r','R','d','D']:
+ print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
+ corrtype = raw_input()
+ if corrtype in ['c','C']:
+ m,b,r,p,see = linregress(x,y)
+ print '\nLinear regression for continuous variables ...'
+ lol = [['Slope','Intercept','r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]]
+ pstat.printcc(lol)
+ elif corrtype in ['r','R']:
+ r,p = spearmanr(x,y)
+ print '\nCorrelation for ranked variables ...'
+ print "Spearman's r: ",round(r,4),round(p,4)
+ else: # DICHOTOMOUS
+ r,p = pointbiserialr(x,y)
+ print '\nAssuming x contains a dichotomous variable ...'
+ print 'Point Biserial r: ',round(r,4),round(p,4)
+ print '\n\n'
+ return None
+
+
+ def dices(x,y):
+ """
+Calculates Dice's coefficient ... (2*number of common terms)/(number of terms in x +
+number of terms in y). Returns a value between 0 (orthogonal) and 1.
+
+Usage: dices(x,y)
+"""
+ import sets
+ x = sets.Set(x)
+ y = sets.Set(y)
+ common = len(x.intersection(y))
+ total = float(len(x) + len(y))
+ return 2*common/total
+
+
+ def icc(x,y=None,verbose=0):
+ """
+Calculates intraclass correlation coefficients using simple, Type I sums of squares.
+If only one variable is passed, assumed it's an Nx2 matrix
+
+Usage: icc(x,y=None,verbose=0)
+Returns: icc rho, prob ####PROB IS A GUESS BASED ON PEARSON
+"""
+ TINY = 1.0e-20
+ if y:
+ all = N.concatenate([x,y],0)
+ else:
+ all = x+0
+ x = all[:,0]
+ y = all[:,1]
+ totalss = ass(all-mean(all))
+ pairmeans = (x+y)/2.
+ withinss = ass(x-pairmeans) + ass(y-pairmeans)
+ withindf = float(len(x))
+ betwdf = float(len(x)-1)
+ withinms = withinss / withindf
+ betweenms = (totalss-withinss) / betwdf
+ rho = (betweenms-withinms)/(withinms+betweenms)
+ t = rho*math.sqrt(betwdf/((1.0-rho+TINY)*(1.0+rho+TINY)))
+ prob = abetai(0.5*betwdf,0.5,betwdf/(betwdf+t*t),verbose)
+ return rho, prob
+
+
+ def alincc(x,y):
+ """
+Calculates Lin's concordance correlation coefficient.
+
+Usage: alincc(x,y) where x, y are equal-length arrays
+Returns: Lin's CC
+"""
+ x = N.ravel(x)
+ y = N.ravel(y)
+ covar = acov(x,y)*(len(x)-1)/float(len(x)) # correct denom to n
+ xvar = avar(x)*(len(x)-1)/float(len(x)) # correct denom to n
+ yvar = avar(y)*(len(y)-1)/float(len(y)) # correct denom to n
+ lincc = (2 * covar) / ((xvar+yvar) +((amean(x)-amean(y))**2))
+ return lincc
+
+
+ def apearsonr(x,y,verbose=1):
+ """
+Calculates a Pearson correlation coefficient and returns p. Taken
+from Heiman's Basic Statistics for the Behav. Sci (2nd), p.195.
+
+Usage: apearsonr(x,y,verbose=1) where x,y are equal length arrays
+Returns: Pearson's r, two-tailed p-value
+"""
+ TINY = 1.0e-20
+ n = len(x)
+ xmean = amean(x)
+ ymean = amean(y)
+ r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y)
+ r_den = math.sqrt((n*ass(x) - asquare_of_sums(x))*(n*ass(y)-asquare_of_sums(y)))
+ r = (r_num / r_den)
+ df = n-2
+ t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
+ prob = abetai(0.5*df,0.5,df/(df+t*t),verbose)
+ return r,prob
+
+
+ def aspearmanr(x,y):
+ """
+Calculates a Spearman rank-order correlation coefficient. Taken
+from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.
+
+Usage: aspearmanr(x,y) where x,y are equal-length arrays
+Returns: Spearman's r, two-tailed p-value
+"""
+ TINY = 1e-30
+ n = len(x)
+ rankx = rankdata(x)
+ ranky = rankdata(y)
+ dsq = N.add.reduce((rankx-ranky)**2)
+ rs = 1 - 6*dsq / float(n*(n**2-1))
+ t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs)))
+ df = n-2
+ probrs = abetai(0.5*df,0.5,df/(df+t*t))
+# probability values for rs are from part 2 of the spearman function in
+# Numerical Recipies, p.510. They close to tables, but not exact.(?)
+ return rs, probrs
+
+
+ def apointbiserialr(x,y):
+ """
+Calculates a point-biserial correlation coefficient and the associated
+probability value. Taken from Heiman's Basic Statistics for the Behav.
+Sci (1st), p.194.
+
+Usage: apointbiserialr(x,y) where x,y are equal length arrays
+Returns: Point-biserial r, two-tailed p-value
+"""
+ TINY = 1e-30
+ categories = pstat.aunique(x)
+ data = pstat.aabut(x,y)
+ if len(categories) <> 2:
+ raise ValueError, "Exactly 2 categories required (in x) for pointbiserialr()."
+ else: # there are 2 categories, continue
+ codemap = pstat.aabut(categories,N.arange(2))
+ recoded = pstat.arecode(data,codemap,0)
+ x = pstat.alinexand(data,0,categories[0])
+ y = pstat.alinexand(data,0,categories[1])
+ xmean = amean(pstat.acolex(x,1))
+ ymean = amean(pstat.acolex(y,1))
+ n = len(data)
+ adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n)))
+ rpb = (ymean - xmean)/asamplestdev(pstat.acolex(data,1))*adjust
+ df = n-2
+ t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))
+ prob = abetai(0.5*df,0.5,df/(df+t*t))
+ return rpb, prob
+
+
+ def akendalltau(x,y):
+ """
+Calculates Kendall's tau ... correlation of ordinal data. Adapted
+from function kendl1 in Numerical Recipies. Needs good test-cases.@@@
+
+Usage: akendalltau(x,y)
+Returns: Kendall's tau, two-tailed p-value
+"""
+ n1 = 0
+ n2 = 0
+ iss = 0
+ for j in range(len(x)-1):
+ for k in range(j,len(y)):
+ a1 = x[j] - x[k]
+ a2 = y[j] - y[k]
+ aa = a1 * a2
+ if (aa): # neither array has a tie
+ n1 = n1 + 1
+ n2 = n2 + 1
+ if aa > 0:
+ iss = iss + 1
+ else:
+ iss = iss -1
+ else:
+ if (a1):
+ n1 = n1 + 1
+ else:
+ n2 = n2 + 1
+ tau = iss / math.sqrt(n1*n2)
+ svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1))
+ z = tau / math.sqrt(svar)
+ prob = erfcc(abs(z)/1.4142136)
+ return tau, prob
+
+
+ def alinregress(*args):
+ """
+Calculates a regression line on two arrays, x and y, corresponding to x,y
+pairs. If a single 2D array is passed, alinregress finds dim with 2 levels
+and splits data into x,y pairs along that dim.
+
+Usage: alinregress(*args) args=2 equal-length arrays, or one 2D array
+Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n
+"""
+ TINY = 1.0e-20
+ if len(args) == 1: # more than 1D array?
+ args = args[0]
+ if len(args) == 2:
+ x = args[0]
+ y = args[1]
+ else:
+ x = args[:,0]
+ y = args[:,1]
+ else:
+ x = args[0]
+ y = args[1]
+ n = len(x)
+ xmean = amean(x)
+ ymean = amean(y)
+ r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y)
+ r_den = math.sqrt((n*ass(x) - asquare_of_sums(x))*(n*ass(y)-asquare_of_sums(y)))
+ r = r_num / r_den
+ z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY))
+ df = n-2
+ t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
+ prob = abetai(0.5*df,0.5,df/(df+t*t))
+ slope = r_num / (float(n)*ass(x) - asquare_of_sums(x))
+ intercept = ymean - slope*xmean
+ sterrest = math.sqrt(1-r*r)*asamplestdev(y)
+ return slope, intercept, r, prob, sterrest, n
+
+ def amasslinregress(*args):
+ """
+Calculates a regression line on one 1D array (x) and one N-D array (y).
+
+Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n
+"""
+ TINY = 1.0e-20
+ if len(args) == 1: # more than 1D array?
+ args = args[0]
+ if len(args) == 2:
+ x = N.ravel(args[0])
+ y = args[1]
+ else:
+ x = N.ravel(args[:,0])
+ y = args[:,1]
+ else:
+ x = args[0]
+ y = args[1]
+ x = x.astype(N.float_)
+ y = y.astype(N.float_)
+ n = len(x)
+ xmean = amean(x)
+ ymean = amean(y,0)
+ shp = N.ones(len(y.shape))
+ shp[0] = len(x)
+ x.shape = shp
+ print x.shape, y.shape
+ r_num = n*(N.add.reduce(x*y,0)) - N.add.reduce(x)*N.add.reduce(y,0)
+ r_den = N.sqrt((n*ass(x) - asquare_of_sums(x))*(n*ass(y,0)-asquare_of_sums(y,0)))
+ zerodivproblem = N.equal(r_den,0)
+ r_den = N.where(zerodivproblem,1,r_den) # avoid zero-division in 1st place
+ r = r_num / r_den # need to do this nicely for matrix division
+ r = N.where(zerodivproblem,0.0,r)
+ z = 0.5*N.log((1.0+r+TINY)/(1.0-r+TINY))
+ df = n-2
+ t = r*N.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
+ prob = abetai(0.5*df,0.5,df/(df+t*t))
+
+ ss = float(n)*ass(x)-asquare_of_sums(x)
+ s_den = N.where(ss==0,1,ss) # avoid zero-division in 1st place
+ slope = r_num / s_den
+ intercept = ymean - slope*xmean
+ sterrest = N.sqrt(1-r*r)*asamplestdev(y,0)
+ return slope, intercept, r, prob, sterrest, n
+
+
+#####################################
+##### AINFERENTIAL STATISTICS #####
+#####################################
+
+ def attest_1samp(a,popmean,printit=0,name='Sample',writemode='a'):
+ """
+Calculates the t-obtained for the independent samples T-test on ONE group
+of scores a, given a population mean. If printit=1, results are printed
+to the screen. If printit='filename', the results are output to 'filename'
+using the given writemode (default=append). Returns t-value, and prob.
+
+Usage: attest_1samp(a,popmean,Name='Sample',printit=0,writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ if type(a) != N.ndarray:
+ a = N.array(a)
+ x = amean(a)
+ v = avar(a)
+ n = len(a)
+ df = n-1
+ svar = ((n-1)*v) / float(df)
+ t = (x-popmean)/math.sqrt(svar*(1.0/n))
+ prob = abetai(0.5*df,0.5,df/(df+t*t))
+
+ if printit <> 0:
+ statname = 'Single-sample T-test.'
+ outputpairedstats(printit,writemode,
+ 'Population','--',popmean,0,0,0,
+ name,n,x,v,N.minimum.reduce(N.ravel(a)),
+ N.maximum.reduce(N.ravel(a)),
+ statname,t,prob)
+ return t,prob
+
+
+ def attest_ind (a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2',writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores
+a, and b. From Numerical Recipies, p.483. If printit=1, results are
+printed to the screen. If printit='filename', the results are output
+to 'filename' using the given writemode (default=append). Dimension
+can equal None (ravel array first), or an integer (the dimension over
+which to operate on a and b).
+
+Usage: attest_ind (a,b,dimension=None,printit=0,
+ Name1='Samp1',Name2='Samp2',writemode='a')
+Returns: t-value, two-tailed p-value
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ b = N.ravel(b)
+ dimension = 0
+ x1 = amean(a,dimension)
+ x2 = amean(b,dimension)
+ v1 = avar(a,dimension)
+ v2 = avar(b,dimension)
+ n1 = a.shape[dimension]
+ n2 = b.shape[dimension]
+ df = n1+n2-2
+ svar = ((n1-1)*v1+(n2-1)*v2) / float(df)
+ zerodivproblem = N.equal(svar,0)
+ svar = N.where(zerodivproblem,1,svar) # avoid zero-division in 1st place
+ t = (x1-x2)/N.sqrt(svar*(1.0/n1 + 1.0/n2)) # N-D COMPUTATION HERE!!!!!!
+ t = N.where(zerodivproblem,1.0,t) # replace NaN/wrong t-values with 1.0
+ probs = abetai(0.5*df,0.5,float(df)/(df+t*t))
+
+ if type(t) == N.ndarray:
+ probs = N.reshape(probs,t.shape)
+ if probs.shape == (1,):
+ probs = probs[0]
+
+ if printit <> 0:
+ if type(t) == N.ndarray:
+ t = t[0]
+ if type(probs) == N.ndarray:
+ probs = probs[0]
+ statname = 'Independent samples T-test.'
+ outputpairedstats(printit,writemode,
+ name1,n1,x1,v1,N.minimum.reduce(N.ravel(a)),
+ N.maximum.reduce(N.ravel(a)),
+ name2,n2,x2,v2,N.minimum.reduce(N.ravel(b)),
+ N.maximum.reduce(N.ravel(b)),
+ statname,t,probs)
+ return
+ return t, probs
+
+ def ap2t(pval,df):
+ """
+Tries to compute a t-value from a p-value (or pval array) and associated df.
+SLOW for large numbers of elements(!) as it re-computes p-values 20 times
+(smaller step-sizes) at which point it decides it's done. Keeps the signs
+of the input array. Returns 1000 (or -1000) if t>100.
+
+Usage: ap2t(pval,df)
+Returns: an array of t-values with the shape of pval
+ """
+ pval = N.array(pval)
+ signs = N.sign(pval)
+ pval = abs(pval)
+ t = N.ones(pval.shape,N.float_)*50
+ step = N.ones(pval.shape,N.float_)*25
+ print "Initial ap2t() prob calc"
+ prob = abetai(0.5*df,0.5,float(df)/(df+t*t))
+ print 'ap2t() iter: ',
+ for i in range(10):
+ print i,' ',
+ t = N.where(pval<prob,t+step,t-step)
+ prob = abetai(0.5*df,0.5,float(df)/(df+t*t))
+ step = step/2
+ print
+ # since this is an ugly hack, we get ugly boundaries
+ t = N.where(t>99.9,1000,t) # hit upper-boundary
+ t = t+signs
+ return t #, prob, pval
+
+
+ def attest_rel (a,b,dimension=None,printit=0,name1='Samp1',name2='Samp2',writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO RELATED samples of scores, a
+and b. From Numerical Recipies, p.483. If printit=1, results are
+printed to the screen. If printit='filename', the results are output
+to 'filename' using the given writemode (default=append). Dimension
+can equal None (ravel array first), or an integer (the dimension over
+which to operate on a and b).
+
+Usage: attest_rel(a,b,dimension=None,printit=0,
+ name1='Samp1',name2='Samp2',writemode='a')
+Returns: t-value, two-tailed p-value
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ b = N.ravel(b)
+ dimension = 0
+ if len(a)<>len(b):
+ raise ValueError, 'Unequal length arrays.'
+ x1 = amean(a,dimension)
+ x2 = amean(b,dimension)
+ v1 = avar(a,dimension)
+ v2 = avar(b,dimension)
+ n = a.shape[dimension]
+ df = float(n-1)
+ d = (a-b).astype('d')
+
+ denom = N.sqrt((n*N.add.reduce(d*d,dimension) - N.add.reduce(d,dimension)**2) /df)
+ zerodivproblem = N.equal(denom,0)
+ denom = N.where(zerodivproblem,1,denom) # avoid zero-division in 1st place
+ t = N.add.reduce(d,dimension) / denom # N-D COMPUTATION HERE!!!!!!
+ t = N.where(zerodivproblem,1.0,t) # replace NaN/wrong t-values with 1.0
+ probs = abetai(0.5*df,0.5,float(df)/(df+t*t))
+ if type(t) == N.ndarray:
+ probs = N.reshape(probs,t.shape)
+ if probs.shape == (1,):
+ probs = probs[0]
+
+ if printit <> 0:
+ statname = 'Related samples T-test.'
+ outputpairedstats(printit,writemode,
+ name1,n,x1,v1,N.minimum.reduce(N.ravel(a)),
+ N.maximum.reduce(N.ravel(a)),
+ name2,n,x2,v2,N.minimum.reduce(N.ravel(b)),
+ N.maximum.reduce(N.ravel(b)),
+ statname,t,probs)
+ return
+ return t, probs
+
+
+ def achisquare(f_obs,f_exp=None):
+ """
+Calculates a one-way chi square for array of observed frequencies and returns
+the result. If no expected frequencies are given, the total N is assumed to
+be equally distributed across all groups.
+@@@NOT RIGHT??
+
+Usage: achisquare(f_obs, f_exp=None) f_obs = array of observed cell freq.
+Returns: chisquare-statistic, associated p-value
+"""
+
+ k = len(f_obs)
+ if f_exp == None:
+ f_exp = N.array([sum(f_obs)/float(k)] * len(f_obs),N.float_)
+ f_exp = f_exp.astype(N.float_)
+ chisq = N.add.reduce((f_obs-f_exp)**2 / f_exp)
+ return chisq, achisqprob(chisq, k-1)
+
+
+ def aks_2samp (data1,data2):
+ """
+Computes the Kolmogorov-Smirnof statistic on 2 samples. Modified from
+Numerical Recipies in C, page 493. Returns KS D-value, prob. Not ufunc-
+like.
+
+Usage: aks_2samp(data1,data2) where data1 and data2 are 1D arrays
+Returns: KS D-value, p-value
+"""
+ j1 = 0 # N.zeros(data1.shape[1:]) TRIED TO MAKE THIS UFUNC-LIKE
+ j2 = 0 # N.zeros(data2.shape[1:])
+ fn1 = 0.0 # N.zeros(data1.shape[1:],N.float_)
+ fn2 = 0.0 # N.zeros(data2.shape[1:],N.float_)
+ n1 = data1.shape[0]
+ n2 = data2.shape[0]
+ en1 = n1*1
+ en2 = n2*1
+ d = N.zeros(data1.shape[1:],N.float_)
+ data1 = N.sort(data1,0)
+ data2 = N.sort(data2,0)
+ while j1 < n1 and j2 < n2:
+ d1=data1[j1]
+ d2=data2[j2]
+ if d1 <= d2:
+ fn1 = (j1)/float(en1)
+ j1 = j1 + 1
+ if d2 <= d1:
+ fn2 = (j2)/float(en2)
+ j2 = j2 + 1
+ dt = (fn2-fn1)
+ if abs(dt) > abs(d):
+ d = dt
+# try:
+ en = math.sqrt(en1*en2/float(en1+en2))
+ prob = aksprob((en+0.12+0.11/en)*N.fabs(d))
+# except:
+# prob = 1.0
+ return d, prob
+
+
+ def amannwhitneyu(x,y):
+ """
+Calculates a Mann-Whitney U statistic on the provided scores and
+returns the result. Use only when the n in each condition is < 20 and
+you have 2 independent samples of ranks. REMEMBER: Mann-Whitney U is
+significant if the u-obtained is LESS THAN or equal to the critical
+value of U.
+
+Usage: amannwhitneyu(x,y) where x,y are arrays of values for 2 conditions
+Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))
+"""
+ n1 = len(x)
+ n2 = len(y)
+ ranked = rankdata(N.concatenate((x,y)))
+ rankx = ranked[0:n1] # get the x-ranks
+ ranky = ranked[n1:] # the rest are y-ranks
+ u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx) # calc U for x
+ u2 = n1*n2 - u1 # remainder is U for y
+ bigu = max(u1,u2)
+ smallu = min(u1,u2)
+ proportion = bigu/float(n1*n2)
+ T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores
+ if T == 0:
+ raise ValueError, 'All numbers are identical in amannwhitneyu'
+ sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0)
+ z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc
+ return smallu, 1.0 - azprob(z), proportion
+
+
+ def atiecorrect(rankvals):
+ """
+Tie-corrector for ties in Mann Whitney U and Kruskal Wallis H tests.
+See Siegel, S. (1956) Nonparametric Statistics for the Behavioral
+Sciences. New York: McGraw-Hill. Code adapted from |Stat rankind.c
+code.
+
+Usage: atiecorrect(rankvals)
+Returns: T correction factor for U or H
+"""
+ sorted,posn = ashellsort(N.array(rankvals))
+ n = len(sorted)
+ T = 0.0
+ i = 0
+ while (i<n-1):
+ if sorted[i] == sorted[i+1]:
+ nties = 1
+ while (i<n-1) and (sorted[i] == sorted[i+1]):
+ nties = nties +1
+ i = i +1
+ T = T + nties**3 - nties
+ i = i+1
+ T = T / float(n**3-n)
+ return 1.0 - T
+
+
+ def aranksums(x,y):
+ """
+Calculates the rank sums statistic on the provided scores and returns
+the result.
+
+Usage: aranksums(x,y) where x,y are arrays of values for 2 conditions
+Returns: z-statistic, two-tailed p-value
+"""
+ n1 = len(x)
+ n2 = len(y)
+ alldata = N.concatenate((x,y))
+ ranked = arankdata(alldata)
+ x = ranked[:n1]
+ y = ranked[n1:]
+ s = sum(x)
+ expected = n1*(n1+n2+1) / 2.0
+ z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0)
+ prob = 2*(1.0 - azprob(abs(z)))
+ return z, prob
+
+
+ def awilcoxont(x,y):
+ """
+Calculates the Wilcoxon T-test for related samples and returns the
+result. A non-parametric T-test.
+
+Usage: awilcoxont(x,y) where x,y are equal-length arrays for 2 conditions
+Returns: t-statistic, two-tailed p-value
+"""
+ if len(x) <> len(y):
+ raise ValueError, 'Unequal N in awilcoxont. Aborting.'
+ d = x-y
+ d = N.compress(N.not_equal(d,0),d) # Keep all non-zero differences
+ count = len(d)
+ absd = abs(d)
+ absranked = arankdata(absd)
+ r_plus = 0.0
+ r_minus = 0.0
+ for i in range(len(absd)):
+ if d[i] < 0:
+ r_minus = r_minus + absranked[i]
+ else:
+ r_plus = r_plus + absranked[i]
+ wt = min(r_plus, r_minus)
+ mn = count * (count+1) * 0.25
+ se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0)
+ z = math.fabs(wt-mn) / se
+ z = math.fabs(wt-mn) / se
+ prob = 2*(1.0 -zprob(abs(z)))
+ return wt, prob
+
+
+ def akruskalwallish(*args):
+ """
+The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more
+groups, requiring at least 5 subjects in each group. This function
+calculates the Kruskal-Wallis H and associated p-value for 3 or more
+independent samples.
+
+Usage: akruskalwallish(*args) args are separate arrays for 3+ conditions
+Returns: H-statistic (corrected for ties), associated p-value
+"""
+ assert len(args) == 3, "Need at least 3 groups in stats.akruskalwallish()"
+ args = list(args)
+ n = [0]*len(args)
+ n = map(len,args)
+ all = []
+ for i in range(len(args)):
+ all = all + args[i].tolist()
+ ranked = rankdata(all)
+ T = tiecorrect(ranked)
+ for i in range(len(args)):
+ args[i] = ranked[0:n[i]]
+ del ranked[0:n[i]]
+ rsums = []
+ for i in range(len(args)):
+ rsums.append(sum(args[i])**2)
+ rsums[i] = rsums[i] / float(n[i])
+ ssbn = sum(rsums)
+ totaln = sum(n)
+ h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1)
+ df = len(args) - 1
+ if T == 0:
+ raise ValueError, 'All numbers are identical in akruskalwallish'
+ h = h / float(T)
+ return h, chisqprob(h,df)
+
+
+ def afriedmanchisquare(*args):
+ """
+Friedman Chi-Square is a non-parametric, one-way within-subjects
+ANOVA. This function calculates the Friedman Chi-square test for
+repeated measures and returns the result, along with the associated
+probability value. It assumes 3 or more repeated measures. Only 3
+levels requires a minimum of 10 subjects in the study. Four levels
+requires 5 subjects per level(??).
+
+Usage: afriedmanchisquare(*args) args are separate arrays for 2+ conditions
+Returns: chi-square statistic, associated p-value
+"""
+ k = len(args)
+ if k < 3:
+ raise ValueError, '\nLess than 3 levels. Friedman test not appropriate.\n'
+ n = len(args[0])
+ data = apply(pstat.aabut,args)
+ data = data.astype(N.float_)
+ for i in range(len(data)):
+ data[i] = arankdata(data[i])
+ ssbn = asum(asum(args,1)**2)
+ chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
+ return chisq, achisqprob(chisq,k-1)
+
+
+#####################################
+#### APROBABILITY CALCULATIONS ####
+#####################################
+
+ def achisqprob(chisq,df):
+ """
+Returns the (1-tail) probability value associated with the provided chi-square
+value and df. Heavily modified from chisq.c in Gary Perlman's |Stat. Can
+handle multiple dimensions.
+
+Usage: achisqprob(chisq,df) chisq=chisquare stat., df=degrees of freedom
+"""
+ BIG = 200.0
+ def ex(x):
+ BIG = 200.0
+ exponents = N.where(N.less(x,-BIG),-BIG,x)
+ return N.exp(exponents)
+
+ if type(chisq) == N.ndarray:
+ arrayflag = 1
+ else:
+ arrayflag = 0
+ chisq = N.array([chisq])
+ if df < 1:
+ return N.ones(chisq.shape,N.float)
+ probs = N.zeros(chisq.shape,N.float_)
+ probs = N.where(N.less_equal(chisq,0),1.0,probs) # set prob=1 for chisq<0
+ a = 0.5 * chisq
+ if df > 1:
+ y = ex(-a)
+ if df%2 == 0:
+ even = 1
+ s = y*1
+ s2 = s*1
+ else:
+ even = 0
+ s = 2.0 * azprob(-N.sqrt(chisq))
+ s2 = s*1
+ if (df > 2):
+ chisq = 0.5 * (df - 1.0)
+ if even:
+ z = N.ones(probs.shape,N.float_)
+ else:
+ z = 0.5 *N.ones(probs.shape,N.float_)
+ if even:
+ e = N.zeros(probs.shape,N.float_)
+ else:
+ e = N.log(N.sqrt(N.pi)) *N.ones(probs.shape,N.float_)
+ c = N.log(a)
+ mask = N.zeros(probs.shape)
+ a_big = N.greater(a,BIG)
+ a_big_frozen = -1 *N.ones(probs.shape,N.float_)
+ totalelements = N.multiply.reduce(N.array(probs.shape))
+ while asum(mask)<>totalelements:
+ e = N.log(z) + e
+ s = s + ex(c*z-a-e)
+ z = z + 1.0
+# print z, e, s
+ newmask = N.greater(z,chisq)
+ a_big_frozen = N.where(newmask*N.equal(mask,0)*a_big, s, a_big_frozen)
+ mask = N.clip(newmask+mask,0,1)
+ if even:
+ z = N.ones(probs.shape,N.float_)
+ e = N.ones(probs.shape,N.float_)
+ else:
+ z = 0.5 *N.ones(probs.shape,N.float_)
+ e = 1.0 / N.sqrt(N.pi) / N.sqrt(a) * N.ones(probs.shape,N.float_)
+ c = 0.0
+ mask = N.zeros(probs.shape)
+ a_notbig_frozen = -1 *N.ones(probs.shape,N.float_)
+ while asum(mask)<>totalelements:
+ e = e * (a/z.astype(N.float_))
+ c = c + e
+ z = z + 1.0
+# print '#2', z, e, c, s, c*y+s2
+ newmask = N.greater(z,chisq)
+ a_notbig_frozen = N.where(newmask*N.equal(mask,0)*(1-a_big),
+ c*y+s2, a_notbig_frozen)
+ mask = N.clip(newmask+mask,0,1)
+ probs = N.where(N.equal(probs,1),1,
+ N.where(N.greater(a,BIG),a_big_frozen,a_notbig_frozen))
+ return probs
+ else:
+ return s
+
+
+ def aerfcc(x):
+ """
+Returns the complementary error function erfc(x) with fractional error
+everywhere less than 1.2e-7. Adapted from Numerical Recipies. Can
+handle multiple dimensions.
+
+Usage: aerfcc(x)
+"""
+ z = abs(x)
+ t = 1.0 / (1.0+0.5*z)
+ ans = t * N.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277)))))))))
+ return N.where(N.greater_equal(x,0), ans, 2.0-ans)
+
+
+ def azprob(z):
+ """
+Returns the area under the normal curve 'to the left of' the given z value.
+Thus,
+ for z<0, zprob(z) = 1-tail probability
+ for z>0, 1.0-zprob(z) = 1-tail probability
+ for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability
+Adapted from z.c in Gary Perlman's |Stat. Can handle multiple dimensions.
+
+Usage: azprob(z) where z is a z-value
+"""
+ def yfunc(y):
+ x = (((((((((((((-0.000045255659 * y
+ +0.000152529290) * y -0.000019538132) * y
+ -0.000676904986) * y +0.001390604284) * y
+ -0.000794620820) * y -0.002034254874) * y
+ +0.006549791214) * y -0.010557625006) * y
+ +0.011630447319) * y -0.009279453341) * y
+ +0.005353579108) * y -0.002141268741) * y
+ +0.000535310849) * y +0.999936657524
+ return x
+
+ def wfunc(w):
+ x = ((((((((0.000124818987 * w
+ -0.001075204047) * w +0.005198775019) * w
+ -0.019198292004) * w +0.059054035642) * w
+ -0.151968751364) * w +0.319152932694) * w
+ -0.531923007300) * w +0.797884560593) * N.sqrt(w) * 2.0
+ return x
+
+ Z_MAX = 6.0 # maximum meaningful z-value
+ x = N.zeros(z.shape,N.float_) # initialize
+ y = 0.5 * N.fabs(z)
+ x = N.where(N.less(y,1.0),wfunc(y*y),yfunc(y-2.0)) # get x's
+ x = N.where(N.greater(y,Z_MAX*0.5),1.0,x) # kill those with big Z
+ prob = N.where(N.greater(z,0),(x+1)*0.5,(1-x)*0.5)
+ return prob
+
+
+ def aksprob(alam):
+ """
+Returns the probability value for a K-S statistic computed via ks_2samp.
+Adapted from Numerical Recipies. Can handle multiple dimensions.
+
+Usage: aksprob(alam)
+"""
+ if type(alam) == N.ndarray:
+ frozen = -1 *N.ones(alam.shape,N.float64)
+ alam = alam.astype(N.float64)
+ arrayflag = 1
+ else:
+ frozen = N.array(-1.)
+ alam = N.array(alam,N.float64)
+ arrayflag = 1
+ mask = N.zeros(alam.shape)
+ fac = 2.0 *N.ones(alam.shape,N.float_)
+ sum = N.zeros(alam.shape,N.float_)
+ termbf = N.zeros(alam.shape,N.float_)
+ a2 = N.array(-2.0*alam*alam,N.float64)
+ totalelements = N.multiply.reduce(N.array(mask.shape))
+ for j in range(1,201):
+ if asum(mask) == totalelements:
+ break
+ exponents = (a2*j*j)
+ overflowmask = N.less(exponents,-746)
+ frozen = N.where(overflowmask,0,frozen)
+ mask = mask+overflowmask
+ term = fac*N.exp(exponents)
+ sum = sum + term
+ newmask = N.where(N.less_equal(abs(term),(0.001*termbf)) +
+ N.less(abs(term),1.0e-8*sum), 1, 0)
+ frozen = N.where(newmask*N.equal(mask,0), sum, frozen)
+ mask = N.clip(mask+newmask,0,1)
+ fac = -fac
+ termbf = abs(term)
+ if arrayflag:
+ return N.where(N.equal(frozen,-1), 1.0, frozen) # 1.0 if doesn't converge
+ else:
+ return N.where(N.equal(frozen,-1), 1.0, frozen)[0] # 1.0 if doesn't converge
+
+
+ def afprob (dfnum, dfden, F):
+ """
+Returns the 1-tailed significance level (p-value) of an F statistic
+given the degrees of freedom for the numerator (dfR-dfF) and the degrees
+of freedom for the denominator (dfF). Can handle multiple dims for F.
+
+Usage: afprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn
+"""
+ if type(F) == N.ndarray:
+ return abetai(0.5*dfden, 0.5*dfnum, dfden/(1.0*dfden+dfnum*F))
+ else:
+ return abetai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F))
+
+
+ def abetacf(a,b,x,verbose=1):
+ """
+Evaluates the continued fraction form of the incomplete Beta function,
+betai. (Adapted from: Numerical Recipies in C.) Can handle multiple
+dimensions for x.
+
+Usage: abetacf(a,b,x,verbose=1)
+"""
+ ITMAX = 200
+ EPS = 3.0e-7
+
+ arrayflag = 1
+ if type(x) == N.ndarray:
+ frozen = N.ones(x.shape,N.float_) *-1 #start out w/ -1s, should replace all
+ else:
+ arrayflag = 0
+ frozen = N.array([-1])
+ x = N.array([x])
+ mask = N.zeros(x.shape)
+ bm = az = am = 1.0
+ qab = a+b
+ qap = a+1.0
+ qam = a-1.0
+ bz = 1.0-qab*x/qap
+ for i in range(ITMAX+1):
+ if N.sum(N.ravel(N.equal(frozen,-1)))==0:
+ break
+ em = float(i+1)
+ tem = em + em
+ d = em*(b-em)*x/((qam+tem)*(a+tem))
+ ap = az + d*am
+ bp = bz+d*bm
+ d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem))
+ app = ap+d*az
+ bpp = bp+d*bz
+ aold = az*1
+ am = ap/bpp
+ bm = bp/bpp
+ az = app/bpp
+ bz = 1.0
+ newmask = N.less(abs(az-aold),EPS*abs(az))
+ frozen = N.where(newmask*N.equal(mask,0), az, frozen)
+ mask = N.clip(mask+newmask,0,1)
+ noconverge = asum(N.equal(frozen,-1))
+ if noconverge <> 0 and verbose:
+ print 'a or b too big, or ITMAX too small in Betacf for ',noconverge,' elements'
+ if arrayflag:
+ return frozen
+ else:
+ return frozen[0]
+
+
+ def agammln(xx):
+ """
+Returns the gamma function of xx.
+ Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.
+Adapted from: Numerical Recipies in C. Can handle multiple dims ... but
+probably doesn't normally have to.
+
+Usage: agammln(xx)
+"""
+ coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516,
+ 0.120858003e-2, -0.536382e-5]
+ x = xx - 1.0
+ tmp = x + 5.5
+ tmp = tmp - (x+0.5)*N.log(tmp)
+ ser = 1.0
+ for j in range(len(coeff)):
+ x = x + 1
+ ser = ser + coeff[j]/x
+ return -tmp + N.log(2.50662827465*ser)
+
+
+ def abetai(a,b,x,verbose=1):
+ """
+Returns the incomplete beta function:
+
+ I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)
+
+where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
+function of a. The continued fraction formulation is implemented
+here, using the betacf function. (Adapted from: Numerical Recipies in
+C.) Can handle multiple dimensions.
+
+Usage: abetai(a,b,x,verbose=1)
+"""
+ TINY = 1e-15
+ if type(a) == N.ndarray:
+ if asum(N.less(x,0)+N.greater(x,1)) <> 0:
+ raise ValueError, 'Bad x in abetai'
+ x = N.where(N.equal(x,0),TINY,x)
+ x = N.where(N.equal(x,1.0),1-TINY,x)
+
+ bt = N.where(N.equal(x,0)+N.equal(x,1), 0, -1)
+ exponents = ( gammln(a+b)-gammln(a)-gammln(b)+a*N.log(x)+b*
+ N.log(1.0-x) )
+ # 746 (below) is the MAX POSSIBLE BEFORE OVERFLOW
+ exponents = N.where(N.less(exponents,-740),-740,exponents)
+ bt = N.exp(exponents)
+ if type(x) == N.ndarray:
+ ans = N.where(N.less(x,(a+1)/(a+b+2.0)),
+ bt*abetacf(a,b,x,verbose)/float(a),
+ 1.0-bt*abetacf(b,a,1.0-x,verbose)/float(b))
+ else:
+ if x<(a+1)/(a+b+2.0):
+ ans = bt*abetacf(a,b,x,verbose)/float(a)
+ else:
+ ans = 1.0-bt*abetacf(b,a,1.0-x,verbose)/float(b)
+ return ans
+
+
+#####################################
+####### AANOVA CALCULATIONS #######
+#####################################
+
+ import numpy.linalg, operator
+ LA = numpy.linalg
+
+ def aglm(data,para):
+ """
+Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken
+from:
+ Peterson et al. Statistical limitations in functional neuroimaging
+ I. Non-inferential methods and statistical models. Phil Trans Royal Soc
+ Lond B 354: 1239-1260.
+
+Usage: aglm(data,para)
+Returns: statistic, p-value ???
+"""
+ if len(para) <> len(data):
+ print "data and para must be same length in aglm"
+ return
+ n = len(para)
+ p = pstat.aunique(para)
+ x = N.zeros((n,len(p))) # design matrix
+ for l in range(len(p)):
+ x[:,l] = N.equal(para,p[l])
+ b = N.dot(N.dot(LA.inv(N.dot(N.transpose(x),x)), # i.e., b=inv(X'X)X'Y
+ N.transpose(x)),
+ data)
+ diffs = (data - N.dot(x,b))
+ s_sq = 1./(n-len(p)) * N.dot(N.transpose(diffs), diffs)
+
+ if len(p) == 2: # ttest_ind
+ c = N.array([1,-1])
+ df = n-2
+ fact = asum(1.0/asum(x,0)) # i.e., 1/n1 + 1/n2 + 1/n3 ...
+ t = N.dot(c,b) / N.sqrt(s_sq*fact)
+ probs = abetai(0.5*df,0.5,float(df)/(df+t*t))
+ return t, probs
+
+
+ def aF_oneway(*args):
+ """
+Performs a 1-way ANOVA, returning an F-value and probability given
+any number of groups. From Heiman, pp.394-7.
+
+Usage: aF_oneway (*args) where *args is 2 or more arrays, one per
+ treatment group
+Returns: f-value, probability
+"""
+ na = len(args) # ANOVA on 'na' groups, each in it's own array
+ means = [0]*na
+ vars = [0]*na
+ ns = [0]*na
+ alldata = []
+ tmp = map(N.array,args)
+ means = map(amean,tmp)
+ vars = map(avar,tmp)
+ ns = map(len,args)
+ alldata = N.concatenate(args)
+ bign = len(alldata)
+ sstot = ass(alldata)-(asquare_of_sums(alldata)/float(bign))
+ ssbn = 0
+ for a in args:
+ ssbn = ssbn + asquare_of_sums(N.array(a))/float(len(a))
+ ssbn = ssbn - (asquare_of_sums(alldata)/float(bign))
+ sswn = sstot-ssbn
+ dfbn = na-1
+ dfwn = bign - na
+ msb = ssbn/float(dfbn)
+ msw = sswn/float(dfwn)
+ f = msb/msw
+ prob = fprob(dfbn,dfwn,f)
+ return f, prob
+
+
+ def aF_value (ER,EF,dfR,dfF):
+ """
+Returns an F-statistic given the following:
+ ER = error associated with the null hypothesis (the Restricted model)
+ EF = error associated with the alternate hypothesis (the Full model)
+ dfR = degrees of freedom the Restricted model
+ dfF = degrees of freedom associated with the Restricted model
+"""
+ return ((ER-EF)/float(dfR-dfF) / (EF/float(dfF)))
+
+
+ def outputfstats(Enum, Eden, dfnum, dfden, f, prob):
+ Enum = round(Enum,3)
+ Eden = round(Eden,3)
+ dfnum = round(Enum,3)
+ dfden = round(dfden,3)
+ f = round(f,3)
+ prob = round(prob,3)
+ suffix = '' # for *s after the p-value
+ if prob < 0.001: suffix = ' ***'
+ elif prob < 0.01: suffix = ' **'
+ elif prob < 0.05: suffix = ' *'
+ title = [['EF/ER','DF','Mean Square','F-value','prob','']]
+ lofl = title+[[Enum, dfnum, round(Enum/float(dfnum),3), f, prob, suffix],
+ [Eden, dfden, round(Eden/float(dfden),3),'','','']]
+ pstat.printcc(lofl)
+ return
+
+
+ def F_value_multivariate(ER, EF, dfnum, dfden):
+ """
+Returns an F-statistic given the following:
+ ER = error associated with the null hypothesis (the Restricted model)
+ EF = error associated with the alternate hypothesis (the Full model)
+ dfR = degrees of freedom the Restricted model
+ dfF = degrees of freedom associated with the Restricted model
+where ER and EF are matrices from a multivariate F calculation.
+"""
+ if type(ER) in [IntType, FloatType]:
+ ER = N.array([[ER]])
+ if type(EF) in [IntType, FloatType]:
+ EF = N.array([[EF]])
+ n_um = (LA.det(ER) - LA.det(EF)) / float(dfnum)
+ d_en = LA.det(EF) / float(dfden)
+ return n_um / d_en
+
+
+#####################################
+####### ASUPPORT FUNCTIONS ########
+#####################################
+
+ def asign(a):
+ """
+Usage: asign(a)
+Returns: array shape of a, with -1 where a<0 and +1 where a>=0
+"""
+ a = N.asarray(a)
+ if ((type(a) == type(1.4)) or (type(a) == type(1))):
+ return a-a-N.less(a,0)+N.greater(a,0)
+ else:
+ return N.zeros(N.shape(a))-N.less(a,0)+N.greater(a,0)
+
+
+ def asum (a, dimension=None,keepdims=0):
+ """
+An alternative to the Numeric.add.reduce function, which allows one to
+(1) collapse over multiple dimensions at once, and/or (2) to retain
+all dimensions in the original array (squashing one down to size.
+Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions). If keepdims=1, the resulting array will have as many
+dimensions as the input array.
+
+Usage: asum(a, dimension=None, keepdims=0)
+Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1
+"""
+ if type(a) == N.ndarray and a.dtype in [N.int_, N.short, N.ubyte]:
+ a = a.astype(N.float_)
+ if dimension == None:
+ s = N.sum(N.ravel(a))
+ elif type(dimension) in [IntType,FloatType]:
+ s = N.add.reduce(a, dimension)
+ if keepdims == 1:
+ shp = list(a.shape)
+ shp[dimension] = 1
+ s = N.reshape(s,shp)
+ else: # must be a SEQUENCE of dims to sum over
+ dims = list(dimension)
+ dims.sort()
+ dims.reverse()
+ s = a *1.0
+ for dim in dims:
+ s = N.add.reduce(s,dim)
+ if keepdims == 1:
+ shp = list(a.shape)
+ for dim in dims:
+ shp[dim] = 1
+ s = N.reshape(s,shp)
+ return s
+
+
+ def acumsum (a,dimension=None):
+ """
+Returns an array consisting of the cumulative sum of the items in the
+passed array. Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions, but this last one just barely makes sense).
+
+Usage: acumsum(a,dimension=None)
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if type(dimension) in [ListType, TupleType, N.ndarray]:
+ dimension = list(dimension)
+ dimension.sort()
+ dimension.reverse()
+ for d in dimension:
+ a = N.add.accumulate(a,d)
+ return a
+ else:
+ return N.add.accumulate(a,dimension)
+
+
+ def ass(inarray, dimension=None, keepdims=0):
+ """
+Squares each value in the passed array, adds these squares & returns
+the result. Unfortunate function name. :-) Defaults to ALL values in
+the array. Dimension can equal None (ravel array first), an integer
+(the dimension over which to operate), or a sequence (operate over
+multiple dimensions). Set keepdims=1 to maintain the original number
+of dimensions.
+
+Usage: ass(inarray, dimension=None, keepdims=0)
+Returns: sum-along-'dimension' for (inarray*inarray)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ return asum(inarray*inarray,dimension,keepdims)
+
+
+ def asummult (array1,array2,dimension=None,keepdims=0):
+ """
+Multiplies elements in array1 and array2, element by element, and
+returns the sum (along 'dimension') of all resulting multiplications.
+Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions). A trivial function, but included for completeness.
+
+Usage: asummult(array1,array2,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ array1 = N.ravel(array1)
+ array2 = N.ravel(array2)
+ dimension = 0
+ return asum(array1*array2,dimension,keepdims)
+
+
+ def asquare_of_sums(inarray, dimension=None, keepdims=0):
+ """
+Adds the values in the passed array, squares that sum, and returns the
+result. Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions). If keepdims=1, the returned array will have the same
+NUMBER of dimensions as the original.
+
+Usage: asquare_of_sums(inarray, dimension=None, keepdims=0)
+Returns: the square of the sum over dim(s) in dimension
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ s = asum(inarray,dimension,keepdims)
+ if type(s) == N.ndarray:
+ return s.astype(N.float_)*s
+ else:
+ return float(s)*s
+
+
+ def asumdiffsquared(a,b, dimension=None, keepdims=0):
+ """
+Takes pairwise differences of the values in arrays a and b, squares
+these differences, and returns the sum of these squares. Dimension
+can equal None (ravel array first), an integer (the dimension over
+which to operate), or a sequence (operate over multiple dimensions).
+keepdims=1 means the return shape = len(a.shape) = len(b.shape)
+
+Usage: asumdiffsquared(a,b)
+Returns: sum[ravel(a-b)**2]
+"""
+ if dimension == None:
+ inarray = N.ravel(a)
+ dimension = 0
+ return asum((a-b)**2,dimension,keepdims)
+
+
+ def ashellsort(inarray):
+ """
+Shellsort algorithm. Sorts a 1D-array.
+
+Usage: ashellsort(inarray)
+Returns: sorted-inarray, sorting-index-vector (for original array)
+"""
+ n = len(inarray)
+ svec = inarray *1.0
+ ivec = range(n)
+ gap = n/2 # integer division needed
+ while gap >0:
+ for i in range(gap,n):
+ for j in range(i-gap,-1,-gap):
+ while j>=0 and svec[j]>svec[j+gap]:
+ temp = svec[j]
+ svec[j] = svec[j+gap]
+ svec[j+gap] = temp
+ itemp = ivec[j]
+ ivec[j] = ivec[j+gap]
+ ivec[j+gap] = itemp
+ gap = gap / 2 # integer division needed
+# svec is now sorted input vector, ivec has the order svec[i] = vec[ivec[i]]
+ return svec, ivec
+
+
+ def arankdata(inarray):
+ """
+Ranks the data in inarray, dealing with ties appropritely. Assumes
+a 1D inarray. Adapted from Gary Perlman's |Stat ranksort.
+
+Usage: arankdata(inarray)
+Returns: array of length equal to inarray, containing rank scores
+"""
+ n = len(inarray)
+ svec, ivec = ashellsort(inarray)
+ sumranks = 0
+ dupcount = 0
+ newarray = N.zeros(n,N.float_)
+ for i in range(n):
+ sumranks = sumranks + i
+ dupcount = dupcount + 1
+ if i==n-1 or svec[i] <> svec[i+1]:
+ averank = sumranks / float(dupcount) + 1
+ for j in range(i-dupcount+1,i+1):
+ newarray[ivec[j]] = averank
+ sumranks = 0
+ dupcount = 0
+ return newarray
+
+
+ def afindwithin(data):
+ """
+Returns a binary vector, 1=within-subject factor, 0=between. Input
+equals the entire data array (i.e., column 1=random factor, last
+column = measured values.
+
+Usage: afindwithin(data) data in |Stat format
+"""
+ numfact = len(data[0])-2
+ withinvec = [0]*numfact
+ for col in range(1,numfact+1):
+ rows = pstat.linexand(data,col,pstat.unique(pstat.colex(data,1))[0]) # get 1 level of this factor
+ if len(pstat.unique(pstat.colex(rows,0))) < len(rows): # if fewer subjects than scores on this factor
+ withinvec[col-1] = 1
+ return withinvec
+
+
+ #########################################################
+ #########################################################
+ ###### RE-DEFINE DISPATCHES TO INCLUDE ARRAYS #########
+ #########################################################
+ #########################################################
+
+## CENTRAL TENDENCY:
+ geometricmean = Dispatch ( (lgeometricmean, (ListType, TupleType)),
+ (ageometricmean, (N.ndarray,)) )
+ harmonicmean = Dispatch ( (lharmonicmean, (ListType, TupleType)),
+ (aharmonicmean, (N.ndarray,)) )
+ mean = Dispatch ( (lmean, (ListType, TupleType)),
+ (amean, (N.ndarray,)) )
+ median = Dispatch ( (lmedian, (ListType, TupleType)),
+ (amedian, (N.ndarray,)) )
+ medianscore = Dispatch ( (lmedianscore, (ListType, TupleType)),
+ (amedianscore, (N.ndarray,)) )
+ mode = Dispatch ( (lmode, (ListType, TupleType)),
+ (amode, (N.ndarray,)) )
+ tmean = Dispatch ( (atmean, (N.ndarray,)) )
+ tvar = Dispatch ( (atvar, (N.ndarray,)) )
+ tstdev = Dispatch ( (atstdev, (N.ndarray,)) )
+ tsem = Dispatch ( (atsem, (N.ndarray,)) )
+
+## VARIATION:
+ moment = Dispatch ( (lmoment, (ListType, TupleType)),
+ (amoment, (N.ndarray,)) )
+ variation = Dispatch ( (lvariation, (ListType, TupleType)),
+ (avariation, (N.ndarray,)) )
+ skew = Dispatch ( (lskew, (ListType, TupleType)),
+ (askew, (N.ndarray,)) )
+ kurtosis = Dispatch ( (lkurtosis, (ListType, TupleType)),
+ (akurtosis, (N.ndarray,)) )
+ describe = Dispatch ( (ldescribe, (ListType, TupleType)),
+ (adescribe, (N.ndarray,)) )
+
+## DISTRIBUTION TESTS
+
+ skewtest = Dispatch ( (askewtest, (ListType, TupleType)),
+ (askewtest, (N.ndarray,)) )
+ kurtosistest = Dispatch ( (akurtosistest, (ListType, TupleType)),
+ (akurtosistest, (N.ndarray,)) )
+ normaltest = Dispatch ( (anormaltest, (ListType, TupleType)),
+ (anormaltest, (N.ndarray,)) )
+
+## FREQUENCY STATS:
+ itemfreq = Dispatch ( (litemfreq, (ListType, TupleType)),
+ (aitemfreq, (N.ndarray,)) )
+ scoreatpercentile = Dispatch ( (lscoreatpercentile, (ListType, TupleType)),
+ (ascoreatpercentile, (N.ndarray,)) )
+ percentileofscore = Dispatch ( (lpercentileofscore, (ListType, TupleType)),
+ (apercentileofscore, (N.ndarray,)) )
+ histogram = Dispatch ( (lhistogram, (ListType, TupleType)),
+ (ahistogram, (N.ndarray,)) )
+ cumfreq = Dispatch ( (lcumfreq, (ListType, TupleType)),
+ (acumfreq, (N.ndarray,)) )
+ relfreq = Dispatch ( (lrelfreq, (ListType, TupleType)),
+ (arelfreq, (N.ndarray,)) )
+
+## VARIABILITY:
+ obrientransform = Dispatch ( (lobrientransform, (ListType, TupleType)),
+ (aobrientransform, (N.ndarray,)) )
+ samplevar = Dispatch ( (lsamplevar, (ListType, TupleType)),
+ (asamplevar, (N.ndarray,)) )
+ samplestdev = Dispatch ( (lsamplestdev, (ListType, TupleType)),
+ (asamplestdev, (N.ndarray,)) )
+ signaltonoise = Dispatch( (asignaltonoise, (N.ndarray,)),)
+ var = Dispatch ( (lvar, (ListType, TupleType)),
+ (avar, (N.ndarray,)) )
+ stdev = Dispatch ( (lstdev, (ListType, TupleType)),
+ (astdev, (N.ndarray,)) )
+ sterr = Dispatch ( (lsterr, (ListType, TupleType)),
+ (asterr, (N.ndarray,)) )
+ sem = Dispatch ( (lsem, (ListType, TupleType)),
+ (asem, (N.ndarray,)) )
+ z = Dispatch ( (lz, (ListType, TupleType)),
+ (az, (N.ndarray,)) )
+ zs = Dispatch ( (lzs, (ListType, TupleType)),
+ (azs, (N.ndarray,)) )
+
+## TRIMMING FCNS:
+ threshold = Dispatch( (athreshold, (N.ndarray,)),)
+ trimboth = Dispatch ( (ltrimboth, (ListType, TupleType)),
+ (atrimboth, (N.ndarray,)) )
+ trim1 = Dispatch ( (ltrim1, (ListType, TupleType)),
+ (atrim1, (N.ndarray,)) )
+
+## CORRELATION FCNS:
+ paired = Dispatch ( (lpaired, (ListType, TupleType)),
+ (apaired, (N.ndarray,)) )
+ lincc = Dispatch ( (llincc, (ListType, TupleType)),
+ (alincc, (N.ndarray,)) )
+ pearsonr = Dispatch ( (lpearsonr, (ListType, TupleType)),
+ (apearsonr, (N.ndarray,)) )
+ spearmanr = Dispatch ( (lspearmanr, (ListType, TupleType)),
+ (aspearmanr, (N.ndarray,)) )
+ pointbiserialr = Dispatch ( (lpointbiserialr, (ListType, TupleType)),
+ (apointbiserialr, (N.ndarray,)) )
+ kendalltau = Dispatch ( (lkendalltau, (ListType, TupleType)),
+ (akendalltau, (N.ndarray,)) )
+ linregress = Dispatch ( (llinregress, (ListType, TupleType)),
+ (alinregress, (N.ndarray,)) )
+
+## INFERENTIAL STATS:
+ ttest_1samp = Dispatch ( (lttest_1samp, (ListType, TupleType)),
+ (attest_1samp, (N.ndarray,)) )
+ ttest_ind = Dispatch ( (lttest_ind, (ListType, TupleType)),
+ (attest_ind, (N.ndarray,)) )
+ ttest_rel = Dispatch ( (lttest_rel, (ListType, TupleType)),
+ (attest_rel, (N.ndarray,)) )
+ chisquare = Dispatch ( (lchisquare, (ListType, TupleType)),
+ (achisquare, (N.ndarray,)) )
+ ks_2samp = Dispatch ( (lks_2samp, (ListType, TupleType)),
+ (aks_2samp, (N.ndarray,)) )
+ mannwhitneyu = Dispatch ( (lmannwhitneyu, (ListType, TupleType)),
+ (amannwhitneyu, (N.ndarray,)) )
+ tiecorrect = Dispatch ( (ltiecorrect, (ListType, TupleType)),
+ (atiecorrect, (N.ndarray,)) )
+ ranksums = Dispatch ( (lranksums, (ListType, TupleType)),
+ (aranksums, (N.ndarray,)) )
+ wilcoxont = Dispatch ( (lwilcoxont, (ListType, TupleType)),
+ (awilcoxont, (N.ndarray,)) )
+ kruskalwallish = Dispatch ( (lkruskalwallish, (ListType, TupleType)),
+ (akruskalwallish, (N.ndarray,)) )
+ friedmanchisquare = Dispatch ( (lfriedmanchisquare, (ListType, TupleType)),
+ (afriedmanchisquare, (N.ndarray,)) )
+
+## PROBABILITY CALCS:
+ chisqprob = Dispatch ( (lchisqprob, (IntType, FloatType)),
+ (achisqprob, (N.ndarray,)) )
+ zprob = Dispatch ( (lzprob, (IntType, FloatType)),
+ (azprob, (N.ndarray,)) )
+ ksprob = Dispatch ( (lksprob, (IntType, FloatType)),
+ (aksprob, (N.ndarray,)) )
+ fprob = Dispatch ( (lfprob, (IntType, FloatType)),
+ (afprob, (N.ndarray,)) )
+ betacf = Dispatch ( (lbetacf, (IntType, FloatType)),
+ (abetacf, (N.ndarray,)) )
+ betai = Dispatch ( (lbetai, (IntType, FloatType)),
+ (abetai, (N.ndarray,)) )
+ erfcc = Dispatch ( (lerfcc, (IntType, FloatType)),
+ (aerfcc, (N.ndarray,)) )
+ gammln = Dispatch ( (lgammln, (IntType, FloatType)),
+ (agammln, (N.ndarray,)) )
+
+## ANOVA FUNCTIONS:
+ F_oneway = Dispatch ( (lF_oneway, (ListType, TupleType)),
+ (aF_oneway, (N.ndarray,)) )
+ F_value = Dispatch ( (lF_value, (ListType, TupleType)),
+ (aF_value, (N.ndarray,)) )
+
+## SUPPORT FUNCTIONS:
+ incr = Dispatch ( (lincr, (ListType, TupleType, N.ndarray)), )
+ sum = Dispatch ( (lsum, (ListType, TupleType)),
+ (asum, (N.ndarray,)) )
+ cumsum = Dispatch ( (lcumsum, (ListType, TupleType)),
+ (acumsum, (N.ndarray,)) )
+ ss = Dispatch ( (lss, (ListType, TupleType)),
+ (ass, (N.ndarray,)) )
+ summult = Dispatch ( (lsummult, (ListType, TupleType)),
+ (asummult, (N.ndarray,)) )
+ square_of_sums = Dispatch ( (lsquare_of_sums, (ListType, TupleType)),
+ (asquare_of_sums, (N.ndarray,)) )
+ sumdiffsquared = Dispatch ( (lsumdiffsquared, (ListType, TupleType)),
+ (asumdiffsquared, (N.ndarray,)) )
+ shellsort = Dispatch ( (lshellsort, (ListType, TupleType)),
+ (ashellsort, (N.ndarray,)) )
+ rankdata = Dispatch ( (lrankdata, (ListType, TupleType)),
+ (arankdata, (N.ndarray,)) )
+ findwithin = Dispatch ( (lfindwithin, (ListType, TupleType)),
+ (afindwithin, (N.ndarray,)) )
+
+###################### END OF NUMERIC FUNCTION BLOCK #####################
+
+###################### END OF STATISTICAL FUNCTIONS ######################
+
+except ImportError:
+ pass
diff --git a/utils/tabulator.py b/utils/tabulator.py
new file mode 100644
index 00000000..3c6b1839
--- /dev/null
+++ b/utils/tabulator.py
@@ -0,0 +1,967 @@
+#!/usr/bin/python
+
+# Copyright 2011 Google Inc. All Rights Reserved.
+
+import getpass
+import math
+import numpy
+import colortrans
+from email_sender import EmailSender
+
+
+def _IsFloat(v):
+ try:
+ float(v)
+ return True
+ except ValueError:
+ return False
+
+
+def _AllFloat(values):
+ return all([_IsFloat(v) for v in values])
+
+
+def _GetFloats(values):
+ return [float(v) for v in values]
+
+
+def _StripNone(results):
+ res = []
+ for result in results:
+ if result is not None:
+ res.append(result)
+ return res
+
+
+class TableGenerator(object):
+ """Creates a table from a list of list of dicts.
+
+ The main public function is called GetTable().
+ """
+ SORT_BY_KEYS = 0
+ SORT_BY_KEYS_DESC = 1
+ SORT_BY_VALUES = 2
+ SORT_BY_VALUES_DESC = 3
+
+ MISSING_VALUE = "x"
+
+ def __init__(self, d, l, sort=SORT_BY_KEYS):
+ self._runs = d
+ self._labels = l
+ self._sort = sort
+
+ def _AggregateKeys(self):
+ keys = set([])
+ for run_list in self._runs:
+ for run in run_list:
+ keys = keys.union(run.keys())
+ return keys
+
+ def _GetHighestValue(self, key):
+ values = []
+ for run_list in self._runs:
+ for run in run_list:
+ if key in run:
+ values.append(run[key])
+ ret = max(_StripNone(values))
+ return ret
+
+ def _GetLowestValue(self, key):
+ values = []
+ for run_list in self._runs:
+ for run in run_list:
+ if key in run:
+ values.append(run[key])
+ ret = min(_StripNone(values))
+ return ret
+
+ def _SortKeys(self, keys):
+ if self._sort == self.SORT_BY_KEYS:
+ return sorted(keys)
+ elif self._sort == self.SORT_BY_VALUES:
+ return sorted(keys, key=lambda x: self._GetLowestValue(x))
+ elif self._sort == self.SORT_BY_VALUES_DESC:
+ return sorted(keys, key=lambda x: self._GetHighestValue(x), reverse=True)
+ else:
+ assert 0, "Unimplemented sort %s" % self._sort
+
+ def _GetKeys(self):
+ keys = self._AggregateKeys()
+ return self._SortKeys(keys)
+
+ def GetTable(self):
+ """Returns a table from a list of list of dicts.
+
+ The list of list of dicts is passed into the constructor of TableGenerator.
+ This method converts that into a canonical list of lists which represents a
+ table of values.
+
+ Args:
+ None
+ Returns:
+ A list of lists which is the table.
+
+ Example:
+ We have the following runs:
+ [[{"k1": "v1", "k2": "v2"}, {"k1": "v3"}],
+ [{"k1": "v4", "k4": "v5"}]]
+ and the following labels:
+ ["vanilla", "modified"]
+ it will return:
+ [["Key", "vanilla", "modified"]
+ ["k1", ["v1", "v3"], ["v4"]]
+ ["k2", ["v2"], []]
+ ["k4", [], ["v5"]]]
+ The returned table can then be processed further by other classes in this
+ module.
+ """
+ keys = self._GetKeys()
+ header = ["keys"] + self._labels
+ table = [header]
+ for k in keys:
+ row = [k]
+ for run_list in self._runs:
+ v = []
+ for run in run_list:
+ if k in run:
+ v.append(run[k])
+ else:
+ v.append(None)
+ row.append(v)
+ table.append(row)
+ return table
+
+
+class Result(object):
+ """A class that respresents a single result.
+
+ This single result is obtained by condensing the information from a list of
+ runs and a list of baseline runs.
+ """
+
+ def __init__(self):
+ pass
+
+ def _AllStringsSame(self, values):
+ values_set = set(values)
+ return len(values_set) == 1
+
+ def NeedsBaseline(self):
+ return False
+
+ def _Literal(self, cell, values, baseline_values):
+ cell.value = " ".join([str(v) for v in values])
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ self._Literal(cell, values, baseline_values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ self._Literal(cell, values, baseline_values)
+
+ def _InvertIfLowerIsBetter(self, cell):
+ pass
+
+ def _GetGmean(self, values):
+ if not values:
+ return float("nan")
+ return (reduce(lambda x, y: x*y, values))**(1.0/len(values))
+
+ def Compute(self, cell, values, baseline_values):
+ """Compute the result given a list of values and baseline values.
+
+ Args:
+ cell: A cell data structure to populate.
+ values: List of values.
+ baseline_values: List of baseline values. Can be none if this is the
+ baseline itself.
+ """
+ all_floats = True
+ values = _StripNone(values)
+ if not len(values):
+ cell.value = ""
+ return
+ if _AllFloat(values):
+ float_values = _GetFloats(values)
+ else:
+ all_floats = False
+ if baseline_values:
+ baseline_values = _StripNone(baseline_values)
+ if baseline_values:
+ if _AllFloat(baseline_values):
+ float_baseline_values = _GetFloats(baseline_values)
+ else:
+ all_floats = False
+ else:
+ if self.NeedsBaseline():
+ cell.value = ""
+ return
+ float_baseline_values = None
+ if all_floats:
+ self._ComputeFloat(cell, float_values, float_baseline_values)
+ self._InvertIfLowerIsBetter(cell)
+ else:
+ self._ComputeString(cell, values, baseline_values)
+
+
+class LiteralResult(Result):
+ def __init__(self, iteration=0):
+ super(LiteralResult, self).__init__()
+ self.iteration = iteration
+
+ def Compute(self, cell, values, baseline_values):
+ try:
+ if values[self.iteration]:
+ cell.value = values[self.iteration]
+ else:
+ cell.value = "-"
+ except IndexError:
+ cell.value = "-"
+
+
+class NonEmptyCountResult(Result):
+ def Compute(self, cell, values, baseline_values):
+ cell.value = len(_StripNone(values))
+
+
+class StringMeanResult(Result):
+ def _ComputeString(self, cell, values, baseline_values):
+ if self._AllStringsSame(values):
+ cell.value = str(values[0])
+ else:
+ cell.value = "?"
+
+class AmeanResult(StringMeanResult):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = numpy.mean(values)
+
+
+class RawResult(Result):
+ pass
+
+
+class MinResult(Result):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = min(values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ if values:
+ cell.value = min(values)
+ else:
+ cell.value = ""
+
+
+class MaxResult(Result):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = max(values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ if values:
+ cell.value = max(values)
+ else:
+ cell.value = ""
+
+
+class NumericalResult(Result):
+ def _ComputeString(self, cell, values, baseline_values):
+ cell.value = "?"
+
+
+class StdResult(NumericalResult):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = numpy.std(values)
+
+
+class CoeffVarResult(NumericalResult):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ noise = numpy.std(values)/numpy.mean(values)
+ cell.value = noise
+
+
+class ComparisonResult(Result):
+ def NeedsBaseline(self):
+ return True
+
+ def _ComputeString(self, cell, values, baseline_values):
+ value = None
+ baseline_value = None
+ if self._AllStringsSame(values):
+ value = values[0]
+ if self._AllStringsSame(baseline_values):
+ baseline_value = baseline_values[0]
+ if value is not None and baseline_value is not None:
+ if value == baseline_value:
+ cell.value = "SAME"
+ else:
+ cell.value = "DIFFERENT"
+ else:
+ cell.value = "?"
+
+
+class StatsSignificant(ComparisonResult):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ if len(values) < 2 or len(baseline_values) < 2:
+ cell.value = float("nan")
+ return
+ import stats
+ _, cell.value = stats.lttest_ind(values, baseline_values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ return float("nan")
+
+class KeyAwareComparisonResult(ComparisonResult):
+ def _IsLowerBetter(self, key):
+ lower_is_better_keys = ["milliseconds", "ms", "seconds", "KB",
+ "rdbytes", "wrbytes"]
+ return any([key.startswith(l + "_") for l in lower_is_better_keys])
+
+ def _InvertIfLowerIsBetter(self, cell):
+ if self._IsLowerBetter(cell.name):
+ if cell.value:
+ cell.value = 1.0/cell.value
+
+
+class AmeanRatioResult(KeyAwareComparisonResult):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ if numpy.mean(baseline_values) != 0:
+ cell.value = numpy.mean(values)/numpy.mean(baseline_values)
+ elif numpy.mean(values) != 0:
+ cell.value = 0.00
+ # cell.value = 0 means the values and baseline_values have big difference
+ else:
+ cell.value = 1.00
+ # no difference if both values and baseline_values are 0
+
+
+class GmeanRatioResult(KeyAwareComparisonResult):
+ def _ComputeFloat(self, cell, values, baseline_values):
+ if self._GetGmean(baseline_values) != 0:
+ cell.value = self._GetGmean(values)/self._GetGmean(baseline_values)
+ elif self._GetGmean(values) != 0:
+ cell.value = 0.00
+ else:
+ cell.value = 1.00
+
+
+class Color(object):
+ """Class that represents color in RGBA format."""
+
+ def __init__(self, r=0, g=0, b=0, a=0):
+ self.r = r
+ self.g = g
+ self.b = b
+ self.a = a
+
+ def __str__(self):
+ return "r: %s g: %s: b: %s: a: %s" % (self.r, self.g, self.b, self.a)
+
+ def Round(self):
+ """Round RGBA values to the nearest integer."""
+ self.r = int(self.r)
+ self.g = int(self.g)
+ self.b = int(self.b)
+ self.a = int(self.a)
+
+ def GetRGB(self):
+ """Get a hex representation of the color."""
+ return "%02x%02x%02x" % (self.r, self.g, self.b)
+
+ @classmethod
+ def Lerp(cls, ratio, a, b):
+ """Perform linear interpolation between two colors.
+
+ Args:
+ ratio: The ratio to use for linear polation.
+ a: The first color object (used when ratio is 0).
+ b: The second color object (used when ratio is 1).
+
+ Returns:
+ Linearly interpolated color.
+ """
+ ret = cls()
+ ret.r = (b.r - a.r)*ratio + a.r
+ ret.g = (b.g - a.g)*ratio + a.g
+ ret.b = (b.b - a.b)*ratio + a.b
+ ret.a = (b.a - a.a)*ratio + a.a
+ return ret
+
+
+class Format(object):
+ """A class that represents the format of a column."""
+
+ def __init__(self):
+ pass
+
+ def Compute(self, cell):
+ """Computes the attributes of a cell based on its value.
+
+ Attributes typically are color, width, etc.
+
+ Args:
+ cell: The cell whose attributes are to be populated.
+ """
+ if cell.value is None:
+ cell.string_value = ""
+ if isinstance(cell.value, float):
+ self._ComputeFloat(cell)
+ else:
+ self._ComputeString(cell)
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = "{0:.2f}".format(cell.value)
+
+ def _ComputeString(self, cell):
+ cell.string_value = str(cell.value)
+
+ def _GetColor(self, value, low, mid, high, power=6, mid_value=1.0):
+ min_value = 0.0
+ max_value = 2.0
+ if math.isnan(value):
+ return mid
+ if value > mid_value:
+ value = max_value - mid_value/value
+
+ return self._GetColorBetweenRange(value, min_value, mid_value, max_value,
+ low, mid, high, power)
+
+ def _GetColorBetweenRange(self,
+ value,
+ min_value, mid_value, max_value,
+ low_color, mid_color, high_color,
+ power):
+ assert value <= max_value
+ assert value >= min_value
+ if value > mid_value:
+ value = (max_value - value)/(max_value - mid_value)
+ value **= power
+ ret = Color.Lerp(value, high_color, mid_color)
+ else:
+ value = (value - min_value)/(mid_value - min_value)
+ value **= power
+ ret = Color.Lerp(value, low_color, mid_color)
+ ret.Round()
+ return ret
+
+
+class StorageFormat(Format):
+ """Format the cell as a storage number.
+
+ Example:
+ If the cell contains a value of 1024, the string_value will be 1.0K.
+ """
+
+ def _ComputeFloat(self, cell):
+ base = 1024
+ suffices = ["K", "M", "G"]
+ v = float(cell.value)
+ current = 0
+ while v >= base**(current + 1) and current < len(suffices):
+ current += 1
+
+ if current:
+ divisor = base**current
+ cell.string_value = "%1.1f%s" % ((v/divisor), suffices[current - 1])
+ else:
+ cell.string_value = str(cell.value)
+
+
+class CoeffVarFormat(Format):
+ """Format the cell as a percent.
+
+ Example:
+ If the cell contains a value of 1.5, the string_value will be +150%.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = "%1.1f%%" % (float(cell.value) * 100)
+ cell.color = self._GetColor(cell.value,
+ Color(0, 255, 0, 0),
+ Color(0, 0, 0, 0),
+ Color(255, 0, 0, 0),
+ mid_value=0.02,
+ power=1)
+
+
+class PercentFormat(Format):
+ """Format the cell as a percent.
+
+ Example:
+ If the cell contains a value of 1.5, the string_value will be +50%.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = "%+1.1f%%" % ((float(cell.value) - 1) * 100)
+ cell.color = self._GetColor(cell.value,
+ Color(255, 0, 0, 0),
+ Color(0, 0, 0, 0),
+ Color(0, 255, 0, 0))
+
+
+class RatioFormat(Format):
+ """Format the cell as a ratio.
+
+ Example:
+ If the cell contains a value of 1.5642, the string_value will be 1.56.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = "%+1.1f%%" % ((cell.value - 1) * 100)
+ cell.color = self._GetColor(cell.value,
+ Color(255, 0, 0, 0),
+ Color(0, 0, 0, 0),
+ Color(0, 255, 0, 0))
+
+
+class ColorBoxFormat(Format):
+ """Format the cell as a color box.
+
+ Example:
+ If the cell contains a value of 1.5, it will get a green color.
+ If the cell contains a value of 0.5, it will get a red color.
+ The intensity of the green/red will be determined by how much above or below
+ 1.0 the value is.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = "--"
+ bgcolor = self._GetColor(cell.value,
+ Color(255, 0, 0, 0),
+ Color(255, 255, 255, 0),
+ Color(0, 255, 0, 0))
+ cell.bgcolor = bgcolor
+ cell.color = bgcolor
+
+
+class Cell(object):
+ """A class to represent a cell in a table.
+
+ Attributes:
+ value: The raw value of the cell.
+ color: The color of the cell.
+ bgcolor: The background color of the cell.
+ string_value: The string value of the cell.
+ suffix: A string suffix to be attached to the value when displaying.
+ prefix: A string prefix to be attached to the value when displaying.
+ color_row: Indicates whether the whole row is to inherit this cell's color.
+ bgcolor_row: Indicates whether the whole row is to inherit this cell's
+ bgcolor.
+ width: Optional specifier to make a column narrower than the usual width.
+ The usual width of a column is the max of all its cells widths.
+ colspan: Set the colspan of the cell in the HTML table, this is used for
+ table headers. Default value is 1.
+ name: the test name of the cell.
+ """
+
+ def __init__(self):
+ self.value = None
+ self.color = None
+ self.bgcolor = None
+ self.string_value = None
+ self.suffix = None
+ self.prefix = None
+ # Entire row inherits this color.
+ self.color_row = False
+ self.bgcolor_row = False
+ self.width = None
+ self.colspan = 1
+ self.name = None
+
+ def __str__(self):
+ l = []
+ l.append("value: %s" % self.value)
+ l.append("string_value: %s" % self.string_value)
+ return " ".join(l)
+
+
+class Column(object):
+ """Class representing a column in a table.
+
+ Attributes:
+ result: an object of the Result class.
+ fmt: an object of the Format class.
+ """
+
+ def __init__(self, result, fmt, name=""):
+ self.result = result
+ self.fmt = fmt
+ self.name = name
+
+
+# Takes in:
+# ["Key", "Label1", "Label2"]
+# ["k", ["v", "v2"], [v3]]
+# etc.
+# Also takes in a format string.
+# Returns a table like:
+# ["Key", "Label1", "Label2"]
+# ["k", avg("v", "v2"), stddev("v", "v2"), etc.]]
+# according to format string
+class TableFormatter(object):
+ """Class to convert a plain table into a cell-table.
+
+ This class takes in a table generated by TableGenerator and a list of column
+ formats to apply to the table and returns a table of cells.
+ """
+
+ def __init__(self, table, columns):
+ """The constructor takes in a table and a list of columns.
+
+ Args:
+ table: A list of lists of values.
+ columns: A list of column containing what to produce and how to format it.
+ """
+ self._table = table
+ self._columns = columns
+ self._table_columns = []
+ self._out_table = []
+
+ def _GenerateCellTable(self):
+ row_index = 0
+
+ for row in self._table[1:]:
+ key = Cell()
+ key.string_value = str(row[0])
+ out_row = [key]
+ baseline = None
+ for values in row[1:]:
+ for column in self._columns:
+ cell = Cell()
+ cell.name = key.string_value
+ if column.result.NeedsBaseline():
+ if baseline is not None:
+ column.result.Compute(cell, values, baseline)
+ column.fmt.Compute(cell)
+ out_row.append(cell)
+ if not row_index:
+ self._table_columns.append(column)
+ else:
+ column.result.Compute(cell, values, baseline)
+ column.fmt.Compute(cell)
+ out_row.append(cell)
+ if not row_index:
+ self._table_columns.append(column)
+
+ if baseline is None:
+ baseline = values
+ self._out_table.append(out_row)
+ row_index += 1
+
+ # TODO(asharif): refactor this.
+ # Now generate header
+ key = Cell()
+ key.string_value = "Keys"
+ header = [key]
+ for column in self._table_columns:
+ cell = Cell()
+ if column.name:
+ cell.string_value = column.name
+ else:
+ result_name = column.result.__class__.__name__
+ format_name = column.fmt.__class__.__name__
+
+ cell.string_value = "%s %s" % (result_name.replace("Result", ""),
+ format_name.replace("Format", ""))
+
+ header.append(cell)
+
+ self._out_table = [header] + self._out_table
+
+ top_header = []
+ colspan = 0
+ for column in self._columns:
+ if not column.result.NeedsBaseline():
+ colspan += 1
+ for label in self._table[0]:
+ cell = Cell()
+ cell.string_value = str(label)
+ if cell.string_value != "keys":
+ cell.colspan = colspan
+ top_header.append(cell)
+
+ self._out_table = [top_header] + self._out_table
+
+ return self._out_table
+
+ def _PrintOutTable(self):
+ o = ""
+ for row in self._out_table:
+ for cell in row:
+ o += str(cell) + " "
+ o += "\n"
+ print o
+
+ def GetCellTable(self):
+ """Function to return a table of cells.
+
+ The table (list of lists) is converted into a table of cells by this
+ function.
+
+ Returns:
+ A table of cells with each cell having the properties and string values as
+ requiested by the columns passed in the constructor.
+ """
+ # Generate the cell table, creating a list of dynamic columns on the fly.
+ return self._GenerateCellTable()
+
+
+class TablePrinter(object):
+ """Class to print a cell table to the console, file or html."""
+ PLAIN = 0
+ CONSOLE = 1
+ HTML = 2
+ TSV = 3
+ EMAIL = 4
+
+ def __init__(self, table, output_type):
+ """Constructor that stores the cell table and output type."""
+ self._table = table
+ self._output_type = output_type
+
+ # Compute whole-table properties like max-size, etc.
+ def _ComputeStyle(self):
+ self._row_styles = []
+ for row in self._table:
+ row_style = Cell()
+ for cell in row:
+ if cell.color_row:
+ assert cell.color, "Cell color not set but color_row set!"
+ assert not row_style.color, "Multiple row_style.colors found!"
+ row_style.color = cell.color
+ self._row_styles.append(row_style)
+
+ self._column_styles = []
+ if len(self._table) < 2:
+ return
+ for i in range(len(self._table[1])):
+ column_style = Cell()
+ for row in self._table[1:]:
+ column_style.width = max(column_style.width,
+ len(row[i].string_value))
+ self._column_styles.append(column_style)
+
+ def _GetBGColorFix(self, color):
+ if self._output_type == self.CONSOLE:
+ rgb = color.GetRGB()
+ prefix, _ = colortrans.rgb2short(rgb)
+ prefix = "\033[48;5;%sm" % prefix
+ suffix = "\033[0m"
+ elif self._output_type in [self.EMAIL, self.HTML]:
+ rgb = color.GetRGB()
+ prefix = ("<FONT style=\"BACKGROUND-COLOR:#{0}\" color =#{0}>"
+ .format(rgb))
+ suffix = "</FONT>"
+ elif self._output_type in [self.PLAIN, self.TSV]:
+ prefix = ""
+ suffix = ""
+ return prefix, suffix
+
+ def _GetColorFix(self, color):
+ if self._output_type == self.CONSOLE:
+ rgb = color.GetRGB()
+ prefix, _ = colortrans.rgb2short(rgb)
+ prefix = "\033[38;5;%sm" % prefix
+ suffix = "\033[0m"
+ elif self._output_type in [self.EMAIL, self.HTML]:
+ rgb = color.GetRGB()
+ prefix = "<FONT COLOR=#{0}>".format(rgb)
+ suffix = "</FONT>"
+ elif self._output_type in [self.PLAIN, self.TSV]:
+ prefix = ""
+ suffix = ""
+ return prefix, suffix
+
+ def Print(self):
+ """Print the table to a console, html, etc.
+
+ Returns:
+ A string that contains the desired representation of the table.
+ """
+ self._ComputeStyle()
+ return self._GetStringValue()
+
+ def _GetCellValue(self, i, j):
+ cell = self._table[i][j]
+ color = None
+ out = cell.string_value
+ if self._row_styles[i].color:
+ color = self._row_styles[i].color
+ elif cell.color:
+ color = cell.color
+
+ if self._row_styles[i].bgcolor:
+ bgcolor = self._row_styles[i].bgcolor
+ else:
+ bgcolor = cell.bgcolor
+
+ raw_width = len(out)
+
+ if color:
+ p, s = self._GetColorFix(color)
+ out = "%s%s%s" % (p, out, s)
+
+ if bgcolor:
+ p, s = self._GetBGColorFix(bgcolor)
+ out = "%s%s%s" % (p, out, s)
+
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.EMAIL]:
+ if cell.width:
+ width = cell.width
+ else:
+ if self._column_styles:
+ width = self._column_styles[j].width
+ else:
+ width = len(cell.string_value)
+ if cell.colspan > 1:
+ width = 0
+ for k in range(cell.colspan):
+ width += self._column_styles[1 + (j-1) * cell.colspan + k].width
+ if width > raw_width:
+ padding = ("%" + str(width - raw_width) + "s") % ""
+ out = padding + out
+
+ if self._output_type == self.HTML:
+ if i < 2:
+ tag = "th"
+ else:
+ tag = "td"
+ out = "<{0} colspan = \"{2}\"> {1} </{0}>".format(tag, out, cell.colspan)
+
+ return out
+
+ def _GetHorizontalSeparator(self):
+ if self._output_type in [self.CONSOLE, self.PLAIN, self.EMAIL]:
+ return " "
+ if self._output_type == self.HTML:
+ return "&nbsp;"
+ if self._output_type == self.TSV:
+ return "\t"
+
+ def _GetVerticalSeparator(self):
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
+ return "\n"
+ if self._output_type == self.HTML:
+ return "</tr>\n<tr>"
+
+ def _GetPrefix(self):
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
+ return ""
+ if self._output_type == self.HTML:
+ return "<p></p><table id=\"box-table-a\">\n<tr>"
+
+ def _GetSuffix(self):
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
+ return ""
+ if self._output_type == self.HTML:
+ return "</tr>\n</table>"
+
+ def _GetStringValue(self):
+ o = ""
+ o += self._GetPrefix()
+ for i in range(len(self._table)):
+ row = self._table[i]
+ for j in range(len(row)):
+ out = self._GetCellValue(i, j)
+ o += out + self._GetHorizontalSeparator()
+ o += self._GetVerticalSeparator()
+ o += self._GetSuffix()
+ return o
+
+
+# Some common drivers
+def GetSimpleTable(table, out_to=TablePrinter.CONSOLE):
+ """Prints a simple table.
+
+ This is used by code that has a very simple list-of-lists and wants to produce
+ a table with ameans, a percentage ratio of ameans and a colorbox.
+
+ Args:
+ table: a list of lists.
+ out_to: specify the fomat of output. Currently it supports HTML and CONSOLE.
+
+ Returns:
+ A string version of the table that can be printed to the console.
+
+ Example:
+ GetSimpleConsoleTable([["binary", "b1", "b2"],["size", "300", "400"]])
+ will produce a colored table that can be printed to the console.
+ """
+ columns = [
+ Column(AmeanResult(),
+ Format()),
+ Column(AmeanRatioResult(),
+ PercentFormat()),
+ Column(AmeanRatioResult(),
+ ColorBoxFormat()),
+ ]
+ our_table = [table[0]]
+ for row in table[1:]:
+ our_row = [row[0]]
+ for v in row[1:]:
+ our_row.append([v])
+ our_table.append(our_row)
+
+ tf = TableFormatter(our_table, columns)
+ cell_table = tf.GetCellTable()
+ tp = TablePrinter(cell_table, out_to)
+ return tp.Print()
+
+
+def GetComplexTable(runs, labels, out_to=TablePrinter.CONSOLE):
+ tg = TableGenerator(runs, labels, TableGenerator.SORT_BY_VALUES_DESC)
+ table = tg.GetTable()
+ columns = [Column(LiteralResult(),
+ Format(),
+ "Literal"),
+ Column(AmeanResult(),
+ Format()),
+ Column(StdResult(),
+ Format()),
+ Column(CoeffVarResult(),
+ CoeffVarFormat()),
+ Column(NonEmptyCountResult(),
+ Format()),
+ Column(AmeanRatioResult(),
+ PercentFormat()),
+ Column(AmeanRatioResult(),
+ RatioFormat()),
+ Column(GmeanRatioResult(),
+ RatioFormat()),
+ ]
+ tf = TableFormatter(table, columns)
+ cell_table = tf.GetCellTable()
+ tp = TablePrinter(cell_table, out_to)
+ return tp.Print()
+
+if __name__ == "__main__":
+ # Run a few small tests here.
+ runs = [
+ [
+ {"k1": "10", "k2": "12", "k5": "40", "k6": "40",
+ "ms_1": "20", "k7": "FAIL", "k8": "PASS", "k9": "PASS",
+ "k10": "0"},
+ {"k1": "13", "k2": "14", "k3": "15", "ms_1": "10", "k8": "PASS",
+ "k9": "FAIL", "k10": "0"}
+ ],
+ [
+ {"k1": "50", "k2": "51", "k3": "52", "k4": "53", "k5": "35", "k6":
+ "45", "ms_1": "200", "ms_2": "20", "k7": "FAIL", "k8": "PASS", "k9":
+ "PASS"},
+ ],
+ ]
+ labels = ["vanilla", "modified"]
+ t = GetComplexTable(runs, labels, TablePrinter.CONSOLE)
+ print t
+ email = GetComplexTable(runs, labels, TablePrinter.EMAIL)
+
+ simple_table = [
+ ["binary", "b1", "b2", "b3"],
+ ["size", 100, 105, 108],
+ ["rodata", 100, 80, 70],
+ ["data", 100, 100, 100],
+ ["debug", 100, 140, 60],
+ ]
+ t = GetSimpleTable(simple_table)
+ print t
+ email += GetSimpleTable(simple_table, TablePrinter.PLAIN)
+ email_to = [getpass.getuser()]
+ email = "<pre style='font-size: 13px'>%s</pre>" % email
+ EmailSender().SendEmail(email_to, "SimpleTableTest", email, msg_type="html")
diff --git a/utils/tabulator_test.py b/utils/tabulator_test.py
new file mode 100644
index 00000000..e1892b07
--- /dev/null
+++ b/utils/tabulator_test.py
@@ -0,0 +1,96 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+
+"""Tests for misc."""
+
+__author__ = 'asharif@google.com (Ahmad Sharif)'
+
+# System modules
+import unittest
+
+# Local modules
+import tabulator
+
+
+class TabulatorTest(unittest.TestCase):
+ def testResult(self):
+ table = ["k1", ["1", "3"], ["55"]]
+ result = tabulator.Result()
+ cell = tabulator.Cell()
+ result.Compute(cell, table[2], table[1])
+ expected = " ".join([str(float(v)) for v in table[2]])
+ self.assertTrue(cell.value == expected)
+
+ result = tabulator.AmeanResult()
+ cell = tabulator.Cell()
+ result.Compute(cell, table[2], table[1])
+ self.assertTrue(cell.value == float(table[2][0]))
+
+ def testStringMean(self):
+ smr = tabulator.StringMeanResult()
+ cell = tabulator.Cell()
+ value = "PASS"
+ values = [value for _ in range(3)]
+ smr.Compute(cell, values, None)
+ self.assertTrue(cell.value == value)
+ values.append("FAIL")
+ smr.Compute(cell, values, None)
+ self.assertTrue(cell.value == "?")
+
+ def testStorageFormat(self):
+ sf = tabulator.StorageFormat()
+ cell = tabulator.Cell()
+ base = 1024.0
+ cell.value = base
+ sf.Compute(cell)
+ self.assertTrue(cell.string_value == "1.0K")
+ cell.value = base**2
+ sf.Compute(cell)
+ self.assertTrue(cell.string_value == "1.0M")
+ cell.value = base**3
+ sf.Compute(cell)
+ self.assertTrue(cell.string_value == "1.0G")
+
+ def testLerp(self):
+ c1 = tabulator.Color(0, 0, 0, 0)
+ c2 = tabulator.Color(255, 0, 0, 0)
+ c3 = tabulator.Color.Lerp(0.5, c1, c2)
+ self.assertTrue(c3.r == 127.5)
+ self.assertTrue(c3.g == 0)
+ self.assertTrue(c3.b == 0)
+ self.assertTrue(c3.a == 0)
+ c3.Round()
+ self.assertTrue(c3.r == 127)
+
+ def testTableGenerator(self):
+ runs = [[{"k1": "10", "k2": "12"},
+ {"k1": "13", "k2": "14", "k3": "15"}],
+ [{"k1": "50", "k2": "51", "k3": "52", "k4": "53"},]]
+ labels = ["vanilla", "modified"]
+ tg = tabulator.TableGenerator(runs, labels)
+ table = tg.GetTable()
+ header = table.pop(0)
+
+ self.assertTrue(header == ["keys", "vanilla", "modified"])
+ row = table.pop(0)
+ self.assertTrue(row == ["k1", ["10", "13"], ["50"]])
+ row = table.pop(0)
+ self.assertTrue(row == ["k2", ["12", "14"], ["51"]])
+ row = table.pop(0)
+ self.assertTrue(row == ["k3", [None, "15"], ["52"]])
+ row = table.pop(0)
+ self.assertTrue(row == ["k4", [None, None], ["53"]])
+
+ table = tg.GetTable()
+ columns = [
+ tabulator.Column(tabulator.AmeanResult(),
+ tabulator.Format()),
+ tabulator.Column(tabulator.AmeanRatioResult(),
+ tabulator.PercentFormat()),
+ ]
+ tf = tabulator.TableFormatter(table, columns)
+ table = tf.GetCellTable()
+ self.assertTrue(table)
+
+
+if __name__ == '__main__':
+ unittest.main()