aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2020-01-14 02:33:25 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2020-01-14 02:33:25 +0000
commitcb22d117de65a37726bdf3c37e049b8523659ee2 (patch)
treef0eb1bd38c63748e0456fcbb97c819fd85dcce7a
parent2545e9782cd64806b3e61f98a0780c93b09b75da (diff)
parenta32c2bb1c1532112da6847bf266e595761ce88c6 (diff)
downloadtoolchain-utils-cb22d117de65a37726bdf3c37e049b8523659ee2.tar.gz
Merging 31 commit(s) from Chromium's toolchain-utils am: b506c21cfb am: 43f5bd944a am: a32c2bb1c1
Change-Id: Ia9c68fd33d9b9c1f7e38c087edff7c02268e1d6c
-rw-r--r--COMMIT-QUEUE.ini14
-rw-r--r--afdo_metadata/kernel_afdo.json6
-rwxr-xr-xbuildbot_test_llvm.py2
-rwxr-xr-xbuildbot_test_toolchains.py3
-rw-r--r--compiler_wrapper/gcc_flags.go1
-rw-r--r--compiler_wrapper/gcc_flags_test.go1
-rw-r--r--cros_utils/__init__.py5
-rwxr-xr-xcros_utils/buildbot_json.py280
-rw-r--r--cros_utils/buildbot_utils.py9
-rwxr-xr-xcros_utils/buildbot_utils_unittest.py8
-rw-r--r--cros_utils/command_executer.py27
-rwxr-xr-xcros_utils/command_executer_unittest.py5
-rw-r--r--cros_utils/constants.py6
-rw-r--r--cros_utils/contextlib3.py116
-rwxr-xr-xcros_utils/contextlib3_test.py195
-rw-r--r--[-rwxr-xr-x]cros_utils/device_setup_utils.py93
-rwxr-xr-xcros_utils/device_setup_utils_unittest.py95
-rwxr-xr-xcros_utils/email_sender.py10
-rw-r--r--cros_utils/file_utils.py6
-rw-r--r--cros_utils/html_tools.py6
-rw-r--r--cros_utils/logger.py34
-rw-r--r--cros_utils/machines.py2
-rw-r--r--cros_utils/manifest_versions.py14
-rw-r--r--cros_utils/misc.py33
-rwxr-xr-x[-rw-r--r--]cros_utils/misc_test.py7
-rwxr-xr-xcros_utils/no_pseudo_terminal_test.py3
-rwxr-xr-xcros_utils/perf_diff.py39
-rw-r--r--cros_utils/pstat.py1077
-rw-r--r--cros_utils/stats.py4519
-rw-r--r--cros_utils/tabulator.py29
-rwxr-xr-x[-rw-r--r--]cros_utils/tabulator_test.py4
-rw-r--r--cros_utils/timeline.py13
-rwxr-xr-x[-rw-r--r--]cros_utils/timeline_test.py7
-rwxr-xr-xcrosperf/crosperf.py7
-rw-r--r--crosperf/experiment.py1
-rw-r--r--crosperf/results_cache.py25
-rwxr-xr-xcrosperf/results_cache_unittest.py8
-rw-r--r--crosperf/schedv2.py16
-rw-r--r--crosperf/suite_runner.py163
-rwxr-xr-xcrosperf/suite_runner_unittest.py179
-rwxr-xr-xllvm_tools/copy_helpers_to_chromiumos_overlay.py1
-rwxr-xr-xllvm_tools/get_llvm_hash.py20
-rwxr-xr-xllvm_tools/git_llvm_rev.py37
-rwxr-xr-xllvm_tools/git_llvm_rev_test.py72
-rwxr-xr-xllvm_tools/patch_manager.py16
-rwxr-xr-xllvm_tools/update_chromeos_llvm_next_hash.py26
-rwxr-xr-xllvm_tools/update_chromeos_llvm_next_hash_unittest.py4
47 files changed, 759 insertions, 6485 deletions
diff --git a/COMMIT-QUEUE.ini b/COMMIT-QUEUE.ini
deleted file mode 100644
index 0afba420..00000000
--- a/COMMIT-QUEUE.ini
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# Per-project Commit Queue settings.
-# Documentation: http://goo.gl/4rZhAx
-
-[GENERAL]
-
-# Stages to ignore in the commit queue. If these steps break, your CL will be
-# submitted anyway. Use with caution.
-
-# Files in toolchain-utils repo do not impact any HW/VM tests.
-ignored-stages: HWTest VMTest
diff --git a/afdo_metadata/kernel_afdo.json b/afdo_metadata/kernel_afdo.json
index 8bb580e8..bf869c93 100644
--- a/afdo_metadata/kernel_afdo.json
+++ b/afdo_metadata/kernel_afdo.json
@@ -1,11 +1,11 @@
{
"chromeos-kernel-3_14": {
- "name": "R80-12607.34-1575283400"
+ "name": "R81-12607.58-1578524353"
},
"chromeos-kernel-4_4": {
- "name": "R80-12672.0-1575282851"
+ "name": "R81-12739.12-1578524221"
},
"chromeos-kernel-3_18": {
- "name": "R80-12607.34-1575282780"
+ "name": "R81-12739.12-1578524538"
}
} \ No newline at end of file
diff --git a/buildbot_test_llvm.py b/buildbot_test_llvm.py
index 57ed3067..7e7c6e63 100755
--- a/buildbot_test_llvm.py
+++ b/buildbot_test_llvm.py
@@ -116,7 +116,7 @@ class ToolchainVerifier(object):
self._build,
self._patches,
tryjob_flags=['--hwtest'],
- async=True)
+ asynchronous=True)
return buildbucket_id
diff --git a/buildbot_test_toolchains.py b/buildbot_test_toolchains.py
index cc07c8eb..175dae5e 100755
--- a/buildbot_test_toolchains.py
+++ b/buildbot_test_toolchains.py
@@ -239,7 +239,8 @@ class ToolchainComparator(object):
crosperf = os.path.join(TOOLCHAIN_DIR, 'crosperf', 'crosperf')
noschedv2_opts = '--noschedv2' if self._noschedv2 else ''
- command = ('{crosperf} --no_email=True --results_dir={r_dir} --no_hwp '
+ command = ('{crosperf} --no_email=True --results_dir={r_dir} '
+ '--intel_pstate=no_hwp '
'--json_report=True {noschedv2_opts} {exp_file}').format(
crosperf=crosperf,
r_dir=self._reports_dir,
diff --git a/compiler_wrapper/gcc_flags.go b/compiler_wrapper/gcc_flags.go
index 8775b23b..397bf82d 100644
--- a/compiler_wrapper/gcc_flags.go
+++ b/compiler_wrapper/gcc_flags.go
@@ -14,6 +14,7 @@ func processGccFlags(builder *commandBuilder) {
"-march=goldmont": "-march=silvermont",
"-march=goldmont-plus": "-march=silvermont",
"-march=skylake": "-march=corei7",
+ "-march=tremont": "-march=silvermont",
}
builder.transformArgs(func(arg builderArg) string {
diff --git a/compiler_wrapper/gcc_flags_test.go b/compiler_wrapper/gcc_flags_test.go
index 436f9521..8f8e9023 100644
--- a/compiler_wrapper/gcc_flags_test.go
+++ b/compiler_wrapper/gcc_flags_test.go
@@ -37,6 +37,7 @@ func TestConvertClangToGccFlags(t *testing.T) {
{"-march=goldmont", "-march=silvermont"},
{"-march=goldmont-plus", "-march=silvermont"},
{"-march=skylake", "-march=corei7"},
+ {"-march=tremont", "-march=silvermont"},
}
for _, tt := range tests {
diff --git a/cros_utils/__init__.py b/cros_utils/__init__.py
index 8b137891..4c4e5554 100644
--- a/cros_utils/__init__.py
+++ b/cros_utils/__init__.py
@@ -1 +1,4 @@
-
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
diff --git a/cros_utils/buildbot_json.py b/cros_utils/buildbot_json.py
index 42a27744..08a8ae05 100755
--- a/cros_utils/buildbot_json.py
+++ b/cros_utils/buildbot_json.py
@@ -1,5 +1,8 @@
-#!/usr/bin/env python2
-# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
@@ -28,6 +31,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# NOTE: This file is NOT under GPL. See above.
+
"""Queries buildbot through the json interface.
"""
@@ -50,9 +54,10 @@ import logging
import optparse
import time
-import urllib
-import urllib2
import sys
+import urllib.error
+import urllib.parse
+import urllib.request
try:
from natsort import natsorted
@@ -63,7 +68,7 @@ except ImportError:
# These values are buildbot constants used for Build and BuildStep.
# This line was copied from master/buildbot/status/builder.py.
-SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = range(6)
+SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = list(range(6))
## Generic node caching code.
@@ -182,7 +187,8 @@ class AddressableDataNode(AddressableBaseDataNode): # pylint: disable=W0223
"""Automatically encodes the url."""
def __init__(self, parent, url, data):
- super(AddressableDataNode, self).__init__(parent, urllib.quote(url), data)
+ super(AddressableDataNode, self).__init__(parent, urllib.parse.quote(url),
+ data)
class NonAddressableDataNode(Node): # pylint: disable=W0223
@@ -329,7 +335,7 @@ class NonAddressableNodeList(VirtualNodeList): # pylint: disable=W0223
def cached_keys(self):
if self.parent.cached_data is None:
return None
- return range(len(self.parent.data.get(self.subkey, [])))
+ return list(range(len(self.parent.data.get(self.subkey, []))))
@property
def data(self):
@@ -382,13 +388,13 @@ class AddressableNodeList(NodeList):
@property
def cached_children(self):
- for item in self._cache.itervalues():
+ for item in self._cache.values():
if item.cached_data is not None:
yield item
@property
def cached_keys(self):
- return self._cache.keys()
+ return list(self._cache.keys())
def __getitem__(self, key):
"""Enables 'obj[i]'."""
@@ -419,14 +425,13 @@ class AddressableNodeList(NodeList):
# pylint: disable=W0212
if not self._is_cached:
to_fetch = [
- child
- for child in children
+ child for child in children
if not (child in self._cache and self._cache[child].cached_data)
]
if to_fetch:
# Similar to cache(). The only reason to sort is to simplify testing.
- params = '&'.join('select=%s' % urllib.quote(str(v))
- for v in sorted(to_fetch))
+ params = '&'.join(
+ 'select=%s' % urllib.parse.quote(str(v)) for v in sorted(to_fetch))
data = self.read('?' + params)
for key in sorted(data):
self._create_obj(key, data[key])
@@ -440,7 +445,7 @@ class AddressableNodeList(NodeList):
def discard(self):
"""Discards temporary children."""
super(AddressableNodeList, self).discard()
- for v in self._cache.itervalues():
+ for v in self._cache.values():
v.discard()
def read(self, suburl):
@@ -526,8 +531,8 @@ class SubViewNodeList(VirtualNodeList): # pylint: disable=W0223
self.cache()
return super(SubViewNodeList, self).__iter__()
-###############################################################################
-## Buildbot-specific code
+
+# Buildbot-specific code
class Slave(AddressableDataNode):
@@ -681,7 +686,7 @@ class BuildSteps(NonAddressableNodeList):
def __getitem__(self, key):
"""Accept step name in addition to index number."""
- if isinstance(key, basestring):
+ if isinstance(key, str):
# It's a string, try to find the corresponding index.
for i, step in enumerate(self.data):
if step['name'] == key:
@@ -840,7 +845,7 @@ class Builds(AddressableNodeList):
# highest key value and calculate from it.
key = max(self._keys) + key + 1
- if not key in self._cache:
+ if key not in self._cache:
# Create an empty object.
self._create_obj(key, None)
return self._cache[key]
@@ -856,7 +861,7 @@ class Builds(AddressableNodeList):
To access the older builds, use self.iterall() instead.
"""
self.cache()
- return reversed(self._cache.values())
+ return reversed(list(self._cache.values()))
def iterall(self):
"""Returns Build objects in decreasing order unbounded up to build 0.
@@ -970,22 +975,23 @@ class Buildbot(AddressableBaseDataNode):
else:
url += '?filter=1'
logging.info('read(%s)', suburl)
- channel = urllib.urlopen(url)
+ channel = urllib.request.urlopen(url)
data = channel.read()
try:
return json.loads(data)
except ValueError:
if channel.getcode() >= 400:
# Convert it into an HTTPError for easier processing.
- raise urllib2.HTTPError(url, channel.getcode(), '%s:\n%s' % (url, data),
- channel.headers, None)
+ raise urllib.error.HTTPError(url, channel.getcode(),
+ '%s:\n%s' % (url, data), channel.headers,
+ None)
raise
def _readall(self):
return self.read('project')
-###############################################################################
-## Controller code
+
+# Controller code
def usage(more):
@@ -1026,12 +1032,13 @@ def need_buildbot(fn):
@need_buildbot
def CMDpending(parser, args):
"""Lists pending jobs."""
- parser.add_option('-b',
- '--builder',
- dest='builders',
- action='append',
- default=[],
- help='Builders to filter on')
+ parser.add_option(
+ '-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
options, args, buildbot = parser.parse_args(args)
if args:
parser.error('Unrecognized parameters: %s' % ' '.join(args))
@@ -1049,7 +1056,7 @@ def CMDpending(parser, args):
print(' revision: %s' % pending['source']['revision'])
for change in pending['source']['changes']:
print(' change:')
- print(' comment: %r' % unicode(change['comments'][:50]))
+ print(' comment: %r' % change['comments'][:50])
print(' who: %s' % change['who'])
return 0
@@ -1063,13 +1070,11 @@ def CMDrun(parser, args):
was on its own line.
"""
parser.add_option('-f', '--file', help='Read script from file')
- parser.add_option('-i',
- dest='use_stdin',
- action='store_true',
- help='Read script on stdin')
+ parser.add_option(
+ '-i', dest='use_stdin', action='store_true', help='Read script on stdin')
# Variable 'buildbot' is not used directly.
# pylint: disable=W0612
- options, args, buildbot = parser.parse_args(args)
+ options, args, _ = parser.parse_args(args)
if (bool(args) + bool(options.use_stdin) + bool(options.file)) != 1:
parser.error('Need to pass only one of: <commands>, -f <file> or -i')
if options.use_stdin:
@@ -1090,10 +1095,9 @@ def CMDinteractive(parser, args):
_, args, buildbot = parser.parse_args(args)
if args:
parser.error('Unrecognized parameters: %s' % ' '.join(args))
- prompt = (
- 'Buildbot interactive console for "%s".\n'
- 'Hint: Start with typing: \'buildbot.printable_attributes\' or '
- '\'print str(buildbot)\' to explore.') % buildbot.url[:-len('/json')]
+ prompt = ('Buildbot interactive console for "%s".\n'
+ "Hint: Start with typing: 'buildbot.printable_attributes' or "
+ "'print str(buildbot)' to explore.") % buildbot.url[:-len('/json')]
local_vars = {'buildbot': buildbot, 'b': buildbot}
code.interact(prompt, None, local_vars)
@@ -1123,18 +1127,20 @@ def CMDdisconnected(parser, args):
def find_idle_busy_slaves(parser, args, show_idle):
- parser.add_option('-b',
- '--builder',
- dest='builders',
- action='append',
- default=[],
- help='Builders to filter on')
- parser.add_option('-s',
- '--slave',
- dest='slaves',
- action='append',
- default=[],
- help='Slaves to filter on')
+ parser.add_option(
+ '-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option(
+ '-s',
+ '--slave',
+ dest='slaves',
+ action='append',
+ default=[],
+ help='Slaves to filter on')
options, args, buildbot = parser.parse_args(args)
if args:
parser.error('Unrecognized parameters: %s' % ' '.join(args))
@@ -1212,8 +1218,9 @@ def last_failure(buildbot,
def CMDlast_failure(parser, args):
"""Lists all slaves that failed on that step on their last build.
- Example: to find all slaves where their last build was a compile failure,
- run with --step compile
+ Examples:
+ To find all slaves where their last build was a compile failure,
+ run with --step compile
"""
parser.add_option(
'-S',
@@ -1222,32 +1229,36 @@ def CMDlast_failure(parser, args):
action='append',
default=[],
help='List all slaves that failed on that step on their last build')
- parser.add_option('-b',
- '--builder',
- dest='builders',
- action='append',
- default=[],
- help='Builders to filter on')
- parser.add_option('-s',
- '--slave',
- dest='slaves',
- action='append',
- default=[],
- help='Slaves to filter on')
- parser.add_option('-n',
- '--no_cache',
- action='store_true',
- help='Don\'t load all builds at once')
+ parser.add_option(
+ '-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option(
+ '-s',
+ '--slave',
+ dest='slaves',
+ action='append',
+ default=[],
+ help='Slaves to filter on')
+ parser.add_option(
+ '-n',
+ '--no_cache',
+ action='store_true',
+ help="Don't load all builds at once")
options, args, buildbot = parser.parse_args(args)
if args:
parser.error('Unrecognized parameters: %s' % ' '.join(args))
print_builders = not options.quiet and len(options.builders) != 1
last_builder = None
- for build in last_failure(buildbot,
- builders=options.builders,
- slaves=options.slaves,
- steps=options.steps,
- no_cache=options.no_cache):
+ for build in last_failure(
+ buildbot,
+ builders=options.builders,
+ slaves=options.slaves,
+ steps=options.steps,
+ no_cache=options.no_cache):
if print_builders and last_builder != build.builder:
print(build.builder.name)
@@ -1259,8 +1270,8 @@ def CMDlast_failure(parser, args):
else:
print(build.slave.name)
else:
- out = '%d on %s: blame:%s' % (build.number, build.slave.name,
- ', '.join(build.blame))
+ out = '%d on %s: blame:%s' % (build.number, build.slave.name, ', '.join(
+ build.blame))
if print_builders:
out = ' ' + out
print(out)
@@ -1280,15 +1291,15 @@ def CMDlast_failure(parser, args):
@need_buildbot
def CMDcurrent(parser, args):
"""Lists current jobs."""
- parser.add_option('-b',
- '--builder',
- dest='builders',
- action='append',
- default=[],
- help='Builders to filter on')
- parser.add_option('--blame',
- action='store_true',
- help='Only print the blame list')
+ parser.add_option(
+ '-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option(
+ '--blame', action='store_true', help='Only print the blame list')
options, args, buildbot = parser.parse_args(args)
if args:
parser.error('Unrecognized parameters: %s' % ' '.join(args))
@@ -1330,28 +1341,30 @@ def CMDcurrent(parser, args):
def CMDbuilds(parser, args):
"""Lists all builds.
- Example: to find all builds on a single slave, run with -b bar -s foo
+ Examples:
+ To find all builds on a single slave, run with -b bar -s foo.
"""
- parser.add_option('-r',
- '--result',
- type='int',
- help='Build result to filter on')
- parser.add_option('-b',
- '--builder',
- dest='builders',
- action='append',
- default=[],
- help='Builders to filter on')
- parser.add_option('-s',
- '--slave',
- dest='slaves',
- action='append',
- default=[],
- help='Slaves to filter on')
- parser.add_option('-n',
- '--no_cache',
- action='store_true',
- help='Don\'t load all builds at once')
+ parser.add_option(
+ '-r', '--result', type='int', help='Build result to filter on')
+ parser.add_option(
+ '-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option(
+ '-s',
+ '--slave',
+ dest='slaves',
+ action='append',
+ default=[],
+ help='Slaves to filter on')
+ parser.add_option(
+ '-n',
+ '--no_cache',
+ action='store_true',
+ help="Don't load all builds at once")
options, args, buildbot = parser.parse_args(args)
if args:
parser.error('Unrecognized parameters: %s' % ' '.join(args))
@@ -1377,16 +1390,15 @@ def CMDbuilds(parser, args):
@need_buildbot
def CMDcount(parser, args):
"""Count the number of builds that occured during a specific period."""
- parser.add_option('-o',
- '--over',
- type='int',
- help='Number of seconds to look for')
- parser.add_option('-b',
- '--builder',
- dest='builders',
- action='append',
- default=[],
- help='Builders to filter on')
+ parser.add_option(
+ '-o', '--over', type='int', help='Number of seconds to look for')
+ parser.add_option(
+ '-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
options, args, buildbot = parser.parse_args(args)
if args:
parser.error('Unrecognized parameters: %s' % ' '.join(args))
@@ -1405,10 +1417,11 @@ def CMDcount(parser, args):
for build in builder.builds.iterall():
try:
start_time = build.start_time
- except urllib2.HTTPError:
+ except urllib.error.HTTPError:
# The build was probably trimmed.
- print('Failed to fetch build %s/%d' % (builder.name, build.number),
- file=sys.stderr)
+ print(
+ 'Failed to fetch build %s/%d' % (builder.name, build.number),
+ file=sys.stderr)
continue
if start_time >= since:
counts[builder.name] += 1
@@ -1418,10 +1431,10 @@ def CMDcount(parser, args):
print('.. %d' % counts[builder.name])
align_name = max(len(b) for b in counts)
- align_number = max(len(str(c)) for c in counts.itervalues())
+ align_number = max(len(str(c)) for c in counts.values())
for builder in sorted(counts):
print('%*s: %*d' % (align_name, builder, align_number, counts[builder]))
- print('Total: %d' % sum(counts.itervalues()))
+ print('Total: %d' % sum(counts.values()))
return 0
@@ -1448,22 +1461,24 @@ def gen_parser():
parser.parse_args = Parse
- parser.add_option('-v',
- '--verbose',
- action='count',
- help='Use multiple times to increase logging leve')
+ parser.add_option(
+ '-v',
+ '--verbose',
+ action='count',
+ help='Use multiple times to increase logging leve')
parser.add_option(
'-q',
'--quiet',
action='store_true',
help='Reduces the output to be parsed by scripts, independent of -v')
- parser.add_option('--throttle',
- type='float',
- help='Minimum delay to sleep between requests')
+ parser.add_option(
+ '--throttle',
+ type='float',
+ help='Minimum delay to sleep between requests')
return parser
-###############################################################################
-## Generic subcommand handling code
+
+# Generic subcommand handling code
def Command(name):
@@ -1497,7 +1512,8 @@ def main(args=None):
# pylint: disable=E1101
CMDhelp.__doc__ += '\n\nCommands are:\n' + '\n'.join(
' %-12s %s' % (fn[3:], Command(fn[3:]).__doc__.split('\n', 1)[0])
- for fn in dir(sys.modules[__name__]) if fn.startswith('CMD'))
+ for fn in dir(sys.modules[__name__])
+ if fn.startswith('CMD'))
parser = gen_parser()
if args is None:
diff --git a/cros_utils/buildbot_utils.py b/cros_utils/buildbot_utils.py
index 35dc3ac6..a06abd26 100644
--- a/cros_utils/buildbot_utils.py
+++ b/cros_utils/buildbot_utils.py
@@ -5,6 +5,7 @@
"""Utilities for launching and accessing ChromeOS buildbots."""
+from __future__ import division
from __future__ import print_function
import ast
@@ -133,7 +134,7 @@ def GetTrybotImage(chromeos_root,
patch_list,
tryjob_flags=None,
build_toolchain=False,
- async=False):
+ asynchronous=False):
"""Launch buildbot and get resulting trybot artifact name.
This function launches a buildbot with the appropriate flags to
@@ -151,7 +152,7 @@ def GetTrybotImage(chromeos_root,
tryjob_flags: See cros tryjob --help for available options.
build_toolchain: builds and uses the latest toolchain, rather than the
prebuilt one in SDK.
- async: don't wait for artifacts; just return the buildbucket id
+ asynchronous: don't wait for artifacts; just return the buildbucket id
Returns:
(buildbucket id, partial image url) e.g.
@@ -159,7 +160,7 @@ def GetTrybotImage(chromeos_root,
"""
buildbucket_id = SubmitTryjob(chromeos_root, buildbot_name, patch_list,
tryjob_flags, build_toolchain)
- if async:
+ if asynchronous:
return buildbucket_id, ' '
# The trybot generally takes more than 2 hours to finish.
@@ -246,7 +247,7 @@ def GetLatestImage(chromeos_root, path):
_, out, _ = ce.ChrootRunCommandWOutput(
chromeos_root, command, print_to_console=False)
candidates = [l.split('/')[-2] for l in out.split()]
- candidates = map(fmt.match, candidates)
+ candidates = [fmt.match(c) for c in candidates]
candidates = [[int(r) for r in m.group(1, 2, 3, 4)] for m in candidates if m]
candidates.sort(reverse=True)
for c in candidates:
diff --git a/cros_utils/buildbot_utils_unittest.py b/cros_utils/buildbot_utils_unittest.py
index bfba8d78..4fc3d170 100755
--- a/cros_utils/buildbot_utils_unittest.py
+++ b/cros_utils/buildbot_utils_unittest.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2018 The Chromium OS Authors. All rights reserved.
@@ -9,10 +9,10 @@
from __future__ import print_function
-from mock import patch
-
import time
+
import unittest
+from unittest.mock import patch
from cros_utils import buildbot_utils
from cros_utils import command_executer
@@ -82,7 +82,7 @@ class TrybotTest(unittest.TestCase):
# async
buildbucket_id, image = buildbot_utils.GetTrybotImage(
- '/tmp', 'falco-release-tryjob', [], async=True)
+ '/tmp', 'falco-release-tryjob', [], asynchronous=True)
self.assertEqual(buildbucket_id, self.buildbucket_id)
self.assertEqual(' ', image)
diff --git a/cros_utils/command_executer.py b/cros_utils/command_executer.py
index 08e4e6ae..c4d3fded 100644
--- a/cros_utils/command_executer.py
+++ b/cros_utils/command_executer.py
@@ -124,7 +124,7 @@ class CommandExecuter(object):
terminated_time = None
started_time = time.time()
- while len(pipes):
+ while pipes:
if command_terminator and command_terminator.IsTerminated():
os.killpg(os.getpgid(p.pid), signal.SIGTERM)
if self.logger:
@@ -136,7 +136,7 @@ class CommandExecuter(object):
l = my_poll.poll(100)
for (fd, _) in l:
if fd == p.stdout.fileno():
- out = os.read(p.stdout.fileno(), 16384)
+ out = os.read(p.stdout.fileno(), 16384).decode('utf8')
if return_output:
full_stdout += out
if self.logger:
@@ -145,7 +145,7 @@ class CommandExecuter(object):
pipes.remove(p.stdout)
my_poll.unregister(p.stdout)
if fd == p.stderr.fileno():
- err = os.read(p.stderr.fileno(), 16384)
+ err = os.read(p.stderr.fileno(), 16384).decode('utf8')
if return_output:
full_stderr += err
if self.logger:
@@ -182,8 +182,8 @@ class CommandExecuter(object):
if return_output:
return (p.returncode, full_stdout, full_stderr)
return (p.returncode, '', '')
- except BaseException as e:
- except_handler(p, e)
+ except BaseException as err:
+ except_handler(p, err)
raise
def RunCommand(self, *args, **kwargs):
@@ -233,7 +233,7 @@ class CommandExecuter(object):
command += '\n. ' + chromeos_root + '/src/scripts/common.sh'
command += '\n. ' + chromeos_root + '/src/scripts/remote_access.sh'
command += '\nTMP=$(mktemp -d)'
- command += "\nFLAGS \"$@\" || exit 1"
+ command += '\nFLAGS "$@" || exit 1'
command += '\nremote_access_init'
return command
@@ -300,7 +300,7 @@ class CommandExecuter(object):
command = self.RemoteAccessInitCommand(chromeos_root, machine)
command += '\nremote_sh bash %s' % command_file
- command += "\nl_retval=$?; echo \"$REMOTE_OUT\"; exit $l_retval"
+ command += '\nl_retval=$?; echo "$REMOTE_OUT"; exit $l_retval'
retval = self.RunCommandGeneric(
command,
return_output,
@@ -371,7 +371,7 @@ class CommandExecuter(object):
os.write(handle, '\n')
os.close(handle)
- os.chmod(command_file, 0777)
+ os.chmod(command_file, 0o777)
# if return_output is set, run a dummy command first to make sure that
# the chroot already exists. We want the final returned output to skip
@@ -474,7 +474,7 @@ class CommandExecuter(object):
ssh_command = (
'ssh -p ${FLAGS_ssh_port}' + ' -o StrictHostKeyChecking=no' +
' -o UserKnownHostsFile=$(mktemp)' + ' -i $TMP_PRIVATE_KEY')
- rsync_prefix = "\nrsync -r -e \"%s\" " % ssh_command
+ rsync_prefix = '\nrsync -r -e "%s" ' % ssh_command
if dest_cros:
command += rsync_prefix + '%s root@%s:%s' % (src, dest_machine, dest)
return self.RunCommand(
@@ -627,7 +627,7 @@ class CommandExecuter(object):
poll.register(errfd, select.POLLIN | select.POLLPRI)
handlermap[errfd] = StreamHandler(pobject, errfd, 'stderr',
line_consumer)
- while len(handlermap):
+ while handlermap:
readables = poll.poll(300)
for (fd, evt) in readables:
handler = handlermap[fd]
@@ -642,17 +642,14 @@ class CommandExecuter(object):
os.killpg(os.getpgid(pobject.pid), signal.SIGTERM)
return pobject.wait()
- except BaseException as e:
- except_handler(pobject, e)
+ except BaseException as err:
+ except_handler(pobject, err)
raise
class MockCommandExecuter(CommandExecuter):
"""Mock class for class CommandExecuter."""
- def __init__(self, log_level, logger_to_set=None):
- super(MockCommandExecuter, self).__init__(log_level, logger_to_set)
-
def RunCommandGeneric(self,
cmd,
return_output=False,
diff --git a/cros_utils/command_executer_unittest.py b/cros_utils/command_executer_unittest.py
index 4da4a4ac..959000fd 100755
--- a/cros_utils/command_executer_unittest.py
+++ b/cros_utils/command_executer_unittest.py
@@ -1,5 +1,8 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
"""Unittest for command_executer.py."""
diff --git a/cros_utils/constants.py b/cros_utils/constants.py
index 827e9233..b12175bb 100644
--- a/cros_utils/constants.py
+++ b/cros_utils/constants.py
@@ -1,4 +1,8 @@
-# Copyright 2010 Google Inc. All Rights Reserved.
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""Generic constants used accross modules.
"""
diff --git a/cros_utils/contextlib3.py b/cros_utils/contextlib3.py
deleted file mode 100644
index 9fabbf6e..00000000
--- a/cros_utils/contextlib3.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2019 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Random utilties from Python3's contextlib."""
-
-from __future__ import division
-from __future__ import print_function
-
-import sys
-
-
-class ExitStack(object):
- """https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack"""
-
- def __init__(self):
- self._stack = []
- self._is_entered = False
-
- def _assert_is_entered(self):
- # Strictly, entering has no effect on the operations that call this.
- # However, if you're trying to e.g. push things to an ExitStack that hasn't
- # yet been entered, that's likely a bug.
- assert self._is_entered, 'ExitStack op performed before entering'
-
- def __enter__(self):
- self._is_entered = True
- return self
-
- def _perform_exit(self, exc_type, exc, exc_traceback):
- # I suppose a better name for this is
- # `take_exception_handling_into_our_own_hands`, but that's harder to type.
- exception_handled = False
- while self._stack:
- fn = self._stack.pop()
- # The except clause below is meant to run as-if it's a `finally` block,
- # but `finally` blocks don't have easy access to exceptions currently in
- # flight. Hence, we do need to catch things like KeyboardInterrupt,
- # SystemExit, ...
- # pylint: disable=bare-except
- try:
- # If an __exit__ handler returns a truthy value, we should assume that
- # it handled the exception appropriately. Otherwise, we need to keep it
- # with us. (PEP 343)
- if fn(exc_type, exc, exc_traceback):
- exc_type, exc, exc_traceback = None, None, None
- exception_handled = True
- except:
- # Python2 doesn't appear to have the notion of 'exception causes',
- # which is super unfortunate. In the case:
- #
- # @contextlib.contextmanager
- # def foo()
- # try:
- # yield
- # finally:
- # raise ValueError
- #
- # with foo():
- # assert False
- #
- # ...Python will only note the ValueError; nothing about the failing
- # assertion is printed.
- #
- # I guess on the bright side, that means we don't have to fiddle with
- # __cause__s/etc.
- exc_type, exc, exc_traceback = sys.exc_info()
- exception_handled = True
-
- if not exception_handled:
- return False
-
- # Something changed. We either need to raise for ourselves, or note that
- # the exception has been suppressed.
- if exc_type is not None:
- raise exc_type, exc, exc_traceback
-
- # Otherwise, the exception was suppressed. Go us!
- return True
-
- def __exit__(self, exc_type, exc, exc_traceback):
- return self._perform_exit(exc_type, exc, exc_traceback)
-
- def close(self):
- """Unwinds the exit stack, unregistering all events"""
- self._perform_exit(None, None, None)
-
- def enter_context(self, cm):
- """Enters the given context manager, and registers it to be exited."""
- self._assert_is_entered()
-
- # The spec specifically notes that we should take __exit__ prior to calling
- # __enter__.
- exit_cleanup = cm.__exit__
- result = cm.__enter__()
- self._stack.append(exit_cleanup)
- return result
-
- # pylint complains about `exit` being redefined. `exit` is the documented
- # name of this param, and renaming it would break portability if someone
- # decided to `push(exit=foo)`, so just ignore the lint.
- # pylint: disable=redefined-builtin
- def push(self, exit):
- """Like `enter_context`, but won't enter the value given."""
- self._assert_is_entered()
- self._stack.append(exit.__exit__)
-
- def callback(self, callback, *args, **kwargs):
- """Performs the given callback on exit"""
- self._assert_is_entered()
-
- def fn(_exc_type, _exc, _exc_traceback):
- callback(*args, **kwargs)
-
- self._stack.append(fn)
diff --git a/cros_utils/contextlib3_test.py b/cros_utils/contextlib3_test.py
deleted file mode 100755
index 76c010f2..00000000
--- a/cros_utils/contextlib3_test.py
+++ /dev/null
@@ -1,195 +0,0 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-# Copyright 2019 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Tests for contextlib3"""
-
-from __future__ import division
-from __future__ import print_function
-
-import contextlib
-import unittest
-
-import contextlib3
-
-
-class SomeException(Exception):
- """Just an alternative to ValueError in the Exception class hierarchy."""
- pass
-
-
-class TestExitStack(unittest.TestCase):
- """Tests contextlib3.ExitStack"""
-
- def test_exceptions_in_exit_override_exceptions_in_with(self):
-
- @contextlib.contextmanager
- def raise_exit():
- raised = False
- try:
- yield
- except Exception:
- raised = True
- raise ValueError
- finally:
- self.assertTrue(raised)
-
- # (As noted in comments in contextlib3, this behavior is consistent with
- # how python2 works. Namely, if __exit__ raises, the exception from
- # __exit__ overrides the inner exception)
- with self.assertRaises(ValueError):
- with contextlib3.ExitStack() as stack:
- stack.enter_context(raise_exit())
- raise SomeException()
-
- def test_raising_in_exit_doesnt_block_later_exits(self):
- exited = []
-
- @contextlib.contextmanager
- def raise_exit():
- try:
- yield
- finally:
- exited.append('raise')
- raise ValueError
-
- @contextlib.contextmanager
- def push_exit():
- try:
- yield
- finally:
- exited.append('push')
-
- with self.assertRaises(ValueError):
- with contextlib3.ExitStack() as stack:
- stack.enter_context(push_exit())
- stack.enter_context(raise_exit())
- self.assertEqual(exited, ['raise', 'push'])
-
- exited = []
- with self.assertRaises(ValueError):
- with contextlib3.ExitStack() as stack:
- stack.enter_context(push_exit())
- stack.enter_context(raise_exit())
- raise SomeException()
- self.assertEqual(exited, ['raise', 'push'])
-
- def test_push_doesnt_enter_the_context(self):
- exited = []
-
- test_self = self
-
- class Manager(object):
- """A simple ContextManager for testing purposes"""
-
- def __enter__(self):
- test_self.fail('context manager was entered :(')
-
- def __exit__(self, *args, **kwargs):
- exited.append(1)
-
- with contextlib3.ExitStack() as stack:
- stack.push(Manager())
- self.assertEqual(exited, [])
- self.assertEqual(exited, [1])
-
- def test_callbacks_are_run_properly(self):
- callback_was_run = []
-
- def callback(arg, some_kwarg=None):
- self.assertEqual(arg, 41)
- self.assertEqual(some_kwarg, 42)
- callback_was_run.append(1)
-
- with contextlib3.ExitStack() as stack:
- stack.callback(callback, 41, some_kwarg=42)
- self.assertEqual(callback_was_run, [])
- self.assertEqual(callback_was_run, [1])
-
- callback_was_run = []
- with self.assertRaises(ValueError):
- with contextlib3.ExitStack() as stack:
- stack.callback(callback, 41, some_kwarg=42)
- raise ValueError()
- self.assertEqual(callback_was_run, [1])
-
- def test_finallys_are_run(self):
- finally_run = []
-
- @contextlib.contextmanager
- def append_on_exit():
- try:
- yield
- finally:
- finally_run.append(0)
-
- with self.assertRaises(ValueError):
- with contextlib3.ExitStack() as stack:
- stack.enter_context(append_on_exit())
- raise ValueError()
- self.assertEqual(finally_run, [0])
-
- def test_unwinding_happens_in_reverse_order(self):
- exit_runs = []
-
- @contextlib.contextmanager
- def append_things(start_push, end_push):
- exit_runs.append(start_push)
- try:
- yield
- finally:
- exit_runs.append(end_push)
-
- with contextlib3.ExitStack() as stack:
- stack.enter_context(append_things(1, 4))
- stack.enter_context(append_things(2, 3))
- self.assertEqual(exit_runs, [1, 2, 3, 4])
-
- exit_runs = []
- with self.assertRaises(ValueError):
- with contextlib3.ExitStack() as stack:
- stack.enter_context(append_things(1, 4))
- stack.enter_context(append_things(2, 3))
- raise ValueError
- self.assertEqual(exit_runs, [1, 2, 3, 4])
-
- def test_exceptions_are_propagated(self):
-
- @contextlib.contextmanager
- def die_on_regular_exit():
- yield
- self.fail('Unreachable in theory')
-
- with self.assertRaises(ValueError):
- with contextlib3.ExitStack() as stack:
- stack.enter_context(die_on_regular_exit())
- raise ValueError()
-
- def test_exceptions_can_be_blocked(self):
-
- @contextlib.contextmanager
- def block():
- try:
- yield
- except Exception:
- pass
-
- with contextlib3.ExitStack() as stack:
- stack.enter_context(block())
- raise ValueError()
-
- def test_objects_are_returned_from_enter_context(self):
-
- @contextlib.contextmanager
- def yield_arg(arg):
- yield arg
-
- with contextlib3.ExitStack() as stack:
- val = stack.enter_context(yield_arg(1))
- self.assertEqual(val, 1)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/cros_utils/device_setup_utils.py b/cros_utils/device_setup_utils.py
index 93e681bd..61dbba27 100755..100644
--- a/cros_utils/device_setup_utils.py
+++ b/cros_utils/device_setup_utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Copyright 2019 The Chromium OS Authors. All rights reserved.
@@ -18,15 +17,13 @@ __author__ = 'zhizhouy@google.com (Zhizhou Yang)'
import re
import time
+from contextlib import contextmanager
+
from cros_utils import command_executer
class DutWrapper(object):
- """Wrap DUT parameters inside.
-
- Eventially CommandExecuter will reqiure only one
- argument - command.
- """
+ """Wrap DUT parameters inside."""
def __init__(self,
chromeos_root,
@@ -43,6 +40,7 @@ class DutWrapper(object):
self.dut_config = dut_config
def RunCommandOnDut(self, command, ignore_status=False):
+ """Helper function to run command on DUT."""
ret, msg, err_msg = self.ce.CrosRunCommandWOutput(
command, machine=self.remote, chromeos_root=self.chromeos_root)
@@ -60,15 +58,17 @@ class DutWrapper(object):
return ret, msg, err_msg
def DisableASLR(self):
+ """Disable ASLR on DUT."""
disable_aslr = ('set -e; '
'if [[ -e /proc/sys/kernel/randomize_va_space ]]; then '
' echo 0 > /proc/sys/kernel/randomize_va_space; '
'fi')
if self.log_level == 'average':
self.logger.LogOutput('Disable ASLR.')
- self.RunCommandOnDut(disable_aslr, ignore_status=False)
+ self.RunCommandOnDut(disable_aslr)
def SetCpuGovernor(self, governor, ignore_status=False):
+ """Setup CPU Governor on DUT."""
set_gov_cmd = (
'for f in `ls -d /sys/devices/system/cpu/cpu*/cpufreq 2>/dev/null`; do '
# Skip writing scaling_governor if cpu is offline.
@@ -85,6 +85,7 @@ class DutWrapper(object):
return ret
def DisableTurbo(self):
+ """Disable Turbo on DUT."""
dis_turbo_cmd = (
'if [[ -e /sys/devices/system/cpu/intel_pstate/no_turbo ]]; then '
' if grep -q 0 /sys/devices/system/cpu/intel_pstate/no_turbo; then '
@@ -284,6 +285,7 @@ class DutWrapper(object):
(avail_ngt_target, max_freq_path, min_freq_path))
def WaitCooldown(self):
+ """Wait for DUT to cool down to certain temperature."""
waittime = 0
timeout_in_sec = int(self.dut_config['cooldown_time']) * 60
# Temperature from sensors come in uCelsius units.
@@ -327,11 +329,13 @@ class DutWrapper(object):
self.RunCommandOnDut(sed_command + FILE)
def StopUI(self):
+ """Stop UI on DUT."""
# Added "ignore_status" for the case when crosperf stops ui service which
# was already stopped. Command is going to fail with 1.
self.RunCommandOnDut('stop ui', ignore_status=True)
def StartUI(self):
+ """Start UI on DUT."""
# Similar to StopUI, `start ui` fails if the service is already started.
self.RunCommandOnDut('start ui', ignore_status=True)
@@ -456,3 +460,78 @@ class DutWrapper(object):
'Final verification failed with status %d' % ret_code)
self.logger.LogOutput('Kernel cmdline updated successfully.')
+
+ @contextmanager
+ def PauseUI(self):
+ """Stop UI before and Start UI after the context block.
+
+ Context manager will make sure UI is always resumed at the end.
+ """
+ self.StopUI()
+ try:
+ yield
+
+ finally:
+ self.StartUI()
+
+ def SetupDevice(self):
+ """Setup device to get it ready for testing.
+
+ @Returns Wait time of cool down for this benchmark run.
+ """
+ self.logger.LogOutput('Update kernel cmdline if necessary and reboot')
+ intel_pstate = self.dut_config['intel_pstate']
+ if intel_pstate and self.KerncmdUpdateNeeded(intel_pstate):
+ self.UpdateKerncmdIntelPstate(intel_pstate)
+
+ wait_time = 0
+ # Pause UI while configuring the DUT.
+ # This will accelerate setup (waiting for cooldown has x10 drop)
+ # and help to reset a Chrome state left after the previous test.
+ with self.PauseUI():
+ # Unless the user turns on ASLR in the flag, we first disable ASLR
+ # before running the benchmarks
+ if not self.dut_config['enable_aslr']:
+ self.DisableASLR()
+
+ # CPU usage setup comes first where we enable/disable cores.
+ self.SetupCpuUsage()
+ cpu_online_status = self.GetCpuOnline()
+ # List of online cores of type int (core number).
+ online_cores = [
+ core for core, status in cpu_online_status.items() if status
+ ]
+ if self.dut_config['cooldown_time']:
+ # Setup power conservative mode for effective cool down.
+ # Set ignore status since powersave may no be available
+ # on all platforms and we are going to handle it.
+ ret = self.SetCpuGovernor('powersave', ignore_status=True)
+ if ret:
+ # "powersave" is not available, use "ondemand".
+ # Still not a fatal error if it fails.
+ ret = self.SetCpuGovernor('ondemand', ignore_status=True)
+ # TODO(denik): Run comparison test for 'powersave' and 'ondemand'
+ # on scarlet and kevin64.
+ # We might have to consider reducing freq manually to the min
+ # if it helps to reduce waiting time.
+ wait_time = self.WaitCooldown()
+
+ # Setup CPU governor for the benchmark run.
+ # It overwrites the previous governor settings.
+ governor = self.dut_config['governor']
+ # FIXME(denik): Pass online cores to governor setup.
+ self.SetCpuGovernor(governor)
+
+ # Disable Turbo and Setup CPU freq should ALWAYS proceed governor setup
+ # since governor may change:
+ # - frequency;
+ # - turbo/boost.
+ self.DisableTurbo()
+ self.SetupCpuFreq(online_cores)
+
+ self.DecreaseWaitTime()
+ # FIXME(denik): Currently we are not recovering the previous cpufreq
+ # settings since we do reboot/setup every time anyway.
+ # But it may change in the future and then we have to recover the
+ # settings.
+ return wait_time
diff --git a/cros_utils/device_setup_utils_unittest.py b/cros_utils/device_setup_utils_unittest.py
index b2f76f7d..63f9bf66 100755
--- a/cros_utils/device_setup_utils_unittest.py
+++ b/cros_utils/device_setup_utils_unittest.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2019 The Chromium OS Authors. All rights reserved.
@@ -12,7 +12,7 @@ from __future__ import print_function
import time
import unittest
-import mock
+from unittest import mock
from device_setup_utils import DutWrapper
@@ -162,8 +162,7 @@ class DutWrapperTest(unittest.TestCase):
'if [[ -e /proc/sys/kernel/randomize_va_space ]]; then '
' echo 0 > /proc/sys/kernel/randomize_va_space; '
'fi')
- self.dw.RunCommandOnDut.assert_called_once_with(
- set_cpu_cmd, ignore_status=False)
+ self.dw.RunCommandOnDut.assert_called_once_with(set_cpu_cmd)
def test_set_cpu_governor(self):
self.dw.RunCommandOnDut = mock.Mock(return_value=(0, '', ''))
@@ -451,8 +450,8 @@ class DutWrapperTest(unittest.TestCase):
self.dw.dut_config['cpu_freq_pct'] = 50
self.dw.SetupCpuFreq(online)
self.dw.RunCommandOnDut.assert_called_once()
- self.assertNotRegexpMatches(self.dw.RunCommandOnDut.call_args_list[0][0][0],
- '^echo.*scaling_max_freq$')
+ self.assertNotRegex(self.dw.RunCommandOnDut.call_args_list[0][0][0],
+ '^echo.*scaling_max_freq$')
def test_setup_cpu_freq_multiple_no_access(self):
online = [0, 1]
@@ -615,6 +614,90 @@ class DutWrapperTest(unittest.TestCase):
self.dw.RunCommandOnDut.assert_called_once_with(
'start ui', ignore_status=True)
+ def test_setup_device(self):
+
+ def FakeRunner(command, ignore_status=False):
+ # pylint fix for unused variable.
+ del command, ignore_status
+ return 0, '', ''
+
+ def SetupMockFunctions():
+ self.dw.RunCommandOnDut = mock.Mock(return_value=FakeRunner)
+ self.dw.KerncmdUpdateNeeded = mock.Mock(return_value=True)
+ self.dw.UpdateKerncmdIntelPstate = mock.Mock(return_value=0)
+ self.dw.DisableASLR = mock.Mock(return_value=0)
+ self.dw.SetupCpuUsage = mock.Mock(return_value=0)
+ self.dw.SetupCpuFreq = mock.Mock(return_value=0)
+ self.dw.GetCpuOnline = mock.Mock(return_value={0: 1, 1: 1, 2: 0})
+ self.dw.SetCpuGovernor = mock.Mock(return_value=0)
+ self.dw.DisableTurbo = mock.Mock(return_value=0)
+ self.dw.StopUI = mock.Mock(return_value=0)
+ self.dw.StartUI = mock.Mock(return_value=0)
+ self.dw.WaitCooldown = mock.Mock(return_value=0)
+ self.dw.DecreaseWaitTime = mock.Mock(return_value=0)
+
+ self.dw.dut_config['enable_aslr'] = False
+ self.dw.dut_config['cooldown_time'] = 0
+ self.dw.dut_config['governor'] = 'fake_governor'
+ self.dw.dut_config['cpu_freq_pct'] = 65
+ self.dw.dut_config['intel_pstate'] = 'no_hwp'
+
+ SetupMockFunctions()
+ self.dw.SetupDevice()
+
+ self.dw.KerncmdUpdateNeeded.assert_called_once()
+ self.dw.UpdateKerncmdIntelPstate.assert_called_once()
+ self.dw.DisableASLR.assert_called_once()
+ self.dw.SetupCpuUsage.assert_called_once_with()
+ self.dw.SetupCpuFreq.assert_called_once_with([0, 1])
+ self.dw.GetCpuOnline.assert_called_once_with()
+ self.dw.SetCpuGovernor.assert_called_once_with('fake_governor')
+ self.dw.DisableTurbo.assert_called_once_with()
+ self.dw.DecreaseWaitTime.assert_called_once_with()
+ self.dw.StopUI.assert_called_once_with()
+ self.dw.StartUI.assert_called_once_with()
+ self.dw.WaitCooldown.assert_not_called()
+
+ # Test SetupDevice with cooldown
+ self.dw.dut_config['cooldown_time'] = 10
+
+ SetupMockFunctions()
+ self.dw.GetCpuOnline = mock.Mock(return_value={0: 0, 1: 1})
+
+ self.dw.SetupDevice()
+
+ self.dw.WaitCooldown.assert_called_once_with()
+ self.dw.DisableASLR.assert_called_once()
+ self.dw.DisableTurbo.assert_called_once_with()
+ self.dw.SetupCpuUsage.assert_called_once_with()
+ self.dw.SetupCpuFreq.assert_called_once_with([1])
+ self.dw.SetCpuGovernor.assert_called()
+ self.dw.GetCpuOnline.assert_called_once_with()
+ self.dw.StopUI.assert_called_once_with()
+ self.dw.StartUI.assert_called_once_with()
+ self.assertGreater(self.dw.SetCpuGovernor.call_count, 1)
+ self.assertEqual(self.dw.SetCpuGovernor.call_args,
+ mock.call('fake_governor'))
+
+ # Test SetupDevice with cooldown
+ SetupMockFunctions()
+ self.dw.SetupCpuUsage = mock.Mock(side_effect=RuntimeError())
+
+ with self.assertRaises(RuntimeError):
+ self.dw.SetupDevice()
+
+ # This call injected an exception.
+ self.dw.SetupCpuUsage.assert_called_once_with()
+ # Calls following the expeption are skipped.
+ self.dw.WaitCooldown.assert_not_called()
+ self.dw.DisableTurbo.assert_not_called()
+ self.dw.SetupCpuFreq.assert_not_called()
+ self.dw.SetCpuGovernor.assert_not_called()
+ self.dw.GetCpuOnline.assert_not_called()
+ # Check that Stop/Start UI are always called.
+ self.dw.StopUI.assert_called_once_with()
+ self.dw.StartUI.assert_called_once_with()
+
if __name__ == '__main__':
unittest.main()
diff --git a/cros_utils/email_sender.py b/cros_utils/email_sender.py
index 0d2bd651..3f724f64 100755
--- a/cros_utils/email_sender.py
+++ b/cros_utils/email_sender.py
@@ -1,6 +1,10 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
-# Copyright 2011 Google Inc. All Rights Reserved.
"""Utilities to send email either through SMTP or SendGMR."""
from __future__ import print_function
@@ -72,7 +76,7 @@ class EmailSender(object):
part.set_payload(attachment.content)
Encoders.encode_base64(part)
part.add_header('Content-Disposition',
- "attachment; filename=\"%s\"" % attachment.name)
+ 'attachment; filename="%s"' % attachment.name)
msg.attach(part)
# Send the message via our own SMTP server, but don't include the
diff --git a/cros_utils/file_utils.py b/cros_utils/file_utils.py
index 777e6251..fe6fc16f 100644
--- a/cros_utils/file_utils.py
+++ b/cros_utils/file_utils.py
@@ -1,4 +1,8 @@
-# Copyright 2011 Google Inc. All Rights Reserved.
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""Utilities for operations on files."""
from __future__ import print_function
diff --git a/cros_utils/html_tools.py b/cros_utils/html_tools.py
index 8ca795bf..688955ff 100644
--- a/cros_utils/html_tools.py
+++ b/cros_utils/html_tools.py
@@ -1,4 +1,8 @@
-# Copyright 2010 Google Inc. All Rights Reserved.
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""Utilities for generating html."""
diff --git a/cros_utils/logger.py b/cros_utils/logger.py
index 364d9c9d..4cc4618e 100644
--- a/cros_utils/logger.py
+++ b/cros_utils/logger.py
@@ -1,4 +1,8 @@
-# Copyright 2010 Google Inc. All Rights Reserved.
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""Logging helper module."""
from __future__ import print_function
@@ -9,7 +13,7 @@ import sys
import traceback
-#TODO(yunlian@google.com): Use GetRoot from misc
+# TODO(yunlian@google.com): Use GetRoot from misc
def GetRoot(scr_name):
"""Break up pathname into (dir+name)."""
abs_path = os.path.abspath(scr_name)
@@ -182,16 +186,12 @@ class Logger(object):
self.LogWarning(msg)
def LogCommandOutput(self, msg, print_to_console=True):
- self.LogMsg(self.stdout,
- self._GetStdout(print_to_console),
- msg,
- flush=False)
+ self.LogMsg(
+ self.stdout, self._GetStdout(print_to_console), msg, flush=False)
def LogCommandError(self, msg, print_to_console=True):
- self.LogMsg(self.stderr,
- self._GetStderr(print_to_console),
- msg,
- flush=False)
+ self.LogMsg(
+ self.stderr, self._GetStderr(print_to_console), msg, flush=False)
def Flush(self):
self.cmdfd.flush()
@@ -319,16 +319,12 @@ class MockLogger(object):
self.LogWarning(msg)
def LogCommandOutput(self, msg, print_to_console=True):
- self.LogMsg(self.stdout,
- self._GetStdout(print_to_console),
- msg,
- flush=False)
+ self.LogMsg(
+ self.stdout, self._GetStdout(print_to_console), msg, flush=False)
def LogCommandError(self, msg, print_to_console=True):
- self.LogMsg(self.stderr,
- self._GetStderr(print_to_console),
- msg,
- flush=False)
+ self.LogMsg(
+ self.stderr, self._GetStderr(print_to_console), msg, flush=False)
def Flush(self):
print('MockLogger: Flushing cmdfd, stdout, stderr')
@@ -363,7 +359,7 @@ def HandleUncaughtExceptions(fun):
def _Interceptor(*args, **kwargs):
try:
return fun(*args, **kwargs)
- except StandardError:
+ except Exception:
GetLogger().LogFatal('Uncaught exception:\n%s' % traceback.format_exc())
return _Interceptor
diff --git a/cros_utils/machines.py b/cros_utils/machines.py
index 722df3b8..89b51b01 100644
--- a/cros_utils/machines.py
+++ b/cros_utils/machines.py
@@ -1,6 +1,8 @@
+# -*- coding: utf-8 -*-
# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+
"""Utilities relating to machine-specific functions."""
from __future__ import print_function
diff --git a/cros_utils/manifest_versions.py b/cros_utils/manifest_versions.py
index 47e2fb20..83e88908 100644
--- a/cros_utils/manifest_versions.py
+++ b/cros_utils/manifest_versions.py
@@ -1,6 +1,8 @@
+# -*- coding: utf-8 -*-
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+
"""Tools for searching/manipulating the manifests repository."""
from __future__ import print_function
@@ -76,8 +78,8 @@ class ManifestVersions(object):
if ret:
logger.GetLogger().LogFatal('Failed to checkout manifest at '
'specified time')
- path = os.path.realpath(
- '{0}/manifest-versions/LKGM/lkgm.xml'.format(self.clone_location))
+ path = os.path.realpath('{0}/manifest-versions/LKGM/lkgm.xml'.format(
+ self.clone_location))
pp = path.split('/')
new_list = copy.deepcopy(pp)
for i, e in enumerate(pp):
@@ -124,8 +126,8 @@ class ManifestVersions(object):
if ret:
logger.GetLogger().LogFatal('Failed to checkout manifest at '
'specified time')
- path = os.path.realpath(
- '{0}/manifest-versions/LKGM/lkgm.xml'.format(self.clone_location))
+ path = os.path.realpath('{0}/manifest-versions/LKGM/lkgm.xml'.format(
+ self.clone_location))
pp = path.split('/')
small = os.path.basename(path).split('.xml')[0]
version = pp[-2] + '.' + small
@@ -142,7 +144,9 @@ class ManifestVersions(object):
version = version.split('.', 1)[1]
os.chdir(self.clone_location)
files = [
- os.path.join(r, f) for r, _, fs in os.walk('.') for f in fs
+ os.path.join(r, f)
+ for r, _, fs in os.walk('.')
+ for f in fs
if version in f
]
if files:
diff --git a/cros_utils/misc.py b/cros_utils/misc.py
index 58076f40..48e3c724 100644
--- a/cros_utils/misc.py
+++ b/cros_utils/misc.py
@@ -1,8 +1,11 @@
+# -*- coding: utf-8 -*-
# Copyright 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+
"""Utilities for toolchain build."""
+from __future__ import division
from __future__ import print_function
__author__ = 'asharif@google.com (Ahmad Sharif)'
@@ -71,7 +74,8 @@ def GetFilenameFromString(string):
string,
(r'/', '__'),
(r'\s', '_'),
- (r'[\\$="?^]', ''),)
+ (r'[\\$="?^]', ''),
+ )
def GetRoot(scr_name):
@@ -160,9 +164,7 @@ def GetBuildImageCommand(board, dev=False):
dev_args))
-def GetSetupBoardCommand(board,
- usepkg=None,
- force=None):
+def GetSetupBoardCommand(board, usepkg=None, force=None):
"""Get setup_board command."""
options = []
@@ -202,8 +204,8 @@ def GetCtargetFromBoard(board, chromeos_root):
def GetArchFromBoard(board, chromeos_root):
"""Get Arch from board."""
base_board = board.split('_')[0]
- command = ('source %s; get_board_arch %s' % (TOOLCHAIN_UTILS_PATH,
- base_board))
+ command = (
+ 'source %s; get_board_arch %s' % (TOOLCHAIN_UTILS_PATH, base_board))
ce = command_executer.GetCommandExecuter()
ret, out, _ = ce.ChrootRunCommandWOutput(chromeos_root, command)
if ret != 0:
@@ -237,7 +239,7 @@ def GetChromeSrcDir():
def GetEnvStringFromDict(env_dict):
- return ' '.join(["%s=\"%s\"" % var for var in env_dict.items()])
+ return ' '.join(['%s="%s"' % var for var in env_dict.items()])
def MergeEnvStringWithDict(env_string, env_dict, prepend=True):
@@ -247,11 +249,11 @@ def MergeEnvStringWithDict(env_string, env_dict, prepend=True):
override_env_list = []
ce = command_executer.GetCommandExecuter()
for k, v in env_dict.items():
- v = v.strip("\"'")
+ v = v.strip('"\'')
if prepend:
- new_env = "%s=\"%s $%s\"" % (k, v, k)
+ new_env = '%s="%s $%s"' % (k, v, k)
else:
- new_env = "%s=\"$%s %s\"" % (k, k, v)
+ new_env = '%s="$%s %s"' % (k, k, v)
command = '; '.join([env_string, new_env, 'echo $%s' % k])
ret, out, _ = ce.RunCommandWOutput(command)
override_env_list.append('%s=%r' % (k, out.strip()))
@@ -479,8 +481,8 @@ def ApplyGerritPatches(chromeos_root, gerrit_patch_string,
pi_str = '{project}:{ref}'.format(project=pi.project, ref=pi.ref)
try:
project_git_path = project_checkout.GetPath(absolute=True)
- logger.GetLogger().LogOutput(
- 'Applying patch "{0}" in "{1}" ...'.format(pi_str, project_git_path))
+ logger.GetLogger().LogOutput('Applying patch "{0}" in "{1}" ...'.format(
+ pi_str, project_git_path))
pi.Apply(project_git_path, branch, trivial=False)
except Exception:
traceback.print_exc(file=sys.stdout)
@@ -524,7 +526,8 @@ def BooleanPrompt(prompt='Do you want to continue?',
while True:
try:
- response = raw_input(prompt).lower()
+ # pylint: disable=input-builtin, bad-builtin
+ response = input(prompt).lower()
except EOFError:
# If the user hits CTRL+D, or stdin is disabled, use the default.
print()
@@ -553,7 +556,7 @@ def rgb2short(r, g, b):
greencolor = [255, 118, 82, 46, 10]
if g == 0:
- return redcolor[r / 52]
+ return redcolor[r // 52]
if r == 0:
- return greencolor[g / 52]
+ return greencolor[g // 52]
return 4
diff --git a/cros_utils/misc_test.py b/cros_utils/misc_test.py
index 80082207..cb7b8c89 100644..100755
--- a/cros_utils/misc_test.py
+++ b/cros_utils/misc_test.py
@@ -1,4 +1,9 @@
-# Copyright 2012 Google Inc. All Rights Reserved.
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""Tests for misc."""
from __future__ import print_function
diff --git a/cros_utils/no_pseudo_terminal_test.py b/cros_utils/no_pseudo_terminal_test.py
index 41d71539..10fd9608 100755
--- a/cros_utils/no_pseudo_terminal_test.py
+++ b/cros_utils/no_pseudo_terminal_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2019 The Chromium OS Authors. All rights reserved.
@@ -27,6 +27,7 @@ class NoPsuedoTerminalTest(unittest.TestCase):
"""Attaches strace to the current process."""
args = ['sudo', 'strace', '-o', output_file, '-p', str(os.getpid())]
print(args)
+ # pylint: disable=bad-option-value, subprocess-popen-preexec-fn
self._strace_process = subprocess.Popen(args, preexec_fn=os.setpgrp)
# Wait until we see some activity.
start_time = time.time()
diff --git a/cros_utils/perf_diff.py b/cros_utils/perf_diff.py
index 31cde994..fed6a81d 100755
--- a/cros_utils/perf_diff.py
+++ b/cros_utils/perf_diff.py
@@ -1,5 +1,9 @@
-#!/usr/bin/env python2
-# Copyright 2012 Google Inc. All Rights Reserved.
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""One-line documentation for perf_diff module.
A detailed description of perf_diff.
@@ -10,6 +14,7 @@ from __future__ import print_function
__author__ = 'asharif@google.com (Ahmad Sharif)'
import argparse
+import functools
import re
import sys
@@ -38,7 +43,7 @@ def GetPerfDictFromReport(report_file):
def _SortDictionaryByValue(d):
- l = [(k, v) for (k, v) in d.iteritems()]
+ l = d.items()
def GetFloat(x):
if misc.IsFloat(x):
@@ -271,11 +276,11 @@ class PerfDiffer(object):
for report in self._reports:
if section_name in report.sections:
section = report.sections[section_name]
- function_names = [f.name for f in section.functions]
+ function_names = {f.name for f in section.functions}
function_names_list.append(function_names)
self._common_function_names[section_name] = (
- reduce(set.intersection, map(set, function_names_list)))
+ functools.reduce(set.intersection, function_names_list))
def _GetTopFunctions(self, section_name, num_functions):
all_functions = {}
@@ -301,17 +306,19 @@ class PerfDiffer(object):
def Main(argv):
"""The entry of the main."""
parser = argparse.ArgumentParser()
- parser.add_argument('-n',
- '--num_symbols',
- dest='num_symbols',
- default='5',
- help='The number of symbols to show.')
- parser.add_argument('-c',
- '--common_only',
- dest='common_only',
- action='store_true',
- default=False,
- help='Diff common symbols only.')
+ parser.add_argument(
+ '-n',
+ '--num_symbols',
+ dest='num_symbols',
+ default='5',
+ help='The number of symbols to show.')
+ parser.add_argument(
+ '-c',
+ '--common_only',
+ dest='common_only',
+ action='store_true',
+ default=False,
+ help='Diff common symbols only.')
options, args = parser.parse_known_args(argv)
diff --git a/cros_utils/pstat.py b/cros_utils/pstat.py
deleted file mode 100644
index 602fc0c7..00000000
--- a/cros_utils/pstat.py
+++ /dev/null
@@ -1,1077 +0,0 @@
-# We did not author this file nor mantain it. Skip linting it.
-#pylint: skip-file
-# Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# Comments and/or additions are welcome (send e-mail to:
-# strang@nmr.mgh.harvard.edu).
-#
-"""pstat.py module
-
-#################################################
-####### Written by: Gary Strangman ###########
-####### Last modified: Dec 18, 2007 ###########
-#################################################
-
-This module provides some useful list and array manipulation routines
-modeled after those found in the |Stat package by Gary Perlman, plus a
-number of other useful list/file manipulation functions. The list-based
-functions include:
-
- abut (source,*args)
- simpleabut (source, addon)
- colex (listoflists,cnums)
- collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
- dm (listoflists,criterion)
- flat (l)
- linexand (listoflists,columnlist,valuelist)
- linexor (listoflists,columnlist,valuelist)
- linedelimited (inlist,delimiter)
- lineincols (inlist,colsize)
- lineincustcols (inlist,colsizes)
- list2string (inlist)
- makelol(inlist)
- makestr(x)
- printcc (lst,extra=2)
- printincols (listoflists,colsize)
- pl (listoflists)
- printl(listoflists)
- replace (lst,oldval,newval)
- recode (inlist,listmap,cols='all')
- remap (listoflists,criterion)
- roundlist (inlist,num_digits_to_round_floats_to)
- sortby(listoflists,sortcols)
- unique (inlist)
- duplicates(inlist)
- writedelimited (listoflists, delimiter, file, writetype='w')
-
-Some of these functions have alternate versions which are defined only if
-Numeric (NumPy) can be imported. These functions are generally named as
-above, with an 'a' prefix.
-
- aabut (source, *args)
- acolex (a,indices,axis=1)
- acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
- adm (a,criterion)
- alinexand (a,columnlist,valuelist)
- alinexor (a,columnlist,valuelist)
- areplace (a,oldval,newval)
- arecode (a,listmap,col='all')
- arowcompare (row1, row2)
- arowsame (row1, row2)
- asortrows(a,axis=0)
- aunique(inarray)
- aduplicates(inarray)
-
-Currently, the code is all but completely un-optimized. In many cases, the
-array versions of functions amount simply to aliases to built-in array
-functions/methods. Their inclusion here is for function name consistency.
-"""
-
-## CHANGE LOG:
-## ==========
-## 07-11-26 ... edited to work with numpy
-## 01-11-15 ... changed list2string() to accept a delimiter
-## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
-## 01-05-31 ... added duplicates() and aduplicates() functions
-## 00-12-28 ... license made GPL, docstring and import requirements
-## 99-11-01 ... changed version to 0.3
-## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
-## 03/27/99 ... added areplace function, made replace fcn recursive
-## 12/31/98 ... added writefc function for ouput to fixed column sizes
-## 12/07/98 ... fixed import problem (failed on collapse() fcn)
-## added __version__ variable (now 0.2)
-## 12/05/98 ... updated doc-strings
-## added features to collapse() function
-## added flat() function for lists
-## fixed a broken asortrows()
-## 11/16/98 ... fixed minor bug in aput for 1D arrays
-##
-## 11/08/98 ... fixed aput to output large arrays correctly
-
-import stats # required 3rd party module
-import string, copy
-from types import *
-
-__version__ = 0.4
-
-###=========================== LIST FUNCTIONS ==========================
-###
-### Here are the list functions, DEFINED FOR ALL SYSTEMS.
-### Array functions (for NumPy-enabled computers) appear below.
-###
-
-
-def abut(source, *args):
- """
-Like the |Stat abut command. It concatenates two lists side-by-side
-and returns the result. '2D' lists are also accomodated for either argument
-(source or addon). CAUTION: If one list is shorter, it will be repeated
-until it is as long as the longest list. If this behavior is not desired,
-use pstat.simpleabut().
-
-Usage: abut(source, args) where args=any # of lists
-Returns: a list of lists as long as the LONGEST list past, source on the
- 'left', lists in <args> attached consecutively on the 'right'
-"""
-
- if type(source) not in [ListType, TupleType]:
- source = [source]
- for addon in args:
- if type(addon) not in [ListType, TupleType]:
- addon = [addon]
- if len(addon) < len(source): # is source list longer?
- if len(source) % len(addon) == 0: # are they integer multiples?
- repeats = len(source) / len(addon) # repeat addon n times
- origadd = copy.deepcopy(addon)
- for i in range(repeats - 1):
- addon = addon + origadd
- else:
- repeats = len(source) / len(addon) + 1 # repeat addon x times,
- origadd = copy.deepcopy(addon) # x is NOT an integer
- for i in range(repeats - 1):
- addon = addon + origadd
- addon = addon[0:len(source)]
- elif len(source) < len(addon): # is addon list longer?
- if len(addon) % len(source) == 0: # are they integer multiples?
- repeats = len(addon) / len(source) # repeat source n times
- origsour = copy.deepcopy(source)
- for i in range(repeats - 1):
- source = source + origsour
- else:
- repeats = len(addon) / len(source) + 1 # repeat source x times,
- origsour = copy.deepcopy(source) # x is NOT an integer
- for i in range(repeats - 1):
- source = source + origsour
- source = source[0:len(addon)]
-
- source = simpleabut(source, addon)
- return source
-
-
-def simpleabut(source, addon):
- """
-Concatenates two lists as columns and returns the result. '2D' lists
-are also accomodated for either argument (source or addon). This DOES NOT
-repeat either list to make the 2 lists of equal length. Beware of list pairs
-with different lengths ... the resulting list will be the length of the
-FIRST list passed.
-
-Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists)
-Returns: a list of lists as long as source, with source on the 'left' and
- addon on the 'right'
-"""
- if type(source) not in [ListType, TupleType]:
- source = [source]
- if type(addon) not in [ListType, TupleType]:
- addon = [addon]
- minlen = min(len(source), len(addon))
- list = copy.deepcopy(source) # start abut process
- if type(source[0]) not in [ListType, TupleType]:
- if type(addon[0]) not in [ListType, TupleType]:
- for i in range(minlen):
- list[i] = [source[i]] + [addon[i]] # source/addon = column
- else:
- for i in range(minlen):
- list[i] = [source[i]] + addon[i] # addon=list-of-lists
- else:
- if type(addon[0]) not in [ListType, TupleType]:
- for i in range(minlen):
- list[i] = source[i] + [addon[i]] # source=list-of-lists
- else:
- for i in range(minlen):
- list[i] = source[i] + addon[i] # source/addon = list-of-lists
- source = list
- return source
-
-
-def colex(listoflists, cnums):
- """
-Extracts from listoflists the columns specified in the list 'cnums'
-(cnums can be an integer, a sequence of integers, or a string-expression that
-corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
-columns 3 onward from the listoflists).
-
-Usage: colex (listoflists,cnums)
-Returns: a list-of-lists corresponding to the columns from listoflists
- specified by cnums, in the order the column numbers appear in cnums
-"""
- global index
- column = 0
- if type(cnums) in [ListType, TupleType]: # if multiple columns to get
- index = cnums[0]
- column = map(lambda x: x[index], listoflists)
- for col in cnums[1:]:
- index = col
- column = abut(column, map(lambda x: x[index], listoflists))
- elif type(cnums) == StringType: # if an 'x[3:]' type expr.
- evalstring = 'map(lambda x: x' + cnums + ', listoflists)'
- column = eval(evalstring)
- else: # else it's just 1 col to get
- index = cnums
- column = map(lambda x: x[index], listoflists)
- return column
-
-
-def collapse(listoflists,
- keepcols,
- collapsecols,
- fcn1=None,
- fcn2=None,
- cfcn=None):
- """
-Averages data in collapsecol, keeping all unique items in keepcols
-(using unique, which keeps unique LISTS of column numbers), retaining the
-unique sets of values in keepcols, the mean for each. Setting fcn1
-and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
-will append those results (e.g., the sterr, N) after each calculated mean.
-cfcn is the collapse function to apply (defaults to mean, defined here in the
-pstat module to avoid circular imports with stats.py, but harmonicmean or
-others could be passed).
-
-Usage: collapse
-(listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
-Returns: a list of lists with all unique permutations of entries appearing in
- columns ("conditions") specified by keepcols, abutted with the result of
- cfcn (if cfcn=None, defaults to the mean) of each column specified by
- collapsecols.
-"""
-
- def collmean(inlist):
- s = 0
- for item in inlist:
- s = s + item
- return s / float(len(inlist))
-
- if type(keepcols) not in [ListType, TupleType]:
- keepcols = [keepcols]
- if type(collapsecols) not in [ListType, TupleType]:
- collapsecols = [collapsecols]
- if cfcn == None:
- cfcn = collmean
- if keepcols == []:
- means = [0] * len(collapsecols)
- for i in range(len(collapsecols)):
- avgcol = colex(listoflists, collapsecols[i])
- means[i] = cfcn(avgcol)
- if fcn1:
- try:
- test = fcn1(avgcol)
- except:
- test = 'N/A'
- means[i] = [means[i], test]
- if fcn2:
- try:
- test = fcn2(avgcol)
- except:
- test = 'N/A'
- try:
- means[i] = means[i] + [len(avgcol)]
- except TypeError:
- means[i] = [means[i], len(avgcol)]
- return means
- else:
- values = colex(listoflists, keepcols)
- uniques = unique(values)
- uniques.sort()
- newlist = []
- if type(keepcols) not in [ListType, TupleType]:
- keepcols = [keepcols]
- for item in uniques:
- if type(item) not in [ListType, TupleType]:
- item = [item]
- tmprows = linexand(listoflists, keepcols, item)
- for col in collapsecols:
- avgcol = colex(tmprows, col)
- item.append(cfcn(avgcol))
- if fcn1 <> None:
- try:
- test = fcn1(avgcol)
- except:
- test = 'N/A'
- item.append(test)
- if fcn2 <> None:
- try:
- test = fcn2(avgcol)
- except:
- test = 'N/A'
- item.append(test)
- newlist.append(item)
- return newlist
-
-
-def dm(listoflists, criterion):
- """
-Returns rows from the passed list of lists that meet the criteria in
-the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
-will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
-with column 2 equal to the string 'N').
-
-Usage: dm (listoflists, criterion)
-Returns: rows from listoflists that meet the specified criterion.
-"""
- function = 'filter(lambda x: ' + criterion + ',listoflists)'
- lines = eval(function)
- return lines
-
-
-def flat(l):
- """
-Returns the flattened version of a '2D' list. List-correlate to the a.ravel()()
-method of NumPy arrays.
-
-Usage: flat(l)
-"""
- newl = []
- for i in range(len(l)):
- for j in range(len(l[i])):
- newl.append(l[i][j])
- return newl
-
-
-def linexand(listoflists, columnlist, valuelist):
- """
-Returns the rows of a list of lists where col (from columnlist) = val
-(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
-len(columnlist) must equal len(valuelist).
-
-Usage: linexand (listoflists,columnlist,valuelist)
-Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
-"""
- if type(columnlist) not in [ListType, TupleType]:
- columnlist = [columnlist]
- if type(valuelist) not in [ListType, TupleType]:
- valuelist = [valuelist]
- criterion = ''
- for i in range(len(columnlist)):
- if type(valuelist[i]) == StringType:
- critval = '\'' + valuelist[i] + '\''
- else:
- critval = str(valuelist[i])
- criterion = criterion + ' x[' + str(columnlist[
- i]) + ']==' + critval + ' and'
- criterion = criterion[0:-3] # remove the "and" after the last crit
- function = 'filter(lambda x: ' + criterion + ',listoflists)'
- lines = eval(function)
- return lines
-
-
-def linexor(listoflists, columnlist, valuelist):
- """
-Returns the rows of a list of lists where col (from columnlist) = val
-(from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[).
-One value is required for each column in columnlist. If only one value
-exists for columnlist but multiple values appear in valuelist, the
-valuelist values are all assumed to pertain to the same column.
-
-Usage: linexor (listoflists,columnlist,valuelist)
-Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
-"""
- if type(columnlist) not in [ListType, TupleType]:
- columnlist = [columnlist]
- if type(valuelist) not in [ListType, TupleType]:
- valuelist = [valuelist]
- criterion = ''
- if len(columnlist) == 1 and len(valuelist) > 1:
- columnlist = columnlist * len(valuelist)
- for i in range(len(columnlist)): # build an exec string
- if type(valuelist[i]) == StringType:
- critval = '\'' + valuelist[i] + '\''
- else:
- critval = str(valuelist[i])
- criterion = criterion + ' x[' + str(columnlist[i]) + ']==' + critval + ' or'
- criterion = criterion[0:-2] # remove the "or" after the last crit
- function = 'filter(lambda x: ' + criterion + ',listoflists)'
- lines = eval(function)
- return lines
-
-
-def linedelimited(inlist, delimiter):
- """
-Returns a string composed of elements in inlist, with each element
-separated by 'delimiter.' Used by function writedelimited. Use '\t'
-for tab-delimiting.
-
-Usage: linedelimited (inlist,delimiter)
-"""
- outstr = ''
- for item in inlist:
- if type(item) <> StringType:
- item = str(item)
- outstr = outstr + item + delimiter
- outstr = outstr[0:-1]
- return outstr
-
-
-def lineincols(inlist, colsize):
- """
-Returns a string composed of elements in inlist, with each element
-right-aligned in columns of (fixed) colsize.
-
-Usage: lineincols (inlist,colsize) where colsize is an integer
-"""
- outstr = ''
- for item in inlist:
- if type(item) <> StringType:
- item = str(item)
- size = len(item)
- if size <= colsize:
- for i in range(colsize - size):
- outstr = outstr + ' '
- outstr = outstr + item
- else:
- outstr = outstr + item[0:colsize + 1]
- return outstr
-
-
-def lineincustcols(inlist, colsizes):
- """
-Returns a string composed of elements in inlist, with each element
-right-aligned in a column of width specified by a sequence colsizes. The
-length of colsizes must be greater than or equal to the number of columns
-in inlist.
-
-Usage: lineincustcols (inlist,colsizes)
-Returns: formatted string created from inlist
-"""
- outstr = ''
- for i in range(len(inlist)):
- if type(inlist[i]) <> StringType:
- item = str(inlist[i])
- else:
- item = inlist[i]
- size = len(item)
- if size <= colsizes[i]:
- for j in range(colsizes[i] - size):
- outstr = outstr + ' '
- outstr = outstr + item
- else:
- outstr = outstr + item[0:colsizes[i] + 1]
- return outstr
-
-
-def list2string(inlist, delimit=' '):
- """
-Converts a 1D list to a single long string for file output, using
-the string.join function.
-
-Usage: list2string (inlist,delimit=' ')
-Returns: the string created from inlist
-"""
- stringlist = map(makestr, inlist)
- return string.join(stringlist, delimit)
-
-
-def makelol(inlist):
- """
-Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you
-want to use put() to write a 1D list one item per line in the file.
-
-Usage: makelol(inlist)
-Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
-"""
- x = []
- for item in inlist:
- x.append([item])
- return x
-
-
-def makestr(x):
- if type(x) <> StringType:
- x = str(x)
- return x
-
-
-def printcc(lst, extra=2):
- """
-Prints a list of lists in columns, customized by the max size of items
-within the columns (max size of items in col, plus 'extra' number of spaces).
-Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
-respectively.
-
-Usage: printcc (lst,extra=2)
-Returns: None
-"""
- if type(lst[0]) not in [ListType, TupleType]:
- lst = [lst]
- rowstokill = []
- list2print = copy.deepcopy(lst)
- for i in range(len(lst)):
- if lst[i] == [
- '\n'
- ] or lst[i] == '\n' or lst[i] == 'dashes' or lst[i] == '' or lst[i] == ['']:
- rowstokill = rowstokill + [i]
- rowstokill.reverse() # delete blank rows from the end
- for row in rowstokill:
- del list2print[row]
- maxsize = [0] * len(list2print[0])
- for col in range(len(list2print[0])):
- items = colex(list2print, col)
- items = map(makestr, items)
- maxsize[col] = max(map(len, items)) + extra
- for row in lst:
- if row == ['\n'] or row == '\n' or row == '' or row == ['']:
- print
- elif row == ['dashes'] or row == 'dashes':
- dashes = [0] * len(maxsize)
- for j in range(len(maxsize)):
- dashes[j] = '-' * (maxsize[j] - 2)
- print lineincustcols(dashes, maxsize)
- else:
- print lineincustcols(row, maxsize)
- return None
-
-
-def printincols(listoflists, colsize):
- """
-Prints a list of lists in columns of (fixed) colsize width, where
-colsize is an integer.
-
-Usage: printincols (listoflists,colsize)
-Returns: None
-"""
- for row in listoflists:
- print lineincols(row, colsize)
- return None
-
-
-def pl(listoflists):
- """
-Prints a list of lists, 1 list (row) at a time.
-
-Usage: pl(listoflists)
-Returns: None
-"""
- for row in listoflists:
- if row[-1] == '\n':
- print row,
- else:
- print row
- return None
-
-
-def printl(listoflists):
- """Alias for pl."""
- pl(listoflists)
- return
-
-
-def replace(inlst, oldval, newval):
- """
-Replaces all occurrences of 'oldval' with 'newval', recursively.
-
-Usage: replace (inlst,oldval,newval)
-"""
- lst = inlst * 1
- for i in range(len(lst)):
- if type(lst[i]) not in [ListType, TupleType]:
- if lst[i] == oldval:
- lst[i] = newval
- else:
- lst[i] = replace(lst[i], oldval, newval)
- return lst
-
-
-def recode(inlist, listmap, cols=None):
- """
-Changes the values in a list to a new set of values (useful when
-you need to recode data from (e.g.) strings to numbers. cols defaults
-to None (meaning all columns are recoded).
-
-Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list
-Returns: inlist with the appropriate values replaced with new ones
-"""
- lst = copy.deepcopy(inlist)
- if cols != None:
- if type(cols) not in [ListType, TupleType]:
- cols = [cols]
- for col in cols:
- for row in range(len(lst)):
- try:
- idx = colex(listmap, 0).index(lst[row][col])
- lst[row][col] = listmap[idx][1]
- except ValueError:
- pass
- else:
- for row in range(len(lst)):
- for col in range(len(lst)):
- try:
- idx = colex(listmap, 0).index(lst[row][col])
- lst[row][col] = listmap[idx][1]
- except ValueError:
- pass
- return lst
-
-
-def remap(listoflists, criterion):
- """
-Remaps values in a given column of a 2D list (listoflists). This requires
-a criterion as a function of 'x' so that the result of the following is
-returned ... map(lambda x: 'criterion',listoflists).
-
-Usage: remap(listoflists,criterion) criterion=string
-Returns: remapped version of listoflists
-"""
- function = 'map(lambda x: ' + criterion + ',listoflists)'
- lines = eval(function)
- return lines
-
-
-def roundlist(inlist, digits):
- """
-Goes through each element in a 1D or 2D inlist, and applies the following
-function to all elements of FloatType ... round(element,digits).
-
-Usage: roundlist(inlist,digits)
-Returns: list with rounded floats
-"""
- if type(inlist[0]) in [IntType, FloatType]:
- inlist = [inlist]
- l = inlist * 1
- for i in range(len(l)):
- for j in range(len(l[i])):
- if type(l[i][j]) == FloatType:
- l[i][j] = round(l[i][j], digits)
- return l
-
-
-def sortby(listoflists, sortcols):
- """
-Sorts a list of lists on the column(s) specified in the sequence
-sortcols.
-
-Usage: sortby(listoflists,sortcols)
-Returns: sorted list, unchanged column ordering
-"""
- newlist = abut(colex(listoflists, sortcols), listoflists)
- newlist.sort()
- try:
- numcols = len(sortcols)
- except TypeError:
- numcols = 1
- crit = '[' + str(numcols) + ':]'
- newlist = colex(newlist, crit)
- return newlist
-
-
-def unique(inlist):
- """
-Returns all unique items in the passed list. If the a list-of-lists
-is passed, unique LISTS are found (i.e., items in the first dimension are
-compared).
-
-Usage: unique (inlist)
-Returns: the unique elements (or rows) in inlist
-"""
- uniques = []
- for item in inlist:
- if item not in uniques:
- uniques.append(item)
- return uniques
-
-
-def duplicates(inlist):
- """
-Returns duplicate items in the FIRST dimension of the passed list.
-
-Usage: duplicates (inlist)
-"""
- dups = []
- for i in range(len(inlist)):
- if inlist[i] in inlist[i + 1:]:
- dups.append(inlist[i])
- return dups
-
-
-def nonrepeats(inlist):
- """
-Returns items that are NOT duplicated in the first dim of the passed list.
-
-Usage: nonrepeats (inlist)
-"""
- nonrepeats = []
- for i in range(len(inlist)):
- if inlist.count(inlist[i]) == 1:
- nonrepeats.append(inlist[i])
- return nonrepeats
-
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-#=================== PSTAT ARRAY FUNCTIONS =====================
-
-try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE
- import numpy as N
-
- def aabut(source, *args):
- """
-Like the |Stat abut command. It concatenates two arrays column-wise
-and returns the result. CAUTION: If one array is shorter, it will be
-repeated until it is as long as the other.
-
-Usage: aabut (source, args) where args=any # of arrays
-Returns: an array as long as the LONGEST array past, source appearing on the
- 'left', arrays in <args> attached on the 'right'.
-"""
- if len(source.shape) == 1:
- width = 1
- source = N.resize(source, [source.shape[0], width])
- else:
- width = source.shape[1]
- for addon in args:
- if len(addon.shape) == 1:
- width = 1
- addon = N.resize(addon, [source.shape[0], width])
- else:
- width = source.shape[1]
- if len(addon) < len(source):
- addon = N.resize(addon, [source.shape[0], addon.shape[1]])
- elif len(source) < len(addon):
- source = N.resize(source, [addon.shape[0], source.shape[1]])
- source = N.concatenate((source, addon), 1)
- return source
-
- def acolex(a, indices, axis=1):
- """
-Extracts specified indices (a list) from passed array, along passed
-axis (column extraction is default). BEWARE: A 1D array is presumed to be a
-column-array (and that the whole array will be returned as a column).
-
-Usage: acolex (a,indices,axis=1)
-Returns: the columns of a specified by indices
-"""
- if type(indices) not in [ListType, TupleType, N.ndarray]:
- indices = [indices]
- if len(N.shape(a)) == 1:
- cols = N.resize(a, [a.shape[0], 1])
- else:
- # print a[:3]
- cols = N.take(a, indices, axis)
-# print cols[:3]
- return cols
-
- def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
- """
-Averages data in collapsecol, keeping all unique items in keepcols
-(using unique, which keeps unique LISTS of column numbers), retaining
-the unique sets of values in keepcols, the mean for each. If stderror or
-N of the mean are desired, set either or both parameters to 1.
-
-Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
-Returns: unique 'conditions' specified by the contents of columns specified
- by keepcols, abutted with the mean(s) of column(s) specified by
- collapsecols
-"""
-
- def acollmean(inarray):
- return N.sum(N.ravel(inarray))
-
- if type(keepcols) not in [ListType, TupleType, N.ndarray]:
- keepcols = [keepcols]
- if type(collapsecols) not in [ListType, TupleType, N.ndarray]:
- collapsecols = [collapsecols]
-
- if cfcn == None:
- cfcn = acollmean
- if keepcols == []:
- avgcol = acolex(a, collapsecols)
- means = N.sum(avgcol) / float(len(avgcol))
- if fcn1 <> None:
- try:
- test = fcn1(avgcol)
- except:
- test = N.array(['N/A'] * len(means))
- means = aabut(means, test)
- if fcn2 <> None:
- try:
- test = fcn2(avgcol)
- except:
- test = N.array(['N/A'] * len(means))
- means = aabut(means, test)
- return means
- else:
- if type(keepcols) not in [ListType, TupleType, N.ndarray]:
- keepcols = [keepcols]
- values = colex(a, keepcols) # so that "item" can be appended (below)
- uniques = unique(values) # get a LIST, so .sort keeps rows intact
- uniques.sort()
- newlist = []
- for item in uniques:
- if type(item) not in [ListType, TupleType, N.ndarray]:
- item = [item]
- tmprows = alinexand(a, keepcols, item)
- for col in collapsecols:
- avgcol = acolex(tmprows, col)
- item.append(acollmean(avgcol))
- if fcn1 <> None:
- try:
- test = fcn1(avgcol)
- except:
- test = 'N/A'
- item.append(test)
- if fcn2 <> None:
- try:
- test = fcn2(avgcol)
- except:
- test = 'N/A'
- item.append(test)
- newlist.append(item)
- try:
- new_a = N.array(newlist)
- except TypeError:
- new_a = N.array(newlist, 'O')
- return new_a
-
- def adm(a, criterion):
- """
-Returns rows from the passed list of lists that meet the criteria in
-the passed criterion expression (a string as a function of x).
-
-Usage: adm (a,criterion) where criterion is like 'x[2]==37'
-"""
- function = 'filter(lambda x: ' + criterion + ',a)'
- lines = eval(function)
- try:
- lines = N.array(lines)
- except:
- lines = N.array(lines, dtype='O')
- return lines
-
- def isstring(x):
- if type(x) == StringType:
- return 1
- else:
- return 0
-
- def alinexand(a, columnlist, valuelist):
- """
-Returns the rows of an array where col (from columnlist) = val
-(from valuelist). One value is required for each column in columnlist.
-
-Usage: alinexand (a,columnlist,valuelist)
-Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
-"""
- if type(columnlist) not in [ListType, TupleType, N.ndarray]:
- columnlist = [columnlist]
- if type(valuelist) not in [ListType, TupleType, N.ndarray]:
- valuelist = [valuelist]
- criterion = ''
- for i in range(len(columnlist)):
- if type(valuelist[i]) == StringType:
- critval = '\'' + valuelist[i] + '\''
- else:
- critval = str(valuelist[i])
- criterion = criterion + ' x[' + str(columnlist[
- i]) + ']==' + critval + ' and'
- criterion = criterion[0:-3] # remove the "and" after the last crit
- return adm(a, criterion)
-
- def alinexor(a, columnlist, valuelist):
- """
-Returns the rows of an array where col (from columnlist) = val (from
-valuelist). One value is required for each column in columnlist.
-The exception is if either columnlist or valuelist has only 1 value,
-in which case that item will be expanded to match the length of the
-other list.
-
-Usage: alinexor (a,columnlist,valuelist)
-Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
-"""
- if type(columnlist) not in [ListType, TupleType, N.ndarray]:
- columnlist = [columnlist]
- if type(valuelist) not in [ListType, TupleType, N.ndarray]:
- valuelist = [valuelist]
- criterion = ''
- if len(columnlist) == 1 and len(valuelist) > 1:
- columnlist = columnlist * len(valuelist)
- elif len(valuelist) == 1 and len(columnlist) > 1:
- valuelist = valuelist * len(columnlist)
- for i in range(len(columnlist)):
- if type(valuelist[i]) == StringType:
- critval = '\'' + valuelist[i] + '\''
- else:
- critval = str(valuelist[i])
- criterion = criterion + ' x[' + str(columnlist[
- i]) + ']==' + critval + ' or'
- criterion = criterion[0:-2] # remove the "or" after the last crit
- return adm(a, criterion)
-
- def areplace(a, oldval, newval):
- """
-Replaces all occurrences of oldval with newval in array a.
-
-Usage: areplace(a,oldval,newval)
-"""
- return N.where(a == oldval, newval, a)
-
- def arecode(a, listmap, col='all'):
- """
-Remaps the values in an array to a new set of values (useful when
-you need to recode data from (e.g.) strings to numbers as most stats
-packages require. Can work on SINGLE columns, or 'all' columns at once.
-@@@BROKEN 2007-11-26
-
-Usage: arecode (a,listmap,col='all')
-Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
-"""
- ashape = a.shape
- if col == 'all':
- work = a.ravel()
- else:
- work = acolex(a, col)
- work = work.ravel()
- for pair in listmap:
- if type(pair[
- 1]) == StringType or work.dtype.char == 'O' or a.dtype.char == 'O':
- work = N.array(work, dtype='O')
- a = N.array(a, dtype='O')
- for i in range(len(work)):
- if work[i] == pair[0]:
- work[i] = pair[1]
- if col == 'all':
- return N.reshape(work, ashape)
- else:
- return N.concatenate(
- [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
- else: # must be a non-Object type array and replacement
- work = N.where(work == pair[0], pair[1], work)
- return N.concatenate(
- [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
-
- def arowcompare(row1, row2):
- """
-Compares two rows from an array, regardless of whether it is an
-array of numbers or of python objects (which requires the cmp function).
-@@@PURPOSE? 2007-11-26
-
-Usage: arowcompare(row1,row2)
-Returns: an array of equal length containing 1s where the two rows had
- identical elements and 0 otherwise
-"""
- return
- if row1.dtype.char == 'O' or row2.dtype == 'O':
- cmpvect = N.logical_not(
- abs(N.array(map(cmp, row1, row2)))) # cmp fcn gives -1,0,1
- else:
- cmpvect = N.equal(row1, row2)
- return cmpvect
-
- def arowsame(row1, row2):
- """
-Compares two rows from an array, regardless of whether it is an
-array of numbers or of python objects (which requires the cmp function).
-
-Usage: arowsame(row1,row2)
-Returns: 1 if the two rows are identical, 0 otherwise.
-"""
- cmpval = N.alltrue(arowcompare(row1, row2))
- return cmpval
-
- def asortrows(a, axis=0):
- """
-Sorts an array "by rows". This differs from the Numeric.sort() function,
-which sorts elements WITHIN the given axis. Instead, this function keeps
-the elements along the given axis intact, but shifts them 'up or down'
-relative to one another.
-
-Usage: asortrows(a,axis=0)
-Returns: sorted version of a
-"""
- return N.sort(a, axis=axis, kind='mergesort')
-
- def aunique(inarray):
- """
-Returns unique items in the FIRST dimension of the passed array. Only
-works on arrays NOT including string items.
-
-Usage: aunique (inarray)
-"""
- uniques = N.array([inarray[0]])
- if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY
- for item in inarray[1:]:
- if N.add.reduce(N.equal(uniques, item).ravel()) == 0:
- try:
- uniques = N.concatenate([uniques, N.array[N.newaxis, :]])
- except TypeError:
- uniques = N.concatenate([uniques, N.array([item])])
- else: # IT MUST BE A 2+D ARRAY
- if inarray.dtype.char != 'O': # not an Object array
- for item in inarray[1:]:
- if not N.sum(N.alltrue(N.equal(uniques, item), 1)):
- try:
- uniques = N.concatenate([uniques, item[N.newaxis, :]])
- except TypeError: # the item to add isn't a list
- uniques = N.concatenate([uniques, N.array([item])])
- else:
- pass # this item is already in the uniques array
- else: # must be an Object array, alltrue/equal functions don't work
- for item in inarray[1:]:
- newflag = 1
- for unq in uniques: # NOTE: cmp --> 0=same, -1=<, 1=>
- test = N.sum(abs(N.array(map(cmp, item, unq))))
- if test == 0: # if item identical to any 1 row in uniques
- newflag = 0 # then not a novel item to add
- break
- if newflag == 1:
- try:
- uniques = N.concatenate([uniques, item[N.newaxis, :]])
- except TypeError: # the item to add isn't a list
- uniques = N.concatenate([uniques, N.array([item])])
- return uniques
-
- def aduplicates(inarray):
- """
-Returns duplicate items in the FIRST dimension of the passed array. Only
-works on arrays NOT including string items.
-
-Usage: aunique (inarray)
-"""
- inarray = N.array(inarray)
- if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY
- dups = []
- inarray = inarray.tolist()
- for i in range(len(inarray)):
- if inarray[i] in inarray[i + 1:]:
- dups.append(inarray[i])
- dups = aunique(dups)
- else: # IT MUST BE A 2+D ARRAY
- dups = []
- aslist = inarray.tolist()
- for i in range(len(aslist)):
- if aslist[i] in aslist[i + 1:]:
- dups.append(aslist[i])
- dups = unique(dups)
- dups = N.array(dups)
- return dups
-
-except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
- pass
diff --git a/cros_utils/stats.py b/cros_utils/stats.py
deleted file mode 100644
index 0387a076..00000000
--- a/cros_utils/stats.py
+++ /dev/null
@@ -1,4519 +0,0 @@
-# We did not author this file nor mantain it. Skip linting it.
-#pylint: skip-file
-# Copyright (c) 1999-2008 Gary Strangman; All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# Comments and/or additions are welcome (send e-mail to:
-# strang@nmr.mgh.harvard.edu).
-#
-"""stats.py module
-
-(Requires pstat.py module.)
-
-#################################################
-####### Written by: Gary Strangman ###########
-####### Last modified: Oct 31, 2008 ###########
-#################################################
-
-A collection of basic statistical functions for python. The function
-names appear below.
-
-IMPORTANT: There are really *3* sets of functions. The first set has an 'l'
-prefix, which can be used with list or tuple arguments. The second set has
-an 'a' prefix, which can accept NumPy array arguments. These latter
-functions are defined only when NumPy is available on the system. The third
-type has NO prefix (i.e., has the name that appears below). Functions of
-this set are members of a "Dispatch" class, c/o David Ascher. This class
-allows different functions to be called depending on the type of the passed
-arguments. Thus, stats.mean is a member of the Dispatch class and
-stats.mean(range(20)) will call stats.lmean(range(20)) while
-stats.mean(Numeric.arange(20)) will call stats.amean(Numeric.arange(20)).
-This is a handy way to keep consistent function names when different
-argument types require different functions to be called. Having
-implementated the Dispatch class, however, means that to get info on
-a given function, you must use the REAL function name ... that is
-"print stats.lmean.__doc__" or "print stats.amean.__doc__" work fine,
-while "print stats.mean.__doc__" will print the doc for the Dispatch
-class. NUMPY FUNCTIONS ('a' prefix) generally have more argument options
-but should otherwise be consistent with the corresponding list functions.
-
-Disclaimers: The function list is obviously incomplete and, worse, the
-functions are not optimized. All functions have been tested (some more
-so than others), but they are far from bulletproof. Thus, as with any
-free software, no warranty or guarantee is expressed or implied. :-) A
-few extra functions that don't appear in the list below can be found by
-interested treasure-hunters. These functions don't necessarily have
-both list and array versions but were deemed useful
-
-CENTRAL TENDENCY: geometricmean
- harmonicmean
- mean
- median
- medianscore
- mode
-
-MOMENTS: moment
- variation
- skew
- kurtosis
- skewtest (for Numpy arrays only)
- kurtosistest (for Numpy arrays only)
- normaltest (for Numpy arrays only)
-
-ALTERED VERSIONS: tmean (for Numpy arrays only)
- tvar (for Numpy arrays only)
- tmin (for Numpy arrays only)
- tmax (for Numpy arrays only)
- tstdev (for Numpy arrays only)
- tsem (for Numpy arrays only)
- describe
-
-FREQUENCY STATS: itemfreq
- scoreatpercentile
- percentileofscore
- histogram
- cumfreq
- relfreq
-
-VARIABILITY: obrientransform
- samplevar
- samplestdev
- signaltonoise (for Numpy arrays only)
- var
- stdev
- sterr
- sem
- z
- zs
- zmap (for Numpy arrays only)
-
-TRIMMING FCNS: threshold (for Numpy arrays only)
- trimboth
- trim1
- round (round all vals to 'n' decimals; Numpy only)
-
-CORRELATION FCNS: covariance (for Numpy arrays only)
- correlation (for Numpy arrays only)
- paired
- pearsonr
- spearmanr
- pointbiserialr
- kendalltau
- linregress
-
-INFERENTIAL STATS: ttest_1samp
- ttest_ind
- ttest_rel
- chisquare
- ks_2samp
- mannwhitneyu
- ranksums
- wilcoxont
- kruskalwallish
- friedmanchisquare
-
-PROBABILITY CALCS: chisqprob
- erfcc
- zprob
- ksprob
- fprob
- betacf
- gammln
- betai
-
-ANOVA FUNCTIONS: F_oneway
- F_value
-
-SUPPORT FUNCTIONS: writecc
- incr
- sign (for Numpy arrays only)
- sum
- cumsum
- ss
- summult
- sumdiffsquared
- square_of_sums
- shellsort
- rankdata
- outputpairedstats
- findwithin
-"""
-## CHANGE LOG:
-## ===========
-## 09-07-21 ... added capability for getting the 'proportion' out of l/amannwhitneyu (but comment-disabled)
-## 08-10-31 ... fixed import LinearAlgebra bug before glm fcns
-## 07-11-26 ... conversion for numpy started
-## 07-05-16 ... added Lin's Concordance Correlation Coefficient (alincc) and acov
-## 05-08-21 ... added "Dice's coefficient"
-## 04-10-26 ... added ap2t(), an ugly fcn for converting p-vals to T-vals
-## 04-04-03 ... added amasslinregress() function to do regression on N-D arrays
-## 03-01-03 ... CHANGED VERSION TO 0.6
-## fixed atsem() to properly handle limits=None case
-## improved histogram and median functions (estbinwidth) and
-## fixed atvar() function (wrong answers for neg numbers?!?)
-## 02-11-19 ... fixed attest_ind and attest_rel for div-by-zero Overflows
-## 02-05-10 ... fixed lchisqprob indentation (failed when df=even)
-## 00-12-28 ... removed aanova() to separate module, fixed licensing to
-## match Python License, fixed doc string & imports
-## 00-04-13 ... pulled all "global" statements, except from aanova()
-## added/fixed lots of documentation, removed io.py dependency
-## changed to version 0.5
-## 99-11-13 ... added asign() function
-## 99-11-01 ... changed version to 0.4 ... enough incremental changes now
-## 99-10-25 ... added acovariance and acorrelation functions
-## 99-10-10 ... fixed askew/akurtosis to avoid divide-by-zero errors
-## added aglm function (crude, but will be improved)
-## 99-10-04 ... upgraded acumsum, ass, asummult, asamplevar, avar, etc. to
-## all handle lists of 'dimension's and keepdims
-## REMOVED ar0, ar2, ar3, ar4 and replaced them with around
-## reinserted fixes for abetai to avoid math overflows
-## 99-09-05 ... rewrote achisqprob/aerfcc/aksprob/afprob/abetacf/abetai to
-## handle multi-dimensional arrays (whew!)
-## 99-08-30 ... fixed l/amoment, l/askew, l/akurtosis per D'Agostino (1990)
-## added anormaltest per same reference
-## re-wrote azprob to calc arrays of probs all at once
-## 99-08-22 ... edited attest_ind printing section so arrays could be rounded
-## 99-08-19 ... fixed amean and aharmonicmean for non-error(!) overflow on
-## short/byte arrays (mean of #s btw 100-300 = -150??)
-## 99-08-09 ... fixed asum so that the None case works for Byte arrays
-## 99-08-08 ... fixed 7/3 'improvement' to handle t-calcs on N-D arrays
-## 99-07-03 ... improved attest_ind, attest_rel (zero-division errortrap)
-## 99-06-24 ... fixed bug(?) in attest_ind (n1=a.shape[0])
-## 04/11/99 ... added asignaltonoise, athreshold functions, changed all
-## max/min in array section to N.maximum/N.minimum,
-## fixed square_of_sums to prevent integer overflow
-## 04/10/99 ... !!! Changed function name ... sumsquared ==> square_of_sums
-## 03/18/99 ... Added ar0, ar2, ar3 and ar4 rounding functions
-## 02/28/99 ... Fixed aobrientransform to return an array rather than a list
-## 01/15/99 ... Essentially ceased updating list-versions of functions (!!!)
-## 01/13/99 ... CHANGED TO VERSION 0.3
-## fixed bug in a/lmannwhitneyu p-value calculation
-## 12/31/98 ... fixed variable-name bug in ldescribe
-## 12/19/98 ... fixed bug in findwithin (fcns needed pstat. prefix)
-## 12/16/98 ... changed amedianscore to return float (not array) for 1 score
-## 12/14/98 ... added atmin and atmax functions
-## removed umath from import line (not needed)
-## l/ageometricmean modified to reduce chance of overflows (take
-## nth root first, then multiply)
-## 12/07/98 ... added __version__variable (now 0.2)
-## removed all 'stats.' from anova() fcn
-## 12/06/98 ... changed those functions (except shellsort) that altered
-## arguments in-place ... cumsum, ranksort, ...
-## updated (and fixed some) doc-strings
-## 12/01/98 ... added anova() function (requires NumPy)
-## incorporated Dispatch class
-## 11/12/98 ... added functionality to amean, aharmonicmean, ageometricmean
-## added 'asum' function (added functionality to N.add.reduce)
-## fixed both moment and amoment (two errors)
-## changed name of skewness and askewness to skew and askew
-## fixed (a)histogram (which sometimes counted points <lowerlimit)
-
-import pstat # required 3rd party module
-import math, string, copy # required python modules
-from types import *
-
-__version__ = 0.6
-
-############# DISPATCH CODE ##############
-
-
-class Dispatch:
- """
-The Dispatch class, care of David Ascher, allows different functions to
-be called depending on the argument types. This way, there can be one
-function name regardless of the argument type. To access function doc
-in stats.py module, prefix the function with an 'l' or 'a' for list or
-array arguments, respectively. That is, print stats.lmean.__doc__ or
-print stats.amean.__doc__ or whatever.
-"""
-
- def __init__(self, *tuples):
- self._dispatch = {}
- for func, types in tuples:
- for t in types:
- if t in self._dispatch.keys():
- raise ValueError, "can't have two dispatches on " + str(t)
- self._dispatch[t] = func
- self._types = self._dispatch.keys()
-
- def __call__(self, arg1, *args, **kw):
- if type(arg1) not in self._types:
- raise TypeError, "don't know how to dispatch %s arguments" % type(arg1)
- return apply(self._dispatch[type(arg1)], (arg1,) + args, kw)
-
-##########################################################################
-######################## LIST-BASED FUNCTIONS ########################
-##########################################################################
-
-### Define these regardless
-
-####################################
-####### CENTRAL TENDENCY #########
-####################################
-
-
-def lgeometricmean(inlist):
- """
-Calculates the geometric mean of the values in the passed list.
-That is: n-th root of (x1 * x2 * ... * xn). Assumes a '1D' list.
-
-Usage: lgeometricmean(inlist)
-"""
- mult = 1.0
- one_over_n = 1.0 / len(inlist)
- for item in inlist:
- mult = mult * pow(item, one_over_n)
- return mult
-
-
-def lharmonicmean(inlist):
- """
-Calculates the harmonic mean of the values in the passed list.
-That is: n / (1/x1 + 1/x2 + ... + 1/xn). Assumes a '1D' list.
-
-Usage: lharmonicmean(inlist)
-"""
- sum = 0
- for item in inlist:
- sum = sum + 1.0 / item
- return len(inlist) / sum
-
-
-def lmean(inlist):
- """
-Returns the arithematic mean of the values in the passed list.
-Assumes a '1D' list, but will function on the 1st dim of an array(!).
-
-Usage: lmean(inlist)
-"""
- sum = 0
- for item in inlist:
- sum = sum + item
- return sum / float(len(inlist))
-
-
-def lmedian(inlist, numbins=1000):
- """
-Returns the computed median value of a list of numbers, given the
-number of bins to use for the histogram (more bins brings the computed value
-closer to the median score, default number of bins = 1000). See G.W.
-Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics.
-
-Usage: lmedian (inlist, numbins=1000)
-"""
- (hist, smallest, binsize, extras) = histogram(
- inlist, numbins, [min(inlist), max(inlist)]) # make histog
- cumhist = cumsum(hist) # make cumulative histogram
- for i in range(len(cumhist)): # get 1st(!) index holding 50%ile score
- if cumhist[i] >= len(inlist) / 2.0:
- cfbin = i
- break
- LRL = smallest + binsize * cfbin # get lower read limit of that bin
- cfbelow = cumhist[cfbin - 1]
- freq = float(hist[cfbin]) # frequency IN the 50%ile bin
- median = LRL + (
- (len(inlist) / 2.0 - cfbelow) / float(freq)) * binsize # median formula
- return median
-
-
-def lmedianscore(inlist):
- """
-Returns the 'middle' score of the passed list. If there is an even
-number of scores, the mean of the 2 middle scores is returned.
-
-Usage: lmedianscore(inlist)
-"""
-
- newlist = copy.deepcopy(inlist)
- newlist.sort()
- if len(newlist) % 2 == 0: # if even number of scores, average middle 2
- index = len(newlist) / 2 # integer division correct
- median = float(newlist[index] + newlist[index - 1]) / 2
- else:
- index = len(newlist) / 2 # int divsion gives mid value when count from 0
- median = newlist[index]
- return median
-
-
-def lmode(inlist):
- """
-Returns a list of the modal (most common) score(s) in the passed
-list. If there is more than one such score, all are returned. The
-bin-count for the mode(s) is also returned.
-
-Usage: lmode(inlist)
-Returns: bin-count for mode(s), a list of modal value(s)
-"""
-
- scores = pstat.unique(inlist)
- scores.sort()
- freq = []
- for item in scores:
- freq.append(inlist.count(item))
- maxfreq = max(freq)
- mode = []
- stillmore = 1
- while stillmore:
- try:
- indx = freq.index(maxfreq)
- mode.append(scores[indx])
- del freq[indx]
- del scores[indx]
- except ValueError:
- stillmore = 0
- return maxfreq, mode
-
-####################################
-############ MOMENTS #############
-####################################
-
-
-def lmoment(inlist, moment=1):
- """
-Calculates the nth moment about the mean for a sample (defaults to
-the 1st moment). Used to calculate coefficients of skewness and kurtosis.
-
-Usage: lmoment(inlist,moment=1)
-Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r)
-"""
- if moment == 1:
- return 0.0
- else:
- mn = mean(inlist)
- n = len(inlist)
- s = 0
- for x in inlist:
- s = s + (x - mn)**moment
- return s / float(n)
-
-
-def lvariation(inlist):
- """
-Returns the coefficient of variation, as defined in CRC Standard
-Probability and Statistics, p.6.
-
-Usage: lvariation(inlist)
-"""
- return 100.0 * samplestdev(inlist) / float(mean(inlist))
-
-
-def lskew(inlist):
- """
-Returns the skewness of a distribution, as defined in Numerical
-Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.)
-
-Usage: lskew(inlist)
-"""
- return moment(inlist, 3) / pow(moment(inlist, 2), 1.5)
-
-
-def lkurtosis(inlist):
- """
-Returns the kurtosis of a distribution, as defined in Numerical
-Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.)
-
-Usage: lkurtosis(inlist)
-"""
- return moment(inlist, 4) / pow(moment(inlist, 2), 2.0)
-
-
-def ldescribe(inlist):
- """
-Returns some descriptive statistics of the passed list (assumed to be 1D).
-
-Usage: ldescribe(inlist)
-Returns: n, mean, standard deviation, skew, kurtosis
-"""
- n = len(inlist)
- mm = (min(inlist), max(inlist))
- m = mean(inlist)
- sd = stdev(inlist)
- sk = skew(inlist)
- kurt = kurtosis(inlist)
- return n, mm, m, sd, sk, kurt
-
-####################################
-####### FREQUENCY STATS ##########
-####################################
-
-
-def litemfreq(inlist):
- """
-Returns a list of pairs. Each pair consists of one of the scores in inlist
-and it's frequency count. Assumes a 1D list is passed.
-
-Usage: litemfreq(inlist)
-Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)
-"""
- scores = pstat.unique(inlist)
- scores.sort()
- freq = []
- for item in scores:
- freq.append(inlist.count(item))
- return pstat.abut(scores, freq)
-
-
-def lscoreatpercentile(inlist, percent):
- """
-Returns the score at a given percentile relative to the distribution
-given by inlist.
-
-Usage: lscoreatpercentile(inlist,percent)
-"""
- if percent > 1:
- print '\nDividing percent>1 by 100 in lscoreatpercentile().\n'
- percent = percent / 100.0
- targetcf = percent * len(inlist)
- h, lrl, binsize, extras = histogram(inlist)
- cumhist = cumsum(copy.deepcopy(h))
- for i in range(len(cumhist)):
- if cumhist[i] >= targetcf:
- break
- score = binsize * (
- (targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + binsize * i)
- return score
-
-
-def lpercentileofscore(inlist, score, histbins=10, defaultlimits=None):
- """
-Returns the percentile value of a score relative to the distribution
-given by inlist. Formula depends on the values used to histogram the data(!).
-
-Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None)
-"""
-
- h, lrl, binsize, extras = histogram(inlist, histbins, defaultlimits)
- cumhist = cumsum(copy.deepcopy(h))
- i = int((score - lrl) / float(binsize))
- pct = (cumhist[i - 1] + (
- (score -
- (lrl + binsize * i)) / float(binsize)) * h[i]) / float(len(inlist)) * 100
- return pct
-
-
-def lhistogram(inlist, numbins=10, defaultreallimits=None, printextras=0):
- """
-Returns (i) a list of histogram bin counts, (ii) the smallest value
-of the histogram binning, and (iii) the bin width (the last 2 are not
-necessarily integers). Default number of bins is 10. If no sequence object
-is given for defaultreallimits, the routine picks (usually non-pretty) bins
-spanning all the numbers in the inlist.
-
-Usage: lhistogram (inlist, numbins=10,
-defaultreallimits=None,suppressoutput=0)
-Returns: list of bin values, lowerreallimit, binsize, extrapoints
-"""
- if (defaultreallimits <> None):
- if type(defaultreallimits) not in [ListType, TupleType] or len(
- defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd
- lowerreallimit = defaultreallimits
- upperreallimit = 1.000001 * max(inlist)
- else: # assume both limits given
- lowerreallimit = defaultreallimits[0]
- upperreallimit = defaultreallimits[1]
- binsize = (upperreallimit - lowerreallimit) / float(numbins)
- else: # no limits given for histogram, both must be calc'd
- estbinwidth = (max(inlist) -
- min(inlist)) / float(numbins) + 1e-6 #1=>cover all
- binsize = ((max(inlist) - min(inlist) + estbinwidth)) / float(numbins)
- lowerreallimit = min(inlist) - binsize / 2 #lower real limit,1st bin
- bins = [0] * (numbins)
- extrapoints = 0
- for num in inlist:
- try:
- if (num - lowerreallimit) < 0:
- extrapoints = extrapoints + 1
- else:
- bintoincrement = int((num - lowerreallimit) / float(binsize))
- bins[bintoincrement] = bins[bintoincrement] + 1
- except:
- extrapoints = extrapoints + 1
- if (extrapoints > 0 and printextras == 1):
- print '\nPoints outside given histogram range =', extrapoints
- return (bins, lowerreallimit, binsize, extrapoints)
-
-
-def lcumfreq(inlist, numbins=10, defaultreallimits=None):
- """
-Returns a cumulative frequency histogram, using the histogram function.
-
-Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None)
-Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints
-"""
- h, l, b, e = histogram(inlist, numbins, defaultreallimits)
- cumhist = cumsum(copy.deepcopy(h))
- return cumhist, l, b, e
-
-
-def lrelfreq(inlist, numbins=10, defaultreallimits=None):
- """
-Returns a relative frequency histogram, using the histogram function.
-
-Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None)
-Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints
-"""
- h, l, b, e = histogram(inlist, numbins, defaultreallimits)
- for i in range(len(h)):
- h[i] = h[i] / float(len(inlist))
- return h, l, b, e
-
-####################################
-##### VARIABILITY FUNCTIONS ######
-####################################
-
-
-def lobrientransform(*args):
- """
-Computes a transform on input data (any number of columns). Used to
-test for homogeneity of variance prior to running one-way stats. From
-Maxwell and Delaney, p.112.
-
-Usage: lobrientransform(*args)
-Returns: transformed data for use in an ANOVA
-"""
- TINY = 1e-10
- k = len(args)
- n = [0.0] * k
- v = [0.0] * k
- m = [0.0] * k
- nargs = []
- for i in range(k):
- nargs.append(copy.deepcopy(args[i]))
- n[i] = float(len(nargs[i]))
- v[i] = var(nargs[i])
- m[i] = mean(nargs[i])
- for j in range(k):
- for i in range(n[j]):
- t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j])**2
- t2 = 0.5 * v[j] * (n[j] - 1.0)
- t3 = (n[j] - 1.0) * (n[j] - 2.0)
- nargs[j][i] = (t1 - t2) / float(t3)
- check = 1
- for j in range(k):
- if v[j] - mean(nargs[j]) > TINY:
- check = 0
- if check <> 1:
- raise ValueError, 'Problem in obrientransform.'
- else:
- return nargs
-
-
-def lsamplevar(inlist):
- """
-Returns the variance of the values in the passed list using
-N for the denominator (i.e., DESCRIBES the sample variance only).
-
-Usage: lsamplevar(inlist)
-"""
- n = len(inlist)
- mn = mean(inlist)
- deviations = []
- for item in inlist:
- deviations.append(item - mn)
- return ss(deviations) / float(n)
-
-
-def lsamplestdev(inlist):
- """
-Returns the standard deviation of the values in the passed list using
-N for the denominator (i.e., DESCRIBES the sample stdev only).
-
-Usage: lsamplestdev(inlist)
-"""
- return math.sqrt(samplevar(inlist))
-
-
-def lcov(x, y, keepdims=0):
- """
-Returns the estimated covariance of the values in the passed
-array (i.e., N-1). Dimension can equal None (ravel array first), an
-integer (the dimension over which to operate), or a sequence (operate
-over multiple dimensions). Set keepdims=1 to return an array with the
-same number of dimensions as inarray.
-
-Usage: lcov(x,y,keepdims=0)
-"""
-
- n = len(x)
- xmn = mean(x)
- ymn = mean(y)
- xdeviations = [0] * len(x)
- ydeviations = [0] * len(y)
- for i in range(len(x)):
- xdeviations[i] = x[i] - xmn
- ydeviations[i] = y[i] - ymn
- ss = 0.0
- for i in range(len(xdeviations)):
- ss = ss + xdeviations[i] * ydeviations[i]
- return ss / float(n - 1)
-
-
-def lvar(inlist):
- """
-Returns the variance of the values in the passed list using N-1
-for the denominator (i.e., for estimating population variance).
-
-Usage: lvar(inlist)
-"""
- n = len(inlist)
- mn = mean(inlist)
- deviations = [0] * len(inlist)
- for i in range(len(inlist)):
- deviations[i] = inlist[i] - mn
- return ss(deviations) / float(n - 1)
-
-
-def lstdev(inlist):
- """
-Returns the standard deviation of the values in the passed list
-using N-1 in the denominator (i.e., to estimate population stdev).
-
-Usage: lstdev(inlist)
-"""
- return math.sqrt(var(inlist))
-
-
-def lsterr(inlist):
- """
-Returns the standard error of the values in the passed list using N-1
-in the denominator (i.e., to estimate population standard error).
-
-Usage: lsterr(inlist)
-"""
- return stdev(inlist) / float(math.sqrt(len(inlist)))
-
-
-def lsem(inlist):
- """
-Returns the estimated standard error of the mean (sx-bar) of the
-values in the passed list. sem = stdev / sqrt(n)
-
-Usage: lsem(inlist)
-"""
- sd = stdev(inlist)
- n = len(inlist)
- return sd / math.sqrt(n)
-
-
-def lz(inlist, score):
- """
-Returns the z-score for a given input score, given that score and the
-list from which that score came. Not appropriate for population calculations.
-
-Usage: lz(inlist, score)
-"""
- z = (score - mean(inlist)) / samplestdev(inlist)
- return z
-
-
-def lzs(inlist):
- """
-Returns a list of z-scores, one for each score in the passed list.
-
-Usage: lzs(inlist)
-"""
- zscores = []
- for item in inlist:
- zscores.append(z(inlist, item))
- return zscores
-
-####################################
-####### TRIMMING FUNCTIONS #######
-####################################
-
-
-def ltrimboth(l, proportiontocut):
- """
-Slices off the passed proportion of items from BOTH ends of the passed
-list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost'
-10% of scores. Assumes list is sorted by magnitude. Slices off LESS if
-proportion results in a non-integer slice index (i.e., conservatively
-slices off proportiontocut).
-
-Usage: ltrimboth (l,proportiontocut)
-Returns: trimmed version of list l
-"""
- lowercut = int(proportiontocut * len(l))
- uppercut = len(l) - lowercut
- return l[lowercut:uppercut]
-
-
-def ltrim1(l, proportiontocut, tail='right'):
- """
-Slices off the passed proportion of items from ONE end of the passed
-list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost'
-10% of scores). Slices off LESS if proportion results in a non-integer
-slice index (i.e., conservatively slices off proportiontocut).
-
-Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left'
-Returns: trimmed version of list l
-"""
- if tail == 'right':
- lowercut = 0
- uppercut = len(l) - int(proportiontocut * len(l))
- elif tail == 'left':
- lowercut = int(proportiontocut * len(l))
- uppercut = len(l)
- return l[lowercut:uppercut]
-
-####################################
-##### CORRELATION FUNCTIONS ######
-####################################
-
-
-def lpaired(x, y):
- """
-Interactively determines the type of data and then runs the
-appropriated statistic for paired group data.
-
-Usage: lpaired(x,y)
-Returns: appropriate statistic name, value, and probability
-"""
- samples = ''
- while samples not in ['i', 'r', 'I', 'R', 'c', 'C']:
- print '\nIndependent or related samples, or correlation (i,r,c): ',
- samples = raw_input()
-
- if samples in ['i', 'I', 'r', 'R']:
- print '\nComparing variances ...',
- # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
- r = obrientransform(x, y)
- f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1))
- if p < 0.05:
- vartype = 'unequal, p=' + str(round(p, 4))
- else:
- vartype = 'equal'
- print vartype
- if samples in ['i', 'I']:
- if vartype[0] == 'e':
- t, p = ttest_ind(x, y, 0)
- print '\nIndependent samples t-test: ', round(t, 4), round(p, 4)
- else:
- if len(x) > 20 or len(y) > 20:
- z, p = ranksums(x, y)
- print '\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(
- p, 4)
- else:
- u, p = mannwhitneyu(x, y)
- print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(
- u, 4), round(p, 4)
-
- else: # RELATED SAMPLES
- if vartype[0] == 'e':
- t, p = ttest_rel(x, y, 0)
- print '\nRelated samples t-test: ', round(t, 4), round(p, 4)
- else:
- t, p = ranksums(x, y)
- print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)
- else: # CORRELATION ANALYSIS
- corrtype = ''
- while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']:
- print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
- corrtype = raw_input()
- if corrtype in ['c', 'C']:
- m, b, r, p, see = linregress(x, y)
- print '\nLinear regression for continuous variables ...'
- lol = [['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'],
- [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]
- ]
- pstat.printcc(lol)
- elif corrtype in ['r', 'R']:
- r, p = spearmanr(x, y)
- print '\nCorrelation for ranked variables ...'
- print "Spearman's r: ", round(r, 4), round(p, 4)
- else: # DICHOTOMOUS
- r, p = pointbiserialr(x, y)
- print '\nAssuming x contains a dichotomous variable ...'
- print 'Point Biserial r: ', round(r, 4), round(p, 4)
- print '\n\n'
- return None
-
-
-def lpearsonr(x, y):
- """
-Calculates a Pearson correlation coefficient and the associated
-probability value. Taken from Heiman's Basic Statistics for the Behav.
-Sci (2nd), p.195.
-
-Usage: lpearsonr(x,y) where x and y are equal-length lists
-Returns: Pearson's r value, two-tailed p-value
-"""
- TINY = 1.0e-30
- if len(x) <> len(y):
- raise ValueError, 'Input values not paired in pearsonr. Aborting.'
- n = len(x)
- x = map(float, x)
- y = map(float, y)
- xmean = mean(x)
- ymean = mean(y)
- r_num = n * (summult(x, y)) - sum(x) * sum(y)
- r_den = math.sqrt((n * ss(x) - square_of_sums(x)) *
- (n * ss(y) - square_of_sums(y)))
- r = (r_num / r_den) # denominator already a float
- df = n - 2
- t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
- prob = betai(0.5 * df, 0.5, df / float(df + t * t))
- return r, prob
-
-
-def llincc(x, y):
- """
-Calculates Lin's concordance correlation coefficient.
-
-Usage: alincc(x,y) where x, y are equal-length arrays
-Returns: Lin's CC
-"""
- covar = lcov(x, y) * (len(x) - 1) / float(len(x)) # correct denom to n
- xvar = lvar(x) * (len(x) - 1) / float(len(x)) # correct denom to n
- yvar = lvar(y) * (len(y) - 1) / float(len(y)) # correct denom to n
- lincc = (2 * covar) / ((xvar + yvar) + ((amean(x) - amean(y))**2))
- return lincc
-
-
-def lspearmanr(x, y):
- """
-Calculates a Spearman rank-order correlation coefficient. Taken
-from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.
-
-Usage: lspearmanr(x,y) where x and y are equal-length lists
-Returns: Spearman's r, two-tailed p-value
-"""
- TINY = 1e-30
- if len(x) <> len(y):
- raise ValueError, 'Input values not paired in spearmanr. Aborting.'
- n = len(x)
- rankx = rankdata(x)
- ranky = rankdata(y)
- dsq = sumdiffsquared(rankx, ranky)
- rs = 1 - 6 * dsq / float(n * (n**2 - 1))
- t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs)))
- df = n - 2
- probrs = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float
- # probability values for rs are from part 2 of the spearman function in
- # Numerical Recipies, p.510. They are close to tables, but not exact. (?)
- return rs, probrs
-
-
-def lpointbiserialr(x, y):
- """
-Calculates a point-biserial correlation coefficient and the associated
-probability value. Taken from Heiman's Basic Statistics for the Behav.
-Sci (1st), p.194.
-
-Usage: lpointbiserialr(x,y) where x,y are equal-length lists
-Returns: Point-biserial r, two-tailed p-value
-"""
- TINY = 1e-30
- if len(x) <> len(y):
- raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.'
- data = pstat.abut(x, y)
- categories = pstat.unique(x)
- if len(categories) <> 2:
- raise ValueError, 'Exactly 2 categories required for pointbiserialr().'
- else: # there are 2 categories, continue
- codemap = pstat.abut(categories, range(2))
- recoded = pstat.recode(data, codemap, 0)
- x = pstat.linexand(data, 0, categories[0])
- y = pstat.linexand(data, 0, categories[1])
- xmean = mean(pstat.colex(x, 1))
- ymean = mean(pstat.colex(y, 1))
- n = len(data)
- adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n)))
- rpb = (ymean - xmean) / samplestdev(pstat.colex(data, 1)) * adjust
- df = n - 2
- t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY)))
- prob = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float
- return rpb, prob
-
-
-def lkendalltau(x, y):
- """
-Calculates Kendall's tau ... correlation of ordinal data. Adapted
-from function kendl1 in Numerical Recipies. Needs good test-routine.@@@
-
-Usage: lkendalltau(x,y)
-Returns: Kendall's tau, two-tailed p-value
-"""
- n1 = 0
- n2 = 0
- iss = 0
- for j in range(len(x) - 1):
- for k in range(j, len(y)):
- a1 = x[j] - x[k]
- a2 = y[j] - y[k]
- aa = a1 * a2
- if (aa): # neither list has a tie
- n1 = n1 + 1
- n2 = n2 + 1
- if aa > 0:
- iss = iss + 1
- else:
- iss = iss - 1
- else:
- if (a1):
- n1 = n1 + 1
- else:
- n2 = n2 + 1
- tau = iss / math.sqrt(n1 * n2)
- svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1))
- z = tau / math.sqrt(svar)
- prob = erfcc(abs(z) / 1.4142136)
- return tau, prob
-
-
-def llinregress(x, y):
- """
-Calculates a regression line on x,y pairs.
-
-Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates
-Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
-"""
- TINY = 1.0e-20
- if len(x) <> len(y):
- raise ValueError, 'Input values not paired in linregress. Aborting.'
- n = len(x)
- x = map(float, x)
- y = map(float, y)
- xmean = mean(x)
- ymean = mean(y)
- r_num = float(n * (summult(x, y)) - sum(x) * sum(y))
- r_den = math.sqrt((n * ss(x) - square_of_sums(x)) *
- (n * ss(y) - square_of_sums(y)))
- r = r_num / r_den
- z = 0.5 * math.log((1.0 + r + TINY) / (1.0 - r + TINY))
- df = n - 2
- t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
- prob = betai(0.5 * df, 0.5, df / (df + t * t))
- slope = r_num / float(n * ss(x) - square_of_sums(x))
- intercept = ymean - slope * xmean
- sterrest = math.sqrt(1 - r * r) * samplestdev(y)
- return slope, intercept, r, prob, sterrest
-
-####################################
-##### INFERENTIAL STATISTICS #####
-####################################
-
-
-def lttest_1samp(a, popmean, printit=0, name='Sample', writemode='a'):
- """
-Calculates the t-obtained for the independent samples T-test on ONE group
-of scores a, given a population mean. If printit=1, results are printed
-to the screen. If printit='filename', the results are output to 'filename'
-using the given writemode (default=append). Returns t-value, and prob.
-
-Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a')
-Returns: t-value, two-tailed prob
-"""
- x = mean(a)
- v = var(a)
- n = len(a)
- df = n - 1
- svar = ((n - 1) * v) / float(df)
- t = (x - popmean) / math.sqrt(svar * (1.0 / n))
- prob = betai(0.5 * df, 0.5, float(df) / (df + t * t))
-
- if printit <> 0:
- statname = 'Single-sample T-test.'
- outputpairedstats(printit, writemode, 'Population', '--', popmean, 0, 0, 0,
- name, n, x, v, min(a), max(a), statname, t, prob)
- return t, prob
-
-
-def lttest_ind(a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'):
- """
-Calculates the t-obtained T-test on TWO INDEPENDENT samples of
-scores a, and b. From Numerical Recipies, p.483. If printit=1, results
-are printed to the screen. If printit='filename', the results are output
-to 'filename' using the given writemode (default=append). Returns t-value,
-and prob.
-
-Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a')
-Returns: t-value, two-tailed prob
-"""
- x1 = mean(a)
- x2 = mean(b)
- v1 = stdev(a)**2
- v2 = stdev(b)**2
- n1 = len(a)
- n2 = len(b)
- df = n1 + n2 - 2
- svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df)
- if not svar:
- svar = 1.0e-26
- t = (x1 - x2) / math.sqrt(svar * (1.0 / n1 + 1.0 / n2))
- prob = betai(0.5 * df, 0.5, df / (df + t * t))
-
- if printit <> 0:
- statname = 'Independent samples T-test.'
- outputpairedstats(printit, writemode, name1, n1, x1, v1, min(a), max(a),
- name2, n2, x2, v2, min(b), max(b), statname, t, prob)
- return t, prob
-
-
-def lttest_rel(a,
- b,
- printit=0,
- name1='Sample1',
- name2='Sample2',
- writemode='a'):
- """
-Calculates the t-obtained T-test on TWO RELATED samples of scores,
-a and b. From Numerical Recipies, p.483. If printit=1, results are
-printed to the screen. If printit='filename', the results are output to
-'filename' using the given writemode (default=append). Returns t-value,
-and prob.
-
-Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a')
-Returns: t-value, two-tailed prob
-"""
- if len(a) <> len(b):
- raise ValueError, 'Unequal length lists in ttest_rel.'
- x1 = mean(a)
- x2 = mean(b)
- v1 = var(a)
- v2 = var(b)
- n = len(a)
- cov = 0
- for i in range(len(a)):
- cov = cov + (a[i] - x1) * (b[i] - x2)
- df = n - 1
- cov = cov / float(df)
- sd = math.sqrt((v1 + v2 - 2.0 * cov) / float(n))
- t = (x1 - x2) / sd
- prob = betai(0.5 * df, 0.5, df / (df + t * t))
-
- if printit <> 0:
- statname = 'Related samples T-test.'
- outputpairedstats(printit, writemode, name1, n, x1, v1, min(a), max(a),
- name2, n, x2, v2, min(b), max(b), statname, t, prob)
- return t, prob
-
-
-def lchisquare(f_obs, f_exp=None):
- """
-Calculates a one-way chi square for list of observed frequencies and returns
-the result. If no expected frequencies are given, the total N is assumed to
-be equally distributed across all groups.
-
-Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq.
-Returns: chisquare-statistic, associated p-value
-"""
- k = len(f_obs) # number of groups
- if f_exp == None:
- f_exp = [sum(f_obs) / float(k)] * len(f_obs) # create k bins with = freq.
- chisq = 0
- for i in range(len(f_obs)):
- chisq = chisq + (f_obs[i] - f_exp[i])**2 / float(f_exp[i])
- return chisq, chisqprob(chisq, k - 1)
-
-
-def lks_2samp(data1, data2):
- """
-Computes the Kolmogorov-Smirnof statistic on 2 samples. From
-Numerical Recipies in C, page 493.
-
-Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions
-Returns: KS D-value, associated p-value
-"""
- j1 = 0
- j2 = 0
- fn1 = 0.0
- fn2 = 0.0
- n1 = len(data1)
- n2 = len(data2)
- en1 = n1
- en2 = n2
- d = 0.0
- data1.sort()
- data2.sort()
- while j1 < n1 and j2 < n2:
- d1 = data1[j1]
- d2 = data2[j2]
- if d1 <= d2:
- fn1 = (j1) / float(en1)
- j1 = j1 + 1
- if d2 <= d1:
- fn2 = (j2) / float(en2)
- j2 = j2 + 1
- dt = (fn2 - fn1)
- if math.fabs(dt) > math.fabs(d):
- d = dt
- try:
- en = math.sqrt(en1 * en2 / float(en1 + en2))
- prob = ksprob((en + 0.12 + 0.11 / en) * abs(d))
- except:
- prob = 1.0
- return d, prob
-
-
-def lmannwhitneyu(x, y):
- """
-Calculates a Mann-Whitney U statistic on the provided scores and
-returns the result. Use only when the n in each condition is < 20 and
-you have 2 independent samples of ranks. NOTE: Mann-Whitney U is
-significant if the u-obtained is LESS THAN or equal to the critical
-value of U found in the tables. Equivalent to Kruskal-Wallis H with
-just 2 groups.
-
-Usage: lmannwhitneyu(data)
-Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))
-"""
- n1 = len(x)
- n2 = len(y)
- ranked = rankdata(x + y)
- rankx = ranked[0:n1] # get the x-ranks
- ranky = ranked[n1:] # the rest are y-ranks
- u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx) # calc U for x
- u2 = n1 * n2 - u1 # remainder is U for y
- bigu = max(u1, u2)
- smallu = min(u1, u2)
- proportion = bigu / float(n1 * n2)
- T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores
- if T == 0:
- raise ValueError, 'All numbers are identical in lmannwhitneyu'
- sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)
- z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc
- return smallu, 1.0 - zprob(z) #, proportion
-
-
-def ltiecorrect(rankvals):
- """
-Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See
-Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences.
-New York: McGraw-Hill. Code adapted from |Stat rankind.c code.
-
-Usage: ltiecorrect(rankvals)
-Returns: T correction factor for U or H
-"""
- sorted, posn = shellsort(rankvals)
- n = len(sorted)
- T = 0.0
- i = 0
- while (i < n - 1):
- if sorted[i] == sorted[i + 1]:
- nties = 1
- while (i < n - 1) and (sorted[i] == sorted[i + 1]):
- nties = nties + 1
- i = i + 1
- T = T + nties**3 - nties
- i = i + 1
- T = T / float(n**3 - n)
- return 1.0 - T
-
-
-def lranksums(x, y):
- """
-Calculates the rank sums statistic on the provided scores and
-returns the result. Use only when the n in each condition is > 20 and you
-have 2 independent samples of ranks.
-
-Usage: lranksums(x,y)
-Returns: a z-statistic, two-tailed p-value
-"""
- n1 = len(x)
- n2 = len(y)
- alldata = x + y
- ranked = rankdata(alldata)
- x = ranked[:n1]
- y = ranked[n1:]
- s = sum(x)
- expected = n1 * (n1 + n2 + 1) / 2.0
- z = (s - expected) / math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0)
- prob = 2 * (1.0 - zprob(abs(z)))
- return z, prob
-
-
-def lwilcoxont(x, y):
- """
-Calculates the Wilcoxon T-test for related samples and returns the
-result. A non-parametric T-test.
-
-Usage: lwilcoxont(x,y)
-Returns: a t-statistic, two-tail probability estimate
-"""
- if len(x) <> len(y):
- raise ValueError, 'Unequal N in wilcoxont. Aborting.'
- d = []
- for i in range(len(x)):
- diff = x[i] - y[i]
- if diff <> 0:
- d.append(diff)
- count = len(d)
- absd = map(abs, d)
- absranked = rankdata(absd)
- r_plus = 0.0
- r_minus = 0.0
- for i in range(len(absd)):
- if d[i] < 0:
- r_minus = r_minus + absranked[i]
- else:
- r_plus = r_plus + absranked[i]
- wt = min(r_plus, r_minus)
- mn = count * (count + 1) * 0.25
- se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0)
- z = math.fabs(wt - mn) / se
- prob = 2 * (1.0 - zprob(abs(z)))
- return wt, prob
-
-
-def lkruskalwallish(*args):
- """
-The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more
-groups, requiring at least 5 subjects in each group. This function
-calculates the Kruskal-Wallis H-test for 3 or more independent samples
-and returns the result.
-
-Usage: lkruskalwallish(*args)
-Returns: H-statistic (corrected for ties), associated p-value
-"""
- args = list(args)
- n = [0] * len(args)
- all = []
- n = map(len, args)
- for i in range(len(args)):
- all = all + args[i]
- ranked = rankdata(all)
- T = tiecorrect(ranked)
- for i in range(len(args)):
- args[i] = ranked[0:n[i]]
- del ranked[0:n[i]]
- rsums = []
- for i in range(len(args)):
- rsums.append(sum(args[i])**2)
- rsums[i] = rsums[i] / float(n[i])
- ssbn = sum(rsums)
- totaln = sum(n)
- h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)
- df = len(args) - 1
- if T == 0:
- raise ValueError, 'All numbers are identical in lkruskalwallish'
- h = h / float(T)
- return h, chisqprob(h, df)
-
-
-def lfriedmanchisquare(*args):
- """
-Friedman Chi-Square is a non-parametric, one-way within-subjects
-ANOVA. This function calculates the Friedman Chi-square test for repeated
-measures and returns the result, along with the associated probability
-value. It assumes 3 or more repeated measures. Only 3 levels requires a
-minimum of 10 subjects in the study. Four levels requires 5 subjects per
-level(??).
-
-Usage: lfriedmanchisquare(*args)
-Returns: chi-square statistic, associated p-value
-"""
- k = len(args)
- if k < 3:
- raise ValueError, 'Less than 3 levels. Friedman test not appropriate.'
- n = len(args[0])
- data = apply(pstat.abut, tuple(args))
- for i in range(len(data)):
- data[i] = rankdata(data[i])
- ssbn = 0
- for i in range(k):
- ssbn = ssbn + sum(args[i])**2
- chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1)
- return chisq, chisqprob(chisq, k - 1)
-
-####################################
-#### PROBABILITY CALCULATIONS ####
-####################################
-
-
-def lchisqprob(chisq, df):
- """
-Returns the (1-tailed) probability value associated with the provided
-chi-square value and df. Adapted from chisq.c in Gary Perlman's |Stat.
-
-Usage: lchisqprob(chisq,df)
-"""
- BIG = 20.0
-
- def ex(x):
- BIG = 20.0
- if x < -BIG:
- return 0.0
- else:
- return math.exp(x)
-
- if chisq <= 0 or df < 1:
- return 1.0
- a = 0.5 * chisq
- if df % 2 == 0:
- even = 1
- else:
- even = 0
- if df > 1:
- y = ex(-a)
- if even:
- s = y
- else:
- s = 2.0 * zprob(-math.sqrt(chisq))
- if (df > 2):
- chisq = 0.5 * (df - 1.0)
- if even:
- z = 1.0
- else:
- z = 0.5
- if a > BIG:
- if even:
- e = 0.0
- else:
- e = math.log(math.sqrt(math.pi))
- c = math.log(a)
- while (z <= chisq):
- e = math.log(z) + e
- s = s + ex(c * z - a - e)
- z = z + 1.0
- return s
- else:
- if even:
- e = 1.0
- else:
- e = 1.0 / math.sqrt(math.pi) / math.sqrt(a)
- c = 0.0
- while (z <= chisq):
- e = e * (a / float(z))
- c = c + e
- z = z + 1.0
- return (c * y + s)
- else:
- return s
-
-
-def lerfcc(x):
- """
-Returns the complementary error function erfc(x) with fractional
-error everywhere less than 1.2e-7. Adapted from Numerical Recipies.
-
-Usage: lerfcc(x)
-"""
- z = abs(x)
- t = 1.0 / (1.0 + 0.5 * z)
- ans = t * math.exp(-z * z - 1.26551223 + t * (1.00002368 + t * (
- 0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * (0.27886807 + t * (
- -1.13520398 + t * (1.48851587 + t * (-0.82215223 + t * 0.17087277)))))
- ))))
- if x >= 0:
- return ans
- else:
- return 2.0 - ans
-
-
-def lzprob(z):
- """
-Returns the area under the normal curve 'to the left of' the given z value.
-Thus,
- for z<0, zprob(z) = 1-tail probability
- for z>0, 1.0-zprob(z) = 1-tail probability
- for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability
-Adapted from z.c in Gary Perlman's |Stat.
-
-Usage: lzprob(z)
-"""
- Z_MAX = 6.0 # maximum meaningful z-value
- if z == 0.0:
- x = 0.0
- else:
- y = 0.5 * math.fabs(z)
- if y >= (Z_MAX * 0.5):
- x = 1.0
- elif (y < 1.0):
- w = y * y
- x = ((
- ((((((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w -
- 0.019198292004) * w + 0.059054035642) * w - 0.151968751364) * w +
- 0.319152932694) * w - 0.531923007300) * w + 0.797884560593) * y * 2.0
- else:
- y = y - 2.0
- x = (((((((
- ((((((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) * y
- - 0.000676904986) * y + 0.001390604284) * y - 0.000794620820) * y
- - 0.002034254874) * y + 0.006549791214) * y - 0.010557625006) * y +
- 0.011630447319) * y - 0.009279453341) * y + 0.005353579108) * y -
- 0.002141268741) * y + 0.000535310849) * y + 0.999936657524
- if z > 0.0:
- prob = ((x + 1.0) * 0.5)
- else:
- prob = ((1.0 - x) * 0.5)
- return prob
-
-
-def lksprob(alam):
- """
-Computes a Kolmolgorov-Smirnov t-test significance level. Adapted from
-Numerical Recipies.
-
-Usage: lksprob(alam)
-"""
- fac = 2.0
- sum = 0.0
- termbf = 0.0
- a2 = -2.0 * alam * alam
- for j in range(1, 201):
- term = fac * math.exp(a2 * j * j)
- sum = sum + term
- if math.fabs(term) <= (0.001 * termbf) or math.fabs(term) < (1.0e-8 * sum):
- return sum
- fac = -fac
- termbf = math.fabs(term)
- return 1.0 # Get here only if fails to converge; was 0.0!!
-
-
-def lfprob(dfnum, dfden, F):
- """
-Returns the (1-tailed) significance level (p-value) of an F
-statistic given the degrees of freedom for the numerator (dfR-dfF) and
-the degrees of freedom for the denominator (dfF).
-
-Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn
-"""
- p = betai(0.5 * dfden, 0.5 * dfnum, dfden / float(dfden + dfnum * F))
- return p
-
-
-def lbetacf(a, b, x):
- """
-This function evaluates the continued fraction form of the incomplete
-Beta function, betai. (Adapted from: Numerical Recipies in C.)
-
-Usage: lbetacf(a,b,x)
-"""
- ITMAX = 200
- EPS = 3.0e-7
-
- bm = az = am = 1.0
- qab = a + b
- qap = a + 1.0
- qam = a - 1.0
- bz = 1.0 - qab * x / qap
- for i in range(ITMAX + 1):
- em = float(i + 1)
- tem = em + em
- d = em * (b - em) * x / ((qam + tem) * (a + tem))
- ap = az + d * am
- bp = bz + d * bm
- d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem))
- app = ap + d * az
- bpp = bp + d * bz
- aold = az
- am = ap / bpp
- bm = bp / bpp
- az = app / bpp
- bz = 1.0
- if (abs(az - aold) < (EPS * abs(az))):
- return az
- print 'a or b too big, or ITMAX too small in Betacf.'
-
-
-def lgammln(xx):
- """
-Returns the gamma function of xx.
- Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.
-(Adapted from: Numerical Recipies in C.)
-
-Usage: lgammln(xx)
-"""
-
- coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, 0.120858003e-2,
- -0.536382e-5]
- x = xx - 1.0
- tmp = x + 5.5
- tmp = tmp - (x + 0.5) * math.log(tmp)
- ser = 1.0
- for j in range(len(coeff)):
- x = x + 1
- ser = ser + coeff[j] / x
- return -tmp + math.log(2.50662827465 * ser)
-
-
-def lbetai(a, b, x):
- """
-Returns the incomplete beta function:
-
- I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)
-
-where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
-function of a. The continued fraction formulation is implemented here,
-using the betacf function. (Adapted from: Numerical Recipies in C.)
-
-Usage: lbetai(a,b,x)
-"""
- if (x < 0.0 or x > 1.0):
- raise ValueError, 'Bad x in lbetai'
- if (x == 0.0 or x == 1.0):
- bt = 0.0
- else:
- bt = math.exp(gammln(a + b) - gammln(a) - gammln(b) + a * math.log(x) + b *
- math.log(1.0 - x))
- if (x < (a + 1.0) / (a + b + 2.0)):
- return bt * betacf(a, b, x) / float(a)
- else:
- return 1.0 - bt * betacf(b, a, 1.0 - x) / float(b)
-
-####################################
-####### ANOVA CALCULATIONS #######
-####################################
-
-
-def lF_oneway(*lists):
- """
-Performs a 1-way ANOVA, returning an F-value and probability given
-any number of groups. From Heiman, pp.394-7.
-
-Usage: F_oneway(*lists) where *lists is any number of lists, one per
- treatment group
-Returns: F value, one-tailed p-value
-"""
- a = len(lists) # ANOVA on 'a' groups, each in it's own list
- means = [0] * a
- vars = [0] * a
- ns = [0] * a
- alldata = []
- tmp = map(N.array, lists)
- means = map(amean, tmp)
- vars = map(avar, tmp)
- ns = map(len, lists)
- for i in range(len(lists)):
- alldata = alldata + lists[i]
- alldata = N.array(alldata)
- bign = len(alldata)
- sstot = ass(alldata) - (asquare_of_sums(alldata) / float(bign))
- ssbn = 0
- for list in lists:
- ssbn = ssbn + asquare_of_sums(N.array(list)) / float(len(list))
- ssbn = ssbn - (asquare_of_sums(alldata) / float(bign))
- sswn = sstot - ssbn
- dfbn = a - 1
- dfwn = bign - a
- msb = ssbn / float(dfbn)
- msw = sswn / float(dfwn)
- f = msb / msw
- prob = fprob(dfbn, dfwn, f)
- return f, prob
-
-
-def lF_value(ER, EF, dfnum, dfden):
- """
-Returns an F-statistic given the following:
- ER = error associated with the null hypothesis (the Restricted model)
- EF = error associated with the alternate hypothesis (the Full model)
- dfR-dfF = degrees of freedom of the numerator
- dfF = degrees of freedom associated with the denominator/Full model
-
-Usage: lF_value(ER,EF,dfnum,dfden)
-"""
- return ((ER - EF) / float(dfnum) / (EF / float(dfden)))
-
-####################################
-######## SUPPORT FUNCTIONS #######
-####################################
-
-
-def writecc(listoflists, file, writetype='w', extra=2):
- """
-Writes a list of lists to a file in columns, customized by the max
-size of items within the columns (max size of items in col, +2 characters)
-to specified file. File-overwrite is the default.
-
-Usage: writecc (listoflists,file,writetype='w',extra=2)
-Returns: None
-"""
- if type(listoflists[0]) not in [ListType, TupleType]:
- listoflists = [listoflists]
- outfile = open(file, writetype)
- rowstokill = []
- list2print = copy.deepcopy(listoflists)
- for i in range(len(listoflists)):
- if listoflists[i] == [
- '\n'
- ] or listoflists[i] == '\n' or listoflists[i] == 'dashes':
- rowstokill = rowstokill + [i]
- rowstokill.reverse()
- for row in rowstokill:
- del list2print[row]
- maxsize = [0] * len(list2print[0])
- for col in range(len(list2print[0])):
- items = pstat.colex(list2print, col)
- items = map(pstat.makestr, items)
- maxsize[col] = max(map(len, items)) + extra
- for row in listoflists:
- if row == ['\n'] or row == '\n':
- outfile.write('\n')
- elif row == ['dashes'] or row == 'dashes':
- dashes = [0] * len(maxsize)
- for j in range(len(maxsize)):
- dashes[j] = '-' * (maxsize[j] - 2)
- outfile.write(pstat.lineincustcols(dashes, maxsize))
- else:
- outfile.write(pstat.lineincustcols(row, maxsize))
- outfile.write('\n')
- outfile.close()
- return None
-
-
-def lincr(l, cap): # to increment a list up to a max-list of 'cap'
- """
-Simulate a counting system from an n-dimensional list.
-
-Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n
-Returns: next set of values for list l, OR -1 (if overflow)
-"""
- l[0] = l[0] + 1 # e.g., [0,0,0] --> [2,4,3] (=cap)
- for i in range(len(l)):
- if l[i] > cap[i] and i < len(l) - 1: # if carryover AND not done
- l[i] = 0
- l[i + 1] = l[i + 1] + 1
- elif l[i] > cap[i] and i == len(
- l) - 1: # overflow past last column, must be finished
- l = -1
- return l
-
-
-def lsum(inlist):
- """
-Returns the sum of the items in the passed list.
-
-Usage: lsum(inlist)
-"""
- s = 0
- for item in inlist:
- s = s + item
- return s
-
-
-def lcumsum(inlist):
- """
-Returns a list consisting of the cumulative sum of the items in the
-passed list.
-
-Usage: lcumsum(inlist)
-"""
- newlist = copy.deepcopy(inlist)
- for i in range(1, len(newlist)):
- newlist[i] = newlist[i] + newlist[i - 1]
- return newlist
-
-
-def lss(inlist):
- """
-Squares each value in the passed list, adds up these squares and
-returns the result.
-
-Usage: lss(inlist)
-"""
- ss = 0
- for item in inlist:
- ss = ss + item * item
- return ss
-
-
-def lsummult(list1, list2):
- """
-Multiplies elements in list1 and list2, element by element, and
-returns the sum of all resulting multiplications. Must provide equal
-length lists.
-
-Usage: lsummult(list1,list2)
-"""
- if len(list1) <> len(list2):
- raise ValueError, 'Lists not equal length in summult.'
- s = 0
- for item1, item2 in pstat.abut(list1, list2):
- s = s + item1 * item2
- return s
-
-
-def lsumdiffsquared(x, y):
- """
-Takes pairwise differences of the values in lists x and y, squares
-these differences, and returns the sum of these squares.
-
-Usage: lsumdiffsquared(x,y)
-Returns: sum[(x[i]-y[i])**2]
-"""
- sds = 0
- for i in range(len(x)):
- sds = sds + (x[i] - y[i])**2
- return sds
-
-
-def lsquare_of_sums(inlist):
- """
-Adds the values in the passed list, squares the sum, and returns
-the result.
-
-Usage: lsquare_of_sums(inlist)
-Returns: sum(inlist[i])**2
-"""
- s = sum(inlist)
- return float(s) * s
-
-
-def lshellsort(inlist):
- """
-Shellsort algorithm. Sorts a 1D-list.
-
-Usage: lshellsort(inlist)
-Returns: sorted-inlist, sorting-index-vector (for original list)
-"""
- n = len(inlist)
- svec = copy.deepcopy(inlist)
- ivec = range(n)
- gap = n / 2 # integer division needed
- while gap > 0:
- for i in range(gap, n):
- for j in range(i - gap, -1, -gap):
- while j >= 0 and svec[j] > svec[j + gap]:
- temp = svec[j]
- svec[j] = svec[j + gap]
- svec[j + gap] = temp
- itemp = ivec[j]
- ivec[j] = ivec[j + gap]
- ivec[j + gap] = itemp
- gap = gap / 2 # integer division needed
-# svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]]
- return svec, ivec
-
-
-def lrankdata(inlist):
- """
-Ranks the data in inlist, dealing with ties appropritely. Assumes
-a 1D inlist. Adapted from Gary Perlman's |Stat ranksort.
-
-Usage: lrankdata(inlist)
-Returns: a list of length equal to inlist, containing rank scores
-"""
- n = len(inlist)
- svec, ivec = shellsort(inlist)
- sumranks = 0
- dupcount = 0
- newlist = [0] * n
- for i in range(n):
- sumranks = sumranks + i
- dupcount = dupcount + 1
- if i == n - 1 or svec[i] <> svec[i + 1]:
- averank = sumranks / float(dupcount) + 1
- for j in range(i - dupcount + 1, i + 1):
- newlist[ivec[j]] = averank
- sumranks = 0
- dupcount = 0
- return newlist
-
-
-def outputpairedstats(fname, writemode, name1, n1, m1, se1, min1, max1, name2,
- n2, m2, se2, min2, max2, statname, stat, prob):
- """
-Prints or write to a file stats for two groups, using the name, n,
-mean, sterr, min and max for each group, as well as the statistic name,
-its value, and the associated p-value.
-
-Usage: outputpairedstats(fname,writemode,
- name1,n1,mean1,stderr1,min1,max1,
- name2,n2,mean2,stderr2,min2,max2,
- statname,stat,prob)
-Returns: None
-"""
- suffix = '' # for *s after the p-value
- try:
- x = prob.shape
- prob = prob[0]
- except:
- pass
- if prob < 0.001:
- suffix = ' ***'
- elif prob < 0.01:
- suffix = ' **'
- elif prob < 0.05:
- suffix = ' *'
- title = [['Name', 'N', 'Mean', 'SD', 'Min', 'Max']]
- lofl = title + [[name1, n1, round(m1, 3), round(
- math.sqrt(se1), 3), min1, max1], [name2, n2, round(m2, 3), round(
- math.sqrt(se2), 3), min2, max2]]
- if type(fname) <> StringType or len(fname) == 0:
- print
- print statname
- print
- pstat.printcc(lofl)
- print
- try:
- if stat.shape == ():
- stat = stat[0]
- if prob.shape == ():
- prob = prob[0]
- except:
- pass
- print 'Test statistic = ', round(stat, 3), ' p = ', round(prob, 3), suffix
- print
- else:
- file = open(fname, writemode)
- file.write('\n' + statname + '\n\n')
- file.close()
- writecc(lofl, fname, 'a')
- file = open(fname, 'a')
- try:
- if stat.shape == ():
- stat = stat[0]
- if prob.shape == ():
- prob = prob[0]
- except:
- pass
- file.write(pstat.list2string(['\nTest statistic = ', round(stat, 4),
- ' p = ', round(prob, 4), suffix, '\n\n']))
- file.close()
- return None
-
-
-def lfindwithin(data):
- """
-Returns an integer representing a binary vector, where 1=within-
-subject factor, 0=between. Input equals the entire data 2D list (i.e.,
-column 0=random factor, column -1=measured values (those two are skipped).
-Note: input data is in |Stat format ... a list of lists ("2D list") with
-one row per measured value, first column=subject identifier, last column=
-score, one in-between column per factor (these columns contain level
-designations on each factor). See also stats.anova.__doc__.
-
-Usage: lfindwithin(data) data in |Stat format
-"""
-
- numfact = len(data[0]) - 1
- withinvec = 0
- for col in range(1, numfact):
- examplelevel = pstat.unique(pstat.colex(data, col))[0]
- rows = pstat.linexand(data, col, examplelevel) # get 1 level of this factor
- factsubjs = pstat.unique(pstat.colex(rows, 0))
- allsubjs = pstat.unique(pstat.colex(data, 0))
- if len(factsubjs) == len(allsubjs): # fewer Ss than scores on this factor?
- withinvec = withinvec + (1 << col)
- return withinvec
-
-#########################################################
-#########################################################
-####### DISPATCH LISTS AND TUPLES TO ABOVE FCNS #########
-#########################################################
-#########################################################
-
-## CENTRAL TENDENCY:
-geometricmean = Dispatch((lgeometricmean, (ListType, TupleType)),)
-harmonicmean = Dispatch((lharmonicmean, (ListType, TupleType)),)
-mean = Dispatch((lmean, (ListType, TupleType)),)
-median = Dispatch((lmedian, (ListType, TupleType)),)
-medianscore = Dispatch((lmedianscore, (ListType, TupleType)),)
-mode = Dispatch((lmode, (ListType, TupleType)),)
-
-## MOMENTS:
-moment = Dispatch((lmoment, (ListType, TupleType)),)
-variation = Dispatch((lvariation, (ListType, TupleType)),)
-skew = Dispatch((lskew, (ListType, TupleType)),)
-kurtosis = Dispatch((lkurtosis, (ListType, TupleType)),)
-describe = Dispatch((ldescribe, (ListType, TupleType)),)
-
-## FREQUENCY STATISTICS:
-itemfreq = Dispatch((litemfreq, (ListType, TupleType)),)
-scoreatpercentile = Dispatch((lscoreatpercentile, (ListType, TupleType)),)
-percentileofscore = Dispatch((lpercentileofscore, (ListType, TupleType)),)
-histogram = Dispatch((lhistogram, (ListType, TupleType)),)
-cumfreq = Dispatch((lcumfreq, (ListType, TupleType)),)
-relfreq = Dispatch((lrelfreq, (ListType, TupleType)),)
-
-## VARIABILITY:
-obrientransform = Dispatch((lobrientransform, (ListType, TupleType)),)
-samplevar = Dispatch((lsamplevar, (ListType, TupleType)),)
-samplestdev = Dispatch((lsamplestdev, (ListType, TupleType)),)
-var = Dispatch((lvar, (ListType, TupleType)),)
-stdev = Dispatch((lstdev, (ListType, TupleType)),)
-sterr = Dispatch((lsterr, (ListType, TupleType)),)
-sem = Dispatch((lsem, (ListType, TupleType)),)
-z = Dispatch((lz, (ListType, TupleType)),)
-zs = Dispatch((lzs, (ListType, TupleType)),)
-
-## TRIMMING FCNS:
-trimboth = Dispatch((ltrimboth, (ListType, TupleType)),)
-trim1 = Dispatch((ltrim1, (ListType, TupleType)),)
-
-## CORRELATION FCNS:
-paired = Dispatch((lpaired, (ListType, TupleType)),)
-pearsonr = Dispatch((lpearsonr, (ListType, TupleType)),)
-spearmanr = Dispatch((lspearmanr, (ListType, TupleType)),)
-pointbiserialr = Dispatch((lpointbiserialr, (ListType, TupleType)),)
-kendalltau = Dispatch((lkendalltau, (ListType, TupleType)),)
-linregress = Dispatch((llinregress, (ListType, TupleType)),)
-
-## INFERENTIAL STATS:
-ttest_1samp = Dispatch((lttest_1samp, (ListType, TupleType)),)
-ttest_ind = Dispatch((lttest_ind, (ListType, TupleType)),)
-ttest_rel = Dispatch((lttest_rel, (ListType, TupleType)),)
-chisquare = Dispatch((lchisquare, (ListType, TupleType)),)
-ks_2samp = Dispatch((lks_2samp, (ListType, TupleType)),)
-mannwhitneyu = Dispatch((lmannwhitneyu, (ListType, TupleType)),)
-ranksums = Dispatch((lranksums, (ListType, TupleType)),)
-tiecorrect = Dispatch((ltiecorrect, (ListType, TupleType)),)
-wilcoxont = Dispatch((lwilcoxont, (ListType, TupleType)),)
-kruskalwallish = Dispatch((lkruskalwallish, (ListType, TupleType)),)
-friedmanchisquare = Dispatch((lfriedmanchisquare, (ListType, TupleType)),)
-
-## PROBABILITY CALCS:
-chisqprob = Dispatch((lchisqprob, (IntType, FloatType)),)
-zprob = Dispatch((lzprob, (IntType, FloatType)),)
-ksprob = Dispatch((lksprob, (IntType, FloatType)),)
-fprob = Dispatch((lfprob, (IntType, FloatType)),)
-betacf = Dispatch((lbetacf, (IntType, FloatType)),)
-betai = Dispatch((lbetai, (IntType, FloatType)),)
-erfcc = Dispatch((lerfcc, (IntType, FloatType)),)
-gammln = Dispatch((lgammln, (IntType, FloatType)),)
-
-## ANOVA FUNCTIONS:
-F_oneway = Dispatch((lF_oneway, (ListType, TupleType)),)
-F_value = Dispatch((lF_value, (ListType, TupleType)),)
-
-## SUPPORT FUNCTIONS:
-incr = Dispatch((lincr, (ListType, TupleType)),)
-sum = Dispatch((lsum, (ListType, TupleType)),)
-cumsum = Dispatch((lcumsum, (ListType, TupleType)),)
-ss = Dispatch((lss, (ListType, TupleType)),)
-summult = Dispatch((lsummult, (ListType, TupleType)),)
-square_of_sums = Dispatch((lsquare_of_sums, (ListType, TupleType)),)
-sumdiffsquared = Dispatch((lsumdiffsquared, (ListType, TupleType)),)
-shellsort = Dispatch((lshellsort, (ListType, TupleType)),)
-rankdata = Dispatch((lrankdata, (ListType, TupleType)),)
-findwithin = Dispatch((lfindwithin, (ListType, TupleType)),)
-
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
-
-try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE
- import numpy as N
- import numpy.linalg as LA
-
- #####################################
- ######## ACENTRAL TENDENCY ########
- #####################################
-
-
- def ageometricmean(inarray, dimension=None, keepdims=0):
- """
-Calculates the geometric mean of the values in the passed array.
-That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in
-the passed array. Use dimension=None to flatten array first. REMEMBER: if
-dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
-if dimension is a sequence, it collapses over all specified dimensions. If
-keepdims is set to 1, the resulting array will have as many dimensions as
-inarray, with only 1 'level' per dim that was collapsed over.
-
-Usage: ageometricmean(inarray,dimension=None,keepdims=0)
-Returns: geometric mean computed over dim(s) listed in dimension
-"""
- inarray = N.array(inarray, N.float_)
- if dimension == None:
- inarray = N.ravel(inarray)
- size = len(inarray)
- mult = N.power(inarray, 1.0 / size)
- mult = N.multiply.reduce(mult)
- elif type(dimension) in [IntType, FloatType]:
- size = inarray.shape[dimension]
- mult = N.power(inarray, 1.0 / size)
- mult = N.multiply.reduce(mult, dimension)
- if keepdims == 1:
- shp = list(inarray.shape)
- shp[dimension] = 1
- sum = N.reshape(sum, shp)
- else: # must be a SEQUENCE of dims to average over
- dims = list(dimension)
- dims.sort()
- dims.reverse()
- size = N.array(N.multiply.reduce(N.take(inarray.shape, dims)), N.float_)
- mult = N.power(inarray, 1.0 / size)
- for dim in dims:
- mult = N.multiply.reduce(mult, dim)
- if keepdims == 1:
- shp = list(inarray.shape)
- for dim in dims:
- shp[dim] = 1
- mult = N.reshape(mult, shp)
- return mult
-
- def aharmonicmean(inarray, dimension=None, keepdims=0):
- """
-Calculates the harmonic mean of the values in the passed array.
-That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in
-the passed array. Use dimension=None to flatten array first. REMEMBER: if
-dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
-if dimension is a sequence, it collapses over all specified dimensions. If
-keepdims is set to 1, the resulting array will have as many dimensions as
-inarray, with only 1 'level' per dim that was collapsed over.
-
-Usage: aharmonicmean(inarray,dimension=None,keepdims=0)
-Returns: harmonic mean computed over dim(s) in dimension
-"""
- inarray = inarray.astype(N.float_)
- if dimension == None:
- inarray = N.ravel(inarray)
- size = len(inarray)
- s = N.add.reduce(1.0 / inarray)
- elif type(dimension) in [IntType, FloatType]:
- size = float(inarray.shape[dimension])
- s = N.add.reduce(1.0 / inarray, dimension)
- if keepdims == 1:
- shp = list(inarray.shape)
- shp[dimension] = 1
- s = N.reshape(s, shp)
- else: # must be a SEQUENCE of dims to average over
- dims = list(dimension)
- dims.sort()
- nondims = []
- for i in range(len(inarray.shape)):
- if i not in dims:
- nondims.append(i)
- tinarray = N.transpose(inarray, nondims + dims) # put keep-dims first
- idx = [0] * len(nondims)
- if idx == []:
- size = len(N.ravel(inarray))
- s = asum(1.0 / inarray)
- if keepdims == 1:
- s = N.reshape([s], N.ones(len(inarray.shape)))
- else:
- idx[0] = -1
- loopcap = N.array(tinarray.shape[0:len(nondims)]) - 1
- s = N.zeros(loopcap + 1, N.float_)
- while incr(idx, loopcap) <> -1:
- s[idx] = asum(1.0 / tinarray[idx])
- size = N.multiply.reduce(N.take(inarray.shape, dims))
- if keepdims == 1:
- shp = list(inarray.shape)
- for dim in dims:
- shp[dim] = 1
- s = N.reshape(s, shp)
- return size / s
-
- def amean(inarray, dimension=None, keepdims=0):
- """
-Calculates the arithmatic mean of the values in the passed array.
-That is: 1/n * (x1 + x2 + ... + xn). Defaults to ALL values in the
-passed array. Use dimension=None to flatten array first. REMEMBER: if
-dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
-if dimension is a sequence, it collapses over all specified dimensions. If
-keepdims is set to 1, the resulting array will have as many dimensions as
-inarray, with only 1 'level' per dim that was collapsed over.
-
-Usage: amean(inarray,dimension=None,keepdims=0)
-Returns: arithematic mean calculated over dim(s) in dimension
-"""
- if inarray.dtype in [N.int_, N.short, N.ubyte]:
- inarray = inarray.astype(N.float_)
- if dimension == None:
- inarray = N.ravel(inarray)
- sum = N.add.reduce(inarray)
- denom = float(len(inarray))
- elif type(dimension) in [IntType, FloatType]:
- sum = asum(inarray, dimension)
- denom = float(inarray.shape[dimension])
- if keepdims == 1:
- shp = list(inarray.shape)
- shp[dimension] = 1
- sum = N.reshape(sum, shp)
- else: # must be a TUPLE of dims to average over
- dims = list(dimension)
- dims.sort()
- dims.reverse()
- sum = inarray * 1.0
- for dim in dims:
- sum = N.add.reduce(sum, dim)
- denom = N.array(N.multiply.reduce(N.take(inarray.shape, dims)), N.float_)
- if keepdims == 1:
- shp = list(inarray.shape)
- for dim in dims:
- shp[dim] = 1
- sum = N.reshape(sum, shp)
- return sum / denom
-
- def amedian(inarray, numbins=1000):
- """
-Calculates the COMPUTED median value of an array of numbers, given the
-number of bins to use for the histogram (more bins approaches finding the
-precise median value of the array; default number of bins = 1000). From
-G.W. Heiman's Basic Stats, or CRC Probability & Statistics.
-NOTE: THIS ROUTINE ALWAYS uses the entire passed array (flattens it first).
-
-Usage: amedian(inarray,numbins=1000)
-Returns: median calculated over ALL values in inarray
-"""
- inarray = N.ravel(inarray)
- (hist, smallest, binsize, extras) = ahistogram(inarray, numbins,
- [min(inarray), max(inarray)])
- cumhist = N.cumsum(hist) # make cumulative histogram
- otherbins = N.greater_equal(cumhist, len(inarray) / 2.0)
- otherbins = list(otherbins) # list of 0/1s, 1s start at median bin
- cfbin = otherbins.index(1) # get 1st(!) index holding 50%ile score
- LRL = smallest + binsize * cfbin # get lower read limit of that bin
- cfbelow = N.add.reduce(hist[0:cfbin]) # cum. freq. below bin
- freq = hist[cfbin] # frequency IN the 50%ile bin
- median = LRL + (
- (len(inarray) / 2.0 - cfbelow) / float(freq)) * binsize # MEDIAN
- return median
-
- def amedianscore(inarray, dimension=None):
- """
-Returns the 'middle' score of the passed array. If there is an even
-number of scores, the mean of the 2 middle scores is returned. Can function
-with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can
-be None, to pre-flatten the array, or else dimension must equal 0).
-
-Usage: amedianscore(inarray,dimension=None)
-Returns: 'middle' score of the array, or the mean of the 2 middle scores
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- inarray = N.sort(inarray, dimension)
- if inarray.shape[dimension] % 2 == 0: # if even number of elements
- indx = inarray.shape[dimension] / 2 # integer division correct
- median = N.asarray(inarray[indx] + inarray[indx - 1]) / 2.0
- else:
- indx = inarray.shape[dimension] / 2 # integer division correct
- median = N.take(inarray, [indx], dimension)
- if median.shape == (1,):
- median = median[0]
- return median
-
- def amode(a, dimension=None):
- """
-Returns an array of the modal (most common) score in the passed array.
-If there is more than one such score, ONLY THE FIRST is returned.
-The bin-count for the modal values is also returned. Operates on whole
-array (dimension=None), or on a given dimension.
-
-Usage: amode(a, dimension=None)
-Returns: array of bin-counts for mode(s), array of corresponding modal values
-"""
-
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- scores = pstat.aunique(N.ravel(a)) # get ALL unique values
- testshape = list(a.shape)
- testshape[dimension] = 1
- oldmostfreq = N.zeros(testshape)
- oldcounts = N.zeros(testshape)
- for score in scores:
- template = N.equal(a, score)
- counts = asum(template, dimension, 1)
- mostfrequent = N.where(counts > oldcounts, score, oldmostfreq)
- oldcounts = N.where(counts > oldcounts, counts, oldcounts)
- oldmostfreq = mostfrequent
- return oldcounts, mostfrequent
-
- def atmean(a, limits=None, inclusive=(1, 1)):
- """
-Returns the arithmetic mean of all values in an array, ignoring values
-strictly outside the sequence passed to 'limits'. Note: either limit
-in the sequence, or the value of limits itself, can be set to None. The
-inclusive list/tuple determines whether the lower and upper limiting bounds
-(respectively) are open/exclusive (0) or closed/inclusive (1).
-
-Usage: atmean(a,limits=None,inclusive=(1,1))
-"""
- if a.dtype in [N.int_, N.short, N.ubyte]:
- a = a.astype(N.float_)
- if limits == None:
- return mean(a)
- assert type(limits) in [ListType, TupleType, N.ndarray
- ], 'Wrong type for limits in atmean'
- if inclusive[0]:
- lowerfcn = N.greater_equal
- else:
- lowerfcn = N.greater
- if inclusive[1]:
- upperfcn = N.less_equal
- else:
- upperfcn = N.less
- if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(
- N.ravel(a)):
- raise ValueError, 'No array values within given limits (atmean).'
- elif limits[0] == None and limits[1] <> None:
- mask = upperfcn(a, limits[1])
- elif limits[0] <> None and limits[1] == None:
- mask = lowerfcn(a, limits[0])
- elif limits[0] <> None and limits[1] <> None:
- mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1])
- s = float(N.add.reduce(N.ravel(a * mask)))
- n = float(N.add.reduce(N.ravel(mask)))
- return s / n
-
- def atvar(a, limits=None, inclusive=(1, 1)):
- """
-Returns the sample variance of values in an array, (i.e., using N-1),
-ignoring values strictly outside the sequence passed to 'limits'.
-Note: either limit in the sequence, or the value of limits itself,
-can be set to None. The inclusive list/tuple determines whether the lower
-and upper limiting bounds (respectively) are open/exclusive (0) or
-closed/inclusive (1). ASSUMES A FLAT ARRAY (OR ELSE PREFLATTENS).
-
-Usage: atvar(a,limits=None,inclusive=(1,1))
-"""
- a = a.astype(N.float_)
- if limits == None or limits == [None, None]:
- return avar(a)
- assert type(limits) in [ListType, TupleType, N.ndarray
- ], 'Wrong type for limits in atvar'
- if inclusive[0]:
- lowerfcn = N.greater_equal
- else:
- lowerfcn = N.greater
- if inclusive[1]:
- upperfcn = N.less_equal
- else:
- upperfcn = N.less
- if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(
- N.ravel(a)):
- raise ValueError, 'No array values within given limits (atvar).'
- elif limits[0] == None and limits[1] <> None:
- mask = upperfcn(a, limits[1])
- elif limits[0] <> None and limits[1] == None:
- mask = lowerfcn(a, limits[0])
- elif limits[0] <> None and limits[1] <> None:
- mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1])
-
- a = N.compress(mask, a) # squish out excluded values
- return avar(a)
-
- def atmin(a, lowerlimit=None, dimension=None, inclusive=1):
- """
-Returns the minimum value of a, along dimension, including only values less
-than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None,
-all values in the array are used.
-
-Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1)
-"""
- if inclusive:
- lowerfcn = N.greater
- else:
- lowerfcn = N.greater_equal
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- if lowerlimit == None:
- lowerlimit = N.minimum.reduce(N.ravel(a)) - 11
- biggest = N.maximum.reduce(N.ravel(a))
- ta = N.where(lowerfcn(a, lowerlimit), a, biggest)
- return N.minimum.reduce(ta, dimension)
-
- def atmax(a, upperlimit, dimension=None, inclusive=1):
- """
-Returns the maximum value of a, along dimension, including only values greater
-than (or equal to, if inclusive=1) upperlimit. If the limit is set to None,
-a limit larger than the max value in the array is used.
-
-Usage: atmax(a,upperlimit,dimension=None,inclusive=1)
-"""
- if inclusive:
- upperfcn = N.less
- else:
- upperfcn = N.less_equal
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- if upperlimit == None:
- upperlimit = N.maximum.reduce(N.ravel(a)) + 1
- smallest = N.minimum.reduce(N.ravel(a))
- ta = N.where(upperfcn(a, upperlimit), a, smallest)
- return N.maximum.reduce(ta, dimension)
-
- def atstdev(a, limits=None, inclusive=(1, 1)):
- """
-Returns the standard deviation of all values in an array, ignoring values
-strictly outside the sequence passed to 'limits'. Note: either limit
-in the sequence, or the value of limits itself, can be set to None. The
-inclusive list/tuple determines whether the lower and upper limiting bounds
-(respectively) are open/exclusive (0) or closed/inclusive (1).
-
-Usage: atstdev(a,limits=None,inclusive=(1,1))
-"""
- return N.sqrt(tvar(a, limits, inclusive))
-
- def atsem(a, limits=None, inclusive=(1, 1)):
- """
-Returns the standard error of the mean for the values in an array,
-(i.e., using N for the denominator), ignoring values strictly outside
-the sequence passed to 'limits'. Note: either limit in the sequence,
-or the value of limits itself, can be set to None. The inclusive list/tuple
-determines whether the lower and upper limiting bounds (respectively) are
-open/exclusive (0) or closed/inclusive (1).
-
-Usage: atsem(a,limits=None,inclusive=(1,1))
-"""
- sd = tstdev(a, limits, inclusive)
- if limits == None or limits == [None, None]:
- n = float(len(N.ravel(a)))
- limits = [min(a) - 1, max(a) + 1]
- assert type(limits) in [ListType, TupleType, N.ndarray
- ], 'Wrong type for limits in atsem'
- if inclusive[0]:
- lowerfcn = N.greater_equal
- else:
- lowerfcn = N.greater
- if inclusive[1]:
- upperfcn = N.less_equal
- else:
- upperfcn = N.less
- if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(
- N.ravel(a)):
- raise ValueError, 'No array values within given limits (atsem).'
- elif limits[0] == None and limits[1] <> None:
- mask = upperfcn(a, limits[1])
- elif limits[0] <> None and limits[1] == None:
- mask = lowerfcn(a, limits[0])
- elif limits[0] <> None and limits[1] <> None:
- mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1])
- term1 = N.add.reduce(N.ravel(a * a * mask))
- n = float(N.add.reduce(N.ravel(mask)))
- return sd / math.sqrt(n)
-
-#####################################
-############ AMOMENTS #############
-#####################################
-
- def amoment(a, moment=1, dimension=None):
- """
-Calculates the nth moment about the mean for a sample (defaults to the
-1st moment). Generally used to calculate coefficients of skewness and
-kurtosis. Dimension can equal None (ravel array first), an integer
-(the dimension over which to operate), or a sequence (operate over
-multiple dimensions).
-
-Usage: amoment(a,moment=1,dimension=None)
-Returns: appropriate moment along given dimension
-"""
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- if moment == 1:
- return 0.0
- else:
- mn = amean(a, dimension, 1) # 1=keepdims
- s = N.power((a - mn), moment)
- return amean(s, dimension)
-
- def avariation(a, dimension=None):
- """
-Returns the coefficient of variation, as defined in CRC Standard
-Probability and Statistics, p.6. Dimension can equal None (ravel array
-first), an integer (the dimension over which to operate), or a
-sequence (operate over multiple dimensions).
-
-Usage: avariation(a,dimension=None)
-"""
- return 100.0 * asamplestdev(a, dimension) / amean(a, dimension)
-
- def askew(a, dimension=None):
- """
-Returns the skewness of a distribution (normal ==> 0.0; >0 means extra
-weight in left tail). Use askewtest() to see if it's close enough.
-Dimension can equal None (ravel array first), an integer (the
-dimension over which to operate), or a sequence (operate over multiple
-dimensions).
-
-Usage: askew(a, dimension=None)
-Returns: skew of vals in a along dimension, returning ZERO where all vals equal
-"""
- denom = N.power(amoment(a, 2, dimension), 1.5)
- zero = N.equal(denom, 0)
- if type(denom) == N.ndarray and asum(zero) <> 0:
- print 'Number of zeros in askew: ', asum(zero)
- denom = denom + zero # prevent divide-by-zero
- return N.where(zero, 0, amoment(a, 3, dimension) / denom)
-
- def akurtosis(a, dimension=None):
- """
-Returns the kurtosis of a distribution (normal ==> 3.0; >3 means
-heavier in the tails, and usually more peaked). Use akurtosistest()
-to see if it's close enough. Dimension can equal None (ravel array
-first), an integer (the dimension over which to operate), or a
-sequence (operate over multiple dimensions).
-
-Usage: akurtosis(a,dimension=None)
-Returns: kurtosis of values in a along dimension, and ZERO where all vals equal
-"""
- denom = N.power(amoment(a, 2, dimension), 2)
- zero = N.equal(denom, 0)
- if type(denom) == N.ndarray and asum(zero) <> 0:
- print 'Number of zeros in akurtosis: ', asum(zero)
- denom = denom + zero # prevent divide-by-zero
- return N.where(zero, 0, amoment(a, 4, dimension) / denom)
-
- def adescribe(inarray, dimension=None):
- """
-Returns several descriptive statistics of the passed array. Dimension
-can equal None (ravel array first), an integer (the dimension over
-which to operate), or a sequence (operate over multiple dimensions).
-
-Usage: adescribe(inarray,dimension=None)
-Returns: n, (min,max), mean, standard deviation, skew, kurtosis
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- n = inarray.shape[dimension]
- mm = (N.minimum.reduce(inarray), N.maximum.reduce(inarray))
- m = amean(inarray, dimension)
- sd = astdev(inarray, dimension)
- skew = askew(inarray, dimension)
- kurt = akurtosis(inarray, dimension)
- return n, mm, m, sd, skew, kurt
-
-#####################################
-######## NORMALITY TESTS ##########
-#####################################
-
- def askewtest(a, dimension=None):
- """
-Tests whether the skew is significantly different from a normal
-distribution. Dimension can equal None (ravel array first), an
-integer (the dimension over which to operate), or a sequence (operate
-over multiple dimensions).
-
-Usage: askewtest(a,dimension=None)
-Returns: z-score and 2-tail z-probability
-"""
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- b2 = askew(a, dimension)
- n = float(a.shape[dimension])
- y = b2 * N.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2)))
- beta2 = (3.0 * (n * n + 27 * n - 70) * (n + 1) *
- (n + 3)) / ((n - 2.0) * (n + 5) * (n + 7) * (n + 9))
- W2 = -1 + N.sqrt(2 * (beta2 - 1))
- delta = 1 / N.sqrt(N.log(N.sqrt(W2)))
- alpha = N.sqrt(2 / (W2 - 1))
- y = N.where(y == 0, 1, y)
- Z = delta * N.log(y / alpha + N.sqrt((y / alpha)**2 + 1))
- return Z, (1.0 - zprob(Z)) * 2
-
- def akurtosistest(a, dimension=None):
- """
-Tests whether a dataset has normal kurtosis (i.e.,
-kurtosis=3(n-1)/(n+1)) Valid only for n>20. Dimension can equal None
-(ravel array first), an integer (the dimension over which to operate),
-or a sequence (operate over multiple dimensions).
-
-Usage: akurtosistest(a,dimension=None)
-Returns: z-score and 2-tail z-probability, returns 0 for bad pixels
-"""
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- n = float(a.shape[dimension])
- if n < 20:
- print 'akurtosistest only valid for n>=20 ... continuing anyway, n=', n
- b2 = akurtosis(a, dimension)
- E = 3.0 * (n - 1) / (n + 1)
- varb2 = 24.0 * n * (n - 2) * (n - 3) / ((n + 1) * (n + 1) * (n + 3) *
- (n + 5))
- x = (b2 - E) / N.sqrt(varb2)
- sqrtbeta1 = 6.0 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) * N.sqrt(
- (6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3)))
- A = 6.0 + 8.0 / sqrtbeta1 * (2.0 / sqrtbeta1 +
- N.sqrt(1 + 4.0 / (sqrtbeta1**2)))
- term1 = 1 - 2 / (9.0 * A)
- denom = 1 + x * N.sqrt(2 / (A - 4.0))
- denom = N.where(N.less(denom, 0), 99, denom)
- term2 = N.where(
- N.equal(denom, 0), term1, N.power(
- (1 - 2.0 / A) / denom, 1 / 3.0))
- Z = (term1 - term2) / N.sqrt(2 / (9.0 * A))
- Z = N.where(N.equal(denom, 99), 0, Z)
- return Z, (1.0 - zprob(Z)) * 2
-
- def anormaltest(a, dimension=None):
- """
-Tests whether skew and/OR kurtosis of dataset differs from normal
-curve. Can operate over multiple dimensions. Dimension can equal
-None (ravel array first), an integer (the dimension over which to
-operate), or a sequence (operate over multiple dimensions).
-
-Usage: anormaltest(a,dimension=None)
-Returns: z-score and 2-tail probability
-"""
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- s, p = askewtest(a, dimension)
- k, p = akurtosistest(a, dimension)
- k2 = N.power(s, 2) + N.power(k, 2)
- return k2, achisqprob(k2, 2)
-
-#####################################
-###### AFREQUENCY FUNCTIONS #######
-#####################################
-
- def aitemfreq(a):
- """
-Returns a 2D array of item frequencies. Column 1 contains item values,
-column 2 contains their respective counts. Assumes a 1D array is passed.
-@@@sorting OK?
-
-Usage: aitemfreq(a)
-Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)
-"""
- scores = pstat.aunique(a)
- scores = N.sort(scores)
- freq = N.zeros(len(scores))
- for i in range(len(scores)):
- freq[i] = N.add.reduce(N.equal(a, scores[i]))
- return N.array(pstat.aabut(scores, freq))
-
- def ascoreatpercentile(inarray, percent):
- """
-Usage: ascoreatpercentile(inarray,percent) 0<percent<100
-Returns: score at given percentile, relative to inarray distribution
-"""
- percent = percent / 100.0
- targetcf = percent * len(inarray)
- h, lrl, binsize, extras = histogram(inarray)
- cumhist = cumsum(h * 1)
- for i in range(len(cumhist)):
- if cumhist[i] >= targetcf:
- break
- score = binsize * (
- (targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + binsize * i)
- return score
-
- def apercentileofscore(inarray, score, histbins=10, defaultlimits=None):
- """
-Note: result of this function depends on the values used to histogram
-the data(!).
-
-Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None)
-Returns: percentile-position of score (0-100) relative to inarray
-"""
- h, lrl, binsize, extras = histogram(inarray, histbins, defaultlimits)
- cumhist = cumsum(h * 1)
- i = int((score - lrl) / float(binsize))
- pct = (cumhist[i - 1] + ((score - (lrl + binsize * i)) / float(binsize)) *
- h[i]) / float(len(inarray)) * 100
- return pct
-
- def ahistogram(inarray, numbins=10, defaultlimits=None, printextras=1):
- """
-Returns (i) an array of histogram bin counts, (ii) the smallest value
-of the histogram binning, and (iii) the bin width (the last 2 are not
-necessarily integers). Default number of bins is 10. Defaultlimits
-can be None (the routine picks bins spanning all the numbers in the
-inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the
-following: array of bin values, lowerreallimit, binsize, extrapoints.
-
-Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1)
-Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range)
-"""
- inarray = N.ravel(inarray) # flatten any >1D arrays
- if (defaultlimits <> None):
- lowerreallimit = defaultlimits[0]
- upperreallimit = defaultlimits[1]
- binsize = (upperreallimit - lowerreallimit) / float(numbins)
- else:
- Min = N.minimum.reduce(inarray)
- Max = N.maximum.reduce(inarray)
- estbinwidth = float(Max - Min) / float(numbins) + 1e-6
- binsize = (Max - Min + estbinwidth) / float(numbins)
- lowerreallimit = Min - binsize / 2.0 #lower real limit,1st bin
- bins = N.zeros(numbins)
- extrapoints = 0
- for num in inarray:
- try:
- if (num - lowerreallimit) < 0:
- extrapoints = extrapoints + 1
- else:
- bintoincrement = int((num - lowerreallimit) / float(binsize))
- bins[bintoincrement] = bins[bintoincrement] + 1
- except: # point outside lower/upper limits
- extrapoints = extrapoints + 1
- if (extrapoints > 0 and printextras == 1):
- print '\nPoints outside given histogram range =', extrapoints
- return (bins, lowerreallimit, binsize, extrapoints)
-
- def acumfreq(a, numbins=10, defaultreallimits=None):
- """
-Returns a cumulative frequency histogram, using the histogram function.
-Defaultreallimits can be None (use all data), or a 2-sequence containing
-lower and upper limits on values to include.
-
-Usage: acumfreq(a,numbins=10,defaultreallimits=None)
-Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints
-"""
- h, l, b, e = histogram(a, numbins, defaultreallimits)
- cumhist = cumsum(h * 1)
- return cumhist, l, b, e
-
- def arelfreq(a, numbins=10, defaultreallimits=None):
- """
-Returns a relative frequency histogram, using the histogram function.
-Defaultreallimits can be None (use all data), or a 2-sequence containing
-lower and upper limits on values to include.
-
-Usage: arelfreq(a,numbins=10,defaultreallimits=None)
-Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints
-"""
- h, l, b, e = histogram(a, numbins, defaultreallimits)
- h = N.array(h / float(a.shape[0]))
- return h, l, b, e
-
-#####################################
-###### AVARIABILITY FUNCTIONS #####
-#####################################
-
- def aobrientransform(*args):
- """
-Computes a transform on input data (any number of columns). Used to
-test for homogeneity of variance prior to running one-way stats. Each
-array in *args is one level of a factor. If an F_oneway() run on the
-transformed data and found significant, variances are unequal. From
-Maxwell and Delaney, p.112.
-
-Usage: aobrientransform(*args) *args = 1D arrays, one per level of factor
-Returns: transformed data for use in an ANOVA
-"""
- TINY = 1e-10
- k = len(args)
- n = N.zeros(k, N.float_)
- v = N.zeros(k, N.float_)
- m = N.zeros(k, N.float_)
- nargs = []
- for i in range(k):
- nargs.append(args[i].astype(N.float_))
- n[i] = float(len(nargs[i]))
- v[i] = var(nargs[i])
- m[i] = mean(nargs[i])
- for j in range(k):
- for i in range(n[j]):
- t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j])**2
- t2 = 0.5 * v[j] * (n[j] - 1.0)
- t3 = (n[j] - 1.0) * (n[j] - 2.0)
- nargs[j][i] = (t1 - t2) / float(t3)
- check = 1
- for j in range(k):
- if v[j] - mean(nargs[j]) > TINY:
- check = 0
- if check <> 1:
- raise ValueError, 'Lack of convergence in obrientransform.'
- else:
- return N.array(nargs)
-
- def asamplevar(inarray, dimension=None, keepdims=0):
- """
-Returns the sample standard deviation of the values in the passed
-array (i.e., using N). Dimension can equal None (ravel array first),
-an integer (the dimension over which to operate), or a sequence
-(operate over multiple dimensions). Set keepdims=1 to return an array
-with the same number of dimensions as inarray.
-
-Usage: asamplevar(inarray,dimension=None,keepdims=0)
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- if dimension == 1:
- mn = amean(inarray, dimension)[:, N.NewAxis]
- else:
- mn = amean(inarray, dimension, keepdims=1)
- deviations = inarray - mn
- if type(dimension) == ListType:
- n = 1
- for d in dimension:
- n = n * inarray.shape[d]
- else:
- n = inarray.shape[dimension]
- svar = ass(deviations, dimension, keepdims) / float(n)
- return svar
-
- def asamplestdev(inarray, dimension=None, keepdims=0):
- """
-Returns the sample standard deviation of the values in the passed
-array (i.e., using N). Dimension can equal None (ravel array first),
-an integer (the dimension over which to operate), or a sequence
-(operate over multiple dimensions). Set keepdims=1 to return an array
-with the same number of dimensions as inarray.
-
-Usage: asamplestdev(inarray,dimension=None,keepdims=0)
-"""
- return N.sqrt(asamplevar(inarray, dimension, keepdims))
-
- def asignaltonoise(instack, dimension=0):
- """
-Calculates signal-to-noise. Dimension can equal None (ravel array
-first), an integer (the dimension over which to operate), or a
-sequence (operate over multiple dimensions).
-
-Usage: asignaltonoise(instack,dimension=0):
-Returns: array containing the value of (mean/stdev) along dimension,
- or 0 when stdev=0
-"""
- m = mean(instack, dimension)
- sd = stdev(instack, dimension)
- return N.where(sd == 0, 0, m / sd)
-
- def acov(x, y, dimension=None, keepdims=0):
- """
-Returns the estimated covariance of the values in the passed
-array (i.e., N-1). Dimension can equal None (ravel array first), an
-integer (the dimension over which to operate), or a sequence (operate
-over multiple dimensions). Set keepdims=1 to return an array with the
-same number of dimensions as inarray.
-
-Usage: acov(x,y,dimension=None,keepdims=0)
-"""
- if dimension == None:
- x = N.ravel(x)
- y = N.ravel(y)
- dimension = 0
- xmn = amean(x, dimension, 1) # keepdims
- xdeviations = x - xmn
- ymn = amean(y, dimension, 1) # keepdims
- ydeviations = y - ymn
- if type(dimension) == ListType:
- n = 1
- for d in dimension:
- n = n * x.shape[d]
- else:
- n = x.shape[dimension]
- covar = N.sum(xdeviations * ydeviations) / float(n - 1)
- return covar
-
- def avar(inarray, dimension=None, keepdims=0):
- """
-Returns the estimated population variance of the values in the passed
-array (i.e., N-1). Dimension can equal None (ravel array first), an
-integer (the dimension over which to operate), or a sequence (operate
-over multiple dimensions). Set keepdims=1 to return an array with the
-same number of dimensions as inarray.
-
-Usage: avar(inarray,dimension=None,keepdims=0)
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- mn = amean(inarray, dimension, 1)
- deviations = inarray - mn
- if type(dimension) == ListType:
- n = 1
- for d in dimension:
- n = n * inarray.shape[d]
- else:
- n = inarray.shape[dimension]
- var = ass(deviations, dimension, keepdims) / float(n - 1)
- return var
-
- def astdev(inarray, dimension=None, keepdims=0):
- """
-Returns the estimated population standard deviation of the values in
-the passed array (i.e., N-1). Dimension can equal None (ravel array
-first), an integer (the dimension over which to operate), or a
-sequence (operate over multiple dimensions). Set keepdims=1 to return
-an array with the same number of dimensions as inarray.
-
-Usage: astdev(inarray,dimension=None,keepdims=0)
-"""
- return N.sqrt(avar(inarray, dimension, keepdims))
-
- def asterr(inarray, dimension=None, keepdims=0):
- """
-Returns the estimated population standard error of the values in the
-passed array (i.e., N-1). Dimension can equal None (ravel array
-first), an integer (the dimension over which to operate), or a
-sequence (operate over multiple dimensions). Set keepdims=1 to return
-an array with the same number of dimensions as inarray.
-
-Usage: asterr(inarray,dimension=None,keepdims=0)
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- return astdev(inarray, dimension,
- keepdims) / float(N.sqrt(inarray.shape[dimension]))
-
- def asem(inarray, dimension=None, keepdims=0):
- """
-Returns the standard error of the mean (i.e., using N) of the values
-in the passed array. Dimension can equal None (ravel array first), an
-integer (the dimension over which to operate), or a sequence (operate
-over multiple dimensions). Set keepdims=1 to return an array with the
-same number of dimensions as inarray.
-
-Usage: asem(inarray,dimension=None, keepdims=0)
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- if type(dimension) == ListType:
- n = 1
- for d in dimension:
- n = n * inarray.shape[d]
- else:
- n = inarray.shape[dimension]
- s = asamplestdev(inarray, dimension, keepdims) / N.sqrt(n - 1)
- return s
-
- def az(a, score):
- """
-Returns the z-score of a given input score, given thearray from which
-that score came. Not appropriate for population calculations, nor for
-arrays > 1D.
-
-Usage: az(a, score)
-"""
- z = (score - amean(a)) / asamplestdev(a)
- return z
-
- def azs(a):
- """
-Returns a 1D array of z-scores, one for each score in the passed array,
-computed relative to the passed array.
-
-Usage: azs(a)
-"""
- zscores = []
- for item in a:
- zscores.append(z(a, item))
- return N.array(zscores)
-
- def azmap(scores, compare, dimension=0):
- """
-Returns an array of z-scores the shape of scores (e.g., [x,y]), compared to
-array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0
-of the compare array.
-
-Usage: azs(scores, compare, dimension=0)
-"""
- mns = amean(compare, dimension)
- sstd = asamplestdev(compare, 0)
- return (scores - mns) / sstd
-
-#####################################
-####### ATRIMMING FUNCTIONS #######
-#####################################
-
-## deleted around() as it's in numpy now
-
- def athreshold(a, threshmin=None, threshmax=None, newval=0):
- """
-Like Numeric.clip() except that values <threshmid or >threshmax are replaced
-by newval instead of by threshmin/threshmax (respectively).
-
-Usage: athreshold(a,threshmin=None,threshmax=None,newval=0)
-Returns: a, with values <threshmin or >threshmax replaced with newval
-"""
- mask = N.zeros(a.shape)
- if threshmin <> None:
- mask = mask + N.where(a < threshmin, 1, 0)
- if threshmax <> None:
- mask = mask + N.where(a > threshmax, 1, 0)
- mask = N.clip(mask, 0, 1)
- return N.where(mask, newval, a)
-
- def atrimboth(a, proportiontocut):
- """
-Slices off the passed proportion of items from BOTH ends of the passed
-array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND
-'rightmost' 10% of scores. You must pre-sort the array if you want
-"proper" trimming. Slices off LESS if proportion results in a
-non-integer slice index (i.e., conservatively slices off
-proportiontocut).
-
-Usage: atrimboth (a,proportiontocut)
-Returns: trimmed version of array a
-"""
- lowercut = int(proportiontocut * len(a))
- uppercut = len(a) - lowercut
- return a[lowercut:uppercut]
-
def atrim1(a, proportiontocut, tail='right'):
  """Return a with proportiontocut sliced off ONE end.

  If proportiontocut=0.1, slices off the 'leftmost' or 'rightmost' 10%
  of scores.  A non-integer slice index is truncated, so slightly LESS
  than the requested proportion may be cut.

  Args:
    a: input sequence/array (pre-sorted for "proper" trimming).
    proportiontocut: fraction of items to remove.
    tail: 'right' (default) or 'left', case-insensitive.

  Returns:
    Trimmed version of array a.

  Raises:
    ValueError: for any other tail value (the old code fell through and
      crashed with an UnboundLocalError).
  """
  tail = tail.lower()  # str.lower(); the string-module function is gone in py3
  if tail == 'right':
    lowercut = 0
    uppercut = len(a) - int(proportiontocut * len(a))
  elif tail == 'left':
    lowercut = int(proportiontocut * len(a))
    uppercut = len(a)
  else:
    raise ValueError("tail must be 'right' or 'left', got %r" % tail)
  return a[lowercut:uppercut]
-
-#####################################
-##### ACORRELATION FUNCTIONS ######
-#####################################
-
def acovariance(X):
  """Compute the covariance matrix of a 2D matrix X.

  Args:
    X: 2D array, observations in rows.

  Returns:
    Covariance matrix of X (population form, denominator n).

  Raises:
    TypeError: if X is not 2D.
  """
  # '!=' / raise-as-call replace the py2-only '<>' and 'raise T, msg' forms.
  if len(X.shape) != 2:
    raise TypeError('acovariance requires 2D matrices')
  n = X.shape[0]
  mX = amean(X, 0)  # column means
  # E[X'X] minus the outer product of the means.
  return N.dot(N.transpose(X), X) / float(n) - N.multiply.outer(mX, mX)
-
def acorrelation(X):
  """Compute the correlation matrix of a 2D matrix X.

  Returns:
    Correlation matrix of X.
  """
  cov = acovariance(X)
  variances = N.diagonal(cov)
  # Normalize each covariance entry by both standard deviations.
  return cov / N.sqrt(N.multiply.outer(variances, variances))
-
def apaired(x, y):
  """
Interactively determines the type of data in x and y, and then runs the
appropriated statistic for paired group data.

Usage:   apaired(x,y)     x,y = the two arrays of values to be compared
Returns: appropriate statistic name, value, and probability
"""
  # Interactive console helper: prompts on stdin (raw_input), prints the
  # chosen test's results to stdout, and always returns None.
  samples = ''
  while samples not in ['i', 'r', 'I', 'R', 'c', 'C']:
    print '\nIndependent or related samples, or correlation (i,r,c): ',
    samples = raw_input()

  if samples in ['i', 'I', 'r', 'R']:
    print '\nComparing variances ...',
    # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112
    r = obrientransform(x, y)
    f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1))
    if p < 0.05:
      vartype = 'unequal, p=' + str(round(p, 4))
    else:
      vartype = 'equal'
    print vartype
    if samples in ['i', 'I']:
      if vartype[0] == 'e':
        # equal variances: plain independent-samples t-test
        t, p = ttest_ind(x, y, None, 0)
        print '\nIndependent samples t-test: ', round(t, 4), round(p, 4)
      else:
        # unequal variances: fall back to nonparametric tests
        if len(x) > 20 or len(y) > 20:
          z, p = ranksums(x, y)
          print '\nRank Sums test (NONparametric, n>20): ', round(
              z, 4), round(p, 4)
        else:
          u, p = mannwhitneyu(x, y)
          print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(
              u, 4), round(p, 4)

    else:  # RELATED SAMPLES
      if vartype[0] == 'e':
        t, p = ttest_rel(x, y, 0)
        print '\nRelated samples t-test: ', round(t, 4), round(p, 4)
      else:
        t, p = ranksums(x, y)
        print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)
  else:  # CORRELATION ANALYSIS
    corrtype = ''
    while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']:
      print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
      corrtype = raw_input()
    if corrtype in ['c', 'C']:
      m, b, r, p, see = linregress(x, y)
      print '\nLinear regression for continuous variables ...'
      lol = [
          ['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'],
          [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]
      ]
      pstat.printcc(lol)
    elif corrtype in ['r', 'R']:
      r, p = spearmanr(x, y)
      print '\nCorrelation for ranked variables ...'
      print "Spearman's r: ", round(r, 4), round(p, 4)
    else:  # DICHOTOMOUS
      r, p = pointbiserialr(x, y)
      print '\nAssuming x contains a dichotomous variable ...'
      print 'Point Biserial r: ', round(r, 4), round(p, 4)
  print '\n\n'
  return None
-
def dices(x, y):
  """Calculate Dice's coefficient between two term collections.

  Dice = (2 * number of common terms) / (number of distinct terms in x
  plus number of distinct terms in y); ranges from 0 (orthogonal) to 1.

  Args:
    x, y: iterables of hashable terms.

  Returns:
    Dice's coefficient as a float.
  """
  # Builtin set replaces the long-deprecated (py3-removed) sets.Set.
  x = set(x)
  y = set(y)
  common = len(x.intersection(y))
  total = float(len(x) + len(y))
  return 2 * common / total
-
def icc(x, y=None, verbose=0):
  """Calculate intraclass correlation coefficients (simple, Type I SS).

  If only one variable is passed, it is assumed to be an Nx2 matrix.

  Args:
    x: first variable, or an Nx2 matrix if y is omitted.
    y: optional second variable.
    verbose: passed through to abetai().

  Returns:
    (icc rho, prob)  # prob is a guess based on Pearson's t.
  """
  TINY = 1.0e-20
  # 'is not None' rather than truth-testing: a bare 'if y:' is ambiguous
  # for array arguments and wrong for an (unlikely) empty sequence.
  if y is not None:
    all = N.concatenate([x, y], 0)
  else:
    all = x + 0
    x = all[:, 0]
    y = all[:, 1]
  # NOTE(review): uses the list-based mean()/ass() helpers rather than
  # amean()/the array versions -- confirm this is intended.
  totalss = ass(all - mean(all))
  pairmeans = (x + y) / 2.
  withinss = ass(x - pairmeans) + ass(y - pairmeans)
  withindf = float(len(x))
  betwdf = float(len(x) - 1)
  withinms = withinss / withindf
  betweenms = (totalss - withinss) / betwdf
  rho = (betweenms - withinms) / (withinms + betweenms)
  # TINY guards the sqrt against rho == +/-1.
  t = rho * math.sqrt(betwdf / ((1.0 - rho + TINY) * (1.0 + rho + TINY)))
  prob = abetai(0.5 * betwdf, 0.5, betwdf / (betwdf + t * t), verbose)
  return rho, prob
-
def alincc(x, y):
  """Calculate Lin's concordance correlation coefficient.

  Args:
    x, y: equal-length arrays.

  Returns:
    Lin's CC.
  """
  x = N.ravel(x)
  y = N.ravel(y)
  # Rescale the n-1 denominators of acov/avar to population (n) versions.
  covar = acov(x, y) * (len(x) - 1) / float(len(x))
  xvar = avar(x) * (len(x) - 1) / float(len(x))
  yvar = avar(y) * (len(y) - 1) / float(len(y))
  meandiff_sq = (amean(x) - amean(y)) ** 2
  return (2 * covar) / ((xvar + yvar) + meandiff_sq)
-
def apearsonr(x, y, verbose=1):
  """Calculate Pearson's correlation coefficient and two-tailed p-value.

  Taken from Heiman's Basic Statistics for the Behav. Sci (2nd), p.195.

  Args:
    x, y: equal-length arrays.
    verbose: passed through to abetai().

  Returns:
    (Pearson's r, two-tailed p-value)
  """
  TINY = 1.0e-20
  n = len(x)
  # The old xmean/ymean computations were dead code and have been removed.
  r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y)
  r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) *
                    (n * ass(y) - asquare_of_sums(y)))
  r = (r_num / r_den)
  df = n - 2
  # TINY guards the sqrt against r == +/-1.
  t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
  prob = abetai(0.5 * df, 0.5, df / (df + t * t), verbose)
  return r, prob
-
def aspearmanr(x, y):
  """Calculate Spearman's rank-order correlation and two-tailed p-value.

  Taken from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.

  Args:
    x, y: equal-length arrays.

  Returns:
    (Spearman's r, two-tailed p-value)
  """
  # The unused TINY constant from the original has been removed.
  n = len(x)
  rankx = rankdata(x)
  ranky = rankdata(y)
  dsq = N.add.reduce((rankx - ranky)**2)
  rs = 1 - 6 * dsq / float(n * (n**2 - 1))
  t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs)))
  df = n - 2
  # probability values for rs are from part 2 of the spearman function in
  # Numerical Recipes, p.510.  They are close to tables, but not exact.
  probrs = abetai(0.5 * df, 0.5, df / (df + t * t))
  return rs, probrs
-
def apointbiserialr(x, y):
  """
Calculates a point-biserial correlation coefficient and the associated
probability value.  Taken from Heiman's Basic Statistics for the Behav.
Sci (1st), p.194.

Usage:   apointbiserialr(x,y)      where x,y are equal length arrays
Returns: Point-biserial r, two-tailed p-value
"""
  TINY = 1e-30
  categories = pstat.aunique(x)
  data = pstat.aabut(x, y)
  if len(categories) <> 2:
    raise ValueError, ('Exactly 2 categories required (in x) for '
                       'pointbiserialr().')
  else:  # there are 2 categories, continue
    codemap = pstat.aabut(categories, N.arange(2))
    # NOTE(review): 'recoded' is computed but never used below -- confirm
    # whether the recode step was meant to feed the rest of the function.
    recoded = pstat.arecode(data, codemap, 0)
    x = pstat.alinexand(data, 0, categories[0])  # rows in category 0
    y = pstat.alinexand(data, 0, categories[1])  # rows in category 1
    xmean = amean(pstat.acolex(x, 1))
    ymean = amean(pstat.acolex(y, 1))
    n = len(data)
    # sqrt(p0 * p1): scales the mean difference by group balance.
    adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n)))
    rpb = (ymean - xmean) / asamplestdev(pstat.acolex(data, 1)) * adjust
    df = n - 2
    # TINY guards the sqrt against rpb == +/-1.
    t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY)))
    prob = abetai(0.5 * df, 0.5, df / (df + t * t))
    return rpb, prob
-
def akendalltau(x, y):
  """
Calculates Kendall's tau ... correlation of ordinal data.  Adapted
from function kendl1 in Numerical Recipies.  Needs good test-cases.@@@

Usage:   akendalltau(x,y)
Returns: Kendall's tau, two-tailed p-value
"""
  n1 = 0  # pairs not tied in x
  n2 = 0  # pairs not tied in y
  iss = 0  # concordant-minus-discordant running count
  for j in range(len(x) - 1):
    # NOTE(review): starting the inner loop at j (not j+1) also compares
    # element j against itself (a1 == a2 == 0), which bumps n2 once per
    # outer iteration -- compare against kendl1 before changing.
    for k in range(j, len(y)):
      a1 = x[j] - x[k]
      a2 = y[j] - y[k]
      aa = a1 * a2
      if (aa):  # neither array has a tie
        n1 = n1 + 1
        n2 = n2 + 1
        if aa > 0:
          iss = iss + 1
        else:
          iss = iss - 1
      else:
        if (a1):
          n1 = n1 + 1
        else:
          n2 = n2 + 1
  tau = iss / math.sqrt(n1 * n2)
  # Normal approximation for the p-value; 1.4142136 ~ sqrt(2).
  svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1))
  z = tau / math.sqrt(svar)
  prob = erfcc(abs(z) / 1.4142136)
  return tau, prob
-
def alinregress(*args):
  """
Calculates a regression line on two arrays, x and y, corresponding to x,y
pairs.  If a single 2D array is passed, alinregress finds dim with 2 levels
and splits data into x,y pairs along that dim.

Usage:   alinregress(*args)    args=2 equal-length arrays, or one 2D array
Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n
"""
  TINY = 1.0e-20
  if len(args) == 1:  # more than 1D array?
    args = args[0]
    if len(args) == 2:
      x = args[0]
      y = args[1]
    else:
      x = args[:, 0]
      y = args[:, 1]
  else:
    x = args[0]
    y = args[1]
  n = len(x)
  xmean = amean(x)
  ymean = amean(y)
  r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y)
  r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) *
                    (n * ass(y) - asquare_of_sums(y)))
  r = r_num / r_den
  # NOTE(review): z (Fisher transform) is computed but never used below.
  z = 0.5 * math.log((1.0 + r + TINY) / (1.0 - r + TINY))
  df = n - 2
  # TINY guards the sqrt against r == +/-1.
  t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
  prob = abetai(0.5 * df, 0.5, df / (df + t * t))
  slope = r_num / (float(n) * ass(x) - asquare_of_sums(x))
  intercept = ymean - slope * xmean
  sterrest = math.sqrt(1 - r * r) * asamplestdev(y)
  return slope, intercept, r, prob, sterrest, n
-
def amasslinregress(*args):
  """Calculate a regression of one 1D array (x) against one N-D array (y).

  Accepts either (x, y) or a single 2-element/2-column array.

  Returns:
    (slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n)
  """
  TINY = 1.0e-20
  if len(args) == 1:  # more than 1D array?
    args = args[0]
    if len(args) == 2:
      x = N.ravel(args[0])
      y = args[1]
    else:
      x = N.ravel(args[:, 0])
      y = args[:, 1]
  else:
    x = args[0]
    y = args[1]
  x = x.astype(N.float_)
  y = y.astype(N.float_)
  n = len(x)
  xmean = amean(x)
  ymean = amean(y, 0)
  # Reshape x so it broadcasts along dim 0 of y.  The shape vector must be
  # integral (N.ones() alone yields floats).  The leftover debug print of
  # the shapes has been removed.
  shp = N.ones(len(y.shape), N.int_)
  shp[0] = len(x)
  x.shape = shp
  r_num = n * (N.add.reduce(x * y, 0)) - N.add.reduce(x) * N.add.reduce(y, 0)
  r_den = N.sqrt((n * ass(x) - asquare_of_sums(x)) *
                 (n * ass(y, 0) - asquare_of_sums(y, 0)))
  zerodivproblem = N.equal(r_den, 0)
  r_den = N.where(zerodivproblem, 1, r_den)  # avoid zero-division in 1st place
  r = r_num / r_den  # element-wise across the extra dims of y
  r = N.where(zerodivproblem, 0.0, r)
  z = 0.5 * N.log((1.0 + r + TINY) / (1.0 - r + TINY))
  df = n - 2
  t = r * N.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
  prob = abetai(0.5 * df, 0.5, df / (df + t * t))

  ss = float(n) * ass(x) - asquare_of_sums(x)
  s_den = N.where(ss == 0, 1, ss)  # avoid zero-division in 1st place
  slope = r_num / s_den
  intercept = ymean - slope * xmean
  sterrest = N.sqrt(1 - r * r) * asamplestdev(y, 0)
  return slope, intercept, r, prob, sterrest, n
-
-#####################################
-##### AINFERENTIAL STATISTICS #####
-#####################################
-
def attest_1samp(a, popmean, printit=0, name='Sample', writemode='a'):
  """Calculate the t-test of ONE group of scores a vs. a population mean.

  If printit=1, results are printed to the screen.  If
  printit='filename', the results are output to 'filename' using the
  given writemode (default=append).

  Args:
    a: array of scores.
    popmean: population mean to test against.
    printit: 0, 1, or a filename.
    name: sample label for printed output.
    writemode: file mode used when printit is a filename.

  Returns:
    (t-value, two-tailed prob)
  """
  if not isinstance(a, N.ndarray):  # isinstance() replaces type() != ...
    a = N.array(a)
  x = amean(a)
  v = avar(a)
  n = len(a)
  df = n - 1
  svar = ((n - 1) * v) / float(df)  # == v; kept for parity with attest_ind
  t = (x - popmean) / math.sqrt(svar * (1.0 / n))
  prob = abetai(0.5 * df, 0.5, df / (df + t * t))

  if printit != 0:  # '!=' replaces the py2-only '<>'
    statname = 'Single-sample T-test.'
    outputpairedstats(printit, writemode, 'Population', '--', popmean, 0, 0,
                      0, name, n, x, v, N.minimum.reduce(N.ravel(a)),
                      N.maximum.reduce(N.ravel(a)), statname, t, prob)
  return t, prob
-
def attest_ind(a,
               b,
               dimension=None,
               printit=0,
               name1='Samp1',
               name2='Samp2',
               writemode='a'):
  """
Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores
a, and b.  From Numerical Recipies, p.483.  If printit=1, results are
printed to the screen.  If printit='filename', the results are output
to 'filename' using the given writemode (default=append).  Dimension
can equal None (ravel array first), or an integer (the dimension over
which to operate on a and b).

Usage:   attest_ind (a,b,dimension=None,printit=0,
                     Name1='Samp1',Name2='Samp2',writemode='a')
Returns: t-value, two-tailed p-value
"""
  if dimension == None:
    a = N.ravel(a)
    b = N.ravel(b)
    dimension = 0
  x1 = amean(a, dimension)
  x2 = amean(b, dimension)
  v1 = avar(a, dimension)
  v2 = avar(b, dimension)
  n1 = a.shape[dimension]
  n2 = b.shape[dimension]
  df = n1 + n2 - 2
  # pooled variance of the two samples
  svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df)
  zerodivproblem = N.equal(svar, 0)
  svar = N.where(zerodivproblem, 1, svar)  # avoid zero-division in 1st place
  t = (x1 - x2) / N.sqrt(svar *
                         (1.0 / n1 + 1.0 / n2))  # N-D COMPUTATION HERE!!!!!!
  t = N.where(zerodivproblem, 1.0, t)  # replace NaN/wrong t-values with 1.0
  probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t))

  if type(t) == N.ndarray:
    probs = N.reshape(probs, t.shape)
  if probs.shape == (1,):
    probs = probs[0]

  if printit <> 0:
    if type(t) == N.ndarray:
      t = t[0]
    if type(probs) == N.ndarray:
      probs = probs[0]
    statname = 'Independent samples T-test.'
    outputpairedstats(printit, writemode, name1, n1, x1, v1,
                      N.minimum.reduce(N.ravel(a)),
                      N.maximum.reduce(N.ravel(a)), name2, n2, x2, v2,
                      N.minimum.reduce(N.ravel(b)),
                      N.maximum.reduce(N.ravel(b)), statname, t, probs)
    # NOTE(review): this bare return yields None on the printit path,
    # unlike attest_1samp which still returns (t, prob) -- confirm intended.
    return
  return t, probs
-
- def ap2t(pval, df):
- """
-Tries to compute a t-value from a p-value (or pval array) and associated df.
-SLOW for large numbers of elements(!) as it re-computes p-values 20 times
-(smaller step-sizes) at which point it decides it's done. Keeps the signs
-of the input array. Returns 1000 (or -1000) if t>100.
-
-Usage: ap2t(pval,df)
-Returns: an array of t-values with the shape of pval
- """
- pval = N.array(pval)
- signs = N.sign(pval)
- pval = abs(pval)
- t = N.ones(pval.shape, N.float_) * 50
- step = N.ones(pval.shape, N.float_) * 25
- print 'Initial ap2t() prob calc'
- prob = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
- print 'ap2t() iter: ',
- for i in range(10):
- print i, ' ',
- t = N.where(pval < prob, t + step, t - step)
- prob = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
- step = step / 2
- print
- # since this is an ugly hack, we get ugly boundaries
- t = N.where(t > 99.9, 1000, t) # hit upper-boundary
- t = t + signs
- return t #, prob, pval
-
def attest_rel(a,
               b,
               dimension=None,
               printit=0,
               name1='Samp1',
               name2='Samp2',
               writemode='a'):
  """
Calculates the t-obtained T-test on TWO RELATED samples of scores, a
and b.  From Numerical Recipies, p.483.  If printit=1, results are
printed to the screen.  If printit='filename', the results are output
to 'filename' using the given writemode (default=append).  Dimension
can equal None (ravel array first), or an integer (the dimension over
which to operate on a and b).

Usage:   attest_rel(a,b,dimension=None,printit=0,
                    name1='Samp1',name2='Samp2',writemode='a')
Returns: t-value, two-tailed p-value
"""
  if dimension == None:
    a = N.ravel(a)
    b = N.ravel(b)
    dimension = 0
  if len(a) <> len(b):
    raise ValueError, 'Unequal length arrays.'
  x1 = amean(a, dimension)
  x2 = amean(b, dimension)
  v1 = avar(a, dimension)
  v2 = avar(b, dimension)
  n = a.shape[dimension]
  df = float(n - 1)
  d = (a - b).astype('d')  # paired differences, as double

  # standard error of the mean difference
  denom = N.sqrt(
      (n * N.add.reduce(d * d, dimension) - N.add.reduce(d, dimension)**2) /
      df)
  zerodivproblem = N.equal(denom, 0)
  denom = N.where(zerodivproblem, 1, denom
                 )  # avoid zero-division in 1st place
  t = N.add.reduce(d, dimension) / denom  # N-D COMPUTATION HERE!!!!!!
  t = N.where(zerodivproblem, 1.0, t)  # replace NaN/wrong t-values with 1.0
  probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
  if type(t) == N.ndarray:
    probs = N.reshape(probs, t.shape)
  if probs.shape == (1,):
    probs = probs[0]

  if printit <> 0:
    statname = 'Related samples T-test.'
    outputpairedstats(printit, writemode, name1, n, x1, v1,
                      N.minimum.reduce(N.ravel(a)),
                      N.maximum.reduce(N.ravel(a)), name2, n, x2, v2,
                      N.minimum.reduce(N.ravel(b)),
                      N.maximum.reduce(N.ravel(b)), statname, t, probs)
    # NOTE(review): returns None on the printit path -- confirm intended.
    return
  return t, probs
-
def achisquare(f_obs, f_exp=None):
  """Calculate a one-way chi square from an array of observed frequencies.

  If no expected frequencies are given, the total N is assumed to be
  equally distributed across all groups.  @@@NOT RIGHT??

  Args:
    f_obs: array of observed cell frequencies.
    f_exp: optional array of expected cell frequencies.

  Returns:
    (chisquare-statistic, associated p-value)
  """
  k = len(f_obs)
  # 'is None' (was '== None'): equality against None is elementwise -- and
  # therefore ambiguous in a bool context -- once f_exp is an array.
  if f_exp is None:
    f_exp = N.array([sum(f_obs) / float(k)] * len(f_obs), N.float_)
  f_exp = f_exp.astype(N.float_)
  chisq = N.add.reduce((f_obs - f_exp)**2 / f_exp)
  return chisq, achisqprob(chisq, k - 1)
-
def aks_2samp(data1, data2):
  """
Computes the Kolmogorov-Smirnof statistic on 2 samples.  Modified from
Numerical Recipies in C, page 493.  Returns KS D-value, prob.  Not ufunc-
like.

Usage:   aks_2samp(data1,data2)   where data1 and data2 are 1D arrays
Returns: KS D-value, p-value
"""
  j1 = 0  # N.zeros(data1.shape[1:]) TRIED TO MAKE THIS UFUNC-LIKE
  j2 = 0  # N.zeros(data2.shape[1:])
  fn1 = 0.0  # N.zeros(data1.shape[1:],N.float_)
  fn2 = 0.0  # N.zeros(data2.shape[1:],N.float_)
  n1 = data1.shape[0]
  n2 = data2.shape[0]
  en1 = n1 * 1
  en2 = n2 * 1
  d = N.zeros(data1.shape[1:], N.float_)
  data1 = N.sort(data1, 0)
  data2 = N.sort(data2, 0)
  # March through both sorted samples, tracking the largest signed gap
  # between the two empirical CDFs.
  while j1 < n1 and j2 < n2:
    d1 = data1[j1]
    d2 = data2[j2]
    if d1 <= d2:
      fn1 = (j1) / float(en1)
      j1 = j1 + 1
    if d2 <= d1:
      fn2 = (j2) / float(en2)
      j2 = j2 + 1
    dt = (fn2 - fn1)
    if abs(dt) > abs(d):
      d = dt
# try:
  # Asymptotic KS distribution with the small-sample correction factor.
  en = math.sqrt(en1 * en2 / float(en1 + en2))
  prob = aksprob((en + 0.12 + 0.11 / en) * N.fabs(d))
  # except:
  #     prob = 1.0
  return d, prob
-
def amannwhitneyu(x, y):
  """Calculate a Mann-Whitney U statistic on two independent samples.

  Use only when the n in each condition is < 20 and you have 2
  independent samples of ranks.  REMEMBER: Mann-Whitney U is significant
  if the u-obtained is LESS THAN or equal to the critical value of U.

  Returns:
    (u-statistic, one-tailed p-value (i.e., p(z(U))), proportion) --
    proportion is bigU/(n1*n2); the old docstring omitted this third
    return value.

  Raises:
    ValueError: if all numbers are identical (zero rank variance).
  """
  n1 = len(x)
  n2 = len(y)
  ranked = rankdata(N.concatenate((x, y)))
  rankx = ranked[0:n1]  # get the x-ranks; the rest are y-ranks
  u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx)  # calc U for x
  u2 = n1 * n2 - u1  # remainder is U for y
  bigu = max(u1, u2)
  smallu = min(u1, u2)
  proportion = bigu / float(n1 * n2)
  T = math.sqrt(tiecorrect(ranked))  # correction factor for tied scores
  if T == 0:
    raise ValueError('All numbers are identical in amannwhitneyu')
  sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)
  z = abs((bigu - n1 * n2 / 2.0) / sd)  # normal approximation for prob calc
  return smallu, 1.0 - azprob(z), proportion
-
def atiecorrect(rankvals):
  """Tie-corrector for ties in Mann Whitney U and Kruskal Wallis H tests.

  See Siegel, S. (1956) Nonparametric Statistics for the Behavioral
  Sciences.  New York: McGraw-Hill.  Code adapted from |Stat rankind.c.

  Returns:
    T correction factor for U or H.
  """
  svals, posn = ashellsort(N.array(rankvals))
  total = len(svals)
  correction = 0.0
  idx = 0
  # Walk each run of equal values, accumulating nties**3 - nties per run.
  while idx < total - 1:
    if svals[idx] == svals[idx + 1]:
      run = 1
      while idx < total - 1 and svals[idx] == svals[idx + 1]:
        run = run + 1
        idx = idx + 1
      correction = correction + run**3 - run
    idx = idx + 1
  return 1.0 - correction / float(total**3 - total)
-
def aranksums(x, y):
  """Calculate the rank sums statistic on the provided scores.

  Args:
    x, y: arrays of values for the 2 conditions.

  Returns:
    (z-statistic, two-tailed p-value)
  """
  nx = len(x)
  ny = len(y)
  ranked = arankdata(N.concatenate((x, y)))
  xranks = ranked[:nx]  # ranks belonging to the x sample
  observed = sum(xranks)
  expected = nx * (nx + ny + 1) / 2.0
  z = (observed - expected) / math.sqrt(nx * ny * (nx + ny + 1) / 12.0)
  return z, 2 * (1.0 - azprob(abs(z)))
-
def awilcoxont(x, y):
  """Calculate the Wilcoxon T-test for related samples (non-parametric).

  Args:
    x, y: equal-length arrays for the 2 conditions.

  Returns:
    (t-statistic, two-tailed p-value)

  Raises:
    ValueError: if x and y differ in length.
  """
  if len(x) != len(y):  # '!=' replaces the py2-only '<>'
    raise ValueError('Unequal N in awilcoxont. Aborting.')
  d = x - y
  d = N.compress(N.not_equal(d, 0), d)  # Keep all non-zero differences
  count = len(d)
  absd = abs(d)
  absranked = arankdata(absd)
  r_plus = 0.0
  r_minus = 0.0
  for i in range(len(absd)):
    if d[i] < 0:
      r_minus = r_minus + absranked[i]
    else:
      r_plus = r_plus + absranked[i]
  wt = min(r_plus, r_minus)
  mn = count * (count + 1) * 0.25
  se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0)
  # The accidental duplicate of this assignment has been removed.
  z = math.fabs(wt - mn) / se
  # NOTE(review): uses the list-based zprob(), unlike the azprob() used by
  # the other array functions -- confirm this is intended.
  prob = 2 * (1.0 - zprob(abs(z)))
  return wt, prob
-
def akruskalwallish(*args):
  """Kruskal-Wallis H-test: non-parametric ANOVA for 3 or more groups.

  Requires at least 5 subjects in each group.  Calculates the
  Kruskal-Wallis H and associated p-value for 3 or more independent
  samples.

  Args:
    *args: separate arrays, one per condition.

  Returns:
    (H-statistic corrected for ties, associated p-value)

  Raises:
    ValueError: if all numbers are identical.
  """
  # '>=' (was '== 3'): the test is defined for three OR MORE groups, as
  # the assertion message already said.  The dead 'n = [0]*len(args)'
  # store has been removed.
  assert len(args) >= 3, 'Need at least 3 groups in stats.akruskalwallish()'
  args = list(args)
  n = map(len, args)
  all = []
  for i in range(len(args)):
    all = all + args[i].tolist()
  ranked = rankdata(all)
  T = tiecorrect(ranked)
  # Peel each group's ranks back off the pooled ranking.
  for i in range(len(args)):
    args[i] = ranked[0:n[i]]
    del ranked[0:n[i]]
  rsums = []
  for i in range(len(args)):
    rsums.append(sum(args[i])**2)
    rsums[i] = rsums[i] / float(n[i])
  ssbn = sum(rsums)
  totaln = sum(n)
  h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)
  df = len(args) - 1
  if T == 0:
    raise ValueError('All numbers are identical in akruskalwallish')
  h = h / float(T)
  return h, chisqprob(h, df)
-
def afriedmanchisquare(*args):
  """Friedman Chi-Square: non-parametric one-way within-subjects ANOVA.

  Assumes 3 or more repeated measures.  Only 3 levels requires a minimum
  of 10 subjects in the study.  Four levels requires 5 subjects per
  level(??).

  Args:
    *args: separate arrays, one per condition.

  Returns:
    (chi-square statistic, associated p-value)

  Raises:
    ValueError: for fewer than 3 conditions.
  """
  k = len(args)
  if k < 3:
    raise ValueError('\nLess than 3 levels. Friedman test not '
                     'appropriate.\n')
  n = len(args[0])
  data = pstat.aabut(*args)  # apply() is removed in py3; *-unpack instead
  data = data.astype(N.float_)
  for i in range(len(data)):
    data[i] = arankdata(data[i])  # rank within each subject (row)
  # Sum of squared column (condition) rank sums.  The old code summed the
  # RAW scores in args, which left the ranking above completely unused
  # and produced the wrong statistic.
  ssbn = asum(asum(data, 0)**2)
  chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1)
  return chisq, achisqprob(chisq, k - 1)
-
-#####################################
-#### APROBABILITY CALCULATIONS ####
-#####################################
-
def achisqprob(chisq, df):
  """
Returns the (1-tail) probability value associated with the provided chi-square
value and df.  Heavily modified from chisq.c in Gary Perlman's |Stat.  Can
handle multiple dimensions.

Usage:   achisqprob(chisq,df)    chisq=chisquare stat., df=degrees of freedom
"""
  BIG = 200.0

  def ex(x):
    # exp() with the argument floored at -BIG to avoid underflow.
    BIG = 200.0
    exponents = N.where(N.less(x, -BIG), -BIG, x)
    return N.exp(exponents)

  if type(chisq) == N.ndarray:
    arrayflag = 1
  else:
    arrayflag = 0
    chisq = N.array([chisq])
  if df < 1:
    # NOTE(review): N.float here (vs. N.float_ everywhere else) is the
    # deprecated builtin-float alias -- confirm before touching.
    return N.ones(chisq.shape, N.float)
  probs = N.zeros(chisq.shape, N.float_)
  probs = N.where(
      N.less_equal(chisq, 0), 1.0, probs)  # set prob=1 for chisq<0
  a = 0.5 * chisq
  if df > 1:
    y = ex(-a)
  if df % 2 == 0:
    even = 1
    s = y * 1
    s2 = s * 1
  else:
    even = 0
    s = 2.0 * azprob(-N.sqrt(chisq))
    s2 = s * 1
  if (df > 2):
    chisq = 0.5 * (df - 1.0)
    if even:
      z = N.ones(probs.shape, N.float_)
    else:
      z = 0.5 * N.ones(probs.shape, N.float_)
    if even:
      e = N.zeros(probs.shape, N.float_)
    else:
      e = N.log(N.sqrt(N.pi)) * N.ones(probs.shape, N.float_)
    c = N.log(a)
    # Elementwise series evaluation: 'mask' marks elements whose series
    # has terminated; loop until every element is frozen.
    mask = N.zeros(probs.shape)
    a_big = N.greater(a, BIG)
    a_big_frozen = -1 * N.ones(probs.shape, N.float_)
    totalelements = N.multiply.reduce(N.array(probs.shape))
    while asum(mask) <> totalelements:
      e = N.log(z) + e
      s = s + ex(c * z - a - e)
      z = z + 1.0
      # print z, e, s
      newmask = N.greater(z, chisq)
      a_big_frozen = N.where(newmask * N.equal(mask, 0) * a_big, s,
                             a_big_frozen)
      mask = N.clip(newmask + mask, 0, 1)
    # Second series, used for elements with a <= BIG.
    if even:
      z = N.ones(probs.shape, N.float_)
      e = N.ones(probs.shape, N.float_)
    else:
      z = 0.5 * N.ones(probs.shape, N.float_)
      e = 1.0 / N.sqrt(N.pi) / N.sqrt(a) * N.ones(probs.shape, N.float_)
    c = 0.0
    mask = N.zeros(probs.shape)
    a_notbig_frozen = -1 * N.ones(probs.shape, N.float_)
    while asum(mask) <> totalelements:
      e = e * (a / z.astype(N.float_))
      c = c + e
      z = z + 1.0
      # print '#2', z, e, c, s, c*y+s2
      newmask = N.greater(z, chisq)
      a_notbig_frozen = N.where(newmask * N.equal(mask, 0) * (1 - a_big),
                                c * y + s2, a_notbig_frozen)
      mask = N.clip(newmask + mask, 0, 1)
    # Select per-element: prob already forced to 1, the large-a series,
    # or the small-a series.
    probs = N.where(
        N.equal(probs, 1), 1, N.where(
            N.greater(a, BIG), a_big_frozen, a_notbig_frozen))
    return probs
  else:
    return s
-
def aerfcc(x):
  """Return the complementary error function erfc(x).

  Fractional error everywhere less than 1.2e-7.  Adapted from Numerical
  Recipies.  Can handle multiple dimensions.
  """
  z = abs(x)
  t = 1.0 / (1.0 + 0.5 * z)
  # Horner evaluation of the Chebyshev-fitted polynomial in t.
  poly = 0.17087277
  for coef in (-0.82215223, 1.48851587, -1.13520398, 0.27886807,
               -0.18628806, 0.09678418, 0.37409196, 1.00002368):
    poly = coef + t * poly
  ans = t * N.exp(-z * z - 1.26551223 + t * poly)
  # erfc(-x) = 2 - erfc(x) for the negative half-line.
  return N.where(N.greater_equal(x, 0), ans, 2.0 - ans)
-
def azprob(z):
  """Return the area under the normal curve 'to the left of' the z value.

  Thus,
      for z<0, zprob(z) = 1-tail probability
      for z>0, 1.0-zprob(z) = 1-tail probability
      for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability
  Adapted from z.c in Gary Perlman's |Stat.  Can handle multiple
  dimensions.

  Args:
    z: array of z-values.

  Returns:
    Array of left-tail areas.
  """

  def yfunc(y):
    # polynomial approximation used for |z| >= 2
    x = (((((((
        ((((((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) * y
           - 0.000676904986) * y + 0.001390604284) * y - 0.000794620820) * y
         - 0.002034254874) * y + 0.006549791214) * y - 0.010557625006) * y +
        0.011630447319) * y - 0.009279453341) * y + 0.005353579108) * y -
        0.002141268741) * y + 0.000535310849) * y + 0.999936657524
    return x

  def wfunc(w):
    # polynomial approximation used for |z| < 2 (argument is (|z|/2)**2)
    x = ((((((((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w
              - 0.019198292004) * w + 0.059054035642) * w - 0.151968751364) *
           w + 0.319152932694) * w - 0.531923007300) * w +
         0.797884560593) * N.sqrt(w) * 2.0
    return x

  Z_MAX = 6.0  # maximum meaningful z-value
  # The original initialized x with N.zeros(z.shape, N.float_) -- a dead
  # store (overwritten immediately) using an alias removed in NumPy 2.0.
  y = 0.5 * N.fabs(z)
  x = N.where(N.less(y, 1.0), wfunc(y * y), yfunc(y - 2.0))  # get x's
  x = N.where(N.greater(y, Z_MAX * 0.5), 1.0, x)  # kill those with big Z
  prob = N.where(N.greater(z, 0), (x + 1) * 0.5, (1 - x) * 0.5)
  return prob
-
def aksprob(alam):
  """
Returns the probability value for a K-S statistic computed via ks_2samp.
Adapted from Numerical Recipies.  Can handle multiple dimensions.

Usage:   aksprob(alam)
"""
  if type(alam) == N.ndarray:
    frozen = -1 * N.ones(alam.shape, N.float64)
    alam = alam.astype(N.float64)
    arrayflag = 1
  else:
    frozen = N.array(-1.)
    alam = N.array(alam, N.float64)
    # NOTE(review): arrayflag is set to 1 on this scalar branch too, so
    # the '[0]' return below is unreachable (and indexing a 0-d array
    # would fail anyway) -- confirm before changing.
    arrayflag = 1
  mask = N.zeros(alam.shape)
  fac = 2.0 * N.ones(alam.shape, N.float_)
  sum = N.zeros(alam.shape, N.float_)
  termbf = N.zeros(alam.shape, N.float_)
  a2 = N.array(-2.0 * alam * alam, N.float64)
  totalelements = N.multiply.reduce(N.array(mask.shape))
  # Alternating-series evaluation of the asymptotic KS distribution,
  # elementwise; 'mask' marks converged elements.
  for j in range(1, 201):
    if asum(mask) == totalelements:
      break
    exponents = (a2 * j * j)
    overflowmask = N.less(exponents, -746)  # exp() would underflow
    frozen = N.where(overflowmask, 0, frozen)
    mask = mask + overflowmask
    term = fac * N.exp(exponents)
    sum = sum + term
    # Converged once the term is tiny relative to the last term or sum.
    newmask = N.where(
        N.less_equal(
            abs(term), (0.001 * termbf)) + N.less(
                abs(term), 1.0e-8 * sum), 1, 0)
    frozen = N.where(newmask * N.equal(mask, 0), sum, frozen)
    mask = N.clip(mask + newmask, 0, 1)
    fac = -fac  # alternate the series sign
    termbf = abs(term)
  if arrayflag:
    return N.where(
        N.equal(frozen, -1), 1.0, frozen)  # 1.0 if doesn't converge
  else:
    return N.where(
        N.equal(frozen, -1), 1.0, frozen)[0]  # 1.0 if doesn't converge
-
def afprob(dfnum, dfden, F):
  """Return the 1-tailed significance level (p-value) of an F statistic.

  dfnum is the degrees of freedom for the numerator (dfR-dfF), dfden the
  degrees of freedom for the denominator (dfF).  Can handle multiple
  dims for F.
  """
  if type(F) == N.ndarray:
    ratio = dfden / (1.0 * dfden + dfnum * F)
  else:
    ratio = dfden / float(dfden + dfnum * F)
  return abetai(0.5 * dfden, 0.5 * dfnum, ratio)
-
def abetacf(a, b, x, verbose=1):
  """
Evaluates the continued fraction form of the incomplete Beta function,
betai.  (Adapted from: Numerical Recipies in C.)  Can handle multiple
dimensions for x.

Usage:   abetacf(a,b,x,verbose=1)
"""
  ITMAX = 200  # maximum continued-fraction iterations
  EPS = 3.0e-7  # relative convergence tolerance

  arrayflag = 1
  if type(x) == N.ndarray:
    frozen = N.ones(x.shape,
                    N.float_) * -1  #start out w/ -1s, should replace all
  else:
    arrayflag = 0
    frozen = N.array([-1])
    x = N.array([x])
  mask = N.zeros(x.shape)
  bm = az = am = 1.0
  qab = a + b
  qap = a + 1.0
  qam = a - 1.0
  bz = 1.0 - qab * x / qap
  for i in range(ITMAX + 1):
    if N.sum(N.ravel(N.equal(frozen, -1))) == 0:
      break
    em = float(i + 1)
    tem = em + em
    # even step of the continued-fraction recurrence
    d = em * (b - em) * x / ((qam + tem) * (a + tem))
    ap = az + d * am
    bp = bz + d * bm
    # odd step
    d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem))
    app = ap + d * az
    bpp = bp + d * bz
    aold = az * 1
    am = ap / bpp
    bm = bp / bpp
    az = app / bpp
    bz = 1.0
    # freeze elements whose estimate changed by less than EPS
    newmask = N.less(abs(az - aold), EPS * abs(az))
    frozen = N.where(newmask * N.equal(mask, 0), az, frozen)
    mask = N.clip(mask + newmask, 0, 1)
  noconverge = asum(N.equal(frozen, -1))
  if noconverge <> 0 and verbose:
    print 'a or b too big, or ITMAX too small in Betacf for ', noconverge, ' elements'
  if arrayflag:
    return frozen
  else:
    return frozen[0]
-
def agammln(xx):
  """Return the natural log of the gamma function of xx.

      Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.
  Lanczos approximation adapted from Numerical Recipies in C.  Can
  handle multiple dims ... but probably doesn't normally have to.
  """
  lanczos = [76.18009173, -86.50532033, 24.01409822, -1.231739516,
             0.120858003e-2, -0.536382e-5]
  shifted = xx - 1.0
  tmp = shifted + 5.5
  tmp = tmp - (shifted + 0.5) * N.log(tmp)
  # Accumulate the Lanczos series with denominators xx, xx+1, ...
  series = 1.0
  denom = xx
  for coef in lanczos:
    series = series + coef / denom
    denom = denom + 1.0
  return -tmp + N.log(2.50662827465 * series)
-
def abetai(a, b, x, verbose=1):
  """
Returns the incomplete beta function:

    I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)

where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
function of a.  The continued fraction formulation is implemented
here, using the betacf function.  (Adapted from: Numerical Recipies in
C.)  Can handle multiple dimensions.

Usage:   abetai(a,b,x,verbose=1)
"""
  TINY = 1e-15
  if type(a) == N.ndarray:
    if asum(N.less(x, 0) + N.greater(x, 1)) <> 0:
      raise ValueError, 'Bad x in abetai'
  # Nudge exact 0/1 away from the log() singularities below.
  x = N.where(N.equal(x, 0), TINY, x)
  x = N.where(N.equal(x, 1.0), 1 - TINY, x)

  # NOTE(review): this bt is immediately overwritten below -- dead store.
  bt = N.where(N.equal(x, 0) + N.equal(x, 1), 0, -1)
  # NOTE(review): calls the list-based gammln(), not agammln() -- confirm.
  exponents = (gammln(a + b) - gammln(a) - gammln(b) + a * N.log(x) + b *
               N.log(1.0 - x))
  # 746 (below) is the MAX POSSIBLE BEFORE OVERFLOW
  exponents = N.where(N.less(exponents, -740), -740, exponents)
  bt = N.exp(exponents)
  if type(x) == N.ndarray:
    # Use the symmetry I_x(a,b) = 1 - I_{1-x}(b,a) so the continued
    # fraction converges quickly on either side of the crossover point.
    ans = N.where(
        N.less(x, (a + 1) / (a + b + 2.0)), bt * abetacf(a, b, x, verbose) /
        float(a), 1.0 - bt * abetacf(b, a, 1.0 - x, verbose) / float(b))
  else:
    if x < (a + 1) / (a + b + 2.0):
      ans = bt * abetacf(a, b, x, verbose) / float(a)
    else:
      ans = 1.0 - bt * abetacf(b, a, 1.0 - x, verbose) / float(b)
  return ans
-
-#####################################
-####### AANOVA CALCULATIONS #######
-#####################################
-
- import numpy.linalg, operator
- LA = numpy.linalg
-
- def aglm(data, para):
- """
-Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken
-from:
- Peterson et al. Statistical limitations in functional neuroimaging
- I. Non-inferential methods and statistical models. Phil Trans Royal Soc
- Lond B 354: 1239-1260.
-
-Usage: aglm(data,para)
-Returns: statistic, p-value ???
-"""
- if len(para) <> len(data):
- print 'data and para must be same length in aglm'
- return
- n = len(para)
- p = pstat.aunique(para)
- x = N.zeros((n, len(p))) # design matrix
- for l in range(len(p)):
- x[:, l] = N.equal(para, p[l])
- b = N.dot(
- N.dot(
- LA.inv(N.dot(
- N.transpose(x), x)), # i.e., b=inv(X'X)X'Y
- N.transpose(x)),
- data)
- diffs = (data - N.dot(x, b))
- s_sq = 1. / (n - len(p)) * N.dot(N.transpose(diffs), diffs)
-
- if len(p) == 2: # ttest_ind
- c = N.array([1, -1])
- df = n - 2
- fact = asum(1.0 / asum(x, 0)) # i.e., 1/n1 + 1/n2 + 1/n3 ...
- t = N.dot(c, b) / N.sqrt(s_sq * fact)
- probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
- return t, probs
-
- def aF_oneway(*args):
- """
-Performs a 1-way ANOVA, returning an F-value and probability given
-any number of groups. From Heiman, pp.394-7.
-
-Usage: aF_oneway (*args) where *args is 2 or more arrays, one per
- treatment group
-Returns: f-value, probability
-"""
- na = len(args) # ANOVA on 'na' groups, each in it's own array
- means = [0] * na
- vars = [0] * na
- ns = [0] * na
- alldata = []
- tmp = map(N.array, args)
- means = map(amean, tmp)
- vars = map(avar, tmp)
- ns = map(len, args)
- alldata = N.concatenate(args)
- bign = len(alldata)
- sstot = ass(alldata) - (asquare_of_sums(alldata) / float(bign))
- ssbn = 0
- for a in args:
- ssbn = ssbn + asquare_of_sums(N.array(a)) / float(len(a))
- ssbn = ssbn - (asquare_of_sums(alldata) / float(bign))
- sswn = sstot - ssbn
- dfbn = na - 1
- dfwn = bign - na
- msb = ssbn / float(dfbn)
- msw = sswn / float(dfwn)
- f = msb / msw
- prob = fprob(dfbn, dfwn, f)
- return f, prob
-
- def aF_value(ER, EF, dfR, dfF):
- """
-Returns an F-statistic given the following:
- ER = error associated with the null hypothesis (the Restricted model)
- EF = error associated with the alternate hypothesis (the Full model)
- dfR = degrees of freedom the Restricted model
- dfF = degrees of freedom associated with the Restricted model
-"""
- return ((ER - EF) / float(dfR - dfF) / (EF / float(dfF)))
-
- def outputfstats(Enum, Eden, dfnum, dfden, f, prob):
- Enum = round(Enum, 3)
- Eden = round(Eden, 3)
- dfnum = round(Enum, 3)
- dfden = round(dfden, 3)
- f = round(f, 3)
- prob = round(prob, 3)
- suffix = '' # for *s after the p-value
- if prob < 0.001:
- suffix = ' ***'
- elif prob < 0.01:
- suffix = ' **'
- elif prob < 0.05:
- suffix = ' *'
- title = [['EF/ER', 'DF', 'Mean Square', 'F-value', 'prob', '']]
- lofl = title + [[Enum, dfnum, round(Enum / float(dfnum), 3), f, prob, suffix
- ], [Eden, dfden, round(Eden / float(dfden), 3), '', '', '']]
- pstat.printcc(lofl)
- return
-
- def F_value_multivariate(ER, EF, dfnum, dfden):
- """
-Returns an F-statistic given the following:
- ER = error associated with the null hypothesis (the Restricted model)
- EF = error associated with the alternate hypothesis (the Full model)
- dfR = degrees of freedom the Restricted model
- dfF = degrees of freedom associated with the Restricted model
-where ER and EF are matrices from a multivariate F calculation.
-"""
- if type(ER) in [IntType, FloatType]:
- ER = N.array([[ER]])
- if type(EF) in [IntType, FloatType]:
- EF = N.array([[EF]])
- n_um = (LA.det(ER) - LA.det(EF)) / float(dfnum)
- d_en = LA.det(EF) / float(dfden)
- return n_um / d_en
-
-#####################################
-####### ASUPPORT FUNCTIONS ########
-#####################################
-
- def asign(a):
- """
-Usage: asign(a)
-Returns: array shape of a, with -1 where a<0 and +1 where a>=0
-"""
- a = N.asarray(a)
- if ((type(a) == type(1.4)) or (type(a) == type(1))):
- return a - a - N.less(a, 0) + N.greater(a, 0)
- else:
- return N.zeros(N.shape(a)) - N.less(a, 0) + N.greater(a, 0)
-
- def asum(a, dimension=None, keepdims=0):
- """
-An alternative to the Numeric.add.reduce function, which allows one to
-(1) collapse over multiple dimensions at once, and/or (2) to retain
-all dimensions in the original array (squashing one down to size.
-Dimension can equal None (ravel array first), an integer (the
-dimension over which to operate), or a sequence (operate over multiple
-dimensions). If keepdims=1, the resulting array will have as many
-dimensions as the input array.
-
-Usage: asum(a, dimension=None, keepdims=0)
-Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1
-"""
- if type(a) == N.ndarray and a.dtype in [N.int_, N.short, N.ubyte]:
- a = a.astype(N.float_)
- if dimension == None:
- s = N.sum(N.ravel(a))
- elif type(dimension) in [IntType, FloatType]:
- s = N.add.reduce(a, dimension)
- if keepdims == 1:
- shp = list(a.shape)
- shp[dimension] = 1
- s = N.reshape(s, shp)
- else: # must be a SEQUENCE of dims to sum over
- dims = list(dimension)
- dims.sort()
- dims.reverse()
- s = a * 1.0
- for dim in dims:
- s = N.add.reduce(s, dim)
- if keepdims == 1:
- shp = list(a.shape)
- for dim in dims:
- shp[dim] = 1
- s = N.reshape(s, shp)
- return s
-
- def acumsum(a, dimension=None):
- """
-Returns an array consisting of the cumulative sum of the items in the
-passed array. Dimension can equal None (ravel array first), an
-integer (the dimension over which to operate), or a sequence (operate
-over multiple dimensions, but this last one just barely makes sense).
-
-Usage: acumsum(a,dimension=None)
-"""
- if dimension == None:
- a = N.ravel(a)
- dimension = 0
- if type(dimension) in [ListType, TupleType, N.ndarray]:
- dimension = list(dimension)
- dimension.sort()
- dimension.reverse()
- for d in dimension:
- a = N.add.accumulate(a, d)
- return a
- else:
- return N.add.accumulate(a, dimension)
-
- def ass(inarray, dimension=None, keepdims=0):
- """
-Squares each value in the passed array, adds these squares & returns
-the result. Unfortunate function name. :-) Defaults to ALL values in
-the array. Dimension can equal None (ravel array first), an integer
-(the dimension over which to operate), or a sequence (operate over
-multiple dimensions). Set keepdims=1 to maintain the original number
-of dimensions.
-
-Usage: ass(inarray, dimension=None, keepdims=0)
-Returns: sum-along-'dimension' for (inarray*inarray)
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- return asum(inarray * inarray, dimension, keepdims)
-
- def asummult(array1, array2, dimension=None, keepdims=0):
- """
-Multiplies elements in array1 and array2, element by element, and
-returns the sum (along 'dimension') of all resulting multiplications.
-Dimension can equal None (ravel array first), an integer (the
-dimension over which to operate), or a sequence (operate over multiple
-dimensions). A trivial function, but included for completeness.
-
-Usage: asummult(array1,array2,dimension=None,keepdims=0)
-"""
- if dimension == None:
- array1 = N.ravel(array1)
- array2 = N.ravel(array2)
- dimension = 0
- return asum(array1 * array2, dimension, keepdims)
-
- def asquare_of_sums(inarray, dimension=None, keepdims=0):
- """
-Adds the values in the passed array, squares that sum, and returns the
-result. Dimension can equal None (ravel array first), an integer (the
-dimension over which to operate), or a sequence (operate over multiple
-dimensions). If keepdims=1, the returned array will have the same
-NUMBER of dimensions as the original.
-
-Usage: asquare_of_sums(inarray, dimension=None, keepdims=0)
-Returns: the square of the sum over dim(s) in dimension
-"""
- if dimension == None:
- inarray = N.ravel(inarray)
- dimension = 0
- s = asum(inarray, dimension, keepdims)
- if type(s) == N.ndarray:
- return s.astype(N.float_) * s
- else:
- return float(s) * s
-
- def asumdiffsquared(a, b, dimension=None, keepdims=0):
- """
-Takes pairwise differences of the values in arrays a and b, squares
-these differences, and returns the sum of these squares. Dimension
-can equal None (ravel array first), an integer (the dimension over
-which to operate), or a sequence (operate over multiple dimensions).
-keepdims=1 means the return shape = len(a.shape) = len(b.shape)
-
-Usage: asumdiffsquared(a,b)
-Returns: sum[ravel(a-b)**2]
-"""
- if dimension == None:
- inarray = N.ravel(a)
- dimension = 0
- return asum((a - b)**2, dimension, keepdims)
-
- def ashellsort(inarray):
- """
-Shellsort algorithm. Sorts a 1D-array.
-
-Usage: ashellsort(inarray)
-Returns: sorted-inarray, sorting-index-vector (for original array)
-"""
- n = len(inarray)
- svec = inarray * 1.0
- ivec = range(n)
- gap = n / 2 # integer division needed
- while gap > 0:
- for i in range(gap, n):
- for j in range(i - gap, -1, -gap):
- while j >= 0 and svec[j] > svec[j + gap]:
- temp = svec[j]
- svec[j] = svec[j + gap]
- svec[j + gap] = temp
- itemp = ivec[j]
- ivec[j] = ivec[j + gap]
- ivec[j + gap] = itemp
- gap = gap / 2 # integer division needed
-# svec is now sorted input vector, ivec has the order svec[i] = vec[ivec[i]]
- return svec, ivec
-
- def arankdata(inarray):
- """
-Ranks the data in inarray, dealing with ties appropritely. Assumes
-a 1D inarray. Adapted from Gary Perlman's |Stat ranksort.
-
-Usage: arankdata(inarray)
-Returns: array of length equal to inarray, containing rank scores
-"""
- n = len(inarray)
- svec, ivec = ashellsort(inarray)
- sumranks = 0
- dupcount = 0
- newarray = N.zeros(n, N.float_)
- for i in range(n):
- sumranks = sumranks + i
- dupcount = dupcount + 1
- if i == n - 1 or svec[i] <> svec[i + 1]:
- averank = sumranks / float(dupcount) + 1
- for j in range(i - dupcount + 1, i + 1):
- newarray[ivec[j]] = averank
- sumranks = 0
- dupcount = 0
- return newarray
-
- def afindwithin(data):
- """
-Returns a binary vector, 1=within-subject factor, 0=between. Input
-equals the entire data array (i.e., column 1=random factor, last
-column = measured values.
-
-Usage: afindwithin(data) data in |Stat format
-"""
- numfact = len(data[0]) - 2
- withinvec = [0] * numfact
- for col in range(1, numfact + 1):
- rows = pstat.linexand(data, col, pstat.unique(pstat.colex(data, 1))[0]
- ) # get 1 level of this factor
- if len(pstat.unique(pstat.colex(rows, 0))) < len(
- rows): # if fewer subjects than scores on this factor
- withinvec[col - 1] = 1
- return withinvec
-
- #########################################################
- #########################################################
- ###### RE-DEFINE DISPATCHES TO INCLUDE ARRAYS #########
- #########################################################
- #########################################################
-
- ## CENTRAL TENDENCY:
- geometricmean = Dispatch(
- (lgeometricmean, (ListType, TupleType)), (ageometricmean, (N.ndarray,)))
- harmonicmean = Dispatch(
- (lharmonicmean, (ListType, TupleType)), (aharmonicmean, (N.ndarray,)))
- mean = Dispatch((lmean, (ListType, TupleType)), (amean, (N.ndarray,)))
- median = Dispatch((lmedian, (ListType, TupleType)), (amedian, (N.ndarray,)))
- medianscore = Dispatch(
- (lmedianscore, (ListType, TupleType)), (amedianscore, (N.ndarray,)))
- mode = Dispatch((lmode, (ListType, TupleType)), (amode, (N.ndarray,)))
- tmean = Dispatch((atmean, (N.ndarray,)))
- tvar = Dispatch((atvar, (N.ndarray,)))
- tstdev = Dispatch((atstdev, (N.ndarray,)))
- tsem = Dispatch((atsem, (N.ndarray,)))
-
- ## VARIATION:
- moment = Dispatch((lmoment, (ListType, TupleType)), (amoment, (N.ndarray,)))
- variation = Dispatch(
- (lvariation, (ListType, TupleType)), (avariation, (N.ndarray,)))
- skew = Dispatch((lskew, (ListType, TupleType)), (askew, (N.ndarray,)))
- kurtosis = Dispatch(
- (lkurtosis, (ListType, TupleType)), (akurtosis, (N.ndarray,)))
- describe = Dispatch(
- (ldescribe, (ListType, TupleType)), (adescribe, (N.ndarray,)))
-
- ## DISTRIBUTION TESTS
-
- skewtest = Dispatch(
- (askewtest, (ListType, TupleType)), (askewtest, (N.ndarray,)))
- kurtosistest = Dispatch(
- (akurtosistest, (ListType, TupleType)), (akurtosistest, (N.ndarray,)))
- normaltest = Dispatch(
- (anormaltest, (ListType, TupleType)), (anormaltest, (N.ndarray,)))
-
- ## FREQUENCY STATS:
- itemfreq = Dispatch(
- (litemfreq, (ListType, TupleType)), (aitemfreq, (N.ndarray,)))
- scoreatpercentile = Dispatch(
- (lscoreatpercentile, (ListType, TupleType)), (ascoreatpercentile,
- (N.ndarray,)))
- percentileofscore = Dispatch(
- (lpercentileofscore, (ListType, TupleType)), (apercentileofscore,
- (N.ndarray,)))
- histogram = Dispatch(
- (lhistogram, (ListType, TupleType)), (ahistogram, (N.ndarray,)))
- cumfreq = Dispatch(
- (lcumfreq, (ListType, TupleType)), (acumfreq, (N.ndarray,)))
- relfreq = Dispatch(
- (lrelfreq, (ListType, TupleType)), (arelfreq, (N.ndarray,)))
-
- ## VARIABILITY:
- obrientransform = Dispatch(
- (lobrientransform, (ListType, TupleType)), (aobrientransform,
- (N.ndarray,)))
- samplevar = Dispatch(
- (lsamplevar, (ListType, TupleType)), (asamplevar, (N.ndarray,)))
- samplestdev = Dispatch(
- (lsamplestdev, (ListType, TupleType)), (asamplestdev, (N.ndarray,)))
- signaltonoise = Dispatch((asignaltonoise, (N.ndarray,)),)
- var = Dispatch((lvar, (ListType, TupleType)), (avar, (N.ndarray,)))
- stdev = Dispatch((lstdev, (ListType, TupleType)), (astdev, (N.ndarray,)))
- sterr = Dispatch((lsterr, (ListType, TupleType)), (asterr, (N.ndarray,)))
- sem = Dispatch((lsem, (ListType, TupleType)), (asem, (N.ndarray,)))
- z = Dispatch((lz, (ListType, TupleType)), (az, (N.ndarray,)))
- zs = Dispatch((lzs, (ListType, TupleType)), (azs, (N.ndarray,)))
-
- ## TRIMMING FCNS:
- threshold = Dispatch((athreshold, (N.ndarray,)),)
- trimboth = Dispatch(
- (ltrimboth, (ListType, TupleType)), (atrimboth, (N.ndarray,)))
- trim1 = Dispatch((ltrim1, (ListType, TupleType)), (atrim1, (N.ndarray,)))
-
- ## CORRELATION FCNS:
- paired = Dispatch((lpaired, (ListType, TupleType)), (apaired, (N.ndarray,)))
- lincc = Dispatch((llincc, (ListType, TupleType)), (alincc, (N.ndarray,)))
- pearsonr = Dispatch(
- (lpearsonr, (ListType, TupleType)), (apearsonr, (N.ndarray,)))
- spearmanr = Dispatch(
- (lspearmanr, (ListType, TupleType)), (aspearmanr, (N.ndarray,)))
- pointbiserialr = Dispatch(
- (lpointbiserialr, (ListType, TupleType)), (apointbiserialr, (N.ndarray,)))
- kendalltau = Dispatch(
- (lkendalltau, (ListType, TupleType)), (akendalltau, (N.ndarray,)))
- linregress = Dispatch(
- (llinregress, (ListType, TupleType)), (alinregress, (N.ndarray,)))
-
- ## INFERENTIAL STATS:
- ttest_1samp = Dispatch(
- (lttest_1samp, (ListType, TupleType)), (attest_1samp, (N.ndarray,)))
- ttest_ind = Dispatch(
- (lttest_ind, (ListType, TupleType)), (attest_ind, (N.ndarray,)))
- ttest_rel = Dispatch(
- (lttest_rel, (ListType, TupleType)), (attest_rel, (N.ndarray,)))
- chisquare = Dispatch(
- (lchisquare, (ListType, TupleType)), (achisquare, (N.ndarray,)))
- ks_2samp = Dispatch(
- (lks_2samp, (ListType, TupleType)), (aks_2samp, (N.ndarray,)))
- mannwhitneyu = Dispatch(
- (lmannwhitneyu, (ListType, TupleType)), (amannwhitneyu, (N.ndarray,)))
- tiecorrect = Dispatch(
- (ltiecorrect, (ListType, TupleType)), (atiecorrect, (N.ndarray,)))
- ranksums = Dispatch(
- (lranksums, (ListType, TupleType)), (aranksums, (N.ndarray,)))
- wilcoxont = Dispatch(
- (lwilcoxont, (ListType, TupleType)), (awilcoxont, (N.ndarray,)))
- kruskalwallish = Dispatch(
- (lkruskalwallish, (ListType, TupleType)), (akruskalwallish, (N.ndarray,)))
- friedmanchisquare = Dispatch(
- (lfriedmanchisquare, (ListType, TupleType)), (afriedmanchisquare,
- (N.ndarray,)))
-
- ## PROBABILITY CALCS:
- chisqprob = Dispatch(
- (lchisqprob, (IntType, FloatType)), (achisqprob, (N.ndarray,)))
- zprob = Dispatch((lzprob, (IntType, FloatType)), (azprob, (N.ndarray,)))
- ksprob = Dispatch((lksprob, (IntType, FloatType)), (aksprob, (N.ndarray,)))
- fprob = Dispatch((lfprob, (IntType, FloatType)), (afprob, (N.ndarray,)))
- betacf = Dispatch((lbetacf, (IntType, FloatType)), (abetacf, (N.ndarray,)))
- betai = Dispatch((lbetai, (IntType, FloatType)), (abetai, (N.ndarray,)))
- erfcc = Dispatch((lerfcc, (IntType, FloatType)), (aerfcc, (N.ndarray,)))
- gammln = Dispatch((lgammln, (IntType, FloatType)), (agammln, (N.ndarray,)))
-
- ## ANOVA FUNCTIONS:
- F_oneway = Dispatch(
- (lF_oneway, (ListType, TupleType)), (aF_oneway, (N.ndarray,)))
- F_value = Dispatch(
- (lF_value, (ListType, TupleType)), (aF_value, (N.ndarray,)))
-
- ## SUPPORT FUNCTIONS:
- incr = Dispatch((lincr, (ListType, TupleType, N.ndarray)),)
- sum = Dispatch((lsum, (ListType, TupleType)), (asum, (N.ndarray,)))
- cumsum = Dispatch((lcumsum, (ListType, TupleType)), (acumsum, (N.ndarray,)))
- ss = Dispatch((lss, (ListType, TupleType)), (ass, (N.ndarray,)))
- summult = Dispatch(
- (lsummult, (ListType, TupleType)), (asummult, (N.ndarray,)))
- square_of_sums = Dispatch(
- (lsquare_of_sums, (ListType, TupleType)), (asquare_of_sums, (N.ndarray,)))
- sumdiffsquared = Dispatch(
- (lsumdiffsquared, (ListType, TupleType)), (asumdiffsquared, (N.ndarray,)))
- shellsort = Dispatch(
- (lshellsort, (ListType, TupleType)), (ashellsort, (N.ndarray,)))
- rankdata = Dispatch(
- (lrankdata, (ListType, TupleType)), (arankdata, (N.ndarray,)))
- findwithin = Dispatch(
- (lfindwithin, (ListType, TupleType)), (afindwithin, (N.ndarray,)))
-
-###################### END OF NUMERIC FUNCTION BLOCK #####################
-
-###################### END OF STATISTICAL FUNCTIONS ######################
-
-except ImportError:
- pass
diff --git a/cros_utils/tabulator.py b/cros_utils/tabulator.py
index 59e4d426..9527fde0 100644
--- a/cros_utils/tabulator.py
+++ b/cros_utils/tabulator.py
@@ -61,12 +61,16 @@ table:
print tp.Print()
"""
+from __future__ import division
from __future__ import print_function
import getpass
import math
import sys
+# TODO(zhizhouy): Drop numpy in the future
+# pylint: disable=import-error
import numpy
+import scipy
from email_sender import EmailSender
import misc
@@ -162,7 +166,7 @@ class TableGenerator(object):
keys = self._AggregateKeys()
return self._SortKeys(keys)
- def GetTable(self, number_of_rows=sys.maxint):
+ def GetTable(self, number_of_rows=sys.maxsize):
"""Returns a table from a list of list of dicts.
Examples:
@@ -239,7 +243,7 @@ class SamplesTableGenerator(TableGenerator):
keys = self._runs.keys()
return self._SortKeys(keys)
- def GetTable(self, number_of_rows=sys.maxint):
+ def GetTable(self, number_of_rows=sys.maxsize):
"""Returns a tuple, which contains three args:
1) a table from a list of list of dicts.
@@ -660,8 +664,7 @@ class PValueResult(ComparisonResult):
if len(values) < 2 or len(baseline_values) < 2:
cell.value = float('nan')
return
- import stats
- _, cell.value = stats.lttest_ind(values, baseline_values)
+ _, cell.value = scipy.stats.ttest_ind(values, baseline_values)
def _ComputeString(self, cell, values, baseline_values):
return float('nan')
@@ -877,7 +880,7 @@ class WeightFormat(Format):
class StorageFormat(Format):
"""Format the cell as a storage number.
- Example:
+ Examples:
If the cell contains a value of 1024, the string_value will be 1.0K.
"""
@@ -899,7 +902,7 @@ class StorageFormat(Format):
class CoeffVarFormat(Format):
"""Format the cell as a percent.
- Example:
+ Examples:
If the cell contains a value of 1.5, the string_value will be +150%.
"""
@@ -917,7 +920,7 @@ class CoeffVarFormat(Format):
class PercentFormat(Format):
"""Format the cell as a percent.
- Example:
+ Examples:
If the cell contains a value of 1.5, the string_value will be +50%.
"""
@@ -930,7 +933,7 @@ class PercentFormat(Format):
class RatioFormat(Format):
"""Format the cell as a ratio.
- Example:
+ Examples:
If the cell contains a value of 1.5642, the string_value will be 1.56.
"""
@@ -943,7 +946,7 @@ class RatioFormat(Format):
class ColorBoxFormat(Format):
"""Format the cell as a color box.
- Example:
+ Examples:
If the cell contains a value of 1.5, it will get a green color.
If the cell contains a value of 0.5, it will get a red color.
The intensity of the green/red will be determined by how much above or below
@@ -989,7 +992,7 @@ class Cell(object):
# Entire row inherits this color.
self.color_row = False
self.bgcolor_row = False
- self.width = None
+ self.width = 0
self.colspan = 1
self.name = None
self.header = False
@@ -1297,7 +1300,7 @@ class TablePrinter(object):
suffix = '\033[0m'
elif self._output_type in [self.EMAIL, self.HTML]:
rgb = color.GetRGB()
- prefix = ("<FONT style=\"BACKGROUND-COLOR:#{0}\">".format(rgb))
+ prefix = ('<FONT style="BACKGROUND-COLOR:#{0}">'.format(rgb))
suffix = '</FONT>'
elif self._output_type in [self.PLAIN, self.TSV]:
prefix = ''
@@ -1365,7 +1368,7 @@ class TablePrinter(object):
tag = 'th'
else:
tag = 'td'
- out = "<{0} colspan = \"{2}\"> {1} </{0}>".format(tag, out, cell.colspan)
+ out = '<{0} colspan = "{2}"> {1} </{0}>'.format(tag, out, cell.colspan)
return out
@@ -1387,7 +1390,7 @@ class TablePrinter(object):
if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
return ''
if self._output_type == self.HTML:
- return "<p></p><table id=\"box-table-a\">\n<tr>"
+ return '<p></p><table id="box-table-a">\n<tr>'
def _GetSuffix(self):
if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
diff --git a/cros_utils/tabulator_test.py b/cros_utils/tabulator_test.py
index 33c8da25..4c0f8677 100644..100755
--- a/cros_utils/tabulator_test.py
+++ b/cros_utils/tabulator_test.py
@@ -1,7 +1,9 @@
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+
"""Tests for the tabulator module."""
from __future__ import print_function
@@ -71,7 +73,7 @@ class TabulatorTest(unittest.TestCase):
a = [1.0e+308] * 3
# pylint: disable=protected-access
b = tabulator.Result()._GetGmean(a)
- self.assertTrue(b >= 0.99e+308 and b <= 1.01e+308)
+ self.assertTrue(0.99e+308 <= b <= 1.01e+308)
def testIgnoreMinMax(self):
amr = tabulator.AmeanResult(ignore_min_max=True)
diff --git a/cros_utils/timeline.py b/cros_utils/timeline.py
index 873aaa30..cce0b05c 100644
--- a/cros_utils/timeline.py
+++ b/cros_utils/timeline.py
@@ -1,5 +1,8 @@
-# Copyright 2012 Google Inc. All Rights Reserved.
-#
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""Tools for recording and reporting timeline of benchmark_run."""
from __future__ import print_function
@@ -25,8 +28,8 @@ class Timeline(object):
def Record(self, event):
for e in self.events:
- assert e.name != event, ('The event {0} is already recorded.'
- .format(event))
+ assert e.name != event, (
+ 'The event {0} is already recorded.'.format(event))
cur_event = Event(name=event, cur_time=time.time())
self.events.append(cur_event)
@@ -43,7 +46,7 @@ class Timeline(object):
for e in self.events:
if e.name == event:
return e.timestamp
- raise IndexError, 'The event {0} is not recorded'.format(event)
+ raise IndexError('The event {0} is not recorded'.format(event))
def GetLastEventTime(self):
return self.events[-1].timestamp
diff --git a/cros_utils/timeline_test.py b/cros_utils/timeline_test.py
index c93a1274..314cf3c4 100644..100755
--- a/cros_utils/timeline_test.py
+++ b/cros_utils/timeline_test.py
@@ -1,4 +1,9 @@
-# Copyright 2012 Google Inc. All Rights Reserved.
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
"""Tests for time_line.py."""
from __future__ import print_function
diff --git a/crosperf/crosperf.py b/crosperf/crosperf.py
index fddd18b9..75494371 100755
--- a/crosperf/crosperf.py
+++ b/crosperf/crosperf.py
@@ -90,11 +90,6 @@ def RunCrosperf(argv):
dest='log_dir',
default='',
help='The log_dir, default is under <crosperf_logs>/logs')
- parser.add_argument(
- '--no_hwp',
- default=False,
- action='store_true',
- help='Disable intel_pstate on Intel CPU with HWP support.')
SetupParserOptions(parser)
options, args = parser.parse_known_args(argv)
@@ -121,8 +116,6 @@ def RunCrosperf(argv):
experiment_file.GetGlobalSettings().SetField('name', experiment_name)
experiment = ExperimentFactory().GetExperiment(experiment_file,
working_directory, log_dir)
- if options.no_hwp:
- experiment.intel_pstate = 'no_hwp'
json_report = experiment_file.GetGlobalSettings().GetField('json_report')
diff --git a/crosperf/experiment.py b/crosperf/experiment.py
index d6d9ee4c..45a028ad 100644
--- a/crosperf/experiment.py
+++ b/crosperf/experiment.py
@@ -57,7 +57,6 @@ class Experiment(object):
self.ignore_min_max = ignore_min_max
self.skylab = skylab
self.l = logger.GetLogger(log_dir)
- self.intel_pstate = dut_config['intel_pstate']
if not self.benchmarks:
raise RuntimeError('No benchmarks specified')
diff --git a/crosperf/results_cache.py b/crosperf/results_cache.py
index 977e3e22..bd80e8a7 100644
--- a/crosperf/results_cache.py
+++ b/crosperf/results_cache.py
@@ -58,6 +58,7 @@ class Result(object):
self.turbostat_log_file = ''
self.cpustats_log_file = ''
self.top_log_file = ''
+ self.wait_time_log_file = ''
self.chrome_version = ''
self.err = None
self.chroot_results_dir = ''
@@ -110,7 +111,7 @@ class Result(object):
def CopyResultsTo(self, dest_dir):
self.CopyFilesTo(dest_dir, self.perf_data_files)
self.CopyFilesTo(dest_dir, self.perf_report_files)
- if len(self.perf_data_files) or len(self.perf_report_files):
+ if self.perf_data_files or self.perf_report_files:
self._logger.LogOutput('Perf results files stored in %s.' % dest_dir)
def GetNewKeyvals(self, keyvals_dict):
@@ -316,6 +317,10 @@ class Result(object):
"""Get cpustats log path string."""
return self.FindFilesInResultsDir('-name top.log').split('\n')[0]
+ def GetWaitTimeFile(self):
+ """Get wait time log path string."""
+ return self.FindFilesInResultsDir('-name wait_time.log').split('\n')[0]
+
def _CheckDebugPath(self, option, path):
relative_path = path[1:]
out_chroot_path = os.path.join(self.chromeos_root, 'chroot', relative_path)
@@ -415,6 +420,7 @@ class Result(object):
self.turbostat_log_file = self.GetTurbostatFile()
self.cpustats_log_file = self.GetCpustatsFile()
self.top_log_file = self.GetTopFile()
+ self.wait_time_log_file = self.GetWaitTimeFile()
# TODO(asharif): Do something similar with perf stat.
# Grab keyvals from the directory.
@@ -818,6 +824,16 @@ class Result(object):
cpustats = self.ProcessCpustatsResults()
if self.top_log_file:
self.top_cmds = self.ProcessTopResults()
+ if self.wait_time_log_file:
+ with open(self.wait_time_log_file) as f:
+ wait_time = f.readline().strip()
+ try:
+ wait_time = float(wait_time)
+ except ValueError:
+ raise ValueError('Wait time in log file is not a number.')
+ # This is for accumulating wait time for telemtry_Crosperf runs only,
+ # for test_that runs, please refer to suite_runner.
+ self.machine.AddCooldownWaitTime(wait_time)
for param_key, param in cpustats.items():
for param_type, param_values in param.items():
@@ -980,10 +996,6 @@ class Result(object):
class TelemetryResult(Result):
"""Class to hold the results of a single Telemetry run."""
- def __init__(self, logger, label, log_level, machine, cmd_exec=None):
- super(TelemetryResult, self).__init__(logger, label, log_level, machine,
- cmd_exec)
-
def PopulateFromRun(self, out, err, retval, test, suite, cwp_dso):
self.out = out
self.err = err
@@ -1237,6 +1249,9 @@ class ResultsCache(object):
class MockResultsCache(ResultsCache):
"""Class for mock testing, corresponding to ResultsCache class."""
+ # FIXME: pylint complains about this mock init method, we should probably
+ # replace all Mock classes in Crosperf with simple Mock.mock().
+ # pylint: disable=arguments-differ
def Init(self, *args):
pass
diff --git a/crosperf/results_cache_unittest.py b/crosperf/results_cache_unittest.py
index 7ce04221..c1c27265 100755
--- a/crosperf/results_cache_unittest.py
+++ b/crosperf/results_cache_unittest.py
@@ -434,6 +434,7 @@ class ResultTest(unittest.TestCase):
self.callGetTurbostatFile = False
self.callGetCpustatsFile = False
self.callGetTopFile = False
+ self.callGetWaitTimeFile = False
self.args = None
self.callGatherPerfResults = False
self.mock_logger = mock.Mock(spec=logger.Logger)
@@ -1126,6 +1127,10 @@ class ResultTest(unittest.TestCase):
self.callGetTopFile = True
return []
+ def FakeGetWaitTimeFile():
+ self.callGetWaitTimeFile = True
+ return []
+
def FakeProcessResults(show_results=False):
if show_results:
pass
@@ -1143,6 +1148,7 @@ class ResultTest(unittest.TestCase):
self.callGetTurbostatFile = False
self.callGetCpustatsFile = False
self.callGetTopFile = False
+ self.callGetWaitTimeFile = False
self.callProcessResults = False
self.result.GetResultsDir = FakeGetResultsDir
@@ -1152,6 +1158,7 @@ class ResultTest(unittest.TestCase):
self.result.GetTurbostatFile = FakeGetTurbostatFile
self.result.GetCpustatsFile = FakeGetCpustatsFile
self.result.GetTopFile = FakeGetTopFile
+ self.result.GetWaitTimeFile = FakeGetWaitTimeFile
self.result.ProcessResults = FakeProcessResults
self.result.PopulateFromRun(OUTPUT, '', 0, 'test', 'telemetry_Crosperf',
@@ -1163,6 +1170,7 @@ class ResultTest(unittest.TestCase):
self.assertTrue(self.callGetTurbostatFile)
self.assertTrue(self.callGetCpustatsFile)
self.assertTrue(self.callGetTopFile)
+ self.assertTrue(self.callGetWaitTimeFile)
self.assertTrue(self.callProcessResults)
def FakeGetKeyvals(self, show_all=False):
diff --git a/crosperf/schedv2.py b/crosperf/schedv2.py
index d15f18c0..68e1e5b8 100644
--- a/crosperf/schedv2.py
+++ b/crosperf/schedv2.py
@@ -20,7 +20,6 @@ import test_flag
from machine_image_manager import MachineImageManager
from cros_utils import command_executer
from cros_utils import logger
-from cros_utils.device_setup_utils import DutWrapper
class DutWorker(Thread):
@@ -55,7 +54,6 @@ class DutWorker(Thread):
Note - 'br' below means 'benchmark_run'.
"""
- intel_pstate = self._sched.get_experiment().intel_pstate
# Firstly, handle benchmarkruns that have cache hit.
br = self._sched.get_cached_benchmark_run()
while br:
@@ -89,15 +87,6 @@ class DutWorker(Thread):
'working thread {}.'.format(self))
break
else:
- self._logger.LogOutput('Update kernel cmdline if necessary '
- 'and reboot')
- run_on_dut = DutWrapper(
- self._sched.get_labels(0).chromeos_root,
- self._dut.name,
- logger=self._logger)
- if run_on_dut.KerncmdUpdateNeeded(intel_pstate):
- run_on_dut.UpdateKerncmdIntelPstate(intel_pstate)
-
# Execute the br.
self._execute_benchmark_run(br)
finally:
@@ -119,6 +108,11 @@ class DutWorker(Thread):
if self._terminated:
return 1
+ if self._sched.get_experiment().skylab:
+ self._logger.LogOutput('Skylab mode, do not image before testing.')
+ self._dut.label = label
+ return 0
+
self._logger.LogOutput('Reimaging {} using {}'.format(self, label))
self._stat_num_reimage += 1
self._stat_annotation = 'reimaging using "{}"'.format(label.name)
diff --git a/crosperf/suite_runner.py b/crosperf/suite_runner.py
index 418358e0..579e30b2 100644
--- a/crosperf/suite_runner.py
+++ b/crosperf/suite_runner.py
@@ -10,11 +10,10 @@ from __future__ import print_function
import json
import os
+import pipes
import shlex
import time
-from contextlib import contextmanager
-
from cros_utils import command_executer
from cros_utils.device_setup_utils import DutWrapper
@@ -52,6 +51,10 @@ def GetProfilerArgs(profiler_args):
return ' '.join(args_list)
+def GetDutConfigArgs(dut_config):
+ return 'dut_config={}'.format(pipes.quote(json.dumps(dut_config)))
+
+
class SuiteRunner(object):
"""This defines the interface from crosperf to test script."""
@@ -72,8 +75,8 @@ class SuiteRunner(object):
def Run(self, cros_machine, label, benchmark, test_args, profiler_args):
machine_name = cros_machine.name
- if not label.skylab:
- # Initialize command executer on DUT.
+ if benchmark.suite != 'telemetry_Crosperf':
+ # Initialize command executer on DUT for test_that runs.
run_on_dut = DutWrapper(
label.chromeos_root,
machine_name,
@@ -82,22 +85,23 @@ class SuiteRunner(object):
ce=self._ce,
dut_config=self.dut_config)
for i in range(0, benchmark.retries + 1):
+ # TODO: For telemetry_Crosperf run, device setup has been moved into
+ # server test script; for client runs, need to figure out wrapper to do
+ # it before running, now it is still setup here.
+ if benchmark.suite != 'telemetry_Crosperf':
+ wait_time = run_on_dut.SetupDevice()
+ # This is for accumulating wait time for test_that runs only,
+          # for telemetry_Crosperf runs, please refer to result_cache.
+ cros_machine.AddCooldownWaitTime(wait_time)
+
if label.skylab:
- # TODO: need to migrate Device setups to autotest for skylab.
- # Since in skylab mode, we may not know the DUT until one is assigned
- # to the test. For telemetry_Crosperf run, we can move them into the
- # server test script, for client runs, need to figure out wrapper to do
- # it before running.
ret_tup = self.Skylab_Run(label, benchmark, test_args, profiler_args)
+ elif benchmark.suite == 'telemetry_Crosperf':
+ ret_tup = self.Telemetry_Crosperf_Run(machine_name, label, benchmark,
+ test_args, profiler_args)
else:
- self.SetupDevice(run_on_dut, cros_machine)
- if benchmark.suite == 'telemetry_Crosperf':
- run_on_dut.DecreaseWaitTime()
- ret_tup = self.Telemetry_Crosperf_Run(machine_name, label, benchmark,
- test_args, profiler_args)
- else:
- ret_tup = self.Test_That_Run(machine_name, label, benchmark,
- test_args, profiler_args)
+ ret_tup = self.Test_That_Run(machine_name, label, benchmark, test_args,
+ profiler_args)
if ret_tup[0] != 0:
self.logger.LogOutput('benchmark %s failed. Retries left: %s' %
@@ -112,76 +116,13 @@ class SuiteRunner(object):
break
return ret_tup
- @contextmanager
- def PauseUI(self, run_on_dut):
- """Stop UI before and Start UI after the context block.
-
- Context manager will make sure UI is always resumed at the end.
- """
- run_on_dut.StopUI()
- try:
- yield
-
- finally:
- run_on_dut.StartUI()
-
- def SetupDevice(self, run_on_dut, cros_machine):
- # Pause UI while configuring the DUT.
- # This will accelerate setup (waiting for cooldown has x10 drop)
- # and help to reset a Chrome state left after the previous test.
- with self.PauseUI(run_on_dut):
- # Unless the user turns on ASLR in the flag, we first disable ASLR
- # before running the benchmarks
- if not self.dut_config['enable_aslr']:
- run_on_dut.DisableASLR()
-
- # CPU usage setup comes first where we enable/disable cores.
- run_on_dut.SetupCpuUsage()
- cpu_online_status = run_on_dut.GetCpuOnline()
- # List of online cores of type int (core number).
- online_cores = [
- core for core, status in cpu_online_status.items() if status
- ]
- if self.dut_config['cooldown_time']:
- # Setup power conservative mode for effective cool down.
- # Set ignore status since powersave may no be available
- # on all platforms and we are going to handle it.
- ret = run_on_dut.SetCpuGovernor('powersave', ignore_status=True)
- if ret:
- # "powersave" is not available, use "ondemand".
- # Still not a fatal error if it fails.
- ret = run_on_dut.SetCpuGovernor('ondemand', ignore_status=True)
- # TODO(denik): Run comparison test for 'powersave' and 'ondemand'
- # on scarlet and kevin64.
- # We might have to consider reducing freq manually to the min
- # if it helps to reduce waiting time.
- wait_time = run_on_dut.WaitCooldown()
- cros_machine.AddCooldownWaitTime(wait_time)
-
- # Setup CPU governor for the benchmark run.
- # It overwrites the previous governor settings.
- governor = self.dut_config['governor']
- # FIXME(denik): Pass online cores to governor setup.
- run_on_dut.SetCpuGovernor(governor, ignore_status=False)
-
- # Disable Turbo and Setup CPU freq should ALWAYS proceed governor setup
- # since governor may change:
- # - frequency;
- # - turbo/boost.
- run_on_dut.DisableTurbo()
- run_on_dut.SetupCpuFreq(online_cores)
- # FIXME(denik): Currently we are not recovering the previous cpufreq
- # settings since we do reboot/setup every time anyway.
- # But it may change in the future and then we have to recover the
- # settings.
-
def Test_That_Run(self, machine, label, benchmark, test_args, profiler_args):
"""Run the test_that test.."""
- options = ''
+ options = []
if label.board:
- options += ' --board=%s' % label.board
+ options.append('--board=%s' % label.board)
if test_args:
- options += ' %s' % test_args
+ options.append(test_args)
if profiler_args:
self.logger.LogFatal('test_that does not support profiler.')
command = 'rm -rf /usr/local/autotest/results/*'
@@ -199,8 +140,8 @@ class SuiteRunner(object):
autotest_dir_arg = ''
command = (('%s %s --fast '
- '%s %s %s') % (TEST_THAT_PATH, autotest_dir_arg, options,
- machine, benchmark.test_name))
+ '%s %s %s') % (TEST_THAT_PATH, autotest_dir_arg,
+ ' '.join(options), machine, benchmark.test_name))
if self.log_level != 'verbose':
self.logger.LogOutput('Running test.')
self.logger.LogOutput('CMD: %s' % command)
@@ -251,38 +192,43 @@ class SuiteRunner(object):
def Skylab_Run(self, label, benchmark, test_args, profiler_args):
"""Run the test via skylab.."""
- options = ''
+ options = []
if label.board:
- options += '-board=%s' % label.board
+ options.append('-board=%s' % label.board)
if label.build:
- options += ' -image=%s' % label.build
+ options.append('-image=%s' % label.build)
# TODO: now only put toolchain pool here, user need to be able to specify
# which pool to use. Need to request feature to not use this option at all.
- options += ' -pool=toolchain'
+ options.append('-pool=toolchain')
+ test_args_list = []
if benchmark.suite == 'telemetry_Crosperf':
if test_args:
# Strip double quotes off args (so we can wrap them in single
# quotes, to pass through to Telemetry).
if test_args[0] == '"' and test_args[-1] == '"':
- test_args = test_args[1:-1]
+ test_args_list.append(test_args[1:-1])
if profiler_args:
- test_args += GetProfilerArgs(profiler_args)
- test_args += ' run_local={} test={}'.format(
- benchmark.run_local,
- benchmark.test_name,
- )
+ test_args_list.append(GetProfilerArgs(profiler_args))
+ if self.dut_config:
+ test_args_list.append(GetDutConfigArgs(self.dut_config))
+ test_args_list.append('run_local=%s' % benchmark.run_local)
+ test_args_list.append('test=%s' % benchmark.test_name)
else:
if profiler_args:
self.logger.LogFatal('Client tests do not support profiler.')
- if test_args:
- options += ' -test-args="%s"' % test_args
+ if test_args:
+ test_args_list.append(test_args)
+ if test_args_list:
+ options.append('-test-args=%s' % pipes.quote(' '.join(test_args_list)))
- dimensions = ''
+ dimensions = []
for dut in label.remote:
- dimensions += ' -dim dut_name:%s' % dut.rstrip('.cros')
+ dimensions.append('-dim dut_name:%s' % dut.rstrip('.cros'))
- command = (('%s create-test%s %s %s') % \
- (SKYLAB_PATH, dimensions, options, benchmark.test_name))
+ command = (('%s create-test %s %s %s') % \
+ (SKYLAB_PATH, ' '.join(dimensions), ' '.join(options),
+ 'telemetry_Crosperf' if benchmark.suite == 'telemetry_Crosperf'
+ else benchmark.test_name))
if self.log_level != 'verbose':
self.logger.LogOutput('Starting skylab test.')
@@ -357,6 +303,7 @@ class SuiteRunner(object):
autotest_dir_arg = '--autotest_dir %s' % label.autotest_path
profiler_args = GetProfilerArgs(profiler_args)
+ dut_config_args = GetDutConfigArgs(self.dut_config)
# --fast avoids unnecessary copies of syslogs.
fast_arg = '--fast'
args_string = ''
@@ -367,13 +314,13 @@ class SuiteRunner(object):
test_args = test_args[1:-1]
args_string = "test_args='%s'" % test_args
- top_interval = self.dut_config['top_interval']
- turbostat = self.dut_config['turbostat']
- cmd = ('{} {} {} --board={} --args="{} run_local={} test={} '
- 'turbostat={} top_interval={} {}" {} telemetry_Crosperf'.format(
- TEST_THAT_PATH, autotest_dir_arg, fast_arg, label.board,
- args_string, benchmark.run_local, benchmark.test_name, turbostat,
- top_interval, profiler_args, machine))
+ args = '{} run_local={} test={} {} {}'.format(
+ args_string, benchmark.run_local, benchmark.test_name, dut_config_args,
+ profiler_args)
+
+ cmd = ('{} {} {} --board={} --args={} {} telemetry_Crosperf'.format(
+ TEST_THAT_PATH, autotest_dir_arg, fast_arg, label.board,
+ pipes.quote(args), machine))
# Use --no-ns-pid so that cros_sdk does not create a different
# process namespace and we can kill process created easily by their
diff --git a/crosperf/suite_runner_unittest.py b/crosperf/suite_runner_unittest.py
index c2e33226..413822a2 100755
--- a/crosperf/suite_runner_unittest.py
+++ b/crosperf/suite_runner_unittest.py
@@ -57,16 +57,6 @@ class SuiteRunnerTest(unittest.TestCase):
False, # rm_chroot_tmp
'record -e cycles') # perf_args
- telemetry_bench = Benchmark(
- 'b3_test', # name
- 'octane', # test_name
- '', # test_args
- 3, # iterations
- False, # rm_chroot_tmp
- 'record -e cycles', # perf_args
- 'telemetry', # suite
- False) # show_all_results
-
def __init__(self, *args, **kwargs):
super(SuiteRunnerTest, self).__init__(*args, **kwargs)
self.call_test_that_run = False
@@ -129,13 +119,14 @@ class SuiteRunnerTest(unittest.TestCase):
self.runner.Skylab_Run = FakeSkylabRun
self.runner.Telemetry_Crosperf_Run = FakeTelemetryCrosperfRun
self.runner.Test_That_Run = FakeTestThatRun
- self.runner.SetupDevice = mock.Mock()
+ DutWrapper.SetupDevice = mock.Mock(return_value=0)
DutWrapper.RunCommandOnDut = mock.Mock(return_value=FakeRunner)
self.runner.dut_config['enable_aslr'] = False
self.runner.dut_config['cooldown_time'] = 0
self.runner.dut_config['governor'] = 'fake_governor'
self.runner.dut_config['cpu_freq_pct'] = 65
+ self.runner.dut_config['intel_pstate'] = 'no_hwp'
machine = 'fake_machine'
cros_machine = MockCrosMachine(machine, self.mock_label.chromeos_root,
self.mock_logger)
@@ -144,14 +135,14 @@ class SuiteRunnerTest(unittest.TestCase):
reset()
self.mock_label.skylab = True
- self.runner.Run(cros_machine, self.mock_label, self.telemetry_bench,
+ self.runner.Run(cros_machine, self.mock_label, self.test_that_bench,
test_args, profiler_args)
self.assertTrue(self.call_skylab_run)
self.assertFalse(self.call_test_that_run)
self.assertFalse(self.call_telemetry_crosperf_run)
self.assertEqual(self.skylab_run_args,
- [self.mock_label, self.telemetry_bench, '', ''])
- self.runner.SetupDevice.assert_not_called()
+ [self.mock_label, self.test_that_bench, '', ''])
+ DutWrapper.SetupDevice.assert_called_once()
self.mock_label.skylab = False
reset()
@@ -172,101 +163,6 @@ class SuiteRunnerTest(unittest.TestCase):
'fake_machine', self.mock_label, self.telemetry_crosperf_bench, '', ''
])
- def test_setup_device(self):
-
- def FakeRunner(command, ignore_status=False):
- # pylint fix for unused variable.
- del command, ignore_status
- return 0, '', ''
-
- machine = 'fake_machine'
- mock_run_on_dut = mock.Mock(spec=DutWrapper)
- cros_machine = MockCrosMachine(machine, self.mock_label.chromeos_root,
- self.mock_logger)
-
- mock_run_on_dut.RunCommandOnDut = mock.Mock(return_value=FakeRunner)
- mock_run_on_dut.WaitCooldown = mock.Mock(return_value=0)
- mock_run_on_dut.GetCpuOnline = mock.Mock(return_value={0: 1, 1: 1, 2: 0})
-
- self.runner.dut_config['enable_aslr'] = False
- self.runner.dut_config['cooldown_time'] = 0
- self.runner.dut_config['governor'] = 'fake_governor'
- self.runner.dut_config['cpu_freq_pct'] = 65
-
- self.runner.SetupDevice(mock_run_on_dut, cros_machine)
-
- mock_run_on_dut.SetupCpuUsage.assert_called_once_with()
- mock_run_on_dut.SetupCpuFreq.assert_called_once_with([0, 1])
- mock_run_on_dut.GetCpuOnline.assert_called_once_with()
- mock_run_on_dut.SetCpuGovernor.assert_called_once_with(
- 'fake_governor', ignore_status=False)
- mock_run_on_dut.DisableTurbo.assert_called_once_with()
- mock_run_on_dut.StopUI.assert_called_once_with()
- mock_run_on_dut.StartUI.assert_called_once_with()
- mock_run_on_dut.WaitCooldown.assert_not_called()
-
- def test_setup_device_with_cooldown(self):
-
- def FakeRunner(command, ignore_status=False):
- # pylint fix for unused variable.
- del command, ignore_status
- return 0, '', ''
-
- machine = 'fake_machine'
- mock_run_on_dut = mock.Mock(spec=DutWrapper)
- cros_machine = MockCrosMachine(machine, self.mock_label.chromeos_root,
- self.mock_logger)
-
- mock_run_on_dut.RunCommandOnDut = mock.Mock(return_value=FakeRunner)
- mock_run_on_dut.WaitCooldown = mock.Mock(return_value=0)
- mock_run_on_dut.GetCpuOnline = mock.Mock(return_value={0: 0, 1: 1})
-
- self.runner.dut_config['enable_aslr'] = False
- self.runner.dut_config['cooldown_time'] = 10
- self.runner.dut_config['governor'] = 'fake_governor'
- self.runner.dut_config['cpu_freq_pct'] = 75
-
- self.runner.SetupDevice(mock_run_on_dut, cros_machine)
-
- mock_run_on_dut.WaitCooldown.assert_called_once_with()
- mock_run_on_dut.DisableASLR.assert_called_once()
- mock_run_on_dut.DisableTurbo.assert_called_once_with()
- mock_run_on_dut.SetupCpuUsage.assert_called_once_with()
- mock_run_on_dut.SetupCpuFreq.assert_called_once_with([1])
- mock_run_on_dut.SetCpuGovernor.assert_called()
- mock_run_on_dut.GetCpuOnline.assert_called_once_with()
- mock_run_on_dut.StopUI.assert_called_once_with()
- mock_run_on_dut.StartUI.assert_called_once_with()
- self.assertGreater(mock_run_on_dut.SetCpuGovernor.call_count, 1)
- self.assertEqual(mock_run_on_dut.SetCpuGovernor.call_args,
- mock.call('fake_governor', ignore_status=False))
-
- def test_setup_device_with_exception(self):
- """Test SetupDevice with an exception."""
-
- machine = 'fake_machine'
- mock_run_on_dut = mock.Mock(spec=DutWrapper)
- cros_machine = MockCrosMachine(machine, self.mock_label.chromeos_root,
- self.mock_logger)
-
- mock_run_on_dut.SetupCpuUsage = mock.Mock(side_effect=RuntimeError())
- self.runner.dut_config['enable_aslr'] = False
-
- with self.assertRaises(RuntimeError):
- self.runner.SetupDevice(mock_run_on_dut, cros_machine)
-
- # This called injected an exception.
- mock_run_on_dut.SetupCpuUsage.assert_called_once_with()
- # Calls following the expeption are skipped.
- mock_run_on_dut.WaitCooldown.assert_not_called()
- mock_run_on_dut.DisableTurbo.assert_not_called()
- mock_run_on_dut.SetupCpuFreq.assert_not_called()
- mock_run_on_dut.SetCpuGovernor.assert_not_called()
- mock_run_on_dut.GetCpuOnline.assert_not_called()
- # Check that Stop/Start UI are always called.
- mock_run_on_dut.StopUI.assert_called_once_with()
- mock_run_on_dut.StartUI.assert_called_once_with()
-
@mock.patch.object(command_executer.CommandExecuter, 'CrosRunCommand')
@mock.patch.object(command_executer.CommandExecuter,
'ChrootRunCommandWOutput')
@@ -303,7 +199,7 @@ class SuiteRunnerTest(unittest.TestCase):
self.assertEqual(len(args_list), 2)
self.assertEqual(args_list[0], '/tmp/chromeos')
self.assertEqual(args_list[1], ('/usr/bin/test_that '
- '--fast --board=lumpy '
+ '--fast --board=lumpy '
'--iterations=2 lumpy1.cros octane'))
self.assertEqual(args_dict['command_terminator'], self.mock_cmd_term)
self.real_logger.LogMsg = save_log_msg
@@ -328,13 +224,14 @@ class SuiteRunnerTest(unittest.TestCase):
args_list = mock_chroot_runcmd.call_args_list[0][0]
args_dict = mock_chroot_runcmd.call_args_list[0][1]
self.assertEqual(args_list[0], '/tmp/chromeos')
- self.assertEqual(args_list[1],
- ('/usr/bin/test_that --autotest_dir '
- '~/trunk/src/third_party/autotest/files --fast '
- '--board=lumpy --args=" run_local=False test=octane '
- 'turbostat=True top_interval=3 profiler=custom_perf '
- 'profiler_args=\'record -a -e cycles,instructions\'" '
- 'lumpy1.cros telemetry_Crosperf'))
+ self.assertEqual(
+ args_list[1],
+ ('/usr/bin/test_that --autotest_dir '
+ '~/trunk/src/third_party/autotest/files --fast '
+ "--board=lumpy --args=' run_local=False test=octane "
+ 'dut_config=\'"\'"\'{"turbostat": true, "top_interval": 3}\'"\'"\' '
+ 'profiler=custom_perf profiler_args=\'"\'"\'record -a -e '
+ 'cycles,instructions\'"\'"\'\' lumpy1.cros telemetry_Crosperf'))
self.assertEqual(args_dict['cros_sdk_options'],
('--no-ns-pid --chrome_root= '
'--chrome_root_mount=/tmp/chrome_root '
@@ -344,17 +241,18 @@ class SuiteRunnerTest(unittest.TestCase):
@mock.patch.object(command_executer.CommandExecuter, 'RunCommandWOutput')
@mock.patch.object(json, 'loads')
- def test_skylab_run(self, mock_json_loads, mock_runcmd):
+ def test_skylab_run_client(self, mock_json_loads, mock_runcmd):
def FakeDownloadResult(l, task_id):
if l and task_id:
self.assertEqual(task_id, '12345')
return 0
- mock_runcmd.return_value = \
- (0,
- 'Created Swarming task https://swarming/task/b12345',
- '')
+ mock_runcmd.return_value = (
+ 0,
+ 'Created Swarming task https://swarming/task/b12345',
+ '',
+ )
self.mock_cmd_exec.RunCommandWOutput = mock_runcmd
mock_json_loads.return_value = {
@@ -385,6 +283,43 @@ class SuiteRunnerTest(unittest.TestCase):
self.assertEqual(args_list[0], ('skylab wait-task 12345'))
self.assertEqual(args_dict['command_terminator'], self.mock_cmd_term)
+ @mock.patch.object(command_executer.CommandExecuter, 'RunCommandWOutput')
+ @mock.patch.object(json, 'loads')
+ def test_skylab_run_telemetry_Crosperf(self, mock_json_loads, mock_runcmd):
+
+ def FakeDownloadResult(l, task_id):
+ if l and task_id:
+ self.assertEqual(task_id, '12345')
+ return None
+
+ mock_runcmd.return_value = (
+ 0,
+ 'Created Swarming task https://swarming/task/b12345',
+ '',
+ )
+ self.mock_cmd_exec.RunCommandWOutput = mock_runcmd
+
+ mock_json_loads.return_value = {
+ 'child-results': [{
+ 'success': True,
+ 'task-run-url': 'https://swarming/task?id=12345'
+ }]
+ }
+ self.mock_json.loads = mock_json_loads
+
+ self.mock_label.skylab = True
+ self.runner.DownloadResult = FakeDownloadResult
+ self.runner.Skylab_Run(self.mock_label, self.telemetry_crosperf_bench, '',
+ '')
+ args_list = mock_runcmd.call_args_list[0][0]
+ self.assertEqual(
+ args_list[0],
+ ('/usr/local/bin/skylab create-test '
+ '-dim dut_name:lumpy1 -dim dut_name:lumpy.cros2 '
+ '-board=lumpy -image=build '
+ "-pool=toolchain -test-args='run_local=False test=octane' "
+ 'telemetry_Crosperf'))
+
@mock.patch.object(time, 'sleep')
@mock.patch.object(command_executer.CommandExecuter, 'RunCommand')
def test_download_result(self, mock_runcmd, mock_sleep):
diff --git a/llvm_tools/copy_helpers_to_chromiumos_overlay.py b/llvm_tools/copy_helpers_to_chromiumos_overlay.py
index dd6dc9d6..98f7b966 100755
--- a/llvm_tools/copy_helpers_to_chromiumos_overlay.py
+++ b/llvm_tools/copy_helpers_to_chromiumos_overlay.py
@@ -49,6 +49,7 @@ def main():
clone_files = [
'failure_modes.py',
'get_llvm_hash.py',
+ 'git_llvm_rev.py',
'patch_manager.py',
'subprocess_helpers.py',
]
diff --git a/llvm_tools/get_llvm_hash.py b/llvm_tools/get_llvm_hash.py
index 01acbfaf..1864170d 100755
--- a/llvm_tools/get_llvm_hash.py
+++ b/llvm_tools/get_llvm_hash.py
@@ -39,10 +39,11 @@ def GetVersionFrom(src_dir, git_hash):
An SVN-style version number associated with the git hash.
"""
- args = ['--llvm_dir', src_dir, '--sha', git_hash]
- version = git_llvm_rev.main(args).strip()
- assert version.startswith('r')
- return int(version[1:])
+ version = git_llvm_rev.translate_sha_to_rev(
+ git_llvm_rev.LLVMConfig(remote='origin', dir=src_dir), git_hash)
+ # Note: branches aren't supported
+ assert version.branch == 'master', version.branch
+ return version.number
def GetGitHashFrom(src_dir, version):
@@ -59,12 +60,9 @@ def GetGitHashFrom(src_dir, version):
subprocess.CalledProcessError: Failed to find a git hash.
"""
- assert isinstance(version, int)
- args = ['--llvm_dir', src_dir, '--rev', 'r' + str(version)]
-
- git_hash = git_llvm_rev.main(args).rstrip()
-
- return git_hash
+ return git_llvm_rev.translate_rev_to_sha(
+ git_llvm_rev.LLVMConfig(remote='origin', dir=src_dir),
+ git_llvm_rev.Rev(branch='master', number=version))
@contextmanager
@@ -185,7 +183,7 @@ def GetGoogle3LLVMVersion(stable):
git_hash = check_output(cmd)
# Change type to an integer
- return GetVersionFrom(GetAndUpdateLLVMProjectInLLVMTools(), git_hash.strip())
+ return GetVersionFrom(GetAndUpdateLLVMProjectInLLVMTools(), git_hash.rstrip())
def is_svn_option(svn_option):
diff --git a/llvm_tools/git_llvm_rev.py b/llvm_tools/git_llvm_rev.py
index 76a67d65..8eefcdce 100755
--- a/llvm_tools/git_llvm_rev.py
+++ b/llvm_tools/git_llvm_rev.py
@@ -10,6 +10,8 @@ Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand,
r1234 is parsed as '(master, 1234)'.
"""
+from __future__ import print_function
+
import argparse
import re
import subprocess
@@ -129,7 +131,13 @@ def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
if merge_base == base_llvm_sha:
result = check_output(
- ['git', 'rev-list', '--count', f'{base_llvm_sha}..{sha}'],
+ [
+ 'git',
+ 'rev-list',
+ '--count',
+ '--first-parent',
+ f'{base_llvm_sha}..{sha}',
+ ],
cwd=llvm_config.dir,
)
count = int(result.strip())
@@ -145,7 +153,13 @@ def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
return Rev(branch='master', number=merge_base_number)
distance_from_base = check_output(
- ['git', 'rev-list', '--count', f'{merge_base}..{sha}'],
+ [
+ 'git',
+ 'rev-list',
+ '--count',
+ '--first-parent',
+ f'{merge_base}..{sha}',
+ ],
cwd=llvm_config.dir,
)
@@ -172,8 +186,7 @@ def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
f'Ambiguity: multiple branches from {llvm_config.remote} have {sha}: '
f'{sorted(candidates)}')
- branch, = candidates
- return Rev(branch=branch, number=revision_number)
+ return Rev(branch=candidates[0], number=revision_number)
def parse_git_commit_messages(stream: t.Iterable[str],
@@ -196,6 +209,8 @@ def parse_git_commit_messages(stream: t.Iterable[str],
lines = iter(stream)
while True:
+ # Looks like a potential bug in pylint? crbug.com/1041148
+ # pylint: disable=stop-iteration-return
sha = next(lines, None)
if sha is None:
return
@@ -283,7 +298,13 @@ def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
commit_number = number - base_revision_number
revs_between_str = check_output(
- ['git', 'rev-list', '--count', f'{base_sha}..{branch_head_sha}'],
+ [
+ 'git',
+ 'rev-list',
+ '--count',
+ '--first-parent',
+ f'{base_sha}..{branch_head_sha}',
+ ],
cwd=llvm_config.dir,
)
revs_between = int(revs_between_str.strip())
@@ -343,13 +364,11 @@ def main(argv: t.List[str]) -> None:
)
if opts.sha:
- rev = str(translate_sha_to_rev(config, opts.sha))
+ rev = translate_sha_to_rev(config, opts.sha)
print(rev)
- return rev
else:
- sha = str(translate_rev_to_sha(config, Rev.parse(opts.rev)))
+ sha = translate_rev_to_sha(config, Rev.parse(opts.rev))
print(sha)
- return sha
if __name__ == '__main__':
diff --git a/llvm_tools/git_llvm_rev_test.py b/llvm_tools/git_llvm_rev_test.py
index ebb654ff..c7d59b27 100755
--- a/llvm_tools/git_llvm_rev_test.py
+++ b/llvm_tools/git_llvm_rev_test.py
@@ -4,10 +4,17 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+"""Tests for git_llvm_rev."""
+
+from __future__ import print_function
+
+import datetime
import os
+import subprocess
import sys
import unittest
+import get_llvm_hash
import git_llvm_rev
@@ -16,11 +23,50 @@ def get_llvm_checkout() -> str:
return os.path.join(my_dir, 'llvm-project-copy')
+def ensure_llvm_project_up_to_date():
+ checkout = get_llvm_checkout()
+ if not os.path.isdir(checkout):
+ print(
+ 'No llvm-project exists locally; syncing it. This takes a while.',
+ file=sys.stderr)
+ actual_checkout = get_llvm_hash.GetAndUpdateLLVMProjectInLLVMTools()
+ assert checkout == actual_checkout, '%s != %s' % (actual_checkout, checkout)
+
+ commit_timestamp = git_llvm_rev.check_output(
+ ['git', 'log', '-n1', '--format=%ct', 'origin/master'], cwd=checkout)
+
+ commit_time = datetime.datetime.fromtimestamp(int(commit_timestamp.strip()))
+ now = datetime.datetime.now()
+
+ time_since_last_commit = now - commit_time
+
+ # Arbitrary, but if it's been more than 2d since we've seen a commit, it's
+ # probably best to bring us up-to-date.
+ if time_since_last_commit <= datetime.timedelta(days=2):
+ return
+
+ print(
+ '%d days have elapsed since the last commit to %s; auto-syncing' %
+ (time_since_last_commit.days, checkout),
+ file=sys.stderr)
+
+ result = subprocess.run(
+ ['git', 'fetch', 'origin'],
+ cwd=checkout,
+ check=True,
+ )
+ if result.returncode:
+ print(
+ 'Sync failed somehow; hoping that things are fresh enough, then...',
+ file=sys.stderr)
+
+
def get_llvm_config() -> git_llvm_rev.LLVMConfig:
return git_llvm_rev.LLVMConfig(dir=get_llvm_checkout(), remote='origin')
class Test(unittest.TestCase):
+ """Test cases for git_llvm_rev."""
def rev_to_sha_with_round_trip(self, rev: git_llvm_rev.Rev) -> str:
config = get_llvm_config()
@@ -66,6 +112,22 @@ class Test(unittest.TestCase):
self.assertIn('Try updating your tree?', str(r.exception))
+ def test_merge_commits_count_as_one_commit_crbug1041079(self) -> None:
+ # This CL merged _a lot_ of commits in. Verify a few hand-computed
+ # properties about it.
+ merge_sha_rev_number = 4496 + git_llvm_rev.base_llvm_revision
+ sha = self.rev_to_sha_with_round_trip(
+ git_llvm_rev.Rev(branch='master', number=merge_sha_rev_number))
+ self.assertEqual(sha, '0f0d0ed1c78f1a80139a1f2133fad5284691a121')
+
+ sha = self.rev_to_sha_with_round_trip(
+ git_llvm_rev.Rev(branch='master', number=merge_sha_rev_number - 1))
+ self.assertEqual(sha, '6f635f90929da9545dd696071a829a1a42f84b30')
+
+ sha = self.rev_to_sha_with_round_trip(
+ git_llvm_rev.Rev(branch='master', number=merge_sha_rev_number + 1))
+ self.assertEqual(sha, '199700a5cfeedf227619f966aa3125cef18bc958')
+
# NOTE: The below tests have _zz_ in their name as an optimization. Iterating
# on a quick test is painful when these larger tests come before it and take
# 7secs to run. Python's unittest module guarantees tests are run in
@@ -110,13 +172,5 @@ class Test(unittest.TestCase):
# back to the base commit for.
if __name__ == '__main__':
- # We have exactly one concrete target that we often have checked out anyway.
- # Rather than building tests that hopefully match that target, use it
- # directly.
- if not os.path.isdir(get_llvm_checkout()):
- print(
- 'Please checkout llvm-project-copy to run these tests. A simple way '
- 'to do that is running `./get_llvm_hash.py --llvm_version 370000`',
- file=sys.stderr)
- sys.exit(1)
+ ensure_llvm_project_up_to_date()
unittest.main()
diff --git a/llvm_tools/patch_manager.py b/llvm_tools/patch_manager.py
index c51ba200..910ecfc9 100755
--- a/llvm_tools/patch_manager.py
+++ b/llvm_tools/patch_manager.py
@@ -8,18 +8,18 @@
from __future__ import print_function
+from collections import namedtuple
+from failure_modes import FailureModes
+from subprocess_helpers import check_call
+from subprocess_helpers import check_output
+
import argparse
import json
+import get_llvm_hash
import os
import subprocess
import sys
-from collections import namedtuple
-from failure_modes import FailureModes
-from get_llvm_hash import LLVMHash
-from subprocess_helpers import check_call
-from subprocess_helpers import check_output
-
def is_directory(dir_path):
"""Validates that the argument passed into 'argparse' is a directory."""
@@ -151,7 +151,7 @@ def GetHEADSVNVersion(src_path):
git_hash = check_output(cmd)
- version = GetVersionFrom(src_path, git_hash)
+ version = get_llvm_hash.GetVersionFrom(src_path, git_hash.rstrip())
return version
@@ -322,7 +322,7 @@ def PerformBisection(src_path, good_commit, bad_commit, svn_version,
check_output(end_cmd)
# `git bisect run` returns the bad commit hash and the commit message.
- version = GetVersionFrom(src_path, git_hash)
+ version = get_llvm_hash.GetVersionFrom(src_path, git_hash.rstrip())
return version
diff --git a/llvm_tools/update_chromeos_llvm_next_hash.py b/llvm_tools/update_chromeos_llvm_next_hash.py
index 30660de3..d47da46d 100755
--- a/llvm_tools/update_chromeos_llvm_next_hash.py
+++ b/llvm_tools/update_chromeos_llvm_next_hash.py
@@ -167,8 +167,8 @@ def _ConvertChrootPathsToSymLinkPaths(chromeos_root, chroot_file_paths):
# and combine the chroot path with the result and add it to the list.
for cur_chroot_file_path in chroot_file_paths:
if not cur_chroot_file_path.startswith(chroot_prefix):
- raise ValueError('Invalid prefix for the chroot path: %s' %
- cur_chroot_file_path)
+ raise ValueError(
+ 'Invalid prefix for the chroot path: %s' % cur_chroot_file_path)
rel_path = cur_chroot_file_path[len(chroot_prefix):]
@@ -537,8 +537,8 @@ def StagePatchMetadataFileForCommit(patch_metadata_file_path):
"""
if not os.path.isfile(patch_metadata_file_path):
- raise ValueError('Invalid patch metadata file provided: %s' %
- patch_metadata_file_path)
+ raise ValueError(
+ 'Invalid patch metadata file provided: %s' % patch_metadata_file_path)
# Cmd to stage the patch metadata file for commit.
stage_patch_file = [
@@ -635,14 +635,15 @@ def UpdatePackages(packages, llvm_hash, llvm_version, chroot_path,
try:
if svn_option in get_llvm_hash.KNOWN_HASH_SOURCES:
- commit_message_header = ('llvm-next/%s: Update packages to r%d' %
- (svn_option, llvm_version))
+ commit_message_header = ('llvm-next/%s: upgrade to %s (r%d)' %
+ (svn_option, llvm_hash, llvm_version))
else:
- commit_message_header = 'llvm-next: Update packages to r%d' % llvm_version
+ commit_message_header = (
+ 'llvm-next: upgrade to %s (r%d)' % (llvm_hash, llvm_version))
commit_messages = ['-m %s' % commit_message_header]
- commit_messages.append('-m %s' % 'Following packages have been updated:')
+ commit_messages.append('-m The following packages have been updated:')
# Holds the list of packages that are updating.
packages = []
@@ -700,11 +701,10 @@ def main():
llvm_hash, llvm_version = GetLLVMHashAndVersionFromSVNOption(svn_option)
- change_list = UpdatePackages(args_output.update_packages, llvm_hash,
- llvm_version, args_output.chroot_path,
- args_output.patch_metadata_file,
- FailureModes(args_output.failure_mode),
- svn_option)
+ change_list = UpdatePackages(
+ args_output.update_packages, llvm_hash, llvm_version,
+ args_output.chroot_path, args_output.patch_metadata_file,
+ FailureModes(args_output.failure_mode), svn_option)
print('Successfully updated packages to %d' % llvm_version)
print('Gerrit URL: %s' % change_list.url)
diff --git a/llvm_tools/update_chromeos_llvm_next_hash_unittest.py b/llvm_tools/update_chromeos_llvm_next_hash_unittest.py
index 756ee9c9..4e39bafa 100755
--- a/llvm_tools/update_chromeos_llvm_next_hash_unittest.py
+++ b/llvm_tools/update_chromeos_llvm_next_hash_unittest.py
@@ -917,8 +917,8 @@ class UpdateLLVMNextHashTest(unittest.TestCase):
mock_uprev_ebuild.assert_called_once_with(symlink_path_to_package)
expected_commit_messages = [
- '-m %s' % 'llvm-next/tot: Update packages to r1000',
- '-m %s' % 'Following packages have been updated:',
+ '-m %s' % 'llvm-next/tot: upgrade to a123testhash5 (r1000)',
+ '-m %s' % 'The following packages have been updated:',
'-m %s' % 'path/to',
'-m %s' % 'For the package path/to:',
'-m %s' % 'The patch metadata file PATCHES.json was modified',