author     Caroline Tice <cmtice@google.com>     2016-07-20 12:52:59 -0700
committer  chrome-bot <chrome-bot@chromium.org>  2016-07-25 11:00:38 -0700
commit     a8af9a7a2462b00e72deff99327bdb452a715277 (patch)
tree       92573f258457cc6a737c10df0dd250265b9efb8d /cros_utils
parent     19b6f5fc11dcf97144e9723c8f78534cce27423a (diff)
download   toolchain-utils-a8af9a7a2462b00e72deff99327bdb452a715277.tar.gz
[toolchain-utils] Finish switching utils/ to cros_utils/.
This CL finishes switching the subdirectory from 'utils' to 'cros_utils'.
It changes all the remaining import statements to use 'cros_utils'; it
removes the 'cros_utils' symlink, and it renames the 'utils' subdirectory
to 'cros_utils'.

BUG=chromium:568195
TEST=ran crosperf & binary search tool unittests.

Change-Id: I7427f8bfb2ddac3a4b6108e46782039059684382
Reviewed-on: https://chrome-internal-review.googlesource.com/270396
Commit-Ready: Caroline Tice <cmtice@google.com>
Tested-by: Caroline Tice <cmtice@google.com>
Reviewed-by: Cassidy Burden <cburden@google.com>
Reviewed-by: Luis Lozano <llozano@chromium.org>
Diffstat (limited to 'cros_utils')
l---------  cros_utils                                 1
-rw-r--r--  cros_utils/__init__.py                     1
-rwxr-xr-x  cros_utils/buildbot_json.py             1518
-rw-r--r--  cros_utils/buildbot_utils.py             328
-rw-r--r--  cros_utils/colortrans.py                 388
-rw-r--r--  cros_utils/command_executer.py           685
-rwxr-xr-x  cros_utils/command_executer_unittest.py   27
-rw-r--r--  cros_utils/constants.py                   10
-rwxr-xr-x  cros_utils/email_sender.py               144
-rw-r--r--  cros_utils/file_utils.py                  87
-rw-r--r--  cros_utils/html_tools.py                  91
-rw-r--r--  cros_utils/locks.py                       44
-rw-r--r--  cros_utils/logger.py                     369
-rw-r--r--  cros_utils/machines.py                    25
-rw-r--r--  cros_utils/manifest_versions.py           97
-rw-r--r--  cros_utils/misc.py                       557
-rw-r--r--  cros_utils/misc_test.py                   51
-rw-r--r--  cros_utils/no_pseudo_terminal_test.py     53
-rwxr-xr-x  cros_utils/perf_diff.py                  332
-rw-r--r--  cros_utils/pstat.py                     1077
-rw-r--r--  cros_utils/stats.py                     4519
-rw-r--r--  cros_utils/tabulator.py                 1248
-rw-r--r--  cros_utils/tabulator_test.py             141
-rw-r--r--  cros_utils/timeline.py                    52
-rw-r--r--  cros_utils/timeline_test.py               57
25 files changed, 11901 insertions(+), 1 deletion(-)
diff --git a/cros_utils b/cros_utils
deleted file mode 120000
index 66252432..00000000
--- a/cros_utils
+++ /dev/null
@@ -1 +0,0 @@
-utils
\ No newline at end of file
diff --git a/cros_utils/__init__.py b/cros_utils/__init__.py
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/cros_utils/__init__.py
@@ -0,0 +1 @@
+
diff --git a/cros_utils/buildbot_json.py b/cros_utils/buildbot_json.py
new file mode 100755
index 00000000..693a42cd
--- /dev/null
+++ b/cros_utils/buildbot_json.py
@@ -0,0 +1,1518 @@
+#!/usr/bin/python2
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# NOTE: This file is NOT under GPL. See above.
+"""Queries buildbot through the json interface.
+"""
+
+from __future__ import print_function
+
+__author__ = 'maruel@chromium.org'
+__version__ = '1.2'
+
+import code
+import datetime
+import functools
+import json
+
+# Pylint recommends we use "from chromite.lib import cros_logging as logging",
+# but that is a Chromite-specific policy; we want to keep the standard logging.
+# pylint: disable=cros-logging-import
+import logging
+
+# pylint: disable=deprecated-module
+import optparse
+
+import time
+import urllib
+import urllib2
+import sys
+
+try:
+ from natsort import natsorted
+except ImportError:
+ # natsorted is a simple helper to sort "naturally", e.g. "vm40" is sorted
+ # after "vm7". Defaults to normal sorting.
+ natsorted = sorted
+
+# These values are buildbot constants used for Build and BuildStep.
+# This line was copied from master/buildbot/status/builder.py.
+SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = range(6)
+
+## Generic node caching code.
+
+
+class Node(object):
+ """Root class for all nodes in the graph.
+
+ Provides base functionality for any node in the graph, regardless of whether
+ it has children and whether its content can be addressed through a url or
+ must be fetched as part of another node.
+
+ self.printable_attributes is only used for self-documentation and for the
+ str() implementation.
+ """
+ printable_attributes = []
+
+ def __init__(self, parent, url):
+ self.printable_attributes = self.printable_attributes[:]
+ if url:
+ self.printable_attributes.append('url')
+ url = url.rstrip('/')
+ if parent is not None:
+ self.printable_attributes.append('parent')
+ self.url = url
+ self.parent = parent
+
+ def __str__(self):
+ return self.to_string()
+
+ def __repr__(self):
+ """Embeds key if present."""
+ key = getattr(self, 'key', None)
+ if key is not None:
+ return '<%s key=%s>' % (self.__class__.__name__, key)
+ cached_keys = getattr(self, 'cached_keys', None)
+ if cached_keys is not None:
+ return '<%s keys=%s>' % (self.__class__.__name__, cached_keys)
+ return super(Node, self).__repr__()
+
+ def to_string(self, maximum=100):
+ out = ['%s:' % self.__class__.__name__]
+ assert not 'printable_attributes' in self.printable_attributes
+
+ def limit(txt):
+ txt = str(txt)
+ if maximum > 0:
+ if len(txt) > maximum + 2:
+ txt = txt[:maximum] + '...'
+ return txt
+
+ for k in sorted(self.printable_attributes):
+ if k == 'parent':
+ # Avoid infinite recursion.
+ continue
+ out.append(limit(' %s: %r' % (k, getattr(self, k))))
+ return '\n'.join(out)
+
+ def refresh(self):
+ """Refreshes the data."""
+ self.discard()
+ return self.cache()
+
+ def cache(self): # pragma: no cover
+ """Caches the data."""
+ raise NotImplementedError()
+
+ def discard(self): # pragma: no cover
+ """Discards cached data.
+
+ Pretty much everything is temporary except completed Build.
+ """
+ raise NotImplementedError()
+
+
+class AddressableBaseDataNode(Node): # pylint: disable=W0223
+ """A node that contains a dictionary of data that can be fetched with an url.
+
+ The node is directly addressable. It also often can be fetched by the parent.
+ """
+ printable_attributes = Node.printable_attributes + ['data']
+
+ def __init__(self, parent, url, data):
+ super(AddressableBaseDataNode, self).__init__(parent, url)
+ self._data = data
+
+ @property
+ def cached_data(self):
+ return self._data
+
+ @property
+ def data(self):
+ self.cache()
+ return self._data
+
+ def cache(self):
+ if self._data is None:
+ self._data = self._readall()
+ return True
+ return False
+
+ def discard(self):
+ self._data = None
+
+ def read(self, suburl):
+ assert self.url, self.__class__.__name__
+ url = self.url
+ if suburl:
+ url = '%s/%s' % (self.url, suburl)
+ return self.parent.read(url)
+
+ def _readall(self):
+ return self.read('')
+
+
+class AddressableDataNode(AddressableBaseDataNode): # pylint: disable=W0223
+ """Automatically encodes the url."""
+
+ def __init__(self, parent, url, data):
+ super(AddressableDataNode, self).__init__(parent, urllib.quote(url), data)
+
+
+class NonAddressableDataNode(Node): # pylint: disable=W0223
+ """A node that cannot be addressed by an unique url.
+
+ The data comes directly from the parent.
+ """
+
+ def __init__(self, parent, subkey):
+ super(NonAddressableDataNode, self).__init__(parent, None)
+ self.subkey = subkey
+
+ @property
+ def cached_data(self):
+ if self.parent.cached_data is None:
+ return None
+ return self.parent.cached_data[self.subkey]
+
+ @property
+ def data(self):
+ return self.parent.data[self.subkey]
+
+ def cache(self):
+ self.parent.cache()
+
+ def discard(self): # pragma: no cover
+ """Avoid invalid state when parent recreate the object."""
+ raise AttributeError('Call parent discard() instead')
+
+
+class VirtualNodeList(Node):
+ """Base class for every node that has children.
+
+ Adds partial support for keys and iterator functionality. 'key' can be a
+ string or an int. Not to be used directly.
+ """
+ printable_attributes = Node.printable_attributes + ['keys']
+
+ def __init__(self, parent, url):
+ super(VirtualNodeList, self).__init__(parent, url)
+ # Keeps the keys independently when ordering is needed.
+ self._is_cached = False
+ self._has_keys_cached = False
+
+ def __contains__(self, key):
+ """Enables 'if i in obj:'."""
+ return key in self.keys
+
+ def __iter__(self):
+ """Enables 'for i in obj:'. It returns children."""
+ self.cache_keys()
+ for key in self.keys:
+ yield self[key]
+
+ def __len__(self):
+ """Enables 'len(obj)' to get the number of childs."""
+ return len(self.keys)
+
+ def discard(self):
+ """Discards data.
+
+ The default behavior is to not invalidate cached keys. The only place where
+ keys need to be invalidated is with Builds.
+ """
+ self._is_cached = False
+ self._has_keys_cached = False
+
+ @property
+ def cached_children(self): # pragma: no cover
+ """Returns an iterator over the children that are cached."""
+ raise NotImplementedError()
+
+ @property
+ def cached_keys(self): # pragma: no cover
+ raise NotImplementedError()
+
+ @property
+ def keys(self): # pragma: no cover
+ """Returns the keys for every children."""
+ raise NotImplementedError()
+
+ def __getitem__(self, key): # pragma: no cover
+ """Returns a child, without fetching its data.
+
+ The child could be invalid since no verification is done.
+ """
+ raise NotImplementedError()
+
+ def cache(self): # pragma: no cover
+ """Cache all the children."""
+ raise NotImplementedError()
+
+ def cache_keys(self): # pragma: no cover
+ """Cache all children's keys."""
+ raise NotImplementedError()
+
+
+class NodeList(VirtualNodeList): # pylint: disable=W0223
+ """Adds a cache of the keys."""
+
+ def __init__(self, parent, url):
+ super(NodeList, self).__init__(parent, url)
+ self._keys = []
+
+ @property
+ def cached_keys(self):
+ return self._keys
+
+ @property
+ def keys(self):
+ self.cache_keys()
+ return self._keys
+
+
+class NonAddressableNodeList(VirtualNodeList): # pylint: disable=W0223
+ """A node that contains children but retrieves all its data from its parent.
+
+ I.e. there's no url to get directly this data.
+ """
+ # Child class object for children of this instance. For example, BuildSteps
+ # has BuildStep children.
+ _child_cls = None
+
+ def __init__(self, parent, subkey):
+ super(NonAddressableNodeList, self).__init__(parent, None)
+ self.subkey = subkey
+ assert (not isinstance(self._child_cls, NonAddressableDataNode) and
+ issubclass(self._child_cls, NonAddressableDataNode)), (
+ self._child_cls.__name__)
+
+ @property
+ def cached_children(self):
+ if self.parent.cached_data is not None:
+ for i in xrange(len(self.parent.cached_data[self.subkey])):
+ yield self[i]
+
+ @property
+ def cached_data(self):
+ if self.parent.cached_data is None:
+ return None
+ return self.parent.data.get(self.subkey, None)
+
+ @property
+ def cached_keys(self):
+ if self.parent.cached_data is None:
+ return None
+ return range(len(self.parent.data.get(self.subkey, [])))
+
+ @property
+ def data(self):
+ return self.parent.data[self.subkey]
+
+ def cache(self):
+ self.parent.cache()
+
+ def cache_keys(self):
+ self.parent.cache()
+
+ def discard(self): # pragma: no cover
+ """Do not call.
+
+ Avoid infinite recursion by having the caller call the parent's
+ discard() explicitly.
+ """
+ raise AttributeError('Call parent discard() instead')
+
+ def __iter__(self):
+ """Enables 'for i in obj:'. It returns children."""
+ if self.data:
+ for i in xrange(len(self.data)):
+ yield self[i]
+
+ def __getitem__(self, key):
+ """Doesn't cache the value, it's not needed.
+
+ TODO(maruel): Cache?
+ """
+ if isinstance(key, int) and key < 0:
+ key = len(self.data) + key
+ # pylint: disable=E1102
+ return self._child_cls(self, key)
+
+
+class AddressableNodeList(NodeList):
+ """A node that has children that can be addressed with an url."""
+
+ # Child class object for children of this instance. For example, Builders has
+ # Builder children and Builds has Build children.
+ _child_cls = None
+
+ def __init__(self, parent, url):
+ super(AddressableNodeList, self).__init__(parent, url)
+ self._cache = {}
+ assert (not isinstance(self._child_cls, AddressableDataNode) and
+ issubclass(self._child_cls, AddressableDataNode)), (
+ self._child_cls.__name__)
+
+ @property
+ def cached_children(self):
+ for item in self._cache.itervalues():
+ if item.cached_data is not None:
+ yield item
+
+ @property
+ def cached_keys(self):
+ return self._cache.keys()
+
+ def __getitem__(self, key):
+ """Enables 'obj[i]'."""
+ if self._has_keys_cached and not key in self._keys:
+ raise KeyError(key)
+
+ if not key in self._cache:
+ # Create an empty object.
+ self._create_obj(key, None)
+ return self._cache[key]
+
+ def cache(self):
+ if not self._is_cached:
+ data = self._readall()
+ for key in sorted(data):
+ self._create_obj(key, data[key])
+ self._is_cached = True
+ self._has_keys_cached = True
+
+ def cache_partial(self, children):
+ """Caches a partial number of children.
+
+ This method is more efficient since it does a single request for all the
+ children instead of one request per children.
+
+ It only grab objects not already cached.
+ """
+ # pylint: disable=W0212
+ if not self._is_cached:
+ to_fetch = [
+ child
+ for child in children
+ if not (child in self._cache and self._cache[child].cached_data)
+ ]
+ if to_fetch:
+ # Similar to cache(). The only reason to sort is to simplify testing.
+ params = '&'.join('select=%s' % urllib.quote(str(v))
+ for v in sorted(to_fetch))
+ data = self.read('?' + params)
+ for key in sorted(data):
+ self._create_obj(key, data[key])
+
+ def cache_keys(self):
+ """Implement to speed up enumeration. Defaults to call cache()."""
+ if not self._has_keys_cached:
+ self.cache()
+ assert self._has_keys_cached
+
+ def discard(self):
+ """Discards temporary children."""
+ super(AddressableNodeList, self).discard()
+ for v in self._cache.itervalues():
+ v.discard()
+
+ def read(self, suburl):
+ assert self.url, self.__class__.__name__
+ url = self.url
+ if suburl:
+ url = '%s/%s' % (self.url, suburl)
+ return self.parent.read(url)
+
+ def _create_obj(self, key, data):
+ """Creates an object of type self._child_cls."""
+ # pylint: disable=E1102
+ obj = self._child_cls(self, key, data)
+ # obj.key and key may be different.
+ # No need to override cached data with None.
+ if data is not None or obj.key not in self._cache:
+ self._cache[obj.key] = obj
+ if obj.key not in self._keys:
+ self._keys.append(obj.key)
+
+ def _readall(self):
+ return self.read('')
+
+
+class SubViewNodeList(VirtualNodeList): # pylint: disable=W0223
+ """A node that shows a subset of children that comes from another structure.
+
+ The node is not addressable.
+
+ E.g. the keys are retrieved from parent but the actual data comes from
+ virtual_parent.
+ """
+
+ def __init__(self, parent, virtual_parent, subkey):
+ super(SubViewNodeList, self).__init__(parent, None)
+ self.subkey = subkey
+ self.virtual_parent = virtual_parent
+ assert isinstance(self.parent, AddressableDataNode)
+ assert isinstance(self.virtual_parent, NodeList)
+
+ @property
+ def cached_children(self):
+ if self.parent.cached_data is not None:
+ for item in self.keys:
+ if item in self.virtual_parent.keys:
+ child = self[item]
+ if child.cached_data is not None:
+ yield child
+
+ @property
+ def cached_keys(self):
+ return (self.parent.cached_data or {}).get(self.subkey, [])
+
+ @property
+ def keys(self):
+ self.cache_keys()
+ return self.parent.data.get(self.subkey, [])
+
+ def cache(self):
+ """Batch request for each child in a single read request."""
+ if not self._is_cached:
+ self.virtual_parent.cache_partial(self.keys)
+ self._is_cached = True
+
+ def cache_keys(self):
+ if not self._has_keys_cached:
+ self.parent.cache()
+ self._has_keys_cached = True
+
+ def discard(self):
+ if self.parent.cached_data is not None:
+ for child in self.virtual_parent.cached_children:
+ if child.key in self.keys:
+ child.discard()
+ self.parent.discard()
+ super(SubViewNodeList, self).discard()
+
+ def __getitem__(self, key):
+ """Makes sure the key is in our key but grab it from the virtual parent."""
+ return self.virtual_parent[key]
+
+ def __iter__(self):
+ self.cache()
+ return super(SubViewNodeList, self).__iter__()
+
+###############################################################################
+## Buildbot-specific code
+
+
+class Slave(AddressableDataNode):
+ """Buildbot slave class."""
+ printable_attributes = AddressableDataNode.printable_attributes + [
+ 'name',
+ 'key',
+ 'connected',
+ 'version',
+ ]
+
+ def __init__(self, parent, name, data):
+ super(Slave, self).__init__(parent, name, data)
+ self.name = name
+ self.key = self.name
+ # TODO(maruel): Add SlaveBuilders and a 'builders' property.
+ # TODO(maruel): Add a 'running_builds' property.
+
+ @property
+ def connected(self):
+ return self.data.get('connected', False)
+
+ @property
+ def version(self):
+ return self.data.get('version')
+
+
+class Slaves(AddressableNodeList):
+ """Buildbot slaves."""
+ _child_cls = Slave
+ printable_attributes = AddressableNodeList.printable_attributes + ['names']
+
+ def __init__(self, parent):
+ super(Slaves, self).__init__(parent, 'slaves')
+
+ @property
+ def names(self):
+ return self.keys
+
+
+class BuilderSlaves(SubViewNodeList):
+ """Similar to Slaves but only list slaves connected to a specific builder."""
+ printable_attributes = SubViewNodeList.printable_attributes + ['names']
+
+ def __init__(self, parent):
+ super(BuilderSlaves, self).__init__(parent, parent.parent.parent.slaves,
+ 'slaves')
+
+ @property
+ def names(self):
+ return self.keys
+
+
+class BuildStep(NonAddressableDataNode):
+ """Class for a buildbot build step."""
+ printable_attributes = NonAddressableDataNode.printable_attributes + [
+ 'name',
+ 'number',
+ 'start_time',
+ 'end_time',
+ 'duration',
+ 'is_started',
+ 'is_finished',
+ 'is_running',
+ 'result',
+ 'simplified_result',
+ ]
+
+ def __init__(self, parent, number):
+ """Pre-loaded, since the data is retrieved via the Build object."""
+ assert isinstance(number, int)
+ super(BuildStep, self).__init__(parent, number)
+ self.number = number
+
+ @property
+ def start_time(self):
+ if self.data.get('times'):
+ return int(round(self.data['times'][0]))
+
+ @property
+ def end_time(self):
+ times = self.data.get('times')
+ if times and len(times) == 2 and times[1]:
+ return int(round(times[1]))
+
+ @property
+ def duration(self):
+ if self.start_time:
+ return (self.end_time or int(round(time.time()))) - self.start_time
+
+ @property
+ def name(self):
+ return self.data['name']
+
+ @property
+ def is_started(self):
+ return self.data.get('isStarted', False)
+
+ @property
+ def is_finished(self):
+ return self.data.get('isFinished', False)
+
+ @property
+ def is_running(self):
+ return self.is_started and not self.is_finished
+
+ @property
+ def result(self):
+ result = self.data.get('results')
+ if result is None:
+ # results may be 0, in that case with filter=1, the value won't be
+ # present.
+ if self.data.get('isFinished'):
+ result = self.data.get('results', 0)
+ while isinstance(result, list):
+ result = result[0]
+ return result
+
+ @property
+ def simplified_result(self):
+ """Returns a simplified 3 state value, True, False or None."""
+ result = self.result
+ if result in (SUCCESS, WARNINGS):
+ return True
+ elif result in (FAILURE, EXCEPTION, RETRY):
+ return False
+ assert result in (None, SKIPPED), (result, self.data)
+ return None
+
+
+class BuildSteps(NonAddressableNodeList):
+ """Duplicates keys to support lookup by both step number and step name."""
+ printable_attributes = NonAddressableNodeList.printable_attributes + [
+ 'failed',
+ ]
+ _child_cls = BuildStep
+
+ def __init__(self, parent):
+ """Pre-loaded, since the data is retrieved via the Build object."""
+ super(BuildSteps, self).__init__(parent, 'steps')
+
+ @property
+ def keys(self):
+ """Returns the steps name in order."""
+ return [i['name'] for i in self.data or []]
+
+ @property
+ def failed(self):
+ """Shortcuts that lists the step names of steps that failed."""
+ return [step.name for step in self if step.simplified_result is False]
+
+ def __getitem__(self, key):
+ """Accept step name in addition to index number."""
+ if isinstance(key, basestring):
+ # It's a string, try to find the corresponding index.
+ for i, step in enumerate(self.data):
+ if step['name'] == key:
+ key = i
+ break
+ else:
+ raise KeyError(key)
+ return super(BuildSteps, self).__getitem__(key)
+
+
+class Build(AddressableDataNode):
+ """Buildbot build info."""
+ printable_attributes = AddressableDataNode.printable_attributes + [
+ 'key',
+ 'number',
+ 'steps',
+ 'blame',
+ 'reason',
+ 'revision',
+ 'result',
+ 'simplified_result',
+ 'start_time',
+ 'end_time',
+ 'duration',
+ 'slave',
+ 'properties',
+ 'completed',
+ ]
+
+ def __init__(self, parent, key, data):
+ super(Build, self).__init__(parent, str(key), data)
+ self.number = int(key)
+ self.key = self.number
+ self.steps = BuildSteps(self)
+
+ @property
+ def blame(self):
+ return self.data.get('blame', [])
+
+ @property
+ def builder(self):
+ """Returns the Builder object.
+
+ Goes up the hierarchy to find the Buildbot.builders[builder] instance.
+ """
+ return self.parent.parent.parent.parent.builders[self.data['builderName']]
+
+ @property
+ def start_time(self):
+ if self.data.get('times'):
+ return int(round(self.data['times'][0]))
+
+ @property
+ def end_time(self):
+ times = self.data.get('times')
+ if times and len(times) == 2 and times[1]:
+ return int(round(times[1]))
+
+ @property
+ def duration(self):
+ if self.start_time:
+ return (self.end_time or int(round(time.time()))) - self.start_time
+
+ @property
+ def eta(self):
+ return self.data.get('eta', 0)
+
+ @property
+ def completed(self):
+ return self.data.get('currentStep') is None
+
+ @property
+ def properties(self):
+ return self.data.get('properties', [])
+
+ @property
+ def reason(self):
+ return self.data.get('reason')
+
+ @property
+ def result(self):
+ result = self.data.get('results')
+ while isinstance(result, list):
+ result = result[0]
+ if result is None and self.steps:
+ # results may be 0, in that case with filter=1, the value won't be
+ # present.
+ result = self.steps[-1].result
+ return result
+
+ @property
+ def revision(self):
+ return self.data.get('sourceStamp', {}).get('revision')
+
+ @property
+ def simplified_result(self):
+ """Returns a simplified 3 state value, True, False or None."""
+ result = self.result
+ if result in (SUCCESS, WARNINGS, SKIPPED):
+ return True
+ elif result in (FAILURE, EXCEPTION, RETRY):
+ return False
+ assert result is None, (result, self.data)
+ return None
+
+ @property
+ def slave(self):
+ """Returns the Slave object.
+
+ Goes up the hierarchy to find the Buildbot.slaves[slave] instance.
+ """
+ return self.parent.parent.parent.parent.slaves[self.data['slave']]
+
+ def discard(self):
+ """Completed Build isn't discarded."""
+ if self._data and self.result is None:
+ assert not self.steps or not self.steps[-1].data.get('isFinished')
+ self._data = None
+
+
+class CurrentBuilds(SubViewNodeList):
+ """Lists of the current builds."""
+
+ def __init__(self, parent):
+ super(CurrentBuilds, self).__init__(parent, parent.builds, 'currentBuilds')
+
+
+class PendingBuilds(AddressableDataNode):
+ """List of the pending builds."""
+
+ def __init__(self, parent):
+ super(PendingBuilds, self).__init__(parent, 'pendingBuilds', None)
+
+
+class Builds(AddressableNodeList):
+ """Supports iteration.
+
+ Recommends using .cache() to speed up if a significant number of builds are
+ iterated over.
+ """
+ _child_cls = Build
+
+ def __init__(self, parent):
+ super(Builds, self).__init__(parent, 'builds')
+
+ def __getitem__(self, key):
+ """Support for negative reference and enable retrieving non-cached builds.
+
+ e.g. -1 is the last build, -2 is the previous build before the last one.
+ """
+ key = int(key)
+ if key < 0:
+ # Convert negative to positive build number.
+ self.cache_keys()
+ # Since the negative value can be outside of the cache keys range, use the
+ # highest key value and calculate from it.
+ key = max(self._keys) + key + 1
+
+ if not key in self._cache:
+ # Create an empty object.
+ self._create_obj(key, None)
+ return self._cache[key]
+
+ def __iter__(self):
+ """Returns cached Build objects in reversed order.
+
+ The most recent build is returned first and then in reverse chronological
+ order, up to the oldest build cached by the server. Older builds can be
+ accessed but will trigger significantly more I/O so they are not included by
+ default in the iteration.
+
+ To access the older builds, use self.iterall() instead.
+ """
+ self.cache()
+ return reversed(self._cache.values())
+
+ def iterall(self):
+ """Returns Build objects in decreasing order unbounded up to build 0.
+
+ The most recent build is returned first and then in reverse chronological
+ order. Older builds can be accessed and will trigger significantly more I/O
+ so use this carefully.
+ """
+ # Only cache keys here.
+ self.cache_keys()
+ if self._keys:
+ for i in xrange(max(self._keys), -1, -1):
+ yield self[i]
+
+ def cache_keys(self):
+ """Grabs the keys (build numbers) from the builder."""
+ if not self._has_keys_cached:
+ for i in self.parent.data.get('cachedBuilds', []):
+ i = int(i)
+ self._cache.setdefault(i, Build(self, i, None))
+ if i not in self._keys:
+ self._keys.append(i)
+ self._has_keys_cached = True
+
+ def discard(self):
+ super(Builds, self).discard()
+ # Can't keep keys.
+ self._has_keys_cached = False
+
+ def _readall(self):
+ return self.read('_all')
+
+
+class Builder(AddressableDataNode):
+ """Builder status."""
+ printable_attributes = AddressableDataNode.printable_attributes + [
+ 'name',
+ 'key',
+ 'builds',
+ 'slaves',
+ 'pending_builds',
+ 'current_builds',
+ ]
+
+ def __init__(self, parent, name, data):
+ super(Builder, self).__init__(parent, name, data)
+ self.name = name
+ self.key = name
+ self.builds = Builds(self)
+ self.slaves = BuilderSlaves(self)
+ self.current_builds = CurrentBuilds(self)
+ self.pending_builds = PendingBuilds(self)
+
+ def discard(self):
+ super(Builder, self).discard()
+ self.builds.discard()
+ self.slaves.discard()
+ self.current_builds.discard()
+
+
+class Builders(AddressableNodeList):
+ """Root list of builders."""
+ _child_cls = Builder
+
+ def __init__(self, parent):
+ super(Builders, self).__init__(parent, 'builders')
+
+
+class Buildbot(AddressableBaseDataNode):
+ """This object should be recreated on a master restart as it caches data."""
+ # Throttle fetches to not kill the server.
+ auto_throttle = None
+ printable_attributes = AddressableDataNode.printable_attributes + [
+ 'slaves',
+ 'builders',
+ 'last_fetch',
+ ]
+
+ def __init__(self, url):
+ super(Buildbot, self).__init__(None, url.rstrip('/') + '/json', None)
+ self._builders = Builders(self)
+ self._slaves = Slaves(self)
+ self.last_fetch = None
+
+ @property
+ def builders(self):
+ return self._builders
+
+ @property
+ def slaves(self):
+ return self._slaves
+
+ def discard(self):
+ """Discards information about Builders and Slaves."""
+ super(Buildbot, self).discard()
+ self._builders.discard()
+ self._slaves.discard()
+
+ def read(self, suburl):
+ if self.auto_throttle:
+ if self.last_fetch:
+ delta = datetime.datetime.utcnow() - self.last_fetch
+ remaining = (datetime.timedelta(seconds=self.auto_throttle) - delta)
+ if remaining > datetime.timedelta(seconds=0):
+ logging.debug('Sleeping for %ss', remaining)
+ time.sleep(remaining.seconds)
+ self.last_fetch = datetime.datetime.utcnow()
+ url = '%s/%s' % (self.url, suburl)
+ if '?' in url:
+ url += '&filter=1'
+ else:
+ url += '?filter=1'
+ logging.info('read(%s)', suburl)
+ channel = urllib.urlopen(url)
+ data = channel.read()
+ try:
+ return json.loads(data)
+ except ValueError:
+ if channel.getcode() >= 400:
+ # Convert it into an HTTPError for easier processing.
+ raise urllib2.HTTPError(url, channel.getcode(), '%s:\n%s' % (url, data),
+ channel.headers, None)
+ raise
+
+ def _readall(self):
+ return self.read('project')
+
+###############################################################################
+## Controller code
+
+
+def usage(more):
+
+ def hook(fn):
+ fn.func_usage_more = more
+ return fn
+
+ return hook
+
+
+def need_buildbot(fn):
+ """Post-parse args to create a buildbot object."""
+
+ @functools.wraps(fn)
+ def hook(parser, args, *extra_args, **kwargs):
+ old_parse_args = parser.parse_args
+
+ def new_parse_args(args):
+ options, args = old_parse_args(args)
+ if len(args) < 1:
+ parser.error('Need to pass the root url of the buildbot')
+ url = args.pop(0)
+ if not url.startswith('http'):
+ url = 'http://' + url
+ buildbot = Buildbot(url)
+ buildbot.auto_throttle = options.throttle
+ return options, args, buildbot
+
+ parser.parse_args = new_parse_args
+ # Call the original function with the modified parser.
+ return fn(parser, args, *extra_args, **kwargs)
+
+ hook.func_usage_more = '[options] <url>'
+ return hook
+
+
+@need_buildbot
+def CMDpending(parser, args):
+ """Lists pending jobs."""
+ parser.add_option('-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ options, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ if not options.builders:
+ options.builders = buildbot.builders.keys
+ for builder in options.builders:
+ builder = buildbot.builders[builder]
+ pending_builds = builder.data.get('pendingBuilds', 0)
+ if not pending_builds:
+ continue
+ print('Builder %s: %d' % (builder.name, pending_builds))
+ if not options.quiet:
+ for pending in builder.pending_builds.data:
+ if 'revision' in pending['source']:
+ print(' revision: %s' % pending['source']['revision'])
+ for change in pending['source']['changes']:
+ print(' change:')
+ print(' comment: %r' % unicode(change['comments'][:50]))
+ print(' who: %s' % change['who'])
+ return 0
+
+
+@usage('[options] <url> [commands] ...')
+@need_buildbot
+def CMDrun(parser, args):
+ """Runs commands passed as parameters.
+
+ When passing commands on the command line, each command will be run as if it
+ was on its own line.
+ """
+ parser.add_option('-f', '--file', help='Read script from file')
+ parser.add_option('-i',
+ dest='use_stdin',
+ action='store_true',
+ help='Read script on stdin')
+ # Variable 'buildbot' is not used directly.
+ # pylint: disable=W0612
+ options, args, buildbot = parser.parse_args(args)
+ if (bool(args) + bool(options.use_stdin) + bool(options.file)) != 1:
+ parser.error('Need to pass only one of: <commands>, -f <file> or -i')
+ if options.use_stdin:
+ cmds = sys.stdin.read()
+ elif options.file:
+ cmds = open(options.file).read()
+ else:
+ cmds = '\n'.join(args)
+ compiled = compile(cmds, '<cmd line>', 'exec')
+ # pylint: disable=eval-used
+ eval(compiled, globals(), locals())
+ return 0
+
+
+@need_buildbot
+def CMDinteractive(parser, args):
+ """Runs an interactive shell to run queries."""
+ _, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ prompt = (
+ 'Buildbot interactive console for "%s".\n'
+ 'Hint: Start with typing: \'buildbot.printable_attributes\' or '
+ '\'print str(buildbot)\' to explore.') % buildbot.url[:-len('/json')]
+ local_vars = {'buildbot': buildbot, 'b': buildbot}
+ code.interact(prompt, None, local_vars)
+
+
+@need_buildbot
+def CMDidle(parser, args):
+ """Lists idle slaves."""
+ return find_idle_busy_slaves(parser, args, True)
+
+
+@need_buildbot
+def CMDbusy(parser, args):
+ """Lists idle slaves."""
+ return find_idle_busy_slaves(parser, args, False)
+
+
+@need_buildbot
+def CMDdisconnected(parser, args):
+ """Lists disconnected slaves."""
+ _, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ for slave in buildbot.slaves:
+ if not slave.connected:
+ print(slave.name)
+ return 0
+
+
+def find_idle_busy_slaves(parser, args, show_idle):
+ parser.add_option('-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option('-s',
+ '--slave',
+ dest='slaves',
+ action='append',
+ default=[],
+ help='Slaves to filter on')
+ options, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ if not options.builders:
+ options.builders = buildbot.builders.keys
+ for builder in options.builders:
+ builder = buildbot.builders[builder]
+ if options.slaves:
+ # Only the subset of slaves connected to the builder.
+ slaves = list(set(options.slaves).intersection(set(builder.slaves.names)))
+ if not slaves:
+ continue
+ else:
+ slaves = builder.slaves.names
+ busy_slaves = [build.slave.name for build in builder.current_builds]
+ if show_idle:
+ slaves = natsorted(set(slaves) - set(busy_slaves))
+ else:
+ slaves = natsorted(set(slaves) & set(busy_slaves))
+ if options.quiet:
+ for slave in slaves:
+ print(slave)
+ else:
+ if slaves:
+ print('Builder %s: %s' % (builder.name, ', '.join(slaves)))
+ return 0
+
+
+def last_failure(buildbot,
+ builders=None,
+ slaves=None,
+ steps=None,
+ no_cache=False):
+ """Returns Build object with last failure with the specific filters."""
+ builders = builders or buildbot.builders.keys
+ for builder in builders:
+ builder = buildbot.builders[builder]
+ if slaves:
+ # Only the subset of slaves connected to the builder.
+ builder_slaves = list(set(slaves).intersection(set(builder.slaves.names)))
+ if not builder_slaves:
+ continue
+ else:
+ builder_slaves = builder.slaves.names
+
+ if not no_cache and len(builder.slaves) > 2:
+ # Unless you just want the last few builds, it's often faster to
+ # fetch the whole thing at once, at the cost of a small hiccup on
+ # the buildbot.
+ # TODO(maruel): Cache only N last builds or all builds since
+ # datetime.
+ builder.builds.cache()
+
+ found = []
+ for build in builder.builds:
+ if build.slave.name not in builder_slaves or build.slave.name in found:
+ continue
+ # Only add the slave for the first completed build but still look for
+ # incomplete builds.
+ if build.completed:
+ found.append(build.slave.name)
+
+ if steps:
+ if any(build.steps[step].simplified_result is False for step in steps):
+ yield build
+ elif build.simplified_result is False:
+ yield build
+
+ if len(found) == len(builder_slaves):
+ # Found all the slaves, quit.
+ break
+
+
+@need_buildbot
+def CMDlast_failure(parser, args):
+ """Lists all slaves that failed on that step on their last build.
+
+ Example: to find all slaves where their last build was a compile failure,
+ run with --step compile
+ """
+ parser.add_option(
+ '-S',
+ '--step',
+ dest='steps',
+ action='append',
+ default=[],
+ help='List all slaves that failed on that step on their last build')
+ parser.add_option('-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option('-s',
+ '--slave',
+ dest='slaves',
+ action='append',
+ default=[],
+ help='Slaves to filter on')
+ parser.add_option('-n',
+ '--no_cache',
+ action='store_true',
+ help='Don\'t load all builds at once')
+ options, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ print_builders = not options.quiet and len(options.builders) != 1
+ last_builder = None
+ for build in last_failure(buildbot,
+ builders=options.builders,
+ slaves=options.slaves,
+ steps=options.steps,
+ no_cache=options.no_cache):
+
+ if print_builders and last_builder != build.builder:
+ print(build.builder.name)
+ last_builder = build.builder
+
+ if options.quiet:
+ if options.slaves:
+ print('%s: %s' % (build.builder.name, build.slave.name))
+ else:
+ print(build.slave.name)
+ else:
+ out = '%d on %s: blame:%s' % (build.number, build.slave.name,
+ ', '.join(build.blame))
+ if print_builders:
+ out = ' ' + out
+ print(out)
+
+ if len(options.steps) != 1:
+ for step in build.steps:
+ if step.simplified_result is False:
+ # Assume the first line is the text name anyway.
+ summary = ', '.join(step.data['text'][1:])[:40]
+ out = ' %s: "%s"' % (step.data['name'], summary)
+ if print_builders:
+ out = ' ' + out
+ print(out)
+ return 0
+
+
+@need_buildbot
+def CMDcurrent(parser, args):
+ """Lists current jobs."""
+ parser.add_option('-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option('--blame',
+ action='store_true',
+ help='Only print the blame list')
+ options, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ if not options.builders:
+ options.builders = buildbot.builders.keys
+
+ if options.blame:
+ blame = set()
+ for builder in options.builders:
+ for build in buildbot.builders[builder].current_builds:
+ if build.blame:
+ for blamed in build.blame:
+ blame.add(blamed)
+ print('\n'.join(blame))
+ return 0
+
+ for builder in options.builders:
+ builder = buildbot.builders[builder]
+ if not options.quiet and builder.current_builds:
+ print(builder.name)
+ for build in builder.current_builds:
+ if options.quiet:
+ print(build.slave.name)
+ else:
+ out = '%4d: slave=%10s' % (build.number, build.slave.name)
+ out += ' duration=%5d' % (build.duration or 0)
+ if build.eta:
+ out += ' eta=%5.0f' % build.eta
+ else:
+ out += ' '
+ if build.blame:
+ out += ' blame=' + ', '.join(build.blame)
+ print(out)
+
+ return 0
+
+
+@need_buildbot
+def CMDbuilds(parser, args):
+ """Lists all builds.
+
+ Example: to find all builds on a single slave, run with -b bar -s foo
+ """
+ parser.add_option('-r',
+ '--result',
+ type='int',
+ help='Build result to filter on')
+ parser.add_option('-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ parser.add_option('-s',
+ '--slave',
+ dest='slaves',
+ action='append',
+ default=[],
+ help='Slaves to filter on')
+ parser.add_option('-n',
+ '--no_cache',
+ action='store_true',
+ help='Don\'t load all builds at once')
+ options, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ builders = options.builders or buildbot.builders.keys
+ for builder in builders:
+ builder = buildbot.builders[builder]
+ for build in builder.builds:
+ if not options.slaves or build.slave.name in options.slaves:
+ if options.quiet:
+ out = ''
+ if options.builders:
+ out += '%s/' % builder.name
+ if len(options.slaves) != 1:
+ out += '%s/' % build.slave.name
+ out += '%d revision:%s result:%s blame:%s' % (
+ build.number, build.revision, build.result, ','.join(build.blame))
+ print(out)
+ else:
+ print(build)
+ return 0
+
+
+@need_buildbot
+def CMDcount(parser, args):
+ """Count the number of builds that occured during a specific period."""
+ parser.add_option('-o',
+ '--over',
+ type='int',
+ help='Number of seconds to look back')
+ parser.add_option('-b',
+ '--builder',
+ dest='builders',
+ action='append',
+ default=[],
+ help='Builders to filter on')
+ options, args, buildbot = parser.parse_args(args)
+ if args:
+ parser.error('Unrecognized parameters: %s' % ' '.join(args))
+ if not options.over:
+ parser.error(
+ 'Specify the number of seconds, e.g. --over 86400 for the last 24 '
+ 'hours')
+ builders = options.builders or buildbot.builders.keys
+ counts = {}
+ since = time.time() - options.over
+ for builder in builders:
+ builder = buildbot.builders[builder]
+ counts[builder.name] = 0
+ if not options.quiet:
+ print(builder.name)
+ for build in builder.builds.iterall():
+ try:
+ start_time = build.start_time
+ except urllib2.HTTPError:
+ # The build was probably trimmed.
+ print('Failed to fetch build %s/%d' % (builder.name, build.number),
+ file=sys.stderr)
+ continue
+ if start_time >= since:
+ counts[builder.name] += 1
+ else:
+ break
+ if not options.quiet:
+ print('.. %d' % counts[builder.name])
+
+ align_name = max(len(b) for b in counts)
+ align_number = max(len(str(c)) for c in counts.itervalues())
+ for builder in sorted(counts):
+ print('%*s: %*d' % (align_name, builder, align_number, counts[builder]))
+ print('Total: %d' % sum(counts.itervalues()))
+ return 0
+
+
+def gen_parser():
+ """Returns an OptionParser instance with default options.
+
+ It should then be processed with gen_usage() before being used.
+ """
+ parser = optparse.OptionParser(version=__version__)
+ # Remove description formatting
+ parser.format_description = lambda x: parser.description
+ # Add common parsing.
+ old_parser_args = parser.parse_args
+
+ def Parse(*args, **kwargs):
+ options, args = old_parser_args(*args, **kwargs)
+ if options.verbose >= 2:
+ logging.basicConfig(level=logging.DEBUG)
+ elif options.verbose:
+ logging.basicConfig(level=logging.INFO)
+ else:
+ logging.basicConfig(level=logging.WARNING)
+ return options, args
+
+ parser.parse_args = Parse
+
+ parser.add_option('-v',
+ '--verbose',
+ action='count',
+ help='Use multiple times to increase logging level')
+ parser.add_option(
+ '-q',
+ '--quiet',
+ action='store_true',
+ help='Reduces the output to be parsed by scripts, independent of -v')
+ parser.add_option('--throttle',
+ type='float',
+ help='Minimum delay to sleep between requests')
+ return parser
+
+###############################################################################
+## Generic subcommand handling code
+
+
+def Command(name):
+ return getattr(sys.modules[__name__], 'CMD' + name, None)
+
+
+@usage('<command>')
+def CMDhelp(parser, args):
+ """Print list of commands or use 'help <command>'."""
+ _, args = parser.parse_args(args)
+ if len(args) == 1:
+ return main(args + ['--help'])
+ parser.print_help()
+ return 0
+
+
+def gen_usage(parser, command):
+ """Modifies an OptionParser object with the command's documentation.
+
+ The documentation is taken from the function's docstring.
+ """
+ obj = Command(command)
+ more = getattr(obj, 'func_usage_more')
+ # OptionParser.description prefers non-formatted strings.
+ parser.description = obj.__doc__ + '\n'
+ parser.set_usage('usage: %%prog %s %s' % (command, more))
+
+
+def main(args=None):
+ # Do it late so all commands are listed.
+ # pylint: disable=E1101
+ CMDhelp.__doc__ += '\n\nCommands are:\n' + '\n'.join(
+ ' %-12s %s' % (fn[3:], Command(fn[3:]).__doc__.split('\n', 1)[0])
+ for fn in dir(sys.modules[__name__]) if fn.startswith('CMD'))
+
+ parser = gen_parser()
+ if args is None:
+ args = sys.argv[1:]
+ if args:
+ command = Command(args[0])
+ if command:
+ # "fix" the usage and the description now that we know the subcommand.
+ gen_usage(parser, args[0])
+ return command(parser, args[1:])
+
+ # Not a known command. Default to help.
+ gen_usage(parser, 'help')
+ return CMDhelp(parser, args)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
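Below is a minimal usage sketch for the buildbot_json module added above; it is not part of the commit. The master URL mirrors the one buildbot_utils.py (next diff) points at, and the throttle value is an arbitrary example.

  #!/usr/bin/python2
  # Sketch: walk every builder and report the most recent failing build.
  from cros_utils import buildbot_json

  # The master URL is an assumption; Buildbot() appends '/json' internally.
  bb = buildbot_json.Buildbot('http://chromegw/p/tryserver.chromiumos/')
  bb.auto_throttle = 1  # sleep at least 1s between fetches to spare the master

  for name in bb.builders.keys:
    builder = bb.builders[name]
    last = builder.builds[-1]  # negative index resolves to the latest build
    if last.simplified_result is False:
      print('%s #%d failed steps: %s' % (
          name, last.number, ', '.join(last.steps.failed)))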
diff --git a/cros_utils/buildbot_utils.py b/cros_utils/buildbot_utils.py
new file mode 100644
index 00000000..a80b7ad4
--- /dev/null
+++ b/cros_utils/buildbot_utils.py
@@ -0,0 +1,328 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utilities for launching and accessing ChromeOS buildbots."""
+
+from __future__ import print_function
+
+import os
+import time
+import urllib2
+
+from cros_utils import command_executer
+from cros_utils import logger
+from cros_utils import buildbot_json
+
+SLEEP_TIME = 600 # 10 minutes; time between polling of buildbot.
+TIME_OUT = 18000 # Decide the build is dead or will never finish
+# after this time (5 hours).
+OK_STATUS = [ # List of result status values that are 'ok'.
+ # This was obtained from:
+ # https://chromium.googlesource.com/chromium/tools/build/+/
+ # master/third_party/buildbot_8_4p1/buildbot/status/results.py
+ 0, # "success"
+ 1, # "warnings"
+ 6, # "retry"
+]
+
+
+class BuildbotTimeout(Exception):
+ """Exception to throw when a buildbot operation timesout."""
+ pass
+
+
+def ParseReportLog(url, build):
+ """Scrape the trybot image name off the Reports log page.
+
+ This takes the URL for a trybot Reports Stage web page,
+ and a trybot build type, such as 'daisy-release'. It
+ opens the web page and parses it looking for the trybot
+ artifact name (e.g. something like
+ 'trybot-daisy-release/R40-6394.0.0-b1389'). It returns the
+ artifact name, if found.
+ """
+ trybot_image = ''
+ url += '/text'
+ newurl = url.replace('uberchromegw', 'chromegw')
+ webpage = urllib2.urlopen(newurl)
+ data = webpage.read()
+ lines = data.split('\n')
+ for l in lines:
+ if l.find('Artifacts') > 0 and l.find('trybot') > 0:
+ trybot_name = 'trybot-%s' % build
+ start_pos = l.find(trybot_name)
+ end_pos = l.find('@https://storage')
+ trybot_image = l[start_pos:end_pos]
+
+ return trybot_image
+
+
+def GetBuildData(buildbot_queue, build_id):
+ """Find the Reports stage web page for a trybot build.
+
+ This takes the name of a buildbot_queue, such as 'daisy-release'
+ and a build id (the build number), and uses the json buildbot api to
+ find the Reports stage web page for that build, if it exists.
+ """
+ builder = buildbot_json.Buildbot(
+ 'http://chromegw/p/tryserver.chromiumos/').builders[buildbot_queue]
+ build_data = builder.builds[build_id].data
+ logs = build_data['logs']
+ for l in logs:
+ fname = l[1]
+ if 'steps/Report/' in fname:
+ return fname
+
+ return ''
+
+
+def FindBuildRecordFromLog(description, log_info):
+ """Find the right build record in the build logs.
+
+ Get the first build record from the build log with a reason field
+ that matches 'description'. ('description' is a special tag we
+ created when we launched the buildbot, so we could find it at this
+ point.)
+ """
+
+ current_line = 1
+ while current_line < len(log_info):
+ my_dict = {}
+ # Read all the lines from one "Build" to the next into my_dict
+ while True:
+ key = log_info[current_line].split(':')[0].strip()
+ value = log_info[current_line].split(':', 1)[1].strip()
+ my_dict[key] = value
+ current_line += 1
+ if 'Build' in key or current_line == len(log_info):
+ break
+ try:
+ # Check to see if the build record is the right one.
+ if str(description) in my_dict['reason']:
+ # We found a match; we're done.
+ return my_dict
+ except KeyError:
+ print("reason is not in dictionary: '%s'" % repr(my_dict))
+ else:
+ # Keep going.
+ continue
+
+ # We hit the bottom of the log without a match.
+ return {}
+
+
+def GetBuildInfo(file_dir, builder):
+ """Get all the build records for the trybot builds.
+
+ file_dir is the toolchain_utils directory.
+ """
+ ce = command_executer.GetCommandExecuter()
+ commands = ('{0}/cros_utils/buildbot_json.py builds '
+ 'http://chromegw/i/tryserver.chromiumos/'.format(file_dir))
+
+ if builder:
+ # For release builds, get logs from the 'release' builder.
+ if builder.endswith('-release'):
+ commands += ' -b release'
+ else:
+ commands += ' -b %s' % builder
+ _, buildinfo, _ = ce.RunCommandWOutput(commands, print_to_console=False)
+ build_log = buildinfo.splitlines()
+ return build_log
+
+
+def FindArchiveImage(chromeos_root, build, build_id):
+ """Returns name of the trybot artifact for board/build_id."""
+ ce = command_executer.GetCommandExecuter()
+ command = ('gsutil ls gs://chromeos-image-archive/trybot-%s/*b%s'
+ '/chromiumos_test_image.tar.xz' % (build, build_id))
+ _, out, _ = ce.ChrootRunCommandWOutput(chromeos_root,
+ command,
+ print_to_console=False)
+ #
+ # If build_id is not unique, there may be multiple archive images
+ # to choose from; sort them & pick the first (newest).
+ #
+ # If there are multiple archive images found, out will look something
+ # like this:
+ #
+ # 'gs://.../R35-5692.0.0-b105/chromiumos_test_image.tar.xz
+ # gs://.../R46-7339.0.0-b105/chromiumos_test_image.tar.xz'
+ #
+ out = out.rstrip('\n')
+ tmp_list = out.split('\n')
+ # After stripping the final '\n' and splitting on any other '\n', we get
+ # something like this:
+ # tmp_list = [ 'gs://.../R35-5692.0.0-b105/chromiumos_test_image.tar.xz' ,
+ # 'gs://.../R46-7339.0.0-b105/chromiumos_test_image.tar.xz' ]
+ #
+ # If we sort this in descending order, we should end up with the most
+ # recent test image first, so that's what we do here.
+ #
+ if len(tmp_list) > 1:
+ tmp_list = sorted(tmp_list, reverse=True)
+ out = tmp_list[0]
+
+ trybot_image = ''
+ trybot_name = 'trybot-%s' % build
+ if out and out.find(trybot_name) > 0:
+ start_pos = out.find(trybot_name)
+ end_pos = out.find('/chromiumos_test_image')
+ trybot_image = out[start_pos:end_pos]
+
+ return trybot_image
+
+
+def GetTrybotImage(chromeos_root,
+ buildbot_name,
+ patch_list,
+ build_tag,
+ build_toolchain=False):
+ """Launch buildbot and get resulting trybot artifact name.
+
+ This function launches a buildbot with the appropriate flags to
+ build the test ChromeOS image, with the current ToT mobile compiler. It
+ checks every 10 minutes to see if the trybot has finished. When the trybot
+ has finished, it parses the resulting report logs to find the trybot
+ artifact (if one was created), and returns that artifact name.
+
+ chromeos_root is the path to the ChromeOS root, needed for finding chromite
+ and launching the buildbot.
+
+ buildbot_name is the name of the buildbot queue, such as lumpy-release or
+ daisy-paladin.
+
+ patch_list is a python list of the patches, if any, for the buildbot to use.
+
+ build_tag is a (unique) string to be used to look up the buildbot results
+ from among all the build records.
+ """
+ ce = command_executer.GetCommandExecuter()
+ cbuildbot_path = os.path.join(chromeos_root, 'chromite/cbuildbot')
+ base_dir = os.getcwd()
+ patch_arg = ''
+ if patch_list:
+ for p in patch_list:
+ patch_arg = patch_arg + ' -g ' + repr(p)
+ toolchain_flags = ''
+ if build_toolchain:
+ toolchain_flags += '--latest-toolchain'
+ os.chdir(cbuildbot_path)
+
+ # Launch buildbot with appropriate flags.
+ build = buildbot_name
+ description = build_tag
+ command = ('./cbuildbot --remote --nochromesdk --notests'
+ ' --remote-description=%s %s %s %s' %
+ (description, toolchain_flags, patch_arg, build))
+ _, out, _ = ce.RunCommandWOutput(command)
+ if 'Tryjob submitted!' not in out:
+ logger.GetLogger().LogFatal('Error occurred while launching trybot job: '
+ '%s' % command)
+ os.chdir(base_dir)
+
+ build_id = 0
+ build_status = None
+ # Wait for buildbot to finish running (check every 10 minutes). Wait
+ # 10 minutes before the first check to give the buildbot time to launch
+ # (so we don't start looking for build data before it's out there).
+ time.sleep(SLEEP_TIME)
+ done = False
+ pending = True
+ # pending_time is the time between when we submit the job and when the
+ # buildbot actually launches the build. running_time is the time between
+ # when the buildbot job launches and when it finishes. The job is
+ # considered 'pending' until we can find an entry for it in the buildbot
+ # logs.
+ pending_time = SLEEP_TIME
+ running_time = 0
+ while not done:
+ done = True
+ build_info = GetBuildInfo(base_dir, build)
+ if not build_info:
+ if pending_time > TIME_OUT:
+ logger.GetLogger().LogFatal('Unable to get build logs for target %s.' %
+ build)
+ else:
+ pending_message = 'Unable to find build log; job may be pending.'
+ done = False
+
+ if done:
+ data_dict = FindBuildRecordFromLog(description, build_info)
+ if not data_dict:
+ # Trybot job may be pending (not actually launched yet).
+ if pending_time > TIME_OUT:
+ logger.GetLogger().LogFatal('Unable to find build record for trybot'
+ ' %s.' % description)
+ else:
+ pending_message = 'Unable to find build record; job may be pending.'
+ done = False
+
+ else:
+ # Now that we have actually found the entry for the build
+ # job in the build log, we know the job is actually
+ # running, not pending, so we flip the 'pending' flag. We
+ # still have to wait for the buildbot job to finish running
+ # however.
+ pending = False
+ if 'True' in data_dict['completed']:
+ build_id = data_dict['number']
+ build_status = int(data_dict['result'])
+ else:
+ done = False
+
+ if not done:
+ if pending:
+ logger.GetLogger().LogOutput(pending_message)
+ logger.GetLogger().LogOutput('Current pending time: %d minutes.' %
+ (pending_time / 60))
+ pending_time += SLEEP_TIME
+ else:
+ logger.GetLogger().LogOutput('{0} minutes passed.'.format(running_time /
+ 60))
+ logger.GetLogger().LogOutput('Sleeping {0} seconds.'.format(SLEEP_TIME))
+ running_time += SLEEP_TIME
+
+ time.sleep(SLEEP_TIME)
+ if running_time > TIME_OUT:
+ done = True
+
+ trybot_image = ''
+
+ if build_status in OK_STATUS:
+ trybot_image = FindArchiveImage(chromeos_root, build, build_id)
+ if not trybot_image:
+ logger.GetLogger().LogError('Trybot job %s failed with status %d;'
+ ' no trybot image generated.' %
+ (description, build_status))
+
+ logger.GetLogger().LogOutput("trybot_image is '%s'" % trybot_image)
+ logger.GetLogger().LogOutput('build_status is %d' % build_status)
+ return trybot_image
+
+
+def DoesImageExist(chromeos_root, build):
+ """Check if the image for the given build exists."""
+
+ ce = command_executer.GetCommandExecuter()
+ command = ('gsutil ls gs://chromeos-image-archive/%s'
+ '/chromiumos_test_image.tar.xz' % (build))
+ ret = ce.ChrootRunCommand(chromeos_root, command, print_to_console=False)
+ return not ret
+
+
+def WaitForImage(chromeos_root, build):
+ """Wait for an image to be ready."""
+
+ elapsed_time = 0
+ while elapsed_time < TIME_OUT:
+ if DoesImageExist(chromeos_root, build):
+ return
+ logger.GetLogger().LogOutput('Image %s not ready, waiting for 10 minutes' %
+ build)
+ time.sleep(SLEEP_TIME)
+ elapsed_time += SLEEP_TIME
+
+ logger.GetLogger().LogOutput('Image %s not found, waited for %d hours' %
+ (build, (TIME_OUT / 3600)))
+ raise BuildbotTimeout('Timeout while waiting for image %s' % build)
diff --git a/cros_utils/colortrans.py b/cros_utils/colortrans.py
new file mode 100644
index 00000000..9458cc49
--- /dev/null
+++ b/cros_utils/colortrans.py
@@ -0,0 +1,388 @@
+# We did not author this file nor maintain it. Skip linting it.
+#pylint: skip-file
+""" Convert values between RGB hex codes and xterm-256 color codes.
+
+Nice long listing of all 256 colors and their codes. Useful for
+developing console color themes, or even script output schemes.
+
+Resources:
+* http://en.wikipedia.org/wiki/8-bit_color
+* http://en.wikipedia.org/wiki/ANSI_escape_code
+* /usr/share/X11/rgb.txt
+
+I'm not sure where this script was inspired from. I think I must have
+written it from scratch, though it's been several years now.
+"""
+
+__author__ = 'Micah Elliott http://MicahElliott.com'
+__version__ = '0.1'
+__copyright__ = 'Copyright (C) 2011 Micah Elliott. All rights reserved.'
+__license__ = 'WTFPL http://sam.zoy.org/wtfpl/'
+
+#---------------------------------------------------------------------
+
+import sys, re
+
+CLUT = [ # color look-up table
+ # 8-bit, RGB hex
+
+ # Primary 3-bit (8 colors). Unique representation!
+ ('00', '000000'),
+ ('01', '800000'),
+ ('02', '008000'),
+ ('03', '808000'),
+ ('04', '000080'),
+ ('05', '800080'),
+ ('06', '008080'),
+ ('07', 'c0c0c0'),
+
+ # Equivalent "bright" versions of original 8 colors.
+ ('08', '808080'),
+ ('09', 'ff0000'),
+ ('10', '00ff00'),
+ ('11', 'ffff00'),
+ ('12', '0000ff'),
+ ('13', 'ff00ff'),
+ ('14', '00ffff'),
+ ('15', 'ffffff'),
+
+ # Strictly ascending.
+ ('16', '000000'),
+ ('17', '00005f'),
+ ('18', '000087'),
+ ('19', '0000af'),
+ ('20', '0000d7'),
+ ('21', '0000ff'),
+ ('22', '005f00'),
+ ('23', '005f5f'),
+ ('24', '005f87'),
+ ('25', '005faf'),
+ ('26', '005fd7'),
+ ('27', '005fff'),
+ ('28', '008700'),
+ ('29', '00875f'),
+ ('30', '008787'),
+ ('31', '0087af'),
+ ('32', '0087d7'),
+ ('33', '0087ff'),
+ ('34', '00af00'),
+ ('35', '00af5f'),
+ ('36', '00af87'),
+ ('37', '00afaf'),
+ ('38', '00afd7'),
+ ('39', '00afff'),
+ ('40', '00d700'),
+ ('41', '00d75f'),
+ ('42', '00d787'),
+ ('43', '00d7af'),
+ ('44', '00d7d7'),
+ ('45', '00d7ff'),
+ ('46', '00ff00'),
+ ('47', '00ff5f'),
+ ('48', '00ff87'),
+ ('49', '00ffaf'),
+ ('50', '00ffd7'),
+ ('51', '00ffff'),
+ ('52', '5f0000'),
+ ('53', '5f005f'),
+ ('54', '5f0087'),
+ ('55', '5f00af'),
+ ('56', '5f00d7'),
+ ('57', '5f00ff'),
+ ('58', '5f5f00'),
+ ('59', '5f5f5f'),
+ ('60', '5f5f87'),
+ ('61', '5f5faf'),
+ ('62', '5f5fd7'),
+ ('63', '5f5fff'),
+ ('64', '5f8700'),
+ ('65', '5f875f'),
+ ('66', '5f8787'),
+ ('67', '5f87af'),
+ ('68', '5f87d7'),
+ ('69', '5f87ff'),
+ ('70', '5faf00'),
+ ('71', '5faf5f'),
+ ('72', '5faf87'),
+ ('73', '5fafaf'),
+ ('74', '5fafd7'),
+ ('75', '5fafff'),
+ ('76', '5fd700'),
+ ('77', '5fd75f'),
+ ('78', '5fd787'),
+ ('79', '5fd7af'),
+ ('80', '5fd7d7'),
+ ('81', '5fd7ff'),
+ ('82', '5fff00'),
+ ('83', '5fff5f'),
+ ('84', '5fff87'),
+ ('85', '5fffaf'),
+ ('86', '5fffd7'),
+ ('87', '5fffff'),
+ ('88', '870000'),
+ ('89', '87005f'),
+ ('90', '870087'),
+ ('91', '8700af'),
+ ('92', '8700d7'),
+ ('93', '8700ff'),
+ ('94', '875f00'),
+ ('95', '875f5f'),
+ ('96', '875f87'),
+ ('97', '875faf'),
+ ('98', '875fd7'),
+ ('99', '875fff'),
+ ('100', '878700'),
+ ('101', '87875f'),
+ ('102', '878787'),
+ ('103', '8787af'),
+ ('104', '8787d7'),
+ ('105', '8787ff'),
+ ('106', '87af00'),
+ ('107', '87af5f'),
+ ('108', '87af87'),
+ ('109', '87afaf'),
+ ('110', '87afd7'),
+ ('111', '87afff'),
+ ('112', '87d700'),
+ ('113', '87d75f'),
+ ('114', '87d787'),
+ ('115', '87d7af'),
+ ('116', '87d7d7'),
+ ('117', '87d7ff'),
+ ('118', '87ff00'),
+ ('119', '87ff5f'),
+ ('120', '87ff87'),
+ ('121', '87ffaf'),
+ ('122', '87ffd7'),
+ ('123', '87ffff'),
+ ('124', 'af0000'),
+ ('125', 'af005f'),
+ ('126', 'af0087'),
+ ('127', 'af00af'),
+ ('128', 'af00d7'),
+ ('129', 'af00ff'),
+ ('130', 'af5f00'),
+ ('131', 'af5f5f'),
+ ('132', 'af5f87'),
+ ('133', 'af5faf'),
+ ('134', 'af5fd7'),
+ ('135', 'af5fff'),
+ ('136', 'af8700'),
+ ('137', 'af875f'),
+ ('138', 'af8787'),
+ ('139', 'af87af'),
+ ('140', 'af87d7'),
+ ('141', 'af87ff'),
+ ('142', 'afaf00'),
+ ('143', 'afaf5f'),
+ ('144', 'afaf87'),
+ ('145', 'afafaf'),
+ ('146', 'afafd7'),
+ ('147', 'afafff'),
+ ('148', 'afd700'),
+ ('149', 'afd75f'),
+ ('150', 'afd787'),
+ ('151', 'afd7af'),
+ ('152', 'afd7d7'),
+ ('153', 'afd7ff'),
+ ('154', 'afff00'),
+ ('155', 'afff5f'),
+ ('156', 'afff87'),
+ ('157', 'afffaf'),
+ ('158', 'afffd7'),
+ ('159', 'afffff'),
+ ('160', 'd70000'),
+ ('161', 'd7005f'),
+ ('162', 'd70087'),
+ ('163', 'd700af'),
+ ('164', 'd700d7'),
+ ('165', 'd700ff'),
+ ('166', 'd75f00'),
+ ('167', 'd75f5f'),
+ ('168', 'd75f87'),
+ ('169', 'd75faf'),
+ ('170', 'd75fd7'),
+ ('171', 'd75fff'),
+ ('172', 'd78700'),
+ ('173', 'd7875f'),
+ ('174', 'd78787'),
+ ('175', 'd787af'),
+ ('176', 'd787d7'),
+ ('177', 'd787ff'),
+ ('178', 'd7af00'),
+ ('179', 'd7af5f'),
+ ('180', 'd7af87'),
+ ('181', 'd7afaf'),
+ ('182', 'd7afd7'),
+ ('183', 'd7afff'),
+ ('184', 'd7d700'),
+ ('185', 'd7d75f'),
+ ('186', 'd7d787'),
+ ('187', 'd7d7af'),
+ ('188', 'd7d7d7'),
+ ('189', 'd7d7ff'),
+ ('190', 'd7ff00'),
+ ('191', 'd7ff5f'),
+ ('192', 'd7ff87'),
+ ('193', 'd7ffaf'),
+ ('194', 'd7ffd7'),
+ ('195', 'd7ffff'),
+ ('196', 'ff0000'),
+ ('197', 'ff005f'),
+ ('198', 'ff0087'),
+ ('199', 'ff00af'),
+ ('200', 'ff00d7'),
+ ('201', 'ff00ff'),
+ ('202', 'ff5f00'),
+ ('203', 'ff5f5f'),
+ ('204', 'ff5f87'),
+ ('205', 'ff5faf'),
+ ('206', 'ff5fd7'),
+ ('207', 'ff5fff'),
+ ('208', 'ff8700'),
+ ('209', 'ff875f'),
+ ('210', 'ff8787'),
+ ('211', 'ff87af'),
+ ('212', 'ff87d7'),
+ ('213', 'ff87ff'),
+ ('214', 'ffaf00'),
+ ('215', 'ffaf5f'),
+ ('216', 'ffaf87'),
+ ('217', 'ffafaf'),
+ ('218', 'ffafd7'),
+ ('219', 'ffafff'),
+ ('220', 'ffd700'),
+ ('221', 'ffd75f'),
+ ('222', 'ffd787'),
+ ('223', 'ffd7af'),
+ ('224', 'ffd7d7'),
+ ('225', 'ffd7ff'),
+ ('226', 'ffff00'),
+ ('227', 'ffff5f'),
+ ('228', 'ffff87'),
+ ('229', 'ffffaf'),
+ ('230', 'ffffd7'),
+ ('231', 'ffffff'),
+
+ # Gray-scale range.
+ ('232', '080808'),
+ ('233', '121212'),
+ ('234', '1c1c1c'),
+ ('235', '262626'),
+ ('236', '303030'),
+ ('237', '3a3a3a'),
+ ('238', '444444'),
+ ('239', '4e4e4e'),
+ ('240', '585858'),
+ ('241', '626262'),
+ ('242', '6c6c6c'),
+ ('243', '767676'),
+ ('244', '808080'),
+ ('245', '8a8a8a'),
+ ('246', '949494'),
+ ('247', '9e9e9e'),
+ ('248', 'a8a8a8'),
+ ('249', 'b2b2b2'),
+ ('250', 'bcbcbc'),
+ ('251', 'c6c6c6'),
+ ('252', 'd0d0d0'),
+ ('253', 'dadada'),
+ ('254', 'e4e4e4'),
+ ('255', 'eeeeee'),
+]
+
+
+def _str2hex(hexstr):
+ return int(hexstr, 16)
+
+
+def _strip_hash(rgb):
+ # Strip leading `#` if exists.
+ if rgb.startswith('#'):
+ rgb = rgb.lstrip('#')
+ return rgb
+
+
+def _create_dicts():
+ short2rgb_dict = dict(CLUT)
+ rgb2short_dict = {}
+ for k, v in short2rgb_dict.items():
+ rgb2short_dict[v] = k
+ return rgb2short_dict, short2rgb_dict
+
+
+def short2rgb(short):
+ return SHORT2RGB_DICT[short]
+
+
+def print_all():
+ """ Print all 256 xterm color codes.
+ """
+ for short, rgb in CLUT:
+ sys.stdout.write('\033[48;5;%sm%s:%s' % (short, short, rgb))
+ sys.stdout.write('\033[0m ')
+ sys.stdout.write('\033[38;5;%sm%s:%s' % (short, short, rgb))
+ sys.stdout.write('\033[0m\n')
+ print 'Printed all codes.'
+ print 'You can translate a hex or 0-255 code by providing an argument.'
+
+
+def rgb2short(rgb):
+ """ Find the closest xterm-256 approximation to the given RGB value.
+ @param rgb: Hex code representing an RGB value, eg, 'abcdef'
+ @returns: String between 0 and 255, compatible with xterm.
+ >>> rgb2short('123456')
+ ('23', '005f5f')
+ >>> rgb2short('ffffff')
+ ('231', 'ffffff')
+ >>> rgb2short('0DADD6') # vimeo logo
+ ('38', '00afd7')
+ """
+ rgb = _strip_hash(rgb)
+ incs = (0x00, 0x5f, 0x87, 0xaf, 0xd7, 0xff)
+ # Break 6-char RGB code into 3 integer vals.
+ parts = [int(h, 16) for h in re.split(r'(..)(..)(..)', rgb)[1:4]]
+ res = []
+ for part in parts:
+ i = 0
+ while i < len(incs) - 1:
+ s, b = incs[i], incs[i + 1] # smaller, bigger
+ if s <= part <= b:
+ s1 = abs(s - part)
+ b1 = abs(b - part)
+ if s1 < b1:
+ closest = s
+ else:
+ closest = b
+ res.append(closest)
+ break
+ i += 1
+ #print '***', res
+ res = ''.join([('%02.x' % i) for i in res])
+ equiv = RGB2SHORT_DICT[res]
+ #print '***', res, equiv
+ return equiv, res
+
+
+RGB2SHORT_DICT, SHORT2RGB_DICT = _create_dicts()
+
+#---------------------------------------------------------------------
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
+ if len(sys.argv) == 1:
+ print_all()
+ raise SystemExit
+ arg = sys.argv[1]
+ if len(arg) < 4 and int(arg) < 256:
+ rgb = short2rgb(arg)
+ sys.stdout.write(
+ 'xterm color \033[38;5;%sm%s\033[0m -> RGB exact \033[38;5;%sm%s\033[0m'
+ % (arg, arg, arg, rgb))
+ sys.stdout.write('\033[0m\n')
+ else:
+ short, rgb = rgb2short(arg)
+ sys.stdout.write('RGB %s -> xterm color approx \033[38;5;%sm%s (%s)' %
+ (arg, short, short, rgb))
+ sys.stdout.write('\033[0m\n')
diff --git a/cros_utils/command_executer.py b/cros_utils/command_executer.py
new file mode 100644
index 00000000..c5614513
--- /dev/null
+++ b/cros_utils/command_executer.py
@@ -0,0 +1,685 @@
+# Copyright 2011 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utilities to run commands in outside/inside chroot and on the board."""
+
+from __future__ import print_function
+
+import getpass
+import os
+import re
+import select
+import signal
+import subprocess
+import sys
+import tempfile
+import time
+
+import logger
+import misc
+
+mock_default = False
+
+LOG_LEVEL = ('none', 'quiet', 'average', 'verbose')
+
+
+def InitCommandExecuter(mock=False):
+ # pylint: disable=global-statement
+ global mock_default
+ # Whether to default to a mock command executer or not
+ mock_default = mock
+
+
+def GetCommandExecuter(logger_to_set=None, mock=False, log_level='verbose'):
+ # If the default is a mock executer, always return one.
+ if mock_default or mock:
+ return MockCommandExecuter(log_level, logger_to_set)
+ else:
+ return CommandExecuter(log_level, logger_to_set)
+
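+# A hedged usage sketch of the factory above ('echo hello' is an arbitrary
+# example command; RunCommandWOutput is defined later in this file):
+#
+#   ce = GetCommandExecuter(log_level='quiet')
+#   retcode, out, err = ce.RunCommandWOutput('echo hello')
+#   assert retcode == 0 and out.strip() == 'hello'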
+
+class CommandExecuter(object):
+ """Provides several methods to execute commands on several environments."""
+
+ def __init__(self, log_level, logger_to_set=None):
+ self.log_level = log_level
+ if log_level == 'none':
+ self.logger = None
+ else:
+ if logger_to_set is not None:
+ self.logger = logger_to_set
+ else:
+ self.logger = logger.GetLogger()
+
+ def GetLogLevel(self):
+ return self.log_level
+
+ def SetLogLevel(self, log_level):
+ self.log_level = log_level
+
+ def RunCommandGeneric(self,
+ cmd,
+ return_output=False,
+ machine=None,
+ username=None,
+ command_terminator=None,
+ command_timeout=None,
+ terminated_timeout=10,
+ print_to_console=True,
+ except_handler=lambda p, e: None):
+ """Run a command.
+
+ Returns triplet (returncode, stdout, stderr).
+ """
+
+ cmd = str(cmd)
+
+ if self.log_level == 'quiet':
+ print_to_console = False
+
+ if self.log_level == 'verbose':
+ self.logger.LogCmd(cmd, machine, username, print_to_console)
+ elif self.logger:
+ self.logger.LogCmdToFileOnly(cmd, machine, username)
+ if command_terminator and command_terminator.IsTerminated():
+ if self.logger:
+ self.logger.LogError('Command was terminated!', print_to_console)
+ return (1, '', '')
+
+ if machine is not None:
+ user = ''
+ if username is not None:
+ user = username + '@'
+ cmd = "ssh -t -t %s%s -- '%s'" % (user, machine, cmd)
+
+ # We use setsid so that the child will have a different session id
+ # and we can easily kill the process group. This is also important
+ # because the child will be disassociated from the parent terminal.
+ # In this way the child cannot mess the parent's terminal.
+ p = None
+ try:
+ p = subprocess.Popen(cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ shell=True,
+ preexec_fn=os.setsid)
+
+ full_stdout = ''
+ full_stderr = ''
+
+ # Pull output from pipes, send it to file/stdout/string
+ out = err = None
+ pipes = [p.stdout, p.stderr]
+
+ my_poll = select.poll()
+ my_poll.register(p.stdout, select.POLLIN)
+ my_poll.register(p.stderr, select.POLLIN)
+
+ terminated_time = None
+ started_time = time.time()
+
+      while pipes:
+ if command_terminator and command_terminator.IsTerminated():
+ os.killpg(os.getpgid(p.pid), signal.SIGTERM)
+ if self.logger:
+ self.logger.LogError('Command received termination request. '
+ 'Killed child process group.',
+ print_to_console)
+ break
+
+ l = my_poll.poll(100)
+ for (fd, _) in l:
+ if fd == p.stdout.fileno():
+ out = os.read(p.stdout.fileno(), 16384)
+ if return_output:
+ full_stdout += out
+ if self.logger:
+ self.logger.LogCommandOutput(out, print_to_console)
+ if out == '':
+ pipes.remove(p.stdout)
+ my_poll.unregister(p.stdout)
+ if fd == p.stderr.fileno():
+ err = os.read(p.stderr.fileno(), 16384)
+ if return_output:
+ full_stderr += err
+ if self.logger:
+ self.logger.LogCommandError(err, print_to_console)
+ if err == '':
+ pipes.remove(p.stderr)
+ my_poll.unregister(p.stderr)
+
+ if p.poll() is not None:
+ if terminated_time is None:
+ terminated_time = time.time()
+ elif (terminated_timeout is not None and
+ time.time() - terminated_time > terminated_timeout):
+ if self.logger:
+ self.logger.LogWarning('Timeout of %s seconds reached since '
+ 'process termination.' %
+ terminated_timeout,
+ print_to_console)
+ break
+
+ if (command_timeout is not None and
+ time.time() - started_time > command_timeout):
+ os.killpg(os.getpgid(p.pid), signal.SIGTERM)
+ if self.logger:
+          self.logger.LogWarning('Timeout of %s seconds reached since '
+                                 'process started. Killed child process group.'
+                                 % command_timeout, print_to_console)
+ break
+
+ if out == err == '':
+ break
+
+ p.wait()
+ if return_output:
+ return (p.returncode, full_stdout, full_stderr)
+ return (p.returncode, '', '')
+ except BaseException as e:
+ except_handler(p, e)
+ raise
+
+ def RunCommand(self, *args, **kwargs):
+ """Run a command.
+
+ Takes the same arguments as RunCommandGeneric except for return_output.
+ Returns a single value returncode.
+ """
+ # Make sure that args does not overwrite 'return_output'
+ assert len(args) <= 1
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = False
+ return self.RunCommandGeneric(*args, **kwargs)[0]
+
+ def RunCommandWExceptionCleanup(self, *args, **kwargs):
+ """Run a command and kill process if exception is thrown.
+
+ Takes the same arguments as RunCommandGeneric except for except_handler.
+ Returns same as RunCommandGeneric.
+ """
+
+ def KillProc(proc, _):
+ if proc:
+ os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
+
+ # Make sure that args does not overwrite 'except_handler'
+ assert len(args) <= 8
+ assert 'except_handler' not in kwargs
+ kwargs['except_handler'] = KillProc
+ return self.RunCommandGeneric(*args, **kwargs)
+
+ def RunCommandWOutput(self, *args, **kwargs):
+ """Run a command.
+
+ Takes the same arguments as RunCommandGeneric except for return_output.
+ Returns a triplet (returncode, stdout, stderr).
+ """
+ # Make sure that args does not overwrite 'return_output'
+ assert len(args) <= 1
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = True
+ return self.RunCommandGeneric(*args, **kwargs)
+
+ def RemoteAccessInitCommand(self, chromeos_root, machine):
+ command = ''
+ command += '\nset -- --remote=' + machine
+ command += '\n. ' + chromeos_root + '/src/scripts/common.sh'
+ command += '\n. ' + chromeos_root + '/src/scripts/remote_access.sh'
+ command += '\nTMP=$(mktemp -d)'
+ command += "\nFLAGS \"$@\" || exit 1"
+ command += '\nremote_access_init'
+ return command
+
+ def WriteToTempShFile(self, contents):
+ handle, command_file = tempfile.mkstemp(prefix=os.uname()[1], suffix='.sh')
+ os.write(handle, '#!/bin/bash\n')
+ os.write(handle, contents)
+ os.close(handle)
+ return command_file
+
+ def CrosLearnBoard(self, chromeos_root, machine):
+ command = self.RemoteAccessInitCommand(chromeos_root, machine)
+ command += '\nlearn_board'
+ command += '\necho ${FLAGS_board}'
+ retval, output, _ = self.RunCommandWOutput(command)
+ if self.logger:
+ self.logger.LogFatalIf(retval, 'learn_board command failed')
+ elif retval:
+ sys.exit(1)
+ return output.split()[-1]
+
+ def CrosRunCommandGeneric(self,
+ cmd,
+ return_output=False,
+ machine=None,
+ command_terminator=None,
+ chromeos_root=None,
+ command_timeout=None,
+ terminated_timeout=10,
+ print_to_console=True):
+ """Run a command on a ChromeOS box.
+
+ Returns triplet (returncode, stdout, stderr).
+ """
+
+ if self.log_level != 'verbose':
+ print_to_console = False
+
+ if self.logger:
+ self.logger.LogCmd(cmd, print_to_console=print_to_console)
+ self.logger.LogFatalIf(not machine, 'No machine provided!')
+ self.logger.LogFatalIf(not chromeos_root, 'chromeos_root not given!')
+ else:
+ if not chromeos_root or not machine:
+ sys.exit(1)
+ chromeos_root = os.path.expanduser(chromeos_root)
+
+ # Write all commands to a file.
+ command_file = self.WriteToTempShFile(cmd)
+ retval = self.CopyFiles(command_file,
+ command_file,
+ dest_machine=machine,
+ command_terminator=command_terminator,
+ chromeos_root=chromeos_root,
+ dest_cros=True,
+ recursive=False,
+ print_to_console=print_to_console)
+ if retval:
+ if self.logger:
+ self.logger.LogError('Could not run remote command on machine.'
+ ' Is the machine up?')
+ return (retval, '', '')
+
+ command = self.RemoteAccessInitCommand(chromeos_root, machine)
+ command += '\nremote_sh bash %s' % command_file
+ command += "\nl_retval=$?; echo \"$REMOTE_OUT\"; exit $l_retval"
+ retval = self.RunCommandGeneric(command,
+ return_output,
+ command_terminator=command_terminator,
+ command_timeout=command_timeout,
+ terminated_timeout=terminated_timeout,
+ print_to_console=print_to_console)
+ if return_output:
+ connect_signature = (
+ 'Initiating first contact with remote host\n' + 'Connection OK\n')
+ connect_signature_re = re.compile(connect_signature)
+ modded_retval = list(retval)
+ modded_retval[1] = connect_signature_re.sub('', retval[1])
+ return modded_retval
+ return retval
+
+ def CrosRunCommand(self, *args, **kwargs):
+ """Run a command on a ChromeOS box.
+
+ Takes the same arguments as CrosRunCommandGeneric except for return_output.
+ Returns a single value returncode.
+ """
+ # Make sure that args does not overwrite 'return_output'
+ assert len(args) <= 1
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = False
+ return self.CrosRunCommandGeneric(*args, **kwargs)[0]
+
+ def CrosRunCommandWOutput(self, *args, **kwargs):
+ """Run a command on a ChromeOS box.
+
+ Takes the same arguments as CrosRunCommandGeneric except for return_output.
+ Returns a triplet (returncode, stdout, stderr).
+ """
+ # Make sure that args does not overwrite 'return_output'
+ assert len(args) <= 1
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = True
+ return self.CrosRunCommandGeneric(*args, **kwargs)
+
+ def ChrootRunCommandGeneric(self,
+ chromeos_root,
+ command,
+ return_output=False,
+ command_terminator=None,
+ command_timeout=None,
+ terminated_timeout=10,
+ print_to_console=True,
+ cros_sdk_options=''):
+ """Runs a command within the chroot.
+
+ Returns triplet (returncode, stdout, stderr).
+ """
+
+ if self.log_level != 'verbose':
+ print_to_console = False
+
+ if self.logger:
+ self.logger.LogCmd(command, print_to_console=print_to_console)
+
+ handle, command_file = tempfile.mkstemp(
+ dir=os.path.join(chromeos_root, 'src/scripts'),
+ suffix='.sh',
+ prefix='in_chroot_cmd')
+ os.write(handle, '#!/bin/bash\n')
+ os.write(handle, command)
+ os.write(handle, '\n')
+ os.close(handle)
+
+ os.chmod(command_file, 0777)
+
+ # if return_output is set, run a dummy command first to make sure that
+ # the chroot already exists. We want the final returned output to skip
+ # the output from chroot creation steps.
+ if return_output:
+ ret = self.RunCommand('cd %s; cros_sdk %s -- true' %
+ (chromeos_root, cros_sdk_options))
+ if ret:
+ return (ret, '', '')
+
+ # Run command_file inside the chroot, making sure that any "~" is expanded
+ # by the shell inside the chroot, not outside.
+ command = ("cd %s; cros_sdk %s -- bash -c '%s/%s'" %
+ (chromeos_root, cros_sdk_options, misc.CHROMEOS_SCRIPTS_DIR,
+ os.path.basename(command_file)))
+ ret = self.RunCommandGeneric(command,
+ return_output,
+ command_terminator=command_terminator,
+ command_timeout=command_timeout,
+ terminated_timeout=terminated_timeout,
+ print_to_console=print_to_console)
+ os.remove(command_file)
+ return ret
+
+ def ChrootRunCommand(self, *args, **kwargs):
+ """Runs a command within the chroot.
+
+ Takes the same arguments as ChrootRunCommandGeneric except for
+ return_output.
+ Returns a single value returncode.
+ """
+ # Make sure that args does not overwrite 'return_output'
+ assert len(args) <= 2
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = False
+ return self.ChrootRunCommandGeneric(*args, **kwargs)[0]
+
+ def ChrootRunCommandWOutput(self, *args, **kwargs):
+ """Runs a command within the chroot.
+
+ Takes the same arguments as ChrootRunCommandGeneric except for
+ return_output.
+ Returns a triplet (returncode, stdout, stderr).
+ """
+ # Make sure that args does not overwrite 'return_output'
+ assert len(args) <= 2
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = True
+ return self.ChrootRunCommandGeneric(*args, **kwargs)
+
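+  # A hedged usage sketch of the chroot helpers above (the root path and
+  # 'uname -a' are placeholders for a real checkout and command):
+  #
+  #   ce = GetCommandExecuter()
+  #   ret = ce.ChrootRunCommand('/path/to/chromeos_root', 'uname -a')
+  #   ret, out, err = ce.ChrootRunCommandWOutput('/path/to/chromeos_root',
+  #                                              'uname -a')
+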
+ def RunCommands(self,
+ cmdlist,
+ machine=None,
+ username=None,
+ command_terminator=None):
+ cmd = ' ;\n'.join(cmdlist)
+ return self.RunCommand(cmd,
+ machine=machine,
+ username=username,
+ command_terminator=command_terminator)
+
+ def CopyFiles(self,
+ src,
+ dest,
+ src_machine=None,
+ dest_machine=None,
+ src_user=None,
+ dest_user=None,
+ recursive=True,
+ command_terminator=None,
+ chromeos_root=None,
+ src_cros=False,
+ dest_cros=False,
+ print_to_console=True):
+ src = os.path.expanduser(src)
+ dest = os.path.expanduser(dest)
+
+ if recursive:
+ src = src + '/'
+ dest = dest + '/'
+
+    if src_cros or dest_cros:
+ if self.logger:
+ self.logger.LogFatalIf(src_cros == dest_cros,
+                               'Only one of src_cros and dest_cros can '
+ 'be True.')
+ self.logger.LogFatalIf(not chromeos_root, 'chromeos_root not given!')
+ elif src_cros == dest_cros or not chromeos_root:
+ sys.exit(1)
+      if src_cros:
+ cros_machine = src_machine
+ else:
+ cros_machine = dest_machine
+
+ command = self.RemoteAccessInitCommand(chromeos_root, cros_machine)
+ ssh_command = (
+ 'ssh -p ${FLAGS_ssh_port}' + ' -o StrictHostKeyChecking=no' +
+ ' -o UserKnownHostsFile=$(mktemp)' + ' -i $TMP_PRIVATE_KEY')
+ rsync_prefix = "\nrsync -r -e \"%s\" " % ssh_command
+      if dest_cros:
+ command += rsync_prefix + '%s root@%s:%s' % (src, dest_machine, dest)
+ return self.RunCommand(command,
+ machine=src_machine,
+ username=src_user,
+ command_terminator=command_terminator,
+ print_to_console=print_to_console)
+ else:
+ command += rsync_prefix + 'root@%s:%s %s' % (src_machine, src, dest)
+ return self.RunCommand(command,
+ machine=dest_machine,
+ username=dest_user,
+ command_terminator=command_terminator,
+ print_to_console=print_to_console)
+
+ if dest_machine == src_machine:
+ command = 'rsync -a %s %s' % (src, dest)
+ else:
+ if src_machine is None:
+ src_machine = os.uname()[1]
+ src_user = getpass.getuser()
+ command = 'rsync -a %s@%s:%s %s' % (src_user, src_machine, src, dest)
+ return self.RunCommand(command,
+ machine=dest_machine,
+ username=dest_user,
+ command_terminator=command_terminator,
+ print_to_console=print_to_console)
+
+ def RunCommand2(self,
+ cmd,
+ cwd=None,
+ line_consumer=None,
+ timeout=None,
+ shell=True,
+ join_stderr=True,
+ env=None,
+ except_handler=lambda p, e: None):
+ """Run the command with an extra feature line_consumer.
+
+    This version allows developers to provide a line_consumer, which will
+    be fed the lines the command writes as it executes.
+
+    A line_consumer is a callback that is invoked once for each line the
+    command writes (to either stdout or stderr). It is called with three
+    keyword arguments, so it should declare them as named parameters or
+    absorb them via **kwargs. The arguments are -
+ 'line' - The line output by the binary. Notice, this string includes
+ the trailing '\n'.
+      'output' - Whether this line came from stdout or stderr; values are
+                 either 'stdout' or 'stderr'. When join_stderr is True,
+                 this value will always be 'stdout'.
+ 'pobject' - The object used to control execution, for example, call
+ pobject.kill().
+
+ Note: As this is written, the stdin for the process executed is
+ not associated with the stdin of the caller of this routine.
+
+ Args:
+ cmd: Command in a single string.
+ cwd: Working directory for execution.
+      line_consumer: A function that will be called by this function. See above
+ for details.
+      timeout: Terminate the command after this many seconds.
+      shell: Whether to use a shell for execution.
+      join_stderr: Whether to join stderr into the stdout stream.
+ env: Execution environment.
+ except_handler: Callback for when exception is thrown during command
+ execution. Passed process object and exception.
+
+ Returns:
+ Execution return code.
+
+ Raises:
+      child_exception: If the command process fails to start (missing
+        permission, no such file, etc.).
+ """
+
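+    # A hedged usage sketch of a line_consumer (the consumer and the 'make'
+    # command below are illustrative, not part of this module):
+    #
+    #   def consumer(line, output, pobject):
+    #     sys.stdout.write('[%s] %s' % (output, line))
+    #
+    #   GetCommandExecuter().RunCommand2('make', line_consumer=consumer)
+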
+ class StreamHandler(object):
+ """Internal utility class."""
+
+ def __init__(self, pobject, fd, name, line_consumer):
+ self._pobject = pobject
+ self._fd = fd
+ self._name = name
+ self._buf = ''
+ self._line_consumer = line_consumer
+
+ def read_and_notify_line(self):
+        # Read from this handler's own fd rather than relying on the
+        # enclosing loop variable of the same name.
+        t = os.read(self._fd, 1024)
+ self._buf = self._buf + t
+ self.notify_line()
+
+ def notify_line(self):
+ p = self._buf.find('\n')
+ while p >= 0:
+ self._line_consumer(line=self._buf[:p + 1],
+ output=self._name,
+ pobject=self._pobject)
+ if p < len(self._buf) - 1:
+ self._buf = self._buf[p + 1:]
+ p = self._buf.find('\n')
+ else:
+ self._buf = ''
+ p = -1
+ break
+
+ def notify_eos(self):
+ # Notify end of stream. The last line may not end with a '\n'.
+ if self._buf != '':
+ self._line_consumer(line=self._buf,
+ output=self._name,
+ pobject=self._pobject)
+ self._buf = ''
+
+ if self.log_level == 'verbose':
+ self.logger.LogCmd(cmd)
+ elif self.logger:
+ self.logger.LogCmdToFileOnly(cmd)
+
+ # We use setsid so that the child will have a different session id
+ # and we can easily kill the process group. This is also important
+ # because the child will be disassociated from the parent terminal.
+ # In this way the child cannot mess the parent's terminal.
+ pobject = None
+ try:
+ pobject = subprocess.Popen(
+ cmd,
+ cwd=cwd,
+ bufsize=1024,
+ env=env,
+ shell=shell,
+ universal_newlines=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT if join_stderr else subprocess.PIPE,
+ preexec_fn=os.setsid)
+
+ # We provide a default line_consumer
+ if line_consumer is None:
+ line_consumer = lambda **d: None
+ start_time = time.time()
+ poll = select.poll()
+ outfd = pobject.stdout.fileno()
+ poll.register(outfd, select.POLLIN | select.POLLPRI)
+ handlermap = {outfd:
+ StreamHandler(pobject, outfd, 'stdout', line_consumer)}
+ if not join_stderr:
+ errfd = pobject.stderr.fileno()
+ poll.register(errfd,
+ select.POLLIN | select.POLLPRI)
+ handlermap[errfd] = StreamHandler(pobject,
+ errfd,
+ 'stderr',
+ line_consumer)
+      while handlermap:
+ readables = poll.poll(300)
+ for (fd, evt) in readables:
+ handler = handlermap[fd]
+ if evt & (select.POLLPRI | select.POLLIN):
+ handler.read_and_notify_line()
+ elif evt & (select.POLLHUP | select.POLLERR | select.POLLNVAL):
+ handler.notify_eos()
+ poll.unregister(fd)
+ del handlermap[fd]
+
+ if timeout is not None and (time.time() - start_time > timeout):
+ os.killpg(os.getpgid(pobject.pid), signal.SIGTERM)
+
+ return pobject.wait()
+ except BaseException as e:
+ except_handler(pobject, e)
+ raise
+
+
+class MockCommandExecuter(CommandExecuter):
+ """Mock class for class CommandExecuter."""
+
+ def __init__(self, log_level, logger_to_set=None):
+ super(MockCommandExecuter, self).__init__(log_level, logger_to_set)
+
+ def RunCommandGeneric(self,
+ cmd,
+ return_output=False,
+ machine=None,
+ username=None,
+ command_terminator=None,
+ command_timeout=None,
+ terminated_timeout=10,
+ print_to_console=True,
+ except_handler=lambda p, e: None):
+ assert not command_timeout
+ cmd = str(cmd)
+ if machine is None:
+ machine = 'localhost'
+ if username is None:
+ username = 'current'
+ logger.GetLogger().LogCmd('(Mock) ' + cmd, machine, username,
+ print_to_console)
+ return (0, '', '')
+
+ def RunCommand(self, *args, **kwargs):
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = False
+ return self.RunCommandGeneric(*args, **kwargs)[0]
+
+ def RunCommandWOutput(self, *args, **kwargs):
+ assert 'return_output' not in kwargs
+ kwargs['return_output'] = True
+ return self.RunCommandGeneric(*args, **kwargs)
+
+
+class CommandTerminator(object):
+ """Object to request termination of a command in execution."""
+
+ def __init__(self):
+ self.terminated = False
+
+ def Terminate(self):
+ self.terminated = True
+
+ def IsTerminated(self):
+ return self.terminated
diff --git a/cros_utils/command_executer_unittest.py b/cros_utils/command_executer_unittest.py
new file mode 100755
index 00000000..d5f5d0cf
--- /dev/null
+++ b/cros_utils/command_executer_unittest.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python2
+"""Unittest for command_executer.py."""
+
+from __future__ import print_function
+
+import time
+import unittest
+
+import command_executer
+
+
+class CommandExecuterTest(unittest.TestCase):
+ """Test for CommandExecuter class."""
+
+ def testTimeout(self):
+ timeout = 1
+ logging_level = 'average'
+ ce = command_executer.CommandExecuter(logging_level)
+ start = time.time()
+ command = 'sleep 20'
+ ce.RunCommand(command, command_timeout=timeout, terminated_timeout=timeout)
+ end = time.time()
+    self.assertEqual(round(end - start), timeout)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/cros_utils/constants.py b/cros_utils/constants.py
new file mode 100644
index 00000000..827e9233
--- /dev/null
+++ b/cros_utils/constants.py
@@ -0,0 +1,10 @@
+# Copyright 2010 Google Inc. All Rights Reserved.
+"""Generic constants used accross modules.
+"""
+
+__author__ = 'shenhan@google.com (Han Shen)'
+
+MOUNTED_TOOLCHAIN_ROOT = '/usr/local/toolchain_root'
+
+# Root directory for nightly testing runs.
+CROSTC_WORKSPACE = '/usr/local/google/crostc'
diff --git a/cros_utils/email_sender.py b/cros_utils/email_sender.py
new file mode 100755
index 00000000..cd45b4ec
--- /dev/null
+++ b/cros_utils/email_sender.py
@@ -0,0 +1,144 @@
+#!/usr/bin/python2
+
+# Copyright 2011 Google Inc. All Rights Reserved.
+"""Utilities to send email either through SMTP or SendGMR."""
+
+from __future__ import print_function
+
+from email import encoders as Encoders
+from email.mime.base import MIMEBase
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+import os
+import smtplib
+import tempfile
+
+from cros_utils import command_executer
+
+
+class EmailSender(object):
+ """Utility class to send email through SMTP or SendGMR."""
+
+ class Attachment(object):
+ """Small class to keep track of attachment info."""
+
+ def __init__(self, name, content):
+ self.name = name
+ self.content = content
+
+ def SendEmail(self,
+ email_to,
+ subject,
+ text_to_send,
+ email_cc=None,
+ email_bcc=None,
+ email_from=None,
+ msg_type='plain',
+ attachments=None):
+ """Choose appropriate email method and call it."""
+ if os.path.exists('/usr/bin/sendgmr'):
+ self.SendGMREmail(email_to, subject, text_to_send, email_cc, email_bcc,
+ email_from, msg_type, attachments)
+ else:
+ self.SendSMTPEmail(email_to, subject, text_to_send, email_cc, email_bcc,
+ email_from, msg_type, attachments)
+
+ def SendSMTPEmail(self, email_to, subject, text_to_send, email_cc, email_bcc,
+ email_from, msg_type, attachments):
+ """Send email via standard smtp mail."""
+ # Email summary to the current user.
+ msg = MIMEMultipart()
+
+ if not email_from:
+ email_from = os.path.basename(__file__)
+
+ msg['To'] = ','.join(email_to)
+ msg['Subject'] = subject
+
+ if email_from:
+ msg['From'] = email_from
+ if email_cc:
+ msg['CC'] = ','.join(email_cc)
+ email_to += email_cc
+ if email_bcc:
+ msg['BCC'] = ','.join(email_bcc)
+ email_to += email_bcc
+
+ msg.attach(MIMEText(text_to_send, msg_type))
+ if attachments:
+ for attachment in attachments:
+ part = MIMEBase('application', 'octet-stream')
+ part.set_payload(attachment.content)
+ Encoders.encode_base64(part)
+ part.add_header('Content-Disposition',
+ "attachment; filename=\"%s\"" % attachment.name)
+ msg.attach(part)
+
+ # Send the message via our own SMTP server, but don't include the
+ # envelope header.
+ s = smtplib.SMTP('localhost')
+ s.sendmail(email_from, email_to, msg.as_string())
+ s.quit()
+
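+  # A hedged usage sketch (the address, subject, and attachment contents are
+  # placeholders):
+  #
+  #   sender = EmailSender()
+  #   report = EmailSender.Attachment('report.txt', 'contents here')
+  #   sender.SendEmail(['someone@example.com'], 'Nightly results',
+  #                    'See attached report.', attachments=[report])
+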
+ def SendGMREmail(self, email_to, subject, text_to_send, email_cc, email_bcc,
+ email_from, msg_type, attachments):
+ """Send email via sendgmr program."""
+ ce = command_executer.GetCommandExecuter(log_level='none')
+
+ if not email_from:
+ email_from = os.path.basename(__file__)
+
+ to_list = ','.join(email_to)
+
+ if not text_to_send:
+ text_to_send = 'Empty message body.'
+ body_fd, body_filename = tempfile.mkstemp()
+ to_be_deleted = [body_filename]
+
+ try:
+ os.write(body_fd, text_to_send)
+ os.close(body_fd)
+
+ # Fix single-quotes inside the subject. In bash, to escape a single quote
+ # (e.g 'don't') you need to replace it with '\'' (e.g. 'don'\''t'). To
+ # make Python read the backslash as a backslash rather than an escape
+ # character, you need to double it. So...
+ subject = subject.replace("'", "'\\''")
+
+ if msg_type == 'html':
+ command = ("sendgmr --to='%s' --subject='%s' --html_file='%s' "
+ '--body_file=/dev/null' % (to_list, subject, body_filename))
+ else:
+ command = ("sendgmr --to='%s' --subject='%s' --body_file='%s'" %
+ (to_list, subject, body_filename))
+ if email_from:
+ command += ' --from=%s' % email_from
+ if email_cc:
+ cc_list = ','.join(email_cc)
+ command += " --cc='%s'" % cc_list
+ if email_bcc:
+ bcc_list = ','.join(email_bcc)
+ command += " --bcc='%s'" % bcc_list
+
+ if attachments:
+ attachment_files = []
+ for attachment in attachments:
+ if '<html>' in attachment.content:
+ report_suffix = '_report.html'
+ else:
+ report_suffix = '_report.txt'
+ fd, fname = tempfile.mkstemp(suffix=report_suffix)
+ os.write(fd, attachment.content)
+ os.close(fd)
+ attachment_files.append(fname)
+ files = ','.join(attachment_files)
+ command += " --attachment_files='%s'" % files
+ to_be_deleted += attachment_files
+
+ # Send the message via our own GMR server.
+ status = ce.RunCommand(command)
+ return status
+
+ finally:
+ for f in to_be_deleted:
+ os.remove(f)
diff --git a/cros_utils/file_utils.py b/cros_utils/file_utils.py
new file mode 100644
index 00000000..b7aad7b3
--- /dev/null
+++ b/cros_utils/file_utils.py
@@ -0,0 +1,87 @@
+# Copyright 2011 Google Inc. All Rights Reserved.
+"""Utilities for operations on files."""
+
+from __future__ import print_function
+
+import errno
+import os
+import shutil
+import command_executer
+
+
+class FileUtils(object):
+ """Utilities for operations on files."""
+ _instance = None
+ DRY_RUN = False
+
+ @classmethod
+ def Configure(cls, dry_run):
+ cls.DRY_RUN = dry_run
+
+ def __new__(cls, *args, **kwargs):
+ if not cls._instance:
+ if cls.DRY_RUN:
+ cls._instance = super(FileUtils, cls).__new__(MockFileUtils, *args,
+ **kwargs)
+ else:
+ cls._instance = super(FileUtils, cls).__new__(cls, *args, **kwargs)
+ return cls._instance
+
+ def Md5File(self, filename, log_level='verbose', _block_size=2**10):
+ command = 'md5sum %s' % filename
+ ce = command_executer.GetCommandExecuter(log_level=log_level)
+ ret, out, _ = ce.RunCommandWOutput(command)
+ if ret:
+ raise Exception('Could not run md5sum on: %s' % filename)
+
+ return out.strip().split()[0]
+
+ def CanonicalizeChromeOSRoot(self, chromeos_root):
+ chromeos_root = os.path.expanduser(chromeos_root)
+ if os.path.isdir(os.path.join(chromeos_root, 'chromite')):
+ return chromeos_root
+ else:
+ return None
+
+ def ChromeOSRootFromImage(self, chromeos_image):
+ chromeos_root = os.path.join(
+ os.path.dirname(chromeos_image), '../../../../..')
+ return self.CanonicalizeChromeOSRoot(chromeos_root)
+
+ def MkDirP(self, path):
+ try:
+ os.makedirs(path)
+ except OSError as exc:
+ if exc.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+
+ def RmDir(self, path):
+ shutil.rmtree(path, ignore_errors=True)
+
+ def WriteFile(self, path, contents):
+ with open(path, 'wb') as f:
+ f.write(contents)
+
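+# A hedged sketch of the dry-run singleton pattern above. Configure() must
+# run before the first instantiation for DRY_RUN to take effect:
+#
+#   FileUtils.Configure(True)   # dry_run=True
+#   fu = FileUtils()            # Returns the MockFileUtils singleton.
+#   fu.MkDirP('/tmp/scratch')   # No-op under dry run.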
+
+class MockFileUtils(FileUtils):
+ """Mock class for file utilities."""
+
+ def Md5File(self, filename, log_level='verbose', _block_size=2**10):
+ return 'd41d8cd98f00b204e9800998ecf8427e'
+
+ def CanonicalizeChromeOSRoot(self, chromeos_root):
+ return '/tmp/chromeos_root'
+
+ def ChromeOSRootFromImage(self, chromeos_image):
+ return '/tmp/chromeos_root'
+
+ def RmDir(self, path):
+ pass
+
+ def MkDirP(self, path):
+ pass
+
+ def WriteFile(self, path, contents):
+ pass
diff --git a/cros_utils/html_tools.py b/cros_utils/html_tools.py
new file mode 100644
index 00000000..8ca795bf
--- /dev/null
+++ b/cros_utils/html_tools.py
@@ -0,0 +1,91 @@
+# Copyright 2010 Google Inc. All Rights Reserved.
+"""Utilities for generating html."""
+
+
+def GetPageHeader(page_title):
+ return """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+<head>
+<style type="text/css">
+table
+{
+border-collapse:collapse;
+}
+table, td, th
+{
+border:1px solid black;
+}
+</style>
+<script type="text/javascript">
+function displayRow(id){
+ var row = document.getElementById("group_"+id);
+ if (row.style.display == '') row.style.display = 'none';
+ else row.style.display = '';
+ }
+</script>
+<title>%s</title>
+</head>
+<body>
+
+""" % page_title
+
+
+def GetListHeader():
+ return '<ul>'
+
+
+def GetListItem(text):
+ return '<li>%s</li>' % text
+
+
+def GetListFooter():
+ return '</ul>'
+
+
+def GetList(items):
+ return '<ul>%s</ul>' % ''.join(['<li>%s</li>' % item for item in items])
+
+
+def GetParagraph(text):
+ return '<p>%s</p>' % text
+
+
+def GetFooter():
+ return '</body>\n</html>'
+
+
+def GetHeader(text, h=1):
+ return '<h%s>%s</h%s>' % (h, text, h)
+
+
+def GetTableHeader(headers):
+ row = ''.join(['<th>%s</th>' % header for header in headers])
+ return '<table><tr>%s</tr>' % row
+
+
+def GetTableFooter():
+ return '</table>'
+
+
+def FormatLineBreaks(text):
+ return text.replace('\n', '<br/>')
+
+
+def GetTableCell(text):
+ return '<td>%s</td>' % FormatLineBreaks(str(text))
+
+
+def GetTableRow(columns):
+ return '<tr>%s</tr>' % '\n'.join([GetTableCell(column) for column in columns])
+
+
+def GetTable(headers, rows):
+ table = [GetTableHeader(headers)]
+ table.extend([GetTableRow(row) for row in rows])
+ table.append(GetTableFooter())
+ return '\n'.join(table)
+
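+# A small illustrative example (header and row values are arbitrary):
+#
+#   html = GetTable(['name', 'score'], [['foo', 42], ['bar', 7]])
+#   page = GetPageHeader('Results') + html + GetFooter()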
+
+def GetLink(link, text):
+ return "<a href='%s'>%s</a>" % (link, text)
diff --git a/cros_utils/locks.py b/cros_utils/locks.py
new file mode 100644
index 00000000..cb96368e
--- /dev/null
+++ b/cros_utils/locks.py
@@ -0,0 +1,44 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+"""Utilities for locking machines."""
+
+from __future__ import print_function
+
+import time
+
+import afe_lock_machine
+
+import logger
+
+
+def AcquireLock(machines, chromeos_root, timeout=1200):
+ """Acquire lock for machine(s) with timeout, using AFE server for locking."""
+ start_time = time.time()
+ locked = True
+ sleep_time = min(10, timeout / 10.0)
+ while True:
+ try:
+ afe_lock_machine.AFELockManager(machines, False, chromeos_root,
+ None).UpdateMachines(True)
+ break
+ except Exception as e:
+ if time.time() - start_time > timeout:
+ locked = False
+ logger.GetLogger().LogWarning(
+ 'Could not acquire lock on {0} within {1} seconds: {2}'.format(
+ repr(machines), timeout, str(e)))
+ break
+ time.sleep(sleep_time)
+ return locked
+
+
+def ReleaseLock(machines, chromeos_root):
+ """Release locked machine(s), using AFE server for locking."""
+ unlocked = True
+ try:
+ afe_lock_machine.AFELockManager(machines, False, chromeos_root,
+ None).UpdateMachines(False)
+ except Exception as e:
+ unlocked = False
+ logger.GetLogger().LogWarning('Could not unlock %s. %s' %
+ (repr(machines), str(e)))
+ return unlocked
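+
+
+# A hedged sketch of the intended acquire/release pattern (the machine name
+# and root path are placeholders):
+#
+#   machines = ['my-machine.cros']
+#   if AcquireLock(machines, '/path/to/chromeos_root'):
+#     try:
+#       pass  # ... run work on the locked machines ...
+#     finally:
+#       ReleaseLock(machines, '/path/to/chromeos_root')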
diff --git a/cros_utils/logger.py b/cros_utils/logger.py
new file mode 100644
index 00000000..364d9c9d
--- /dev/null
+++ b/cros_utils/logger.py
@@ -0,0 +1,369 @@
+# Copyright 2010 Google Inc. All Rights Reserved.
+"""Logging helper module."""
+
+from __future__ import print_function
+
+# System modules
+import os.path
+import sys
+import traceback
+
+
+#TODO(yunlian@google.com): Use GetRoot from misc
+def GetRoot(scr_name):
+ """Break up pathname into (dir+name)."""
+ abs_path = os.path.abspath(scr_name)
+ return (os.path.dirname(abs_path), os.path.basename(abs_path))
+
+
+class Logger(object):
+ """Logging helper class."""
+
+ MAX_LOG_FILES = 10
+
+ def __init__(self, rootdir, basefilename, print_console, subdir='logs'):
+ logdir = os.path.join(rootdir, subdir)
+ basename = os.path.join(logdir, basefilename)
+
+ try:
+ os.makedirs(logdir)
+ except OSError:
+ pass
+ # print("Warning: Logs directory '%s' already exists." % logdir)
+
+ self.print_console = print_console
+
+ self._CreateLogFileHandles(basename)
+
+ self._WriteTo(self.cmdfd, ' '.join(sys.argv), True)
+
+ def _AddSuffix(self, basename, suffix):
+ return '%s%s' % (basename, suffix)
+
+ def _FindSuffix(self, basename):
+ timestamps = []
+ found_suffix = None
+ for i in range(self.MAX_LOG_FILES):
+ suffix = str(i)
+ suffixed_basename = self._AddSuffix(basename, suffix)
+ cmd_file = '%s.cmd' % suffixed_basename
+ if not os.path.exists(cmd_file):
+ found_suffix = suffix
+ break
+ timestamps.append(os.stat(cmd_file).st_mtime)
+
+ if found_suffix:
+ return found_suffix
+
+ # Try to pick the oldest file with the suffix and return that one.
+ suffix = str(timestamps.index(min(timestamps)))
+ # print ("Warning: Overwriting log file: %s" %
+ # self._AddSuffix(basename, suffix))
+ return suffix
+
+ def _CreateLogFileHandle(self, name):
+ fd = None
+ try:
+ fd = open(name, 'w')
+ except IOError:
+ print('Warning: could not open %s for writing.' % name)
+ return fd
+
+ def _CreateLogFileHandles(self, basename):
+ suffix = self._FindSuffix(basename)
+ suffixed_basename = self._AddSuffix(basename, suffix)
+
+ self.cmdfd = self._CreateLogFileHandle('%s.cmd' % suffixed_basename)
+ self.stdout = self._CreateLogFileHandle('%s.out' % suffixed_basename)
+ self.stderr = self._CreateLogFileHandle('%s.err' % suffixed_basename)
+
+ self._CreateLogFileSymlinks(basename, suffixed_basename)
+
+ # Symlink unsuffixed basename to currently suffixed one.
+ def _CreateLogFileSymlinks(self, basename, suffixed_basename):
+ try:
+ for extension in ['cmd', 'out', 'err']:
+ src_file = '%s.%s' % (os.path.basename(suffixed_basename), extension)
+ dest_file = '%s.%s' % (basename, extension)
+ if os.path.exists(dest_file):
+ os.remove(dest_file)
+ os.symlink(src_file, dest_file)
+ except Exception as ex:
+ print('Exception while creating symlinks: %s' % str(ex))
+
+ def _WriteTo(self, fd, msg, flush):
+ if fd:
+ fd.write(msg)
+ if flush:
+ fd.flush()
+
+ def LogStartDots(self, print_to_console=True):
+ term_fd = self._GetStdout(print_to_console)
+ if term_fd:
+ term_fd.flush()
+ term_fd.write('. ')
+ term_fd.flush()
+
+ def LogAppendDot(self, print_to_console=True):
+ term_fd = self._GetStdout(print_to_console)
+ if term_fd:
+ term_fd.write('. ')
+ term_fd.flush()
+
+ def LogEndDots(self, print_to_console=True):
+ term_fd = self._GetStdout(print_to_console)
+ if term_fd:
+ term_fd.write('\n')
+ term_fd.flush()
+
+ def LogMsg(self, file_fd, term_fd, msg, flush=True):
+ if file_fd:
+ self._WriteTo(file_fd, msg, flush)
+ if self.print_console:
+ self._WriteTo(term_fd, msg, flush)
+
+ def _GetStdout(self, print_to_console):
+ if print_to_console:
+ return sys.stdout
+ return None
+
+ def _GetStderr(self, print_to_console):
+ if print_to_console:
+ return sys.stderr
+ return None
+
+ def LogCmdToFileOnly(self, cmd, machine='', user=None):
+ if not self.cmdfd:
+ return
+
+ host = ('%s@%s' % (user, machine)) if user else machine
+ flush = True
+ cmd_string = 'CMD (%s): %s\n' % (host, cmd)
+ self._WriteTo(self.cmdfd, cmd_string, flush)
+
+ def LogCmd(self, cmd, machine='', user=None, print_to_console=True):
+ if user:
+ host = '%s@%s' % (user, machine)
+ else:
+ host = machine
+
+ self.LogMsg(self.cmdfd, self._GetStdout(print_to_console),
+ 'CMD (%s): %s\n' % (host, cmd))
+
+ def LogFatal(self, msg, print_to_console=True):
+ self.LogMsg(self.stderr, self._GetStderr(print_to_console),
+ 'FATAL: %s\n' % msg)
+ self.LogMsg(self.stderr, self._GetStderr(print_to_console),
+ '\n'.join(traceback.format_stack()))
+ sys.exit(1)
+
+ def LogError(self, msg, print_to_console=True):
+ self.LogMsg(self.stderr, self._GetStderr(print_to_console),
+ 'ERROR: %s\n' % msg)
+
+ def LogWarning(self, msg, print_to_console=True):
+ self.LogMsg(self.stderr, self._GetStderr(print_to_console),
+ 'WARNING: %s\n' % msg)
+
+ def LogOutput(self, msg, print_to_console=True):
+ self.LogMsg(self.stdout, self._GetStdout(print_to_console),
+ 'OUTPUT: %s\n' % msg)
+
+ def LogFatalIf(self, condition, msg):
+ if condition:
+ self.LogFatal(msg)
+
+ def LogErrorIf(self, condition, msg):
+ if condition:
+ self.LogError(msg)
+
+ def LogWarningIf(self, condition, msg):
+ if condition:
+ self.LogWarning(msg)
+
+ def LogCommandOutput(self, msg, print_to_console=True):
+ self.LogMsg(self.stdout,
+ self._GetStdout(print_to_console),
+ msg,
+ flush=False)
+
+ def LogCommandError(self, msg, print_to_console=True):
+ self.LogMsg(self.stderr,
+ self._GetStderr(print_to_console),
+ msg,
+ flush=False)
+
+ def Flush(self):
+ self.cmdfd.flush()
+ self.stdout.flush()
+ self.stderr.flush()
+
+
+class MockLogger(object):
+ """Logging helper class."""
+
+ MAX_LOG_FILES = 10
+
+ def __init__(self, *_args, **_kwargs):
+ self.stdout = sys.stdout
+ self.stderr = sys.stderr
+ return None
+
+ def _AddSuffix(self, basename, suffix):
+ return '%s%s' % (basename, suffix)
+
+ def _FindSuffix(self, basename):
+ timestamps = []
+ found_suffix = None
+ for i in range(self.MAX_LOG_FILES):
+ suffix = str(i)
+ suffixed_basename = self._AddSuffix(basename, suffix)
+ cmd_file = '%s.cmd' % suffixed_basename
+ if not os.path.exists(cmd_file):
+ found_suffix = suffix
+ break
+ timestamps.append(os.stat(cmd_file).st_mtime)
+
+ if found_suffix:
+ return found_suffix
+
+ # Try to pick the oldest file with the suffix and return that one.
+ suffix = str(timestamps.index(min(timestamps)))
+ # print ("Warning: Overwriting log file: %s" %
+ # self._AddSuffix(basename, suffix))
+ return suffix
+
+ def _CreateLogFileHandle(self, name):
+ print('MockLogger: creating open file handle for %s (writing)' % name)
+
+ def _CreateLogFileHandles(self, basename):
+ suffix = self._FindSuffix(basename)
+ suffixed_basename = self._AddSuffix(basename, suffix)
+
+ print('MockLogger: opening file %s.cmd' % suffixed_basename)
+ print('MockLogger: opening file %s.out' % suffixed_basename)
+ print('MockLogger: opening file %s.err' % suffixed_basename)
+
+ self._CreateLogFileSymlinks(basename, suffixed_basename)
+
+ # Symlink unsuffixed basename to currently suffixed one.
+ def _CreateLogFileSymlinks(self, basename, suffixed_basename):
+ for extension in ['cmd', 'out', 'err']:
+ src_file = '%s.%s' % (os.path.basename(suffixed_basename), extension)
+ dest_file = '%s.%s' % (basename, extension)
+ print('MockLogger: Calling os.symlink(%s, %s)' % (src_file, dest_file))
+
+ def _WriteTo(self, _fd, msg, _flush):
+ print('MockLogger: %s' % msg)
+
+ def LogStartDots(self, _print_to_console=True):
+ print('. ')
+
+ def LogAppendDot(self, _print_to_console=True):
+ print('. ')
+
+ def LogEndDots(self, _print_to_console=True):
+ print('\n')
+
+ def LogMsg(self, _file_fd, _term_fd, msg, **_kwargs):
+ print('MockLogger: %s' % msg)
+
+ def _GetStdout(self, _print_to_console):
+ return None
+
+ def _GetStderr(self, _print_to_console):
+ return None
+
+ def LogCmdToFileOnly(self, *_args, **_kwargs):
+ return
+
+ # def LogCmdToFileOnly(self, cmd, machine='', user=None):
+ # host = ('%s@%s' % (user, machine)) if user else machine
+ # cmd_string = 'CMD (%s): %s\n' % (host, cmd)
+ # print('MockLogger: Writing to file ONLY: %s' % cmd_string)
+
+ def LogCmd(self, cmd, machine='', user=None, print_to_console=True):
+ if user:
+ host = '%s@%s' % (user, machine)
+ else:
+ host = machine
+
+ self.LogMsg(0, self._GetStdout(print_to_console),
+ 'CMD (%s): %s\n' % (host, cmd))
+
+ def LogFatal(self, msg, print_to_console=True):
+ self.LogMsg(0, self._GetStderr(print_to_console), 'FATAL: %s\n' % msg)
+ self.LogMsg(0, self._GetStderr(print_to_console),
+ '\n'.join(traceback.format_stack()))
+    print('MockLogger: Calling sys.exit(1)')
+
+ def LogError(self, msg, print_to_console=True):
+ self.LogMsg(0, self._GetStderr(print_to_console), 'ERROR: %s\n' % msg)
+
+ def LogWarning(self, msg, print_to_console=True):
+ self.LogMsg(0, self._GetStderr(print_to_console), 'WARNING: %s\n' % msg)
+
+ def LogOutput(self, msg, print_to_console=True):
+ self.LogMsg(0, self._GetStdout(print_to_console), 'OUTPUT: %s\n' % msg)
+
+ def LogFatalIf(self, condition, msg):
+ if condition:
+ self.LogFatal(msg)
+
+ def LogErrorIf(self, condition, msg):
+ if condition:
+ self.LogError(msg)
+
+ def LogWarningIf(self, condition, msg):
+ if condition:
+ self.LogWarning(msg)
+
+ def LogCommandOutput(self, msg, print_to_console=True):
+ self.LogMsg(self.stdout,
+ self._GetStdout(print_to_console),
+ msg,
+ flush=False)
+
+ def LogCommandError(self, msg, print_to_console=True):
+ self.LogMsg(self.stderr,
+ self._GetStderr(print_to_console),
+ msg,
+ flush=False)
+
+ def Flush(self):
+ print('MockLogger: Flushing cmdfd, stdout, stderr')
+
+
+main_logger = None
+
+
+def InitLogger(script_name, log_dir, print_console=True, mock=False):
+ """Initialize a global logger. To be called only once."""
+ # pylint: disable=global-statement
+ global main_logger
+ assert not main_logger, 'The logger has already been initialized'
+ rootdir, basefilename = GetRoot(script_name)
+ if not log_dir:
+ log_dir = rootdir
+ if not mock:
+ main_logger = Logger(log_dir, basefilename, print_console)
+ else:
+ main_logger = MockLogger(log_dir, basefilename, print_console)
+
+
+def GetLogger(log_dir='', mock=False):
+ if not main_logger:
+ InitLogger(sys.argv[0], log_dir, mock=mock)
+ return main_logger
+
+
+def HandleUncaughtExceptions(fun):
+ """Catches all exceptions that would go outside decorated fun scope."""
+
+ def _Interceptor(*args, **kwargs):
+ try:
+ return fun(*args, **kwargs)
+ except StandardError:
+ GetLogger().LogFatal('Uncaught exception:\n%s' % traceback.format_exc())
+
+ return _Interceptor
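+
+
+# A hedged usage sketch of the decorator above ('Main' is illustrative):
+#
+#   @HandleUncaughtExceptions
+#   def Main(argv):
+#     ...  # Any uncaught StandardError is logged fatally with a traceback.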
diff --git a/cros_utils/machines.py b/cros_utils/machines.py
new file mode 100644
index 00000000..722df3b8
--- /dev/null
+++ b/cros_utils/machines.py
@@ -0,0 +1,25 @@
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utilities relating to machine-specific functions."""
+
+from __future__ import print_function
+
+from cros_utils import command_executer
+
+
+def MachineIsPingable(machine, logging_level='average'):
+ """Checks to see if a machine is responding to 'ping'.
+
+ Args:
+ machine: String containing the name or ip address of the machine to check.
+ logging_level: The logging level with which to initialize the
+ command_executer (from command_executor.LOG_LEVEL enum list).
+
+ Returns:
+ Boolean indicating whether machine is responding to ping or not.
+ """
+ ce = command_executer.GetCommandExecuter(log_level=logging_level)
+ cmd = 'ping -c 1 -w 3 %s' % machine
+ status = ce.RunCommand(cmd)
+ return status == 0
diff --git a/cros_utils/manifest_versions.py b/cros_utils/manifest_versions.py
new file mode 100644
index 00000000..f011282b
--- /dev/null
+++ b/cros_utils/manifest_versions.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Tools for searching/manipulating the manifests repository."""
+
+from __future__ import print_function
+
+__author__ = 'llozano@google.com (Luis Lozano)'
+
+import os
+import re
+import shutil
+import tempfile
+import time
+
+import command_executer
+import logger
+
+
+def IsCrosVersion(version):
+ match = re.search(r'(\d+\.\d+\.\d+\.\d+)', version)
+ return match is not None
+
+
+def IsRFormatCrosVersion(version):
+ match = re.search(r'(R\d+-\d+\.\d+\.\d+)', version)
+ return match is not None
+
+
+def RFormatCrosVersion(version):
+ assert IsCrosVersion(version)
+ tmp_major, tmp_minor = version.split('.', 1)
+ rformat = 'R' + tmp_major + '-' + tmp_minor
+ assert IsRFormatCrosVersion(rformat)
+ return rformat
+
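+# An illustrative round trip between the two version formats handled above:
+#
+#   IsCrosVersion('57.9202.0.0')          ->  True
+#   RFormatCrosVersion('57.9202.0.0')     ->  'R57-9202.0.0'
+#   IsRFormatCrosVersion('R57-9202.0.0')  ->  True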
+
+class ManifestVersions(object):
+ """This class handles interactions with the manifests repo."""
+
+ def __init__(self, internal=True):
+ self.internal = internal
+ self.clone_location = tempfile.mkdtemp()
+ self.ce = command_executer.GetCommandExecuter()
+ if internal:
+ versions_git = ('https://chrome-internal.googlesource.com/'
+ 'chromeos/manifest-versions.git')
+ else:
+ versions_git = (
+ 'https://chromium.googlesource.com/chromiumos/manifest-versions.git')
+ commands = ['cd {0}'.format(self.clone_location),
+ 'git clone {0}'.format(versions_git)]
+ ret = self.ce.RunCommands(commands)
+ if ret:
+ logger.GetLogger().LogFatal('Failed to clone manifest-versions.')
+
+ def __del__(self):
+ if self.clone_location:
+ shutil.rmtree(self.clone_location)
+
+ def TimeToVersion(self, my_time):
+ """Convert timestamp to version number."""
+ cur_time = time.mktime(time.gmtime())
+ des_time = float(my_time)
+    # Reject timestamps more than ~81 days (7,000,000 seconds) in the past.
+    if cur_time - des_time > 7000000:
+      logger.GetLogger().LogFatal('The time you specified is too early.')
+ commands = ['cd {0}'.format(self.clone_location), 'cd manifest-versions',
+ 'git checkout -f $(git rev-list' +
+ ' --max-count=1 --before={0} origin/master)'.format(my_time)]
+ ret = self.ce.RunCommands(commands)
+ if ret:
+ logger.GetLogger().LogFatal('Failed to checkout manifest at '
+ 'specified time')
+ path = os.path.realpath('{0}/manifest-versions/LKGM/lkgm.xml'.format(
+ self.clone_location))
+ pp = path.split('/')
+ small = os.path.basename(path).split('.xml')[0]
+ version = pp[-2] + '.' + small
+ commands = ['cd {0}'.format(self.clone_location), 'cd manifest-versions',
+ 'git checkout master']
+ self.ce.RunCommands(commands)
+ return version
+
+ def GetManifest(self, version, to_file):
+ """Get the manifest file from a given chromeos-internal version."""
+ assert not IsRFormatCrosVersion(version)
+ version = version.split('.', 1)[1]
+ os.chdir(self.clone_location)
+ files = [os.path.join(r, f)
+ for r, _, fs in os.walk('.') for f in fs if version in f]
+ if files:
+ command = 'cp {0} {1}'.format(files[0], to_file)
+ ret = self.ce.RunCommand(command)
+ if ret:
+ raise Exception('Cannot copy manifest to {0}'.format(to_file))
+ else:
+ raise Exception('Version {0} is not available.'.format(version))
diff --git a/cros_utils/misc.py b/cros_utils/misc.py
new file mode 100644
index 00000000..ae234fe3
--- /dev/null
+++ b/cros_utils/misc.py
@@ -0,0 +1,557 @@
+# Copyright 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Utilities for toolchain build."""
+
+from __future__ import print_function
+
+__author__ = 'asharif@google.com (Ahmad Sharif)'
+
+from contextlib import contextmanager
+import os
+import re
+import shutil
+import sys
+import traceback
+
+import command_executer
+import logger
+
+CHROMEOS_SCRIPTS_DIR = '~/trunk/src/scripts'
+TOOLCHAIN_UTILS_PATH = '~/trunk/src/platform/dev/toolchain_utils.sh'
+
+
+def GetChromeOSVersionFromLSBVersion(lsb_version):
+ """Get Chromeos version from Lsb version."""
+ ce = command_executer.GetCommandExecuter()
+ command = ('git ls-remote '
+ 'https://chromium.googlesource.com/chromiumos/manifest.git')
+ ret, out, _ = ce.RunCommandWOutput(command, print_to_console=False)
+ assert ret == 0, 'Command %s failed' % command
+ lower = []
+ for line in out.splitlines():
+ mo = re.search(r'refs/heads/release-R(\d+)-(\d+)\.B', line)
+ if mo:
+ revision = int(mo.group(1))
+ build = int(mo.group(2))
+ lsb_build = int(lsb_version.split('.')[0])
+ if lsb_build > build:
+ lower.append(revision)
+ lower = sorted(lower)
+ if lower:
+ return 'R%d-%s' % (lower[-1] + 1, lsb_version)
+ else:
+ return 'Unknown'
+
+
+def ApplySubs(string, *substitutions):
+ for pattern, replacement in substitutions:
+ string = re.sub(pattern, replacement, string)
+ return string
+
+
+def UnitToNumber(unit_num, base=1000):
+ """Convert a number with unit to float."""
+ unit_dict = {'kilo': base, 'mega': base**2, 'giga': base**3}
+ unit_num = unit_num.lower()
+ mo = re.search(r'(\d*)(.+)?', unit_num)
+ number = mo.group(1)
+ unit = mo.group(2)
+ if not unit:
+ return float(number)
+ for k, v in unit_dict.items():
+ if k.startswith(unit):
+ return float(number) * v
+  raise Exception('Unit: %s not found in number: %s!' % (unit, unit_num))
+
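+# Illustrative values for the prefix matching above (the unit must be a
+# prefix of 'kilo', 'mega', or 'giga'):
+#
+#   UnitToNumber('2k')                ->  2000.0
+#   UnitToNumber('3meg')              ->  3000000.0
+#   UnitToNumber('5kilo', base=1024)  ->  5120.0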
+
+def GetFilenameFromString(string):
+ return ApplySubs(string, (r'/', '__'), (r'\s', '_'), (r'[\\$="?^]', ''),)
+
+
+def GetRoot(scr_name):
+ """Break up pathname into (dir+name)."""
+ abs_path = os.path.abspath(scr_name)
+ return (os.path.dirname(abs_path), os.path.basename(abs_path))
+
+
+def GetChromeOSKeyFile(chromeos_root):
+ return os.path.join(chromeos_root, 'src', 'scripts', 'mod_for_test_scripts',
+ 'ssh_keys', 'testing_rsa')
+
+
+def GetChrootPath(chromeos_root):
+ return os.path.join(chromeos_root, 'chroot')
+
+
+def GetInsideChrootPath(chromeos_root, file_path):
+ if not file_path.startswith(GetChrootPath(chromeos_root)):
+ raise Exception("File: %s doesn't seem to be in the chroot: %s" %
+ (file_path, chromeos_root))
+ return file_path[len(GetChrootPath(chromeos_root)):]
+
+
+def GetOutsideChrootPath(chromeos_root, file_path):
+ return os.path.join(GetChrootPath(chromeos_root), file_path.lstrip('/'))
+
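+# Illustrative path mapping for the two helpers above (paths are made up):
+#
+#   GetInsideChrootPath('/home/me/cros', '/home/me/cros/chroot/tmp/x')
+#     ->  '/tmp/x'
+#   GetOutsideChrootPath('/home/me/cros', '/tmp/x')
+#     ->  '/home/me/cros/chroot/tmp/x'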
+
+def FormatQuotedCommand(command):
+ return ApplySubs(command, ('"', r'\"'))
+
+
+def FormatCommands(commands):
+ return ApplySubs(
+ str(commands), ('&&', '&&\n'), (';', ';\n'), (r'\n+\s*', '\n'))
+
+
+def GetImageDir(chromeos_root, board):
+ return os.path.join(chromeos_root, 'src', 'build', 'images', board)
+
+
+def LabelLatestImage(chromeos_root, board, label, vanilla_path=None):
+ image_dir = GetImageDir(chromeos_root, board)
+ latest_image_dir = os.path.join(image_dir, 'latest')
+ latest_image_dir = os.path.realpath(latest_image_dir)
+ latest_image_dir = os.path.basename(latest_image_dir)
+ retval = 0
+ with WorkingDirectory(image_dir):
+ command = 'ln -sf -T %s %s' % (latest_image_dir, label)
+ ce = command_executer.GetCommandExecuter()
+ retval = ce.RunCommand(command)
+ if retval:
+ return retval
+ if vanilla_path:
+ command = 'ln -sf -T %s %s' % (vanilla_path, 'vanilla')
+ retval2 = ce.RunCommand(command)
+ return retval2
+ return retval
+
+
+def DoesLabelExist(chromeos_root, board, label):
+ image_label = os.path.join(GetImageDir(chromeos_root, board), label)
+ return os.path.exists(image_label)
+
+
+def GetBuildPackagesCommand(board, usepkg=False, debug=False):
+ if usepkg:
+ usepkg_flag = '--usepkg'
+ else:
+ usepkg_flag = '--nousepkg'
+ if debug:
+ withdebug_flag = '--withdebug'
+ else:
+ withdebug_flag = '--nowithdebug'
+ return ('%s/build_packages %s --withdev --withtest --withautotest '
+ '--skip_toolchain_update %s --board=%s '
+ '--accept_licenses=@CHROMEOS' %
+ (CHROMEOS_SCRIPTS_DIR, usepkg_flag, withdebug_flag, board))
+
+
+def GetBuildImageCommand(board, dev=False):
+ dev_args = ''
+ if dev:
+ dev_args = '--noenable_rootfs_verification --disk_layout=2gb-rootfs'
+ return ('%s/build_image --board=%s %s test' %
+ (CHROMEOS_SCRIPTS_DIR, board, dev_args))
+
+
+def GetSetupBoardCommand(board,
+ gcc_version=None,
+ binutils_version=None,
+ usepkg=None,
+ force=None):
+ """Get setup_board command."""
+ options = []
+
+ if gcc_version:
+ options.append('--gcc_version=%s' % gcc_version)
+
+ if binutils_version:
+ options.append('--binutils_version=%s' % binutils_version)
+
+ if usepkg:
+ options.append('--usepkg')
+ else:
+ options.append('--nousepkg')
+
+ if force:
+ options.append('--force')
+
+ options.append('--accept_licenses=@CHROMEOS')
+
+ return ('%s/setup_board --board=%s %s' %
+ (CHROMEOS_SCRIPTS_DIR, board, ' '.join(options)))
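+
+# Illustrative sketch (hypothetical board name):
+#   GetSetupBoardCommand('lumpy', usepkg=True)
+# returns
+#   '~/trunk/src/scripts/setup_board --board=lumpy --usepkg '
+#   '--accept_licenses=@CHROMEOS'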
+
+
+def CanonicalizePath(path):
+ path = os.path.expanduser(path)
+ path = os.path.realpath(path)
+ return path
+
+
+def GetCtargetFromBoard(board, chromeos_root):
+ """Get Ctarget from board."""
+ base_board = board.split('_')[0]
+ command = ('source %s; get_ctarget_from_board %s' %
+ (TOOLCHAIN_UTILS_PATH, base_board))
+ ce = command_executer.GetCommandExecuter()
+ ret, out, _ = ce.ChrootRunCommandWOutput(chromeos_root, command)
+ if ret != 0:
+ raise ValueError('Board %s is invalid!' % board)
+ # Remove ANSI escape sequences.
+ out = StripANSIEscapeSequences(out)
+ return out.strip()
+
+
+def GetArchFromBoard(board, chromeos_root):
+ """Get Arch from board."""
+ base_board = board.split('_')[0]
+ command = ('source %s; get_board_arch %s' %
+ (TOOLCHAIN_UTILS_PATH, base_board))
+ ce = command_executer.GetCommandExecuter()
+ ret, out, _ = ce.ChrootRunCommandWOutput(chromeos_root, command)
+ if ret != 0:
+ raise ValueError('Board %s is invalid!' % board)
+ # Remove ANSI escape sequences.
+ out = StripANSIEscapeSequences(out)
+ return out.strip()
+
+
+def GetGccLibsDestForBoard(board, chromeos_root):
+ """Get gcc libs destination from board."""
+ arch = GetArchFromBoard(board, chromeos_root)
+ if arch == 'x86':
+ return '/build/%s/usr/lib/gcc/' % board
+ if arch == 'amd64':
+ return '/build/%s/usr/lib64/gcc/' % board
+ if arch == 'arm':
+ return '/build/%s/usr/lib/gcc/' % board
+ if arch == 'arm64':
+ return '/build/%s/usr/lib/gcc/' % board
+ raise ValueError('Arch %s is invalid!' % arch)
+
+
+def StripANSIEscapeSequences(string):
+ string = re.sub(r'\x1b\[[0-9]*[a-zA-Z]', '', string)
+ return string
+
+
+def GetChromeSrcDir():
+ return 'var/cache/distfiles/target/chrome-src/src'
+
+
+def GetEnvStringFromDict(env_dict):
+ return ' '.join(["%s=\"%s\"" % var for var in env_dict.items()])
+
+
+def MergeEnvStringWithDict(env_string, env_dict, prepend=True):
+ """Merge env string with dict."""
+ if not env_string.strip():
+ return GetEnvStringFromDict(env_dict)
+ override_env_list = []
+ ce = command_executer.GetCommandExecuter()
+ for k, v in env_dict.items():
+ v = v.strip("\"'")
+ if prepend:
+ new_env = "%s=\"%s $%s\"" % (k, v, k)
+ else:
+ new_env = "%s=\"$%s %s\"" % (k, k, v)
+ command = '; '.join([env_string, new_env, 'echo $%s' % k])
+ ret, out, _ = ce.RunCommandWOutput(command)
+ override_env_list.append('%s=%r' % (k, out.strip()))
+ ret = env_string + ' ' + ' '.join(override_env_list)
+ return ret.strip()
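+
+# Illustrative sketch (values borrowed from misc_test.py): by default the new
+# flags are prepended to the existing value of each variable, e.g.
+#   MergeEnvStringWithDict("USE='hello 123'", {'USE': 'bla bla'})
+#   # -> "USE='hello 123' USE='bla bla hello 123'"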
+
+
+def GetAllImages(chromeos_root, board):
+ ce = command_executer.GetCommandExecuter()
+ command = ('find %s/src/build/images/%s -name chromiumos_test_image.bin' %
+ (chromeos_root, board))
+ ret, out, _ = ce.RunCommandWOutput(command)
+ assert ret == 0, 'Could not run command: %s' % command
+ return out.splitlines()
+
+
+def IsFloat(text):
+ if text is None:
+ return False
+ try:
+ float(text)
+ return True
+ except ValueError:
+ return False
+
+
+def RemoveChromeBrowserObjectFiles(chromeos_root, board):
+ """Remove any object files from all the posible locations."""
+ out_dir = os.path.join(
+ GetChrootPath(chromeos_root),
+ 'var/cache/chromeos-chrome/chrome-src/src/out_%s' % board)
+ if os.path.exists(out_dir):
+ shutil.rmtree(out_dir)
+ logger.GetLogger().LogCmd('rm -rf %s' % out_dir)
+ out_dir = os.path.join(
+ GetChrootPath(chromeos_root),
+ 'var/cache/chromeos-chrome/chrome-src-internal/src/out_%s' % board)
+ if os.path.exists(out_dir):
+ shutil.rmtree(out_dir)
+ logger.GetLogger().LogCmd('rm -rf %s' % out_dir)
+
+
+@contextmanager
+def WorkingDirectory(new_dir):
+ """Context manager that temporarily changes the working directory."""
+ old_dir = os.getcwd()
+ if old_dir != new_dir:
+ msg = 'cd %s' % new_dir
+ logger.GetLogger().LogCmd(msg)
+ os.chdir(new_dir)
+ try:
+ yield new_dir
+ finally:
+ # Restore the original directory even if the body raised.
+ if old_dir != new_dir:
+ msg = 'cd %s' % old_dir
+ logger.GetLogger().LogCmd(msg)
+ os.chdir(old_dir)
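+
+# Illustrative sketch (hypothetical path): run commands from another
+# directory and return to the original one afterwards.
+#   with WorkingDirectory('/tmp'):
+#     command_executer.GetCommandExecuter().RunCommand('ls')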
+
+
+def HasGitStagedChanges(git_dir):
+ """Return True if git repository has staged changes."""
+ command = 'cd {0} && git diff --quiet --cached --exit-code HEAD'.format(
+ git_dir)
+ return command_executer.GetCommandExecuter().RunCommand(
+ command,
+ print_to_console=False)
+
+
+def HasGitUnstagedChanges(git_dir):
+ """Return True if git repository has un-staged changes."""
+ command = 'cd {0} && git diff --quiet --exit-code HEAD'.format(git_dir)
+ return command_executer.GetCommandExecuter().RunCommand(
+ command,
+ print_to_console=False)
+
+
+def HasGitUntrackedChanges(git_dir):
+ """Return True if git repository has un-tracked changes."""
+ command = ('cd {0} && test -z '
+ '$(git ls-files --exclude-standard --others)').format(git_dir)
+ return command_executer.GetCommandExecuter().RunCommand(
+ command,
+ print_to_console=False)
+
+
+def GitGetCommitHash(git_dir, commit_symbolic_name):
+ """Return githash for the symbolic git commit.
+
+ For example, commit_symbolic_name could be
+ "cros/gcc.gnu.org/branches/gcc/gcc-4_8-mobile"; this function returns the
+ git hash for this symbolic name.
+
+ Args:
+ git_dir: a git working tree.
+ commit_symbolic_name: a symbolic name for a particular git commit.
+
+ Returns:
+ The git hash for the symbolic name or None if fails.
+ """
+
+ command = ('cd {0} && git log -n 1 --pretty="format:%H" {1}').format(
+ git_dir, commit_symbolic_name)
+ rv, out, _ = command_executer.GetCommandExecuter().RunCommandWOutput(
+ command,
+ print_to_console=False)
+ if rv == 0:
+ return out.strip()
+ return None
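+
+# Illustrative sketch (hypothetical arguments):
+#   GitGetCommitHash('/path/to/repo', 'cros/master')
+#   # -> a 40-character hash string, or None if the name does not resolve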
+
+
+def IsGitTreeClean(git_dir):
+ """Test if git tree has no local changes.
+
+ Args:
+ git_dir: git tree directory.
+
+ Returns:
+ True if git dir is clean.
+ """
+ if HasGitStagedChanges(git_dir):
+ logger.GetLogger().LogWarning('Git tree has staged changes.')
+ return False
+ if HasGitUnstagedChanges(git_dir):
+ logger.GetLogger().LogWarning('Git tree has unstaged changes.')
+ return False
+ if HasGitUntrackedChanges(git_dir):
+ logger.GetLogger().LogWarning('Git tree has un-tracked changes.')
+ return False
+ return True
+
+
+def GetGitChangesAsList(git_dir, path=None, staged=False):
+ """Get changed files as a list.
+
+ Args:
+ git_dir: git tree directory.
+ path: a relative path that is part of the tree directory, could be null.
+ staged: whether to include staged files as well.
+
+ Returns:
+ A list containing all the changed files.
+ """
+ command = 'cd {0} && git diff --name-only'.format(git_dir)
+ if staged:
+ command += ' --cached'
+ if path:
+ command += ' -- ' + path
+ _, out, _ = command_executer.GetCommandExecuter().RunCommandWOutput(
+ command,
+ print_to_console=False)
+ rv = []
+ for line in out.splitlines():
+ rv.append(line)
+ return rv
+
+
+def IsChromeOsTree(chromeos_root):
+ return (os.path.isdir(os.path.join(chromeos_root,
+ 'src/third_party/chromiumos-overlay')) and
+ os.path.isdir(os.path.join(chromeos_root, 'manifest')))
+
+
+def DeleteChromeOsTree(chromeos_root, dry_run=False):
+ """Delete a ChromeOs tree *safely*.
+
+ Args:
+ chromeos_root: dir of the tree, could be a relative one (but be careful)
+ dry_run: only prints out the command if True
+
+ Returns:
+ True if everything is ok.
+ """
+ if not IsChromeOsTree(chromeos_root):
+ logger.GetLogger().LogWarning(
+ '"{0}" does not seem to be a valid chromeos tree, do nothing.'.format(
+ chromeos_root))
+ return False
+ cmd0 = 'cd {0} && cros_sdk --delete'.format(chromeos_root)
+ if dry_run:
+ print(cmd0)
+ else:
+ if command_executer.GetCommandExecuter().RunCommand(
+ cmd0,
+ print_to_console=True) != 0:
+ return False
+
+ cmd1 = ('export CHROMEOSDIRNAME="$(dirname $(cd {0} && pwd))" && '
+ 'export CHROMEOSBASENAME="$(basename $(cd {0} && pwd))" && '
+ 'cd $CHROMEOSDIRNAME && sudo rm -fr $CHROMEOSBASENAME').format(
+ chromeos_root)
+ if dry_run:
+ print(cmd1)
+ return True
+
+ return command_executer.GetCommandExecuter().RunCommand(
+ cmd1,
+ print_to_console=True) == 0
+
+
+def ApplyGerritPatches(chromeos_root,
+ gerrit_patch_string,
+ branch='cros/master'):
+ """Apply gerrit patches on a chromeos tree.
+
+ Args:
+ chromeos_root: chromeos tree path
+ gerrit_patch_string: a patch string just like the one given to cbuildbot,
+ 'id1 id2 *id3 ... idn'. A prefix of '*' means the patch is internal.
+ branch: the tree based on which to apply the patches.
+
+ Returns:
+ True if success.
+ """
+
+ ### First of all, we need chromite libs
+ sys.path.append(os.path.join(chromeos_root, 'chromite'))
+ # Imports below are ok after modifying path to add chromite.
+ # Pylint cannot detect that and complains.
+ # pylint: disable=import-error
+ from lib import git
+ from lib import gerrit
+ manifest = git.ManifestCheckout(chromeos_root)
+ patch_list = gerrit_patch_string.split(' ')
+ ### This takes time, print log information.
+ logger.GetLogger().LogOutput('Retrieving patch information from server ...')
+ patch_info_list = gerrit.GetGerritPatchInfo(patch_list)
+ for pi in patch_info_list:
+ project_checkout = manifest.FindCheckout(pi.project, strict=False)
+ if not project_checkout:
+ logger.GetLogger().LogError(
+ 'Failed to find patch project "{project}" in manifest.'.format(
+ project=pi.project))
+ return False
+
+ pi_str = '{project}:{ref}'.format(project=pi.project, ref=pi.ref)
+ try:
+ project_git_path = project_checkout.GetPath(absolute=True)
+ logger.GetLogger().LogOutput('Applying patch "{0}" in "{1}" ...'.format(
+ pi_str, project_git_path))
+ pi.Apply(project_git_path, branch, trivial=False)
+ except Exception:
+ traceback.print_exc(file=sys.stdout)
+ logger.GetLogger().LogError('Failed to apply patch "{0}"'.format(pi_str))
+ return False
+ return True
+
+
+def BooleanPrompt(prompt='Do you want to continue?',
+ default=True,
+ true_value='yes',
+ false_value='no',
+ prolog=None):
+ """Helper function for processing boolean choice prompts.
+
+ Args:
+ prompt: The question to present to the user.
+ default: Boolean to return if the user just presses enter.
+ true_value: The text to display that represents a True returned.
+ false_value: The text to display that represents a False returned.
+ prolog: The text to display before prompt.
+
+ Returns:
+ True or False.
+ """
+ true_value, false_value = true_value.lower(), false_value.lower()
+ true_text, false_text = true_value, false_value
+ if true_value == false_value:
+ raise ValueError('true_value and false_value must differ: got %r' %
+ true_value)
+
+ if default:
+ true_text = true_text[0].upper() + true_text[1:]
+ else:
+ false_text = false_text[0].upper() + false_text[1:]
+
+ prompt = ('\n%s (%s/%s)? ' % (prompt, true_text, false_text))
+
+ if prolog:
+ prompt = ('\n%s\n%s' % (prolog, prompt))
+
+ while True:
+ try:
+ response = raw_input(prompt).lower()
+ except EOFError:
+ # If the user hits CTRL+D, or stdin is disabled, use the default.
+ print()
+ response = None
+ except KeyboardInterrupt:
+ # If the user hits CTRL+C, just exit the process.
+ print()
+ print('CTRL+C detected; exiting')
+ sys.exit()
+
+ if not response:
+ return default
+ if true_value.startswith(response):
+ if not false_value.startswith(response):
+ return True
+ # common prefix between the two...
+ elif false_value.startswith(response):
+ return False
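+
+# Illustrative sketch: with the defaults this renders the prompt
+#   "Do you want to continue? (Yes/no)?"
+# where pressing enter returns True (the default) and typing 'n' returns
+# False.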
diff --git a/cros_utils/misc_test.py b/cros_utils/misc_test.py
new file mode 100644
index 00000000..80082207
--- /dev/null
+++ b/cros_utils/misc_test.py
@@ -0,0 +1,51 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+"""Tests for misc."""
+
+from __future__ import print_function
+
+__author__ = 'asharif@google.com (Ahmad Sharif)'
+
+# System modules
+import unittest
+
+# Local modules
+import misc
+
+
+class UtilsTest(unittest.TestCase):
+ """Tests for misc."""
+
+ def testGetFilenameFromString(self):
+ string = 'a /b=c"d^$?\\'
+ filename = misc.GetFilenameFromString(string)
+ self.assertEqual(filename, 'a___bcd')
+
+ def testPrependMergeEnv(self):
+ var = 'USE'
+ use_flags = 'hello 123'
+ added_use_flags = 'bla bla'
+ env_string = '%s=%r' % (var, use_flags)
+ new_env_string = misc.MergeEnvStringWithDict(env_string,
+ {var: added_use_flags})
+ expected_new_env = '%s=%r' % (var, ' '.join([added_use_flags, use_flags]))
+ self.assertEqual(new_env_string, ' '.join([env_string, expected_new_env]))
+
+ def testGetChromeOSVersionFromLSBVersion(self):
+ versions_dict = {'2630.0.0': '22', '2030.0.0': '19'}
+ f = misc.GetChromeOSVersionFromLSBVersion
+ for k, v in versions_dict.items():
+ self.assertEqual(f(k), 'R%s-%s' % (v, k))
+
+ def testPostpendMergeEnv(self):
+ var = 'USE'
+ use_flags = 'hello 123'
+ added_use_flags = 'bla bla'
+ env_string = '%s=%r' % (var, use_flags)
+ new_env_string = misc.MergeEnvStringWithDict(env_string,
+ {var: added_use_flags}, False)
+ expected_new_env = '%s=%r' % (var, ' '.join([use_flags, added_use_flags]))
+ self.assertEqual(new_env_string, ' '.join([env_string, expected_new_env]))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/cros_utils/no_pseudo_terminal_test.py b/cros_utils/no_pseudo_terminal_test.py
new file mode 100644
index 00000000..43eabb13
--- /dev/null
+++ b/cros_utils/no_pseudo_terminal_test.py
@@ -0,0 +1,53 @@
+"""Test to ensure we're not touching /dev/ptmx when running commands."""
+
+from __future__ import print_function
+
+import os
+import subprocess
+import tempfile
+import time
+import unittest
+from cros_utils import command_executer
+
+
+class NoPseudoTerminalTest(unittest.TestCase):
+ """Test to ensure we're not touching /dev/ptmx when running commands."""
+
+ _strace_process = None
+ STRACE_TIMEOUT = 10
+
+ def _AttachStraceToSelf(self, output_file):
+ """Attaches strace to the current process."""
+ args = ['strace', '-o', output_file, '-p', str(os.getpid())]
+ print(args)
+ self._strace_process = subprocess.Popen(args)
+ # Wait until we see some activity.
+ start_time = time.time()
+ while time.time() - start_time < self.STRACE_TIMEOUT:
+ if os.path.isfile(output_file) and open(output_file).read(1):
+ return True
+ time.sleep(1)
+ return False
+
+ def _KillStraceProcess(self):
+ """Kills strace that was started by _AttachStraceToSelf()."""
+ self._strace_process.terminate()
+ self._strace_process.wait()
+ return True
+
+ def testNoPseudoTerminalWhenRunningCommand(self):
+ """Test to make sure we're not touching /dev/ptmx when running commands."""
+ temp_file = tempfile.mktemp()
+ self.assertTrue(self._AttachStraceToSelf(temp_file))
+
+ ce = command_executer.GetCommandExecuter()
+ ce.RunCommand('echo')
+
+ self.assertTrue(self._KillStraceProcess())
+
+ strace_contents = open(temp_file).read()
+ self.assertFalse('/dev/ptmx' in strace_contents)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/cros_utils/perf_diff.py b/cros_utils/perf_diff.py
new file mode 100755
index 00000000..c861f6ba
--- /dev/null
+++ b/cros_utils/perf_diff.py
@@ -0,0 +1,332 @@
+#!/usr/bin/python2
+# Copyright 2012 Google Inc. All Rights Reserved.
+"""One-line documentation for perf_diff module.
+
+A detailed description of perf_diff.
+"""
+
+from __future__ import print_function
+
+__author__ = 'asharif@google.com (Ahmad Sharif)'
+
+import argparse
+import re
+import sys
+
+import misc
+import tabulator
+
+ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table'
+TOTAL_EVENTS = 'Total_events_of_this_profile'
+
+
+def GetPerfDictFromReport(report_file):
+ output = {}
+ perf_report = PerfReport(report_file)
+ for k, v in perf_report.sections.items():
+ if k not in output:
+ output[k] = {}
+ output[k][ROWS_TO_SHOW] = 0
+ output[k][TOTAL_EVENTS] = 0
+ for function in v.functions:
+ out_key = '%s' % (function.name)
+ output[k][out_key] = function.count
+ output[k][TOTAL_EVENTS] += function.count
+ if function.percent > 1:
+ output[k][ROWS_TO_SHOW] += 1
+ return output
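+
+# Illustrative sketch (hypothetical report): the result maps each event
+# section to a {function_name: count} dict plus two bookkeeping keys, e.g.
+#   d = GetPerfDictFromReport('report.txt')
+#   d['cycles']['foo']           # sample count attributed to 'foo'
+#   d['cycles'][TOTAL_EVENTS]    # sum of all counts in the section
+#   d['cycles'][ROWS_TO_SHOW]    # number of functions above 1%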
+
+
+def _SortDictionaryByValue(d):
+ l = [(k, v) for (k, v) in d.iteritems()]
+
+ def GetFloat(x):
+ if misc.IsFloat(x):
+ return float(x)
+ else:
+ return x
+
+ sorted_l = sorted(l, key=lambda x: GetFloat(x[1]))
+ sorted_l.reverse()
+ return [f[0] for f in sorted_l]
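+
+# Illustrative sketch: keys come back ordered by descending value, with
+# numeric strings compared as numbers, e.g.
+#   _SortDictionaryByValue({'a': 3, 'b': '1', 'c': 2})  # -> ['a', 'c', 'b']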
+
+
+class Tabulator(object):
+ """Make tables."""
+
+ def __init__(self, all_dicts):
+ self._all_dicts = all_dicts
+
+ def PrintTable(self):
+ for dicts in self._all_dicts:
+ self.PrintTableHelper(dicts)
+
+ def PrintTableHelper(self, dicts):
+ """Transfrom dicts to tables."""
+ fields = {}
+ for d in dicts:
+ for f in d.keys():
+ if f not in fields:
+ fields[f] = d[f]
+ else:
+ fields[f] = max(fields[f], d[f])
+ table = []
+ header = ['name']
+ for i in range(len(dicts)):
+ header.append(i)
+
+ table.append(header)
+
+ sorted_fields = _SortDictionaryByValue(fields)
+
+ for f in sorted_fields:
+ row = [f]
+ for d in dicts:
+ if f in d:
+ row.append(d[f])
+ else:
+ row.append('0')
+ table.append(row)
+
+ print(tabulator.GetSimpleTable(table))
+
+
+class Function(object):
+ """Function for formatting."""
+
+ def __init__(self):
+ self.count = 0
+ self.name = ''
+ self.percent = 0
+
+
+class Section(object):
+ """Section formatting."""
+
+ def __init__(self, contents):
+ self.name = ''
+ self.count = 0
+ # Initialize here as well, since _ParseSection returns early for
+ # sections without an 'Events:' header.
+ self.functions = []
+ self.raw_contents = contents
+ self._ParseSection()
+
+ def _ParseSection(self):
+ matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents)
+ assert len(matches) <= 1, 'More than one event found in 1 section'
+ if not matches:
+ return
+ match = matches[0]
+ self.name = match[1]
+ self.count = misc.UnitToNumber(match[0])
+
+ self.functions = []
+ for line in self.raw_contents.splitlines():
+ if not line.strip():
+ continue
+ if '%' not in line:
+ continue
+ if not line.startswith('#'):
+ fields = [f for f in line.split(' ') if f]
+ function = Function()
+ function.percent = float(fields[0].strip('%'))
+ function.count = int(fields[1])
+ function.name = ' '.join(fields[2:])
+ self.functions.append(function)
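+
+ # Illustrative sketch of the input this parser expects (hypothetical
+ # numbers, perf-report style):
+ #   # Events: 10K cycles
+ #       12.34%  5678  [.] foo
+ # yields one Function with percent=12.34, count=5678, name='[.] foo'.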
+
+
+class PerfReport(object):
+ """Get report from raw report."""
+
+ def __init__(self, perf_file):
+ self.perf_file = perf_file
+ self._ReadFile()
+ self.sections = {}
+ self.metadata = {}
+ self._section_contents = []
+ self._section_header = ''
+ self._SplitSections()
+ self._ParseSections()
+ self._ParseSectionHeader()
+
+ def _ParseSectionHeader(self):
+ """Parse a header of a perf report file."""
+ # The "captured on" field is inaccurate - this actually refers to when the
+ # report was generated, not when the data was captured.
+ for line in self._section_header.splitlines():
+ line = line[2:]
+ if ':' in line:
+ key, val = line.strip().split(':', 1)
+ key = key.strip()
+ val = val.strip()
+ self.metadata[key] = val
+
+ def _ReadFile(self):
+ self._perf_contents = open(self.perf_file).read()
+
+ def _ParseSections(self):
+ self.event_counts = {}
+ self.sections = {}
+ for section_content in self._section_contents:
+ section = Section(section_content)
+ section.name = self._GetHumanReadableName(section.name)
+ self.sections[section.name] = section
+
+ # TODO(asharif): Do this better.
+ def _GetHumanReadableName(self, section_name):
+ if not 'raw' in section_name:
+ return section_name
+ raw_number = section_name.strip().split(' ')[-1]
+ for line in self._section_header.splitlines():
+ if raw_number in line:
+ name = line.strip().split(' ')[5]
+ return name
+
+ def _SplitSections(self):
+ self._section_contents = []
+ indices = [m.start() for m in re.finditer('# Events:', self._perf_contents)]
+ indices.append(len(self._perf_contents))
+ for i in range(len(indices) - 1):
+ section_content = self._perf_contents[indices[i]:indices[i + 1]]
+ self._section_contents.append(section_content)
+ self._section_header = ''
+ if indices:
+ self._section_header = self._perf_contents[0:indices[0]]
+
+
+class PerfDiffer(object):
+ """Perf differ class."""
+
+ def __init__(self, reports, num_symbols, common_only):
+ self._reports = reports
+ self._num_symbols = num_symbols
+ self._common_only = common_only
+ self._common_function_names = {}
+
+ def DoDiff(self):
+ """The function that does the diff."""
+ section_names = self._FindAllSections()
+
+ filename_dicts = []
+ summary_dicts = []
+ for report in self._reports:
+ d = {}
+ filename_dicts.append({'file': report.perf_file})
+ for section_name in section_names:
+ if section_name in report.sections:
+ d[section_name] = report.sections[section_name].count
+ summary_dicts.append(d)
+
+ all_dicts = [filename_dicts, summary_dicts]
+
+ for section_name in section_names:
+ function_names = self._GetTopFunctions(section_name, self._num_symbols)
+ self._FindCommonFunctions(section_name)
+ dicts = []
+ for report in self._reports:
+ d = {}
+ if section_name in report.sections:
+ section = report.sections[section_name]
+
+ # Get a common scaling factor for this report.
+ common_scaling_factor = self._GetCommonScalingFactor(section)
+
+ for function in section.functions:
+ if function.name in function_names:
+ key = '%s %s' % (section.name, function.name)
+ d[key] = function.count
+ # Compute a factor to scale the function count by in common_only
+ # mode.
+ if self._common_only and (
+ function.name in self._common_function_names[section.name]):
+ d[key + ' scaled'] = common_scaling_factor * function.count
+ dicts.append(d)
+
+ all_dicts.append(dicts)
+
+ mytabulator = Tabulator(all_dicts)
+ mytabulator.PrintTable()
+
+ def _FindAllSections(self):
+ sections = {}
+ for report in self._reports:
+ for section in report.sections.values():
+ if section.name not in sections:
+ sections[section.name] = section.count
+ else:
+ sections[section.name] = max(sections[section.name], section.count)
+ return _SortDictionaryByValue(sections)
+
+ def _GetCommonScalingFactor(self, section):
+ unique_count = self._GetCount(
+ section, lambda x: x in self._common_function_names[section.name])
+ return 100.0 / unique_count
+
+ def _GetCount(self, section, filter_fun=None):
+ total_count = 0
+ for function in section.functions:
+ if not filter_fun or filter_fun(function.name):
+ total_count += int(function.count)
+ return total_count
+
+ def _FindCommonFunctions(self, section_name):
+ function_names_list = []
+ for report in self._reports:
+ if section_name in report.sections:
+ section = report.sections[section_name]
+ function_names = [f.name for f in section.functions]
+ function_names_list.append(function_names)
+
+ self._common_function_names[section_name] = (
+ reduce(set.intersection, map(set, function_names_list)))
+
+ def _GetTopFunctions(self, section_name, num_functions):
+ all_functions = {}
+ for report in self._reports:
+ if section_name in report.sections:
+ section = report.sections[section_name]
+ for f in section.functions[:num_functions]:
+ if f.name in all_functions:
+ all_functions[f.name] = max(all_functions[f.name], f.count)
+ else:
+ all_functions[f.name] = f.count
+ # FIXME(asharif): Don't really need to sort these...
+ return _SortDictionaryByValue(all_functions)
+
+ def _GetFunctionsDict(self, section, function_names):
+ d = {}
+ for function in section.functions:
+ if function.name in function_names:
+ d[function.name] = function.count
+ return d
+
+
+def Main(argv):
+ """The entry of the main."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-n',
+ '--num_symbols',
+ dest='num_symbols',
+ default='5',
+ help='The number of symbols to show.')
+ parser.add_argument('-c',
+ '--common_only',
+ dest='common_only',
+ action='store_true',
+ default=False,
+ help='Diff common symbols only.')
+
+ options, args = parser.parse_known_args(argv)
+
+ reports = []
+ for report_file in args[1:]:
+ reports.append(PerfReport(report_file))
+ pd = PerfDiffer(reports, int(options.num_symbols), options.common_only)
+ pd.DoDiff()
+
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(Main(sys.argv))
diff --git a/cros_utils/pstat.py b/cros_utils/pstat.py
new file mode 100644
index 00000000..602fc0c7
--- /dev/null
+++ b/cros_utils/pstat.py
@@ -0,0 +1,1077 @@
+# We did not author this file nor maintain it. Skip linting it.
+#pylint: skip-file
+# Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# Comments and/or additions are welcome (send e-mail to:
+# strang@nmr.mgh.harvard.edu).
+#
+"""pstat.py module
+
+#################################################
+####### Written by: Gary Strangman ###########
+####### Last modified: Dec 18, 2007 ###########
+#################################################
+
+This module provides some useful list and array manipulation routines
+modeled after those found in the |Stat package by Gary Perlman, plus a
+number of other useful list/file manipulation functions. The list-based
+functions include:
+
+ abut (source,*args)
+ simpleabut (source, addon)
+ colex (listoflists,cnums)
+ collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
+ dm (listoflists,criterion)
+ flat (l)
+ linexand (listoflists,columnlist,valuelist)
+ linexor (listoflists,columnlist,valuelist)
+ linedelimited (inlist,delimiter)
+ lineincols (inlist,colsize)
+ lineincustcols (inlist,colsizes)
+ list2string (inlist)
+ makelol(inlist)
+ makestr(x)
+ printcc (lst,extra=2)
+ printincols (listoflists,colsize)
+ pl (listoflists)
+ printl(listoflists)
+ replace (lst,oldval,newval)
+ recode (inlist,listmap,cols='all')
+ remap (listoflists,criterion)
+ roundlist (inlist,num_digits_to_round_floats_to)
+ sortby(listoflists,sortcols)
+ unique (inlist)
+ duplicates(inlist)
+ writedelimited (listoflists, delimiter, file, writetype='w')
+
+Some of these functions have alternate versions which are defined only if
+Numeric (NumPy) can be imported. These functions are generally named as
+above, with an 'a' prefix.
+
+ aabut (source, *args)
+ acolex (a,indices,axis=1)
+ acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
+ adm (a,criterion)
+ alinexand (a,columnlist,valuelist)
+ alinexor (a,columnlist,valuelist)
+ areplace (a,oldval,newval)
+ arecode (a,listmap,col='all')
+ arowcompare (row1, row2)
+ arowsame (row1, row2)
+ asortrows(a,axis=0)
+ aunique(inarray)
+ aduplicates(inarray)
+
+Currently, the code is all but completely un-optimized. In many cases, the
+array versions of functions amount simply to aliases to built-in array
+functions/methods. Their inclusion here is for function name consistency.
+"""
+
+## CHANGE LOG:
+## ==========
+## 07-11-26 ... edited to work with numpy
+## 01-11-15 ... changed list2string() to accept a delimiter
+## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
+## 01-05-31 ... added duplicates() and aduplicates() functions
+## 00-12-28 ... license made GPL, docstring and import requirements
+## 99-11-01 ... changed version to 0.3
+## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
+## 03/27/99 ... added areplace function, made replace fcn recursive
+## 12/31/98 ... added writefc function for output to fixed column sizes
+## 12/07/98 ... fixed import problem (failed on collapse() fcn)
+## added __version__ variable (now 0.2)
+## 12/05/98 ... updated doc-strings
+## added features to collapse() function
+## added flat() function for lists
+## fixed a broken asortrows()
+## 11/16/98 ... fixed minor bug in aput for 1D arrays
+##
+## 11/08/98 ... fixed aput to output large arrays correctly
+
+import stats # required 3rd party module
+import string, copy
+from types import *
+
+__version__ = 0.4
+
+###=========================== LIST FUNCTIONS ==========================
+###
+### Here are the list functions, DEFINED FOR ALL SYSTEMS.
+### Array functions (for NumPy-enabled computers) appear below.
+###
+
+
+def abut(source, *args):
+ """
+Like the |Stat abut command. It concatenates two lists side-by-side
+and returns the result. '2D' lists are also accommodated for either argument
+(source or addon). CAUTION: If one list is shorter, it will be repeated
+until it is as long as the longest list. If this behavior is not desired,
+use pstat.simpleabut().
+
+Usage: abut(source, args) where args=any # of lists
+Returns: a list of lists as long as the LONGEST list passed, source on the
+ 'left', lists in <args> attached consecutively on the 'right'
+"""
+
+ if type(source) not in [ListType, TupleType]:
+ source = [source]
+ for addon in args:
+ if type(addon) not in [ListType, TupleType]:
+ addon = [addon]
+ if len(addon) < len(source): # is source list longer?
+ if len(source) % len(addon) == 0: # are they integer multiples?
+ repeats = len(source) / len(addon) # repeat addon n times
+ origadd = copy.deepcopy(addon)
+ for i in range(repeats - 1):
+ addon = addon + origadd
+ else:
+ repeats = len(source) / len(addon) + 1 # repeat addon x times,
+ origadd = copy.deepcopy(addon) # x is NOT an integer
+ for i in range(repeats - 1):
+ addon = addon + origadd
+ addon = addon[0:len(source)]
+ elif len(source) < len(addon): # is addon list longer?
+ if len(addon) % len(source) == 0: # are they integer multiples?
+ repeats = len(addon) / len(source) # repeat source n times
+ origsour = copy.deepcopy(source)
+ for i in range(repeats - 1):
+ source = source + origsour
+ else:
+ repeats = len(addon) / len(source) + 1 # repeat source x times,
+ origsour = copy.deepcopy(source) # x is NOT an integer
+ for i in range(repeats - 1):
+ source = source + origsour
+ source = source[0:len(addon)]
+
+ source = simpleabut(source, addon)
+ return source
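+
+# Illustrative sketch (not in the original module): the shorter list is
+# recycled to match the longer one, e.g.
+#   abut([1, 2, 3, 4], [10, 20])  # -> [[1, 10], [2, 20], [3, 10], [4, 20]]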
+
+
+def simpleabut(source, addon):
+ """
+Concatenates two lists as columns and returns the result. '2D' lists
+are also accommodated for either argument (source or addon). This DOES NOT
+repeat either list to make the 2 lists of equal length. Beware of list pairs
+with different lengths ... the resulting list will be the length of the
+FIRST list passed.
+
+Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists)
+Returns: a list of lists as long as source, with source on the 'left' and
+ addon on the 'right'
+"""
+ if type(source) not in [ListType, TupleType]:
+ source = [source]
+ if type(addon) not in [ListType, TupleType]:
+ addon = [addon]
+ minlen = min(len(source), len(addon))
+ list = copy.deepcopy(source) # start abut process
+ if type(source[0]) not in [ListType, TupleType]:
+ if type(addon[0]) not in [ListType, TupleType]:
+ for i in range(minlen):
+ list[i] = [source[i]] + [addon[i]] # source/addon = column
+ else:
+ for i in range(minlen):
+ list[i] = [source[i]] + addon[i] # addon=list-of-lists
+ else:
+ if type(addon[0]) not in [ListType, TupleType]:
+ for i in range(minlen):
+ list[i] = source[i] + [addon[i]] # source=list-of-lists
+ else:
+ for i in range(minlen):
+ list[i] = source[i] + addon[i] # source/addon = list-of-lists
+ source = list
+ return source
+
+
+def colex(listoflists, cnums):
+ """
+Extracts from listoflists the columns specified in the list 'cnums'
+(cnums can be an integer, a sequence of integers, or a string-expression that
+corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
+columns 3 onward from the listoflists).
+
+Usage: colex (listoflists,cnums)
+Returns: a list-of-lists corresponding to the columns from listoflists
+ specified by cnums, in the order the column numbers appear in cnums
+"""
+ global index
+ column = 0
+ if type(cnums) in [ListType, TupleType]: # if multiple columns to get
+ index = cnums[0]
+ column = map(lambda x: x[index], listoflists)
+ for col in cnums[1:]:
+ index = col
+ column = abut(column, map(lambda x: x[index], listoflists))
+ elif type(cnums) == StringType: # if an 'x[3:]' type expr.
+ evalstring = 'map(lambda x: x' + cnums + ', listoflists)'
+ column = eval(evalstring)
+ else: # else it's just 1 col to get
+ index = cnums
+ column = map(lambda x: x[index], listoflists)
+ return column
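+
+# Illustrative sketch (not in the original module):
+#   colex([[1, 2, 3], [4, 5, 6]], 1)       # -> [2, 5]
+#   colex([[1, 2, 3], [4, 5, 6]], [0, 2])  # -> [[1, 3], [4, 6]]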
+
+
+def collapse(listoflists,
+ keepcols,
+ collapsecols,
+ fcn1=None,
+ fcn2=None,
+ cfcn=None):
+ """
+Averages data in collapsecol, keeping all unique items in keepcols
+(using unique, which keeps unique LISTS of column numbers), retaining the
+unique sets of values in keepcols, the mean for each. Setting fcn1
+and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
+will append those results (e.g., the sterr, N) after each calculated mean.
+cfcn is the collapse function to apply (defaults to mean, defined here in the
+pstat module to avoid circular imports with stats.py, but harmonicmean or
+others could be passed).
+
+Usage: collapse
+(listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
+Returns: a list of lists with all unique permutations of entries appearing in
+ columns ("conditions") specified by keepcols, abutted with the result of
+ cfcn (if cfcn=None, defaults to the mean) of each column specified by
+ collapsecols.
+"""
+
+ def collmean(inlist):
+ s = 0
+ for item in inlist:
+ s = s + item
+ return s / float(len(inlist))
+
+ if type(keepcols) not in [ListType, TupleType]:
+ keepcols = [keepcols]
+ if type(collapsecols) not in [ListType, TupleType]:
+ collapsecols = [collapsecols]
+ if cfcn == None:
+ cfcn = collmean
+ if keepcols == []:
+ means = [0] * len(collapsecols)
+ for i in range(len(collapsecols)):
+ avgcol = colex(listoflists, collapsecols[i])
+ means[i] = cfcn(avgcol)
+ if fcn1:
+ try:
+ test = fcn1(avgcol)
+ except:
+ test = 'N/A'
+ means[i] = [means[i], test]
+ if fcn2:
+ try:
+ test = fcn2(avgcol)
+ except:
+ test = 'N/A'
+ try:
+ means[i] = means[i] + [len(avgcol)]
+ except TypeError:
+ means[i] = [means[i], len(avgcol)]
+ return means
+ else:
+ values = colex(listoflists, keepcols)
+ uniques = unique(values)
+ uniques.sort()
+ newlist = []
+ if type(keepcols) not in [ListType, TupleType]:
+ keepcols = [keepcols]
+ for item in uniques:
+ if type(item) not in [ListType, TupleType]:
+ item = [item]
+ tmprows = linexand(listoflists, keepcols, item)
+ for col in collapsecols:
+ avgcol = colex(tmprows, col)
+ item.append(cfcn(avgcol))
+ if fcn1 <> None:
+ try:
+ test = fcn1(avgcol)
+ except:
+ test = 'N/A'
+ item.append(test)
+ if fcn2 <> None:
+ try:
+ test = fcn2(avgcol)
+ except:
+ test = 'N/A'
+ item.append(test)
+ newlist.append(item)
+ return newlist
+
+
+def dm(listoflists, criterion):
+ """
+Returns rows from the passed list of lists that meet the criteria in
+the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
+will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
+with column 2 equal to the string 'N').
+
+Usage: dm (listoflists, criterion)
+Returns: rows from listoflists that meet the specified criterion.
+"""
+ function = 'filter(lambda x: ' + criterion + ',listoflists)'
+ lines = eval(function)
+ return lines
+
+
+def flat(l):
+ """
+Returns the flattened version of a '2D' list. The list analog of the
+a.ravel() method of NumPy arrays.
+
+Usage: flat(l)
+"""
+ newl = []
+ for i in range(len(l)):
+ for j in range(len(l[i])):
+ newl.append(l[i][j])
+ return newl
+
+
+def linexand(listoflists, columnlist, valuelist):
+ """
+Returns the rows of a list of lists where col (from columnlist) = val
+(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
+len(columnlist) must equal len(valuelist).
+
+Usage: linexand (listoflists,columnlist,valuelist)
+Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
+"""
+ if type(columnlist) not in [ListType, TupleType]:
+ columnlist = [columnlist]
+ if type(valuelist) not in [ListType, TupleType]:
+ valuelist = [valuelist]
+ criterion = ''
+ for i in range(len(columnlist)):
+ if type(valuelist[i]) == StringType:
+ critval = '\'' + valuelist[i] + '\''
+ else:
+ critval = str(valuelist[i])
+ criterion = criterion + ' x[' + str(columnlist[
+ i]) + ']==' + critval + ' and'
+ criterion = criterion[0:-3] # remove the "and" after the last crit
+ function = 'filter(lambda x: ' + criterion + ',listoflists)'
+ lines = eval(function)
+ return lines
+
+
+def linexor(listoflists, columnlist, valuelist):
+ """
+Returns the rows of a list of lists where col (from columnlist) = val
+(from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
+One value is required for each column in columnlist. If only one value
+exists for columnlist but multiple values appear in valuelist, the
+valuelist values are all assumed to pertain to the same column.
+
+Usage: linexor (listoflists,columnlist,valuelist)
+Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
+"""
+ if type(columnlist) not in [ListType, TupleType]:
+ columnlist = [columnlist]
+ if type(valuelist) not in [ListType, TupleType]:
+ valuelist = [valuelist]
+ criterion = ''
+ if len(columnlist) == 1 and len(valuelist) > 1:
+ columnlist = columnlist * len(valuelist)
+ for i in range(len(columnlist)): # build an exec string
+ if type(valuelist[i]) == StringType:
+ critval = '\'' + valuelist[i] + '\''
+ else:
+ critval = str(valuelist[i])
+ criterion = criterion + ' x[' + str(columnlist[i]) + ']==' + critval + ' or'
+ criterion = criterion[0:-2] # remove the "or" after the last crit
+ function = 'filter(lambda x: ' + criterion + ',listoflists)'
+ lines = eval(function)
+ return lines
+
+
+def linedelimited(inlist, delimiter):
+ """
+Returns a string composed of elements in inlist, with each element
+separated by 'delimiter.' Used by function writedelimited. Use '\t'
+for tab-delimiting.
+
+Usage: linedelimited (inlist,delimiter)
+"""
+ outstr = ''
+ for item in inlist:
+ if type(item) <> StringType:
+ item = str(item)
+ outstr = outstr + item + delimiter
+ outstr = outstr[0:-1]
+ return outstr
+
+
+def lineincols(inlist, colsize):
+ """
+Returns a string composed of elements in inlist, with each element
+right-aligned in columns of (fixed) colsize.
+
+Usage: lineincols (inlist,colsize) where colsize is an integer
+"""
+ outstr = ''
+ for item in inlist:
+ if type(item) <> StringType:
+ item = str(item)
+ size = len(item)
+ if size <= colsize:
+ for i in range(colsize - size):
+ outstr = outstr + ' '
+ outstr = outstr + item
+ else:
+ outstr = outstr + item[0:colsize + 1]
+ return outstr
+
+
+def lineincustcols(inlist, colsizes):
+ """
+Returns a string composed of elements in inlist, with each element
+right-aligned in a column of width specified by a sequence colsizes. The
+length of colsizes must be greater than or equal to the number of columns
+in inlist.
+
+Usage: lineincustcols (inlist,colsizes)
+Returns: formatted string created from inlist
+"""
+ outstr = ''
+ for i in range(len(inlist)):
+ if type(inlist[i]) <> StringType:
+ item = str(inlist[i])
+ else:
+ item = inlist[i]
+ size = len(item)
+ if size <= colsizes[i]:
+ for j in range(colsizes[i] - size):
+ outstr = outstr + ' '
+ outstr = outstr + item
+ else:
+ outstr = outstr + item[0:colsizes[i] + 1]
+ return outstr
+
+
+def list2string(inlist, delimit=' '):
+ """
+Converts a 1D list to a single long string for file output, using
+the string.join function.
+
+Usage: list2string (inlist,delimit=' ')
+Returns: the string created from inlist
+"""
+ stringlist = map(makestr, inlist)
+ return string.join(stringlist, delimit)
+
+
+def makelol(inlist):
+ """
+Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you
+want to use put() to write a 1D list one item per line in the file.
+
+Usage: makelol(inlist)
+Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
+"""
+ x = []
+ for item in inlist:
+ x.append([item])
+ return x
+
+
+def makestr(x):
+ if type(x) <> StringType:
+ x = str(x)
+ return x
+
+
+def printcc(lst, extra=2):
+ """
+Prints a list of lists in columns, customized by the max size of items
+within the columns (max size of items in col, plus 'extra' number of spaces).
+Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
+respectively.
+
+Usage: printcc (lst,extra=2)
+Returns: None
+"""
+ if type(lst[0]) not in [ListType, TupleType]:
+ lst = [lst]
+ rowstokill = []
+ list2print = copy.deepcopy(lst)
+ for i in range(len(lst)):
+ if lst[i] == [
+ '\n'
+ ] or lst[i] == '\n' or lst[i] == 'dashes' or lst[i] == '' or lst[i] == ['']:
+ rowstokill = rowstokill + [i]
+ rowstokill.reverse() # delete blank rows from the end
+ for row in rowstokill:
+ del list2print[row]
+ maxsize = [0] * len(list2print[0])
+ for col in range(len(list2print[0])):
+ items = colex(list2print, col)
+ items = map(makestr, items)
+ maxsize[col] = max(map(len, items)) + extra
+ for row in lst:
+ if row == ['\n'] or row == '\n' or row == '' or row == ['']:
+ print
+ elif row == ['dashes'] or row == 'dashes':
+ dashes = [0] * len(maxsize)
+ for j in range(len(maxsize)):
+ dashes[j] = '-' * (maxsize[j] - 2)
+ print lineincustcols(dashes, maxsize)
+ else:
+ print lineincustcols(row, maxsize)
+ return None
+
+
+def printincols(listoflists, colsize):
+ """
+Prints a list of lists in columns of (fixed) colsize width, where
+colsize is an integer.
+
+Usage: printincols (listoflists,colsize)
+Returns: None
+"""
+ for row in listoflists:
+ print lineincols(row, colsize)
+ return None
+
+
+def pl(listoflists):
+ """
+Prints a list of lists, 1 list (row) at a time.
+
+Usage: pl(listoflists)
+Returns: None
+"""
+ for row in listoflists:
+ if row[-1] == '\n':
+ print row,
+ else:
+ print row
+ return None
+
+
+def printl(listoflists):
+ """Alias for pl."""
+ pl(listoflists)
+ return
+
+
+def replace(inlst, oldval, newval):
+ """
+Replaces all occurrences of 'oldval' with 'newval', recursively.
+
+Usage: replace (inlst,oldval,newval)
+"""
+ lst = inlst * 1
+ for i in range(len(lst)):
+ if type(lst[i]) not in [ListType, TupleType]:
+ if lst[i] == oldval:
+ lst[i] = newval
+ else:
+ lst[i] = replace(lst[i], oldval, newval)
+ return lst
+
+
+def recode(inlist, listmap, cols=None):
+ """
+Changes the values in a list to a new set of values (useful when
+you need to recode data from (e.g.) strings to numbers. cols defaults
+to None (meaning all columns are recoded).
+
+Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list
+Returns: inlist with the appropriate values replaced with new ones
+"""
+ lst = copy.deepcopy(inlist)
+ if cols != None:
+ if type(cols) not in [ListType, TupleType]:
+ cols = [cols]
+ for col in cols:
+ for row in range(len(lst)):
+ try:
+ idx = colex(listmap, 0).index(lst[row][col])
+ lst[row][col] = listmap[idx][1]
+ except ValueError:
+ pass
+ else:
+ for row in range(len(lst)):
+ for col in range(len(lst[row])):
+ try:
+ idx = colex(listmap, 0).index(lst[row][col])
+ lst[row][col] = listmap[idx][1]
+ except ValueError:
+ pass
+ return lst
+
+
+def remap(listoflists, criterion):
+ """
+Remaps values in a given column of a 2D list (listoflists). This requires
+a criterion as a function of 'x' so that the result of the following is
+returned ... map(lambda x: 'criterion',listoflists).
+
+Usage: remap(listoflists,criterion) criterion=string
+Returns: remapped version of listoflists
+"""
+ function = 'map(lambda x: ' + criterion + ',listoflists)'
+ lines = eval(function)
+ return lines
+
+
+def roundlist(inlist, digits):
+ """
+Goes through each element in a 1D or 2D inlist, and applies the following
+function to all elements of FloatType ... round(element,digits).
+
+Usage: roundlist(inlist,digits)
+Returns: list with rounded floats
+"""
+ if type(inlist[0]) in [IntType, FloatType]:
+ inlist = [inlist]
+ l = inlist * 1
+ for i in range(len(l)):
+ for j in range(len(l[i])):
+ if type(l[i][j]) == FloatType:
+ l[i][j] = round(l[i][j], digits)
+ return l
+
+
+def sortby(listoflists, sortcols):
+ """
+Sorts a list of lists on the column(s) specified in the sequence
+sortcols.
+
+Usage: sortby(listoflists,sortcols)
+Returns: sorted list, unchanged column ordering
+"""
+ newlist = abut(colex(listoflists, sortcols), listoflists)
+ newlist.sort()
+ try:
+ numcols = len(sortcols)
+ except TypeError:
+ numcols = 1
+ crit = '[' + str(numcols) + ':]'
+ newlist = colex(newlist, crit)
+ return newlist
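+
+# Illustrative sketch (not in the original module): sort rows on column 0,
+# keeping the column order intact.
+#   sortby([[2, 'b'], [1, 'a']], 0)  # -> [[1, 'a'], [2, 'b']]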
+
+
+def unique(inlist):
+ """
+Returns all unique items in the passed list. If a list-of-lists
+is passed, unique LISTS are found (i.e., items in the first dimension are
+compared).
+
+Usage: unique (inlist)
+Returns: the unique elements (or rows) in inlist
+"""
+ uniques = []
+ for item in inlist:
+ if item not in uniques:
+ uniques.append(item)
+ return uniques
+
+
+def duplicates(inlist):
+ """
+Returns duplicate items in the FIRST dimension of the passed list.
+
+Usage: duplicates (inlist)
+"""
+ dups = []
+ for i in range(len(inlist)):
+ if inlist[i] in inlist[i + 1:]:
+ dups.append(inlist[i])
+ return dups
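+
+# Illustrative sketch (not in the original module):
+#   unique([[1, 2], [1, 2], [3, 4]])  # -> [[1, 2], [3, 4]]
+#   duplicates([1, 2, 2, 3, 3, 3])    # -> [2, 3, 3]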
+
+
+def nonrepeats(inlist):
+ """
+Returns items that are NOT duplicated in the first dim of the passed list.
+
+Usage: nonrepeats (inlist)
+"""
+ nonrepeats = []
+ for i in range(len(inlist)):
+ if inlist.count(inlist[i]) == 1:
+ nonrepeats.append(inlist[i])
+ return nonrepeats
+
+#=================== PSTAT ARRAY FUNCTIONS =====================
+
+try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE
+ import numpy as N
+
+ def aabut(source, *args):
+ """
+Like the |Stat abut command. It concatenates two arrays column-wise
+and returns the result. CAUTION: If one array is shorter, it will be
+repeated until it is as long as the other.
+
+Usage: aabut (source, args) where args=any # of arrays
+Returns: an array as long as the LONGEST array passed, source appearing on the
+ 'left', arrays in <args> attached on the 'right'.
+"""
+ if len(source.shape) == 1:
+ width = 1
+ source = N.resize(source, [source.shape[0], width])
+ else:
+ width = source.shape[1]
+ for addon in args:
+ if len(addon.shape) == 1:
+ width = 1
+ addon = N.resize(addon, [source.shape[0], width])
+ else:
+ width = source.shape[1]
+ if len(addon) < len(source):
+ addon = N.resize(addon, [source.shape[0], addon.shape[1]])
+ elif len(source) < len(addon):
+ source = N.resize(source, [addon.shape[0], source.shape[1]])
+ source = N.concatenate((source, addon), 1)
+ return source
+
+ def acolex(a, indices, axis=1):
+ """
+Extracts specified indices (a list) from passed array, along passed
+axis (column extraction is default). BEWARE: A 1D array is presumed to be a
+column-array (and that the whole array will be returned as a column).
+
+Usage: acolex (a,indices,axis=1)
+Returns: the columns of a specified by indices
+"""
+ if type(indices) not in [ListType, TupleType, N.ndarray]:
+ indices = [indices]
+ if len(N.shape(a)) == 1:
+ cols = N.resize(a, [a.shape[0], 1])
+ else:
+ # print a[:3]
+ cols = N.take(a, indices, axis)
+# print cols[:3]
+ return cols
+
+ def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
+ """
+Averages data in collapsecol, keeping all unique items in keepcols
+(using unique, which keeps unique LISTS of column numbers), retaining
+the unique sets of values in keepcols, the mean for each. If stderror or
+N of the mean are desired, set either or both parameters to 1.
+
+Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
+Returns: unique 'conditions' specified by the contents of columns specified
+ by keepcols, abutted with the mean(s) of column(s) specified by
+ collapsecols
+"""
+
+ def acollmean(inarray):
+ return N.sum(N.ravel(inarray))
+
+ if type(keepcols) not in [ListType, TupleType, N.ndarray]:
+ keepcols = [keepcols]
+ if type(collapsecols) not in [ListType, TupleType, N.ndarray]:
+ collapsecols = [collapsecols]
+
+ if cfcn == None:
+ cfcn = acollmean
+ if keepcols == []:
+ avgcol = acolex(a, collapsecols)
+ means = N.sum(avgcol) / float(len(avgcol))
+ if fcn1 <> None:
+ try:
+ test = fcn1(avgcol)
+ except:
+ test = N.array(['N/A'] * len(means))
+ means = aabut(means, test)
+ if fcn2 <> None:
+ try:
+ test = fcn2(avgcol)
+ except:
+ test = N.array(['N/A'] * len(means))
+ means = aabut(means, test)
+ return means
+ else:
+ if type(keepcols) not in [ListType, TupleType, N.ndarray]:
+ keepcols = [keepcols]
+ values = colex(a, keepcols) # so that "item" can be appended (below)
+ uniques = unique(values) # get a LIST, so .sort keeps rows intact
+ uniques.sort()
+ newlist = []
+ for item in uniques:
+ if type(item) not in [ListType, TupleType, N.ndarray]:
+ item = [item]
+ tmprows = alinexand(a, keepcols, item)
+ for col in collapsecols:
+ avgcol = acolex(tmprows, col)
+ item.append(acollmean(avgcol))
+ if fcn1 <> None:
+ try:
+ test = fcn1(avgcol)
+ except:
+ test = 'N/A'
+ item.append(test)
+ if fcn2 <> None:
+ try:
+ test = fcn2(avgcol)
+ except:
+ test = 'N/A'
+ item.append(test)
+ newlist.append(item)
+ try:
+ new_a = N.array(newlist)
+ except TypeError:
+ new_a = N.array(newlist, 'O')
+ return new_a
+
+ def adm(a, criterion):
+ """
+Returns rows from the passed list of lists that meet the criteria in
+the passed criterion expression (a string as a function of x).
+
+Usage: adm (a,criterion) where criterion is like 'x[2]==37'
+"""
+ function = 'filter(lambda x: ' + criterion + ',a)'
+ lines = eval(function)
+ try:
+ lines = N.array(lines)
+ except:
+ lines = N.array(lines, dtype='O')
+ return lines
+
+ def isstring(x):
+ if type(x) == StringType:
+ return 1
+ else:
+ return 0
+
+ def alinexand(a, columnlist, valuelist):
+ """
+Returns the rows of an array where col (from columnlist) = val
+(from valuelist). One value is required for each column in columnlist.
+
+Usage: alinexand (a,columnlist,valuelist)
+Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
+"""
+ if type(columnlist) not in [ListType, TupleType, N.ndarray]:
+ columnlist = [columnlist]
+ if type(valuelist) not in [ListType, TupleType, N.ndarray]:
+ valuelist = [valuelist]
+ criterion = ''
+ for i in range(len(columnlist)):
+ if type(valuelist[i]) == StringType:
+ critval = '\'' + valuelist[i] + '\''
+ else:
+ critval = str(valuelist[i])
+ criterion = criterion + ' x[' + str(columnlist[
+ i]) + ']==' + critval + ' and'
+ criterion = criterion[0:-3] # remove the "and" after the last crit
+ return adm(a, criterion)
+
+ def alinexor(a, columnlist, valuelist):
+ """
+Returns the rows of an array where col (from columnlist) = val (from
+valuelist). One value is required for each column in columnlist.
+The exception is if either columnlist or valuelist has only 1 value,
+in which case that item will be expanded to match the length of the
+other list.
+
+Usage: alinexor (a,columnlist,valuelist)
+Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
+"""
+ if type(columnlist) not in [ListType, TupleType, N.ndarray]:
+ columnlist = [columnlist]
+ if type(valuelist) not in [ListType, TupleType, N.ndarray]:
+ valuelist = [valuelist]
+ criterion = ''
+ if len(columnlist) == 1 and len(valuelist) > 1:
+ columnlist = columnlist * len(valuelist)
+ elif len(valuelist) == 1 and len(columnlist) > 1:
+ valuelist = valuelist * len(columnlist)
+ for i in range(len(columnlist)):
+ if type(valuelist[i]) == StringType:
+ critval = '\'' + valuelist[i] + '\''
+ else:
+ critval = str(valuelist[i])
+ criterion = criterion + ' x[' + str(columnlist[
+ i]) + ']==' + critval + ' or'
+ criterion = criterion[0:-2] # remove the "or" after the last crit
+ return adm(a, criterion)
+
+ def areplace(a, oldval, newval):
+ """
+Replaces all occurrences of oldval with newval in array a.
+
+Usage: areplace(a,oldval,newval)
+"""
+ return N.where(a == oldval, newval, a)
+
+ def arecode(a, listmap, col='all'):
+ """
+Remaps the values in an array to a new set of values (useful when
+you need to recode data from (e.g.) strings to numbers as most stats
+packages require. Can work on SINGLE columns, or 'all' columns at once.
+@@@BROKEN 2007-11-26
+
+Usage: arecode (a,listmap,col='all')
+Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
+"""
+ ashape = a.shape
+ if col == 'all':
+ work = a.ravel()
+ else:
+ work = acolex(a, col)
+ work = work.ravel()
+ for pair in listmap:
+ if type(pair[
+ 1]) == StringType or work.dtype.char == 'O' or a.dtype.char == 'O':
+ work = N.array(work, dtype='O')
+ a = N.array(a, dtype='O')
+ for i in range(len(work)):
+ if work[i] == pair[0]:
+ work[i] = pair[1]
+ if col == 'all':
+ return N.reshape(work, ashape)
+ else:
+ return N.concatenate(
+ [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
+ else: # must be a non-Object type array and replacement
+ work = N.where(work == pair[0], pair[1], work)
+ return N.concatenate(
+ [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
+
+ def arowcompare(row1, row2):
+ """
+Compares two rows from an array, regardless of whether it is an
+array of numbers or of python objects (which requires the cmp function).
+@@@PURPOSE? 2007-11-26
+
+Usage: arowcompare(row1,row2)
+Returns: an array of equal length containing 1s where the two rows had
+ identical elements and 0 otherwise
+"""
+ if row1.dtype.char == 'O' or row2.dtype == 'O':
+ cmpvect = N.logical_not(
+ abs(N.array(map(cmp, row1, row2)))) # cmp fcn gives -1,0,1
+ else:
+ cmpvect = N.equal(row1, row2)
+ return cmpvect
+
+ def arowsame(row1, row2):
+ """
+Compares two rows from an array, regardless of whether it is an
+array of numbers or of python objects (which requires the cmp function).
+
+Usage: arowsame(row1,row2)
+Returns: 1 if the two rows are identical, 0 otherwise.
+"""
+ cmpval = N.alltrue(arowcompare(row1, row2))
+ return cmpval
+
+ def asortrows(a, axis=0):
+ """
+Sorts an array "by rows". This differs from the Numeric.sort() function,
+which sorts elements WITHIN the given axis. Instead, this function keeps
+the elements along the given axis intact, but shifts them 'up or down'
+relative to one another.
+
+Usage: asortrows(a,axis=0)
+Returns: sorted version of a
+"""
+ return N.sort(a, axis=axis, kind='mergesort')
+
+ def aunique(inarray):
+ """
+Returns unique items in the FIRST dimension of the passed array. Only
+works on arrays NOT including string items.
+
+Usage: aunique (inarray)
+"""
+ uniques = N.array([inarray[0]])
+ if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY
+ for item in inarray[1:]:
+ if N.add.reduce(N.equal(uniques, item).ravel()) == 0:
+          uniques = N.concatenate([uniques, N.array([item])])
+ else: # IT MUST BE A 2+D ARRAY
+ if inarray.dtype.char != 'O': # not an Object array
+ for item in inarray[1:]:
+ if not N.sum(N.alltrue(N.equal(uniques, item), 1)):
+ try:
+ uniques = N.concatenate([uniques, item[N.newaxis, :]])
+ except TypeError: # the item to add isn't a list
+ uniques = N.concatenate([uniques, N.array([item])])
+ else:
+ pass # this item is already in the uniques array
+ else: # must be an Object array, alltrue/equal functions don't work
+ for item in inarray[1:]:
+ newflag = 1
+ for unq in uniques: # NOTE: cmp --> 0=same, -1=<, 1=>
+ test = N.sum(abs(N.array(map(cmp, item, unq))))
+ if test == 0: # if item identical to any 1 row in uniques
+ newflag = 0 # then not a novel item to add
+ break
+ if newflag == 1:
+ try:
+ uniques = N.concatenate([uniques, item[N.newaxis, :]])
+ except TypeError: # the item to add isn't a list
+ uniques = N.concatenate([uniques, N.array([item])])
+ return uniques
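+    # Illustrative example (not part of the original module):
+    #   aunique(N.array([1, 2, 1, 3]))  ->  array([1, 2, 3])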
+
+ def aduplicates(inarray):
+ """
+Returns duplicate items in the FIRST dimension of the passed array. Only
+works on arrays NOT including string items.
+
+Usage: aduplicates (inarray)
+"""
+ inarray = N.array(inarray)
+ if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY
+ dups = []
+ inarray = inarray.tolist()
+ for i in range(len(inarray)):
+ if inarray[i] in inarray[i + 1:]:
+ dups.append(inarray[i])
+ dups = aunique(dups)
+ else: # IT MUST BE A 2+D ARRAY
+ dups = []
+ aslist = inarray.tolist()
+ for i in range(len(aslist)):
+ if aslist[i] in aslist[i + 1:]:
+ dups.append(aslist[i])
+ dups = unique(dups)
+ dups = N.array(dups)
+ return dups
+
+except ImportError: # IF NUMPY ISN'T AVAILABLE, SKIP ALL arrayfuncs
+ pass
diff --git a/cros_utils/stats.py b/cros_utils/stats.py
new file mode 100644
index 00000000..0387a076
--- /dev/null
+++ b/cros_utils/stats.py
@@ -0,0 +1,4519 @@
+# We did not author this file nor do we maintain it. Skip linting it.
+#pylint: skip-file
+# Copyright (c) 1999-2008 Gary Strangman; All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# Comments and/or additions are welcome (send e-mail to:
+# strang@nmr.mgh.harvard.edu).
+#
+"""stats.py module
+
+(Requires pstat.py module.)
+
+#################################################
+####### Written by: Gary Strangman ###########
+####### Last modified: Oct 31, 2008 ###########
+#################################################
+
+A collection of basic statistical functions for python. The function
+names appear below.
+
+IMPORTANT: There are really *3* sets of functions. The first set has an 'l'
+prefix, which can be used with list or tuple arguments. The second set has
+an 'a' prefix, which can accept NumPy array arguments. These latter
+functions are defined only when NumPy is available on the system. The third
+type has NO prefix (i.e., has the name that appears below). Functions of
+this set are members of a "Dispatch" class, c/o David Ascher. This class
+allows different functions to be called depending on the type of the passed
+arguments. Thus, stats.mean is a member of the Dispatch class and
+stats.mean(range(20)) will call stats.lmean(range(20)) while
+stats.mean(Numeric.arange(20)) will call stats.amean(Numeric.arange(20)).
+This is a handy way to keep consistent function names when different
+argument types require different functions to be called. Having
+implemented the Dispatch class, however, means that to get info on
+a given function, you must use the REAL function name ... that is
+"print stats.lmean.__doc__" or "print stats.amean.__doc__" work fine,
+while "print stats.mean.__doc__" will print the doc for the Dispatch
+class. NUMPY FUNCTIONS ('a' prefix) generally have more argument options
+but should otherwise be consistent with the corresponding list functions.
+
+Disclaimers: The function list is obviously incomplete and, worse, the
+functions are not optimized. All functions have been tested (some more
+so than others), but they are far from bulletproof. Thus, as with any
+free software, no warranty or guarantee is expressed or implied. :-) A
+few extra functions that don't appear in the list below can be found by
+interested treasure-hunters. These functions don't necessarily have
+both list and array versions but were deemed useful.
+
+CENTRAL TENDENCY: geometricmean
+ harmonicmean
+ mean
+ median
+ medianscore
+ mode
+
+MOMENTS: moment
+ variation
+ skew
+ kurtosis
+ skewtest (for Numpy arrays only)
+ kurtosistest (for Numpy arrays only)
+ normaltest (for Numpy arrays only)
+
+ALTERED VERSIONS: tmean (for Numpy arrays only)
+ tvar (for Numpy arrays only)
+ tmin (for Numpy arrays only)
+ tmax (for Numpy arrays only)
+ tstdev (for Numpy arrays only)
+ tsem (for Numpy arrays only)
+ describe
+
+FREQUENCY STATS: itemfreq
+ scoreatpercentile
+ percentileofscore
+ histogram
+ cumfreq
+ relfreq
+
+VARIABILITY: obrientransform
+ samplevar
+ samplestdev
+ signaltonoise (for Numpy arrays only)
+ var
+ stdev
+ sterr
+ sem
+ z
+ zs
+ zmap (for Numpy arrays only)
+
+TRIMMING FCNS: threshold (for Numpy arrays only)
+ trimboth
+ trim1
+ round (round all vals to 'n' decimals; Numpy only)
+
+CORRELATION FCNS: covariance (for Numpy arrays only)
+ correlation (for Numpy arrays only)
+ paired
+ pearsonr
+ spearmanr
+ pointbiserialr
+ kendalltau
+ linregress
+
+INFERENTIAL STATS: ttest_1samp
+ ttest_ind
+ ttest_rel
+ chisquare
+ ks_2samp
+ mannwhitneyu
+ ranksums
+ wilcoxont
+ kruskalwallish
+ friedmanchisquare
+
+PROBABILITY CALCS: chisqprob
+ erfcc
+ zprob
+ ksprob
+ fprob
+ betacf
+ gammln
+ betai
+
+ANOVA FUNCTIONS: F_oneway
+ F_value
+
+SUPPORT FUNCTIONS: writecc
+ incr
+ sign (for Numpy arrays only)
+ sum
+ cumsum
+ ss
+ summult
+ sumdiffsquared
+ square_of_sums
+ shellsort
+ rankdata
+ outputpairedstats
+ findwithin
+"""
+## CHANGE LOG:
+## ===========
+## 09-07-21 ... added capability for getting the 'proportion' out of l/amannwhitneyu (but comment-disabled)
+## 08-10-31 ... fixed import LinearAlgebra bug before glm fcns
+## 07-11-26 ... conversion for numpy started
+## 07-05-16 ... added Lin's Concordance Correlation Coefficient (alincc) and acov
+## 05-08-21 ... added "Dice's coefficient"
+## 04-10-26 ... added ap2t(), an ugly fcn for converting p-vals to T-vals
+## 04-04-03 ... added amasslinregress() function to do regression on N-D arrays
+## 03-01-03 ... CHANGED VERSION TO 0.6
+## fixed atsem() to properly handle limits=None case
+## improved histogram and median functions (estbinwidth) and
+## fixed atvar() function (wrong answers for neg numbers?!?)
+## 02-11-19 ... fixed attest_ind and attest_rel for div-by-zero Overflows
+## 02-05-10 ... fixed lchisqprob indentation (failed when df=even)
+## 00-12-28 ... removed aanova() to separate module, fixed licensing to
+## match Python License, fixed doc string & imports
+## 00-04-13 ... pulled all "global" statements, except from aanova()
+## added/fixed lots of documentation, removed io.py dependency
+## changed to version 0.5
+## 99-11-13 ... added asign() function
+## 99-11-01 ... changed version to 0.4 ... enough incremental changes now
+## 99-10-25 ... added acovariance and acorrelation functions
+## 99-10-10 ... fixed askew/akurtosis to avoid divide-by-zero errors
+## added aglm function (crude, but will be improved)
+## 99-10-04 ... upgraded acumsum, ass, asummult, asamplevar, avar, etc. to
+## all handle lists of 'dimension's and keepdims
+## REMOVED ar0, ar2, ar3, ar4 and replaced them with around
+## reinserted fixes for abetai to avoid math overflows
+## 99-09-05 ... rewrote achisqprob/aerfcc/aksprob/afprob/abetacf/abetai to
+## handle multi-dimensional arrays (whew!)
+## 99-08-30 ... fixed l/amoment, l/askew, l/akurtosis per D'Agostino (1990)
+## added anormaltest per same reference
+## re-wrote azprob to calc arrays of probs all at once
+## 99-08-22 ... edited attest_ind printing section so arrays could be rounded
+## 99-08-19 ... fixed amean and aharmonicmean for non-error(!) overflow on
+## short/byte arrays (mean of #s btw 100-300 = -150??)
+## 99-08-09 ... fixed asum so that the None case works for Byte arrays
+## 99-08-08 ... fixed 7/3 'improvement' to handle t-calcs on N-D arrays
+## 99-07-03 ... improved attest_ind, attest_rel (zero-division errortrap)
+## 99-06-24 ... fixed bug(?) in attest_ind (n1=a.shape[0])
+## 04/11/99 ... added asignaltonoise, athreshold functions, changed all
+## max/min in array section to N.maximum/N.minimum,
+## fixed square_of_sums to prevent integer overflow
+## 04/10/99 ... !!! Changed function name ... sumsquared ==> square_of_sums
+## 03/18/99 ... Added ar0, ar2, ar3 and ar4 rounding functions
+## 02/28/99 ... Fixed aobrientransform to return an array rather than a list
+## 01/15/99 ... Essentially ceased updating list-versions of functions (!!!)
+## 01/13/99 ... CHANGED TO VERSION 0.3
+## fixed bug in a/lmannwhitneyu p-value calculation
+## 12/31/98 ... fixed variable-name bug in ldescribe
+## 12/19/98 ... fixed bug in findwithin (fcns needed pstat. prefix)
+## 12/16/98 ... changed amedianscore to return float (not array) for 1 score
+## 12/14/98 ... added atmin and atmax functions
+## removed umath from import line (not needed)
+## l/ageometricmean modified to reduce chance of overflows (take
+## nth root first, then multiply)
+## 12/07/98 ... added __version__variable (now 0.2)
+## removed all 'stats.' from anova() fcn
+## 12/06/98 ... changed those functions (except shellsort) that altered
+## arguments in-place ... cumsum, ranksort, ...
+## updated (and fixed some) doc-strings
+## 12/01/98 ... added anova() function (requires NumPy)
+## incorporated Dispatch class
+## 11/12/98 ... added functionality to amean, aharmonicmean, ageometricmean
+## added 'asum' function (added functionality to N.add.reduce)
+## fixed both moment and amoment (two errors)
+## changed name of skewness and askewness to skew and askew
+## fixed (a)histogram (which sometimes counted points <lowerlimit)
+
+import pstat # required 3rd party module
+import math, string, copy # required python modules
+from types import *
+
+__version__ = 0.6
+
+############# DISPATCH CODE ##############
+
+
+class Dispatch:
+ """
+The Dispatch class, care of David Ascher, allows different functions to
+be called depending on the argument types. This way, there can be one
+function name regardless of the argument type. To access function doc
+in stats.py module, prefix the function with an 'l' or 'a' for list or
+array arguments, respectively. That is, print stats.lmean.__doc__ or
+print stats.amean.__doc__ or whatever.
+"""
+
+ def __init__(self, *tuples):
+ self._dispatch = {}
+ for func, types in tuples:
+ for t in types:
+ if t in self._dispatch.keys():
+ raise ValueError, "can't have two dispatches on " + str(t)
+ self._dispatch[t] = func
+ self._types = self._dispatch.keys()
+
+ def __call__(self, arg1, *args, **kw):
+ if type(arg1) not in self._types:
+ raise TypeError, "don't know how to dispatch %s arguments" % type(arg1)
+ return apply(self._dispatch[type(arg1)], (arg1,) + args, kw)
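+
+# Illustrative example (not part of the original module): the module-end
+# assignments such as
+#   mean = Dispatch((lmean, (ListType, TupleType)),)
+# make stats.mean([1, 2, 3]) route to lmean (returning 2.0); array
+# arguments route to the 'a'-prefixed versions when those are defined.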
+
+##########################################################################
+######################## LIST-BASED FUNCTIONS ########################
+##########################################################################
+
+### Define these regardless
+
+####################################
+####### CENTRAL TENDENCY #########
+####################################
+
+
+def lgeometricmean(inlist):
+ """
+Calculates the geometric mean of the values in the passed list.
+That is: n-th root of (x1 * x2 * ... * xn). Assumes a '1D' list.
+
+Usage: lgeometricmean(inlist)
+"""
+ mult = 1.0
+ one_over_n = 1.0 / len(inlist)
+ for item in inlist:
+ mult = mult * pow(item, one_over_n)
+ return mult
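+# Illustrative example (not part of the original module):
+# lgeometricmean([2, 8]) computes pow(2, 0.5) * pow(8, 0.5) = sqrt(16) = 4.0.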
+
+
+def lharmonicmean(inlist):
+ """
+Calculates the harmonic mean of the values in the passed list.
+That is: n / (1/x1 + 1/x2 + ... + 1/xn). Assumes a '1D' list.
+
+Usage: lharmonicmean(inlist)
+"""
+ sum = 0
+ for item in inlist:
+ sum = sum + 1.0 / item
+ return len(inlist) / sum
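+# Illustrative example (not part of the original module):
+# lharmonicmean([1, 3]) returns 2 / (1/1. + 1/3.) = 1.5.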
+
+
+def lmean(inlist):
+ """
+Returns the arithmetic mean of the values in the passed list.
+Assumes a '1D' list, but will function on the 1st dim of an array(!).
+
+Usage: lmean(inlist)
+"""
+ sum = 0
+ for item in inlist:
+ sum = sum + item
+ return sum / float(len(inlist))
+
+
+def lmedian(inlist, numbins=1000):
+ """
+Returns the computed median value of a list of numbers, given the
+number of bins to use for the histogram (more bins brings the computed value
+closer to the median score, default number of bins = 1000). See G.W.
+Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics.
+
+Usage: lmedian (inlist, numbins=1000)
+"""
+ (hist, smallest, binsize, extras) = histogram(
+ inlist, numbins, [min(inlist), max(inlist)]) # make histog
+ cumhist = cumsum(hist) # make cumulative histogram
+ for i in range(len(cumhist)): # get 1st(!) index holding 50%ile score
+ if cumhist[i] >= len(inlist) / 2.0:
+ cfbin = i
+ break
+  LRL = smallest + binsize * cfbin # get lower real limit of that bin
+ cfbelow = cumhist[cfbin - 1]
+ freq = float(hist[cfbin]) # frequency IN the 50%ile bin
+ median = LRL + (
+ (len(inlist) / 2.0 - cfbelow) / float(freq)) * binsize # median formula
+ return median
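+# Illustrative example (not part of the original module): lmedian([1, 2, 3])
+# finds the histogram bin whose cumulative count first reaches N/2 and
+# interpolates within it, returning a value very close to 2.0.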
+
+
+def lmedianscore(inlist):
+ """
+Returns the 'middle' score of the passed list. If there is an even
+number of scores, the mean of the 2 middle scores is returned.
+
+Usage: lmedianscore(inlist)
+"""
+
+ newlist = copy.deepcopy(inlist)
+ newlist.sort()
+ if len(newlist) % 2 == 0: # if even number of scores, average middle 2
+ index = len(newlist) / 2 # integer division correct
+ median = float(newlist[index] + newlist[index - 1]) / 2
+ else:
+ index = len(newlist) / 2 # int divsion gives mid value when count from 0
+ median = newlist[index]
+ return median
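+# Illustrative example (not part of the original module):
+# lmedianscore([7, 1, 3, 5]) sorts to [1, 3, 5, 7] and averages the two
+# middle scores: (3 + 5) / 2.0 = 4.0.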
+
+
+def lmode(inlist):
+ """
+Returns a list of the modal (most common) score(s) in the passed
+list. If there is more than one such score, all are returned. The
+bin-count for the mode(s) is also returned.
+
+Usage: lmode(inlist)
+Returns: bin-count for mode(s), a list of modal value(s)
+"""
+
+ scores = pstat.unique(inlist)
+ scores.sort()
+ freq = []
+ for item in scores:
+ freq.append(inlist.count(item))
+ maxfreq = max(freq)
+ mode = []
+ stillmore = 1
+ while stillmore:
+ try:
+ indx = freq.index(maxfreq)
+ mode.append(scores[indx])
+ del freq[indx]
+ del scores[indx]
+ except ValueError:
+ stillmore = 0
+ return maxfreq, mode
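+# Illustrative example (not part of the original module):
+# lmode([1, 2, 2, 3, 3]) returns (2, [2, 3]) -- both 2 and 3 occur twice.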
+
+####################################
+############ MOMENTS #############
+####################################
+
+
+def lmoment(inlist, moment=1):
+ """
+Calculates the nth moment about the mean for a sample (defaults to
+the 1st moment). Used to calculate coefficients of skewness and kurtosis.
+
+Usage: lmoment(inlist,moment=1)
+Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r)
+"""
+ if moment == 1:
+ return 0.0
+ else:
+ mn = mean(inlist)
+ n = len(inlist)
+ s = 0
+ for x in inlist:
+ s = s + (x - mn)**moment
+ return s / float(n)
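+# Illustrative example (not part of the original module):
+# lmoment([1, 2, 3, 4], 2) is the second moment about the mean (variance
+# with an N denominator): (2.25 + 0.25 + 0.25 + 2.25) / 4.0 = 1.25.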
+
+
+def lvariation(inlist):
+ """
+Returns the coefficient of variation, as defined in CRC Standard
+Probability and Statistics, p.6.
+
+Usage: lvariation(inlist)
+"""
+ return 100.0 * samplestdev(inlist) / float(mean(inlist))
+
+
+def lskew(inlist):
+ """
+Returns the skewness of a distribution, as defined in Numerical
+Recipes (alternate defn in CRC Standard Probability and Statistics, p.6.)
+
+Usage: lskew(inlist)
+"""
+ return moment(inlist, 3) / pow(moment(inlist, 2), 1.5)
+
+
+def lkurtosis(inlist):
+ """
+Returns the kurtosis of a distribution, as defined in Numerical
+Recipes (alternate defn in CRC Standard Probability and Statistics, p.6.)
+
+Usage: lkurtosis(inlist)
+"""
+ return moment(inlist, 4) / pow(moment(inlist, 2), 2.0)
+
+
+def ldescribe(inlist):
+ """
+Returns some descriptive statistics of the passed list (assumed to be 1D).
+
+Usage: ldescribe(inlist)
+Returns: n, mean, standard deviation, skew, kurtosis
+"""
+ n = len(inlist)
+ mm = (min(inlist), max(inlist))
+ m = mean(inlist)
+ sd = stdev(inlist)
+ sk = skew(inlist)
+ kurt = kurtosis(inlist)
+ return n, mm, m, sd, sk, kurt
+
+####################################
+####### FREQUENCY STATS ##########
+####################################
+
+
+def litemfreq(inlist):
+ """
+Returns a list of pairs. Each pair consists of one of the scores in inlist
+and its frequency count. Assumes a 1D list is passed.
+
+Usage: litemfreq(inlist)
+Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)
+"""
+ scores = pstat.unique(inlist)
+ scores.sort()
+ freq = []
+ for item in scores:
+ freq.append(inlist.count(item))
+ return pstat.abut(scores, freq)
+
+
+def lscoreatpercentile(inlist, percent):
+ """
+Returns the score at a given percentile relative to the distribution
+given by inlist.
+
+Usage: lscoreatpercentile(inlist,percent)
+"""
+ if percent > 1:
+ print '\nDividing percent>1 by 100 in lscoreatpercentile().\n'
+ percent = percent / 100.0
+ targetcf = percent * len(inlist)
+ h, lrl, binsize, extras = histogram(inlist)
+ cumhist = cumsum(copy.deepcopy(h))
+ for i in range(len(cumhist)):
+ if cumhist[i] >= targetcf:
+ break
+ score = binsize * (
+ (targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + binsize * i)
+ return score
+
+
+def lpercentileofscore(inlist, score, histbins=10, defaultlimits=None):
+ """
+Returns the percentile value of a score relative to the distribution
+given by inlist. Formula depends on the values used to histogram the data(!).
+
+Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None)
+"""
+
+ h, lrl, binsize, extras = histogram(inlist, histbins, defaultlimits)
+ cumhist = cumsum(copy.deepcopy(h))
+ i = int((score - lrl) / float(binsize))
+ pct = (cumhist[i - 1] + (
+ (score -
+ (lrl + binsize * i)) / float(binsize)) * h[i]) / float(len(inlist)) * 100
+ return pct
+
+
+def lhistogram(inlist, numbins=10, defaultreallimits=None, printextras=0):
+ """
+Returns (i) a list of histogram bin counts, (ii) the smallest value
+of the histogram binning, and (iii) the bin width (the last 2 are not
+necessarily integers). Default number of bins is 10. If no sequence object
+is given for defaultreallimits, the routine picks (usually non-pretty) bins
+spanning all the numbers in the inlist.
+
+Usage: lhistogram (inlist, numbins=10,
+defaultreallimits=None, printextras=0)
+Returns: list of bin values, lowerreallimit, binsize, extrapoints
+"""
+ if (defaultreallimits <> None):
+ if type(defaultreallimits) not in [ListType, TupleType] or len(
+ defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd
+ lowerreallimit = defaultreallimits
+ upperreallimit = 1.000001 * max(inlist)
+ else: # assume both limits given
+ lowerreallimit = defaultreallimits[0]
+ upperreallimit = defaultreallimits[1]
+ binsize = (upperreallimit - lowerreallimit) / float(numbins)
+ else: # no limits given for histogram, both must be calc'd
+ estbinwidth = (max(inlist) -
+ min(inlist)) / float(numbins) + 1e-6 #1=>cover all
+ binsize = ((max(inlist) - min(inlist) + estbinwidth)) / float(numbins)
+ lowerreallimit = min(inlist) - binsize / 2 #lower real limit,1st bin
+ bins = [0] * (numbins)
+ extrapoints = 0
+ for num in inlist:
+ try:
+ if (num - lowerreallimit) < 0:
+ extrapoints = extrapoints + 1
+ else:
+ bintoincrement = int((num - lowerreallimit) / float(binsize))
+ bins[bintoincrement] = bins[bintoincrement] + 1
+ except:
+ extrapoints = extrapoints + 1
+ if (extrapoints > 0 and printextras == 1):
+ print '\nPoints outside given histogram range =', extrapoints
+ return (bins, lowerreallimit, binsize, extrapoints)
+
+
+def lcumfreq(inlist, numbins=10, defaultreallimits=None):
+ """
+Returns a cumulative frequency histogram, using the histogram function.
+
+Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None)
+Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h, l, b, e = histogram(inlist, numbins, defaultreallimits)
+ cumhist = cumsum(copy.deepcopy(h))
+ return cumhist, l, b, e
+
+
+def lrelfreq(inlist, numbins=10, defaultreallimits=None):
+ """
+Returns a relative frequency histogram, using the histogram function.
+
+Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None)
+Returns: list of relfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h, l, b, e = histogram(inlist, numbins, defaultreallimits)
+ for i in range(len(h)):
+ h[i] = h[i] / float(len(inlist))
+ return h, l, b, e
+
+####################################
+##### VARIABILITY FUNCTIONS ######
+####################################
+
+
+def lobrientransform(*args):
+ """
+Computes a transform on input data (any number of columns). Used to
+test for homogeneity of variance prior to running one-way stats. From
+Maxwell and Delaney, p.112.
+
+Usage: lobrientransform(*args)
+Returns: transformed data for use in an ANOVA
+"""
+ TINY = 1e-10
+ k = len(args)
+ n = [0.0] * k
+ v = [0.0] * k
+ m = [0.0] * k
+ nargs = []
+ for i in range(k):
+ nargs.append(copy.deepcopy(args[i]))
+ n[i] = float(len(nargs[i]))
+ v[i] = var(nargs[i])
+ m[i] = mean(nargs[i])
+ for j in range(k):
+    for i in range(int(n[j])): # n[j] is stored as a float
+ t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j])**2
+ t2 = 0.5 * v[j] * (n[j] - 1.0)
+ t3 = (n[j] - 1.0) * (n[j] - 2.0)
+ nargs[j][i] = (t1 - t2) / float(t3)
+ check = 1
+ for j in range(k):
+ if v[j] - mean(nargs[j]) > TINY:
+ check = 0
+ if check <> 1:
+ raise ValueError, 'Problem in obrientransform.'
+ else:
+ return nargs
+
+
+def lsamplevar(inlist):
+ """
+Returns the variance of the values in the passed list using
+N for the denominator (i.e., DESCRIBES the sample variance only).
+
+Usage: lsamplevar(inlist)
+"""
+ n = len(inlist)
+ mn = mean(inlist)
+ deviations = []
+ for item in inlist:
+ deviations.append(item - mn)
+ return ss(deviations) / float(n)
+
+
+def lsamplestdev(inlist):
+ """
+Returns the standard deviation of the values in the passed list using
+N for the denominator (i.e., DESCRIBES the sample stdev only).
+
+Usage: lsamplestdev(inlist)
+"""
+ return math.sqrt(samplevar(inlist))
+
+
+def lcov(x, y, keepdims=0):
+ """
+Returns the estimated covariance of the paired values in lists x and y,
+using N-1 in the denominator. The keepdims argument is unused here; it
+only mirrors the signature of the array version of this function.
+
+Usage: lcov(x,y,keepdims=0)
+"""
+
+ n = len(x)
+ xmn = mean(x)
+ ymn = mean(y)
+ xdeviations = [0] * len(x)
+ ydeviations = [0] * len(y)
+ for i in range(len(x)):
+ xdeviations[i] = x[i] - xmn
+ ydeviations[i] = y[i] - ymn
+ ss = 0.0
+ for i in range(len(xdeviations)):
+ ss = ss + xdeviations[i] * ydeviations[i]
+ return ss / float(n - 1)
+
+
+def lvar(inlist):
+ """
+Returns the variance of the values in the passed list using N-1
+for the denominator (i.e., for estimating population variance).
+
+Usage: lvar(inlist)
+"""
+ n = len(inlist)
+ mn = mean(inlist)
+ deviations = [0] * len(inlist)
+ for i in range(len(inlist)):
+ deviations[i] = inlist[i] - mn
+ return ss(deviations) / float(n - 1)
+
+
+def lstdev(inlist):
+ """
+Returns the standard deviation of the values in the passed list
+using N-1 in the denominator (i.e., to estimate population stdev).
+
+Usage: lstdev(inlist)
+"""
+ return math.sqrt(var(inlist))
+
+
+def lsterr(inlist):
+ """
+Returns the standard error of the values in the passed list using N-1
+in the denominator (i.e., to estimate population standard error).
+
+Usage: lsterr(inlist)
+"""
+ return stdev(inlist) / float(math.sqrt(len(inlist)))
+
+
+def lsem(inlist):
+ """
+Returns the estimated standard error of the mean (sx-bar) of the
+values in the passed list. sem = stdev / sqrt(n)
+
+Usage: lsem(inlist)
+"""
+ sd = stdev(inlist)
+ n = len(inlist)
+ return sd / math.sqrt(n)
+
+
+def lz(inlist, score):
+ """
+Returns the z-score for a given input score, given that score and the
+list from which that score came. Not appropriate for population calculations.
+
+Usage: lz(inlist, score)
+"""
+ z = (score - mean(inlist)) / samplestdev(inlist)
+ return z
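+# Illustrative example (not part of the original module):
+# lz([1, 2, 3, 4, 5], 5) returns (5 - 3) / sqrt(2) ~= 1.414, since the
+# samplestdev of that list is sqrt(2).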
+
+
+def lzs(inlist):
+ """
+Returns a list of z-scores, one for each score in the passed list.
+
+Usage: lzs(inlist)
+"""
+ zscores = []
+ for item in inlist:
+ zscores.append(z(inlist, item))
+ return zscores
+
+####################################
+####### TRIMMING FUNCTIONS #######
+####################################
+
+
+def ltrimboth(l, proportiontocut):
+ """
+Slices off the passed proportion of items from BOTH ends of the passed
+list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost'
+10% of scores). Assumes list is sorted by magnitude. Slices off LESS if
+proportion results in a non-integer slice index (i.e., conservatively
+slices off proportiontocut).
+
+Usage: ltrimboth (l,proportiontocut)
+Returns: trimmed version of list l
+"""
+ lowercut = int(proportiontocut * len(l))
+ uppercut = len(l) - lowercut
+ return l[lowercut:uppercut]
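+# Illustrative example (not part of the original module): for the sorted
+# list l = range(10), ltrimboth(l, 0.2) drops int(0.2 * 10) = 2 items from
+# each end and returns [2, 3, 4, 5, 6, 7].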
+
+
+def ltrim1(l, proportiontocut, tail='right'):
+ """
+Slices off the passed proportion of items from ONE end of the passed
+list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost'
+10% of scores). Slices off LESS if proportion results in a non-integer
+slice index (i.e., conservatively slices off proportiontocut).
+
+Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left'
+Returns: trimmed version of list l
+"""
+ if tail == 'right':
+ lowercut = 0
+ uppercut = len(l) - int(proportiontocut * len(l))
+ elif tail == 'left':
+ lowercut = int(proportiontocut * len(l))
+ uppercut = len(l)
+ return l[lowercut:uppercut]
+
+####################################
+##### CORRELATION FUNCTIONS ######
+####################################
+
+
+def lpaired(x, y):
+ """
+Interactively determines the type of data and then runs the
+appropriate statistic for paired group data.
+
+Usage: lpaired(x,y)
+Returns: appropriate statistic name, value, and probability
+"""
+ samples = ''
+ while samples not in ['i', 'r', 'I', 'R', 'c', 'C']:
+ print '\nIndependent or related samples, or correlation (i,r,c): ',
+ samples = raw_input()
+
+ if samples in ['i', 'I', 'r', 'R']:
+ print '\nComparing variances ...',
+    # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & Delaney, p.112
+ r = obrientransform(x, y)
+ f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1))
+ if p < 0.05:
+ vartype = 'unequal, p=' + str(round(p, 4))
+ else:
+ vartype = 'equal'
+ print vartype
+ if samples in ['i', 'I']:
+ if vartype[0] == 'e':
+ t, p = ttest_ind(x, y, 0)
+ print '\nIndependent samples t-test: ', round(t, 4), round(p, 4)
+ else:
+ if len(x) > 20 or len(y) > 20:
+ z, p = ranksums(x, y)
+ print '\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(
+ p, 4)
+ else:
+ u, p = mannwhitneyu(x, y)
+ print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(
+ u, 4), round(p, 4)
+
+ else: # RELATED SAMPLES
+ if vartype[0] == 'e':
+ t, p = ttest_rel(x, y, 0)
+ print '\nRelated samples t-test: ', round(t, 4), round(p, 4)
+ else:
+      t, p = wilcoxont(x, y)
+ print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)
+ else: # CORRELATION ANALYSIS
+ corrtype = ''
+ while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']:
+ print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
+ corrtype = raw_input()
+ if corrtype in ['c', 'C']:
+ m, b, r, p, see = linregress(x, y)
+ print '\nLinear regression for continuous variables ...'
+ lol = [['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'],
+ [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]
+ ]
+ pstat.printcc(lol)
+ elif corrtype in ['r', 'R']:
+ r, p = spearmanr(x, y)
+ print '\nCorrelation for ranked variables ...'
+ print "Spearman's r: ", round(r, 4), round(p, 4)
+ else: # DICHOTOMOUS
+ r, p = pointbiserialr(x, y)
+ print '\nAssuming x contains a dichotomous variable ...'
+ print 'Point Biserial r: ', round(r, 4), round(p, 4)
+ print '\n\n'
+ return None
+
+
+def lpearsonr(x, y):
+ """
+Calculates a Pearson correlation coefficient and the associated
+probability value. Taken from Heiman's Basic Statistics for the Behav.
+Sci (2nd), p.195.
+
+Usage: lpearsonr(x,y) where x and y are equal-length lists
+Returns: Pearson's r value, two-tailed p-value
+"""
+ TINY = 1.0e-30
+ if len(x) <> len(y):
+ raise ValueError, 'Input values not paired in pearsonr. Aborting.'
+ n = len(x)
+ x = map(float, x)
+ y = map(float, y)
+ xmean = mean(x)
+ ymean = mean(y)
+ r_num = n * (summult(x, y)) - sum(x) * sum(y)
+ r_den = math.sqrt((n * ss(x) - square_of_sums(x)) *
+ (n * ss(y) - square_of_sums(y)))
+ r = (r_num / r_den) # denominator already a float
+ df = n - 2
+ t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
+ prob = betai(0.5 * df, 0.5, df / float(df + t * t))
+ return r, prob
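+# Illustrative example (not part of the original module): a perfectly
+# linear pairing such as x = [1, 2, 3], y = [2, 4, 6] gives r = 1.0; prob
+# comes from the t distribution with n - 2 degrees of freedom.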
+
+
+def llincc(x, y):
+ """
+Calculates Lin's concordance correlation coefficient.
+
+Usage: llincc(x,y) where x, y are equal-length lists
+Returns: Lin's CC
+"""
+ covar = lcov(x, y) * (len(x) - 1) / float(len(x)) # correct denom to n
+ xvar = lvar(x) * (len(x) - 1) / float(len(x)) # correct denom to n
+ yvar = lvar(y) * (len(y) - 1) / float(len(y)) # correct denom to n
+  lincc = (2 * covar) / ((xvar + yvar) + ((mean(x) - mean(y))**2))
+ return lincc
+
+
+def lspearmanr(x, y):
+ """
+Calculates a Spearman rank-order correlation coefficient. Taken
+from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.
+
+Usage: lspearmanr(x,y) where x and y are equal-length lists
+Returns: Spearman's r, two-tailed p-value
+"""
+ TINY = 1e-30
+ if len(x) <> len(y):
+ raise ValueError, 'Input values not paired in spearmanr. Aborting.'
+ n = len(x)
+ rankx = rankdata(x)
+ ranky = rankdata(y)
+ dsq = sumdiffsquared(rankx, ranky)
+ rs = 1 - 6 * dsq / float(n * (n**2 - 1))
+ t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs)))
+ df = n - 2
+ probrs = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float
+ # probability values for rs are from part 2 of the spearman function in
+  # Numerical Recipes, p.510. They are close to tables, but not exact. (?)
+ return rs, probrs
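+# Illustrative example (not part of the original module):
+# lspearmanr([1, 2, 3], [1, 3, 2]) has one swapped rank pair, so
+# rs = 1 - 6 * 2 / (3 * (9 - 1)) = 0.5.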
+
+
+def lpointbiserialr(x, y):
+ """
+Calculates a point-biserial correlation coefficient and the associated
+probability value. Taken from Heiman's Basic Statistics for the Behav.
+Sci (1st), p.194.
+
+Usage: lpointbiserialr(x,y) where x,y are equal-length lists
+Returns: Point-biserial r, two-tailed p-value
+"""
+ TINY = 1e-30
+ if len(x) <> len(y):
+ raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.'
+ data = pstat.abut(x, y)
+ categories = pstat.unique(x)
+ if len(categories) <> 2:
+ raise ValueError, 'Exactly 2 categories required for pointbiserialr().'
+ else: # there are 2 categories, continue
+ codemap = pstat.abut(categories, range(2))
+ recoded = pstat.recode(data, codemap, 0)
+ x = pstat.linexand(data, 0, categories[0])
+ y = pstat.linexand(data, 0, categories[1])
+ xmean = mean(pstat.colex(x, 1))
+ ymean = mean(pstat.colex(y, 1))
+ n = len(data)
+ adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n)))
+ rpb = (ymean - xmean) / samplestdev(pstat.colex(data, 1)) * adjust
+ df = n - 2
+ t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY)))
+ prob = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float
+ return rpb, prob
+
+
+def lkendalltau(x, y):
+ """
+Calculates Kendall's tau ... correlation of ordinal data. Adapted
+from function kendl1 in Numerical Recipes. Needs a good test routine.@@@
+
+Usage: lkendalltau(x,y)
+Returns: Kendall's tau, two-tailed p-value
+"""
+ n1 = 0
+ n2 = 0
+ iss = 0
+ for j in range(len(x) - 1):
+    for k in range(j + 1, len(y)): # compare each distinct pair once
+ a1 = x[j] - x[k]
+ a2 = y[j] - y[k]
+ aa = a1 * a2
+ if (aa): # neither list has a tie
+ n1 = n1 + 1
+ n2 = n2 + 1
+ if aa > 0:
+ iss = iss + 1
+ else:
+ iss = iss - 1
+ else:
+ if (a1):
+ n1 = n1 + 1
+ else:
+ n2 = n2 + 1
+ tau = iss / math.sqrt(n1 * n2)
+ svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1))
+ z = tau / math.sqrt(svar)
+ prob = erfcc(abs(z) / 1.4142136)
+ return tau, prob
+
+
+def llinregress(x, y):
+ """
+Calculates a regression line on x,y pairs.
+
+Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates
+Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
+"""
+ TINY = 1.0e-20
+ if len(x) <> len(y):
+ raise ValueError, 'Input values not paired in linregress. Aborting.'
+ n = len(x)
+ x = map(float, x)
+ y = map(float, y)
+ xmean = mean(x)
+ ymean = mean(y)
+ r_num = float(n * (summult(x, y)) - sum(x) * sum(y))
+ r_den = math.sqrt((n * ss(x) - square_of_sums(x)) *
+ (n * ss(y) - square_of_sums(y)))
+ r = r_num / r_den
+ z = 0.5 * math.log((1.0 + r + TINY) / (1.0 - r + TINY))
+ df = n - 2
+ t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
+ prob = betai(0.5 * df, 0.5, df / (df + t * t))
+ slope = r_num / float(n * ss(x) - square_of_sums(x))
+ intercept = ymean - slope * xmean
+ sterrest = math.sqrt(1 - r * r) * samplestdev(y)
+ return slope, intercept, r, prob, sterrest
+
+####################################
+##### INFERENTIAL STATISTICS #####
+####################################
+
+
+def lttest_1samp(a, popmean, printit=0, name='Sample', writemode='a'):
+ """
+Calculates the t-obtained for the independent samples T-test on ONE group
+of scores a, given a population mean. If printit=1, results are printed
+to the screen. If printit='filename', the results are output to 'filename'
+using the given writemode (default=append). Returns t-value, and prob.
+
+Usage: lttest_1samp(a,popmean,printit=0,name='Sample',writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ x = mean(a)
+ v = var(a)
+ n = len(a)
+ df = n - 1
+ svar = ((n - 1) * v) / float(df)
+ t = (x - popmean) / math.sqrt(svar * (1.0 / n))
+ prob = betai(0.5 * df, 0.5, float(df) / (df + t * t))
+
+ if printit <> 0:
+ statname = 'Single-sample T-test.'
+ outputpairedstats(printit, writemode, 'Population', '--', popmean, 0, 0, 0,
+ name, n, x, v, min(a), max(a), statname, t, prob)
+ return t, prob
+
+
+def lttest_ind(a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO INDEPENDENT samples of
+scores a, and b. From Numerical Recipes, p.483. If printit=1, results
+are printed to the screen. If printit='filename', the results are output
+to 'filename' using the given writemode (default=append). Returns t-value,
+and prob.
+
+Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ x1 = mean(a)
+ x2 = mean(b)
+ v1 = stdev(a)**2
+ v2 = stdev(b)**2
+ n1 = len(a)
+ n2 = len(b)
+ df = n1 + n2 - 2
+ svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df)
+ if not svar:
+ svar = 1.0e-26
+ t = (x1 - x2) / math.sqrt(svar * (1.0 / n1 + 1.0 / n2))
+ prob = betai(0.5 * df, 0.5, df / (df + t * t))
+
+ if printit <> 0:
+ statname = 'Independent samples T-test.'
+ outputpairedstats(printit, writemode, name1, n1, x1, v1, min(a), max(a),
+ name2, n2, x2, v2, min(b), max(b), statname, t, prob)
+ return t, prob
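+# Illustrative example (not part of the original module): identical samples
+# give t = 0 and p = 1.0, e.g. lttest_ind([1, 2, 3], [1, 2, 3]).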
+
+
+def lttest_rel(a,
+ b,
+ printit=0,
+ name1='Sample1',
+ name2='Sample2',
+ writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO RELATED samples of scores,
+a and b. From Numerical Recipes, p.483. If printit=1, results are
+printed to the screen. If printit='filename', the results are output to
+'filename' using the given writemode (default=append). Returns t-value,
+and prob.
+
+Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ if len(a) <> len(b):
+ raise ValueError, 'Unequal length lists in ttest_rel.'
+ x1 = mean(a)
+ x2 = mean(b)
+ v1 = var(a)
+ v2 = var(b)
+ n = len(a)
+ cov = 0
+ for i in range(len(a)):
+ cov = cov + (a[i] - x1) * (b[i] - x2)
+ df = n - 1
+ cov = cov / float(df)
+ sd = math.sqrt((v1 + v2 - 2.0 * cov) / float(n))
+ t = (x1 - x2) / sd
+ prob = betai(0.5 * df, 0.5, df / (df + t * t))
+
+ if printit <> 0:
+ statname = 'Related samples T-test.'
+ outputpairedstats(printit, writemode, name1, n, x1, v1, min(a), max(a),
+ name2, n, x2, v2, min(b), max(b), statname, t, prob)
+ return t, prob
+
+
+def lchisquare(f_obs, f_exp=None):
+ """
+Calculates a one-way chi square for a list of observed frequencies and returns
+the result. If no expected frequencies are given, the total N is assumed to
+be equally distributed across all groups.
+
+Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq.
+Returns: chisquare-statistic, associated p-value
+"""
+ k = len(f_obs) # number of groups
+ if f_exp == None:
+ f_exp = [sum(f_obs) / float(k)] * len(f_obs) # create k bins with = freq.
+ chisq = 0
+ for i in range(len(f_obs)):
+ chisq = chisq + (f_obs[i] - f_exp[i])**2 / float(f_exp[i])
+ return chisq, chisqprob(chisq, k - 1)
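+# Illustrative example (not part of the original module):
+# lchisquare([10, 20, 30]) tests against equal expected frequencies
+# [20, 20, 20], giving chisq = (100 + 0 + 100) / 20.0 = 10.0 with df = 2.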
+
+
+def lks_2samp(data1, data2):
+ """
+Computes the Kolmogorov-Smirnov statistic on 2 samples. From
+Numerical Recipes in C, page 493.
+
+Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions
+Returns: KS D-value, associated p-value
+"""
+ j1 = 0
+ j2 = 0
+ fn1 = 0.0
+ fn2 = 0.0
+ n1 = len(data1)
+ n2 = len(data2)
+ en1 = n1
+ en2 = n2
+ d = 0.0
+ data1.sort()
+ data2.sort()
+ while j1 < n1 and j2 < n2:
+ d1 = data1[j1]
+ d2 = data2[j2]
+ if d1 <= d2:
+ fn1 = (j1) / float(en1)
+ j1 = j1 + 1
+ if d2 <= d1:
+ fn2 = (j2) / float(en2)
+ j2 = j2 + 1
+ dt = (fn2 - fn1)
+ if math.fabs(dt) > math.fabs(d):
+ d = dt
+ try:
+ en = math.sqrt(en1 * en2 / float(en1 + en2))
+ prob = ksprob((en + 0.12 + 0.11 / en) * abs(d))
+ except:
+ prob = 1.0
+ return d, prob
+
+
+def lmannwhitneyu(x, y):
+ """
+Calculates a Mann-Whitney U statistic on the provided scores and
+returns the result. Use only when the n in each condition is < 20 and
+you have 2 independent samples of ranks. NOTE: Mann-Whitney U is
+significant if the u-obtained is LESS THAN or equal to the critical
+value of U found in the tables. Equivalent to Kruskal-Wallis H with
+just 2 groups.
+
+Usage: lmannwhitneyu(x,y)
+Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))
+"""
+ n1 = len(x)
+ n2 = len(y)
+ ranked = rankdata(x + y)
+ rankx = ranked[0:n1] # get the x-ranks
+ ranky = ranked[n1:] # the rest are y-ranks
+ u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx) # calc U for x
+ u2 = n1 * n2 - u1 # remainder is U for y
+ bigu = max(u1, u2)
+ smallu = min(u1, u2)
+ proportion = bigu / float(n1 * n2)
+ T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores
+ if T == 0:
+ raise ValueError, 'All numbers are identical in lmannwhitneyu'
+ sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)
+ z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc
+ return smallu, 1.0 - zprob(z) #, proportion
+
+
+def ltiecorrect(rankvals):
+ """
+Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See
+Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences.
+New York: McGraw-Hill. Code adapted from |Stat rankind.c code.
+
+Usage: ltiecorrect(rankvals)
+Returns: T correction factor for U or H
+"""
+ sorted, posn = shellsort(rankvals)
+ n = len(sorted)
+ T = 0.0
+ i = 0
+ while (i < n - 1):
+ if sorted[i] == sorted[i + 1]:
+ nties = 1
+ while (i < n - 1) and (sorted[i] == sorted[i + 1]):
+ nties = nties + 1
+ i = i + 1
+ T = T + nties**3 - nties
+ i = i + 1
+ T = T / float(n**3 - n)
+ return 1.0 - T
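+# Illustrative example (not part of the original module): for ranks
+# [1, 2.5, 2.5, 4] (one tie group of size 2), ltiecorrect returns
+# 1 - (2**3 - 2) / float(4**3 - 4) = 0.9.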
+
+
+def lranksums(x, y):
+ """
+Calculates the rank sums statistic on the provided scores and
+returns the result. Use only when the n in each condition is > 20 and you
+have 2 independent samples of ranks.
+
+Usage: lranksums(x,y)
+Returns: a z-statistic, two-tailed p-value
+"""
+ n1 = len(x)
+ n2 = len(y)
+ alldata = x + y
+ ranked = rankdata(alldata)
+ x = ranked[:n1]
+ y = ranked[n1:]
+ s = sum(x)
+ expected = n1 * (n1 + n2 + 1) / 2.0
+ z = (s - expected) / math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0)
+ prob = 2 * (1.0 - zprob(abs(z)))
+ return z, prob
+
+
+def lwilcoxont(x, y):
+ """
+Calculates the Wilcoxon T-test for related samples and returns the
+result. A non-parametric T-test.
+
+Usage: lwilcoxont(x,y)
+Returns: a t-statistic, two-tail probability estimate
+"""
+ if len(x) <> len(y):
+ raise ValueError, 'Unequal N in wilcoxont. Aborting.'
+ d = []
+ for i in range(len(x)):
+ diff = x[i] - y[i]
+ if diff <> 0:
+ d.append(diff)
+ count = len(d)
+ absd = map(abs, d)
+ absranked = rankdata(absd)
+ r_plus = 0.0
+ r_minus = 0.0
+ for i in range(len(absd)):
+ if d[i] < 0:
+ r_minus = r_minus + absranked[i]
+ else:
+ r_plus = r_plus + absranked[i]
+ wt = min(r_plus, r_minus)
+ mn = count * (count + 1) * 0.25
+ se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0)
+ z = math.fabs(wt - mn) / se
+ prob = 2 * (1.0 - zprob(abs(z)))
+ return wt, prob
+
+
+def lkruskalwallish(*args):
+ """
+The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more
+groups, requiring at least 5 subjects in each group. This function
+calculates the Kruskal-Wallis H-test for 3 or more independent samples
+and returns the result.
+
+Usage: lkruskalwallish(*args)
+Returns: H-statistic (corrected for ties), associated p-value
+"""
+ args = list(args)
+ n = [0] * len(args)
+ all = []
+ n = map(len, args)
+ for i in range(len(args)):
+ all = all + args[i]
+ ranked = rankdata(all)
+ T = tiecorrect(ranked)
+ for i in range(len(args)):
+ args[i] = ranked[0:n[i]]
+ del ranked[0:n[i]]
+ rsums = []
+ for i in range(len(args)):
+ rsums.append(sum(args[i])**2)
+ rsums[i] = rsums[i] / float(n[i])
+ ssbn = sum(rsums)
+ totaln = sum(n)
+ h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)
+ df = len(args) - 1
+ if T == 0:
+ raise ValueError, 'All numbers are identical in lkruskalwallish'
+ h = h / float(T)
+ return h, chisqprob(h, df)
+
+
+def lfriedmanchisquare(*args):
+ """
+Friedman Chi-Square is a non-parametric, one-way within-subjects
+ANOVA. This function calculates the Friedman Chi-square test for repeated
+measures and returns the result, along with the associated probability
+value. It assumes 3 or more repeated measures. Only 3 levels requires a
+minimum of 10 subjects in the study. Four levels requires 5 subjects per
+level(??).
+
+Usage: lfriedmanchisquare(*args)
+Returns: chi-square statistic, associated p-value
+"""
+ k = len(args)
+ if k < 3:
+ raise ValueError, 'Less than 3 levels. Friedman test not appropriate.'
+ n = len(args[0])
+ data = apply(pstat.abut, tuple(args))
+ for i in range(len(data)):
+ data[i] = rankdata(data[i])
+ ssbn = 0
+ for i in range(k):
+    ssbn = ssbn + sum(pstat.colex(data, i))**2 # sum of ranks per condition
+ chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1)
+ return chisq, chisqprob(chisq, k - 1)
+
+####################################
+#### PROBABILITY CALCULATIONS ####
+####################################
+
+
+def lchisqprob(chisq, df):
+ """
+Returns the (1-tailed) probability value associated with the provided
+chi-square value and df. Adapted from chisq.c in Gary Perlman's |Stat.
+
+Usage: lchisqprob(chisq,df)
+"""
+ BIG = 20.0
+
+ def ex(x):
+ BIG = 20.0
+ if x < -BIG:
+ return 0.0
+ else:
+ return math.exp(x)
+
+ if chisq <= 0 or df < 1:
+ return 1.0
+ a = 0.5 * chisq
+ if df % 2 == 0:
+ even = 1
+ else:
+ even = 0
+ if df > 1:
+ y = ex(-a)
+ if even:
+ s = y
+ else:
+ s = 2.0 * zprob(-math.sqrt(chisq))
+ if (df > 2):
+ chisq = 0.5 * (df - 1.0)
+ if even:
+ z = 1.0
+ else:
+ z = 0.5
+ if a > BIG:
+ if even:
+ e = 0.0
+ else:
+ e = math.log(math.sqrt(math.pi))
+ c = math.log(a)
+ while (z <= chisq):
+ e = math.log(z) + e
+ s = s + ex(c * z - a - e)
+ z = z + 1.0
+ return s
+ else:
+ if even:
+ e = 1.0
+ else:
+ e = 1.0 / math.sqrt(math.pi) / math.sqrt(a)
+ c = 0.0
+ while (z <= chisq):
+ e = e * (a / float(z))
+ c = c + e
+ z = z + 1.0
+ return (c * y + s)
+ else:
+ return s
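+# Illustrative example (not part of the original module): for df = 2 the
+# survival probability is exp(-chisq / 2.0), so lchisqprob(10, 2) ~= 0.0067.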
+
+
+def lerfcc(x):
+ """
+Returns the complementary error function erfc(x) with fractional
+error everywhere less than 1.2e-7. Adapted from Numerical Recipes.
+
+Usage: lerfcc(x)
+"""
+ z = abs(x)
+ t = 1.0 / (1.0 + 0.5 * z)
+ ans = t * math.exp(-z * z - 1.26551223 + t * (1.00002368 + t * (
+ 0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * (0.27886807 + t * (
+ -1.13520398 + t * (1.48851587 + t * (-0.82215223 + t * 0.17087277)))))
+ ))))
+ if x >= 0:
+ return ans
+ else:
+ return 2.0 - ans
+
+
+def lzprob(z):
+ """
+Returns the area under the normal curve 'to the left of' the given z value.
+Thus,
+ for z<0, zprob(z) = 1-tail probability
+ for z>0, 1.0-zprob(z) = 1-tail probability
+ for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability
+Adapted from z.c in Gary Perlman's |Stat.
+
+Usage: lzprob(z)
+"""
+ Z_MAX = 6.0 # maximum meaningful z-value
+ if z == 0.0:
+ x = 0.0
+ else:
+ y = 0.5 * math.fabs(z)
+ if y >= (Z_MAX * 0.5):
+ x = 1.0
+ elif (y < 1.0):
+ w = y * y
+ x = ((
+ ((((((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w -
+ 0.019198292004) * w + 0.059054035642) * w - 0.151968751364) * w +
+ 0.319152932694) * w - 0.531923007300) * w + 0.797884560593) * y * 2.0
+ else:
+ y = y - 2.0
+ x = (((((((
+ ((((((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) * y
+ - 0.000676904986) * y + 0.001390604284) * y - 0.000794620820) * y
+ - 0.002034254874) * y + 0.006549791214) * y - 0.010557625006) * y +
+ 0.011630447319) * y - 0.009279453341) * y + 0.005353579108) * y -
+ 0.002141268741) * y + 0.000535310849) * y + 0.999936657524
+ if z > 0.0:
+ prob = ((x + 1.0) * 0.5)
+ else:
+ prob = ((1.0 - x) * 0.5)
+ return prob
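+# Illustrative example (not part of the original module): lzprob(0.0) = 0.5
+# and lzprob(1.96) ~= 0.975 (the area to the left of z).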
+
+
+def lksprob(alam):
+ """
+Computes a Kolmogorov-Smirnov test significance level. Adapted from
+Numerical Recipes.
+
+Usage: lksprob(alam)
+"""
+ fac = 2.0
+ sum = 0.0
+ termbf = 0.0
+ a2 = -2.0 * alam * alam
+ for j in range(1, 201):
+ term = fac * math.exp(a2 * j * j)
+ sum = sum + term
+ if math.fabs(term) <= (0.001 * termbf) or math.fabs(term) < (1.0e-8 * sum):
+ return sum
+ fac = -fac
+ termbf = math.fabs(term)
+ return 1.0 # Get here only if fails to converge; was 0.0!!
+
+
+def lfprob(dfnum, dfden, F):
+ """
+Returns the (1-tailed) significance level (p-value) of an F
+statistic given the degrees of freedom for the numerator (dfR-dfF) and
+the degrees of freedom for the denominator (dfF).
+
+Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn
+"""
+ p = betai(0.5 * dfden, 0.5 * dfnum, dfden / float(dfden + dfnum * F))
+ return p
+
+
+def lbetacf(a, b, x):
+ """
+This function evaluates the continued fraction form of the incomplete
+Beta function, betai. (Adapted from: Numerical Recipes in C.)
+
+Usage: lbetacf(a,b,x)
+"""
+ ITMAX = 200
+ EPS = 3.0e-7
+
+ bm = az = am = 1.0
+ qab = a + b
+ qap = a + 1.0
+ qam = a - 1.0
+ bz = 1.0 - qab * x / qap
+ for i in range(ITMAX + 1):
+ em = float(i + 1)
+ tem = em + em
+ d = em * (b - em) * x / ((qam + tem) * (a + tem))
+ ap = az + d * am
+ bp = bz + d * bm
+ d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem))
+ app = ap + d * az
+ bpp = bp + d * bz
+ aold = az
+ am = ap / bpp
+ bm = bp / bpp
+ az = app / bpp
+ bz = 1.0
+ if (abs(az - aold) < (EPS * abs(az))):
+ return az
+ print 'a or b too big, or ITMAX too small in Betacf.'
+
+
+def lgammln(xx):
+ """
+Returns the gamma function of xx.
+ Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.
+(Adapted from: Numerical Recipes in C.)
+
+Usage: lgammln(xx)
+"""
+
+ coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, 0.120858003e-2,
+ -0.536382e-5]
+ x = xx - 1.0
+ tmp = x + 5.5
+ tmp = tmp - (x + 0.5) * math.log(tmp)
+ ser = 1.0
+ for j in range(len(coeff)):
+ x = x + 1
+ ser = ser + coeff[j] / x
+ return -tmp + math.log(2.50662827465 * ser)
+
+
+def lbetai(a, b, x):
+ """
+Returns the incomplete beta function:
+
+ I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)
+
+where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
+function of a. The continued fraction formulation is implemented here,
+using the betacf function. (Adapted from: Numerical Recipes in C.)
+
+Usage: lbetai(a,b,x)
+"""
+ if (x < 0.0 or x > 1.0):
+ raise ValueError, 'Bad x in lbetai'
+ if (x == 0.0 or x == 1.0):
+ bt = 0.0
+ else:
+ bt = math.exp(gammln(a + b) - gammln(a) - gammln(b) + a * math.log(x) + b *
+ math.log(1.0 - x))
+ if (x < (a + 1.0) / (a + b + 2.0)):
+ return bt * betacf(a, b, x) / float(a)
+ else:
+ return 1.0 - bt * betacf(b, a, 1.0 - x) / float(b)
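+# Illustrative example (not part of the original module): I-sub-x(1,1) is
+# the uniform CDF, so lbetai(1.0, 1.0, 0.3) evaluates to 0.3.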
+
+####################################
+####### ANOVA CALCULATIONS #######
+####################################
+
+
+def lF_oneway(*lists):
+ """
+Performs a 1-way ANOVA, returning an F-value and probability given
+any number of groups. From Heiman, pp.394-7.
+
+Usage: F_oneway(*lists) where *lists is any number of lists, one per
+ treatment group
+Returns: F value, one-tailed p-value
+"""
+  a = len(lists) # ANOVA on 'a' groups, each in its own list
+  alldata = []
+  for i in range(len(lists)):
+    alldata = alldata + lists[i]
+  bign = len(alldata)
+  sstot = ss(alldata) - (square_of_sums(alldata) / float(bign))
+  ssbn = 0
+  for grouplist in lists:
+    ssbn = ssbn + square_of_sums(grouplist) / float(len(grouplist))
+  ssbn = ssbn - (square_of_sums(alldata) / float(bign))
+  sswn = sstot - ssbn
+ dfbn = a - 1
+ dfwn = bign - a
+ msb = ssbn / float(dfbn)
+ msw = sswn / float(dfwn)
+ f = msb / msw
+ prob = fprob(dfbn, dfwn, f)
+ return f, prob
+
+
+def lF_value(ER, EF, dfnum, dfden):
+ """
+Returns an F-statistic given the following:
+ ER = error associated with the null hypothesis (the Restricted model)
+ EF = error associated with the alternate hypothesis (the Full model)
+ dfR-dfF = degrees of freedom of the numerator
+ dfF = degrees of freedom associated with the denominator/Full model
+
+Usage: lF_value(ER,EF,dfnum,dfden)
+"""
+ return ((ER - EF) / float(dfnum) / (EF / float(dfden)))
+
+####################################
+######## SUPPORT FUNCTIONS #######
+####################################
+
+
+def writecc(listoflists, file, writetype='w', extra=2):
+ """
+Writes a list of lists to the specified file in columns, with each
+column sized to its widest item plus 'extra' characters.
+File-overwrite is the default.
+
+Usage: writecc (listoflists,file,writetype='w',extra=2)
+Returns: None
+"""
+ if type(listoflists[0]) not in [ListType, TupleType]:
+ listoflists = [listoflists]
+ outfile = open(file, writetype)
+ rowstokill = []
+ list2print = copy.deepcopy(listoflists)
+ for i in range(len(listoflists)):
+ if listoflists[i] == [
+ '\n'
+ ] or listoflists[i] == '\n' or listoflists[i] == 'dashes':
+ rowstokill = rowstokill + [i]
+ rowstokill.reverse()
+ for row in rowstokill:
+ del list2print[row]
+ maxsize = [0] * len(list2print[0])
+ for col in range(len(list2print[0])):
+ items = pstat.colex(list2print, col)
+ items = map(pstat.makestr, items)
+ maxsize[col] = max(map(len, items)) + extra
+ for row in listoflists:
+ if row == ['\n'] or row == '\n':
+ outfile.write('\n')
+ elif row == ['dashes'] or row == 'dashes':
+ dashes = [0] * len(maxsize)
+ for j in range(len(maxsize)):
+ dashes[j] = '-' * (maxsize[j] - 2)
+ outfile.write(pstat.lineincustcols(dashes, maxsize))
+ else:
+ outfile.write(pstat.lineincustcols(row, maxsize))
+ outfile.write('\n')
+ outfile.close()
+ return None
+
+
+def lincr(l, cap): # to increment a list up to a max-list of 'cap'
+ """
+Simulate a counting system from an n-dimensional list.
+
+Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n
+Returns: next set of values for list l, OR -1 (if overflow)
+"""
+ l[0] = l[0] + 1 # e.g., [0,0,0] --> [2,4,3] (=cap)
+ for i in range(len(l)):
+ if l[i] > cap[i] and i < len(l) - 1: # if carryover AND not done
+ l[i] = 0
+ l[i + 1] = l[i + 1] + 1
+ elif l[i] > cap[i] and i == len(
+ l) - 1: # overflow past last column, must be finished
+ l = -1
+ return l
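+# Illustrative example (not part of the original module): lincr treats l as
+# an odometer, so lincr([2, 0, 0], [2, 4, 3]) -> [0, 1, 0]; once the last
+# position overflows its cap the function returns -1.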
+
+
+def lsum(inlist):
+ """
+Returns the sum of the items in the passed list.
+
+Usage: lsum(inlist)
+"""
+ s = 0
+ for item in inlist:
+ s = s + item
+ return s
+
+
+def lcumsum(inlist):
+ """
+Returns a list consisting of the cumulative sum of the items in the
+passed list.
+
+Usage: lcumsum(inlist)
+"""
+ newlist = copy.deepcopy(inlist)
+ for i in range(1, len(newlist)):
+ newlist[i] = newlist[i] + newlist[i - 1]
+ return newlist
+
+
+def lss(inlist):
+ """
+Squares each value in the passed list, adds up these squares and
+returns the result.
+
+Usage: lss(inlist)
+"""
+ ss = 0
+ for item in inlist:
+ ss = ss + item * item
+ return ss
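+# Illustrative example (not part of the original module):
+# lss([1, 2, 3]) = 1 + 4 + 9 = 14; compare lsquare_of_sums([1, 2, 3])
+# (defined below) = (1 + 2 + 3)**2 = 36.0.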
+
+
+def lsummult(list1, list2):
+ """
+Multiplies elements in list1 and list2, element by element, and
+returns the sum of all resulting multiplications. Must provide equal
+length lists.
+
+Usage: lsummult(list1,list2)
+"""
+ if len(list1) <> len(list2):
+ raise ValueError, 'Lists not equal length in summult.'
+ s = 0
+ for item1, item2 in pstat.abut(list1, list2):
+ s = s + item1 * item2
+ return s
+
+
+def lsumdiffsquared(x, y):
+ """
+Takes pairwise differences of the values in lists x and y, squares
+these differences, and returns the sum of these squares.
+
+Usage: lsumdiffsquared(x,y)
+Returns: sum[(x[i]-y[i])**2]
+"""
+ sds = 0
+ for i in range(len(x)):
+ sds = sds + (x[i] - y[i])**2
+ return sds
+
+
+def lsquare_of_sums(inlist):
+ """
+Adds the values in the passed list, squares the sum, and returns
+the result.
+
+Usage: lsquare_of_sums(inlist)
+Returns: sum(inlist[i])**2
+"""
+ s = sum(inlist)
+ return float(s) * s
+
+
+def lshellsort(inlist):
+ """
+Shellsort algorithm. Sorts a 1D-list.
+
+Usage: lshellsort(inlist)
+Returns: sorted-inlist, sorting-index-vector (for original list)
+"""
+ n = len(inlist)
+ svec = copy.deepcopy(inlist)
+ ivec = range(n)
+ gap = n / 2 # integer division needed
+ while gap > 0:
+ for i in range(gap, n):
+ for j in range(i - gap, -1, -gap):
+ while j >= 0 and svec[j] > svec[j + gap]:
+ temp = svec[j]
+ svec[j] = svec[j + gap]
+ svec[j + gap] = temp
+ itemp = ivec[j]
+ ivec[j] = ivec[j + gap]
+ ivec[j + gap] = itemp
+ gap = gap / 2 # integer division needed
+# svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]]
+ return svec, ivec
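+# Illustrative example (not part of the original module):
+# lshellsort([3, 1, 2]) returns ([1, 2, 3], [1, 2, 0]), where
+# svec[i] == inlist[ivec[i]] for each i.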
+
+
+def lrankdata(inlist):
+ """
+Ranks the data in inlist, dealing with ties appropriately. Assumes
+a 1D inlist. Adapted from Gary Perlman's |Stat ranksort.
+
+Usage: lrankdata(inlist)
+Returns: a list of length equal to inlist, containing rank scores
+"""
+ n = len(inlist)
+ svec, ivec = shellsort(inlist)
+ sumranks = 0
+ dupcount = 0
+ newlist = [0] * n
+ for i in range(n):
+ sumranks = sumranks + i
+ dupcount = dupcount + 1
+ if i == n - 1 or svec[i] <> svec[i + 1]:
+ averank = sumranks / float(dupcount) + 1
+ for j in range(i - dupcount + 1, i + 1):
+ newlist[ivec[j]] = averank
+ sumranks = 0
+ dupcount = 0
+ return newlist
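+# Illustrative example (not part of the original module):
+# lrankdata([0.5, 0.5, 2.0]) gives the two tied scores the average of
+# ranks 1 and 2, returning [1.5, 1.5, 3.0].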
+
+
+def outputpairedstats(fname, writemode, name1, n1, m1, se1, min1, max1, name2,
+ n2, m2, se2, min2, max2, statname, stat, prob):
+ """
+Prints (or writes to a file) stats for two groups, using the name, n,
+mean, sterr, min and max for each group, as well as the statistic name,
+its value, and the associated p-value.
+
+Usage: outputpairedstats(fname,writemode,
+ name1,n1,mean1,stderr1,min1,max1,
+ name2,n2,mean2,stderr2,min2,max2,
+ statname,stat,prob)
+Returns: None
+"""
+ suffix = '' # for *s after the p-value
+ try:
+ x = prob.shape
+ prob = prob[0]
+ except:
+ pass
+ if prob < 0.001:
+ suffix = ' ***'
+ elif prob < 0.01:
+ suffix = ' **'
+ elif prob < 0.05:
+ suffix = ' *'
+ title = [['Name', 'N', 'Mean', 'SD', 'Min', 'Max']]
+ lofl = title + [[name1, n1, round(m1, 3), round(
+ math.sqrt(se1), 3), min1, max1], [name2, n2, round(m2, 3), round(
+ math.sqrt(se2), 3), min2, max2]]
+ if type(fname) <> StringType or len(fname) == 0:
+ print
+ print statname
+ print
+ pstat.printcc(lofl)
+ print
+ try:
+ if stat.shape == ():
+ stat = stat[0]
+ if prob.shape == ():
+ prob = prob[0]
+ except:
+ pass
+ print 'Test statistic = ', round(stat, 3), ' p = ', round(prob, 3), suffix
+ print
+ else:
+ file = open(fname, writemode)
+ file.write('\n' + statname + '\n\n')
+ file.close()
+ writecc(lofl, fname, 'a')
+ file = open(fname, 'a')
+ try:
+ if stat.shape == ():
+ stat = stat[0]
+ if prob.shape == ():
+ prob = prob[0]
+ except:
+ pass
+ file.write(pstat.list2string(['\nTest statistic = ', round(stat, 4),
+ ' p = ', round(prob, 4), suffix, '\n\n']))
+ file.close()
+ return None
+
+
+def lfindwithin(data):
+ """
+Returns an integer representing a binary vector, where 1=within-
+subject factor, 0=between. Input equals the entire data 2D list (i.e.,
+column 0=random factor, column -1=measured values; those two are skipped).
+Note: input data is in |Stat format ... a list of lists ("2D list") with
+one row per measured value, first column=subject identifier, last column=
+score, one in-between column per factor (these columns contain level
+designations on each factor). See also stats.anova.__doc__.
+
+Usage: lfindwithin(data) data in |Stat format
+"""
+
+ numfact = len(data[0]) - 1
+ withinvec = 0
+ for col in range(1, numfact):
+ examplelevel = pstat.unique(pstat.colex(data, col))[0]
+ rows = pstat.linexand(data, col, examplelevel) # get 1 level of this factor
+ factsubjs = pstat.unique(pstat.colex(rows, 0))
+ allsubjs = pstat.unique(pstat.colex(data, 0))
+ if len(factsubjs) == len(allsubjs): # all Ss at this level => within-S factor
+ withinvec = withinvec + (1 << col)
+ return withinvec
+
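+# Decoding sketch (illustrative; 'data' and 'col' are hypothetical names): the
+# returned integer packs one bit per factor column, so factor 'col' is
+# within-subjects exactly when the masked bit below is nonzero:
+#   >>> withinvec = lfindwithin(data)          # data in |Stat format
+#   >>> is_within = withinvec & (1 << col)     # col in range(1, numfact)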
+#########################################################
+#########################################################
+####### DISPATCH LISTS AND TUPLES TO ABOVE FCNS #########
+#########################################################
+#########################################################
+
+## CENTRAL TENDENCY:
+geometricmean = Dispatch((lgeometricmean, (ListType, TupleType)),)
+harmonicmean = Dispatch((lharmonicmean, (ListType, TupleType)),)
+mean = Dispatch((lmean, (ListType, TupleType)),)
+median = Dispatch((lmedian, (ListType, TupleType)),)
+medianscore = Dispatch((lmedianscore, (ListType, TupleType)),)
+mode = Dispatch((lmode, (ListType, TupleType)),)
+
+## MOMENTS:
+moment = Dispatch((lmoment, (ListType, TupleType)),)
+variation = Dispatch((lvariation, (ListType, TupleType)),)
+skew = Dispatch((lskew, (ListType, TupleType)),)
+kurtosis = Dispatch((lkurtosis, (ListType, TupleType)),)
+describe = Dispatch((ldescribe, (ListType, TupleType)),)
+
+## FREQUENCY STATISTICS:
+itemfreq = Dispatch((litemfreq, (ListType, TupleType)),)
+scoreatpercentile = Dispatch((lscoreatpercentile, (ListType, TupleType)),)
+percentileofscore = Dispatch((lpercentileofscore, (ListType, TupleType)),)
+histogram = Dispatch((lhistogram, (ListType, TupleType)),)
+cumfreq = Dispatch((lcumfreq, (ListType, TupleType)),)
+relfreq = Dispatch((lrelfreq, (ListType, TupleType)),)
+
+## VARIABILITY:
+obrientransform = Dispatch((lobrientransform, (ListType, TupleType)),)
+samplevar = Dispatch((lsamplevar, (ListType, TupleType)),)
+samplestdev = Dispatch((lsamplestdev, (ListType, TupleType)),)
+var = Dispatch((lvar, (ListType, TupleType)),)
+stdev = Dispatch((lstdev, (ListType, TupleType)),)
+sterr = Dispatch((lsterr, (ListType, TupleType)),)
+sem = Dispatch((lsem, (ListType, TupleType)),)
+z = Dispatch((lz, (ListType, TupleType)),)
+zs = Dispatch((lzs, (ListType, TupleType)),)
+
+## TRIMMING FCNS:
+trimboth = Dispatch((ltrimboth, (ListType, TupleType)),)
+trim1 = Dispatch((ltrim1, (ListType, TupleType)),)
+
+## CORRELATION FCNS:
+paired = Dispatch((lpaired, (ListType, TupleType)),)
+pearsonr = Dispatch((lpearsonr, (ListType, TupleType)),)
+spearmanr = Dispatch((lspearmanr, (ListType, TupleType)),)
+pointbiserialr = Dispatch((lpointbiserialr, (ListType, TupleType)),)
+kendalltau = Dispatch((lkendalltau, (ListType, TupleType)),)
+linregress = Dispatch((llinregress, (ListType, TupleType)),)
+
+## INFERENTIAL STATS:
+ttest_1samp = Dispatch((lttest_1samp, (ListType, TupleType)),)
+ttest_ind = Dispatch((lttest_ind, (ListType, TupleType)),)
+ttest_rel = Dispatch((lttest_rel, (ListType, TupleType)),)
+chisquare = Dispatch((lchisquare, (ListType, TupleType)),)
+ks_2samp = Dispatch((lks_2samp, (ListType, TupleType)),)
+mannwhitneyu = Dispatch((lmannwhitneyu, (ListType, TupleType)),)
+ranksums = Dispatch((lranksums, (ListType, TupleType)),)
+tiecorrect = Dispatch((ltiecorrect, (ListType, TupleType)),)
+wilcoxont = Dispatch((lwilcoxont, (ListType, TupleType)),)
+kruskalwallish = Dispatch((lkruskalwallish, (ListType, TupleType)),)
+friedmanchisquare = Dispatch((lfriedmanchisquare, (ListType, TupleType)),)
+
+## PROBABILITY CALCS:
+chisqprob = Dispatch((lchisqprob, (IntType, FloatType)),)
+zprob = Dispatch((lzprob, (IntType, FloatType)),)
+ksprob = Dispatch((lksprob, (IntType, FloatType)),)
+fprob = Dispatch((lfprob, (IntType, FloatType)),)
+betacf = Dispatch((lbetacf, (IntType, FloatType)),)
+betai = Dispatch((lbetai, (IntType, FloatType)),)
+erfcc = Dispatch((lerfcc, (IntType, FloatType)),)
+gammln = Dispatch((lgammln, (IntType, FloatType)),)
+
+## ANOVA FUNCTIONS:
+F_oneway = Dispatch((lF_oneway, (ListType, TupleType)),)
+F_value = Dispatch((lF_value, (ListType, TupleType)),)
+
+## SUPPORT FUNCTIONS:
+incr = Dispatch((lincr, (ListType, TupleType)),)
+sum = Dispatch((lsum, (ListType, TupleType)),)
+cumsum = Dispatch((lcumsum, (ListType, TupleType)),)
+ss = Dispatch((lss, (ListType, TupleType)),)
+summult = Dispatch((lsummult, (ListType, TupleType)),)
+square_of_sums = Dispatch((lsquare_of_sums, (ListType, TupleType)),)
+sumdiffsquared = Dispatch((lsumdiffsquared, (ListType, TupleType)),)
+shellsort = Dispatch((lshellsort, (ListType, TupleType)),)
+rankdata = Dispatch((lrankdata, (ListType, TupleType)),)
+findwithin = Dispatch((lfindwithin, (ListType, TupleType)),)
+
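+# The Dispatch wrappers above route each call on the type of its first
+# argument, so plain Python sequences reach the list versions defined in this
+# half of the module (a minimal sketch of a hypothetical session):
+#   >>> import stats
+#   >>> stats.mean([1, 2, 3, 4])   # dispatches to lmean
+#   2.5
+# An unregistered argument type is rejected by Dispatch with a TypeError.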
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS ===============
+
+try: # DEFINE THESE *ONLY* IF NUMPY IS AVAILABLE
+ import numpy as N
+ import numpy.linalg as LA
+
+ #####################################
+ ######## ACENTRAL TENDENCY ########
+ #####################################
+
+
+ def ageometricmean(inarray, dimension=None, keepdims=0):
+ """
+Calculates the geometric mean of the values in the passed array.
+That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in
+the passed array. Use dimension=None to flatten array first. REMEMBER: if
+dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
+if dimension is a sequence, it collapses over all specified dimensions. If
+keepdims is set to 1, the resulting array will have as many dimensions as
+inarray, with only 1 'level' per dim that was collapsed over.
+
+Usage: ageometricmean(inarray,dimension=None,keepdims=0)
+Returns: geometric mean computed over dim(s) listed in dimension
+"""
+ inarray = N.array(inarray, N.float_)
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ size = len(inarray)
+ mult = N.power(inarray, 1.0 / size)
+ mult = N.multiply.reduce(mult)
+ elif type(dimension) in [IntType, FloatType]:
+ size = inarray.shape[dimension]
+ mult = N.power(inarray, 1.0 / size)
+ mult = N.multiply.reduce(mult, dimension)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ shp[dimension] = 1
+ mult = N.reshape(mult, shp) # reshape the running product ('sum' was a typo)
+ else: # must be a SEQUENCE of dims to average over
+ dims = list(dimension)
+ dims.sort()
+ dims.reverse()
+ size = N.array(N.multiply.reduce(N.take(inarray.shape, dims)), N.float_)
+ mult = N.power(inarray, 1.0 / size)
+ for dim in dims:
+ mult = N.multiply.reduce(mult, dim)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ for dim in dims:
+ shp[dim] = 1
+ mult = N.reshape(mult, shp)
+ return mult
+
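+ # A quick sanity check (illustrative; N is the numpy import above):
+ #   >>> ageometricmean(N.array([2.0, 8.0]))   # sqrt(2 * 8): ~4.0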
+ def aharmonicmean(inarray, dimension=None, keepdims=0):
+ """
+Calculates the harmonic mean of the values in the passed array.
+That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in
+the passed array. Use dimension=None to flatten array first. REMEMBER: if
+dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
+if dimension is a sequence, it collapses over all specified dimensions. If
+keepdims is set to 1, the resulting array will have as many dimensions as
+inarray, with only 1 'level' per dim that was collapsed over.
+
+Usage: aharmonicmean(inarray,dimension=None,keepdims=0)
+Returns: harmonic mean computed over dim(s) in dimension
+"""
+ inarray = inarray.astype(N.float_)
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ size = len(inarray)
+ s = N.add.reduce(1.0 / inarray)
+ elif type(dimension) in [IntType, FloatType]:
+ size = float(inarray.shape[dimension])
+ s = N.add.reduce(1.0 / inarray, dimension)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ shp[dimension] = 1
+ s = N.reshape(s, shp)
+ else: # must be a SEQUENCE of dims to average over
+ dims = list(dimension)
+ dims.sort()
+ nondims = []
+ for i in range(len(inarray.shape)):
+ if i not in dims:
+ nondims.append(i)
+ tinarray = N.transpose(inarray, nondims + dims) # put keep-dims first
+ idx = [0] * len(nondims)
+ if idx == []:
+ size = len(N.ravel(inarray))
+ s = asum(1.0 / inarray)
+ if keepdims == 1:
+ s = N.reshape([s], N.ones(len(inarray.shape)))
+ else:
+ idx[0] = -1
+ loopcap = N.array(tinarray.shape[0:len(nondims)]) - 1
+ s = N.zeros(loopcap + 1, N.float_)
+ while incr(idx, loopcap) <> -1:
+ s[idx] = asum(1.0 / tinarray[idx])
+ size = N.multiply.reduce(N.take(inarray.shape, dims))
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ for dim in dims:
+ shp[dim] = 1
+ s = N.reshape(s, shp)
+ return size / s
+
+ def amean(inarray, dimension=None, keepdims=0):
+ """
+Calculates the arithmetic mean of the values in the passed array.
+That is: 1/n * (x1 + x2 + ... + xn). Defaults to ALL values in the
+passed array. Use dimension=None to flatten array first. REMEMBER: if
+dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and
+if dimension is a sequence, it collapses over all specified dimensions. If
+keepdims is set to 1, the resulting array will have as many dimensions as
+inarray, with only 1 'level' per dim that was collapsed over.
+
+Usage: amean(inarray,dimension=None,keepdims=0)
+Returns: arithmetic mean calculated over dim(s) in dimension
+"""
+ if inarray.dtype in [N.int_, N.short, N.ubyte]:
+ inarray = inarray.astype(N.float_)
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ sum = N.add.reduce(inarray)
+ denom = float(len(inarray))
+ elif type(dimension) in [IntType, FloatType]:
+ sum = asum(inarray, dimension)
+ denom = float(inarray.shape[dimension])
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ shp[dimension] = 1
+ sum = N.reshape(sum, shp)
+ else: # must be a TUPLE of dims to average over
+ dims = list(dimension)
+ dims.sort()
+ dims.reverse()
+ sum = inarray * 1.0
+ for dim in dims:
+ sum = N.add.reduce(sum, dim)
+ denom = N.array(N.multiply.reduce(N.take(inarray.shape, dims)), N.float_)
+ if keepdims == 1:
+ shp = list(inarray.shape)
+ for dim in dims:
+ shp[dim] = 1
+ sum = N.reshape(sum, shp)
+ return sum / denom
+
+ def amedian(inarray, numbins=1000):
+ """
+Calculates the COMPUTED median value of an array of numbers, given the
+number of bins to use for the histogram (more bins approaches finding the
+precise median value of the array; default number of bins = 1000). From
+G.W. Heiman's Basic Stats, or CRC Probability & Statistics.
+NOTE: THIS ROUTINE ALWAYS uses the entire passed array (flattens it first).
+
+Usage: amedian(inarray,numbins=1000)
+Returns: median calculated over ALL values in inarray
+"""
+ inarray = N.ravel(inarray)
+ (hist, smallest, binsize, extras) = ahistogram(inarray, numbins,
+ [min(inarray), max(inarray)])
+ cumhist = N.cumsum(hist) # make cumulative histogram
+ otherbins = N.greater_equal(cumhist, len(inarray) / 2.0)
+ otherbins = list(otherbins) # list of 0/1s, 1s start at median bin
+ cfbin = otherbins.index(1) # get 1st(!) index holding 50%ile score
+ LRL = smallest + binsize * cfbin # get lower real limit of that bin
+ cfbelow = N.add.reduce(hist[0:cfbin]) # cum. freq. below bin
+ freq = hist[cfbin] # frequency IN the 50%ile bin
+ median = LRL + (
+ (len(inarray) / 2.0 - cfbelow) / float(freq)) * binsize # MEDIAN
+ return median
+
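+ # Illustrative sketch (hypothetical call): the median is interpolated from
+ # a histogram, so it is approximate but robust to the outlier here:
+ #   >>> amedian(N.array([1.0, 2.0, 3.0, 4.0, 100.0]))   # ~3.0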
+ def amedianscore(inarray, dimension=None):
+ """
+Returns the 'middle' score of the passed array. If there is an even
+number of scores, the mean of the 2 middle scores is returned. Can function
+with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can
+be None, to pre-flatten the array, or else dimension must equal 0).
+
+Usage: amedianscore(inarray,dimension=None)
+Returns: 'middle' score of the array, or the mean of the 2 middle scores
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ inarray = N.sort(inarray, dimension)
+ if inarray.shape[dimension] % 2 == 0: # if even number of elements
+ indx = inarray.shape[dimension] / 2 # integer division correct
+ median = N.asarray(inarray[indx] + inarray[indx - 1]) / 2.0
+ else:
+ indx = inarray.shape[dimension] / 2 # integer division correct
+ median = N.take(inarray, [indx], dimension)
+ if median.shape == (1,):
+ median = median[0]
+ return median
+
+ def amode(a, dimension=None):
+ """
+Returns an array of the modal (most common) score in the passed array.
+If there is more than one such score, ONLY THE FIRST is returned.
+The bin-count for the modal values is also returned. Operates on whole
+array (dimension=None), or on a given dimension.
+
+Usage: amode(a, dimension=None)
+Returns: array of bin-counts for mode(s), array of corresponding modal values
+"""
+
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ scores = pstat.aunique(N.ravel(a)) # get ALL unique values
+ testshape = list(a.shape)
+ testshape[dimension] = 1
+ oldmostfreq = N.zeros(testshape)
+ oldcounts = N.zeros(testshape)
+ for score in scores:
+ template = N.equal(a, score)
+ counts = asum(template, dimension, 1)
+ mostfrequent = N.where(counts > oldcounts, score, oldmostfreq)
+ oldcounts = N.where(counts > oldcounts, counts, oldcounts)
+ oldmostfreq = mostfrequent
+ return oldcounts, mostfrequent
+
+ def atmean(a, limits=None, inclusive=(1, 1)):
+ """
+Returns the arithmetic mean of all values in an array, ignoring values
+strictly outside the sequence passed to 'limits'. Note: either limit
+in the sequence, or the value of limits itself, can be set to None. The
+inclusive list/tuple determines whether the lower and upper limiting bounds
+(respectively) are open/exclusive (0) or closed/inclusive (1).
+
+Usage: atmean(a,limits=None,inclusive=(1,1))
+"""
+ if a.dtype in [N.int_, N.short, N.ubyte]:
+ a = a.astype(N.float_)
+ if limits == None:
+ return mean(a)
+ assert type(limits) in [ListType, TupleType, N.ndarray
+ ], 'Wrong type for limits in atmean'
+ if inclusive[0]:
+ lowerfcn = N.greater_equal
+ else:
+ lowerfcn = N.greater
+ if inclusive[1]:
+ upperfcn = N.less_equal
+ else:
+ upperfcn = N.less
+ if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(
+ N.ravel(a)):
+ raise ValueError, 'No array values within given limits (atmean).'
+ elif limits[0] == None and limits[1] <> None:
+ mask = upperfcn(a, limits[1])
+ elif limits[0] <> None and limits[1] == None:
+ mask = lowerfcn(a, limits[0])
+ elif limits[0] <> None and limits[1] <> None:
+ mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1])
+ s = float(N.add.reduce(N.ravel(a * mask)))
+ n = float(N.add.reduce(N.ravel(mask)))
+ return s / n
+
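+ # Illustrative sketch (hypothetical call): with inclusive limits (0, 10)
+ # only 1, 2 and 3 survive the mask, so the trimmed mean is exactly 2.0:
+ #   >>> atmean(N.array([1.0, 2.0, 3.0, 100.0]), limits=(0, 10))
+ #   2.0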
+ def atvar(a, limits=None, inclusive=(1, 1)):
+ """
+Returns the sample variance of values in an array, (i.e., using N-1),
+ignoring values strictly outside the sequence passed to 'limits'.
+Note: either limit in the sequence, or the value of limits itself,
+can be set to None. The inclusive list/tuple determines whether the lower
+and upper limiting bounds (respectively) are open/exclusive (0) or
+closed/inclusive (1). ASSUMES A FLAT ARRAY (OR ELSE PREFLATTENS).
+
+Usage: atvar(a,limits=None,inclusive=(1,1))
+"""
+ a = a.astype(N.float_)
+ if limits == None or limits == [None, None]:
+ return avar(a)
+ assert type(limits) in [ListType, TupleType, N.ndarray
+ ], 'Wrong type for limits in atvar'
+ if inclusive[0]:
+ lowerfcn = N.greater_equal
+ else:
+ lowerfcn = N.greater
+ if inclusive[1]:
+ upperfcn = N.less_equal
+ else:
+ upperfcn = N.less
+ if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(
+ N.ravel(a)):
+ raise ValueError, 'No array values within given limits (atvar).'
+ elif limits[0] == None and limits[1] <> None:
+ mask = upperfcn(a, limits[1])
+ elif limits[0] <> None and limits[1] == None:
+ mask = lowerfcn(a, limits[0])
+ elif limits[0] <> None and limits[1] <> None:
+ mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1])
+
+ a = N.compress(mask, a) # squish out excluded values
+ return avar(a)
+
+ def atmin(a, lowerlimit=None, dimension=None, inclusive=1):
+ """
+Returns the minimum value of a, along dimension, including only values greater
+than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None,
+all values in the array are used.
+
+Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1)
+"""
+ if inclusive:
+ lowerfcn = N.greater
+ else:
+ lowerfcn = N.greater_equal
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if lowerlimit == None:
+ lowerlimit = N.minimum.reduce(N.ravel(a)) - 11
+ biggest = N.maximum.reduce(N.ravel(a))
+ ta = N.where(lowerfcn(a, lowerlimit), a, biggest)
+ return N.minimum.reduce(ta, dimension)
+
+ def atmax(a, upperlimit, dimension=None, inclusive=1):
+ """
+Returns the maximum value of a, along dimension, including only values less
+than (or equal to, if inclusive=1) upperlimit. If the limit is set to None,
+a limit larger than the max value in the array is used.
+
+Usage: atmax(a,upperlimit,dimension=None,inclusive=1)
+"""
+ if inclusive:
+ upperfcn = N.less
+ else:
+ upperfcn = N.less_equal
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if upperlimit == None:
+ upperlimit = N.maximum.reduce(N.ravel(a)) + 1
+ smallest = N.minimum.reduce(N.ravel(a))
+ ta = N.where(upperfcn(a, upperlimit), a, smallest)
+ return N.maximum.reduce(ta, dimension)
+
+ def atstdev(a, limits=None, inclusive=(1, 1)):
+ """
+Returns the standard deviation of all values in an array, ignoring values
+strictly outside the sequence passed to 'limits'. Note: either limit
+in the sequence, or the value of limits itself, can be set to None. The
+inclusive list/tuple determines whether the lower and upper limiting bounds
+(respectively) are open/exclusive (0) or closed/inclusive (1).
+
+Usage: atstdev(a,limits=None,inclusive=(1,1))
+"""
+ return N.sqrt(tvar(a, limits, inclusive))
+
+ def atsem(a, limits=None, inclusive=(1, 1)):
+ """
+Returns the standard error of the mean for the values in an array,
+(i.e., using N for the denominator), ignoring values strictly outside
+the sequence passed to 'limits'. Note: either limit in the sequence,
+or the value of limits itself, can be set to None. The inclusive list/tuple
+determines whether the lower and upper limiting bounds (respectively) are
+open/exclusive (0) or closed/inclusive (1).
+
+Usage: atsem(a,limits=None,inclusive=(1,1))
+"""
+ sd = tstdev(a, limits, inclusive)
+ if limits == None or limits == [None, None]:
+ n = float(len(N.ravel(a)))
+ limits = [min(a) - 1, max(a) + 1]
+ assert type(limits) in [ListType, TupleType, N.ndarray
+ ], 'Wrong type for limits in atsem'
+ if inclusive[0]:
+ lowerfcn = N.greater_equal
+ else:
+ lowerfcn = N.greater
+ if inclusive[1]:
+ upperfcn = N.less_equal
+ else:
+ upperfcn = N.less
+ if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(
+ N.ravel(a)):
+ raise ValueError, 'No array values within given limits (atsem).'
+ elif limits[0] == None and limits[1] <> None:
+ mask = upperfcn(a, limits[1])
+ elif limits[0] <> None and limits[1] == None:
+ mask = lowerfcn(a, limits[0])
+ elif limits[0] <> None and limits[1] <> None:
+ mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1])
+ term1 = N.add.reduce(N.ravel(a * a * mask))
+ n = float(N.add.reduce(N.ravel(mask)))
+ return sd / math.sqrt(n)
+
+#####################################
+############ AMOMENTS #############
+#####################################
+
+ def amoment(a, moment=1, dimension=None):
+ """
+Calculates the nth moment about the mean for a sample (defaults to the
+1st moment). Generally used to calculate coefficients of skewness and
+kurtosis. Dimension can equal None (ravel array first), an integer
+(the dimension over which to operate), or a sequence (operate over
+multiple dimensions).
+
+Usage: amoment(a,moment=1,dimension=None)
+Returns: appropriate moment along given dimension
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if moment == 1:
+ return 0.0
+ else:
+ mn = amean(a, dimension, 1) # 1=keepdims
+ s = N.power((a - mn), moment)
+ return amean(s, dimension)
+
+ def avariation(a, dimension=None):
+ """
+Returns the coefficient of variation, as defined in CRC Standard
+Probability and Statistics, p.6. Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions).
+
+Usage: avariation(a,dimension=None)
+"""
+ return 100.0 * asamplestdev(a, dimension) / amean(a, dimension)
+
+ def askew(a, dimension=None):
+ """
+Returns the skewness of a distribution (normal ==> 0.0; >0 means extra
+weight in the right tail). Use askewtest() to see if it's close enough.
+Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions).
+
+Usage: askew(a, dimension=None)
+Returns: skew of vals in a along dimension, returning ZERO where all vals equal
+"""
+ denom = N.power(amoment(a, 2, dimension), 1.5)
+ zero = N.equal(denom, 0)
+ if type(denom) == N.ndarray and asum(zero) <> 0:
+ print 'Number of zeros in askew: ', asum(zero)
+ denom = denom + zero # prevent divide-by-zero
+ return N.where(zero, 0, amoment(a, 3, dimension) / denom)
+
+ def akurtosis(a, dimension=None):
+ """
+Returns the kurtosis of a distribution (normal ==> 3.0; >3 means
+heavier in the tails, and usually more peaked). Use akurtosistest()
+to see if it's close enough. Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions).
+
+Usage: akurtosis(a,dimension=None)
+Returns: kurtosis of values in a along dimension, and ZERO where all vals equal
+"""
+ denom = N.power(amoment(a, 2, dimension), 2)
+ zero = N.equal(denom, 0)
+ if type(denom) == N.ndarray and asum(zero) <> 0:
+ print 'Number of zeros in akurtosis: ', asum(zero)
+ denom = denom + zero # prevent divide-by-zero
+ return N.where(zero, 0, amoment(a, 4, dimension) / denom)
+
+ def adescribe(inarray, dimension=None):
+ """
+Returns several descriptive statistics of the passed array. Dimension
+can equal None (ravel array first), an integer (the dimension over
+which to operate), or a sequence (operate over multiple dimensions).
+
+Usage: adescribe(inarray,dimension=None)
+Returns: n, (min,max), mean, standard deviation, skew, kurtosis
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ n = inarray.shape[dimension]
+ mm = (N.minimum.reduce(inarray), N.maximum.reduce(inarray))
+ m = amean(inarray, dimension)
+ sd = astdev(inarray, dimension)
+ skew = askew(inarray, dimension)
+ kurt = akurtosis(inarray, dimension)
+ return n, mm, m, sd, skew, kurt
+
+#####################################
+######## NORMALITY TESTS ##########
+#####################################
+
+ def askewtest(a, dimension=None):
+ """
+Tests whether the skew is significantly different from a normal
+distribution. Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions).
+
+Usage: askewtest(a,dimension=None)
+Returns: z-score and 2-tail z-probability
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ b2 = askew(a, dimension)
+ n = float(a.shape[dimension])
+ y = b2 * N.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2)))
+ beta2 = (3.0 * (n * n + 27 * n - 70) * (n + 1) *
+ (n + 3)) / ((n - 2.0) * (n + 5) * (n + 7) * (n + 9))
+ W2 = -1 + N.sqrt(2 * (beta2 - 1))
+ delta = 1 / N.sqrt(N.log(N.sqrt(W2)))
+ alpha = N.sqrt(2 / (W2 - 1))
+ y = N.where(y == 0, 1, y)
+ Z = delta * N.log(y / alpha + N.sqrt((y / alpha)**2 + 1))
+ return Z, (1.0 - zprob(Z)) * 2
+
+ def akurtosistest(a, dimension=None):
+ """
+Tests whether a dataset has normal kurtosis (i.e.,
+kurtosis=3(n-1)/(n+1)). Valid only for n>=20. Dimension can equal None
+(ravel array first), an integer (the dimension over which to operate),
+or a sequence (operate over multiple dimensions).
+
+Usage: akurtosistest(a,dimension=None)
+Returns: z-score and 2-tail z-probability, returns 0 for bad pixels
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ n = float(a.shape[dimension])
+ if n < 20:
+ print 'akurtosistest only valid for n>=20 ... continuing anyway, n=', n
+ b2 = akurtosis(a, dimension)
+ E = 3.0 * (n - 1) / (n + 1)
+ varb2 = 24.0 * n * (n - 2) * (n - 3) / ((n + 1) * (n + 1) * (n + 3) *
+ (n + 5))
+ x = (b2 - E) / N.sqrt(varb2)
+ sqrtbeta1 = 6.0 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) * N.sqrt(
+ (6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3)))
+ A = 6.0 + 8.0 / sqrtbeta1 * (2.0 / sqrtbeta1 +
+ N.sqrt(1 + 4.0 / (sqrtbeta1**2)))
+ term1 = 1 - 2 / (9.0 * A)
+ denom = 1 + x * N.sqrt(2 / (A - 4.0))
+ denom = N.where(N.less(denom, 0), 99, denom)
+ term2 = N.where(
+ N.equal(denom, 0), term1, N.power(
+ (1 - 2.0 / A) / denom, 1 / 3.0))
+ Z = (term1 - term2) / N.sqrt(2 / (9.0 * A))
+ Z = N.where(N.equal(denom, 99), 0, Z)
+ return Z, (1.0 - zprob(Z)) * 2
+
+ def anormaltest(a, dimension=None):
+ """
+Tests whether skew and/OR kurtosis of dataset differs from normal
+curve. Can operate over multiple dimensions. Dimension can equal
+None (ravel array first), an integer (the dimension over which to
+operate), or a sequence (operate over multiple dimensions).
+
+Usage: anormaltest(a,dimension=None)
+Returns: z-score and 2-tail probability
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ s, p = askewtest(a, dimension)
+ k, p = akurtosistest(a, dimension)
+ k2 = N.power(s, 2) + N.power(k, 2)
+ return k2, achisqprob(k2, 2)
+
+#####################################
+###### AFREQUENCY FUNCTIONS #######
+#####################################
+
+ def aitemfreq(a):
+ """
+Returns a 2D array of item frequencies. Column 1 contains item values,
+column 2 contains their respective counts. Assumes a 1D array is passed.
+@@@sorting OK?
+
+Usage: aitemfreq(a)
+Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)
+"""
+ scores = pstat.aunique(a)
+ scores = N.sort(scores)
+ freq = N.zeros(len(scores))
+ for i in range(len(scores)):
+ freq[i] = N.add.reduce(N.equal(a, scores[i]))
+ return N.array(pstat.aabut(scores, freq))
+
+ def ascoreatpercentile(inarray, percent):
+ """
+Usage: ascoreatpercentile(inarray,percent) 0<percent<100
+Returns: score at given percentile, relative to inarray distribution
+"""
+ percent = percent / 100.0
+ targetcf = percent * len(inarray)
+ h, lrl, binsize, extras = histogram(inarray)
+ cumhist = cumsum(h * 1)
+ for i in range(len(cumhist)):
+ if cumhist[i] >= targetcf:
+ break
+ score = binsize * (
+ (targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + binsize * i)
+ return score
+
+ def apercentileofscore(inarray, score, histbins=10, defaultlimits=None):
+ """
+Note: result of this function depends on the values used to histogram
+the data(!).
+
+Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None)
+Returns: percentile-position of score (0-100) relative to inarray
+"""
+ h, lrl, binsize, extras = histogram(inarray, histbins, defaultlimits)
+ cumhist = cumsum(h * 1)
+ i = int((score - lrl) / float(binsize))
+ pct = (cumhist[i - 1] + ((score - (lrl + binsize * i)) / float(binsize)) *
+ h[i]) / float(len(inarray)) * 100
+ return pct
+
+ def ahistogram(inarray, numbins=10, defaultlimits=None, printextras=1):
+ """
+Returns (i) an array of histogram bin counts, (ii) the smallest value
+of the histogram binning, and (iii) the bin width (the last 2 are not
+necessarily integers). Default number of bins is 10. Defaultlimits
+can be None (the routine picks bins spanning all the numbers in the
+inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the
+following: array of bin values, lowerreallimit, binsize, extrapoints.
+
+Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1)
+Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range)
+"""
+ inarray = N.ravel(inarray) # flatten any >1D arrays
+ if (defaultlimits <> None):
+ lowerreallimit = defaultlimits[0]
+ upperreallimit = defaultlimits[1]
+ binsize = (upperreallimit - lowerreallimit) / float(numbins)
+ else:
+ Min = N.minimum.reduce(inarray)
+ Max = N.maximum.reduce(inarray)
+ estbinwidth = float(Max - Min) / float(numbins) + 1e-6
+ binsize = (Max - Min + estbinwidth) / float(numbins)
+ lowerreallimit = Min - binsize / 2.0 #lower real limit,1st bin
+ bins = N.zeros(numbins)
+ extrapoints = 0
+ for num in inarray:
+ try:
+ if (num - lowerreallimit) < 0:
+ extrapoints = extrapoints + 1
+ else:
+ bintoincrement = int((num - lowerreallimit) / float(binsize))
+ bins[bintoincrement] = bins[bintoincrement] + 1
+ except: # point outside lower/upper limits
+ extrapoints = extrapoints + 1
+ if (extrapoints > 0 and printextras == 1):
+ print '\nPoints outside given histogram range =', extrapoints
+ return (bins, lowerreallimit, binsize, extrapoints)
+
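+ # Illustrative sketch (hypothetical call): four points split across 2 bins;
+ # with defaultlimits=None the bins are padded past min/max, so nothing
+ # falls outside and extrapoints comes back 0:
+ #   >>> bins, lrl, width, extra = ahistogram(N.array([1.0, 1.0, 2.0, 2.0]), 2)
+ #   >>> extra   # bins holds a count of 2 per bin
+ #   0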
+ def acumfreq(a, numbins=10, defaultreallimits=None):
+ """
+Returns a cumulative frequency histogram, using the histogram function.
+Defaultreallimits can be None (use all data), or a 2-sequence containing
+lower and upper limits on values to include.
+
+Usage: acumfreq(a,numbins=10,defaultreallimits=None)
+Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h, l, b, e = histogram(a, numbins, defaultreallimits)
+ cumhist = cumsum(h * 1)
+ return cumhist, l, b, e
+
+ def arelfreq(a, numbins=10, defaultreallimits=None):
+ """
+Returns a relative frequency histogram, using the histogram function.
+Defaultreallimits can be None (use all data), or a 2-sequence containing
+lower and upper limits on values to include.
+
+Usage: arelfreq(a,numbins=10,defaultreallimits=None)
+Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints
+"""
+ h, l, b, e = histogram(a, numbins, defaultreallimits)
+ h = N.array(h / float(a.shape[0]))
+ return h, l, b, e
+
+#####################################
+###### AVARIABILITY FUNCTIONS #####
+#####################################
+
+ def aobrientransform(*args):
+ """
+Computes a transform on input data (any number of columns). Used to
+test for homogeneity of variance prior to running one-way stats. Each
+array in *args is one level of a factor. If an F_oneway() run on the
+transformed data comes out significant, the variances are unequal. From
+Maxwell and Delaney, p.112.
+
+Usage: aobrientransform(*args) *args = 1D arrays, one per level of factor
+Returns: transformed data for use in an ANOVA
+"""
+ TINY = 1e-10
+ k = len(args)
+ n = N.zeros(k, N.float_)
+ v = N.zeros(k, N.float_)
+ m = N.zeros(k, N.float_)
+ nargs = []
+ for i in range(k):
+ nargs.append(args[i].astype(N.float_))
+ n[i] = float(len(nargs[i]))
+ v[i] = var(nargs[i])
+ m[i] = mean(nargs[i])
+ for j in range(k):
+ for i in range(n[j]):
+ t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j])**2
+ t2 = 0.5 * v[j] * (n[j] - 1.0)
+ t3 = (n[j] - 1.0) * (n[j] - 2.0)
+ nargs[j][i] = (t1 - t2) / float(t3)
+ check = 1
+ for j in range(k):
+ if v[j] - mean(nargs[j]) > TINY:
+ check = 0
+ if check <> 1:
+ raise ValueError, 'Lack of convergence in obrientransform.'
+ else:
+ return N.array(nargs)
+
+ def asamplevar(inarray, dimension=None, keepdims=0):
+ """
+Returns the sample variance of the values in the passed
+array (i.e., using N). Dimension can equal None (ravel array first),
+an integer (the dimension over which to operate), or a sequence
+(operate over multiple dimensions). Set keepdims=1 to return an array
+with the same number of dimensions as inarray.
+
+Usage: asamplevar(inarray,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ if dimension == 1:
+ mn = amean(inarray, dimension)[:, N.newaxis]
+ else:
+ mn = amean(inarray, dimension, keepdims=1)
+ deviations = inarray - mn
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n * inarray.shape[d]
+ else:
+ n = inarray.shape[dimension]
+ svar = ass(deviations, dimension, keepdims) / float(n)
+ return svar
+
+ def asamplestdev(inarray, dimension=None, keepdims=0):
+ """
+Returns the sample standard deviation of the values in the passed
+array (i.e., using N). Dimension can equal None (ravel array first),
+an integer (the dimension over which to operate), or a sequence
+(operate over multiple dimensions). Set keepdims=1 to return an array
+with the same number of dimensions as inarray.
+
+Usage: asamplestdev(inarray,dimension=None,keepdims=0)
+"""
+ return N.sqrt(asamplevar(inarray, dimension, keepdims))
+
+ def asignaltonoise(instack, dimension=0):
+ """
+Calculates signal-to-noise. Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions).
+
+Usage: asignaltonoise(instack,dimension=0):
+Returns: array containing the value of (mean/stdev) along dimension,
+ or 0 when stdev=0
+"""
+ m = mean(instack, dimension)
+ sd = stdev(instack, dimension)
+ return N.where(sd == 0, 0, m / sd)
+
+ def acov(x, y, dimension=None, keepdims=0):
+ """
+Returns the estimated covariance of the values in the passed
+array (i.e., N-1). Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions). Set keepdims=1 to return an array with the
+same number of dimensions as inarray.
+
+Usage: acov(x,y,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ x = N.ravel(x)
+ y = N.ravel(y)
+ dimension = 0
+ xmn = amean(x, dimension, 1) # keepdims
+ xdeviations = x - xmn
+ ymn = amean(y, dimension, 1) # keepdims
+ ydeviations = y - ymn
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n * x.shape[d]
+ else:
+ n = x.shape[dimension]
+ covar = N.sum(xdeviations * ydeviations) / float(n - 1)
+ return covar
+
+ def avar(inarray, dimension=None, keepdims=0):
+ """
+Returns the estimated population variance of the values in the passed
+array (i.e., N-1). Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions). Set keepdims=1 to return an array with the
+same number of dimensions as inarray.
+
+Usage: avar(inarray,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ mn = amean(inarray, dimension, 1)
+ deviations = inarray - mn
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n * inarray.shape[d]
+ else:
+ n = inarray.shape[dimension]
+ var = ass(deviations, dimension, keepdims) / float(n - 1)
+ return var
+
+ def astdev(inarray, dimension=None, keepdims=0):
+ """
+Returns the estimated population standard deviation of the values in
+the passed array (i.e., N-1). Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions). Set keepdims=1 to return
+an array with the same number of dimensions as inarray.
+
+Usage: astdev(inarray,dimension=None,keepdims=0)
+"""
+ return N.sqrt(avar(inarray, dimension, keepdims))
+
+ def asterr(inarray, dimension=None, keepdims=0):
+ """
+Returns the estimated population standard error of the values in the
+passed array (i.e., N-1). Dimension can equal None (ravel array
+first), an integer (the dimension over which to operate), or a
+sequence (operate over multiple dimensions). Set keepdims=1 to return
+an array with the same number of dimensions as inarray.
+
+Usage: asterr(inarray,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ return astdev(inarray, dimension,
+ keepdims) / float(N.sqrt(inarray.shape[dimension]))
+
+ def asem(inarray, dimension=None, keepdims=0):
+ """
+Returns the standard error of the mean (i.e., using N) of the values
+in the passed array. Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions). Set keepdims=1 to return an array with the
+same number of dimensions as inarray.
+
+Usage: asem(inarray,dimension=None, keepdims=0)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ if type(dimension) == ListType:
+ n = 1
+ for d in dimension:
+ n = n * inarray.shape[d]
+ else:
+ n = inarray.shape[dimension]
+ s = asamplestdev(inarray, dimension, keepdims) / N.sqrt(n - 1)
+ return s
+
+ def az(a, score):
+ """
+Returns the z-score of a given input score, given the array from which
+that score came. Not appropriate for population calculations, nor for
+arrays > 1D.
+
+Usage: az(a, score)
+"""
+ z = (score - amean(a)) / asamplestdev(a)
+ return z
+
+ def azs(a):
+ """
+Returns a 1D array of z-scores, one for each score in the passed array,
+computed relative to the passed array.
+
+Usage: azs(a)
+"""
+ zscores = []
+ for item in a:
+ zscores.append(z(a, item))
+ return N.array(zscores)
+
+ def azmap(scores, compare, dimension=0):
+ """
+Returns an array of z-scores with the shape of scores (e.g., [x,y]), compared
+array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0
+of the compare array.
+
+Usage: azmap(scores, compare, dimension=0)
+"""
+ mns = amean(compare, dimension)
+ sstd = asamplestdev(compare, 0)
+ return (scores - mns) / sstd
+
+#####################################
+####### ATRIMMING FUNCTIONS #######
+#####################################
+
+## deleted around() as it's in numpy now
+
+ def athreshold(a, threshmin=None, threshmax=None, newval=0):
+ """
+Like Numeric.clip() except that values <threshmin or >threshmax are replaced
+by newval instead of by threshmin/threshmax (respectively).
+
+Usage: athreshold(a,threshmin=None,threshmax=None,newval=0)
+Returns: a, with values <threshmin or >threshmax replaced with newval
+"""
+ mask = N.zeros(a.shape)
+ if threshmin <> None:
+ mask = mask + N.where(a < threshmin, 1, 0)
+ if threshmax <> None:
+ mask = mask + N.where(a > threshmax, 1, 0)
+ mask = N.clip(mask, 0, 1)
+ return N.where(mask, newval, a)
+
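+ # Illustrative sketch (hypothetical call): values outside [2, 8] become
+ # newval rather than being clipped to the bounds:
+ #   >>> athreshold(N.array([1, 5, 9]), threshmin=2, threshmax=8, newval=0)
+ #   array([0, 5, 0])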
+ def atrimboth(a, proportiontocut):
+ """
+Slices off the passed proportion of items from BOTH ends of the passed
+array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND
+'rightmost' 10% of scores). You must pre-sort the array if you want
+"proper" trimming. Slices off LESS if proportion results in a
+non-integer slice index (i.e., conservatively slices off
+proportiontocut).
+
+Usage: atrimboth (a,proportiontocut)
+Returns: trimmed version of array a
+"""
+ lowercut = int(proportiontocut * len(a))
+ uppercut = len(a) - lowercut
+ return a[lowercut:uppercut]
+
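+ # Illustrative sketch (hypothetical call, array pre-sorted as required):
+ # int(0.2 * 5) == 1 element is dropped from each end:
+ #   >>> atrimboth(N.array([1, 3, 5, 7, 9]), 0.2)
+ #   array([3, 5, 7])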
+ def atrim1(a, proportiontocut, tail='right'):
+ """
+Slices off the passed proportion of items from ONE end of the passed
+array (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost'
+10% of scores). Slices off LESS if proportion results in a non-integer
+slice index (i.e., conservatively slices off proportiontocut).
+
+Usage: atrim1(a,proportiontocut,tail='right') or set tail='left'
+Returns: trimmed version of array a
+"""
+ if string.lower(tail) == 'right':
+ lowercut = 0
+ uppercut = len(a) - int(proportiontocut * len(a))
+ elif string.lower(tail) == 'left':
+ lowercut = int(proportiontocut * len(a))
+ uppercut = len(a)
+ return a[lowercut:uppercut]
+
+#####################################
+##### ACORRELATION FUNCTIONS ######
+#####################################
+
+ def acovariance(X):
+ """
+Computes the covariance matrix of a matrix X. Requires a 2D matrix input.
+
+Usage: acovariance(X)
+Returns: covariance matrix of X
+"""
+ if len(X.shape) <> 2:
+ raise TypeError, 'acovariance requires 2D matrices'
+ n = X.shape[0]
+ mX = amean(X, 0)
+ return N.dot(N.transpose(X), X) / float(n) - N.multiply.outer(mX, mX)
+
+ def acorrelation(X):
+ """
+Computes the correlation matrix of a matrix X. Requires a 2D matrix input.
+
+Usage: acorrelation(X)
+Returns: correlation matrix of X
+"""
+ C = acovariance(X)
+ V = N.diagonal(C)
+ return C / N.sqrt(N.multiply.outer(V, V))
+
+ def apaired(x, y):
+ """
+Interactively determines the type of data in x and y, and then runs the
+appropriate statistic for paired group data.
+
+Usage: apaired(x,y) x,y = the two arrays of values to be compared
+Returns: appropriate statistic name, value, and probability
+"""
+ samples = ''
+ while samples not in ['i', 'r', 'I', 'R', 'c', 'C']:
+ print '\nIndependent or related samples, or correlation (i,r,c): ',
+ samples = raw_input()
+
+ if samples in ['i', 'I', 'r', 'R']:
+ print '\nComparing variances ...',
+ # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & Delaney, p.112
+ r = obrientransform(x, y)
+ f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1))
+ if p < 0.05:
+ vartype = 'unequal, p=' + str(round(p, 4))
+ else:
+ vartype = 'equal'
+ print vartype
+ if samples in ['i', 'I']:
+ if vartype[0] == 'e':
+ t, p = ttest_ind(x, y, None, 0)
+ print '\nIndependent samples t-test: ', round(t, 4), round(p, 4)
+ else:
+ if len(x) > 20 or len(y) > 20:
+ z, p = ranksums(x, y)
+ print '\nRank Sums test (NONparametric, n>20): ', round(
+ z, 4), round(p, 4)
+ else:
+ u, p = mannwhitneyu(x, y)
+ print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(
+ u, 4), round(p, 4)
+
+ else: # RELATED SAMPLES
+ if vartype[0] == 'e':
+ t, p = ttest_rel(x, y, 0)
+ print '\nRelated samples t-test: ', round(t, 4), round(p, 4)
+ else:
+ t, p = ranksums(x, y)
+ print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)
+ else: # CORRELATION ANALYSIS
+ corrtype = ''
+ while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']:
+ print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',
+ corrtype = raw_input()
+ if corrtype in ['c', 'C']:
+ m, b, r, p, see = linregress(x, y)
+ print '\nLinear regression for continuous variables ...'
+ lol = [
+ ['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'],
+ [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]
+ ]
+ pstat.printcc(lol)
+ elif corrtype in ['r', 'R']:
+ r, p = spearmanr(x, y)
+ print '\nCorrelation for ranked variables ...'
+ print "Spearman's r: ", round(r, 4), round(p, 4)
+ else: # DICHOTOMOUS
+ r, p = pointbiserialr(x, y)
+ print '\nAssuming x contains a dichotomous variable ...'
+ print 'Point Biserial r: ', round(r, 4), round(p, 4)
+ print '\n\n'
+ return None
+
+ def dices(x, y):
+ """
+Calculates Dice's coefficient: (2 * number of common terms) /
+(number of terms in x + number of terms in y). Returns a value
+between 0 (orthogonal) and 1.
+
+Usage: dices(x,y)
+"""
+ x = set(x) # built-in set(); the old 'sets' module is deprecated
+ y = set(y)
+ common = len(x.intersection(y))
+ total = float(len(x) + len(y))
+ return 2 * common / total
+
+ def icc(x, y=None, verbose=0):
+ """
+Calculates intraclass correlation coefficients using simple, Type I sums of
+squares.
+If only one variable is passed, it is assumed to be an Nx2 matrix.
+
+Usage: icc(x,y=None,verbose=0)
+Returns: icc rho, prob ####PROB IS A GUESS BASED ON PEARSON
+"""
+ TINY = 1.0e-20
+ if y:
+ all = N.concatenate([x, y], 0)
+ else:
+ all = x + 0
+ x = all[:, 0]
+ y = all[:, 1]
+ totalss = ass(all - mean(all))
+ pairmeans = (x + y) / 2.
+ withinss = ass(x - pairmeans) + ass(y - pairmeans)
+ withindf = float(len(x))
+ betwdf = float(len(x) - 1)
+ withinms = withinss / withindf
+ betweenms = (totalss - withinss) / betwdf
+ rho = (betweenms - withinms) / (withinms + betweenms)
+ t = rho * math.sqrt(betwdf / ((1.0 - rho + TINY) * (1.0 + rho + TINY)))
+ prob = abetai(0.5 * betwdf, 0.5, betwdf / (betwdf + t * t), verbose)
+ return rho, prob
+
+ def alincc(x, y):
+ """
+Calculates Lin's concordance correlation coefficient.
+
+Usage: alincc(x,y) where x, y are equal-length arrays
+Returns: Lin's CC
+"""
+ x = N.ravel(x)
+ y = N.ravel(y)
+ covar = acov(x, y) * (len(x) - 1) / float(len(x)) # correct denom to n
+ xvar = avar(x) * (len(x) - 1) / float(len(x)) # correct denom to n
+ yvar = avar(y) * (len(y) - 1) / float(len(y)) # correct denom to n
+ lincc = (2 * covar) / ((xvar + yvar) + ((amean(x) - amean(y))**2))
+ return lincc
+
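+ # Illustrative sketch (hypothetical call): perfect agreement gives 1.0,
+ # since the mean-difference penalty in the denominator vanishes:
+ #   >>> x = N.array([1.0, 2.0, 3.0])
+ #   >>> alincc(x, x)
+ #   1.0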
+ def apearsonr(x, y, verbose=1):
+ """
+Calculates a Pearson correlation coefficient and returns p. Taken
+from Heiman's Basic Statistics for the Behav. Sci (2nd), p.195.
+
+Usage: apearsonr(x,y,verbose=1) where x,y are equal length arrays
+Returns: Pearson's r, two-tailed p-value
+"""
+ TINY = 1.0e-20
+ n = len(x)
+ xmean = amean(x)
+ ymean = amean(y)
+ r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y)
+ r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) *
+ (n * ass(y) - asquare_of_sums(y)))
+ r = (r_num / r_den)
+ df = n - 2
+ t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
+ prob = abetai(0.5 * df, 0.5, df / (df + t * t), verbose)
+ return r, prob
+
+ def aspearmanr(x, y):
+ """
+Calculates a Spearman rank-order correlation coefficient. Taken
+from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.
+
+Usage: aspearmanr(x,y) where x,y are equal-length arrays
+Returns: Spearman's r, two-tailed p-value
+"""
+ TINY = 1e-30
+ n = len(x)
+ rankx = rankdata(x)
+ ranky = rankdata(y)
+ dsq = N.add.reduce((rankx - ranky)**2)
+ rs = 1 - 6 * dsq / float(n * (n**2 - 1))
+ t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs)))
+ df = n - 2
+ probrs = abetai(0.5 * df, 0.5, df / (df + t * t))
+ # probability values for rs are from part 2 of the spearman function in
+ # Numerical Recipes, p.510. They are close to tabled values, but not exact. (?)
+ return rs, probrs
+
+ def apointbiserialr(x, y):
+ """
+Calculates a point-biserial correlation coefficient and the associated
+probability value. Taken from Heiman's Basic Statistics for the Behav.
+Sci (1st), p.194.
+
+Usage: apointbiserialr(x,y) where x,y are equal length arrays
+Returns: Point-biserial r, two-tailed p-value
+"""
+ TINY = 1e-30
+ categories = pstat.aunique(x)
+ data = pstat.aabut(x, y)
+ if len(categories) <> 2:
+ raise ValueError, ('Exactly 2 categories required (in x) for '
+ 'pointbiserialr().')
+ else: # there are 2 categories, continue
+ codemap = pstat.aabut(categories, N.arange(2))
+ recoded = pstat.arecode(data, codemap, 0)
+ x = pstat.alinexand(data, 0, categories[0])
+ y = pstat.alinexand(data, 0, categories[1])
+ xmean = amean(pstat.acolex(x, 1))
+ ymean = amean(pstat.acolex(y, 1))
+ n = len(data)
+ adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n)))
+ rpb = (ymean - xmean) / asamplestdev(pstat.acolex(data, 1)) * adjust
+ df = n - 2
+ t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY)))
+ prob = abetai(0.5 * df, 0.5, df / (df + t * t))
+ return rpb, prob
+
+ def akendalltau(x, y):
+ """
+Calculates Kendall's tau ... correlation of ordinal data. Adapted
+from function kendl1 in Numerical Recipes. Needs good test-cases.@@@
+
+Usage: akendalltau(x,y)
+Returns: Kendall's tau, two-tailed p-value
+"""
+ n1 = 0
+ n2 = 0
+ iss = 0
+ for j in range(len(x) - 1):
+ for k in range(j + 1, len(y)): # start past j: each pair once, no self-pairs
+ a1 = x[j] - x[k]
+ a2 = y[j] - y[k]
+ aa = a1 * a2
+ if (aa): # neither array has a tie
+ n1 = n1 + 1
+ n2 = n2 + 1
+ if aa > 0:
+ iss = iss + 1
+ else:
+ iss = iss - 1
+ else:
+ if (a1):
+ n1 = n1 + 1
+ else:
+ n2 = n2 + 1
+ tau = iss / math.sqrt(n1 * n2)
+ svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1))
+ z = tau / math.sqrt(svar)
+ prob = erfcc(abs(z) / 1.4142136)
+ return tau, prob
+
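+ # Illustrative sketch (hypothetical call): fully concordant rankings give
+ # tau == 1.0, since every pair is ordered the same way in x and y:
+ #   >>> akendalltau(N.array([1, 2, 3, 4]), N.array([10, 20, 30, 40]))[0]
+ #   1.0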
+ def alinregress(*args):
+ """
+Calculates a regression line on two arrays, x and y, corresponding to x,y
+pairs. If a single 2D array is passed, alinregress finds dim with 2 levels
+and splits data into x,y pairs along that dim.
+
+Usage: alinregress(*args) args=2 equal-length arrays, or one 2D array
+Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n
+"""
+ TINY = 1.0e-20
+ if len(args) == 1: # more than 1D array?
+ args = args[0]
+ if len(args) == 2:
+ x = args[0]
+ y = args[1]
+ else:
+ x = args[:, 0]
+ y = args[:, 1]
+ else:
+ x = args[0]
+ y = args[1]
+ n = len(x)
+ xmean = amean(x)
+ ymean = amean(y)
+ r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y)
+ r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) *
+ (n * ass(y) - asquare_of_sums(y)))
+ r = r_num / r_den
+ z = 0.5 * math.log((1.0 + r + TINY) / (1.0 - r + TINY))
+ df = n - 2
+ t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
+ prob = abetai(0.5 * df, 0.5, df / (df + t * t))
+ slope = r_num / (float(n) * ass(x) - asquare_of_sums(x))
+ intercept = ymean - slope * xmean
+ sterrest = math.sqrt(1 - r * r) * asamplestdev(y)
+ return slope, intercept, r, prob, sterrest, n
+
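+ # Illustrative sketch (hypothetical call): a perfect line y = 2x + 1
+ # recovers slope 2.0, intercept 1.0, r == 1.0 and zero estimate error:
+ #   >>> x = N.array([1.0, 2.0, 3.0, 4.0])
+ #   >>> alinregress(x, 2 * x + 1)[:3]   # (slope, intercept, r)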
+ def amasslinregress(*args):
+ """
+Calculates a regression line on one 1D array (x) and one N-D array (y).
+
+Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n
+"""
+ TINY = 1.0e-20
+ if len(args) == 1: # more than 1D array?
+ args = args[0]
+ if len(args) == 2:
+ x = N.ravel(args[0])
+ y = args[1]
+ else:
+ x = N.ravel(args[:, 0])
+ y = args[:, 1]
+ else:
+ x = args[0]
+ y = args[1]
+ x = x.astype(N.float_)
+ y = y.astype(N.float_)
+ n = len(x)
+ xmean = amean(x)
+ ymean = amean(y, 0)
+ shp = N.ones(len(y.shape))
+ shp[0] = len(x)
+ x.shape = shp
+ print x.shape, y.shape
+ r_num = n * (N.add.reduce(x * y, 0)) - N.add.reduce(x) * N.add.reduce(y, 0)
+ r_den = N.sqrt((n * ass(x) - asquare_of_sums(x)) *
+ (n * ass(y, 0) - asquare_of_sums(y, 0)))
+ zerodivproblem = N.equal(r_den, 0)
+ r_den = N.where(zerodivproblem, 1, r_den
+ ) # avoid zero-division in 1st place
+ r = r_num / r_den # need to do this nicely for matrix division
+ r = N.where(zerodivproblem, 0.0, r)
+ z = 0.5 * N.log((1.0 + r + TINY) / (1.0 - r + TINY))
+ df = n - 2
+ t = r * N.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
+ prob = abetai(0.5 * df, 0.5, df / (df + t * t))
+
+ ss = float(n) * ass(x) - asquare_of_sums(x)
+ s_den = N.where(ss == 0, 1, ss) # avoid zero-division in 1st place
+ slope = r_num / s_den
+ intercept = ymean - slope * xmean
+ sterrest = N.sqrt(1 - r * r) * asamplestdev(y, 0)
+ return slope, intercept, r, prob, sterrest, n
+
+#####################################
+##### AINFERENTIAL STATISTICS #####
+#####################################
+
+ def attest_1samp(a, popmean, printit=0, name='Sample', writemode='a'):
+ """
+Calculates the t-obtained for the independent samples T-test on ONE group
+of scores a, given a population mean. If printit=1, results are printed
+to the screen. If printit='filename', the results are output to 'filename'
+using the given writemode (default=append). Returns t-value, and prob.
+
+Usage: attest_1samp(a,popmean,Name='Sample',printit=0,writemode='a')
+Returns: t-value, two-tailed prob
+"""
+ if type(a) != N.ndarray:
+ a = N.array(a)
+ x = amean(a)
+ v = avar(a)
+ n = len(a)
+ df = n - 1
+ svar = ((n - 1) * v) / float(df)
+ t = (x - popmean) / math.sqrt(svar * (1.0 / n))
+ prob = abetai(0.5 * df, 0.5, df / (df + t * t))
+
+ if printit <> 0:
+ statname = 'Single-sample T-test.'
+ outputpairedstats(printit, writemode, 'Population', '--', popmean, 0, 0,
+ 0, name, n, x, v, N.minimum.reduce(N.ravel(a)),
+ N.maximum.reduce(N.ravel(a)), statname, t, prob)
+ return t, prob
+
+ def attest_ind(a,
+ b,
+ dimension=None,
+ printit=0,
+ name1='Samp1',
+ name2='Samp2',
+ writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores
+a, and b. From Numerical Recipes, p.483. If printit=1, results are
+printed to the screen. If printit='filename', the results are output
+to 'filename' using the given writemode (default=append). Dimension
+can equal None (ravel array first), or an integer (the dimension over
+which to operate on a and b).
+
+Usage: attest_ind (a,b,dimension=None,printit=0,
+ Name1='Samp1',Name2='Samp2',writemode='a')
+Returns: t-value, two-tailed p-value
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ b = N.ravel(b)
+ dimension = 0
+ x1 = amean(a, dimension)
+ x2 = amean(b, dimension)
+ v1 = avar(a, dimension)
+ v2 = avar(b, dimension)
+ n1 = a.shape[dimension]
+ n2 = b.shape[dimension]
+ df = n1 + n2 - 2
+ svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df)
+ zerodivproblem = N.equal(svar, 0)
+ svar = N.where(zerodivproblem, 1, svar) # avoid zero-division in 1st place
+ t = (x1 - x2) / N.sqrt(svar *
+ (1.0 / n1 + 1.0 / n2)) # N-D COMPUTATION HERE!!!!!!
+ t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0
+ probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
+
+ if type(t) == N.ndarray:
+ probs = N.reshape(probs, t.shape)
+ if probs.shape == (1,):
+ probs = probs[0]
+
+ if printit <> 0:
+ if type(t) == N.ndarray:
+ t = t[0]
+ if type(probs) == N.ndarray:
+ probs = probs[0]
+ statname = 'Independent samples T-test.'
+ outputpairedstats(printit, writemode, name1, n1, x1, v1,
+ N.minimum.reduce(N.ravel(a)),
+ N.maximum.reduce(N.ravel(a)), name2, n2, x2, v2,
+ N.minimum.reduce(N.ravel(b)),
+ N.maximum.reduce(N.ravel(b)), statname, t, probs)
+ return
+ return t, probs
+
+ def ap2t(pval, df):
+ """
+Tries to compute a t-value from a p-value (or pval array) and associated df.
+SLOW for large numbers of elements(!) as it re-computes p-values 20 times
+(smaller step-sizes) at which point it decides it's done. Keeps the signs
+of the input array. Returns 1000 (or -1000) if t>100.
+
+Usage: ap2t(pval,df)
+Returns: an array of t-values with the shape of pval
+ """
+ pval = N.array(pval)
+ signs = N.sign(pval)
+ pval = abs(pval)
+ t = N.ones(pval.shape, N.float_) * 50
+ step = N.ones(pval.shape, N.float_) * 25
+ print 'Initial ap2t() prob calc'
+ prob = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
+ print 'ap2t() iter: ',
+ for i in range(10):
+ print i, ' ',
+ t = N.where(pval < prob, t + step, t - step)
+ prob = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
+ step = step / 2
+ print
+ # since this is an ugly hack, we get ugly boundaries
+ t = N.where(t > 99.9, 1000, t) # hit upper-boundary
+ t = t * signs # restore the sign of the input p-values ('+' was a typo)
+ return t #, prob, pval
+
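+ # Illustrative sketch (hypothetical call): inverting a two-tailed p of 0.05
+ # at df=100 recovers t near the familiar 1.98, to within the final
+ # bisection step (~0.05), alongside the iteration chatter printed above:
+ #   >>> ap2t(0.05, 100)   # ~1.98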
+ def attest_rel(a,
+ b,
+ dimension=None,
+ printit=0,
+ name1='Samp1',
+ name2='Samp2',
+ writemode='a'):
+ """
+Calculates the t-obtained T-test on TWO RELATED samples of scores, a
+and b. From Numerical Recipes, p.483. If printit=1, results are
+printed to the screen. If printit='filename', the results are output
+to 'filename' using the given writemode (default=append). Dimension
+can equal None (ravel array first), or an integer (the dimension over
+which to operate on a and b).
+
+Usage: attest_rel(a,b,dimension=None,printit=0,
+ name1='Samp1',name2='Samp2',writemode='a')
+Returns: t-value, two-tailed p-value
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ b = N.ravel(b)
+ dimension = 0
+ if len(a) != len(b):
+ raise ValueError('Unequal length arrays.')
+ x1 = amean(a, dimension)
+ x2 = amean(b, dimension)
+ v1 = avar(a, dimension)
+ v2 = avar(b, dimension)
+ n = a.shape[dimension]
+ df = float(n - 1)
+ d = (a - b).astype('d')
+
+ denom = N.sqrt(
+ (n * N.add.reduce(d * d, dimension) - N.add.reduce(d, dimension)**2) /
+ df)
+ zerodivproblem = N.equal(denom, 0)
+ denom = N.where(zerodivproblem, 1, denom
+ ) # avoid zero-division in 1st place
+ t = N.add.reduce(d, dimension) / denom # N-D COMPUTATION HERE!!!!!!
+ t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0
+ probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
+ if type(t) == N.ndarray:
+ probs = N.reshape(probs, t.shape)
+ if probs.shape == (1,):
+ probs = probs[0]
+
+ if printit != 0:
+ statname = 'Related samples T-test.'
+ outputpairedstats(printit, writemode, name1, n, x1, v1,
+ N.minimum.reduce(N.ravel(a)),
+ N.maximum.reduce(N.ravel(a)), name2, n, x2, v2,
+ N.minimum.reduce(N.ravel(b)),
+ N.maximum.reduce(N.ravel(b)), statname, t, probs)
+ return
+ return t, probs
+
+ def achisquare(f_obs, f_exp=None):
+ """
+Calculates a one-way chi square for array of observed frequencies and returns
+the result. If no expected frequencies are given, the total N is assumed to
+be equally distributed across all groups.
+
+Usage: achisquare(f_obs, f_exp=None) f_obs = array of observed cell freq.
+Returns: chisquare-statistic, associated p-value
+"""
+
+ k = len(f_obs)
+ if f_exp == None:
+ f_exp = N.array([sum(f_obs) / float(k)] * len(f_obs), N.float_)
+ f_exp = f_exp.astype(N.float_)
+ chisq = N.add.reduce((f_obs - f_exp)**2 / f_exp)
+ return chisq, achisqprob(chisq, k - 1)
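+# Worked example (invented counts): observed frequencies [10, 20, 30] against
+# the implied uniform expectation of 20 per cell give
+# chisq = (10-20)**2/20 + 0 + (30-20)**2/20 = 10.0 with k - 1 = 2 df, and the
+# associated p-value achisqprob(10.0, 2) is roughly exp(-5), about 0.0067.
+#
+#   >>> chisq, p = achisquare(N.array([10, 20, 30]))
+#   >>> round(chisq, 1)   # -> 10.0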
+
+ def aks_2samp(data1, data2):
+ """
+Computes the Kolmogorov-Smirnov statistic on 2 samples. Modified from
+Numerical Recipes in C, page 493. Returns KS D-value, prob. Not ufunc-
+like.
+
+Usage: aks_2samp(data1,data2) where data1 and data2 are 1D arrays
+Returns: KS D-value, p-value
+"""
+ j1 = 0 # N.zeros(data1.shape[1:]) TRIED TO MAKE THIS UFUNC-LIKE
+ j2 = 0 # N.zeros(data2.shape[1:])
+ fn1 = 0.0 # N.zeros(data1.shape[1:],N.float_)
+ fn2 = 0.0 # N.zeros(data2.shape[1:],N.float_)
+ n1 = data1.shape[0]
+ n2 = data2.shape[0]
+ en1 = n1 * 1
+ en2 = n2 * 1
+ d = N.zeros(data1.shape[1:], N.float_)
+ data1 = N.sort(data1, 0)
+ data2 = N.sort(data2, 0)
+ while j1 < n1 and j2 < n2:
+ d1 = data1[j1]
+ d2 = data2[j2]
+ if d1 <= d2:
+ fn1 = (j1) / float(en1)
+ j1 = j1 + 1
+ if d2 <= d1:
+ fn2 = (j2) / float(en2)
+ j2 = j2 + 1
+ dt = (fn2 - fn1)
+ if abs(dt) > abs(d):
+ d = dt
+ en = math.sqrt(en1 * en2 / float(en1 + en2))
+ prob = aksprob((en + 0.12 + 0.11 / en) * N.fabs(d))
+ return d, prob
+
+ def amannwhitneyu(x, y):
+ """
+Calculates a Mann-Whitney U statistic on the provided scores and
+returns the result. Use only when the n in each condition is < 20 and
+you have 2 independent samples of ranks. REMEMBER: Mann-Whitney U is
+significant if the u-obtained is LESS THAN or equal to the critical
+value of U.
+
+Usage: amannwhitneyu(x,y) where x,y are arrays of values for 2 conditions
+Returns: u-statistic, one-tailed p-value (i.e., p(z(U))), proportion (bigu/(n1*n2))
+"""
+ n1 = len(x)
+ n2 = len(y)
+ ranked = rankdata(N.concatenate((x, y)))
+ rankx = ranked[0:n1] # get the x-ranks
+ ranky = ranked[n1:] # the rest are y-ranks
+ u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx) # calc U for x
+ u2 = n1 * n2 - u1 # remainder is U for y
+ bigu = max(u1, u2)
+ smallu = min(u1, u2)
+ proportion = bigu / float(n1 * n2)
+ T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores
+ if T == 0:
+ raise ValueError('All numbers are identical in amannwhitneyu')
+ sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)
+ z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc
+ return smallu, 1.0 - azprob(z), proportion
+
+ def atiecorrect(rankvals):
+ """
+Tie-corrector for ties in Mann-Whitney U and Kruskal-Wallis H tests.
+See Siegel, S. (1956) Nonparametric Statistics for the Behavioral
+Sciences. New York: McGraw-Hill. Code adapted from |Stat rankind.c
+code.
+
+Usage: atiecorrect(rankvals)
+Returns: T correction factor for U or H
+"""
+ svec, _ = ashellsort(N.array(rankvals))
+ n = len(svec)
+ T = 0.0
+ i = 0
+ while (i < n - 1):
+ if svec[i] == svec[i + 1]:
+ nties = 1
+ while (i < n - 1) and (svec[i] == svec[i + 1]):
+ nties = nties + 1
+ i = i + 1
+ T = T + nties**3 - nties
+ i = i + 1
+ T = T / float(n**3 - n)
+ return 1.0 - T
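+# Worked example (invented ranks): in [1.0, 3.0, 3.0, 3.0, 5.0, 6.0] one score
+# is tied three ways, so T accumulates 3**3 - 3 = 24 and the function returns
+# 1 - 24/(6**3 - 6) = 1 - 24/210, about 0.8857; with no ties it returns 1.0.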
+
+ def aranksums(x, y):
+ """
+Calculates the rank sums statistic on the provided scores and returns
+the result.
+
+Usage: aranksums(x,y) where x,y are arrays of values for 2 conditions
+Returns: z-statistic, two-tailed p-value
+"""
+ n1 = len(x)
+ n2 = len(y)
+ alldata = N.concatenate((x, y))
+ ranked = arankdata(alldata)
+ x = ranked[:n1]
+ y = ranked[n1:]
+ s = sum(x)
+ expected = n1 * (n1 + n2 + 1) / 2.0
+ z = (s - expected) / math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0)
+ prob = 2 * (1.0 - azprob(abs(z)))
+ return z, prob
+
+ def awilcoxont(x, y):
+ """
+Calculates the Wilcoxon T-test for related samples and returns the
+result. A non-parametric T-test.
+
+Usage: awilcoxont(x,y) where x,y are equal-length arrays for 2 conditions
+Returns: t-statistic, two-tailed p-value
+"""
+ if len(x) != len(y):
+ raise ValueError('Unequal N in awilcoxont. Aborting.')
+ d = x - y
+ d = N.compress(N.not_equal(d, 0), d) # Keep all non-zero differences
+ count = len(d)
+ absd = abs(d)
+ absranked = arankdata(absd)
+ r_plus = 0.0
+ r_minus = 0.0
+ for i in range(len(absd)):
+ if d[i] < 0:
+ r_minus = r_minus + absranked[i]
+ else:
+ r_plus = r_plus + absranked[i]
+ wt = min(r_plus, r_minus)
+ mn = count * (count + 1) * 0.25
+ se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0)
+ z = math.fabs(wt - mn) / se
+ prob = 2 * (1.0 - zprob(abs(z)))
+ return wt, prob
+
+ def akruskalwallish(*args):
+ """
+The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more
+groups, requiring at least 5 subjects in each group. This function
+calculates the Kruskal-Wallis H and associated p-value for 3 or more
+independent samples.
+
+Usage: akruskalwallish(*args) args are separate arrays for 3+ conditions
+Returns: H-statistic (corrected for ties), associated p-value
+"""
+ assert len(args) >= 3, 'Need at least 3 groups in stats.akruskalwallish()'
+ args = list(args)
+ n = map(len, args)
+ all = []
+ for i in range(len(args)):
+ all = all + args[i].tolist()
+ ranked = rankdata(all)
+ T = tiecorrect(ranked)
+ for i in range(len(args)):
+ args[i] = ranked[0:n[i]]
+ del ranked[0:n[i]]
+ rsums = []
+ for i in range(len(args)):
+ rsums.append(sum(args[i])**2)
+ rsums[i] = rsums[i] / float(n[i])
+ ssbn = sum(rsums)
+ totaln = sum(n)
+ h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)
+ df = len(args) - 1
+ if T == 0:
+ raise ValueError('All numbers are identical in akruskalwallish')
+ h = h / float(T)
+ return h, chisqprob(h, df)
+
+ def afriedmanchisquare(*args):
+ """
+Friedman Chi-Square is a non-parametric, one-way within-subjects
+ANOVA. This function calculates the Friedman Chi-square test for
+repeated measures and returns the result, along with the associated
+probability value. It assumes 3 or more repeated measures. With only 3
+levels, a minimum of 10 subjects in the study is required. Four levels
+require 5 subjects per level(??).
+
+Usage: afriedmanchisquare(*args) args are separate arrays for 2+ conditions
+Returns: chi-square statistic, associated p-value
+"""
+ k = len(args)
+ if k < 3:
+ raise ValueError('\nLess than 3 levels. Friedman test not '
+ 'appropriate.\n')
+ n = len(args[0])
+ data = pstat.aabut(*args)
+ data = data.astype(N.float_)
+ for i in range(len(data)):
+ data[i] = arankdata(data[i])
+ ssbn = asum(asum(data, 0)**2)  # squared rank sums per condition
+ chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1)
+ return chisq, achisqprob(chisq, k - 1)
+
+#####################################
+#### APROBABILITY CALCULATIONS ####
+#####################################
+
+ def achisqprob(chisq, df):
+ """
+Returns the (1-tail) probability value associated with the provided chi-square
+value and df. Heavily modified from chisq.c in Gary Perlman's |Stat. Can
+handle multiple dimensions.
+
+Usage: achisqprob(chisq,df) chisq=chisquare stat., df=degrees of freedom
+"""
+ BIG = 200.0
+
+ def ex(x):
+ BIG = 200.0
+ exponents = N.where(N.less(x, -BIG), -BIG, x)
+ return N.exp(exponents)
+
+ if type(chisq) == N.ndarray:
+ arrayflag = 1
+ else:
+ arrayflag = 0
+ chisq = N.array([chisq])
+ if df < 1:
+ return N.ones(chisq.shape, N.float_)
+ probs = N.zeros(chisq.shape, N.float_)
+ probs = N.where(
+ N.less_equal(chisq, 0), 1.0, probs) # set prob=1 for chisq<0
+ a = 0.5 * chisq
+ if df >= 1:  # df == 1 takes the odd-df branch and returns s below
+ y = ex(-a)
+ if df % 2 == 0:
+ even = 1
+ s = y * 1
+ s2 = s * 1
+ else:
+ even = 0
+ s = 2.0 * azprob(-N.sqrt(chisq))
+ s2 = s * 1
+ if (df > 2):
+ chisq = 0.5 * (df - 1.0)
+ if even:
+ z = N.ones(probs.shape, N.float_)
+ else:
+ z = 0.5 * N.ones(probs.shape, N.float_)
+ if even:
+ e = N.zeros(probs.shape, N.float_)
+ else:
+ e = N.log(N.sqrt(N.pi)) * N.ones(probs.shape, N.float_)
+ c = N.log(a)
+ mask = N.zeros(probs.shape)
+ a_big = N.greater(a, BIG)
+ a_big_frozen = -1 * N.ones(probs.shape, N.float_)
+ totalelements = N.multiply.reduce(N.array(probs.shape))
+ while asum(mask) != totalelements:
+ e = N.log(z) + e
+ s = s + ex(c * z - a - e)
+ z = z + 1.0
+ # print z, e, s
+ newmask = N.greater(z, chisq)
+ a_big_frozen = N.where(newmask * N.equal(mask, 0) * a_big, s,
+ a_big_frozen)
+ mask = N.clip(newmask + mask, 0, 1)
+ if even:
+ z = N.ones(probs.shape, N.float_)
+ e = N.ones(probs.shape, N.float_)
+ else:
+ z = 0.5 * N.ones(probs.shape, N.float_)
+ e = 1.0 / N.sqrt(N.pi) / N.sqrt(a) * N.ones(probs.shape, N.float_)
+ c = 0.0
+ mask = N.zeros(probs.shape)
+ a_notbig_frozen = -1 * N.ones(probs.shape, N.float_)
+ while asum(mask) != totalelements:
+ e = e * (a / z.astype(N.float_))
+ c = c + e
+ z = z + 1.0
+ # print '#2', z, e, c, s, c*y+s2
+ newmask = N.greater(z, chisq)
+ a_notbig_frozen = N.where(newmask * N.equal(mask, 0) * (1 - a_big),
+ c * y + s2, a_notbig_frozen)
+ mask = N.clip(newmask + mask, 0, 1)
+ probs = N.where(
+ N.equal(probs, 1), 1, N.where(
+ N.greater(a, BIG), a_big_frozen, a_notbig_frozen))
+ return probs
+ else:
+ return s
+
+ def aerfcc(x):
+ """
+Returns the complementary error function erfc(x) with fractional error
+everywhere less than 1.2e-7. Adapted from Numerical Recipes. Can
+handle multiple dimensions.
+
+Usage: aerfcc(x)
+"""
+ z = abs(x)
+ t = 1.0 / (1.0 + 0.5 * z)
+ ans = t * N.exp(-z * z - 1.26551223 + t * (1.00002368 + t * (
+ 0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * (
+ 0.27886807 + t * (-1.13520398 + t * (1.48851587 + t * (
+ -0.82215223 + t * 0.17087277)))))))))
+ return N.where(N.greater_equal(x, 0), ans, 2.0 - ans)
+
+ def azprob(z):
+ """
+Returns the area under the normal curve 'to the left of' the given z value.
+Thus,
+ for z<0, zprob(z) = 1-tail probability
+ for z>0, 1.0-zprob(z) = 1-tail probability
+ for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability
+Adapted from z.c in Gary Perlman's |Stat. Can handle multiple dimensions.
+
+Usage: azprob(z) where z is a z-value
+"""
+
+ def yfunc(y):
+ x = (((((((
+ ((((((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) * y
+ - 0.000676904986) * y + 0.001390604284) * y - 0.000794620820) * y
+ - 0.002034254874) * y + 0.006549791214) * y - 0.010557625006) * y +
+ 0.011630447319) * y - 0.009279453341) * y + 0.005353579108) * y -
+ 0.002141268741) * y + 0.000535310849) * y + 0.999936657524
+ return x
+
+ def wfunc(w):
+ x = ((((((((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w
+ - 0.019198292004) * w + 0.059054035642) * w - 0.151968751364) *
+ w + 0.319152932694) * w - 0.531923007300) * w +
+ 0.797884560593) * N.sqrt(w) * 2.0
+ return x
+
+ Z_MAX = 6.0 # maximum meaningful z-value
+ x = N.zeros(z.shape, N.float_) # initialize
+ y = 0.5 * N.fabs(z)
+ x = N.where(N.less(y, 1.0), wfunc(y * y), yfunc(y - 2.0)) # get x's
+ x = N.where(N.greater(y, Z_MAX * 0.5), 1.0, x) # kill those with big Z
+ prob = N.where(N.greater(z, 0), (x + 1) * 0.5, (1 - x) * 0.5)
+ return prob
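+# Sanity check against the standard normal table: the area left of z = 0 is
+# 0.5 and the area left of z = 1.96 is about 0.975, so |z| = 1.96 corresponds
+# to the familiar two-tailed p = 2*(1 - 0.975) = 0.05. Note the input must be
+# an array (the function reads z.shape):
+#
+#   >>> azprob(N.array([0.0, 1.96]))   # -> approximately [0.5, 0.975]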
+
+ def aksprob(alam):
+ """
+Returns the probability value for a K-S statistic computed via ks_2samp.
+Adapted from Numerical Recipes. Can handle multiple dimensions.
+
+Usage: aksprob(alam)
+"""
+ if type(alam) == N.ndarray:
+ frozen = -1 * N.ones(alam.shape, N.float64)
+ alam = alam.astype(N.float64)
+ else:
+ frozen = N.array(-1.)
+ alam = N.array(alam, N.float64)
+ mask = N.zeros(alam.shape)
+ fac = 2.0 * N.ones(alam.shape, N.float_)
+ sum = N.zeros(alam.shape, N.float_)
+ termbf = N.zeros(alam.shape, N.float_)
+ a2 = N.array(-2.0 * alam * alam, N.float64)
+ totalelements = N.multiply.reduce(N.array(mask.shape))
+ for j in range(1, 201):
+ if asum(mask) == totalelements:
+ break
+ exponents = (a2 * j * j)
+ overflowmask = N.less(exponents, -746)
+ frozen = N.where(overflowmask, 0, frozen)
+ mask = mask + overflowmask
+ term = fac * N.exp(exponents)
+ sum = sum + term
+ newmask = N.where(
+ N.less_equal(
+ abs(term), (0.001 * termbf)) + N.less(
+ abs(term), 1.0e-8 * sum), 1, 0)
+ frozen = N.where(newmask * N.equal(mask, 0), sum, frozen)
+ mask = N.clip(mask + newmask, 0, 1)
+ fac = -fac
+ termbf = abs(term)
+ # Both branches above produce array-valued 'frozen', so a single return
+ # suffices; elements whose series never converged are reported as 1.0.
+ return N.where(N.equal(frozen, -1), 1.0, frozen)
+
+ def afprob(dfnum, dfden, F):
+ """
+Returns the 1-tailed significance level (p-value) of an F statistic
+given the degrees of freedom for the numerator (dfR-dfF) and the degrees
+of freedom for the denominator (dfF). Can handle multiple dims for F.
+
+Usage: afprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn
+"""
+ if type(F) == N.ndarray:
+ return abetai(0.5 * dfden, 0.5 * dfnum, dfden / (1.0 * dfden + dfnum * F))
+ else:
+ return abetai(0.5 * dfden, 0.5 * dfnum, dfden / float(dfden + dfnum * F))
+
+ def abetacf(a, b, x, verbose=1):
+ """
+Evaluates the continued fraction form of the incomplete Beta function,
+betai. (Adapted from: Numerical Recipes in C.) Can handle multiple
+dimensions for x.
+
+Usage: abetacf(a,b,x,verbose=1)
+"""
+ ITMAX = 200
+ EPS = 3.0e-7
+
+ arrayflag = 1
+ if type(x) == N.ndarray:
+ frozen = N.ones(x.shape,
+ N.float_) * -1 #start out w/ -1s, should replace all
+ else:
+ arrayflag = 0
+ frozen = N.array([-1])
+ x = N.array([x])
+ mask = N.zeros(x.shape)
+ bm = az = am = 1.0
+ qab = a + b
+ qap = a + 1.0
+ qam = a - 1.0
+ bz = 1.0 - qab * x / qap
+ for i in range(ITMAX + 1):
+ if N.sum(N.ravel(N.equal(frozen, -1))) == 0:
+ break
+ em = float(i + 1)
+ tem = em + em
+ d = em * (b - em) * x / ((qam + tem) * (a + tem))
+ ap = az + d * am
+ bp = bz + d * bm
+ d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem))
+ app = ap + d * az
+ bpp = bp + d * bz
+ aold = az * 1
+ am = ap / bpp
+ bm = bp / bpp
+ az = app / bpp
+ bz = 1.0
+ newmask = N.less(abs(az - aold), EPS * abs(az))
+ frozen = N.where(newmask * N.equal(mask, 0), az, frozen)
+ mask = N.clip(mask + newmask, 0, 1)
+ noconverge = asum(N.equal(frozen, -1))
+ if noconverge != 0 and verbose:
+ print 'a or b too big, or ITMAX too small in Betacf for ', noconverge, ' elements'
+ if arrayflag:
+ return frozen
+ else:
+ return frozen[0]
+
+ def agammln(xx):
+ """
+Returns the gamma function of xx.
+ Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.
+Adapted from: Numerical Recipes in C. Can handle multiple dims ... but
+probably doesn't normally have to.
+
+Usage: agammln(xx)
+"""
+ coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516,
+ 0.120858003e-2, -0.536382e-5]
+ x = xx - 1.0
+ tmp = x + 5.5
+ tmp = tmp - (x + 0.5) * N.log(tmp)
+ ser = 1.0
+ for j in range(len(coeff)):
+ x = x + 1
+ ser = ser + coeff[j] / x
+ return -tmp + N.log(2.50662827465 * ser)
+
+ def abetai(a, b, x, verbose=1):
+ """
+Returns the incomplete beta function:
+
+ I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)
+
+where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
+function of a. The continued fraction formulation is implemented
+here, using the betacf function. (Adapted from: Numerical Recipes in
+C.) Can handle multiple dimensions.
+
+Usage: abetai(a,b,x,verbose=1)
+"""
+ TINY = 1e-15
+ if type(a) == N.ndarray:
+ if asum(N.less(x, 0) + N.greater(x, 1)) != 0:
+ raise ValueError('Bad x in abetai')
+ x = N.where(N.equal(x, 0), TINY, x)
+ x = N.where(N.equal(x, 1.0), 1 - TINY, x)
+
+ exponents = (gammln(a + b) - gammln(a) - gammln(b) + a * N.log(x) + b *
+ N.log(1.0 - x))
+ # exp() underflows to 0.0 below about exp(-745); clamp at -740 for safety
+ exponents = N.where(N.less(exponents, -740), -740, exponents)
+ bt = N.exp(exponents)
+ if type(x) == N.ndarray:
+ ans = N.where(
+ N.less(x, (a + 1) / (a + b + 2.0)), bt * abetacf(a, b, x, verbose) /
+ float(a), 1.0 - bt * abetacf(b, a, 1.0 - x, verbose) / float(b))
+ else:
+ if x < (a + 1) / (a + b + 2.0):
+ ans = bt * abetacf(a, b, x, verbose) / float(a)
+ else:
+ ans = 1.0 - bt * abetacf(b, a, 1.0 - x, verbose) / float(b)
+ return ans
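+# Cross-check with a standard t-table: the t-tests above all compute their
+# two-tailed p-value as abetai(0.5*df, 0.5, df/(df + t*t)). For df = 8 the 5%
+# critical value is t = 2.306, so:
+#
+#   >>> round(abetai(0.5 * 8, 0.5, 8 / float(8 + 2.306**2)), 3)   # -> 0.05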
+
+#####################################
+####### AANOVA CALCULATIONS #######
+#####################################
+
+ import numpy.linalg, operator
+ LA = numpy.linalg
+
+ def aglm(data, para):
+ """
+Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken
+from:
+ Peterson et al. Statistical limitations in functional neuroimaging
+ I. Non-inferential methods and statistical models. Phil Trans Royal Soc
+ Lond B 354: 1239-1260.
+
+Usage: aglm(data,para)
+Returns: statistic, p-value ???
+"""
+ if len(para) != len(data):
+ print 'data and para must be same length in aglm'
+ return
+ n = len(para)
+ p = pstat.aunique(para)
+ x = N.zeros((n, len(p))) # design matrix
+ for l in range(len(p)):
+ x[:, l] = N.equal(para, p[l])
+ b = N.dot(
+ N.dot(
+ LA.inv(N.dot(
+ N.transpose(x), x)), # i.e., b=inv(X'X)X'Y
+ N.transpose(x)),
+ data)
+ diffs = (data - N.dot(x, b))
+ s_sq = 1. / (n - len(p)) * N.dot(N.transpose(diffs), diffs)
+
+ if len(p) == 2: # ttest_ind
+ c = N.array([1, -1])
+ df = n - 2
+ fact = asum(1.0 / asum(x, 0)) # i.e., 1/n1 + 1/n2 + 1/n3 ...
+ t = N.dot(c, b) / N.sqrt(s_sq * fact)
+ probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t))
+ return t, probs
+
+ def aF_oneway(*args):
+ """
+Performs a 1-way ANOVA, returning an F-value and probability given
+any number of groups. From Heiman, pp.394-7.
+
+Usage: aF_oneway (*args) where *args is 2 or more arrays, one per
+ treatment group
+Returns: f-value, probability
+"""
+ na = len(args)  # ANOVA on 'na' groups, each in its own array
+ tmp = map(N.array, args)
+ means = map(amean, tmp)
+ vars = map(avar, tmp)
+ ns = map(len, args)
+ alldata = N.concatenate(args)
+ bign = len(alldata)
+ sstot = ass(alldata) - (asquare_of_sums(alldata) / float(bign))
+ ssbn = 0
+ for a in args:
+ ssbn = ssbn + asquare_of_sums(N.array(a)) / float(len(a))
+ ssbn = ssbn - (asquare_of_sums(alldata) / float(bign))
+ sswn = sstot - ssbn
+ dfbn = na - 1
+ dfwn = bign - na
+ msb = ssbn / float(dfbn)
+ msw = sswn / float(dfwn)
+ f = msb / msw
+ prob = fprob(dfbn, dfwn, f)
+ return f, prob
+
+ def aF_value(ER, EF, dfR, dfF):
+ """
+Returns an F-statistic given the following:
+ ER = error associated with the null hypothesis (the Restricted model)
+ EF = error associated with the alternate hypothesis (the Full model)
+ dfR = degrees of freedom associated with the Restricted model
+ dfF = degrees of freedom associated with the Full model
+"""
+ return ((ER - EF) / float(dfR - dfF) / (EF / float(dfF)))
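+# Worked example (invented error sums): ER = 100 with dfR = 10 against EF = 80
+# with dfF = 8 gives F = ((100 - 80)/(10 - 8)) / (80/8) = 10/10 = 1.0, i.e.
+# the extra parameters of the Full model explain no more than chance would.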
+
+ def outputfstats(Enum, Eden, dfnum, dfden, f, prob):
+ Enum = round(Enum, 3)
+ Eden = round(Eden, 3)
+ dfnum = round(dfnum, 3)
+ dfden = round(dfden, 3)
+ f = round(f, 3)
+ prob = round(prob, 3)
+ suffix = '' # for *s after the p-value
+ if prob < 0.001:
+ suffix = ' ***'
+ elif prob < 0.01:
+ suffix = ' **'
+ elif prob < 0.05:
+ suffix = ' *'
+ title = [['EF/ER', 'DF', 'Mean Square', 'F-value', 'prob', '']]
+ lofl = title + [[Enum, dfnum, round(Enum / float(dfnum), 3), f, prob, suffix
+ ], [Eden, dfden, round(Eden / float(dfden), 3), '', '', '']]
+ pstat.printcc(lofl)
+ return
+
+ def F_value_multivariate(ER, EF, dfnum, dfden):
+ """
+Returns an F-statistic given the following:
+ ER = error associated with the null hypothesis (the Restricted model)
+ EF = error associated with the alternate hypothesis (the Full model)
+ dfnum = degrees of freedom gained by the Full model (i.e., dfR - dfF)
+ dfden = degrees of freedom associated with the Full model (dfF)
+where ER and EF are matrices from a multivariate F calculation.
+"""
+ if type(ER) in [IntType, FloatType]:
+ ER = N.array([[ER]])
+ if type(EF) in [IntType, FloatType]:
+ EF = N.array([[EF]])
+ n_um = (LA.det(ER) - LA.det(EF)) / float(dfnum)
+ d_en = LA.det(EF) / float(dfden)
+ return n_um / d_en
+
+#####################################
+####### ASUPPORT FUNCTIONS ########
+#####################################
+
+ def asign(a):
+ """
+Usage: asign(a)
+Returns: array shape of a, with -1 where a<0 and +1 where a>=0
+"""
+ a = N.asarray(a)
+ if ((type(a) == type(1.4)) or (type(a) == type(1))):
+ return a - a - N.less(a, 0) + N.greater(a, 0)
+ else:
+ return N.zeros(N.shape(a)) - N.less(a, 0) + N.greater(a, 0)
+
+ def asum(a, dimension=None, keepdims=0):
+ """
+An alternative to the Numeric.add.reduce function, which allows one to
+(1) collapse over multiple dimensions at once, and/or (2) to retain
+all dimensions in the original array (squashing each summed dimension to size 1).
+Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions). If keepdims=1, the resulting array will have as many
+dimensions as the input array.
+
+Usage: asum(a, dimension=None, keepdims=0)
+Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1
+"""
+ if type(a) == N.ndarray and a.dtype in [N.int_, N.short, N.ubyte]:
+ a = a.astype(N.float_)
+ if dimension == None:
+ s = N.sum(N.ravel(a))
+ elif type(dimension) in [IntType, FloatType]:
+ s = N.add.reduce(a, dimension)
+ if keepdims == 1:
+ shp = list(a.shape)
+ shp[dimension] = 1
+ s = N.reshape(s, shp)
+ else: # must be a SEQUENCE of dims to sum over
+ dims = list(dimension)
+ dims.sort()
+ dims.reverse()
+ s = a * 1.0
+ for dim in dims:
+ s = N.add.reduce(s, dim)
+ if keepdims == 1:
+ shp = list(a.shape)
+ for dim in dims:
+ shp[dim] = 1
+ s = N.reshape(s, shp)
+ return s
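+# Shape sketch for the keepdims flag (hypothetical all-ones input): summing a
+# (2, 3) array over dimension 1 gives shape (2,) by default but (2, 1) with
+# keepdims=1, so the result still broadcasts against the original array.
+#
+#   >>> a = N.ones((2, 3), N.float_)
+#   >>> asum(a, 1).shape               # -> (2,)
+#   >>> asum(a, 1, keepdims=1).shape   # -> (2, 1)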
+
+ def acumsum(a, dimension=None):
+ """
+Returns an array consisting of the cumulative sum of the items in the
+passed array. Dimension can equal None (ravel array first), an
+integer (the dimension over which to operate), or a sequence (operate
+over multiple dimensions, but this last one just barely makes sense).
+
+Usage: acumsum(a,dimension=None)
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ dimension = 0
+ if type(dimension) in [ListType, TupleType, N.ndarray]:
+ dimension = list(dimension)
+ dimension.sort()
+ dimension.reverse()
+ for d in dimension:
+ a = N.add.accumulate(a, d)
+ return a
+ else:
+ return N.add.accumulate(a, dimension)
+
+ def ass(inarray, dimension=None, keepdims=0):
+ """
+Squares each value in the passed array, adds these squares & returns
+the result. Unfortunate function name. :-) Defaults to ALL values in
+the array. Dimension can equal None (ravel array first), an integer
+(the dimension over which to operate), or a sequence (operate over
+multiple dimensions). Set keepdims=1 to maintain the original number
+of dimensions.
+
+Usage: ass(inarray, dimension=None, keepdims=0)
+Returns: sum-along-'dimension' for (inarray*inarray)
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ return asum(inarray * inarray, dimension, keepdims)
+
+ def asummult(array1, array2, dimension=None, keepdims=0):
+ """
+Multiplies elements in array1 and array2, element by element, and
+returns the sum (along 'dimension') of all resulting multiplications.
+Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions). A trivial function, but included for completeness.
+
+Usage: asummult(array1,array2,dimension=None,keepdims=0)
+"""
+ if dimension == None:
+ array1 = N.ravel(array1)
+ array2 = N.ravel(array2)
+ dimension = 0
+ return asum(array1 * array2, dimension, keepdims)
+
+ def asquare_of_sums(inarray, dimension=None, keepdims=0):
+ """
+Adds the values in the passed array, squares that sum, and returns the
+result. Dimension can equal None (ravel array first), an integer (the
+dimension over which to operate), or a sequence (operate over multiple
+dimensions). If keepdims=1, the returned array will have the same
+NUMBER of dimensions as the original.
+
+Usage: asquare_of_sums(inarray, dimension=None, keepdims=0)
+Returns: the square of the sum over dim(s) in dimension
+"""
+ if dimension == None:
+ inarray = N.ravel(inarray)
+ dimension = 0
+ s = asum(inarray, dimension, keepdims)
+ if type(s) == N.ndarray:
+ return s.astype(N.float_) * s
+ else:
+ return float(s) * s
+
+ def asumdiffsquared(a, b, dimension=None, keepdims=0):
+ """
+Takes pairwise differences of the values in arrays a and b, squares
+these differences, and returns the sum of these squares. Dimension
+can equal None (ravel array first), an integer (the dimension over
+which to operate), or a sequence (operate over multiple dimensions).
+keepdims=1 means the result keeps the same number of dimensions as a and b.
+
+Usage: asumdiffsquared(a,b)
+Returns: sum[ravel(a-b)**2]
+"""
+ if dimension == None:
+ a = N.ravel(a)
+ b = N.ravel(b)
+ dimension = 0
+ return asum((a - b)**2, dimension, keepdims)
+
+ def ashellsort(inarray):
+ """
+Shellsort algorithm. Sorts a 1D-array.
+
+Usage: ashellsort(inarray)
+Returns: sorted-inarray, sorting-index-vector (for original array)
+"""
+ n = len(inarray)
+ svec = inarray * 1.0
+ ivec = range(n)
+ gap = n / 2 # integer division needed
+ while gap > 0:
+ for i in range(gap, n):
+ for j in range(i - gap, -1, -gap):
+ while j >= 0 and svec[j] > svec[j + gap]:
+ temp = svec[j]
+ svec[j] = svec[j + gap]
+ svec[j + gap] = temp
+ itemp = ivec[j]
+ ivec[j] = ivec[j + gap]
+ ivec[j + gap] = itemp
+ gap = gap / 2 # integer division needed
+# svec is now sorted input vector, ivec has the order svec[i] = vec[ivec[i]]
+ return svec, ivec
+
+ def arankdata(inarray):
+ """
+Ranks the data in inarray, dealing with ties appropriately. Assumes
+a 1D inarray. Adapted from Gary Perlman's |Stat ranksort.
+
+Usage: arankdata(inarray)
+Returns: array of length equal to inarray, containing rank scores
+"""
+ n = len(inarray)
+ svec, ivec = ashellsort(inarray)
+ sumranks = 0
+ dupcount = 0
+ newarray = N.zeros(n, N.float_)
+ for i in range(n):
+ sumranks = sumranks + i
+ dupcount = dupcount + 1
+ if i == n - 1 or svec[i] != svec[i + 1]:
+ averank = sumranks / float(dupcount) + 1
+ for j in range(i - dupcount + 1, i + 1):
+ newarray[ivec[j]] = averank
+ sumranks = 0
+ dupcount = 0
+ return newarray
+
+ def afindwithin(data):
+ """
+Returns a binary vector, 1=within-subject factor, 0=between. Input
+equals the entire data array (i.e., column 1=random factor, last
+column = measured values).
+
+Usage: afindwithin(data) data in |Stat format
+"""
+ numfact = len(data[0]) - 2
+ withinvec = [0] * numfact
+ for col in range(1, numfact + 1):
+ rows = pstat.linexand(data, col, pstat.unique(pstat.colex(data, 1))[0]
+ ) # get 1 level of this factor
+ if len(pstat.unique(pstat.colex(rows, 0))) < len(
+ rows): # if fewer subjects than scores on this factor
+ withinvec[col - 1] = 1
+ return withinvec
+
+ #########################################################
+ #########################################################
+ ###### RE-DEFINE DISPATCHES TO INCLUDE ARRAYS #########
+ #########################################################
+ #########################################################
+
+ ## CENTRAL TENDENCY:
+ geometricmean = Dispatch(
+ (lgeometricmean, (ListType, TupleType)), (ageometricmean, (N.ndarray,)))
+ harmonicmean = Dispatch(
+ (lharmonicmean, (ListType, TupleType)), (aharmonicmean, (N.ndarray,)))
+ mean = Dispatch((lmean, (ListType, TupleType)), (amean, (N.ndarray,)))
+ median = Dispatch((lmedian, (ListType, TupleType)), (amedian, (N.ndarray,)))
+ medianscore = Dispatch(
+ (lmedianscore, (ListType, TupleType)), (amedianscore, (N.ndarray,)))
+ mode = Dispatch((lmode, (ListType, TupleType)), (amode, (N.ndarray,)))
+ tmean = Dispatch((atmean, (N.ndarray,)))
+ tvar = Dispatch((atvar, (N.ndarray,)))
+ tstdev = Dispatch((atstdev, (N.ndarray,)))
+ tsem = Dispatch((atsem, (N.ndarray,)))
+
+ ## VARIATION:
+ moment = Dispatch((lmoment, (ListType, TupleType)), (amoment, (N.ndarray,)))
+ variation = Dispatch(
+ (lvariation, (ListType, TupleType)), (avariation, (N.ndarray,)))
+ skew = Dispatch((lskew, (ListType, TupleType)), (askew, (N.ndarray,)))
+ kurtosis = Dispatch(
+ (lkurtosis, (ListType, TupleType)), (akurtosis, (N.ndarray,)))
+ describe = Dispatch(
+ (ldescribe, (ListType, TupleType)), (adescribe, (N.ndarray,)))
+
+ ## DISTRIBUTION TESTS
+
+ skewtest = Dispatch(
+ (askewtest, (ListType, TupleType)), (askewtest, (N.ndarray,)))
+ kurtosistest = Dispatch(
+ (akurtosistest, (ListType, TupleType)), (akurtosistest, (N.ndarray,)))
+ normaltest = Dispatch(
+ (anormaltest, (ListType, TupleType)), (anormaltest, (N.ndarray,)))
+
+ ## FREQUENCY STATS:
+ itemfreq = Dispatch(
+ (litemfreq, (ListType, TupleType)), (aitemfreq, (N.ndarray,)))
+ scoreatpercentile = Dispatch(
+ (lscoreatpercentile, (ListType, TupleType)), (ascoreatpercentile,
+ (N.ndarray,)))
+ percentileofscore = Dispatch(
+ (lpercentileofscore, (ListType, TupleType)), (apercentileofscore,
+ (N.ndarray,)))
+ histogram = Dispatch(
+ (lhistogram, (ListType, TupleType)), (ahistogram, (N.ndarray,)))
+ cumfreq = Dispatch(
+ (lcumfreq, (ListType, TupleType)), (acumfreq, (N.ndarray,)))
+ relfreq = Dispatch(
+ (lrelfreq, (ListType, TupleType)), (arelfreq, (N.ndarray,)))
+
+ ## VARIABILITY:
+ obrientransform = Dispatch(
+ (lobrientransform, (ListType, TupleType)), (aobrientransform,
+ (N.ndarray,)))
+ samplevar = Dispatch(
+ (lsamplevar, (ListType, TupleType)), (asamplevar, (N.ndarray,)))
+ samplestdev = Dispatch(
+ (lsamplestdev, (ListType, TupleType)), (asamplestdev, (N.ndarray,)))
+ signaltonoise = Dispatch((asignaltonoise, (N.ndarray,)),)
+ var = Dispatch((lvar, (ListType, TupleType)), (avar, (N.ndarray,)))
+ stdev = Dispatch((lstdev, (ListType, TupleType)), (astdev, (N.ndarray,)))
+ sterr = Dispatch((lsterr, (ListType, TupleType)), (asterr, (N.ndarray,)))
+ sem = Dispatch((lsem, (ListType, TupleType)), (asem, (N.ndarray,)))
+ z = Dispatch((lz, (ListType, TupleType)), (az, (N.ndarray,)))
+ zs = Dispatch((lzs, (ListType, TupleType)), (azs, (N.ndarray,)))
+
+ ## TRIMMING FCNS:
+ threshold = Dispatch((athreshold, (N.ndarray,)),)
+ trimboth = Dispatch(
+ (ltrimboth, (ListType, TupleType)), (atrimboth, (N.ndarray,)))
+ trim1 = Dispatch((ltrim1, (ListType, TupleType)), (atrim1, (N.ndarray,)))
+
+ ## CORRELATION FCNS:
+ paired = Dispatch((lpaired, (ListType, TupleType)), (apaired, (N.ndarray,)))
+ lincc = Dispatch((llincc, (ListType, TupleType)), (alincc, (N.ndarray,)))
+ pearsonr = Dispatch(
+ (lpearsonr, (ListType, TupleType)), (apearsonr, (N.ndarray,)))
+ spearmanr = Dispatch(
+ (lspearmanr, (ListType, TupleType)), (aspearmanr, (N.ndarray,)))
+ pointbiserialr = Dispatch(
+ (lpointbiserialr, (ListType, TupleType)), (apointbiserialr, (N.ndarray,)))
+ kendalltau = Dispatch(
+ (lkendalltau, (ListType, TupleType)), (akendalltau, (N.ndarray,)))
+ linregress = Dispatch(
+ (llinregress, (ListType, TupleType)), (alinregress, (N.ndarray,)))
+
+ ## INFERENTIAL STATS:
+ ttest_1samp = Dispatch(
+ (lttest_1samp, (ListType, TupleType)), (attest_1samp, (N.ndarray,)))
+ ttest_ind = Dispatch(
+ (lttest_ind, (ListType, TupleType)), (attest_ind, (N.ndarray,)))
+ ttest_rel = Dispatch(
+ (lttest_rel, (ListType, TupleType)), (attest_rel, (N.ndarray,)))
+ chisquare = Dispatch(
+ (lchisquare, (ListType, TupleType)), (achisquare, (N.ndarray,)))
+ ks_2samp = Dispatch(
+ (lks_2samp, (ListType, TupleType)), (aks_2samp, (N.ndarray,)))
+ mannwhitneyu = Dispatch(
+ (lmannwhitneyu, (ListType, TupleType)), (amannwhitneyu, (N.ndarray,)))
+ tiecorrect = Dispatch(
+ (ltiecorrect, (ListType, TupleType)), (atiecorrect, (N.ndarray,)))
+ ranksums = Dispatch(
+ (lranksums, (ListType, TupleType)), (aranksums, (N.ndarray,)))
+ wilcoxont = Dispatch(
+ (lwilcoxont, (ListType, TupleType)), (awilcoxont, (N.ndarray,)))
+ kruskalwallish = Dispatch(
+ (lkruskalwallish, (ListType, TupleType)), (akruskalwallish, (N.ndarray,)))
+ friedmanchisquare = Dispatch(
+ (lfriedmanchisquare, (ListType, TupleType)), (afriedmanchisquare,
+ (N.ndarray,)))
+
+ ## PROBABILITY CALCS:
+ chisqprob = Dispatch(
+ (lchisqprob, (IntType, FloatType)), (achisqprob, (N.ndarray,)))
+ zprob = Dispatch((lzprob, (IntType, FloatType)), (azprob, (N.ndarray,)))
+ ksprob = Dispatch((lksprob, (IntType, FloatType)), (aksprob, (N.ndarray,)))
+ fprob = Dispatch((lfprob, (IntType, FloatType)), (afprob, (N.ndarray,)))
+ betacf = Dispatch((lbetacf, (IntType, FloatType)), (abetacf, (N.ndarray,)))
+ betai = Dispatch((lbetai, (IntType, FloatType)), (abetai, (N.ndarray,)))
+ erfcc = Dispatch((lerfcc, (IntType, FloatType)), (aerfcc, (N.ndarray,)))
+ gammln = Dispatch((lgammln, (IntType, FloatType)), (agammln, (N.ndarray,)))
+
+ ## ANOVA FUNCTIONS:
+ F_oneway = Dispatch(
+ (lF_oneway, (ListType, TupleType)), (aF_oneway, (N.ndarray,)))
+ F_value = Dispatch(
+ (lF_value, (ListType, TupleType)), (aF_value, (N.ndarray,)))
+
+ ## SUPPORT FUNCTIONS:
+ incr = Dispatch((lincr, (ListType, TupleType, N.ndarray)),)
+ sum = Dispatch((lsum, (ListType, TupleType)), (asum, (N.ndarray,)))
+ cumsum = Dispatch((lcumsum, (ListType, TupleType)), (acumsum, (N.ndarray,)))
+ ss = Dispatch((lss, (ListType, TupleType)), (ass, (N.ndarray,)))
+ summult = Dispatch(
+ (lsummult, (ListType, TupleType)), (asummult, (N.ndarray,)))
+ square_of_sums = Dispatch(
+ (lsquare_of_sums, (ListType, TupleType)), (asquare_of_sums, (N.ndarray,)))
+ sumdiffsquared = Dispatch(
+ (lsumdiffsquared, (ListType, TupleType)), (asumdiffsquared, (N.ndarray,)))
+ shellsort = Dispatch(
+ (lshellsort, (ListType, TupleType)), (ashellsort, (N.ndarray,)))
+ rankdata = Dispatch(
+ (lrankdata, (ListType, TupleType)), (arankdata, (N.ndarray,)))
+ findwithin = Dispatch(
+ (lfindwithin, (ListType, TupleType)), (afindwithin, (N.ndarray,)))
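+# How these wrappers behave (sketch; Dispatch is in scope from earlier in the
+# module): each name inspects the type of its first argument and routes to
+# the list ('l'-prefixed) or array ('a'-prefixed) implementation, so callers
+# stay agnostic about the container type:
+#
+#   >>> mean([1, 2, 3, 4])            # ListType -> lmean -> 2.5
+#   >>> mean(N.array([1, 2, 3, 4]))   # N.ndarray -> amean -> 2.5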
+
+###################### END OF NUMERIC FUNCTION BLOCK #####################
+
+###################### END OF STATISTICAL FUNCTIONS ######################
+
+except ImportError:
+ pass
diff --git a/cros_utils/tabulator.py b/cros_utils/tabulator.py
new file mode 100644
index 00000000..2c26ccad
--- /dev/null
+++ b/cros_utils/tabulator.py
@@ -0,0 +1,1248 @@
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Table generating, analyzing and printing functions.
+
+This defines several classes that are used to generate, analyze and print
+tables.
+
+Example usage:
+
+ from cros_utils import tabulator
+
+ data = [["benchmark1", "33", "44"],["benchmark2", "44", "33"]]
+ tabulator.GetSimpleTable(data)
+
+You could also use it to generate more complex tables with analysis such as
+p-values, custom colors, etc. Tables are generated by TableGenerator and
+analyzed/formatted by TableFormatter. TableFormatter can take in a list of
+columns with custom result computation and coloring, and will compare values in
+each row according to that scheme. Here is a more complex example of printing a
+table:
+
+ from cros_utils import tabulator
+
+ runs = [[{"k1": "10", "k2": "12", "k5": "40", "k6": "40",
+ "ms_1": "20", "k7": "FAIL", "k8": "PASS", "k9": "PASS",
+ "k10": "0"},
+ {"k1": "13", "k2": "14", "k3": "15", "ms_1": "10", "k8": "PASS",
+ "k9": "FAIL", "k10": "0"}],
+ [{"k1": "50", "k2": "51", "k3": "52", "k4": "53", "k5": "35", "k6":
+ "45", "ms_1": "200", "ms_2": "20", "k7": "FAIL", "k8": "PASS", "k9":
+ "PASS"}]]
+ labels = ["vanilla", "modified"]
+ tg = TableGenerator(runs, labels, TableGenerator.SORT_BY_VALUES_DESC)
+ table = tg.GetTable()
+ columns = [Column(LiteralResult(),
+ Format(),
+ "Literal"),
+ Column(AmeanResult(),
+ Format()),
+ Column(StdResult(),
+ Format()),
+ Column(CoeffVarResult(),
+ CoeffVarFormat()),
+ Column(NonEmptyCountResult(),
+ Format()),
+ Column(AmeanRatioResult(),
+ PercentFormat()),
+ Column(AmeanRatioResult(),
+ RatioFormat()),
+ Column(GmeanRatioResult(),
+ RatioFormat()),
+ Column(PValueResult(),
+ PValueFormat()),
+ ]
+ tf = TableFormatter(table, columns)
+ cell_table = tf.GetCellTable()
+ tp = TablePrinter(cell_table, out_to)
+ print(tp.Print())
+
+"""
+
+from __future__ import print_function
+
+import getpass
+import math
+import sys
+import numpy
+
+import colortrans
+from email_sender import EmailSender
+import misc
+
+
+def _AllFloat(values):
+ return all([misc.IsFloat(v) for v in values])
+
+
+def _GetFloats(values):
+ return [float(v) for v in values]
+
+
+def _StripNone(results):
+ res = []
+ for result in results:
+ if result is not None:
+ res.append(result)
+ return res
+
+
+class TableGenerator(object):
+ """Creates a table from a list of list of dicts.
+
+ The main public function is called GetTable().
+ """
+ SORT_BY_KEYS = 0
+ SORT_BY_KEYS_DESC = 1
+ SORT_BY_VALUES = 2
+ SORT_BY_VALUES_DESC = 3
+
+ MISSING_VALUE = 'x'
+
+ def __init__(self, d, l, sort=SORT_BY_KEYS, key_name='keys'):
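+ # d: list of lists of dicts, one inner list of runs per label; l: labels.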
+ self._runs = d
+ self._labels = l
+ self._sort = sort
+ self._key_name = key_name
+
+ def _AggregateKeys(self):
+ keys = set([])
+ for run_list in self._runs:
+ for run in run_list:
+ keys = keys.union(run.keys())
+ return keys
+
+ def _GetHighestValue(self, key):
+ values = []
+ for run_list in self._runs:
+ for run in run_list:
+ if key in run:
+ values.append(run[key])
+ values = _StripNone(values)
+ if _AllFloat(values):
+ values = _GetFloats(values)
+ return max(values)
+
+ def _GetLowestValue(self, key):
+ values = []
+ for run_list in self._runs:
+ for run in run_list:
+ if key in run:
+ values.append(run[key])
+ values = _StripNone(values)
+ if _AllFloat(values):
+ values = _GetFloats(values)
+ return min(values)
+
+ def _SortKeys(self, keys):
+ if self._sort == self.SORT_BY_KEYS:
+ return sorted(keys)
+ elif self._sort == self.SORT_BY_VALUES:
+ # pylint: disable=unnecessary-lambda
+ return sorted(keys, key=lambda x: self._GetLowestValue(x))
+ elif self._sort == self.SORT_BY_VALUES_DESC:
+ # pylint: disable=unnecessary-lambda
+ return sorted(keys, key=lambda x: self._GetHighestValue(x), reverse=True)
+ else:
+ assert 0, 'Unimplemented sort %s' % self._sort
+
+ def _GetKeys(self):
+ keys = self._AggregateKeys()
+ return self._SortKeys(keys)
+
+ def GetTable(self, number_of_rows=sys.maxint):
+ """Returns a table from a list of list of dicts.
+
+ The list of list of dicts is passed into the constructor of TableGenerator.
+ This method converts that into a canonical list of lists which represents a
+ table of values.
+
+ Args:
+ number_of_rows: Maximum number of rows to return from the table.
+
+ Returns:
+ A list of lists which is the table.
+
+ Example:
+ We have the following runs:
+ [[{"k1": "v1", "k2": "v2"}, {"k1": "v3"}],
+ [{"k1": "v4", "k4": "v5"}]]
+ and the following labels:
+ ["vanilla", "modified"]
+ it will return:
+ [["Key", "vanilla", "modified"]
+ ["k1", ["v1", "v3"], ["v4"]]
+ ["k2", ["v2"], []]
+ ["k4", [], ["v5"]]]
+ The returned table can then be processed further by other classes in this
+ module.
+ """
+ keys = self._GetKeys()
+ header = [self._key_name] + self._labels
+ table = [header]
+ rows = 0
+ for k in keys:
+ row = [k]
+ unit = None
+ for run_list in self._runs:
+ v = []
+ for run in run_list:
+ if k in run:
+ if type(run[k]) is list:
+ val = run[k][0]
+ unit = run[k][1]
+ else:
+ val = run[k]
+ v.append(val)
+ else:
+ v.append(None)
+ row.append(v)
+ # If we got a 'unit' value, append the units name to the key name.
+ if unit:
+ keyname = row[0] + ' (%s) ' % unit
+ row[0] = keyname
+ table.append(row)
+ rows += 1
+ if rows == number_of_rows:
+ break
+ return table
+
+
+class Result(object):
+ """A class that respresents a single result.
+
+ This single result is obtained by condensing the information from a list of
+ runs and a list of baseline runs.
+ """
+
+ def __init__(self):
+ pass
+
+ def _AllStringsSame(self, values):
+ values_set = set(values)
+ return len(values_set) == 1
+
+ def NeedsBaseline(self):
+ return False
+
+ # pylint: disable=unused-argument
+ def _Literal(self, cell, values, baseline_values):
+ cell.value = ' '.join([str(v) for v in values])
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ self._Literal(cell, values, baseline_values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ self._Literal(cell, values, baseline_values)
+
+ def _InvertIfLowerIsBetter(self, cell):
+ pass
+
+ def _GetGmean(self, values):
+ if not values:
+ return float('nan')
+ if any([v < 0 for v in values]):
+ return float('nan')
+ if any([v == 0 for v in values]):
+ return 0.0
+ log_list = [math.log(v) for v in values]
+ gmean_log = sum(log_list) / len(log_list)
+ return math.exp(gmean_log)
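+ # Worked example (invented values): for [2.0, 8.0] the log-mean is
+ # (ln 2 + ln 8)/2 = ln 4, so the geometric mean is 4.0. Zeros short-circuit
+ # to 0.0 and any negative value yields NaN, matching the guards above.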
+
+ def Compute(self, cell, values, baseline_values):
+ """Compute the result given a list of values and baseline values.
+
+ Args:
+ cell: A cell data structure to populate.
+ values: List of values.
+ baseline_values: List of baseline values. Can be None if this is the
+ baseline itself.
+ """
+ all_floats = True
+ values = _StripNone(values)
+ if not values:
+ cell.value = ''
+ return
+ if _AllFloat(values):
+ float_values = _GetFloats(values)
+ else:
+ all_floats = False
+ if baseline_values:
+ baseline_values = _StripNone(baseline_values)
+ if baseline_values:
+ if _AllFloat(baseline_values):
+ float_baseline_values = _GetFloats(baseline_values)
+ else:
+ all_floats = False
+ else:
+ if self.NeedsBaseline():
+ cell.value = ''
+ return
+ float_baseline_values = None
+ if all_floats:
+ self._ComputeFloat(cell, float_values, float_baseline_values)
+ self._InvertIfLowerIsBetter(cell)
+ else:
+ self._ComputeString(cell, values, baseline_values)
+
+
+class LiteralResult(Result):
+ """A literal result."""
+
+ def __init__(self, iteration=0):
+ super(LiteralResult, self).__init__()
+ self.iteration = iteration
+
+ def Compute(self, cell, values, baseline_values):
+ try:
+ cell.value = values[self.iteration]
+ except IndexError:
+ cell.value = '-'
+
+
+class NonEmptyCountResult(Result):
+ """A class that counts the number of non-empty results.
+
+ The number of non-empty values will be stored in the cell.
+ """
+
+ def Compute(self, cell, values, baseline_values):
+ """Put the number of non-empty values in the cell result.
+
+ Args:
+ cell: Put the result in cell.value.
+ values: A list of values for the row.
+ baseline_values: A list of baseline values for the row.
+ """
+ cell.value = len(_StripNone(values))
+ if not baseline_values:
+ return
+ base_value = len(_StripNone(baseline_values))
+ if cell.value == base_value:
+ return
+ f = ColorBoxFormat()
+ len_values = len(values)
+ len_baseline_values = len(baseline_values)
+ tmp_cell = Cell()
+ tmp_cell.value = 1.0 + (float(cell.value - base_value) /
+ (max(len_values, len_baseline_values)))
+ f.Compute(tmp_cell)
+ cell.bgcolor = tmp_cell.bgcolor
+
+
+class StringMeanResult(Result):
+ """Mean of string values."""
+
+ def _ComputeString(self, cell, values, baseline_values):
+ if self._AllStringsSame(values):
+ cell.value = str(values[0])
+ else:
+ cell.value = '?'
+
+
+class AmeanResult(StringMeanResult):
+ """Arithmetic mean."""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = numpy.mean(values)
+
+
+class RawResult(Result):
+ """Raw result."""
+ pass
+
+
+class MinResult(Result):
+ """Minimum."""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = min(values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ if values:
+ cell.value = min(values)
+ else:
+ cell.value = ''
+
+
+class MaxResult(Result):
+ """Maximum."""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = max(values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ if values:
+ cell.value = max(values)
+ else:
+ cell.value = ''
+
+
+class NumericalResult(Result):
+ """Numerical result."""
+
+ def _ComputeString(self, cell, values, baseline_values):
+ cell.value = '?'
+
+
+class StdResult(NumericalResult):
+ """Standard deviation."""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ cell.value = numpy.std(values)
+
+
+class CoeffVarResult(NumericalResult):
+ """Standard deviation / Mean"""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ if numpy.mean(values) != 0.0:
+ noise = numpy.abs(numpy.std(values) / numpy.mean(values))
+ else:
+ noise = 0.0
+ cell.value = noise
+
+
+class ComparisonResult(Result):
+ """Same or Different."""
+
+ def NeedsBaseline(self):
+ return True
+
+ def _ComputeString(self, cell, values, baseline_values):
+ value = None
+ baseline_value = None
+ if self._AllStringsSame(values):
+ value = values[0]
+ if self._AllStringsSame(baseline_values):
+ baseline_value = baseline_values[0]
+ if value is not None and baseline_value is not None:
+ if value == baseline_value:
+ cell.value = 'SAME'
+ else:
+ cell.value = 'DIFFERENT'
+ else:
+ cell.value = '?'
+
+
+class PValueResult(ComparisonResult):
+ """P-value."""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ if len(values) < 2 or len(baseline_values) < 2:
+ cell.value = float('nan')
+ return
+ import stats
+ _, cell.value = stats.lttest_ind(values, baseline_values)
+
+ def _ComputeString(self, cell, values, baseline_values):
+ cell.value = float('nan')
+
+
+class KeyAwareComparisonResult(ComparisonResult):
+ """Automatic key aware comparison."""
+
+ def _IsLowerBetter(self, key):
+ # TODO(llozano): Trying to guess direction by looking at the name of the
+ # test does not seem like a good idea. Test frameworks should provide this
+ # info explicitly. I believe Telemetry has this info. Need to find it out.
+ #
+ # Below are some test names for which we are not sure what the
+ # direction is.
+ #
+ # For these we don't know what the direction is. But, since we don't
+ # specify anything, crosperf will assume higher is better:
+ # --percent_impl_scrolled--percent_impl_scrolled--percent
+ # --solid_color_tiles_analyzed--solid_color_tiles_analyzed--count
+ # --total_image_cache_hit_count--total_image_cache_hit_count--count
+ # --total_texture_upload_time_by_url
+ #
+ # About these we are doubtful but we made a guess:
+ # --average_num_missing_tiles_by_url--*--units (low is good)
+ # --experimental_mean_frame_time_by_url--*--units (low is good)
+ # --experimental_median_frame_time_by_url--*--units (low is good)
+ # --texture_upload_count--texture_upload_count--count (high is good)
+ # --total_deferred_image_decode_count--count (low is good)
+ # --total_tiles_analyzed--total_tiles_analyzed--count (high is good)
+ lower_is_better_keys = ['milliseconds', 'ms_', 'seconds_', 'KB', 'rdbytes',
+ 'wrbytes', 'dropped_percent', '(ms)', '(seconds)',
+ '--ms', '--average_num_missing_tiles',
+ '--experimental_jank', '--experimental_mean_frame',
+ '--experimental_median_frame_time',
+ '--total_deferred_image_decode_count', '--seconds']
+
+ return any([l in key for l in lower_is_better_keys])
+
+ def _InvertIfLowerIsBetter(self, cell):
+ if self._IsLowerBetter(cell.name):
+ if cell.value:
+ cell.value = 1.0 / cell.value
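+ # E.g. (hypothetical key): for a key containing 'ms_' a ratio of 2.0
+ # (twice as slow) is inverted to 0.5, so "higher is better" holds
+ # uniformly for the colored output downstream.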
+
+
+class AmeanRatioResult(KeyAwareComparisonResult):
+ """Ratio of arithmetic means of values vs. baseline values."""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ if numpy.mean(baseline_values) != 0:
+ cell.value = numpy.mean(values) / numpy.mean(baseline_values)
+ elif numpy.mean(values) != 0:
+ cell.value = 0.00
+ # A ratio of 0 flags a large difference: the baseline mean is 0 but the
+ # new mean is not.
+ else:
+ cell.value = 1.00
+ # Both means are 0, so report no difference.
+
+
+class GmeanRatioResult(KeyAwareComparisonResult):
+ """Ratio of geometric means of values vs. baseline values."""
+
+ def _ComputeFloat(self, cell, values, baseline_values):
+ if self._GetGmean(baseline_values) != 0:
+ cell.value = self._GetGmean(values) / self._GetGmean(baseline_values)
+ elif self._GetGmean(values) != 0:
+ cell.value = 0.00
+ else:
+ cell.value = 1.00
+
+
+class Color(object):
+ """Class that represents color in RGBA format."""
+
+ def __init__(self, r=0, g=0, b=0, a=0):
+ self.r = r
+ self.g = g
+ self.b = b
+ self.a = a
+
+ def __str__(self):
+ return 'r: %s g: %s b: %s a: %s' % (self.r, self.g, self.b, self.a)
+
+ def Round(self):
+ """Round RGBA values to the nearest integer."""
+ self.r = int(self.r)
+ self.g = int(self.g)
+ self.b = int(self.b)
+ self.a = int(self.a)
+
+ def GetRGB(self):
+ """Get a hex representation of the color."""
+ return '%02x%02x%02x' % (self.r, self.g, self.b)
+
+ @classmethod
+ def Lerp(cls, ratio, a, b):
+ """Perform linear interpolation between two colors.
+
+ Args:
+ ratio: The ratio to use for linear interpolation.
+ a: The first color object (used when ratio is 0).
+ b: The second color object (used when ratio is 1).
+
+ Returns:
+ Linearly interpolated color.
+ """
+ ret = cls()
+ ret.r = (b.r - a.r) * ratio + a.r
+ ret.g = (b.g - a.g) * ratio + a.g
+ ret.b = (b.b - a.b) * ratio + a.b
+ ret.a = (b.a - a.a) * ratio + a.a
+ return ret
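+ # A small sketch (invented colors): halfway between black and white lands
+ # on mid-grey, and stays fractional until Round() is called:
+ #
+ #   c = Color.Lerp(0.5, Color(0, 0, 0, 0), Color(255, 255, 255, 0))
+ #   # c.r == c.g == c.b == 127.5; c.Round() then truncates to 127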
+
+
+class Format(object):
+ """A class that represents the format of a column."""
+
+ def __init__(self):
+ pass
+
+ def Compute(self, cell):
+ """Computes the attributes of a cell based on its value.
+
+ Attributes typically are color, width, etc.
+
+ Args:
+ cell: The cell whose attributes are to be populated.
+ """
+ if cell.value is None:
+ cell.string_value = ''
+ if isinstance(cell.value, float):
+ self._ComputeFloat(cell)
+ else:
+ self._ComputeString(cell)
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = '{0:.2f}'.format(cell.value)
+
+ def _ComputeString(self, cell):
+ cell.string_value = str(cell.value)
+
+ def _GetColor(self, value, low, mid, high, power=6, mid_value=1.0):
+ min_value = 0.0
+ max_value = 2.0
+ if math.isnan(value):
+ return mid
+ if value > mid_value:
+ value = max_value - (mid_value / value)
+
+ return self._GetColorBetweenRange(value, min_value, mid_value, max_value,
+ low, mid, high, power)
+
+ def _GetColorBetweenRange(self, value, min_value, mid_value, max_value,
+ low_color, mid_color, high_color, power):
+ assert value <= max_value
+ assert value >= min_value
+ if value > mid_value:
+ value = (max_value - value) / (max_value - mid_value)
+ value **= power
+ ret = Color.Lerp(value, high_color, mid_color)
+ else:
+ value = (value - min_value) / (mid_value - min_value)
+ value **= power
+ ret = Color.Lerp(value, low_color, mid_color)
+ ret.Round()
+ return ret
+
+
+class PValueFormat(Format):
+ """Formatting for p-value."""
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = '%0.2f' % float(cell.value)
+ if float(cell.value) < 0.05:
+ cell.bgcolor = self._GetColor(cell.value,
+ Color(255, 255, 0, 0),
+ Color(255, 255, 255, 0),
+ Color(255, 255, 255, 0),
+ mid_value=0.05,
+ power=1)
+
+
+class StorageFormat(Format):
+ """Format the cell as a storage number.
+
+ Example:
+ If the cell contains a value of 1024, the string_value will be 1.0K.
+ """
+
+ def _ComputeFloat(self, cell):
+ base = 1024
+ suffices = ['K', 'M', 'G']
+ v = float(cell.value)
+ current = 0
+ while v >= base**(current + 1) and current < len(suffices):
+ current += 1
+
+ if current:
+ divisor = base**current
+ cell.string_value = '%1.1f%s' % ((v / divisor), suffices[current - 1])
+ else:
+ cell.string_value = str(cell.value)
+
+
+class CoeffVarFormat(Format):
+ """Format the cell as a percent.
+
+ Example:
+ If the cell contains a value of 1.5, the string_value will be 150.0%.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = '%1.1f%%' % (float(cell.value) * 100)
+ cell.color = self._GetColor(cell.value,
+ Color(0, 255, 0, 0),
+ Color(0, 0, 0, 0),
+ Color(255, 0, 0, 0),
+ mid_value=0.02,
+ power=1)
+
+
+class PercentFormat(Format):
+ """Format the cell as a percent.
+
+ Example:
+ If the cell contains a value of 1.5, the string_value will be +50.0%.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = '%+1.1f%%' % ((float(cell.value) - 1) * 100)
+ cell.color = self._GetColor(cell.value, Color(255, 0, 0, 0),
+ Color(0, 0, 0, 0), Color(0, 255, 0, 0))
+
+
+class RatioFormat(Format):
+ """Format the cell as a ratio.
+
+ Example:
+ If the cell contains a value of 1.5642, the string_value will be +56.4%.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = '%+1.1f%%' % ((cell.value - 1) * 100)
+ cell.color = self._GetColor(cell.value, Color(255, 0, 0, 0),
+ Color(0, 0, 0, 0), Color(0, 255, 0, 0))
+
+
+class ColorBoxFormat(Format):
+ """Format the cell as a color box.
+
+ Example:
+ If the cell contains a value of 1.5, it will get a green color.
+ If the cell contains a value of 0.5, it will get a red color.
+ The intensity of the green/red will be determined by how much above or below
+ 1.0 the value is.
+ """
+
+ def _ComputeFloat(self, cell):
+ cell.string_value = '--'
+ bgcolor = self._GetColor(cell.value, Color(255, 0, 0, 0),
+ Color(255, 255, 255, 0), Color(0, 255, 0, 0))
+ cell.bgcolor = bgcolor
+ cell.color = bgcolor
+
+
+class Cell(object):
+ """A class to represent a cell in a table.
+
+ Attributes:
+ value: The raw value of the cell.
+ color: The color of the cell.
+ bgcolor: The background color of the cell.
+ string_value: The string value of the cell.
+ suffix: A string suffix to be attached to the value when displaying.
+ prefix: A string prefix to be attached to the value when displaying.
+ color_row: Indicates whether the whole row is to inherit this cell's color.
+ bgcolor_row: Indicates whether the whole row is to inherit this cell's
+ bgcolor.
+ width: Optional specifier to make a column narrower than the usual width.
+ The usual width of a column is the max of all its cells' widths.
+ colspan: Set the colspan of the cell in the HTML table, this is used for
+ table headers. Default value is 1.
+ name: the test name of the cell.
+ header: Whether this is a header in html.
+ """
+
+ def __init__(self):
+ self.value = None
+ self.color = None
+ self.bgcolor = None
+ self.string_value = None
+ self.suffix = None
+ self.prefix = None
+ # Entire row inherits this color.
+ self.color_row = False
+ self.bgcolor_row = False
+ self.width = None
+ self.colspan = 1
+ self.name = None
+ self.header = False
+
+ def __str__(self):
+ l = []
+ l.append('value: %s' % self.value)
+ l.append('string_value: %s' % self.string_value)
+ return ' '.join(l)
+
+
+class Column(object):
+ """Class representing a column in a table.
+
+ Attributes:
+ result: an object of the Result class.
+ fmt: an object of the Format class.
+ """
+
+ def __init__(self, result, fmt, name=''):
+ self.result = result
+ self.fmt = fmt
+ self.name = name
+
+
+# Takes in:
+# ["Key", "Label1", "Label2"]
+# ["k", ["v", "v2"], [v3]]
+# etc.
+# Also takes in a format string.
+# Returns a table like:
+# ["Key", "Label1", "Label2"]
+# ["k", avg("v", "v2"), stddev("v", "v2"), etc.]]
+# according to format string
+class TableFormatter(object):
+ """Class to convert a plain table into a cell-table.
+
+ This class takes in a table generated by TableGenerator and a list of column
+ formats to apply to the table and returns a table of cells.
+ """
+
+ def __init__(self, table, columns):
+ """The constructor takes in a table and a list of columns.
+
+ Args:
+ table: A list of lists of values.
+ columns: A list of column containing what to produce and how to format it.
+ """
+ self._table = table
+ self._columns = columns
+ self._table_columns = []
+ self._out_table = []
+
+ def GenerateCellTable(self, table_type):
+ row_index = 0
+ all_failed = False
+
+ for row in self._table[1:]:
+ # It does not make sense to put retval in the summary table.
+ if str(row[0]) == 'retval' and table_type == 'summary':
+ # Check to see if any runs passed, and update all_failed.
+ all_failed = True
+ for values in row[1:]:
+ if 0 in values:
+ all_failed = False
+ continue
+ key = Cell()
+ key.string_value = str(row[0])
+ out_row = [key]
+ baseline = None
+ for values in row[1:]:
+ for column in self._columns:
+ cell = Cell()
+ cell.name = key.string_value
+ if column.result.NeedsBaseline():
+ if baseline is not None:
+ column.result.Compute(cell, values, baseline)
+ column.fmt.Compute(cell)
+ out_row.append(cell)
+ if not row_index:
+ self._table_columns.append(column)
+ else:
+ column.result.Compute(cell, values, baseline)
+ column.fmt.Compute(cell)
+ out_row.append(cell)
+ if not row_index:
+ self._table_columns.append(column)
+
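+        # The first label's values become the baseline used by NeedsBaseline
+        # columns (ratios, p-values) for the remaining labels.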
+ if baseline is None:
+ baseline = values
+ self._out_table.append(out_row)
+ row_index += 1
+
+ # If this is a summary table, and the only row in it is 'retval', and
+    # all the test runs failed, we need to add a 'Results' row to the output
+ # table.
+ if table_type == 'summary' and all_failed and len(self._table) == 2:
+ labels_row = self._table[0]
+ key = Cell()
+ key.string_value = 'Results'
+ out_row = [key]
+ baseline = None
+ for _ in labels_row[1:]:
+ for column in self._columns:
+ cell = Cell()
+ cell.name = key.string_value
+ column.result.Compute(cell, ['Fail'], baseline)
+ column.fmt.Compute(cell)
+ out_row.append(cell)
+ if not row_index:
+ self._table_columns.append(column)
+ self._out_table.append(out_row)
+
+ def AddColumnName(self):
+ """Generate Column name at the top of table."""
+ key = Cell()
+ key.header = True
+ key.string_value = 'Keys'
+ header = [key]
+ for column in self._table_columns:
+ cell = Cell()
+ cell.header = True
+ if column.name:
+ cell.string_value = column.name
+ else:
+ result_name = column.result.__class__.__name__
+ format_name = column.fmt.__class__.__name__
+
+ cell.string_value = '%s %s' % (result_name.replace('Result', ''),
+ format_name.replace('Format', ''))
+
+ header.append(cell)
+
+ self._out_table = [header] + self._out_table
+
+ def AddHeader(self, s):
+ """Put additional string on the top of the table."""
+ cell = Cell()
+ cell.header = True
+ cell.string_value = str(s)
+ header = [cell]
+ colspan = max(1, max(len(row) for row in self._table))
+ cell.colspan = colspan
+ self._out_table = [header] + self._out_table
+
+  def GetPassesAndFails(self, values):
+    """Count the passing (retval 0) and failing (non-zero) runs in values."""
+    passes = 0
+    fails = 0
+    for val in values:
+      if val == 0:
+        passes += 1
+      else:
+        fails += 1
+    return passes, fails
+
+ def AddLabelName(self):
+ """Put label on the top of the table."""
+ top_header = []
+ base_colspan = len([c for c in self._columns if not c.result.NeedsBaseline()
+ ])
+ compare_colspan = len(self._columns)
+ # Find the row with the key 'retval', if it exists. This
+ # will be used to calculate the number of iterations that passed and
+ # failed for each image label.
+ retval_row = None
+ for row in self._table:
+ if row[0] == 'retval':
+ retval_row = row
+    # The label row is organized as follows:
+    #   "keys", label_base, label_comparison1, label_comparison2
+    # The first cell has colspan 1, the second has base_colspan, and the
+    # rest have compare_colspan.
+ column_position = 0
+ for label in self._table[0]:
+ cell = Cell()
+ cell.header = True
+ # Put the number of pass/fail iterations in the image label header.
+ if column_position > 0 and retval_row:
+ retval_values = retval_row[column_position]
+ if type(retval_values) is list:
+ passes, fails = self.GetPassesAndFails(retval_values)
+ cell.string_value = str(label) + ' (pass:%d fail:%d)' % (passes,
+ fails)
+ else:
+ cell.string_value = str(label)
+ else:
+ cell.string_value = str(label)
+ if top_header:
+ cell.colspan = base_colspan
+ if len(top_header) > 1:
+ cell.colspan = compare_colspan
+ top_header.append(cell)
+      column_position += 1
+ self._out_table = [top_header] + self._out_table
+
+ def _PrintOutTable(self):
+ o = ''
+ for row in self._out_table:
+ for cell in row:
+ o += str(cell) + ' '
+ o += '\n'
+ print(o)
+
+ def GetCellTable(self, table_type='full', headers=True):
+ """Function to return a table of cells.
+
+ The table (list of lists) is converted into a table of cells by this
+ function.
+
+ Args:
+      table_type: Can be 'full' or 'summary'.
+      headers: A boolean saying whether we want the default headers.
+
+ Returns:
+ A table of cells with each cell having the properties and string values as
+      requested by the columns passed in the constructor.
+ """
+ # Generate the cell table, creating a list of dynamic columns on the fly.
+ if not self._out_table:
+ self.GenerateCellTable(table_type)
+ if headers:
+ self.AddColumnName()
+ self.AddLabelName()
+ return self._out_table
+
+
+class TablePrinter(object):
+ """Class to print a cell table to the console, file or html."""
+ PLAIN = 0
+ CONSOLE = 1
+ HTML = 2
+ TSV = 3
+ EMAIL = 4
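+  # For example, TablePrinter(cell_table, TablePrinter.HTML).Print() returns
+  # an HTML <table> string, while CONSOLE wraps values in ANSI color escapes.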
+
+ def __init__(self, table, output_type):
+ """Constructor that stores the cell table and output type."""
+ self._table = table
+ self._output_type = output_type
+ self._row_styles = []
+ self._column_styles = []
+
+ # Compute whole-table properties like max-size, etc.
+ def _ComputeStyle(self):
+ self._row_styles = []
+ for row in self._table:
+ row_style = Cell()
+ for cell in row:
+ if cell.color_row:
+ assert cell.color, 'Cell color not set but color_row set!'
+ assert not row_style.color, 'Multiple row_style.colors found!'
+ row_style.color = cell.color
+ if cell.bgcolor_row:
+ assert cell.bgcolor, 'Cell bgcolor not set but bgcolor_row set!'
+ assert not row_style.bgcolor, 'Multiple row_style.bgcolors found!'
+ row_style.bgcolor = cell.bgcolor
+ self._row_styles.append(row_style)
+
+ self._column_styles = []
+ if len(self._table) < 2:
+ return
+
+ for i in range(max(len(row) for row in self._table)):
+ column_style = Cell()
+ for row in self._table:
+ if not any([cell.colspan != 1 for cell in row]):
+ column_style.width = max(column_style.width, len(row[i].string_value))
+ self._column_styles.append(column_style)
+
+ def _GetBGColorFix(self, color):
+ if self._output_type == self.CONSOLE:
+ rgb = color.GetRGB()
+ prefix, _ = colortrans.rgb2short(rgb)
+ # pylint: disable=anomalous-backslash-in-string
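+      # '48;5;N' selects xterm-256 background color N; e.g. a pure red
+      # Color would become '\033[48;5;196m' (assuming colortrans.rgb2short
+      # maps 'ff0000' to index 196).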
+ prefix = '\033[48;5;%sm' % prefix
+ suffix = '\033[0m'
+ elif self._output_type in [self.EMAIL, self.HTML]:
+ rgb = color.GetRGB()
+ prefix = ("<FONT style=\"BACKGROUND-COLOR:#{0}\">".format(rgb))
+ suffix = '</FONT>'
+ elif self._output_type in [self.PLAIN, self.TSV]:
+ prefix = ''
+ suffix = ''
+ return prefix, suffix
+
+ def _GetColorFix(self, color):
+ if self._output_type == self.CONSOLE:
+ rgb = color.GetRGB()
+ prefix, _ = colortrans.rgb2short(rgb)
+ # pylint: disable=anomalous-backslash-in-string
+ prefix = '\033[38;5;%sm' % prefix
+ suffix = '\033[0m'
+ elif self._output_type in [self.EMAIL, self.HTML]:
+ rgb = color.GetRGB()
+ prefix = '<FONT COLOR=#{0}>'.format(rgb)
+ suffix = '</FONT>'
+ elif self._output_type in [self.PLAIN, self.TSV]:
+ prefix = ''
+ suffix = ''
+ return prefix, suffix
+
+ def Print(self):
+ """Print the table to a console, html, etc.
+
+ Returns:
+ A string that contains the desired representation of the table.
+ """
+ self._ComputeStyle()
+ return self._GetStringValue()
+
+ def _GetCellValue(self, i, j):
+ cell = self._table[i][j]
+ out = cell.string_value
+ raw_width = len(out)
+
+ if cell.color:
+ p, s = self._GetColorFix(cell.color)
+ out = '%s%s%s' % (p, out, s)
+
+ if cell.bgcolor:
+ p, s = self._GetBGColorFix(cell.bgcolor)
+ out = '%s%s%s' % (p, out, s)
+
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.EMAIL]:
+ if cell.width:
+ width = cell.width
+ else:
+ if self._column_styles:
+ width = self._column_styles[j].width
+ else:
+ width = len(cell.string_value)
+ if cell.colspan > 1:
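+        # A spanning cell is padded to the combined width of the columns it
+        # covers, keeping plain-text output aligned.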
+ width = 0
+ start = 0
+ for k in range(j):
+ start += self._table[i][k].colspan
+ for k in range(cell.colspan):
+ width += self._column_styles[start + k].width
+ if width > raw_width:
+ padding = ('%' + str(width - raw_width) + 's') % ''
+ out = padding + out
+
+ if self._output_type == self.HTML:
+ if cell.header:
+ tag = 'th'
+ else:
+ tag = 'td'
+ out = "<{0} colspan = \"{2}\"> {1} </{0}>".format(tag, out, cell.colspan)
+
+ return out
+
+ def _GetHorizontalSeparator(self):
+ if self._output_type in [self.CONSOLE, self.PLAIN, self.EMAIL]:
+ return ' '
+ if self._output_type == self.HTML:
+ return ''
+ if self._output_type == self.TSV:
+ return '\t'
+
+ def _GetVerticalSeparator(self):
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
+ return '\n'
+ if self._output_type == self.HTML:
+ return '</tr>\n<tr>'
+
+ def _GetPrefix(self):
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
+ return ''
+ if self._output_type == self.HTML:
+ return "<p></p><table id=\"box-table-a\">\n<tr>"
+
+ def _GetSuffix(self):
+ if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
+ return ''
+ if self._output_type == self.HTML:
+ return '</tr>\n</table>'
+
+ def _GetStringValue(self):
+ o = ''
+ o += self._GetPrefix()
+ for i in range(len(self._table)):
+ row = self._table[i]
+ # Apply row color and bgcolor.
+ p = s = bgp = bgs = ''
+ if self._row_styles[i].bgcolor:
+ bgp, bgs = self._GetBGColorFix(self._row_styles[i].bgcolor)
+ if self._row_styles[i].color:
+ p, s = self._GetColorFix(self._row_styles[i].color)
+ o += p + bgp
+ for j in range(len(row)):
+ out = self._GetCellValue(i, j)
+ o += out + self._GetHorizontalSeparator()
+ o += s + bgs
+ o += self._GetVerticalSeparator()
+ o += self._GetSuffix()
+ return o
+
+
+# Some common drivers
+def GetSimpleTable(table, out_to=TablePrinter.CONSOLE):
+ """Prints a simple table.
+
+  This is used by code that has a simple list of lists and wants to produce a
+  table with arithmetic means (ameans), the percentage ratio of ameans, and a
+  color box.
+
+ Args:
+ table: a list of lists.
+    out_to: Specifies the output format. Currently HTML and CONSOLE are
+      supported.
+
+ Returns:
+ A string version of the table that can be printed to the console.
+
+ Example:
+    GetSimpleTable([["binary", "b1", "b2"], ["size", "300", "400"]])
+ will produce a colored table that can be printed to the console.
+ """
+ columns = [
+ Column(AmeanResult(), Format()),
+ Column(AmeanRatioResult(), PercentFormat()),
+ Column(AmeanRatioResult(), ColorBoxFormat()),
+ ]
+ our_table = [table[0]]
+ for row in table[1:]:
+ our_row = [row[0]]
+ for v in row[1:]:
+ our_row.append([v])
+ our_table.append(our_row)
+
+ tf = TableFormatter(our_table, columns)
+ cell_table = tf.GetCellTable()
+ tp = TablePrinter(cell_table, out_to)
+ return tp.Print()
+
+
+# pylint: disable=redefined-outer-name
+def GetComplexTable(runs, labels, out_to=TablePrinter.CONSOLE):
+ """Prints a complex table.
+
+ This can be used to generate a table with arithmetic mean, standard deviation,
+ coefficient of variation, p-values, etc.
+
+ Args:
+ runs: A list of lists with data to tabulate.
+ labels: A list of labels that correspond to the runs.
+    out_to: Specifies the format of the table (e.g. CONSOLE or HTML).
+
+ Returns:
+ A string table that can be printed to the console or put in an HTML file.
+ """
+ tg = TableGenerator(runs, labels, TableGenerator.SORT_BY_VALUES_DESC)
+ table = tg.GetTable()
+ columns = [Column(LiteralResult(), Format(), 'Literal'),
+ Column(AmeanResult(), Format()), Column(StdResult(), Format()),
+ Column(CoeffVarResult(), CoeffVarFormat()),
+ Column(NonEmptyCountResult(), Format()),
+ Column(AmeanRatioResult(), PercentFormat()),
+ Column(AmeanRatioResult(), RatioFormat()),
+ Column(GmeanRatioResult(), RatioFormat()),
+ Column(PValueResult(), PValueFormat())]
+ tf = TableFormatter(table, columns)
+ cell_table = tf.GetCellTable()
+ tp = TablePrinter(cell_table, out_to)
+ return tp.Print()
+
+
+if __name__ == '__main__':
+ # Run a few small tests here.
+ runs = [[{'k1': '10',
+ 'k2': '12',
+ 'k5': '40',
+ 'k6': '40',
+ 'ms_1': '20',
+ 'k7': 'FAIL',
+ 'k8': 'PASS',
+ 'k9': 'PASS',
+ 'k10': '0'}, {'k1': '13',
+ 'k2': '14',
+ 'k3': '15',
+ 'ms_1': '10',
+ 'k8': 'PASS',
+ 'k9': 'FAIL',
+ 'k10': '0'}], [{'k1': '50',
+ 'k2': '51',
+ 'k3': '52',
+ 'k4': '53',
+ 'k5': '35',
+ 'k6': '45',
+ 'ms_1': '200',
+ 'ms_2': '20',
+ 'k7': 'FAIL',
+ 'k8': 'PASS',
+ 'k9': 'PASS'}]]
+ labels = ['vanilla', 'modified']
+ t = GetComplexTable(runs, labels, TablePrinter.CONSOLE)
+ print(t)
+ email = GetComplexTable(runs, labels, TablePrinter.EMAIL)
+
+ runs = [[{'k1': '1'}, {'k1': '1.1'}, {'k1': '1.2'}],
+ [{'k1': '5'}, {'k1': '5.1'}, {'k1': '5.2'}]]
+ t = GetComplexTable(runs, labels, TablePrinter.CONSOLE)
+ print(t)
+
+ simple_table = [
+ ['binary', 'b1', 'b2', 'b3'],
+ ['size', 100, 105, 108],
+ ['rodata', 100, 80, 70],
+ ['data', 100, 100, 100],
+ ['debug', 100, 140, 60],
+ ]
+ t = GetSimpleTable(simple_table)
+ print(t)
+ email += GetSimpleTable(simple_table, TablePrinter.HTML)
+ email_to = [getpass.getuser()]
+ email = "<pre style='font-size: 13px'>%s</pre>" % email
+ EmailSender().SendEmail(email_to, 'SimpleTableTest', email, msg_type='html')
diff --git a/cros_utils/tabulator_test.py b/cros_utils/tabulator_test.py
new file mode 100644
index 00000000..21cd1e73
--- /dev/null
+++ b/cros_utils/tabulator_test.py
@@ -0,0 +1,141 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+"""Tests for the tabulator module."""
+
+from __future__ import print_function
+
+__author__ = 'asharif@google.com (Ahmad Sharif)'
+
+# System modules
+import unittest
+
+# Local modules
+import tabulator
+
+
+class TabulatorTest(unittest.TestCase):
+ """Tests for the Tabulator class."""
+
+ def testResult(self):
+ table = ['k1', ['1', '3'], ['55']]
+ result = tabulator.Result()
+ cell = tabulator.Cell()
+ result.Compute(cell, table[2], table[1])
+ expected = ' '.join([str(float(v)) for v in table[2]])
+ self.assertTrue(cell.value == expected)
+
+ result = tabulator.AmeanResult()
+ cell = tabulator.Cell()
+ result.Compute(cell, table[2], table[1])
+ self.assertTrue(cell.value == float(table[2][0]))
+
+ def testStringMean(self):
+ smr = tabulator.StringMeanResult()
+ cell = tabulator.Cell()
+ value = 'PASS'
+ values = [value for _ in range(3)]
+ smr.Compute(cell, values, None)
+ self.assertTrue(cell.value == value)
+ values.append('FAIL')
+ smr.Compute(cell, values, None)
+ self.assertTrue(cell.value == '?')
+
+ def testStorageFormat(self):
+ sf = tabulator.StorageFormat()
+ cell = tabulator.Cell()
+ base = 1024.0
+ cell.value = base
+ sf.Compute(cell)
+ self.assertTrue(cell.string_value == '1.0K')
+ cell.value = base**2
+ sf.Compute(cell)
+ self.assertTrue(cell.string_value == '1.0M')
+ cell.value = base**3
+ sf.Compute(cell)
+ self.assertTrue(cell.string_value == '1.0G')
+
+ def testLerp(self):
+ c1 = tabulator.Color(0, 0, 0, 0)
+ c2 = tabulator.Color(255, 0, 0, 0)
+ c3 = tabulator.Color.Lerp(0.5, c1, c2)
+ self.assertTrue(c3.r == 127.5)
+ self.assertTrue(c3.g == 0)
+ self.assertTrue(c3.b == 0)
+ self.assertTrue(c3.a == 0)
+ c3.Round()
+ self.assertTrue(c3.r == 127)
+
+ def testGmean(self):
+ a = [1.0e+308] * 3
+ # pylint: disable=protected-access
+ b = tabulator.Result()._GetGmean(a)
+ self.assertTrue(b >= 0.99e+308 and b <= 1.01e+308)
+
+ def testTableGenerator(self):
+ runs = [[{'k1': '10',
+ 'k2': '12'}, {'k1': '13',
+ 'k2': '14',
+ 'k3': '15'}], [{'k1': '50',
+ 'k2': '51',
+ 'k3': '52',
+ 'k4': '53'}]]
+ labels = ['vanilla', 'modified']
+ tg = tabulator.TableGenerator(runs, labels)
+ table = tg.GetTable()
+ header = table.pop(0)
+
+ self.assertTrue(header == ['keys', 'vanilla', 'modified'])
+ row = table.pop(0)
+ self.assertTrue(row == ['k1', ['10', '13'], ['50']])
+ row = table.pop(0)
+ self.assertTrue(row == ['k2', ['12', '14'], ['51']])
+ row = table.pop(0)
+ self.assertTrue(row == ['k3', [None, '15'], ['52']])
+ row = table.pop(0)
+ self.assertTrue(row == ['k4', [None, None], ['53']])
+
+ table = tg.GetTable()
+ columns = [
+ tabulator.Column(tabulator.AmeanResult(), tabulator.Format()),
+ tabulator.Column(tabulator.AmeanRatioResult(),
+ tabulator.PercentFormat()),
+ ]
+ tf = tabulator.TableFormatter(table, columns)
+ table = tf.GetCellTable()
+ self.assertTrue(table)
+
+ def testColspan(self):
+ simple_table = [
+ ['binary', 'b1', 'b2', 'b3'],
+ ['size', 100, 105, 108],
+ ['rodata', 100, 80, 70],
+ ['data', 100, 100, 100],
+ ['debug', 100, 140, 60],
+ ]
+ columns = [
+ tabulator.Column(tabulator.AmeanResult(), tabulator.Format()),
+ tabulator.Column(tabulator.MinResult(), tabulator.Format()),
+ tabulator.Column(tabulator.AmeanRatioResult(),
+ tabulator.PercentFormat()),
+ tabulator.Column(tabulator.AmeanRatioResult(),
+ tabulator.ColorBoxFormat()),
+ ]
+ our_table = [simple_table[0]]
+ for row in simple_table[1:]:
+ our_row = [row[0]]
+ for v in row[1:]:
+ our_row.append([v])
+ our_table.append(our_row)
+
+ tf = tabulator.TableFormatter(our_table, columns)
+ cell_table = tf.GetCellTable()
+ self.assertTrue(cell_table[0][0].colspan == 1)
+ self.assertTrue(cell_table[0][1].colspan == 2)
+ self.assertTrue(cell_table[0][2].colspan == 4)
+ self.assertTrue(cell_table[0][3].colspan == 4)
+ for row in cell_table[1:]:
+ for cell in row:
+ self.assertTrue(cell.colspan == 1)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/cros_utils/timeline.py b/cros_utils/timeline.py
new file mode 100644
index 00000000..873aaa30
--- /dev/null
+++ b/cros_utils/timeline.py
@@ -0,0 +1,52 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+#
+"""Tools for recording and reporting timeline of benchmark_run."""
+
+from __future__ import print_function
+
+__author__ = 'yunlian@google.com (Yunlian Jiang)'
+
+import time
+
+
+class Event(object):
+ """One event on the timeline."""
+
+ def __init__(self, name='', cur_time=0):
+ self.name = name
+ self.timestamp = cur_time
+
+
+class Timeline(object):
+ """Use a dict to store the timeline."""
+
+ def __init__(self):
+ self.events = []
+
+ def Record(self, event):
+ for e in self.events:
+ assert e.name != event, ('The event {0} is already recorded.'
+ .format(event))
+ cur_event = Event(name=event, cur_time=time.time())
+ self.events.append(cur_event)
+
+ def GetEvents(self):
+    return [e.name for e in self.events]
+
+ def GetEventDict(self):
+ tl = {}
+ for e in self.events:
+ tl[e.name] = e.timestamp
+ return tl
+
+ def GetEventTime(self, event):
+ for e in self.events:
+ if e.name == event:
+ return e.timestamp
+    raise IndexError('The event {0} is not recorded.'.format(event))
+
+ def GetLastEventTime(self):
+ return self.events[-1].timestamp
+
+ def GetLastEvent(self):
+ return self.events[-1].name
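+
+
+# A minimal usage sketch (timestamps are illustrative epoch seconds):
+#   tl = Timeline()
+#   tl.Record('start')
+#   tl.Record('benchmark_done')
+#   tl.GetEvents()     # ['start', 'benchmark_done']
+#   tl.GetEventDict()  # {'start': 1469000000.0, 'benchmark_done': ...}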
diff --git a/cros_utils/timeline_test.py b/cros_utils/timeline_test.py
new file mode 100644
index 00000000..c93a1274
--- /dev/null
+++ b/cros_utils/timeline_test.py
@@ -0,0 +1,57 @@
+# Copyright 2012 Google Inc. All Rights Reserved.
+"""Tests for time_line.py."""
+
+from __future__ import print_function
+
+__author__ = 'yunlian@google.com (Yunlian Jiang)'
+
+import time
+import unittest
+
+import timeline
+
+
+class TimeLineTest(unittest.TestCase):
+ """Tests for the Timeline class."""
+
+ def testRecord(self):
+ tl = timeline.Timeline()
+ tl.Record('A')
+ t = time.time()
+ t1 = tl.events[0].timestamp
+ self.assertEqual(int(t1 - t), 0)
+ self.assertRaises(AssertionError, tl.Record, 'A')
+
+ def testGetEvents(self):
+ tl = timeline.Timeline()
+ tl.Record('A')
+ e = tl.GetEvents()
+ self.assertEqual(e, ['A'])
+ tl.Record('B')
+ e = tl.GetEvents()
+ self.assertEqual(e, ['A', 'B'])
+
+ def testGetEventTime(self):
+ tl = timeline.Timeline()
+ tl.Record('A')
+ t = time.time()
+ t1 = tl.GetEventTime('A')
+ self.assertEqual(int(t1 - t), 0)
+ self.assertRaises(IndexError, tl.GetEventTime, 'B')
+
+ def testGetLastEventTime(self):
+ tl = timeline.Timeline()
+ self.assertRaises(IndexError, tl.GetLastEventTime)
+ tl.Record('A')
+ t = time.time()
+ t1 = tl.GetLastEventTime()
+ self.assertEqual(int(t1 - t), 0)
+ time.sleep(2)
+ tl.Record('B')
+ t = time.time()
+ t1 = tl.GetLastEventTime()
+ self.assertEqual(int(t1 - t), 0)
+
+
+if __name__ == '__main__':
+ unittest.main()