diff options
author | Caroline Tice <cmtice@google.com> | 2016-07-20 12:52:59 -0700 |
---|---|---|
committer | chrome-bot <chrome-bot@chromium.org> | 2016-07-25 11:00:38 -0700 |
commit | a8af9a7a2462b00e72deff99327bdb452a715277 (patch) | |
tree | 92573f258457cc6a737c10df0dd250265b9efb8d /cros_utils | |
parent | 19b6f5fc11dcf97144e9723c8f78534cce27423a (diff) | |
download | toolchain-utils-a8af9a7a2462b00e72deff99327bdb452a715277.tar.gz |
[toolchain-utils] Finish switching utils/ to cros_utils/.
This CL finishes switching the subdirectory from 'utils' to
'cros_utils'. It changes all the remaining import statements to
use 'cros_utils'; it removes the 'cros_utils' symlink, and it
renames the 'utils' subdirectory to 'cros_utils'.
BUG=chromium:568195
TEST=ran crosperf & binary search tool unittests.
Change-Id: I7427f8bfb2ddac3a4b6108e46782039059684382
Reviewed-on: https://chrome-internal-review.googlesource.com/270396
Commit-Ready: Caroline Tice <cmtice@google.com>
Tested-by: Caroline Tice <cmtice@google.com>
Reviewed-by: Cassidy Burden <cburden@google.com>
Reviewed-by: Luis Lozano <llozano@chromium.org>
Diffstat (limited to 'cros_utils')
l--------- | cros_utils | 1 | ||||
-rw-r--r-- | cros_utils/__init__.py | 1 | ||||
-rwxr-xr-x | cros_utils/buildbot_json.py | 1518 | ||||
-rw-r--r-- | cros_utils/buildbot_utils.py | 328 | ||||
-rw-r--r-- | cros_utils/colortrans.py | 388 | ||||
-rw-r--r-- | cros_utils/command_executer.py | 685 | ||||
-rwxr-xr-x | cros_utils/command_executer_unittest.py | 27 | ||||
-rw-r--r-- | cros_utils/constants.py | 10 | ||||
-rwxr-xr-x | cros_utils/email_sender.py | 144 | ||||
-rw-r--r-- | cros_utils/file_utils.py | 87 | ||||
-rw-r--r-- | cros_utils/html_tools.py | 91 | ||||
-rw-r--r-- | cros_utils/locks.py | 44 | ||||
-rw-r--r-- | cros_utils/logger.py | 369 | ||||
-rw-r--r-- | cros_utils/machines.py | 25 | ||||
-rw-r--r-- | cros_utils/manifest_versions.py | 97 | ||||
-rw-r--r-- | cros_utils/misc.py | 557 | ||||
-rw-r--r-- | cros_utils/misc_test.py | 51 | ||||
-rw-r--r-- | cros_utils/no_pseudo_terminal_test.py | 53 | ||||
-rwxr-xr-x | cros_utils/perf_diff.py | 332 | ||||
-rw-r--r-- | cros_utils/pstat.py | 1077 | ||||
-rw-r--r-- | cros_utils/stats.py | 4519 | ||||
-rw-r--r-- | cros_utils/tabulator.py | 1248 | ||||
-rw-r--r-- | cros_utils/tabulator_test.py | 141 | ||||
-rw-r--r-- | cros_utils/timeline.py | 52 | ||||
-rw-r--r-- | cros_utils/timeline_test.py | 57 |
25 files changed, 11901 insertions, 1 deletions
diff --git a/cros_utils b/cros_utils deleted file mode 120000 index 66252432..00000000 --- a/cros_utils +++ /dev/null @@ -1 +0,0 @@ -utils
\ No newline at end of file diff --git a/cros_utils/__init__.py b/cros_utils/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/cros_utils/__init__.py @@ -0,0 +1 @@ + diff --git a/cros_utils/buildbot_json.py b/cros_utils/buildbot_json.py new file mode 100755 index 00000000..693a42cd --- /dev/null +++ b/cros_utils/buildbot_json.py @@ -0,0 +1,1518 @@ +#!/usr/bin/python2 +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# NOTE: This file is NOT under GPL. 
See above. +"""Queries buildbot through the json interface. +""" + +from __future__ import print_function + +__author__ = 'maruel@chromium.org' +__version__ = '1.2' + +import code +import datetime +import functools +import json + +# Pylint recommends we use "from chromite.lib import cros_logging as logging". +# Chromite specific policy message, we want to keep using the standard logging. +# pylint: disable=cros-logging-import +import logging + +# pylint: disable=deprecated-module +import optparse + +import time +import urllib +import urllib2 +import sys + +try: + from natsort import natsorted +except ImportError: + # natsorted is a simple helper to sort "naturally", e.g. "vm40" is sorted + # after "vm7". Defaults to normal sorting. + natsorted = sorted + +# These values are buildbot constants used for Build and BuildStep. +# This line was copied from master/buildbot/status/builder.py. +SUCCESS, WARNINGS, FAILURE, SKIPPED, EXCEPTION, RETRY = range(6) + +## Generic node caching code. + + +class Node(object): + """Root class for all nodes in the graph. + + Provides base functionality for any node in the graph, independent if it has + children or not or if its content can be addressed through an url or needs to + be fetched as part of another node. + + self.printable_attributes is only used for self documentation and for str() + implementation. 
+ """ + printable_attributes = [] + + def __init__(self, parent, url): + self.printable_attributes = self.printable_attributes[:] + if url: + self.printable_attributes.append('url') + url = url.rstrip('/') + if parent is not None: + self.printable_attributes.append('parent') + self.url = url + self.parent = parent + + def __str__(self): + return self.to_string() + + def __repr__(self): + """Embeds key if present.""" + key = getattr(self, 'key', None) + if key is not None: + return '<%s key=%s>' % (self.__class__.__name__, key) + cached_keys = getattr(self, 'cached_keys', None) + if cached_keys is not None: + return '<%s keys=%s>' % (self.__class__.__name__, cached_keys) + return super(Node, self).__repr__() + + def to_string(self, maximum=100): + out = ['%s:' % self.__class__.__name__] + assert not 'printable_attributes' in self.printable_attributes + + def limit(txt): + txt = str(txt) + if maximum > 0: + if len(txt) > maximum + 2: + txt = txt[:maximum] + '...' + return txt + + for k in sorted(self.printable_attributes): + if k == 'parent': + # Avoid infinite recursion. + continue + out.append(limit(' %s: %r' % (k, getattr(self, k)))) + return '\n'.join(out) + + def refresh(self): + """Refreshes the data.""" + self.discard() + return self.cache() + + def cache(self): # pragma: no cover + """Caches the data.""" + raise NotImplementedError() + + def discard(self): # pragma: no cover + """Discards cached data. + + Pretty much everything is temporary except completed Build. + """ + raise NotImplementedError() + + +class AddressableBaseDataNode(Node): # pylint: disable=W0223 + """A node that contains a dictionary of data that can be fetched with an url. + + The node is directly addressable. It also often can be fetched by the parent. 
+ """ + printable_attributes = Node.printable_attributes + ['data'] + + def __init__(self, parent, url, data): + super(AddressableBaseDataNode, self).__init__(parent, url) + self._data = data + + @property + def cached_data(self): + return self._data + + @property + def data(self): + self.cache() + return self._data + + def cache(self): + if self._data is None: + self._data = self._readall() + return True + return False + + def discard(self): + self._data = None + + def read(self, suburl): + assert self.url, self.__class__.__name__ + url = self.url + if suburl: + url = '%s/%s' % (self.url, suburl) + return self.parent.read(url) + + def _readall(self): + return self.read('') + + +class AddressableDataNode(AddressableBaseDataNode): # pylint: disable=W0223 + """Automatically encodes the url.""" + + def __init__(self, parent, url, data): + super(AddressableDataNode, self).__init__(parent, urllib.quote(url), data) + + +class NonAddressableDataNode(Node): # pylint: disable=W0223 + """A node that cannot be addressed by an unique url. + + The data comes directly from the parent. + """ + + def __init__(self, parent, subkey): + super(NonAddressableDataNode, self).__init__(parent, None) + self.subkey = subkey + + @property + def cached_data(self): + if self.parent.cached_data is None: + return None + return self.parent.cached_data[self.subkey] + + @property + def data(self): + return self.parent.data[self.subkey] + + def cache(self): + self.parent.cache() + + def discard(self): # pragma: no cover + """Avoid invalid state when parent recreate the object.""" + raise AttributeError('Call parent discard() instead') + + +class VirtualNodeList(Node): + """Base class for every node that has children. + + Adds partial supports for keys and iterator functionality. 'key' can be a + string or a int. Not to be used directly. 
+ """ + printable_attributes = Node.printable_attributes + ['keys'] + + def __init__(self, parent, url): + super(VirtualNodeList, self).__init__(parent, url) + # Keeps the keys independently when ordering is needed. + self._is_cached = False + self._has_keys_cached = False + + def __contains__(self, key): + """Enables 'if i in obj:'.""" + return key in self.keys + + def __iter__(self): + """Enables 'for i in obj:'. It returns children.""" + self.cache_keys() + for key in self.keys: + yield self[key] + + def __len__(self): + """Enables 'len(obj)' to get the number of childs.""" + return len(self.keys) + + def discard(self): + """Discards data. + + The default behavior is to not invalidate cached keys. The only place where + keys need to be invalidated is with Builds. + """ + self._is_cached = False + self._has_keys_cached = False + + @property + def cached_children(self): # pragma: no cover + """Returns an iterator over the children that are cached.""" + raise NotImplementedError() + + @property + def cached_keys(self): # pragma: no cover + raise NotImplementedError() + + @property + def keys(self): # pragma: no cover + """Returns the keys for every children.""" + raise NotImplementedError() + + def __getitem__(self, key): # pragma: no cover + """Returns a child, without fetching its data. + + The children could be invalid since no verification is done. 
+ """ + raise NotImplementedError() + + def cache(self): # pragma: no cover + """Cache all the children.""" + raise NotImplementedError() + + def cache_keys(self): # pragma: no cover + """Cache all children's keys.""" + raise NotImplementedError() + + +class NodeList(VirtualNodeList): # pylint: disable=W0223 + """Adds a cache of the keys.""" + + def __init__(self, parent, url): + super(NodeList, self).__init__(parent, url) + self._keys = [] + + @property + def cached_keys(self): + return self._keys + + @property + def keys(self): + self.cache_keys() + return self._keys + + +class NonAddressableNodeList(VirtualNodeList): # pylint: disable=W0223 + """A node that contains children but retrieves all its data from its parent. + + I.e. there's no url to get directly this data. + """ + # Child class object for children of this instance. For example, BuildSteps + # has BuildStep children. + _child_cls = None + + def __init__(self, parent, subkey): + super(NonAddressableNodeList, self).__init__(parent, None) + self.subkey = subkey + assert (not isinstance(self._child_cls, NonAddressableDataNode) and + issubclass(self._child_cls, NonAddressableDataNode)), ( + self._child_cls.__name__) + + @property + def cached_children(self): + if self.parent.cached_data is not None: + for i in xrange(len(self.parent.cached_data[self.subkey])): + yield self[i] + + @property + def cached_data(self): + if self.parent.cached_data is None: + return None + return self.parent.data.get(self.subkey, None) + + @property + def cached_keys(self): + if self.parent.cached_data is None: + return None + return range(len(self.parent.data.get(self.subkey, []))) + + @property + def data(self): + return self.parent.data[self.subkey] + + def cache(self): + self.parent.cache() + + def cache_keys(self): + self.parent.cache() + + def discard(self): # pragma: no cover + """Do not call. + + Avoid infinite recursion by having the caller calls the parent's + discard() explicitely. 
+ """ + raise AttributeError('Call parent discard() instead') + + def __iter__(self): + """Enables 'for i in obj:'. It returns children.""" + if self.data: + for i in xrange(len(self.data)): + yield self[i] + + def __getitem__(self, key): + """Doesn't cache the value, it's not needed. + + TODO(maruel): Cache? + """ + if isinstance(key, int) and key < 0: + key = len(self.data) + key + # pylint: disable=E1102 + return self._child_cls(self, key) + + +class AddressableNodeList(NodeList): + """A node that has children that can be addressed with an url.""" + + # Child class object for children of this instance. For example, Builders has + # Builder children and Builds has Build children. + _child_cls = None + + def __init__(self, parent, url): + super(AddressableNodeList, self).__init__(parent, url) + self._cache = {} + assert (not isinstance(self._child_cls, AddressableDataNode) and + issubclass(self._child_cls, AddressableDataNode)), ( + self._child_cls.__name__) + + @property + def cached_children(self): + for item in self._cache.itervalues(): + if item.cached_data is not None: + yield item + + @property + def cached_keys(self): + return self._cache.keys() + + def __getitem__(self, key): + """Enables 'obj[i]'.""" + if self._has_keys_cached and not key in self._keys: + raise KeyError(key) + + if not key in self._cache: + # Create an empty object. + self._create_obj(key, None) + return self._cache[key] + + def cache(self): + if not self._is_cached: + data = self._readall() + for key in sorted(data): + self._create_obj(key, data[key]) + self._is_cached = True + self._has_keys_cached = True + + def cache_partial(self, children): + """Caches a partial number of children. + + This method is more efficient since it does a single request for all the + children instead of one request per children. + + It only grab objects not already cached. 
+ """ + # pylint: disable=W0212 + if not self._is_cached: + to_fetch = [ + child + for child in children + if not (child in self._cache and self._cache[child].cached_data) + ] + if to_fetch: + # Similar to cache(). The only reason to sort is to simplify testing. + params = '&'.join('select=%s' % urllib.quote(str(v)) + for v in sorted(to_fetch)) + data = self.read('?' + params) + for key in sorted(data): + self._create_obj(key, data[key]) + + def cache_keys(self): + """Implement to speed up enumeration. Defaults to call cache().""" + if not self._has_keys_cached: + self.cache() + assert self._has_keys_cached + + def discard(self): + """Discards temporary children.""" + super(AddressableNodeList, self).discard() + for v in self._cache.itervalues(): + v.discard() + + def read(self, suburl): + assert self.url, self.__class__.__name__ + url = self.url + if suburl: + url = '%s/%s' % (self.url, suburl) + return self.parent.read(url) + + def _create_obj(self, key, data): + """Creates an object of type self._child_cls.""" + # pylint: disable=E1102 + obj = self._child_cls(self, key, data) + # obj.key and key may be different. + # No need to overide cached data with None. + if data is not None or obj.key not in self._cache: + self._cache[obj.key] = obj + if obj.key not in self._keys: + self._keys.append(obj.key) + + def _readall(self): + return self.read('') + + +class SubViewNodeList(VirtualNodeList): # pylint: disable=W0223 + """A node that shows a subset of children that comes from another structure. + + The node is not addressable. + + E.g. the keys are retrieved from parent but the actual data comes from + virtual_parent. 
+ """ + + def __init__(self, parent, virtual_parent, subkey): + super(SubViewNodeList, self).__init__(parent, None) + self.subkey = subkey + self.virtual_parent = virtual_parent + assert isinstance(self.parent, AddressableDataNode) + assert isinstance(self.virtual_parent, NodeList) + + @property + def cached_children(self): + if self.parent.cached_data is not None: + for item in self.keys: + if item in self.virtual_parent.keys: + child = self[item] + if child.cached_data is not None: + yield child + + @property + def cached_keys(self): + return (self.parent.cached_data or {}).get(self.subkey, []) + + @property + def keys(self): + self.cache_keys() + return self.parent.data.get(self.subkey, []) + + def cache(self): + """Batch request for each child in a single read request.""" + if not self._is_cached: + self.virtual_parent.cache_partial(self.keys) + self._is_cached = True + + def cache_keys(self): + if not self._has_keys_cached: + self.parent.cache() + self._has_keys_cached = True + + def discard(self): + if self.parent.cached_data is not None: + for child in self.virtual_parent.cached_children: + if child.key in self.keys: + child.discard() + self.parent.discard() + super(SubViewNodeList, self).discard() + + def __getitem__(self, key): + """Makes sure the key is in our key but grab it from the virtual parent.""" + return self.virtual_parent[key] + + def __iter__(self): + self.cache() + return super(SubViewNodeList, self).__iter__() + +############################################################################### +## Buildbot-specific code + + +class Slave(AddressableDataNode): + """Buildbot slave class.""" + printable_attributes = AddressableDataNode.printable_attributes + [ + 'name', + 'key', + 'connected', + 'version', + ] + + def __init__(self, parent, name, data): + super(Slave, self).__init__(parent, name, data) + self.name = name + self.key = self.name + # TODO(maruel): Add SlaveBuilders and a 'builders' property. 
+ # TODO(maruel): Add a 'running_builds' property. + + @property + def connected(self): + return self.data.get('connected', False) + + @property + def version(self): + return self.data.get('version') + + +class Slaves(AddressableNodeList): + """Buildbot slaves.""" + _child_cls = Slave + printable_attributes = AddressableNodeList.printable_attributes + ['names'] + + def __init__(self, parent): + super(Slaves, self).__init__(parent, 'slaves') + + @property + def names(self): + return self.keys + + +class BuilderSlaves(SubViewNodeList): + """Similar to Slaves but only list slaves connected to a specific builder.""" + printable_attributes = SubViewNodeList.printable_attributes + ['names'] + + def __init__(self, parent): + super(BuilderSlaves, self).__init__(parent, parent.parent.parent.slaves, + 'slaves') + + @property + def names(self): + return self.keys + + +class BuildStep(NonAddressableDataNode): + """Class for a buildbot build step.""" + printable_attributes = NonAddressableDataNode.printable_attributes + [ + 'name', + 'number', + 'start_time', + 'end_time', + 'duration', + 'is_started', + 'is_finished', + 'is_running', + 'result', + 'simplified_result', + ] + + def __init__(self, parent, number): + """Pre-loaded, since the data is retrieved via the Build object.""" + assert isinstance(number, int) + super(BuildStep, self).__init__(parent, number) + self.number = number + + @property + def start_time(self): + if self.data.get('times'): + return int(round(self.data['times'][0])) + + @property + def end_time(self): + times = self.data.get('times') + if times and len(times) == 2 and times[1]: + return int(round(times[1])) + + @property + def duration(self): + if self.start_time: + return (self.end_time or int(round(time.time()))) - self.start_time + + @property + def name(self): + return self.data['name'] + + @property + def is_started(self): + return self.data.get('isStarted', False) + + @property + def is_finished(self): + return self.data.get('isFinished', False) 
+ + @property + def is_running(self): + return self.is_started and not self.is_finished + + @property + def result(self): + result = self.data.get('results') + if result is None: + # results may be 0, in that case with filter=1, the value won't be + # present. + if self.data.get('isFinished'): + result = self.data.get('results', 0) + while isinstance(result, list): + result = result[0] + return result + + @property + def simplified_result(self): + """Returns a simplified 3 state value, True, False or None.""" + result = self.result + if result in (SUCCESS, WARNINGS): + return True + elif result in (FAILURE, EXCEPTION, RETRY): + return False + assert result in (None, SKIPPED), (result, self.data) + return None + + +class BuildSteps(NonAddressableNodeList): + """Duplicates keys to support lookup by both step number and step name.""" + printable_attributes = NonAddressableNodeList.printable_attributes + [ + 'failed', + ] + _child_cls = BuildStep + + def __init__(self, parent): + """Pre-loaded, since the data is retrieved via the Build object.""" + super(BuildSteps, self).__init__(parent, 'steps') + + @property + def keys(self): + """Returns the steps name in order.""" + return [i['name'] for i in self.data or []] + + @property + def failed(self): + """Shortcuts that lists the step names of steps that failed.""" + return [step.name for step in self if step.simplified_result is False] + + def __getitem__(self, key): + """Accept step name in addition to index number.""" + if isinstance(key, basestring): + # It's a string, try to find the corresponding index. 
+ for i, step in enumerate(self.data): + if step['name'] == key: + key = i + break + else: + raise KeyError(key) + return super(BuildSteps, self).__getitem__(key) + + +class Build(AddressableDataNode): + """Buildbot build info.""" + printable_attributes = AddressableDataNode.printable_attributes + [ + 'key', + 'number', + 'steps', + 'blame', + 'reason', + 'revision', + 'result', + 'simplified_result', + 'start_time', + 'end_time', + 'duration', + 'slave', + 'properties', + 'completed', + ] + + def __init__(self, parent, key, data): + super(Build, self).__init__(parent, str(key), data) + self.number = int(key) + self.key = self.number + self.steps = BuildSteps(self) + + @property + def blame(self): + return self.data.get('blame', []) + + @property + def builder(self): + """Returns the Builder object. + + Goes up the hierarchy to find the Buildbot.builders[builder] instance. + """ + return self.parent.parent.parent.parent.builders[self.data['builderName']] + + @property + def start_time(self): + if self.data.get('times'): + return int(round(self.data['times'][0])) + + @property + def end_time(self): + times = self.data.get('times') + if times and len(times) == 2 and times[1]: + return int(round(times[1])) + + @property + def duration(self): + if self.start_time: + return (self.end_time or int(round(time.time()))) - self.start_time + + @property + def eta(self): + return self.data.get('eta', 0) + + @property + def completed(self): + return self.data.get('currentStep') is None + + @property + def properties(self): + return self.data.get('properties', []) + + @property + def reason(self): + return self.data.get('reason') + + @property + def result(self): + result = self.data.get('results') + while isinstance(result, list): + result = result[0] + if result is None and self.steps: + # results may be 0, in that case with filter=1, the value won't be + # present. 
+ result = self.steps[-1].result + return result + + @property + def revision(self): + return self.data.get('sourceStamp', {}).get('revision') + + @property + def simplified_result(self): + """Returns a simplified 3 state value, True, False or None.""" + result = self.result + if result in (SUCCESS, WARNINGS, SKIPPED): + return True + elif result in (FAILURE, EXCEPTION, RETRY): + return False + assert result is None, (result, self.data) + return None + + @property + def slave(self): + """Returns the Slave object. + + Goes up the hierarchy to find the Buildbot.slaves[slave] instance. + """ + return self.parent.parent.parent.parent.slaves[self.data['slave']] + + def discard(self): + """Completed Build isn't discarded.""" + if self._data and self.result is None: + assert not self.steps or not self.steps[-1].data.get('isFinished') + self._data = None + + +class CurrentBuilds(SubViewNodeList): + """Lists of the current builds.""" + + def __init__(self, parent): + super(CurrentBuilds, self).__init__(parent, parent.builds, 'currentBuilds') + + +class PendingBuilds(AddressableDataNode): + """List of the pending builds.""" + + def __init__(self, parent): + super(PendingBuilds, self).__init__(parent, 'pendingBuilds', None) + + +class Builds(AddressableNodeList): + """Supports iteration. + + Recommends using .cache() to speed up if a significant number of builds are + iterated over. + """ + _child_cls = Build + + def __init__(self, parent): + super(Builds, self).__init__(parent, 'builds') + + def __getitem__(self, key): + """Support for negative reference and enable retrieving non-cached builds. + + e.g. -1 is the last build, -2 is the previous build before the last one. + """ + key = int(key) + if key < 0: + # Convert negative to positive build number. + self.cache_keys() + # Since the negative value can be outside of the cache keys range, use the + # highest key value and calculate from it. 
+ key = max(self._keys) + key + 1 + + if not key in self._cache: + # Create an empty object. + self._create_obj(key, None) + return self._cache[key] + + def __iter__(self): + """Returns cached Build objects in reversed order. + + The most recent build is returned first and then in reverse chronological + order, up to the oldest cached build by the server. Older builds can be + accessed but will trigger significantly more I/O so they are not included by + default in the iteration. + + To access the older builds, use self.iterall() instead. + """ + self.cache() + return reversed(self._cache.values()) + + def iterall(self): + """Returns Build objects in decreasing order unbounded up to build 0. + + The most recent build is returned first and then in reverse chronological + order. Older builds can be accessed and will trigger significantly more I/O + so use this carefully. + """ + # Only cache keys here. + self.cache_keys() + if self._keys: + for i in xrange(max(self._keys), -1, -1): + yield self[i] + + def cache_keys(self): + """Grabs the keys (build numbers) from the builder.""" + if not self._has_keys_cached: + for i in self.parent.data.get('cachedBuilds', []): + i = int(i) + self._cache.setdefault(i, Build(self, i, None)) + if i not in self._keys: + self._keys.append(i) + self._has_keys_cached = True + + def discard(self): + super(Builds, self).discard() + # Can't keep keys. 
+ self._has_keys_cached = False + + def _readall(self): + return self.read('_all') + + +class Builder(AddressableDataNode): + """Builder status.""" + printable_attributes = AddressableDataNode.printable_attributes + [ + 'name', + 'key', + 'builds', + 'slaves', + 'pending_builds', + 'current_builds', + ] + + def __init__(self, parent, name, data): + super(Builder, self).__init__(parent, name, data) + self.name = name + self.key = name + self.builds = Builds(self) + self.slaves = BuilderSlaves(self) + self.current_builds = CurrentBuilds(self) + self.pending_builds = PendingBuilds(self) + + def discard(self): + super(Builder, self).discard() + self.builds.discard() + self.slaves.discard() + self.current_builds.discard() + + +class Builders(AddressableNodeList): + """Root list of builders.""" + _child_cls = Builder + + def __init__(self, parent): + super(Builders, self).__init__(parent, 'builders') + + +class Buildbot(AddressableBaseDataNode): + """This object should be recreated on a master restart as it caches data.""" + # Throttle fetches to not kill the server. 
+ auto_throttle = None + printable_attributes = AddressableDataNode.printable_attributes + [ + 'slaves', + 'builders', + 'last_fetch', + ] + + def __init__(self, url): + super(Buildbot, self).__init__(None, url.rstrip('/') + '/json', None) + self._builders = Builders(self) + self._slaves = Slaves(self) + self.last_fetch = None + + @property + def builders(self): + return self._builders + + @property + def slaves(self): + return self._slaves + + def discard(self): + """Discards information about Builders and Slaves.""" + super(Buildbot, self).discard() + self._builders.discard() + self._slaves.discard() + + def read(self, suburl): + if self.auto_throttle: + if self.last_fetch: + delta = datetime.datetime.utcnow() - self.last_fetch + remaining = (datetime.timedelta(seconds=self.auto_throttle) - delta) + if remaining > datetime.timedelta(seconds=0): + logging.debug('Sleeping for %ss', remaining) + time.sleep(remaining.seconds) + self.last_fetch = datetime.datetime.utcnow() + url = '%s/%s' % (self.url, suburl) + if '?' in url: + url += '&filter=1' + else: + url += '?filter=1' + logging.info('read(%s)', suburl) + channel = urllib.urlopen(url) + data = channel.read() + try: + return json.loads(data) + except ValueError: + if channel.getcode() >= 400: + # Convert it into an HTTPError for easier processing. 
+ raise urllib2.HTTPError(url, channel.getcode(), '%s:\n%s' % (url, data), + channel.headers, None) + raise + + def _readall(self): + return self.read('project') + +############################################################################### +## Controller code + + +def usage(more): + + def hook(fn): + fn.func_usage_more = more + return fn + + return hook + + +def need_buildbot(fn): + """Post-parse args to create a buildbot object.""" + + @functools.wraps(fn) + def hook(parser, args, *extra_args, **kwargs): + old_parse_args = parser.parse_args + + def new_parse_args(args): + options, args = old_parse_args(args) + if len(args) < 1: + parser.error('Need to pass the root url of the buildbot') + url = args.pop(0) + if not url.startswith('http'): + url = 'http://' + url + buildbot = Buildbot(url) + buildbot.auto_throttle = options.throttle + return options, args, buildbot + + parser.parse_args = new_parse_args + # Call the original function with the modified parser. + return fn(parser, args, *extra_args, **kwargs) + + hook.func_usage_more = '[options] <url>' + return hook + + +@need_buildbot +def CMDpending(parser, args): + """Lists pending jobs.""" + parser.add_option('-b', + '--builder', + dest='builders', + action='append', + default=[], + help='Builders to filter on') + options, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' '.join(args)) + if not options.builders: + options.builders = buildbot.builders.keys + for builder in options.builders: + builder = buildbot.builders[builder] + pending_builds = builder.data.get('pendingBuilds', 0) + if not pending_builds: + continue + print('Builder %s: %d' % (builder.name, pending_builds)) + if not options.quiet: + for pending in builder.pending_builds.data: + if 'revision' in pending['source']: + print(' revision: %s' % pending['source']['revision']) + for change in pending['source']['changes']: + print(' change:') + print(' comment: %r' % 
unicode(change['comments'][:50])) + print(' who: %s' % change['who']) + return 0 + + +@usage('[options] <url> [commands] ...') +@need_buildbot +def CMDrun(parser, args): + """Runs commands passed as parameters. + + When passing commands on the command line, each command will be run as if it + was on its own line. + """ + parser.add_option('-f', '--file', help='Read script from file') + parser.add_option('-i', + dest='use_stdin', + action='store_true', + help='Read script on stdin') + # Variable 'buildbot' is not used directly. + # pylint: disable=W0612 + options, args, buildbot = parser.parse_args(args) + if (bool(args) + bool(options.use_stdin) + bool(options.file)) != 1: + parser.error('Need to pass only one of: <commands>, -f <file> or -i') + if options.use_stdin: + cmds = sys.stdin.read() + elif options.file: + cmds = open(options.file).read() + else: + cmds = '\n'.join(args) + compiled = compile(cmds, '<cmd line>', 'exec') + # pylint: disable=eval-used + eval(compiled, globals(), locals()) + return 0 + + +@need_buildbot +def CMDinteractive(parser, args): + """Runs an interactive shell to run queries.""" + _, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' '.join(args)) + prompt = ( + 'Buildbot interactive console for "%s".\n' + 'Hint: Start with typing: \'buildbot.printable_attributes\' or ' + '\'print str(buildbot)\' to explore.') % buildbot.url[:-len('/json')] + local_vars = {'buildbot': buildbot, 'b': buildbot} + code.interact(prompt, None, local_vars) + + +@need_buildbot +def CMDidle(parser, args): + """Lists idle slaves.""" + return find_idle_busy_slaves(parser, args, True) + + +@need_buildbot +def CMDbusy(parser, args): + """Lists idle slaves.""" + return find_idle_busy_slaves(parser, args, False) + + +@need_buildbot +def CMDdisconnected(parser, args): + """Lists disconnected slaves.""" + _, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' 
'.join(args)) + for slave in buildbot.slaves: + if not slave.connected: + print(slave.name) + return 0 + + +def find_idle_busy_slaves(parser, args, show_idle): + parser.add_option('-b', + '--builder', + dest='builders', + action='append', + default=[], + help='Builders to filter on') + parser.add_option('-s', + '--slave', + dest='slaves', + action='append', + default=[], + help='Slaves to filter on') + options, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' '.join(args)) + if not options.builders: + options.builders = buildbot.builders.keys + for builder in options.builders: + builder = buildbot.builders[builder] + if options.slaves: + # Only the subset of slaves connected to the builder. + slaves = list(set(options.slaves).intersection(set(builder.slaves.names))) + if not slaves: + continue + else: + slaves = builder.slaves.names + busy_slaves = [build.slave.name for build in builder.current_builds] + if show_idle: + slaves = natsorted(set(slaves) - set(busy_slaves)) + else: + slaves = natsorted(set(slaves) & set(busy_slaves)) + if options.quiet: + for slave in slaves: + print(slave) + else: + if slaves: + print('Builder %s: %s' % (builder.name, ', '.join(slaves))) + return 0 + + +def last_failure(buildbot, + builders=None, + slaves=None, + steps=None, + no_cache=False): + """Returns Build object with last failure with the specific filters.""" + builders = builders or buildbot.builders.keys + for builder in builders: + builder = buildbot.builders[builder] + if slaves: + # Only the subset of slaves connected to the builder. + builder_slaves = list(set(slaves).intersection(set(builder.slaves.names))) + if not builder_slaves: + continue + else: + builder_slaves = builder.slaves.names + + if not no_cache and len(builder.slaves) > 2: + # Unless you just want the last few builds, it's often faster to + # fetch the whole thing at once, at the cost of a small hickup on + # the buildbot. 
+ # TODO(maruel): Cache only N last builds or all builds since + # datetime. + builder.builds.cache() + + found = [] + for build in builder.builds: + if build.slave.name not in builder_slaves or build.slave.name in found: + continue + # Only add the slave for the first completed build but still look for + # incomplete builds. + if build.completed: + found.append(build.slave.name) + + if steps: + if any(build.steps[step].simplified_result is False for step in steps): + yield build + elif build.simplified_result is False: + yield build + + if len(found) == len(builder_slaves): + # Found all the slaves, quit. + break + + +@need_buildbot +def CMDlast_failure(parser, args): + """Lists all slaves that failed on that step on their last build. + + Example: to find all slaves where their last build was a compile failure, + run with --step compile + """ + parser.add_option( + '-S', + '--step', + dest='steps', + action='append', + default=[], + help='List all slaves that failed on that step on their last build') + parser.add_option('-b', + '--builder', + dest='builders', + action='append', + default=[], + help='Builders to filter on') + parser.add_option('-s', + '--slave', + dest='slaves', + action='append', + default=[], + help='Slaves to filter on') + parser.add_option('-n', + '--no_cache', + action='store_true', + help='Don\'t load all builds at once') + options, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' '.join(args)) + print_builders = not options.quiet and len(options.builders) != 1 + last_builder = None + for build in last_failure(buildbot, + builders=options.builders, + slaves=options.slaves, + steps=options.steps, + no_cache=options.no_cache): + + if print_builders and last_builder != build.builder: + print(build.builder.name) + last_builder = build.builder + + if options.quiet: + if options.slaves: + print('%s: %s' % (build.builder.name, build.slave.name)) + else: + print(build.slave.name) + else: + out = '%d 
on %s: blame:%s' % (build.number, build.slave.name, + ', '.join(build.blame)) + if print_builders: + out = ' ' + out + print(out) + + if len(options.steps) != 1: + for step in build.steps: + if step.simplified_result is False: + # Assume the first line is the text name anyway. + summary = ', '.join(step.data['text'][1:])[:40] + out = ' %s: "%s"' % (step.data['name'], summary) + if print_builders: + out = ' ' + out + print(out) + return 0 + + +@need_buildbot +def CMDcurrent(parser, args): + """Lists current jobs.""" + parser.add_option('-b', + '--builder', + dest='builders', + action='append', + default=[], + help='Builders to filter on') + parser.add_option('--blame', + action='store_true', + help='Only print the blame list') + options, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' '.join(args)) + if not options.builders: + options.builders = buildbot.builders.keys + + if options.blame: + blame = set() + for builder in options.builders: + for build in buildbot.builders[builder].current_builds: + if build.blame: + for blamed in build.blame: + blame.add(blamed) + print('\n'.join(blame)) + return 0 + + for builder in options.builders: + builder = buildbot.builders[builder] + if not options.quiet and builder.current_builds: + print(builder.name) + for build in builder.current_builds: + if options.quiet: + print(build.slave.name) + else: + out = '%4d: slave=%10s' % (build.number, build.slave.name) + out += ' duration=%5d' % (build.duration or 0) + if build.eta: + out += ' eta=%5.0f' % build.eta + else: + out += ' ' + if build.blame: + out += ' blame=' + ', '.join(build.blame) + print(out) + + return 0 + + +@need_buildbot +def CMDbuilds(parser, args): + """Lists all builds. 
+ + Example: to find all builds on a single slave, run with -b bar -s foo + """ + parser.add_option('-r', + '--result', + type='int', + help='Build result to filter on') + parser.add_option('-b', + '--builder', + dest='builders', + action='append', + default=[], + help='Builders to filter on') + parser.add_option('-s', + '--slave', + dest='slaves', + action='append', + default=[], + help='Slaves to filter on') + parser.add_option('-n', + '--no_cache', + action='store_true', + help='Don\'t load all builds at once') + options, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' '.join(args)) + builders = options.builders or buildbot.builders.keys + for builder in builders: + builder = buildbot.builders[builder] + for build in builder.builds: + if not options.slaves or build.slave.name in options.slaves: + if options.quiet: + out = '' + if options.builders: + out += '%s/' % builder.name + if len(options.slaves) != 1: + out += '%s/' % build.slave.name + out += '%d revision:%s result:%s blame:%s' % ( + build.number, build.revision, build.result, ','.join(build.blame)) + print(out) + else: + print(build) + return 0 + + +@need_buildbot +def CMDcount(parser, args): + """Count the number of builds that occured during a specific period.""" + parser.add_option('-o', + '--over', + type='int', + help='Number of seconds to look for') + parser.add_option('-b', + '--builder', + dest='builders', + action='append', + default=[], + help='Builders to filter on') + options, args, buildbot = parser.parse_args(args) + if args: + parser.error('Unrecognized parameters: %s' % ' '.join(args)) + if not options.over: + parser.error( + 'Specify the number of seconds, e.g. 
--over 86400 for the last 24 ' + 'hours') + builders = options.builders or buildbot.builders.keys + counts = {} + since = time.time() - options.over + for builder in builders: + builder = buildbot.builders[builder] + counts[builder.name] = 0 + if not options.quiet: + print(builder.name) + for build in builder.builds.iterall(): + try: + start_time = build.start_time + except urllib2.HTTPError: + # The build was probably trimmed. + print('Failed to fetch build %s/%d' % (builder.name, build.number), + file=sys.stderr) + continue + if start_time >= since: + counts[builder.name] += 1 + else: + break + if not options.quiet: + print('.. %d' % counts[builder.name]) + + align_name = max(len(b) for b in counts) + align_number = max(len(str(c)) for c in counts.itervalues()) + for builder in sorted(counts): + print('%*s: %*d' % (align_name, builder, align_number, counts[builder])) + print('Total: %d' % sum(counts.itervalues())) + return 0 + + +def gen_parser(): + """Returns an OptionParser instance with default options. + + It should be then processed with gen_usage() before being used. + """ + parser = optparse.OptionParser(version=__version__) + # Remove description formatting + parser.format_description = lambda x: parser.description + # Add common parsing. 
+ old_parser_args = parser.parse_args + + def Parse(*args, **kwargs): + options, args = old_parser_args(*args, **kwargs) + if options.verbose >= 2: + logging.basicConfig(level=logging.DEBUG) + elif options.verbose: + logging.basicConfig(level=logging.INFO) + else: + logging.basicConfig(level=logging.WARNING) + return options, args + + parser.parse_args = Parse + + parser.add_option('-v', + '--verbose', + action='count', + help='Use multiple times to increase logging leve') + parser.add_option( + '-q', + '--quiet', + action='store_true', + help='Reduces the output to be parsed by scripts, independent of -v') + parser.add_option('--throttle', + type='float', + help='Minimum delay to sleep between requests') + return parser + +############################################################################### +## Generic subcommand handling code + + +def Command(name): + return getattr(sys.modules[__name__], 'CMD' + name, None) + + +@usage('<command>') +def CMDhelp(parser, args): + """Print list of commands or use 'help <command>'.""" + _, args = parser.parse_args(args) + if len(args) == 1: + return main(args + ['--help']) + parser.print_help() + return 0 + + +def gen_usage(parser, command): + """Modifies an OptionParser object with the command's documentation. + + The documentation is taken from the function's docstring. + """ + obj = Command(command) + more = getattr(obj, 'func_usage_more') + # OptParser.description prefer nicely non-formatted strings. + parser.description = obj.__doc__ + '\n' + parser.set_usage('usage: %%prog %s %s' % (command, more)) + + +def main(args=None): + # Do it late so all commands are listed. 
+ # pylint: disable=E1101 + CMDhelp.__doc__ += '\n\nCommands are:\n' + '\n'.join( + ' %-12s %s' % (fn[3:], Command(fn[3:]).__doc__.split('\n', 1)[0]) + for fn in dir(sys.modules[__name__]) if fn.startswith('CMD')) + + parser = gen_parser() + if args is None: + args = sys.argv[1:] + if args: + command = Command(args[0]) + if command: + # "fix" the usage and the description now that we know the subcommand. + gen_usage(parser, args[0]) + return command(parser, args[1:]) + + # Not a known command. Default to help. + gen_usage(parser, 'help') + return CMDhelp(parser, args) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/cros_utils/buildbot_utils.py b/cros_utils/buildbot_utils.py new file mode 100644 index 00000000..a80b7ad4 --- /dev/null +++ b/cros_utils/buildbot_utils.py @@ -0,0 +1,328 @@ +# Copyright 2014 Google Inc. All Rights Reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Utilities for launching and accessing ChromeOS buildbots.""" + +from __future__ import print_function + +import os +import time +import urllib2 + +from cros_utils import command_executer +from cros_utils import logger +from cros_utils import buildbot_json + +SLEEP_TIME = 600 # 10 minutes; time between polling of buildbot. +TIME_OUT = 18000 # Decide the build is dead or will never finish +# after this time (5 hours). +OK_STATUS = [ # List of result status values that are 'ok'. + # This was obtained from: + # https://chromium.googlesource.com/chromium/tools/build/+/ + # master/third_party/buildbot_8_4p1/buildbot/status/results.py + 0, # "success" + 1, # "warnings" + 6, # "retry" +] + + +class BuildbotTimeout(Exception): + """Exception to throw when a buildbot operation timesout.""" + pass + + +def ParseReportLog(url, build): + """Scrape the trybot image name off the Reports log page. + + This takes the URL for a trybot Reports Stage web page, + and a trybot build type, such as 'daisy-release'. 
It + opens the web page and parses it looking for the trybot + artifact name (e.g. something like + 'trybot-daisy-release/R40-6394.0.0-b1389'). It returns the + artifact name, if found. + """ + trybot_image = '' + url += '/text' + newurl = url.replace('uberchromegw', 'chromegw') + webpage = urllib2.urlopen(newurl) + data = webpage.read() + lines = data.split('\n') + for l in lines: + if l.find('Artifacts') > 0 and l.find('trybot') > 0: + trybot_name = 'trybot-%s' % build + start_pos = l.find(trybot_name) + end_pos = l.find('@https://storage') + trybot_image = l[start_pos:end_pos] + + return trybot_image + + +def GetBuildData(buildbot_queue, build_id): + """Find the Reports stage web page for a trybot build. + + This takes the name of a buildbot_queue, such as 'daisy-release' + and a build id (the build number), and uses the json buildbot api to + find the Reports stage web page for that build, if it exists. + """ + builder = buildbot_json.Buildbot( + 'http://chromegw/p/tryserver.chromiumos/').builders[buildbot_queue] + build_data = builder.builds[build_id].data + logs = build_data['logs'] + for l in logs: + fname = l[1] + if 'steps/Report/' in fname: + return fname + + return '' + + +def FindBuildRecordFromLog(description, log_info): + """Find the right build record in the build logs. + + Get the first build record from build log with a reason field + that matches 'description'. ('description' is a special tag we + created when we launched the buildbot, so we could find it at this + point.) + """ + + current_line = 1 + while current_line < len(log_info): + my_dict = {} + # Read all the lines from one "Build" to the next into my_dict + while True: + key = log_info[current_line].split(':')[0].strip() + value = log_info[current_line].split(':', 1)[1].strip() + my_dict[key] = value + current_line += 1 + if 'Build' in key or current_line == len(log_info): + break + try: + # Check to see of the build record is the right one. 
+ if str(description) in my_dict['reason']: + # We found a match; we're done. + return my_dict + except KeyError: + print("reason is not in dictionary: '%s'" % repr(my_dict)) + else: + # Keep going. + continue + + # We hit the bottom of the log without a match. + return {} + + +def GetBuildInfo(file_dir, builder): + """Get all the build records for the trybot builds. + + file_dir is the toolchain_utils directory. + """ + ce = command_executer.GetCommandExecuter() + commands = ('{0}/cros_utils/buildbot_json.py builds ' + 'http://chromegw/i/tryserver.chromiumos/'.format(file_dir)) + + if builder: + # For release builds, get logs from the 'release' builder. + if builder.endswith('-release'): + commands += ' -b release' + else: + commands += ' -b %s' % builder + _, buildinfo, _ = ce.RunCommandWOutput(commands, print_to_console=False) + build_log = buildinfo.splitlines() + return build_log + + +def FindArchiveImage(chromeos_root, build, build_id): + """Returns name of the trybot artifact for board/build_id.""" + ce = command_executer.GetCommandExecuter() + command = ('gsutil ls gs://chromeos-image-archive/trybot-%s/*b%s' + '/chromiumos_test_image.tar.xz' % (build, build_id)) + _, out, _ = ce.ChrootRunCommandWOutput(chromeos_root, + command, + print_to_console=False) + # + # If build_id is not unique, there may be multiple archive images + # to choose from; sort them & pick the first (newest). 
+ # + # If there are multiple archive images found, out will look something + # like this: + # + # 'gs://.../R35-5692.0.0-b105/chromiumos_test_image.tar.xz + # gs://.../R46-7339.0.0-b105/chromiumos_test_image.tar.xz' + # + out = out.rstrip('\n') + tmp_list = out.split('\n') + # After stripping the final '\n' and splitting on any other '\n', we get + # something like this: + # tmp_list = [ 'gs://.../R35-5692.0.0-b105/chromiumos_test_image.tar.xz' , + # 'gs://.../R46-7339.0.0-b105/chromiumos_test_image.tar.xz' ] + # + # If we sort this in descending order, we should end up with the most + # recent test image first, so that's what we do here. + # + if len(tmp_list) > 1: + tmp_list = sorted(tmp_list, reverse=True) + out = tmp_list[0] + + trybot_image = '' + trybot_name = 'trybot-%s' % build + if out and out.find(trybot_name) > 0: + start_pos = out.find(trybot_name) + end_pos = out.find('/chromiumos_test_image') + trybot_image = out[start_pos:end_pos] + + return trybot_image + + +def GetTrybotImage(chromeos_root, + buildbot_name, + patch_list, + build_tag, + build_toolchain=False): + """Launch buildbot and get resulting trybot artifact name. + + This function launches a buildbot with the appropriate flags to + build the test ChromeOS image, with the current ToT mobile compiler. It + checks every 10 minutes to see if the trybot has finished. When the trybot + has finished, it parses the resulting report logs to find the trybot + artifact (if one was created), and returns that artifact name. + + chromeos_root is the path to the ChromeOS root, needed for finding chromite + and launching the buildbot. + + buildbot_name is the name of the buildbot queue, such as lumpy-release or + daisy-paladin. + + patch_list a python list of the patches, if any, for the buildbot to use. + + build_tag is a (unique) string to be used to look up the buildbot results + from among all the build records. 
+ """ + ce = command_executer.GetCommandExecuter() + cbuildbot_path = os.path.join(chromeos_root, 'chromite/cbuildbot') + base_dir = os.getcwd() + patch_arg = '' + if patch_list: + for p in patch_list: + patch_arg = patch_arg + ' -g ' + repr(p) + toolchain_flags = '' + if build_toolchain: + toolchain_flags += '--latest-toolchain' + os.chdir(cbuildbot_path) + + # Launch buildbot with appropriate flags. + build = buildbot_name + description = build_tag + command = ('./cbuildbot --remote --nochromesdk --notests' + ' --remote-description=%s %s %s %s' % + (description, toolchain_flags, patch_arg, build)) + _, out, _ = ce.RunCommandWOutput(command) + if 'Tryjob submitted!' not in out: + logger.GetLogger().LogFatal('Error occurred while launching trybot job: ' + '%s' % command) + os.chdir(base_dir) + + build_id = 0 + build_status = None + # Wait for buildbot to finish running (check every 10 minutes). Wait + # 10 minutes before the first check to give the buildbot time to launch + # (so we don't start looking for build data before it's out there). + time.sleep(SLEEP_TIME) + done = False + pending = True + # pending_time is the time between when we submit the job and when the + # buildbot actually launches the build. running_time is the time between + # when the buildbot job launches and when it finishes. The job is + # considered 'pending' until we can find an entry for it in the buildbot + # logs. + pending_time = SLEEP_TIME + running_time = 0 + while not done: + done = True + build_info = GetBuildInfo(base_dir, build) + if not build_info: + if pending_time > TIME_OUT: + logger.GetLogger().LogFatal('Unable to get build logs for target %s.' % + build) + else: + pending_message = 'Unable to find build log; job may be pending.' + done = False + + if done: + data_dict = FindBuildRecordFromLog(description, build_info) + if not data_dict: + # Trybot job may be pending (not actually launched yet). 
+ if pending_time > TIME_OUT: + logger.GetLogger().LogFatal('Unable to find build record for trybot' + ' %s.' % description) + else: + pending_message = 'Unable to find build record; job may be pending.' + done = False + + else: + # Now that we have actually found the entry for the build + # job in the build log, we know the job is actually + # runnning, not pending, so we flip the 'pending' flag. We + # still have to wait for the buildbot job to finish running + # however. + pending = False + if 'True' in data_dict['completed']: + build_id = data_dict['number'] + build_status = int(data_dict['result']) + else: + done = False + + if not done: + if pending: + logger.GetLogger().LogOutput(pending_message) + logger.GetLogger().LogOutput('Current pending time: %d minutes.' % + (pending_time / 60)) + pending_time += SLEEP_TIME + else: + logger.GetLogger().LogOutput('{0} minutes passed.'.format(running_time / + 60)) + logger.GetLogger().LogOutput('Sleeping {0} seconds.'.format(SLEEP_TIME)) + running_time += SLEEP_TIME + + time.sleep(SLEEP_TIME) + if running_time > TIME_OUT: + done = True + + trybot_image = '' + + if build_status in OK_STATUS: + trybot_image = FindArchiveImage(chromeos_root, build, build_id) + if not trybot_image: + logger.GetLogger().LogError('Trybot job %s failed with status %d;' + ' no trybot image generated.' 
% + (description, build_status)) + + logger.GetLogger().LogOutput("trybot_image is '%s'" % trybot_image) + logger.GetLogger().LogOutput('build_status is %d' % build_status) + return trybot_image + + +def DoesImageExist(chromeos_root, build): + """Check if the image for the given build exists.""" + + ce = command_executer.GetCommandExecuter() + command = ('gsutil ls gs://chromeos-image-archive/%s' + '/chromiumos_test_image.tar.xz' % (build)) + ret = ce.ChrootRunCommand(chromeos_root, command, print_to_console=False) + return not ret + + +def WaitForImage(chromeos_root, build): + """Wait for an image to be ready.""" + + elapsed_time = 0 + while elapsed_time < TIME_OUT: + if DoesImageExist(chromeos_root, build): + return + logger.GetLogger().LogOutput('Image %s not ready, waiting for 10 minutes' % + build) + time.sleep(SLEEP_TIME) + elapsed_time += SLEEP_TIME + + logger.GetLogger().LogOutput('Image %s not found, waited for %d hours' % + (build, (TIME_OUT / 3600))) + raise BuildbotTimeout('Timeout while waiting for image %s' % build) diff --git a/cros_utils/colortrans.py b/cros_utils/colortrans.py new file mode 100644 index 00000000..9458cc49 --- /dev/null +++ b/cros_utils/colortrans.py @@ -0,0 +1,388 @@ +# We did not author this file nor mantain it. Skip linting it. +#pylint: skip-file +""" Convert values between RGB hex codes and xterm-256 color codes. + +Nice long listing of all 256 colors and their codes. Useful for +developing console color themes, or even script output schemes. + +Resources: +* http://en.wikipedia.org/wiki/8-bit_color +* http://en.wikipedia.org/wiki/ANSI_escape_code +* /usr/share/X11/rgb.txt + +I'm not sure where this script was inspired from. I think I must have +written it from scratch, though it's been several years now. +""" + +__author__ = 'Micah Elliott http://MicahElliott.com' +__version__ = '0.1' +__copyright__ = 'Copyright (C) 2011 Micah Elliott. All rights reserved.' 
+__license__ = 'WTFPL http://sam.zoy.org/wtfpl/' + +#--------------------------------------------------------------------- + +import sys, re + +CLUT = [ # color look-up table + # 8-bit, RGB hex + + # Primary 3-bit (8 colors). Unique representation! + ('00', '000000'), + ('01', '800000'), + ('02', '008000'), + ('03', '808000'), + ('04', '000080'), + ('05', '800080'), + ('06', '008080'), + ('07', 'c0c0c0'), + + # Equivalent "bright" versions of original 8 colors. + ('08', '808080'), + ('09', 'ff0000'), + ('10', '00ff00'), + ('11', 'ffff00'), + ('12', '0000ff'), + ('13', 'ff00ff'), + ('14', '00ffff'), + ('15', 'ffffff'), + + # Strictly ascending. + ('16', '000000'), + ('17', '00005f'), + ('18', '000087'), + ('19', '0000af'), + ('20', '0000d7'), + ('21', '0000ff'), + ('22', '005f00'), + ('23', '005f5f'), + ('24', '005f87'), + ('25', '005faf'), + ('26', '005fd7'), + ('27', '005fff'), + ('28', '008700'), + ('29', '00875f'), + ('30', '008787'), + ('31', '0087af'), + ('32', '0087d7'), + ('33', '0087ff'), + ('34', '00af00'), + ('35', '00af5f'), + ('36', '00af87'), + ('37', '00afaf'), + ('38', '00afd7'), + ('39', '00afff'), + ('40', '00d700'), + ('41', '00d75f'), + ('42', '00d787'), + ('43', '00d7af'), + ('44', '00d7d7'), + ('45', '00d7ff'), + ('46', '00ff00'), + ('47', '00ff5f'), + ('48', '00ff87'), + ('49', '00ffaf'), + ('50', '00ffd7'), + ('51', '00ffff'), + ('52', '5f0000'), + ('53', '5f005f'), + ('54', '5f0087'), + ('55', '5f00af'), + ('56', '5f00d7'), + ('57', '5f00ff'), + ('58', '5f5f00'), + ('59', '5f5f5f'), + ('60', '5f5f87'), + ('61', '5f5faf'), + ('62', '5f5fd7'), + ('63', '5f5fff'), + ('64', '5f8700'), + ('65', '5f875f'), + ('66', '5f8787'), + ('67', '5f87af'), + ('68', '5f87d7'), + ('69', '5f87ff'), + ('70', '5faf00'), + ('71', '5faf5f'), + ('72', '5faf87'), + ('73', '5fafaf'), + ('74', '5fafd7'), + ('75', '5fafff'), + ('76', '5fd700'), + ('77', '5fd75f'), + ('78', '5fd787'), + ('79', '5fd7af'), + ('80', '5fd7d7'), + ('81', '5fd7ff'), + ('82', '5fff00'), + 
('83', '5fff5f'), + ('84', '5fff87'), + ('85', '5fffaf'), + ('86', '5fffd7'), + ('87', '5fffff'), + ('88', '870000'), + ('89', '87005f'), + ('90', '870087'), + ('91', '8700af'), + ('92', '8700d7'), + ('93', '8700ff'), + ('94', '875f00'), + ('95', '875f5f'), + ('96', '875f87'), + ('97', '875faf'), + ('98', '875fd7'), + ('99', '875fff'), + ('100', '878700'), + ('101', '87875f'), + ('102', '878787'), + ('103', '8787af'), + ('104', '8787d7'), + ('105', '8787ff'), + ('106', '87af00'), + ('107', '87af5f'), + ('108', '87af87'), + ('109', '87afaf'), + ('110', '87afd7'), + ('111', '87afff'), + ('112', '87d700'), + ('113', '87d75f'), + ('114', '87d787'), + ('115', '87d7af'), + ('116', '87d7d7'), + ('117', '87d7ff'), + ('118', '87ff00'), + ('119', '87ff5f'), + ('120', '87ff87'), + ('121', '87ffaf'), + ('122', '87ffd7'), + ('123', '87ffff'), + ('124', 'af0000'), + ('125', 'af005f'), + ('126', 'af0087'), + ('127', 'af00af'), + ('128', 'af00d7'), + ('129', 'af00ff'), + ('130', 'af5f00'), + ('131', 'af5f5f'), + ('132', 'af5f87'), + ('133', 'af5faf'), + ('134', 'af5fd7'), + ('135', 'af5fff'), + ('136', 'af8700'), + ('137', 'af875f'), + ('138', 'af8787'), + ('139', 'af87af'), + ('140', 'af87d7'), + ('141', 'af87ff'), + ('142', 'afaf00'), + ('143', 'afaf5f'), + ('144', 'afaf87'), + ('145', 'afafaf'), + ('146', 'afafd7'), + ('147', 'afafff'), + ('148', 'afd700'), + ('149', 'afd75f'), + ('150', 'afd787'), + ('151', 'afd7af'), + ('152', 'afd7d7'), + ('153', 'afd7ff'), + ('154', 'afff00'), + ('155', 'afff5f'), + ('156', 'afff87'), + ('157', 'afffaf'), + ('158', 'afffd7'), + ('159', 'afffff'), + ('160', 'd70000'), + ('161', 'd7005f'), + ('162', 'd70087'), + ('163', 'd700af'), + ('164', 'd700d7'), + ('165', 'd700ff'), + ('166', 'd75f00'), + ('167', 'd75f5f'), + ('168', 'd75f87'), + ('169', 'd75faf'), + ('170', 'd75fd7'), + ('171', 'd75fff'), + ('172', 'd78700'), + ('173', 'd7875f'), + ('174', 'd78787'), + ('175', 'd787af'), + ('176', 'd787d7'), + ('177', 'd787ff'), + ('178', 'd7af00'), + 
('179', 'd7af5f'), + ('180', 'd7af87'), + ('181', 'd7afaf'), + ('182', 'd7afd7'), + ('183', 'd7afff'), + ('184', 'd7d700'), + ('185', 'd7d75f'), + ('186', 'd7d787'), + ('187', 'd7d7af'), + ('188', 'd7d7d7'), + ('189', 'd7d7ff'), + ('190', 'd7ff00'), + ('191', 'd7ff5f'), + ('192', 'd7ff87'), + ('193', 'd7ffaf'), + ('194', 'd7ffd7'), + ('195', 'd7ffff'), + ('196', 'ff0000'), + ('197', 'ff005f'), + ('198', 'ff0087'), + ('199', 'ff00af'), + ('200', 'ff00d7'), + ('201', 'ff00ff'), + ('202', 'ff5f00'), + ('203', 'ff5f5f'), + ('204', 'ff5f87'), + ('205', 'ff5faf'), + ('206', 'ff5fd7'), + ('207', 'ff5fff'), + ('208', 'ff8700'), + ('209', 'ff875f'), + ('210', 'ff8787'), + ('211', 'ff87af'), + ('212', 'ff87d7'), + ('213', 'ff87ff'), + ('214', 'ffaf00'), + ('215', 'ffaf5f'), + ('216', 'ffaf87'), + ('217', 'ffafaf'), + ('218', 'ffafd7'), + ('219', 'ffafff'), + ('220', 'ffd700'), + ('221', 'ffd75f'), + ('222', 'ffd787'), + ('223', 'ffd7af'), + ('224', 'ffd7d7'), + ('225', 'ffd7ff'), + ('226', 'ffff00'), + ('227', 'ffff5f'), + ('228', 'ffff87'), + ('229', 'ffffaf'), + ('230', 'ffffd7'), + ('231', 'ffffff'), + + # Gray-scale range. + ('232', '080808'), + ('233', '121212'), + ('234', '1c1c1c'), + ('235', '262626'), + ('236', '303030'), + ('237', '3a3a3a'), + ('238', '444444'), + ('239', '4e4e4e'), + ('240', '585858'), + ('241', '626262'), + ('242', '6c6c6c'), + ('243', '767676'), + ('244', '808080'), + ('245', '8a8a8a'), + ('246', '949494'), + ('247', '9e9e9e'), + ('248', 'a8a8a8'), + ('249', 'b2b2b2'), + ('250', 'bcbcbc'), + ('251', 'c6c6c6'), + ('252', 'd0d0d0'), + ('253', 'dadada'), + ('254', 'e4e4e4'), + ('255', 'eeeeee'), +] + + +def _str2hex(hexstr): + return int(hexstr, 16) + + +def _strip_hash(rgb): + # Strip leading `#` if exists. 
+ if rgb.startswith('#'): + rgb = rgb.lstrip('#') + return rgb + + +def _create_dicts(): + short2rgb_dict = dict(CLUT) + rgb2short_dict = {} + for k, v in short2rgb_dict.items(): + rgb2short_dict[v] = k + return rgb2short_dict, short2rgb_dict + + +def short2rgb(short): + return SHORT2RGB_DICT[short] + + +def print_all(): + """ Print all 256 xterm color codes. + """ + for short, rgb in CLUT: + sys.stdout.write('\033[48;5;%sm%s:%s' % (short, short, rgb)) + sys.stdout.write('\033[0m ') + sys.stdout.write('\033[38;5;%sm%s:%s' % (short, short, rgb)) + sys.stdout.write('\033[0m\n') + print 'Printed all codes.' + print 'You can translate a hex or 0-255 code by providing an argument.' + + +def rgb2short(rgb): + """ Find the closest xterm-256 approximation to the given RGB value. + @param rgb: Hex code representing an RGB value, eg, 'abcdef' + @returns: String between 0 and 255, compatible with xterm. + >>> rgb2short('123456') + ('23', '005f5f') + >>> rgb2short('ffffff') + ('231', 'ffffff') + >>> rgb2short('0DADD6') # vimeo logo + ('38', '00afd7') + """ + rgb = _strip_hash(rgb) + incs = (0x00, 0x5f, 0x87, 0xaf, 0xd7, 0xff) + # Break 6-char RGB code into 3 integer vals. 
+ parts = [int(h, 16) for h in re.split(r'(..)(..)(..)', rgb)[1:4]] + res = [] + for part in parts: + i = 0 + while i < len(incs) - 1: + s, b = incs[i], incs[i + 1] # smaller, bigger + if s <= part <= b: + s1 = abs(s - part) + b1 = abs(b - part) + if s1 < b1: + closest = s + else: + closest = b + res.append(closest) + break + i += 1 + #print '***', res + res = ''.join([('%02.x' % i) for i in res]) + equiv = RGB2SHORT_DICT[res] + #print '***', res, equiv + return equiv, res + + +RGB2SHORT_DICT, SHORT2RGB_DICT = _create_dicts() + +#--------------------------------------------------------------------- + +if __name__ == '__main__': + import doctest + doctest.testmod() + if len(sys.argv) == 1: + print_all() + raise SystemExit + arg = sys.argv[1] + if len(arg) < 4 and int(arg) < 256: + rgb = short2rgb(arg) + sys.stdout.write( + 'xterm color \033[38;5;%sm%s\033[0m -> RGB exact \033[38;5;%sm%s\033[0m' + % (arg, arg, arg, rgb)) + sys.stdout.write('\033[0m\n') + else: + short, rgb = rgb2short(arg) + sys.stdout.write('RGB %s -> xterm color approx \033[38;5;%sm%s (%s)' % + (arg, short, short, rgb)) + sys.stdout.write('\033[0m\n') diff --git a/cros_utils/command_executer.py b/cros_utils/command_executer.py new file mode 100644 index 00000000..c5614513 --- /dev/null +++ b/cros_utils/command_executer.py @@ -0,0 +1,685 @@ +# Copyright 2011 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+"""Utilities to run commands in outside/inside chroot and on the board.""" + +from __future__ import print_function + +import getpass +import os +import re +import select +import signal +import subprocess +import sys +import tempfile +import time + +import logger +import misc + +mock_default = False + +LOG_LEVEL = ('none', 'quiet', 'average', 'verbose') + + +def InitCommandExecuter(mock=False): + # pylint: disable=global-statement + global mock_default + # Whether to default to a mock command executer or not + mock_default = mock + + +def GetCommandExecuter(logger_to_set=None, mock=False, log_level='verbose'): + # If the default is a mock executer, always return one. + if mock_default or mock: + return MockCommandExecuter(log_level, logger_to_set) + else: + return CommandExecuter(log_level, logger_to_set) + + +class CommandExecuter(object): + """Provides several methods to execute commands on several environments.""" + + def __init__(self, log_level, logger_to_set=None): + self.log_level = log_level + if log_level == 'none': + self.logger = None + else: + if logger_to_set is not None: + self.logger = logger_to_set + else: + self.logger = logger.GetLogger() + + def GetLogLevel(self): + return self.log_level + + def SetLogLevel(self, log_level): + self.log_level = log_level + + def RunCommandGeneric(self, + cmd, + return_output=False, + machine=None, + username=None, + command_terminator=None, + command_timeout=None, + terminated_timeout=10, + print_to_console=True, + except_handler=lambda p, e: None): + """Run a command. + + Returns triplet (returncode, stdout, stderr). 
+ """ + + cmd = str(cmd) + + if self.log_level == 'quiet': + print_to_console = False + + if self.log_level == 'verbose': + self.logger.LogCmd(cmd, machine, username, print_to_console) + elif self.logger: + self.logger.LogCmdToFileOnly(cmd, machine, username) + if command_terminator and command_terminator.IsTerminated(): + if self.logger: + self.logger.LogError('Command was terminated!', print_to_console) + return (1, '', '') + + if machine is not None: + user = '' + if username is not None: + user = username + '@' + cmd = "ssh -t -t %s%s -- '%s'" % (user, machine, cmd) + + # We use setsid so that the child will have a different session id + # and we can easily kill the process group. This is also important + # because the child will be disassociated from the parent terminal. + # In this way the child cannot mess the parent's terminal. + p = None + try: + p = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True, + preexec_fn=os.setsid) + + full_stdout = '' + full_stderr = '' + + # Pull output from pipes, send it to file/stdout/string + out = err = None + pipes = [p.stdout, p.stderr] + + my_poll = select.poll() + my_poll.register(p.stdout, select.POLLIN) + my_poll.register(p.stderr, select.POLLIN) + + terminated_time = None + started_time = time.time() + + while len(pipes): + if command_terminator and command_terminator.IsTerminated(): + os.killpg(os.getpgid(p.pid), signal.SIGTERM) + if self.logger: + self.logger.LogError('Command received termination request. 
' + 'Killed child process group.', + print_to_console) + break + + l = my_poll.poll(100) + for (fd, _) in l: + if fd == p.stdout.fileno(): + out = os.read(p.stdout.fileno(), 16384) + if return_output: + full_stdout += out + if self.logger: + self.logger.LogCommandOutput(out, print_to_console) + if out == '': + pipes.remove(p.stdout) + my_poll.unregister(p.stdout) + if fd == p.stderr.fileno(): + err = os.read(p.stderr.fileno(), 16384) + if return_output: + full_stderr += err + if self.logger: + self.logger.LogCommandError(err, print_to_console) + if err == '': + pipes.remove(p.stderr) + my_poll.unregister(p.stderr) + + if p.poll() is not None: + if terminated_time is None: + terminated_time = time.time() + elif (terminated_timeout is not None and + time.time() - terminated_time > terminated_timeout): + if self.logger: + self.logger.LogWarning('Timeout of %s seconds reached since ' + 'process termination.' % + terminated_timeout, + print_to_console) + break + + if (command_timeout is not None and + time.time() - started_time > command_timeout): + os.killpg(os.getpgid(p.pid), signal.SIGTERM) + if self.logger: + self.logger.LogWarning('Timeout of %s seconds reached since process' + 'started. Killed child process group.' % + command_timeout, print_to_console) + break + + if out == err == '': + break + + p.wait() + if return_output: + return (p.returncode, full_stdout, full_stderr) + return (p.returncode, '', '') + except BaseException as e: + except_handler(p, e) + raise + + def RunCommand(self, *args, **kwargs): + """Run a command. + + Takes the same arguments as RunCommandGeneric except for return_output. + Returns a single value returncode. 
+ """ + # Make sure that args does not overwrite 'return_output' + assert len(args) <= 1 + assert 'return_output' not in kwargs + kwargs['return_output'] = False + return self.RunCommandGeneric(*args, **kwargs)[0] + + def RunCommandWExceptionCleanup(self, *args, **kwargs): + """Run a command and kill process if exception is thrown. + + Takes the same arguments as RunCommandGeneric except for except_handler. + Returns same as RunCommandGeneric. + """ + + def KillProc(proc, _): + if proc: + os.killpg(os.getpgid(proc.pid), signal.SIGTERM) + + # Make sure that args does not overwrite 'except_handler' + assert len(args) <= 8 + assert 'except_handler' not in kwargs + kwargs['except_handler'] = KillProc + return self.RunCommandGeneric(*args, **kwargs) + + def RunCommandWOutput(self, *args, **kwargs): + """Run a command. + + Takes the same arguments as RunCommandGeneric except for return_output. + Returns a triplet (returncode, stdout, stderr). + """ + # Make sure that args does not overwrite 'return_output' + assert len(args) <= 1 + assert 'return_output' not in kwargs + kwargs['return_output'] = True + return self.RunCommandGeneric(*args, **kwargs) + + def RemoteAccessInitCommand(self, chromeos_root, machine): + command = '' + command += '\nset -- --remote=' + machine + command += '\n. ' + chromeos_root + '/src/scripts/common.sh' + command += '\n. 
' + chromeos_root + '/src/scripts/remote_access.sh' + command += '\nTMP=$(mktemp -d)' + command += "\nFLAGS \"$@\" || exit 1" + command += '\nremote_access_init' + return command + + def WriteToTempShFile(self, contents): + handle, command_file = tempfile.mkstemp(prefix=os.uname()[1], suffix='.sh') + os.write(handle, '#!/bin/bash\n') + os.write(handle, contents) + os.close(handle) + return command_file + + def CrosLearnBoard(self, chromeos_root, machine): + command = self.RemoteAccessInitCommand(chromeos_root, machine) + command += '\nlearn_board' + command += '\necho ${FLAGS_board}' + retval, output, _ = self.RunCommandWOutput(command) + if self.logger: + self.logger.LogFatalIf(retval, 'learn_board command failed') + elif retval: + sys.exit(1) + return output.split()[-1] + + def CrosRunCommandGeneric(self, + cmd, + return_output=False, + machine=None, + command_terminator=None, + chromeos_root=None, + command_timeout=None, + terminated_timeout=10, + print_to_console=True): + """Run a command on a ChromeOS box. + + Returns triplet (returncode, stdout, stderr). + """ + + if self.log_level != 'verbose': + print_to_console = False + + if self.logger: + self.logger.LogCmd(cmd, print_to_console=print_to_console) + self.logger.LogFatalIf(not machine, 'No machine provided!') + self.logger.LogFatalIf(not chromeos_root, 'chromeos_root not given!') + else: + if not chromeos_root or not machine: + sys.exit(1) + chromeos_root = os.path.expanduser(chromeos_root) + + # Write all commands to a file. + command_file = self.WriteToTempShFile(cmd) + retval = self.CopyFiles(command_file, + command_file, + dest_machine=machine, + command_terminator=command_terminator, + chromeos_root=chromeos_root, + dest_cros=True, + recursive=False, + print_to_console=print_to_console) + if retval: + if self.logger: + self.logger.LogError('Could not run remote command on machine.' 
+ ' Is the machine up?') + return (retval, '', '') + + command = self.RemoteAccessInitCommand(chromeos_root, machine) + command += '\nremote_sh bash %s' % command_file + command += "\nl_retval=$?; echo \"$REMOTE_OUT\"; exit $l_retval" + retval = self.RunCommandGeneric(command, + return_output, + command_terminator=command_terminator, + command_timeout=command_timeout, + terminated_timeout=terminated_timeout, + print_to_console=print_to_console) + if return_output: + connect_signature = ( + 'Initiating first contact with remote host\n' + 'Connection OK\n') + connect_signature_re = re.compile(connect_signature) + modded_retval = list(retval) + modded_retval[1] = connect_signature_re.sub('', retval[1]) + return modded_retval + return retval + + def CrosRunCommand(self, *args, **kwargs): + """Run a command on a ChromeOS box. + + Takes the same arguments as CrosRunCommandGeneric except for return_output. + Returns a single value returncode. + """ + # Make sure that args does not overwrite 'return_output' + assert len(args) <= 1 + assert 'return_output' not in kwargs + kwargs['return_output'] = False + return self.CrosRunCommandGeneric(*args, **kwargs)[0] + + def CrosRunCommandWOutput(self, *args, **kwargs): + """Run a command on a ChromeOS box. + + Takes the same arguments as CrosRunCommandGeneric except for return_output. + Returns a triplet (returncode, stdout, stderr). + """ + # Make sure that args does not overwrite 'return_output' + assert len(args) <= 1 + assert 'return_output' not in kwargs + kwargs['return_output'] = True + return self.CrosRunCommandGeneric(*args, **kwargs) + + def ChrootRunCommandGeneric(self, + chromeos_root, + command, + return_output=False, + command_terminator=None, + command_timeout=None, + terminated_timeout=10, + print_to_console=True, + cros_sdk_options=''): + """Runs a command within the chroot. + + Returns triplet (returncode, stdout, stderr). 
+ """ + + if self.log_level != 'verbose': + print_to_console = False + + if self.logger: + self.logger.LogCmd(command, print_to_console=print_to_console) + + handle, command_file = tempfile.mkstemp( + dir=os.path.join(chromeos_root, 'src/scripts'), + suffix='.sh', + prefix='in_chroot_cmd') + os.write(handle, '#!/bin/bash\n') + os.write(handle, command) + os.write(handle, '\n') + os.close(handle) + + os.chmod(command_file, 0777) + + # if return_output is set, run a dummy command first to make sure that + # the chroot already exists. We want the final returned output to skip + # the output from chroot creation steps. + if return_output: + ret = self.RunCommand('cd %s; cros_sdk %s -- true' % + (chromeos_root, cros_sdk_options)) + if ret: + return (ret, '', '') + + # Run command_file inside the chroot, making sure that any "~" is expanded + # by the shell inside the chroot, not outside. + command = ("cd %s; cros_sdk %s -- bash -c '%s/%s'" % + (chromeos_root, cros_sdk_options, misc.CHROMEOS_SCRIPTS_DIR, + os.path.basename(command_file))) + ret = self.RunCommandGeneric(command, + return_output, + command_terminator=command_terminator, + command_timeout=command_timeout, + terminated_timeout=terminated_timeout, + print_to_console=print_to_console) + os.remove(command_file) + return ret + + def ChrootRunCommand(self, *args, **kwargs): + """Runs a command within the chroot. + + Takes the same arguments as ChrootRunCommandGeneric except for + return_output. + Returns a single value returncode. + """ + # Make sure that args does not overwrite 'return_output' + assert len(args) <= 2 + assert 'return_output' not in kwargs + kwargs['return_output'] = False + return self.ChrootRunCommandGeneric(*args, **kwargs)[0] + + def ChrootRunCommandWOutput(self, *args, **kwargs): + """Runs a command within the chroot. + + Takes the same arguments as ChrootRunCommandGeneric except for + return_output. + Returns a triplet (returncode, stdout, stderr). 
+ """ + # Make sure that args does not overwrite 'return_output' + assert len(args) <= 2 + assert 'return_output' not in kwargs + kwargs['return_output'] = True + return self.ChrootRunCommandGeneric(*args, **kwargs) + + def RunCommands(self, + cmdlist, + machine=None, + username=None, + command_terminator=None): + cmd = ' ;\n'.join(cmdlist) + return self.RunCommand(cmd, + machine=machine, + username=username, + command_terminator=command_terminator) + + def CopyFiles(self, + src, + dest, + src_machine=None, + dest_machine=None, + src_user=None, + dest_user=None, + recursive=True, + command_terminator=None, + chromeos_root=None, + src_cros=False, + dest_cros=False, + print_to_console=True): + src = os.path.expanduser(src) + dest = os.path.expanduser(dest) + + if recursive: + src = src + '/' + dest = dest + '/' + + if src_cros == True or dest_cros == True: + if self.logger: + self.logger.LogFatalIf(src_cros == dest_cros, + 'Only one of src_cros and desc_cros can ' + 'be True.') + self.logger.LogFatalIf(not chromeos_root, 'chromeos_root not given!') + elif src_cros == dest_cros or not chromeos_root: + sys.exit(1) + if src_cros == True: + cros_machine = src_machine + else: + cros_machine = dest_machine + + command = self.RemoteAccessInitCommand(chromeos_root, cros_machine) + ssh_command = ( + 'ssh -p ${FLAGS_ssh_port}' + ' -o StrictHostKeyChecking=no' + + ' -o UserKnownHostsFile=$(mktemp)' + ' -i $TMP_PRIVATE_KEY') + rsync_prefix = "\nrsync -r -e \"%s\" " % ssh_command + if dest_cros == True: + command += rsync_prefix + '%s root@%s:%s' % (src, dest_machine, dest) + return self.RunCommand(command, + machine=src_machine, + username=src_user, + command_terminator=command_terminator, + print_to_console=print_to_console) + else: + command += rsync_prefix + 'root@%s:%s %s' % (src_machine, src, dest) + return self.RunCommand(command, + machine=dest_machine, + username=dest_user, + command_terminator=command_terminator, + print_to_console=print_to_console) + + if dest_machine 
== src_machine: + command = 'rsync -a %s %s' % (src, dest) + else: + if src_machine is None: + src_machine = os.uname()[1] + src_user = getpass.getuser() + command = 'rsync -a %s@%s:%s %s' % (src_user, src_machine, src, dest) + return self.RunCommand(command, + machine=dest_machine, + username=dest_user, + command_terminator=command_terminator, + print_to_console=print_to_console) + + def RunCommand2(self, + cmd, + cwd=None, + line_consumer=None, + timeout=None, + shell=True, + join_stderr=True, + env=None, + except_handler=lambda p, e: None): + """Run the command with an extra feature line_consumer. + + This version allow developers to provide a line_consumer which will be + fed execution output lines. + + A line_consumer is a callback, which is given a chance to run for each + line the execution outputs (either to stdout or stderr). The + line_consumer must accept one and exactly one dict argument, the dict + argument has these items - + 'line' - The line output by the binary. Notice, this string includes + the trailing '\n'. + 'output' - Whether this is a stdout or stderr output, values are either + 'stdout' or 'stderr'. When join_stderr is True, this value + will always be 'output'. + 'pobject' - The object used to control execution, for example, call + pobject.kill(). + + Note: As this is written, the stdin for the process executed is + not associated with the stdin of the caller of this routine. + + Args: + cmd: Command in a single string. + cwd: Working directory for execution. + line_consumer: A function that will ba called by this function. See above + for details. + timeout: terminate command after this timeout. + shell: Whether to use a shell for execution. + join_stderr: Whether join stderr to stdout stream. + env: Execution environment. + except_handler: Callback for when exception is thrown during command + execution. Passed process object and exception. + + Returns: + Execution return code. 
+ + Raises: + child_exception: if fails to start the command process (missing + permission, no such file, etc) + """ + + class StreamHandler(object): + """Internal utility class.""" + + def __init__(self, pobject, fd, name, line_consumer): + self._pobject = pobject + self._fd = fd + self._name = name + self._buf = '' + self._line_consumer = line_consumer + + def read_and_notify_line(self): + t = os.read(fd, 1024) + self._buf = self._buf + t + self.notify_line() + + def notify_line(self): + p = self._buf.find('\n') + while p >= 0: + self._line_consumer(line=self._buf[:p + 1], + output=self._name, + pobject=self._pobject) + if p < len(self._buf) - 1: + self._buf = self._buf[p + 1:] + p = self._buf.find('\n') + else: + self._buf = '' + p = -1 + break + + def notify_eos(self): + # Notify end of stream. The last line may not end with a '\n'. + if self._buf != '': + self._line_consumer(line=self._buf, + output=self._name, + pobject=self._pobject) + self._buf = '' + + if self.log_level == 'verbose': + self.logger.LogCmd(cmd) + elif self.logger: + self.logger.LogCmdToFileOnly(cmd) + + # We use setsid so that the child will have a different session id + # and we can easily kill the process group. This is also important + # because the child will be disassociated from the parent terminal. + # In this way the child cannot mess the parent's terminal. 
+ pobject = None + try: + pobject = subprocess.Popen( + cmd, + cwd=cwd, + bufsize=1024, + env=env, + shell=shell, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT if join_stderr else subprocess.PIPE, + preexec_fn=os.setsid) + + # We provide a default line_consumer + if line_consumer is None: + line_consumer = lambda **d: None + start_time = time.time() + poll = select.poll() + outfd = pobject.stdout.fileno() + poll.register(outfd, select.POLLIN | select.POLLPRI) + handlermap = {outfd: + StreamHandler(pobject, outfd, 'stdout', line_consumer)} + if not join_stderr: + errfd = pobject.stderr.fileno() + poll.register(errfd, + select.POLLIN | select.POLLPRI) + handlermap[errfd] = StreamHandler(pobject, + errfd, + 'stderr', + line_consumer) + while len(handlermap): + readables = poll.poll(300) + for (fd, evt) in readables: + handler = handlermap[fd] + if evt & (select.POLLPRI | select.POLLIN): + handler.read_and_notify_line() + elif evt & (select.POLLHUP | select.POLLERR | select.POLLNVAL): + handler.notify_eos() + poll.unregister(fd) + del handlermap[fd] + + if timeout is not None and (time.time() - start_time > timeout): + os.killpg(os.getpgid(pobject.pid), signal.SIGTERM) + + return pobject.wait() + except BaseException as e: + except_handler(pobject, e) + raise + + +class MockCommandExecuter(CommandExecuter): + """Mock class for class CommandExecuter.""" + + def __init__(self, log_level, logger_to_set=None): + super(MockCommandExecuter, self).__init__(log_level, logger_to_set) + + def RunCommandGeneric(self, + cmd, + return_output=False, + machine=None, + username=None, + command_terminator=None, + command_timeout=None, + terminated_timeout=10, + print_to_console=True, + except_handler=lambda p, e: None): + assert not command_timeout + cmd = str(cmd) + if machine is None: + machine = 'localhost' + if username is None: + username = 'current' + logger.GetLogger().LogCmd('(Mock) ' + cmd, machine, username, + print_to_console) + return (0, 
'', '') + + def RunCommand(self, *args, **kwargs): + assert 'return_output' not in kwargs + kwargs['return_output'] = False + return self.RunCommandGeneric(*args, **kwargs)[0] + + def RunCommandWOutput(self, *args, **kwargs): + assert 'return_output' not in kwargs + kwargs['return_output'] = True + return self.RunCommandGeneric(*args, **kwargs) + + +class CommandTerminator(object): + """Object to request termination of a command in execution.""" + + def __init__(self): + self.terminated = False + + def Terminate(self): + self.terminated = True + + def IsTerminated(self): + return self.terminated diff --git a/cros_utils/command_executer_unittest.py b/cros_utils/command_executer_unittest.py new file mode 100755 index 00000000..d5f5d0cf --- /dev/null +++ b/cros_utils/command_executer_unittest.py @@ -0,0 +1,27 @@ +#!/usr/bin/python2 +"""Unittest for command_executer.py.""" + +from __future__ import print_function + +import time +import unittest + +import command_executer + + +class CommandExecuterTest(unittest.TestCase): + """Test for CommandExecuter class.""" + + def testTimeout(self): + timeout = 1 + logging_level = 'average' + ce = command_executer.CommandExecuter(logging_level) + start = time.time() + command = 'sleep 20' + ce.RunCommand(command, command_timeout=timeout, terminated_timeout=timeout) + end = time.time() + self.assertTrue(round(end - start) == timeout) + + +if __name__ == '__main__': + unittest.main() diff --git a/cros_utils/constants.py b/cros_utils/constants.py new file mode 100644 index 00000000..827e9233 --- /dev/null +++ b/cros_utils/constants.py @@ -0,0 +1,10 @@ +# Copyright 2010 Google Inc. All Rights Reserved. +"""Generic constants used accross modules. +""" + +__author__ = 'shenhan@google.com (Han Shen)' + +MOUNTED_TOOLCHAIN_ROOT = '/usr/local/toolchain_root' + +# Root directory for night testing run. 
+CROSTC_WORKSPACE = '/usr/local/google/crostc' diff --git a/cros_utils/email_sender.py b/cros_utils/email_sender.py new file mode 100755 index 00000000..cd45b4ec --- /dev/null +++ b/cros_utils/email_sender.py @@ -0,0 +1,144 @@ +#!/usr/bin/python2 + +# Copyright 2011 Google Inc. All Rights Reserved. +"""Utilities to send email either through SMTP or SendGMR.""" + +from __future__ import print_function + +from email import encoders as Encoders +from email.mime.base import MIMEBase +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +import os +import smtplib +import tempfile + +from cros_utils import command_executer + + +class EmailSender(object): + """Utility class to send email through SMTP or SendGMR.""" + + class Attachment(object): + """Small class to keep track of attachment info.""" + + def __init__(self, name, content): + self.name = name + self.content = content + + def SendEmail(self, + email_to, + subject, + text_to_send, + email_cc=None, + email_bcc=None, + email_from=None, + msg_type='plain', + attachments=None): + """Choose appropriate email method and call it.""" + if os.path.exists('/usr/bin/sendgmr'): + self.SendGMREmail(email_to, subject, text_to_send, email_cc, email_bcc, + email_from, msg_type, attachments) + else: + self.SendSMTPEmail(email_to, subject, text_to_send, email_cc, email_bcc, + email_from, msg_type, attachments) + + def SendSMTPEmail(self, email_to, subject, text_to_send, email_cc, email_bcc, + email_from, msg_type, attachments): + """Send email via standard smtp mail.""" + # Email summary to the current user. 
+ msg = MIMEMultipart() + + if not email_from: + email_from = os.path.basename(__file__) + + msg['To'] = ','.join(email_to) + msg['Subject'] = subject + + if email_from: + msg['From'] = email_from + if email_cc: + msg['CC'] = ','.join(email_cc) + email_to += email_cc + if email_bcc: + msg['BCC'] = ','.join(email_bcc) + email_to += email_bcc + + msg.attach(MIMEText(text_to_send, msg_type)) + if attachments: + for attachment in attachments: + part = MIMEBase('application', 'octet-stream') + part.set_payload(attachment.content) + Encoders.encode_base64(part) + part.add_header('Content-Disposition', + "attachment; filename=\"%s\"" % attachment.name) + msg.attach(part) + + # Send the message via our own SMTP server, but don't include the + # envelope header. + s = smtplib.SMTP('localhost') + s.sendmail(email_from, email_to, msg.as_string()) + s.quit() + + def SendGMREmail(self, email_to, subject, text_to_send, email_cc, email_bcc, + email_from, msg_type, attachments): + """Send email via sendgmr program.""" + ce = command_executer.GetCommandExecuter(log_level='none') + + if not email_from: + email_from = os.path.basename(__file__) + + to_list = ','.join(email_to) + + if not text_to_send: + text_to_send = 'Empty message body.' + body_fd, body_filename = tempfile.mkstemp() + to_be_deleted = [body_filename] + + try: + os.write(body_fd, text_to_send) + os.close(body_fd) + + # Fix single-quotes inside the subject. In bash, to escape a single quote + # (e.g 'don't') you need to replace it with '\'' (e.g. 'don'\''t'). To + # make Python read the backslash as a backslash rather than an escape + # character, you need to double it. So... 
+ subject = subject.replace("'", "'\\''") + + if msg_type == 'html': + command = ("sendgmr --to='%s' --subject='%s' --html_file='%s' " + '--body_file=/dev/null' % (to_list, subject, body_filename)) + else: + command = ("sendgmr --to='%s' --subject='%s' --body_file='%s'" % + (to_list, subject, body_filename)) + if email_from: + command += ' --from=%s' % email_from + if email_cc: + cc_list = ','.join(email_cc) + command += " --cc='%s'" % cc_list + if email_bcc: + bcc_list = ','.join(email_bcc) + command += " --bcc='%s'" % bcc_list + + if attachments: + attachment_files = [] + for attachment in attachments: + if '<html>' in attachment.content: + report_suffix = '_report.html' + else: + report_suffix = '_report.txt' + fd, fname = tempfile.mkstemp(suffix=report_suffix) + os.write(fd, attachment.content) + os.close(fd) + attachment_files.append(fname) + files = ','.join(attachment_files) + command += " --attachment_files='%s'" % files + to_be_deleted += attachment_files + + # Send the message via our own GMR server. + status = ce.RunCommand(command) + return status + + finally: + for f in to_be_deleted: + os.remove(f) diff --git a/cros_utils/file_utils.py b/cros_utils/file_utils.py new file mode 100644 index 00000000..b7aad7b3 --- /dev/null +++ b/cros_utils/file_utils.py @@ -0,0 +1,87 @@ +# Copyright 2011 Google Inc. All Rights Reserved. 
+"""Utilities for operations on files.""" + +from __future__ import print_function + +import errno +import os +import shutil +import command_executer + + +class FileUtils(object): + """Utilities for operations on files.""" + _instance = None + DRY_RUN = False + + @classmethod + def Configure(cls, dry_run): + cls.DRY_RUN = dry_run + + def __new__(cls, *args, **kwargs): + if not cls._instance: + if cls.DRY_RUN: + cls._instance = super(FileUtils, cls).__new__(MockFileUtils, *args, + **kwargs) + else: + cls._instance = super(FileUtils, cls).__new__(cls, *args, **kwargs) + return cls._instance + + def Md5File(self, filename, log_level='verbose', _block_size=2**10): + command = 'md5sum %s' % filename + ce = command_executer.GetCommandExecuter(log_level=log_level) + ret, out, _ = ce.RunCommandWOutput(command) + if ret: + raise Exception('Could not run md5sum on: %s' % filename) + + return out.strip().split()[0] + + def CanonicalizeChromeOSRoot(self, chromeos_root): + chromeos_root = os.path.expanduser(chromeos_root) + if os.path.isdir(os.path.join(chromeos_root, 'chromite')): + return chromeos_root + else: + return None + + def ChromeOSRootFromImage(self, chromeos_image): + chromeos_root = os.path.join( + os.path.dirname(chromeos_image), '../../../../..') + return self.CanonicalizeChromeOSRoot(chromeos_root) + + def MkDirP(self, path): + try: + os.makedirs(path) + except OSError as exc: + if exc.errno == errno.EEXIST: + pass + else: + raise + + def RmDir(self, path): + shutil.rmtree(path, ignore_errors=True) + + def WriteFile(self, path, contents): + with open(path, 'wb') as f: + f.write(contents) + + +class MockFileUtils(FileUtils): + """Mock class for file utilities.""" + + def Md5File(self, filename, log_level='verbose', _block_size=2**10): + return 'd41d8cd98f00b204e9800998ecf8427e' + + def CanonicalizeChromeOSRoot(self, chromeos_root): + return '/tmp/chromeos_root' + + def ChromeOSRootFromImage(self, chromeos_image): + return '/tmp/chromeos_root' + + def RmDir(self, 
path): + pass + + def MkDirP(self, path): + pass + + def WriteFile(self, path, contents): + pass diff --git a/cros_utils/html_tools.py b/cros_utils/html_tools.py new file mode 100644 index 00000000..8ca795bf --- /dev/null +++ b/cros_utils/html_tools.py @@ -0,0 +1,91 @@ +# Copyright 2010 Google Inc. All Rights Reserved. +"""Utilities for generating html.""" + + +def GetPageHeader(page_title): + return """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html> +<head> +<style type="text/css"> +table +{ +border-collapse:collapse; +} +table, td, th +{ +border:1px solid black; +} +</style> +<script type="text/javascript"> +function displayRow(id){ + var row = document.getElementById("group_"+id); + if (row.style.display == '') row.style.display = 'none'; + else row.style.display = ''; + } +</script> +<title>%s</title> +</head> +<body> + +""" % page_title + + +def GetListHeader(): + return '<ul>' + + +def GetListItem(text): + return '<li>%s</li>' % text + + +def GetListFooter(): + return '</ul>' + + +def GetList(items): + return '<ul>%s</ul>' % ''.join(['<li>%s</li>' % item for item in items]) + + +def GetParagraph(text): + return '<p>%s</p>' % text + + +def GetFooter(): + return '</body>\n</html>' + + +def GetHeader(text, h=1): + return '<h%s>%s</h%s>' % (h, text, h) + + +def GetTableHeader(headers): + row = ''.join(['<th>%s</th>' % header for header in headers]) + return '<table><tr>%s</tr>' % row + + +def GetTableFooter(): + return '</table>' + + +def FormatLineBreaks(text): + return text.replace('\n', '<br/>') + + +def GetTableCell(text): + return '<td>%s</td>' % FormatLineBreaks(str(text)) + + +def GetTableRow(columns): + return '<tr>%s</tr>' % '\n'.join([GetTableCell(column) for column in columns]) + + +def GetTable(headers, rows): + table = [GetTableHeader(headers)] + table.extend([GetTableRow(row) for row in rows]) + table.append(GetTableFooter()) + return '\n'.join(table) + + +def 
GetLink(link, text): + return "<a href='%s'>%s</a>" % (link, text) diff --git a/cros_utils/locks.py b/cros_utils/locks.py new file mode 100644 index 00000000..cb96368e --- /dev/null +++ b/cros_utils/locks.py @@ -0,0 +1,44 @@ +# Copyright 2015 The Chromium OS Authors. All rights reserved. +"""Utilities for locking machines.""" + +from __future__ import print_function + +import time + +import afe_lock_machine + +import logger + + +def AcquireLock(machines, chromeos_root, timeout=1200): + """Acquire lock for machine(s) with timeout, using AFE server for locking.""" + start_time = time.time() + locked = True + sleep_time = min(10, timeout / 10.0) + while True: + try: + afe_lock_machine.AFELockManager(machines, False, chromeos_root, + None).UpdateMachines(True) + break + except Exception as e: + if time.time() - start_time > timeout: + locked = False + logger.GetLogger().LogWarning( + 'Could not acquire lock on {0} within {1} seconds: {2}'.format( + repr(machines), timeout, str(e))) + break + time.sleep(sleep_time) + return locked + + +def ReleaseLock(machines, chromeos_root): + """Release locked machine(s), using AFE server for locking.""" + unlocked = True + try: + afe_lock_machine.AFELockManager(machines, False, chromeos_root, + None).UpdateMachines(False) + except Exception as e: + unlocked = False + logger.GetLogger().LogWarning('Could not unlock %s. %s' % + (repr(machines), str(e))) + return unlocked diff --git a/cros_utils/logger.py b/cros_utils/logger.py new file mode 100644 index 00000000..364d9c9d --- /dev/null +++ b/cros_utils/logger.py @@ -0,0 +1,369 @@ +# Copyright 2010 Google Inc. All Rights Reserved. 
+"""Logging helper module.""" + +from __future__ import print_function + +# System modules +import os.path +import sys +import traceback + + +#TODO(yunlian@google.com): Use GetRoot from misc +def GetRoot(scr_name): + """Break up pathname into (dir+name).""" + abs_path = os.path.abspath(scr_name) + return (os.path.dirname(abs_path), os.path.basename(abs_path)) + + +class Logger(object): + """Logging helper class.""" + + MAX_LOG_FILES = 10 + + def __init__(self, rootdir, basefilename, print_console, subdir='logs'): + logdir = os.path.join(rootdir, subdir) + basename = os.path.join(logdir, basefilename) + + try: + os.makedirs(logdir) + except OSError: + pass + # print("Warning: Logs directory '%s' already exists." % logdir) + + self.print_console = print_console + + self._CreateLogFileHandles(basename) + + self._WriteTo(self.cmdfd, ' '.join(sys.argv), True) + + def _AddSuffix(self, basename, suffix): + return '%s%s' % (basename, suffix) + + def _FindSuffix(self, basename): + timestamps = [] + found_suffix = None + for i in range(self.MAX_LOG_FILES): + suffix = str(i) + suffixed_basename = self._AddSuffix(basename, suffix) + cmd_file = '%s.cmd' % suffixed_basename + if not os.path.exists(cmd_file): + found_suffix = suffix + break + timestamps.append(os.stat(cmd_file).st_mtime) + + if found_suffix: + return found_suffix + + # Try to pick the oldest file with the suffix and return that one. + suffix = str(timestamps.index(min(timestamps))) + # print ("Warning: Overwriting log file: %s" % + # self._AddSuffix(basename, suffix)) + return suffix + + def _CreateLogFileHandle(self, name): + fd = None + try: + fd = open(name, 'w') + except IOError: + print('Warning: could not open %s for writing.' 
% name) + return fd + + def _CreateLogFileHandles(self, basename): + suffix = self._FindSuffix(basename) + suffixed_basename = self._AddSuffix(basename, suffix) + + self.cmdfd = self._CreateLogFileHandle('%s.cmd' % suffixed_basename) + self.stdout = self._CreateLogFileHandle('%s.out' % suffixed_basename) + self.stderr = self._CreateLogFileHandle('%s.err' % suffixed_basename) + + self._CreateLogFileSymlinks(basename, suffixed_basename) + + # Symlink unsuffixed basename to currently suffixed one. + def _CreateLogFileSymlinks(self, basename, suffixed_basename): + try: + for extension in ['cmd', 'out', 'err']: + src_file = '%s.%s' % (os.path.basename(suffixed_basename), extension) + dest_file = '%s.%s' % (basename, extension) + if os.path.exists(dest_file): + os.remove(dest_file) + os.symlink(src_file, dest_file) + except Exception as ex: + print('Exception while creating symlinks: %s' % str(ex)) + + def _WriteTo(self, fd, msg, flush): + if fd: + fd.write(msg) + if flush: + fd.flush() + + def LogStartDots(self, print_to_console=True): + term_fd = self._GetStdout(print_to_console) + if term_fd: + term_fd.flush() + term_fd.write('. ') + term_fd.flush() + + def LogAppendDot(self, print_to_console=True): + term_fd = self._GetStdout(print_to_console) + if term_fd: + term_fd.write('. 
') + term_fd.flush() + + def LogEndDots(self, print_to_console=True): + term_fd = self._GetStdout(print_to_console) + if term_fd: + term_fd.write('\n') + term_fd.flush() + + def LogMsg(self, file_fd, term_fd, msg, flush=True): + if file_fd: + self._WriteTo(file_fd, msg, flush) + if self.print_console: + self._WriteTo(term_fd, msg, flush) + + def _GetStdout(self, print_to_console): + if print_to_console: + return sys.stdout + return None + + def _GetStderr(self, print_to_console): + if print_to_console: + return sys.stderr + return None + + def LogCmdToFileOnly(self, cmd, machine='', user=None): + if not self.cmdfd: + return + + host = ('%s@%s' % (user, machine)) if user else machine + flush = True + cmd_string = 'CMD (%s): %s\n' % (host, cmd) + self._WriteTo(self.cmdfd, cmd_string, flush) + + def LogCmd(self, cmd, machine='', user=None, print_to_console=True): + if user: + host = '%s@%s' % (user, machine) + else: + host = machine + + self.LogMsg(self.cmdfd, self._GetStdout(print_to_console), + 'CMD (%s): %s\n' % (host, cmd)) + + def LogFatal(self, msg, print_to_console=True): + self.LogMsg(self.stderr, self._GetStderr(print_to_console), + 'FATAL: %s\n' % msg) + self.LogMsg(self.stderr, self._GetStderr(print_to_console), + '\n'.join(traceback.format_stack())) + sys.exit(1) + + def LogError(self, msg, print_to_console=True): + self.LogMsg(self.stderr, self._GetStderr(print_to_console), + 'ERROR: %s\n' % msg) + + def LogWarning(self, msg, print_to_console=True): + self.LogMsg(self.stderr, self._GetStderr(print_to_console), + 'WARNING: %s\n' % msg) + + def LogOutput(self, msg, print_to_console=True): + self.LogMsg(self.stdout, self._GetStdout(print_to_console), + 'OUTPUT: %s\n' % msg) + + def LogFatalIf(self, condition, msg): + if condition: + self.LogFatal(msg) + + def LogErrorIf(self, condition, msg): + if condition: + self.LogError(msg) + + def LogWarningIf(self, condition, msg): + if condition: + self.LogWarning(msg) + + def LogCommandOutput(self, msg, 
print_to_console=True): + self.LogMsg(self.stdout, + self._GetStdout(print_to_console), + msg, + flush=False) + + def LogCommandError(self, msg, print_to_console=True): + self.LogMsg(self.stderr, + self._GetStderr(print_to_console), + msg, + flush=False) + + def Flush(self): + self.cmdfd.flush() + self.stdout.flush() + self.stderr.flush() + + +class MockLogger(object): + """Logging helper class.""" + + MAX_LOG_FILES = 10 + + def __init__(self, *_args, **_kwargs): + self.stdout = sys.stdout + self.stderr = sys.stderr + return None + + def _AddSuffix(self, basename, suffix): + return '%s%s' % (basename, suffix) + + def _FindSuffix(self, basename): + timestamps = [] + found_suffix = None + for i in range(self.MAX_LOG_FILES): + suffix = str(i) + suffixed_basename = self._AddSuffix(basename, suffix) + cmd_file = '%s.cmd' % suffixed_basename + if not os.path.exists(cmd_file): + found_suffix = suffix + break + timestamps.append(os.stat(cmd_file).st_mtime) + + if found_suffix: + return found_suffix + + # Try to pick the oldest file with the suffix and return that one. + suffix = str(timestamps.index(min(timestamps))) + # print ("Warning: Overwriting log file: %s" % + # self._AddSuffix(basename, suffix)) + return suffix + + def _CreateLogFileHandle(self, name): + print('MockLogger: creating open file handle for %s (writing)' % name) + + def _CreateLogFileHandles(self, basename): + suffix = self._FindSuffix(basename) + suffixed_basename = self._AddSuffix(basename, suffix) + + print('MockLogger: opening file %s.cmd' % suffixed_basename) + print('MockLogger: opening file %s.out' % suffixed_basename) + print('MockLogger: opening file %s.err' % suffixed_basename) + + self._CreateLogFileSymlinks(basename, suffixed_basename) + + # Symlink unsuffixed basename to currently suffixed one. 
+ def _CreateLogFileSymlinks(self, basename, suffixed_basename): + for extension in ['cmd', 'out', 'err']: + src_file = '%s.%s' % (os.path.basename(suffixed_basename), extension) + dest_file = '%s.%s' % (basename, extension) + print('MockLogger: Calling os.symlink(%s, %s)' % (src_file, dest_file)) + + def _WriteTo(self, _fd, msg, _flush): + print('MockLogger: %s' % msg) + + def LogStartDots(self, _print_to_console=True): + print('. ') + + def LogAppendDot(self, _print_to_console=True): + print('. ') + + def LogEndDots(self, _print_to_console=True): + print('\n') + + def LogMsg(self, _file_fd, _term_fd, msg, **_kwargs): + print('MockLogger: %s' % msg) + + def _GetStdout(self, _print_to_console): + return None + + def _GetStderr(self, _print_to_console): + return None + + def LogCmdToFileOnly(self, *_args, **_kwargs): + return + + # def LogCmdToFileOnly(self, cmd, machine='', user=None): + # host = ('%s@%s' % (user, machine)) if user else machine + # cmd_string = 'CMD (%s): %s\n' % (host, cmd) + # print('MockLogger: Writing to file ONLY: %s' % cmd_string) + + def LogCmd(self, cmd, machine='', user=None, print_to_console=True): + if user: + host = '%s@%s' % (user, machine) + else: + host = machine + + self.LogMsg(0, self._GetStdout(print_to_console), + 'CMD (%s): %s\n' % (host, cmd)) + + def LogFatal(self, msg, print_to_console=True): + self.LogMsg(0, self._GetStderr(print_to_console), 'FATAL: %s\n' % msg) + self.LogMsg(0, self._GetStderr(print_to_console), + '\n'.join(traceback.format_stack())) + print('MockLogger: Calling sysexit(1)') + + def LogError(self, msg, print_to_console=True): + self.LogMsg(0, self._GetStderr(print_to_console), 'ERROR: %s\n' % msg) + + def LogWarning(self, msg, print_to_console=True): + self.LogMsg(0, self._GetStderr(print_to_console), 'WARNING: %s\n' % msg) + + def LogOutput(self, msg, print_to_console=True): + self.LogMsg(0, self._GetStdout(print_to_console), 'OUTPUT: %s\n' % msg) + + def LogFatalIf(self, condition, msg): + if condition: 
+ self.LogFatal(msg) + + def LogErrorIf(self, condition, msg): + if condition: + self.LogError(msg) + + def LogWarningIf(self, condition, msg): + if condition: + self.LogWarning(msg) + + def LogCommandOutput(self, msg, print_to_console=True): + self.LogMsg(self.stdout, + self._GetStdout(print_to_console), + msg, + flush=False) + + def LogCommandError(self, msg, print_to_console=True): + self.LogMsg(self.stderr, + self._GetStderr(print_to_console), + msg, + flush=False) + + def Flush(self): + print('MockLogger: Flushing cmdfd, stdout, stderr') + + +main_logger = None + + +def InitLogger(script_name, log_dir, print_console=True, mock=False): + """Initialize a global logger. To be called only once.""" + # pylint: disable=global-statement + global main_logger + assert not main_logger, 'The logger has already been initialized' + rootdir, basefilename = GetRoot(script_name) + if not log_dir: + log_dir = rootdir + if not mock: + main_logger = Logger(log_dir, basefilename, print_console) + else: + main_logger = MockLogger(log_dir, basefilename, print_console) + + +def GetLogger(log_dir='', mock=False): + if not main_logger: + InitLogger(sys.argv[0], log_dir, mock=mock) + return main_logger + + +def HandleUncaughtExceptions(fun): + """Catches all exceptions that would go outside decorated fun scope.""" + + def _Interceptor(*args, **kwargs): + try: + return fun(*args, **kwargs) + except StandardError: + GetLogger().LogFatal('Uncaught exception:\n%s' % traceback.format_exc()) + + return _Interceptor diff --git a/cros_utils/machines.py b/cros_utils/machines.py new file mode 100644 index 00000000..722df3b8 --- /dev/null +++ b/cros_utils/machines.py @@ -0,0 +1,25 @@ +# Copyright 2015 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+"""Utilities relating to machine-specific functions.""" + +from __future__ import print_function + +from cros_utils import command_executer + + +def MachineIsPingable(machine, logging_level='average'): + """Checks to see if a machine is responding to 'ping'. + + Args: + machine: String containing the name or ip address of the machine to check. + logging_level: The logging level with which to initialize the + command_executer (from command_executor.LOG_LEVEL enum list). + + Returns: + Boolean indicating whether machine is responding to ping or not. + """ + ce = command_executer.GetCommandExecuter(log_level=logging_level) + cmd = 'ping -c 1 -w 3 %s' % machine + status = ce.RunCommand(cmd) + return status == 0 diff --git a/cros_utils/manifest_versions.py b/cros_utils/manifest_versions.py new file mode 100644 index 00000000..f011282b --- /dev/null +++ b/cros_utils/manifest_versions.py @@ -0,0 +1,97 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+"""Tools for searching/manipulating the manifests repository.""" + +from __future__ import print_function + +__author__ = 'llozano@google.com (Luis Lozano)' + +import os +import re +import shutil +import tempfile +import time + +import command_executer +import logger + + +def IsCrosVersion(version): + match = re.search(r'(\d+\.\d+\.\d+\.\d+)', version) + return match is not None + + +def IsRFormatCrosVersion(version): + match = re.search(r'(R\d+-\d+\.\d+\.\d+)', version) + return match is not None + + +def RFormatCrosVersion(version): + assert IsCrosVersion(version) + tmp_major, tmp_minor = version.split('.', 1) + rformat = 'R' + tmp_major + '-' + tmp_minor + assert IsRFormatCrosVersion(rformat) + return rformat + + +class ManifestVersions(object): + """This class handles interactions with the manifests repo.""" + + def __init__(self, internal=True): + self.internal = internal + self.clone_location = tempfile.mkdtemp() + self.ce = command_executer.GetCommandExecuter() + if internal: + versions_git = ('https://chrome-internal.googlesource.com/' + 'chromeos/manifest-versions.git') + else: + versions_git = ( + 'https://chromium.googlesource.com/chromiumos/manifest-versions.git') + commands = ['cd {0}'.format(self.clone_location), + 'git clone {0}'.format(versions_git)] + ret = self.ce.RunCommands(commands) + if ret: + logger.GetLogger().LogFatal('Failed to clone manifest-versions.') + + def __del__(self): + if self.clone_location: + shutil.rmtree(self.clone_location) + + def TimeToVersion(self, my_time): + """Convert timestamp to version number.""" + cur_time = time.mktime(time.gmtime()) + des_time = float(my_time) + if cur_time - des_time > 7000000: + logger.GetLogger().LogFatal('The time you specify is too early.') + commands = ['cd {0}'.format(self.clone_location), 'cd manifest-versions', + 'git checkout -f $(git rev-list' + + ' --max-count=1 --before={0} origin/master)'.format(my_time)] + ret = self.ce.RunCommands(commands) + if ret: + 
logger.GetLogger().LogFatal('Failed to checkout manifest at ' + 'specified time') + path = os.path.realpath('{0}/manifest-versions/LKGM/lkgm.xml'.format( + self.clone_location)) + pp = path.split('/') + small = os.path.basename(path).split('.xml')[0] + version = pp[-2] + '.' + small + commands = ['cd {0}'.format(self.clone_location), 'cd manifest-versions', + 'git checkout master'] + self.ce.RunCommands(commands) + return version + + def GetManifest(self, version, to_file): + """Get the manifest file from a given chromeos-internal version.""" + assert not IsRFormatCrosVersion(version) + version = version.split('.', 1)[1] + os.chdir(self.clone_location) + files = [os.path.join(r, f) + for r, _, fs in os.walk('.') for f in fs if version in f] + if files: + command = 'cp {0} {1}'.format(files[0], to_file) + ret = self.ce.RunCommand(command) + if ret: + raise Exception('Cannot copy manifest to {0}'.format(to_file)) + else: + raise Exception('Version {0} is not available.'.format(version)) diff --git a/cros_utils/misc.py b/cros_utils/misc.py new file mode 100644 index 00000000..ae234fe3 --- /dev/null +++ b/cros_utils/misc.py @@ -0,0 +1,557 @@ +# Copyright 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+"""Utilities for toolchain build.""" + +from __future__ import print_function + +__author__ = 'asharif@google.com (Ahmad Sharif)' + +from contextlib import contextmanager +import os +import re +import shutil +import sys +import traceback + +import command_executer +import logger + +CHROMEOS_SCRIPTS_DIR = '~/trunk/src/scripts' +TOOLCHAIN_UTILS_PATH = '~/trunk/src/platform/dev/toolchain_utils.sh' + + +def GetChromeOSVersionFromLSBVersion(lsb_version): + """Get Chromeos version from Lsb version.""" + ce = command_executer.GetCommandExecuter() + command = ('git ls-remote ' + 'https://chromium.googlesource.com/chromiumos/manifest.git') + ret, out, _ = ce.RunCommandWOutput(command, print_to_console=False) + assert ret == 0, 'Command %s failed' % command + lower = [] + for line in out.splitlines(): + mo = re.search(r'refs/heads/release-R(\d+)-(\d+)\.B', line) + if mo: + revision = int(mo.group(1)) + build = int(mo.group(2)) + lsb_build = int(lsb_version.split('.')[0]) + if lsb_build > build: + lower.append(revision) + lower = sorted(lower) + if lower: + return 'R%d-%s' % (lower[-1] + 1, lsb_version) + else: + return 'Unknown' + + +def ApplySubs(string, *substitutions): + for pattern, replacement in substitutions: + string = re.sub(pattern, replacement, string) + return string + + +def UnitToNumber(unit_num, base=1000): + """Convert a number with unit to float.""" + unit_dict = {'kilo': base, 'mega': base**2, 'giga': base**3} + unit_num = unit_num.lower() + mo = re.search(r'(\d*)(.+)?', unit_num) + number = mo.group(1) + unit = mo.group(2) + if not unit: + return float(number) + for k, v in unit_dict.items(): + if k.startswith(unit): + return float(number) * v + raise Exception('Unit: %s not found in byte: %s!' 
% (unit, unit_num)) + + +def GetFilenameFromString(string): + return ApplySubs(string, (r'/', '__'), (r'\s', '_'), (r'[\\$="?^]', ''),) + + +def GetRoot(scr_name): + """Break up pathname into (dir+name).""" + abs_path = os.path.abspath(scr_name) + return (os.path.dirname(abs_path), os.path.basename(abs_path)) + + +def GetChromeOSKeyFile(chromeos_root): + return os.path.join(chromeos_root, 'src', 'scripts', 'mod_for_test_scripts', + 'ssh_keys', 'testing_rsa') + + +def GetChrootPath(chromeos_root): + return os.path.join(chromeos_root, 'chroot') + + +def GetInsideChrootPath(chromeos_root, file_path): + if not file_path.startswith(GetChrootPath(chromeos_root)): + raise Exception("File: %s doesn't seem to be in the chroot: %s" % + (file_path, chromeos_root)) + return file_path[len(GetChrootPath(chromeos_root)):] + + +def GetOutsideChrootPath(chromeos_root, file_path): + return os.path.join(GetChrootPath(chromeos_root), file_path.lstrip('/')) + + +def FormatQuotedCommand(command): + return ApplySubs(command, ('"', r'\"')) + + +def FormatCommands(commands): + return ApplySubs( + str(commands), ('&&', '&&\n'), (';', ';\n'), (r'\n+\s*', '\n')) + + +def GetImageDir(chromeos_root, board): + return os.path.join(chromeos_root, 'src', 'build', 'images', board) + + +def LabelLatestImage(chromeos_root, board, label, vanilla_path=None): + image_dir = GetImageDir(chromeos_root, board) + latest_image_dir = os.path.join(image_dir, 'latest') + latest_image_dir = os.path.realpath(latest_image_dir) + latest_image_dir = os.path.basename(latest_image_dir) + retval = 0 + with WorkingDirectory(image_dir): + command = 'ln -sf -T %s %s' % (latest_image_dir, label) + ce = command_executer.GetCommandExecuter() + retval = ce.RunCommand(command) + if retval: + return retval + if vanilla_path: + command = 'ln -sf -T %s %s' % (vanilla_path, 'vanilla') + retval2 = ce.RunCommand(command) + return retval2 + return retval + + +def DoesLabelExist(chromeos_root, board, label): + image_label = 
os.path.join(GetImageDir(chromeos_root, board), label) + return os.path.exists(image_label) + + +def GetBuildPackagesCommand(board, usepkg=False, debug=False): + if usepkg: + usepkg_flag = '--usepkg' + else: + usepkg_flag = '--nousepkg' + if debug: + withdebug_flag = '--withdebug' + else: + withdebug_flag = '--nowithdebug' + return ('%s/build_packages %s --withdev --withtest --withautotest ' + '--skip_toolchain_update %s --board=%s ' + '--accept_licenses=@CHROMEOS' % + (CHROMEOS_SCRIPTS_DIR, usepkg_flag, withdebug_flag, board)) + + +def GetBuildImageCommand(board, dev=False): + dev_args = '' + if dev: + dev_args = '--noenable_rootfs_verification --disk_layout=2gb-rootfs' + return ('%s/build_image --board=%s %s test' % + (CHROMEOS_SCRIPTS_DIR, board, dev_args)) + + +def GetSetupBoardCommand(board, + gcc_version=None, + binutils_version=None, + usepkg=None, + force=None): + """Get setup_board command.""" + options = [] + + if gcc_version: + options.append('--gcc_version=%s' % gcc_version) + + if binutils_version: + options.append('--binutils_version=%s' % binutils_version) + + if usepkg: + options.append('--usepkg') + else: + options.append('--nousepkg') + + if force: + options.append('--force') + + options.append('--accept_licenses=@CHROMEOS') + + return ('%s/setup_board --board=%s %s' % + (CHROMEOS_SCRIPTS_DIR, board, ' '.join(options))) + + +def CanonicalizePath(path): + path = os.path.expanduser(path) + path = os.path.realpath(path) + return path + + +def GetCtargetFromBoard(board, chromeos_root): + """Get Ctarget from board.""" + base_board = board.split('_')[0] + command = ('source %s; get_ctarget_from_board %s' % + (TOOLCHAIN_UTILS_PATH, base_board)) + ce = command_executer.GetCommandExecuter() + ret, out, _ = ce.ChrootRunCommandWOutput(chromeos_root, command) + if ret != 0: + raise ValueError('Board %s is invalid!' % board) + # Remove ANSI escape sequences. 
+ out = StripANSIEscapeSequences(out) + return out.strip() + + +def GetArchFromBoard(board, chromeos_root): + """Get Arch from board.""" + base_board = board.split('_')[0] + command = ('source %s; get_board_arch %s' % + (TOOLCHAIN_UTILS_PATH, base_board)) + ce = command_executer.GetCommandExecuter() + ret, out, _ = ce.ChrootRunCommandWOutput(chromeos_root, command) + if ret != 0: + raise ValueError('Board %s is invalid!' % board) + # Remove ANSI escape sequences. + out = StripANSIEscapeSequences(out) + return out.strip() + + +def GetGccLibsDestForBoard(board, chromeos_root): + """Get gcc libs destination from board.""" + arch = GetArchFromBoard(board, chromeos_root) + if arch == 'x86': + return '/build/%s/usr/lib/gcc/' % board + if arch == 'amd64': + return '/build/%s/usr/lib64/gcc/' % board + if arch == 'arm': + return '/build/%s/usr/lib/gcc/' % board + if arch == 'arm64': + return '/build/%s/usr/lib/gcc/' % board + raise ValueError('Arch %s is invalid!' % arch) + + +def StripANSIEscapeSequences(string): + string = re.sub(r'\x1b\[[0-9]*[a-zA-Z]', '', string) + return string + + +def GetChromeSrcDir(): + return 'var/cache/distfiles/target/chrome-src/src' + + +def GetEnvStringFromDict(env_dict): + return ' '.join(["%s=\"%s\"" % var for var in env_dict.items()]) + + +def MergeEnvStringWithDict(env_string, env_dict, prepend=True): + """Merge env string with dict.""" + if not env_string.strip(): + return GetEnvStringFromDict(env_dict) + override_env_list = [] + ce = command_executer.GetCommandExecuter() + for k, v in env_dict.items(): + v = v.strip("\"'") + if prepend: + new_env = "%s=\"%s $%s\"" % (k, v, k) + else: + new_env = "%s=\"$%s %s\"" % (k, k, v) + command = '; '.join([env_string, new_env, 'echo $%s' % k]) + ret, out, _ = ce.RunCommandWOutput(command) + override_env_list.append('%s=%r' % (k, out.strip())) + ret = env_string + ' ' + ' '.join(override_env_list) + return ret.strip() + + +def GetAllImages(chromeos_root, board): + ce = 
command_executer.GetCommandExecuter() + command = ('find %s/src/build/images/%s -name chromiumos_test_image.bin' % + (chromeos_root, board)) + ret, out, _ = ce.RunCommandWOutput(command) + assert ret == 0, 'Could not run command: %s' % command + return out.splitlines() + + +def IsFloat(text): + if text is None: + return False + try: + float(text) + return True + except ValueError: + return False + + +def RemoveChromeBrowserObjectFiles(chromeos_root, board): + """Remove any object files from all the posible locations.""" + out_dir = os.path.join( + GetChrootPath(chromeos_root), + 'var/cache/chromeos-chrome/chrome-src/src/out_%s' % board) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + logger.GetLogger().LogCmd('rm -rf %s' % out_dir) + out_dir = os.path.join( + GetChrootPath(chromeos_root), + 'var/cache/chromeos-chrome/chrome-src-internal/src/out_%s' % board) + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + logger.GetLogger().LogCmd('rm -rf %s' % out_dir) + + +@contextmanager +def WorkingDirectory(new_dir): + """Get the working directory.""" + old_dir = os.getcwd() + if old_dir != new_dir: + msg = 'cd %s' % new_dir + logger.GetLogger().LogCmd(msg) + os.chdir(new_dir) + yield new_dir + if old_dir != new_dir: + msg = 'cd %s' % old_dir + logger.GetLogger().LogCmd(msg) + os.chdir(old_dir) + + +def HasGitStagedChanges(git_dir): + """Return True if git repository has staged changes.""" + command = 'cd {0} && git diff --quiet --cached --exit-code HEAD'.format( + git_dir) + return command_executer.GetCommandExecuter().RunCommand( + command, + print_to_console=False) + + +def HasGitUnstagedChanges(git_dir): + """Return True if git repository has un-staged changes.""" + command = 'cd {0} && git diff --quiet --exit-code HEAD'.format(git_dir) + return command_executer.GetCommandExecuter().RunCommand( + command, + print_to_console=False) + + +def HasGitUntrackedChanges(git_dir): + """Return True if git repository has un-tracked changes.""" + command = ('cd {0} && 
test -z ' + '$(git ls-files --exclude-standard --others)').format(git_dir) + return command_executer.GetCommandExecuter().RunCommand( + command, + print_to_console=False) + + +def GitGetCommitHash(git_dir, commit_symbolic_name): + """Return githash for the symbolic git commit. + + For example, commit_symbolic_name could be + "cros/gcc.gnu.org/branches/gcc/gcc-4_8-mobile, this function returns the git + hash for this symbolic name. + + Args: + git_dir: a git working tree. + commit_symbolic_name: a symbolic name for a particular git commit. + + Returns: + The git hash for the symbolic name or None if fails. + """ + + command = ('cd {0} && git log -n 1 --pretty="format:%H" {1}').format( + git_dir, commit_symbolic_name) + rv, out, _ = command_executer.GetCommandExecuter().RunCommandWOutput( + command, + print_to_console=False) + if rv == 0: + return out.strip() + return None + + +def IsGitTreeClean(git_dir): + """Test if git tree has no local changes. + + Args: + git_dir: git tree directory. + + Returns: + True if git dir is clean. + """ + if HasGitStagedChanges(git_dir): + logger.GetLogger().LogWarning('Git tree has staged changes.') + return False + if HasGitUnstagedChanges(git_dir): + logger.GetLogger().LogWarning('Git tree has unstaged changes.') + return False + if HasGitUntrackedChanges(git_dir): + logger.GetLogger().LogWarning('Git tree has un-tracked changes.') + return False + return True + + +def GetGitChangesAsList(git_dir, path=None, staged=False): + """Get changed files as a list. + + Args: + git_dir: git tree directory. + path: a relative path that is part of the tree directory, could be null. + staged: whether to include staged files as well. + + Returns: + A list containing all the changed files. 
+ """ + command = 'cd {0} && git diff --name-only'.format(git_dir) + if staged: + command += ' --cached' + if path: + command += ' -- ' + path + _, out, _ = command_executer.GetCommandExecuter().RunCommandWOutput( + command, + print_to_console=False) + rv = [] + for line in out.splitlines(): + rv.append(line) + return rv + + +def IsChromeOsTree(chromeos_root): + return (os.path.isdir(os.path.join(chromeos_root, + 'src/third_party/chromiumos-overlay')) and + os.path.isdir(os.path.join(chromeos_root, 'manifest'))) + + +def DeleteChromeOsTree(chromeos_root, dry_run=False): + """Delete a ChromeOs tree *safely*. + + Args: + chromeos_root: dir of the tree, could be a relative one (but be careful) + dry_run: only prints out the command if True + + Returns: + True if everything is ok. + """ + if not IsChromeOsTree(chromeos_root): + logger.GetLogger().LogWarning( + '"{0}" does not seem to be a valid chromeos tree, do nothing.'.format( + chromeos_root)) + return False + cmd0 = 'cd {0} && cros_sdk --delete'.format(chromeos_root) + if dry_run: + print(cmd0) + else: + if command_executer.GetCommandExecuter().RunCommand( + cmd0, + print_to_console=True) != 0: + return False + + cmd1 = ('export CHROMEOSDIRNAME="$(dirname $(cd {0} && pwd))" && ' + 'export CHROMEOSBASENAME="$(basename $(cd {0} && pwd))" && ' + 'cd $CHROMEOSDIRNAME && sudo rm -fr $CHROMEOSBASENAME').format( + chromeos_root) + if dry_run: + print(cmd1) + return True + + return command_executer.GetCommandExecuter().RunCommand( + cmd1, + print_to_console=True) == 0 + + +def ApplyGerritPatches(chromeos_root, + gerrit_patch_string, + branch='cros/master'): + """Apply gerrit patches on a chromeos tree. + + Args: + chromeos_root: chromeos tree path + gerrit_patch_string: a patch string just like the one gives to cbuildbot, + 'id1 id2 *id3 ... idn'. A prefix of '* means this is an internal patch. + branch: the tree based on which to apply the patches. + + Returns: + True if success. 
+ """ + + ### First of all, we need chromite libs + sys.path.append(os.path.join(chromeos_root, 'chromite')) + # Imports below are ok after modifying path to add chromite. + # Pylint cannot detect that and complains. + # pylint: disable=import-error + from lib import git + from lib import gerrit + manifest = git.ManifestCheckout(chromeos_root) + patch_list = gerrit_patch_string.split(' ') + ### This takes time, print log information. + logger.GetLogger().LogOutput('Retrieving patch information from server ...') + patch_info_list = gerrit.GetGerritPatchInfo(patch_list) + for pi in patch_info_list: + project_checkout = manifest.FindCheckout(pi.project, strict=False) + if not project_checkout: + logger.GetLogger().LogError( + 'Failed to find patch project "{project}" in manifest.'.format( + project=pi.project)) + return False + + pi_str = '{project}:{ref}'.format(project=pi.project, ref=pi.ref) + try: + project_git_path = project_checkout.GetPath(absolute=True) + logger.GetLogger().LogOutput('Applying patch "{0}" in "{1}" ...'.format( + pi_str, project_git_path)) + pi.Apply(project_git_path, branch, trivial=False) + except Exception: + traceback.print_exc(file=sys.stdout) + logger.GetLogger().LogError('Failed to apply patch "{0}"'.format(pi_str)) + return False + return True + + +def BooleanPrompt(prompt='Do you want to continue?', + default=True, + true_value='yes', + false_value='no', + prolog=None): + """Helper function for processing boolean choice prompts. + + Args: + prompt: The question to present to the user. + default: Boolean to return if the user just presses enter. + true_value: The text to display that represents a True returned. + false_value: The text to display that represents a False returned. + prolog: The text to display before prompt. + + Returns: + True or False. 
+ """ + true_value, false_value = true_value.lower(), false_value.lower() + true_text, false_text = true_value, false_value + if true_value == false_value: + raise ValueError('true_value and false_value must differ: got %r' % + true_value) + + if default: + true_text = true_text[0].upper() + true_text[1:] + else: + false_text = false_text[0].upper() + false_text[1:] + + prompt = ('\n%s (%s/%s)? ' % (prompt, true_text, false_text)) + + if prolog: + prompt = ('\n%s\n%s' % (prolog, prompt)) + + while True: + try: + response = raw_input(prompt).lower() + except EOFError: + # If the user hits CTRL+D, or stdin is disabled, use the default. + print() + response = None + except KeyboardInterrupt: + # If the user hits CTRL+C, just exit the process. + print() + print('CTRL+C detected; exiting') + sys.exit() + + if not response: + return default + if true_value.startswith(response): + if not false_value.startswith(response): + return True + # common prefix between the two... + elif false_value.startswith(response): + return False diff --git a/cros_utils/misc_test.py b/cros_utils/misc_test.py new file mode 100644 index 00000000..80082207 --- /dev/null +++ b/cros_utils/misc_test.py @@ -0,0 +1,51 @@ +# Copyright 2012 Google Inc. All Rights Reserved. 
+"""Tests for misc.""" + +from __future__ import print_function + +__author__ = 'asharif@google.com (Ahmad Sharif)' + +# System modules +import unittest + +# Local modules +import misc + + +class UtilsTest(unittest.TestCase): + """Tests for misc.""" + + def testGetFilenameFromString(self): + string = 'a /b=c"d^$?\\' + filename = misc.GetFilenameFromString(string) + self.assertEqual(filename, 'a___bcd') + + def testPrependMergeEnv(self): + var = 'USE' + use_flags = 'hello 123' + added_use_flags = 'bla bla' + env_string = '%s=%r' % (var, use_flags) + new_env_string = misc.MergeEnvStringWithDict(env_string, + {var: added_use_flags}) + expected_new_env = '%s=%r' % (var, ' '.join([added_use_flags, use_flags])) + self.assertEqual(new_env_string, ' '.join([env_string, expected_new_env])) + + def testGetChromeOSVersionFromLSBVersion(self): + versions_dict = {'2630.0.0': '22', '2030.0.0': '19'} + f = misc.GetChromeOSVersionFromLSBVersion + for k, v in versions_dict.items(): + self.assertEqual(f(k), 'R%s-%s' % (v, k)) + + def testPostpendMergeEnv(self): + var = 'USE' + use_flags = 'hello 123' + added_use_flags = 'bla bla' + env_string = '%s=%r' % (var, use_flags) + new_env_string = misc.MergeEnvStringWithDict(env_string, + {var: added_use_flags}, False) + expected_new_env = '%s=%r' % (var, ' '.join([use_flags, added_use_flags])) + self.assertEqual(new_env_string, ' '.join([env_string, expected_new_env])) + + +if __name__ == '__main__': + unittest.main() diff --git a/cros_utils/no_pseudo_terminal_test.py b/cros_utils/no_pseudo_terminal_test.py new file mode 100644 index 00000000..43eabb13 --- /dev/null +++ b/cros_utils/no_pseudo_terminal_test.py @@ -0,0 +1,53 @@ +"""Test to ensure we're not touching /dev/ptmx when running commands.""" + +from __future__ import print_function + +import os +import subprocess +import tempfile +import time +import unittest +from cros_utils import command_executer + + +class NoPsuedoTerminalTest(unittest.TestCase): + """Test to ensure we're not 
touching /dev/ptmx when running commands.""" + + _strace_process = None + STRACE_TIMEOUT = 10 + + def _AttachStraceToSelf(self, output_file): + """Attaches strace to the current process.""" + args = ['strace', '-o', output_file, '-p', str(os.getpid())] + print(args) + self._strace_process = subprocess.Popen(args) + # Wait until we see some activity. + start_time = time.time() + while time.time() - start_time < self.STRACE_TIMEOUT: + if os.path.isfile(output_file) and open(output_file).read(1): + return True + time.sleep(1) + return False + + def _KillStraceProcess(self): + """Kills strace that was started by _AttachStraceToSelf().""" + self._strace_process.terminate() + self._strace_process.wait() + return True + + def testNoPseudoTerminalWhenRunningCommand(self): + """Test to make sure we're not touching /dev/ptmx when running commands.""" + temp_file = tempfile.mktemp() + self.assertTrue(self._AttachStraceToSelf(temp_file)) + + ce = command_executer.GetCommandExecuter() + ce.RunCommand('echo') + + self.assertTrue(self._KillStraceProcess()) + + strace_contents = open(temp_file).read() + self.assertFalse('/dev/ptmx' in strace_contents) + + +if __name__ == '__main__': + unittest.main() diff --git a/cros_utils/perf_diff.py b/cros_utils/perf_diff.py new file mode 100755 index 00000000..c861f6ba --- /dev/null +++ b/cros_utils/perf_diff.py @@ -0,0 +1,332 @@ +#!/usr/bin/python2 +# Copyright 2012 Google Inc. All Rights Reserved. +"""One-line documentation for perf_diff module. + +A detailed description of perf_diff. 
+""" + +from __future__ import print_function + +__author__ = 'asharif@google.com (Ahmad Sharif)' + +import argparse +import re +import sys + +import misc +import tabulator + +ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table' +TOTAL_EVENTS = 'Total_events_of_this_profile' + + +def GetPerfDictFromReport(report_file): + output = {} + perf_report = PerfReport(report_file) + for k, v in perf_report.sections.items(): + if k not in output: + output[k] = {} + output[k][ROWS_TO_SHOW] = 0 + output[k][TOTAL_EVENTS] = 0 + for function in v.functions: + out_key = '%s' % (function.name) + output[k][out_key] = function.count + output[k][TOTAL_EVENTS] += function.count + if function.percent > 1: + output[k][ROWS_TO_SHOW] += 1 + return output + + +def _SortDictionaryByValue(d): + l = [(k, v) for (k, v) in d.iteritems()] + + def GetFloat(x): + if misc.IsFloat(x): + return float(x) + else: + return x + + sorted_l = sorted(l, key=lambda x: GetFloat(x[1])) + sorted_l.reverse() + return [f[0] for f in sorted_l] + + +class Tabulator(object): + """Make tables.""" + + def __init__(self, all_dicts): + self._all_dicts = all_dicts + + def PrintTable(self): + for dicts in self._all_dicts: + self.PrintTableHelper(dicts) + + def PrintTableHelper(self, dicts): + """Transfrom dicts to tables.""" + fields = {} + for d in dicts: + for f in d.keys(): + if f not in fields: + fields[f] = d[f] + else: + fields[f] = max(fields[f], d[f]) + table = [] + header = ['name'] + for i in range(len(dicts)): + header.append(i) + + table.append(header) + + sorted_fields = _SortDictionaryByValue(fields) + + for f in sorted_fields: + row = [f] + for d in dicts: + if f in d: + row.append(d[f]) + else: + row.append('0') + table.append(row) + + print(tabulator.GetSimpleTable(table)) + + +class Function(object): + """Function for formatting.""" + + def __init__(self): + self.count = 0 + self.name = '' + self.percent = 0 + + +class Section(object): + """Section formatting.""" + + def __init__(self, contents): + self.name = 
'' + self.raw_contents = contents + self._ParseSection() + + def _ParseSection(self): + matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents) + assert len(matches) <= 1, 'More than one event found in 1 section' + if not matches: + return + match = matches[0] + self.name = match[1] + self.count = misc.UnitToNumber(match[0]) + + self.functions = [] + for line in self.raw_contents.splitlines(): + if not line.strip(): + continue + if '%' not in line: + continue + if not line.startswith('#'): + fields = [f for f in line.split(' ') if f] + function = Function() + function.percent = float(fields[0].strip('%')) + function.count = int(fields[1]) + function.name = ' '.join(fields[2:]) + self.functions.append(function) + + +class PerfReport(object): + """Get report from raw report.""" + + def __init__(self, perf_file): + self.perf_file = perf_file + self._ReadFile() + self.sections = {} + self.metadata = {} + self._section_contents = [] + self._section_header = '' + self._SplitSections() + self._ParseSections() + self._ParseSectionHeader() + + def _ParseSectionHeader(self): + """Parse a header of a perf report file.""" + # The "captured on" field is inaccurate - this actually refers to when the + # report was generated, not when the data was captured. + for line in self._section_header.splitlines(): + line = line[2:] + if ':' in line: + key, val = line.strip().split(':', 1) + key = key.strip() + val = val.strip() + self.metadata[key] = val + + def _ReadFile(self): + self._perf_contents = open(self.perf_file).read() + + def _ParseSections(self): + self.event_counts = {} + self.sections = {} + for section_content in self._section_contents: + section = Section(section_content) + section.name = self._GetHumanReadableName(section.name) + self.sections[section.name] = section + + # TODO(asharif): Do this better. 
+ def _GetHumanReadableName(self, section_name): + if not 'raw' in section_name: + return section_name + raw_number = section_name.strip().split(' ')[-1] + for line in self._section_header.splitlines(): + if raw_number in line: + name = line.strip().split(' ')[5] + return name + + def _SplitSections(self): + self._section_contents = [] + indices = [m.start() for m in re.finditer('# Events:', self._perf_contents)] + indices.append(len(self._perf_contents)) + for i in range(len(indices) - 1): + section_content = self._perf_contents[indices[i]:indices[i + 1]] + self._section_contents.append(section_content) + self._section_header = '' + if indices: + self._section_header = self._perf_contents[0:indices[0]] + + +class PerfDiffer(object): + """Perf differ class.""" + + def __init__(self, reports, num_symbols, common_only): + self._reports = reports + self._num_symbols = num_symbols + self._common_only = common_only + self._common_function_names = {} + + def DoDiff(self): + """The function that does the diff.""" + section_names = self._FindAllSections() + + filename_dicts = [] + summary_dicts = [] + for report in self._reports: + d = {} + filename_dicts.append({'file': report.perf_file}) + for section_name in section_names: + if section_name in report.sections: + d[section_name] = report.sections[section_name].count + summary_dicts.append(d) + + all_dicts = [filename_dicts, summary_dicts] + + for section_name in section_names: + function_names = self._GetTopFunctions(section_name, self._num_symbols) + self._FindCommonFunctions(section_name) + dicts = [] + for report in self._reports: + d = {} + if section_name in report.sections: + section = report.sections[section_name] + + # Get a common scaling factor for this report. 
+ common_scaling_factor = self._GetCommonScalingFactor(section) + + for function in section.functions: + if function.name in function_names: + key = '%s %s' % (section.name, function.name) + d[key] = function.count + # Compute a factor to scale the function count by in common_only + # mode. + if self._common_only and ( + function.name in self._common_function_names[section.name]): + d[key + ' scaled'] = common_scaling_factor * function.count + dicts.append(d) + + all_dicts.append(dicts) + + mytabulator = Tabulator(all_dicts) + mytabulator.PrintTable() + + def _FindAllSections(self): + sections = {} + for report in self._reports: + for section in report.sections.values(): + if section.name not in sections: + sections[section.name] = section.count + else: + sections[section.name] = max(sections[section.name], section.count) + return _SortDictionaryByValue(sections) + + def _GetCommonScalingFactor(self, section): + unique_count = self._GetCount( + section, lambda x: x in self._common_function_names[section.name]) + return 100.0 / unique_count + + def _GetCount(self, section, filter_fun=None): + total_count = 0 + for function in section.functions: + if not filter_fun or filter_fun(function.name): + total_count += int(function.count) + return total_count + + def _FindCommonFunctions(self, section_name): + function_names_list = [] + for report in self._reports: + if section_name in report.sections: + section = report.sections[section_name] + function_names = [f.name for f in section.functions] + function_names_list.append(function_names) + + self._common_function_names[section_name] = ( + reduce(set.intersection, map(set, function_names_list))) + + def _GetTopFunctions(self, section_name, num_functions): + all_functions = {} + for report in self._reports: + if section_name in report.sections: + section = report.sections[section_name] + for f in section.functions[:num_functions]: + if f.name in all_functions: + all_functions[f.name] = max(all_functions[f.name], f.count) + 
else: + all_functions[f.name] = f.count + # FIXME(asharif): Don't really need to sort these... + return _SortDictionaryByValue(all_functions) + + def _GetFunctionsDict(self, section, function_names): + d = {} + for function in section.functions: + if function.name in function_names: + d[function.name] = function.count + return d + + +def Main(argv): + """The entry of the main.""" + parser = argparse.ArgumentParser() + parser.add_argument('-n', + '--num_symbols', + dest='num_symbols', + default='5', + help='The number of symbols to show.') + parser.add_argument('-c', + '--common_only', + dest='common_only', + action='store_true', + default=False, + help='Diff common symbols only.') + + options, args = parser.parse_known_args(argv) + + try: + reports = [] + for report in args[1:]: + report = PerfReport(report) + reports.append(report) + pd = PerfDiffer(reports, int(options.num_symbols), options.common_only) + pd.DoDiff() + finally: + pass + + return 0 + + +if __name__ == '__main__': + sys.exit(Main(sys.argv)) diff --git a/cros_utils/pstat.py b/cros_utils/pstat.py new file mode 100644 index 00000000..602fc0c7 --- /dev/null +++ b/cros_utils/pstat.py @@ -0,0 +1,1077 @@ +# We did not author this file nor mantain it. Skip linting it. +#pylint: skip-file +# Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# Comments and/or additions are welcome (send e-mail to: +# strang@nmr.mgh.harvard.edu). +# +"""pstat.py module + +################################################# +####### Written by: Gary Strangman ########### +####### Last modified: Dec 18, 2007 ########### +################################################# + +This module provides some useful list and array manipulation routines +modeled after those found in the |Stat package by Gary Perlman, plus a +number of other useful list/file manipulation functions. The list-based +functions include: + + abut (source,*args) + simpleabut (source, addon) + colex (listoflists,cnums) + collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) + dm (listoflists,criterion) + flat (l) + linexand (listoflists,columnlist,valuelist) + linexor (listoflists,columnlist,valuelist) + linedelimited (inlist,delimiter) + lineincols (inlist,colsize) + lineincustcols (inlist,colsizes) + list2string (inlist) + makelol(inlist) + makestr(x) + printcc (lst,extra=2) + printincols (listoflists,colsize) + pl (listoflists) + printl(listoflists) + replace (lst,oldval,newval) + recode (inlist,listmap,cols='all') + remap (listoflists,criterion) + roundlist (inlist,num_digits_to_round_floats_to) + sortby(listoflists,sortcols) + unique (inlist) + duplicates(inlist) + writedelimited (listoflists, delimiter, file, writetype='w') + +Some of these functions have alternate versions which are defined only if +Numeric (NumPy) can be imported. 
These functions are generally named as +above, with an 'a' prefix. + + aabut (source, *args) + acolex (a,indices,axis=1) + acollapse (a,keepcols,collapsecols,sterr=0,ns=0) + adm (a,criterion) + alinexand (a,columnlist,valuelist) + alinexor (a,columnlist,valuelist) + areplace (a,oldval,newval) + arecode (a,listmap,col='all') + arowcompare (row1, row2) + arowsame (row1, row2) + asortrows(a,axis=0) + aunique(inarray) + aduplicates(inarray) + +Currently, the code is all but completely un-optimized. In many cases, the +array versions of functions amount simply to aliases to built-in array +functions/methods. Their inclusion here is for function name consistency. +""" + +## CHANGE LOG: +## ========== +## 07-11-26 ... edited to work with numpy +## 01-11-15 ... changed list2string() to accept a delimiter +## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1 +## 01-05-31 ... added duplicates() and aduplicates() functions +## 00-12-28 ... license made GPL, docstring and import requirements +## 99-11-01 ... changed version to 0.3 +## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py) +## 03/27/99 ... added areplace function, made replace fcn recursive +## 12/31/98 ... added writefc function for ouput to fixed column sizes +## 12/07/98 ... fixed import problem (failed on collapse() fcn) +## added __version__ variable (now 0.2) +## 12/05/98 ... updated doc-strings +## added features to collapse() function +## added flat() function for lists +## fixed a broken asortrows() +## 11/16/98 ... fixed minor bug in aput for 1D arrays +## +## 11/08/98 ... fixed aput to output large arrays correctly + +import stats # required 3rd party module +import string, copy +from types import * + +__version__ = 0.4 + +###=========================== LIST FUNCTIONS ========================== +### +### Here are the list functions, DEFINED FOR ALL SYSTEMS. +### Array functions (for NumPy-enabled computers) appear below. 
###


def abut(source, *args):
  """Concatenate lists column-wise, repeating the shorter one.

  Like the |Stat abut command: concatenates lists side-by-side.  '2D'
  lists are accommodated for either argument.  CAUTION: a shorter list is
  repeated until it is as long as the longest list.  If this behavior is
  not desired, use pstat.simpleabut().

  Usage:   abut(source, *args) where args = any number of lists
  Returns: a list of lists as long as the LONGEST list passed, source on
    the 'left', lists in args attached consecutively on the 'right'
  """
  # Python 3 port: types.ListType/TupleType were removed (use isinstance)
  # and '/' became float division (use '//'; lengths are always ints).
  if not isinstance(source, (list, tuple)):
    source = [source]
  for addon in args:
    if not isinstance(addon, (list, tuple)):
      addon = [addon]
    if len(addon) < len(source):  # is source list longer?
      if len(source) % len(addon) == 0:  # are they integer multiples?
        repeats = len(source) // len(addon)  # repeat addon n times
        origadd = copy.deepcopy(addon)
        for _ in range(repeats - 1):
          addon = addon + origadd
      else:
        repeats = len(source) // len(addon) + 1  # x is NOT an integer
        origadd = copy.deepcopy(addon)
        for _ in range(repeats - 1):
          addon = addon + origadd
        addon = addon[0:len(source)]
    elif len(source) < len(addon):  # is addon list longer?
      if len(addon) % len(source) == 0:  # are they integer multiples?
        repeats = len(addon) // len(source)  # repeat source n times
        origsour = copy.deepcopy(source)
        for _ in range(repeats - 1):
          source = source + origsour
      else:
        repeats = len(addon) // len(source) + 1  # x is NOT an integer
        origsour = copy.deepcopy(source)
        for _ in range(repeats - 1):
          source = source + origsour
        source = source[0:len(addon)]

    source = simpleabut(source, addon)
  return source


def simpleabut(source, addon):
  """Concatenate two lists as columns WITHOUT repeating either one.

  '2D' lists are accommodated for either argument.  Beware of list pairs
  with different lengths: the resulting list has the length of the FIRST
  list passed.

  Usage:   simpleabut(source, addon)  source, addon = list (or list-of-lists)
  Returns: a list of lists as long as source, with source on the 'left'
    and addon on the 'right'
  """
  if not isinstance(source, (list, tuple)):
    source = [source]
  if not isinstance(addon, (list, tuple)):
    addon = [addon]
  minlen = min(len(source), len(addon))
  merged = copy.deepcopy(source)  # renamed from 'list': shadowed a builtin
  if not isinstance(source[0], (list, tuple)):
    if not isinstance(addon[0], (list, tuple)):
      for i in range(minlen):
        merged[i] = [source[i]] + [addon[i]]  # source/addon = column
    else:
      for i in range(minlen):
        merged[i] = [source[i]] + addon[i]  # addon = list-of-lists
  else:
    if not isinstance(addon[0], (list, tuple)):
      for i in range(minlen):
        merged[i] = source[i] + [addon[i]]  # source = list-of-lists
    else:
      for i in range(minlen):
        merged[i] = source[i] + addon[i]  # source/addon = list-of-lists
  return merged


def colex(listoflists, cnums):
  """Extract the columns specified in cnums from listoflists.

  cnums can be an integer, a sequence of integers, or a string expression
  corresponding to a slice operation on the variable x (e.g. '[3:]'
  extracts columns 3 onward from the listoflists).

  Usage:   colex(listoflists, cnums)
  Returns: a list (or list-of-lists) of the requested columns, in the
    order the column numbers appear in cnums
  """
  # Python 3 port: map() returns an iterator, so the map-based extraction
  # was replaced with list comprehensions (also drops the old 'global
  # index' hack).
  column = 0
  if isinstance(cnums, (list, tuple)):  # if multiple columns to get
    column = [row[cnums[0]] for row in listoflists]
    for col in cnums[1:]:
      column = abut(column, [row[col] for row in listoflists])
  elif isinstance(cnums, str):  # if an 'x[3:]' type expression
    # NOTE(review): eval of a caller-supplied expression — legacy |Stat
    # interface; callers must pass trusted strings only.
    column = eval('[x' + cnums + ' for x in listoflists]')
  else:  # else it's just 1 col to get
    column = [row[cnums] for row in listoflists]
  return column


def collapse(listoflists,
             keepcols,
             collapsecols,
             fcn1=None,
             fcn2=None,
             cfcn=None):
  """Average data in collapsecols, keeping unique combos of keepcols.

  Retains the unique sets of values in keepcols plus the result of cfcn
  (defaults to the mean, defined here to avoid circular imports with
  stats.py) for each column in collapsecols.  Setting fcn1 and/or fcn2 to
  a function (e.g. stats.sterr, len) appends those results after each
  calculated mean.

  Usage:   collapse(listoflists, keepcols, collapsecols,
                    fcn1=None, fcn2=None, cfcn=None)
  Returns: a list of lists with all unique permutations of entries in
    keepcols columns, abutted with the cfcn result of each collapsecols
    column
  """

  def collmean(inlist):
    # Default collapse function: arithmetic mean.
    s = 0
    for item in inlist:
      s = s + item
    return s / float(len(inlist))

  if not isinstance(keepcols, (list, tuple)):
    keepcols = [keepcols]
  if not isinstance(collapsecols, (list, tuple)):
    collapsecols = [collapsecols]
  if cfcn is None:
    cfcn = collmean
  if keepcols == []:
    means = [0] * len(collapsecols)
    for i in range(len(collapsecols)):
      avgcol = colex(listoflists, collapsecols[i])
      means[i] = cfcn(avgcol)
      if fcn1:
        try:
          test = fcn1(avgcol)
        except Exception:
          test = 'N/A'
        means[i] = [means[i], test]
      if fcn2:
        # NOTE(review): the legacy code computes fcn2 but then appends
        # len(avgcol), not the fcn2 result; preserved as-is.
        try:
          test = fcn2(avgcol)
        except Exception:
          test = 'N/A'
        try:
          means[i] = means[i] + [len(avgcol)]
        except TypeError:
          means[i] = [means[i], len(avgcol)]
    return means
  else:
    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
      if not isinstance(item, (list, tuple)):
        item = [item]
      tmprows = linexand(listoflists, keepcols, item)
      for col in collapsecols:
        avgcol = colex(tmprows, col)
        item.append(cfcn(avgcol))
        if fcn1 is not None:  # was 'fcn1 <> None' (py2-only operator)
          try:
            test = fcn1(avgcol)
          except Exception:
            test = 'N/A'
          item.append(test)
        if fcn2 is not None:
          try:
            test = fcn2(avgcol)
          except Exception:
            test = 'N/A'
          item.append(test)
      newlist.append(item)
    return newlist


def dm(listoflists, criterion):
  """Return rows of listoflists meeting the criterion expression.

  criterion is a string as a function of x; e.g. 'x[3]>=9' returns all
  rows where the 4th column >= 9, and "x[2]=='N'" returns rows with
  column 2 equal to the string 'N'.

  Usage:   dm(listoflists, criterion)
  Returns: rows from listoflists that meet the specified criterion
  """
  # list(...) wrapper: filter() returns an iterator on Python 3.
  return eval('list(filter(lambda x: ' + criterion + ', listoflists))')


def flat(l):
  """Return the flattened version of a '2D' list.

  List-correlate to the a.ravel() method of NumPy arrays.

  Usage: flat(l)
  """
  return [item for row in l for item in row]


def linexand(listoflists, columnlist, valuelist):
  """Return rows where col == val for EVERY (columnlist[i], valuelist[i]).

  len(columnlist) must equal len(valuelist).

  Usage:   linexand(listoflists, columnlist, valuelist)
  Returns: rows of listoflists where columnlist[i] == valuelist[i] for ALL i
  """
  if not isinstance(columnlist, (list, tuple)):
    columnlist = [columnlist]
  if not isinstance(valuelist, (list, tuple)):
    valuelist = [valuelist]
  criterion = ''
  for i in range(len(columnlist)):
    if isinstance(valuelist[i], str):
      critval = '\'' + valuelist[i] + '\''
    else:
      critval = str(valuelist[i])
    criterion = criterion + ' x[' + str(columnlist[i]) + ']==' + critval + ' and'
  criterion = criterion[0:-3]  # remove the "and" after the last crit
  return eval('list(filter(lambda x: ' + criterion + ', listoflists))')
def linexor(listoflists, columnlist, valuelist):
  """Return rows where col == val for ANY (columnlist[i], valuelist[i]).

  One value is required for each column in columnlist.  If only one value
  exists for columnlist but multiple values appear in valuelist, the
  valuelist values are all assumed to pertain to the same column.

  Usage:   linexor(listoflists, columnlist, valuelist)
  Returns: rows of listoflists where columnlist[i] == valuelist[i] for ANY i
  """
  if not isinstance(columnlist, (list, tuple)):
    columnlist = [columnlist]
  if not isinstance(valuelist, (list, tuple)):
    valuelist = [valuelist]
  criterion = ''
  if len(columnlist) == 1 and len(valuelist) > 1:
    columnlist = columnlist * len(valuelist)
  for i in range(len(columnlist)):  # build the filter expression
    if isinstance(valuelist[i], str):
      critval = '\'' + valuelist[i] + '\''
    else:
      critval = str(valuelist[i])
    criterion = criterion + ' x[' + str(columnlist[i]) + ']==' + critval + ' or'
  criterion = criterion[0:-2]  # remove the "or" after the last crit
  # list(...) wrapper: filter() returns an iterator on Python 3.
  return eval('list(filter(lambda x: ' + criterion + ', listoflists))')


def linedelimited(inlist, delimiter):
  """Join inlist items into one string separated by 'delimiter'.

  Used by function writedelimited.  Use '\\t' for tab-delimiting.

  Usage: linedelimited(inlist, delimiter)
  """
  # Equivalent to the legacy append-then-strip-last-char loop.
  return delimiter.join(makestr(item) for item in inlist)


def lineincols(inlist, colsize):
  """Return a string of inlist items right-aligned in fixed-width columns.

  Usage: lineincols(inlist, colsize) where colsize is an integer
  """
  outstr = ''
  for item in inlist:
    item = makestr(item)
    size = len(item)
    if size <= colsize:
      outstr = outstr + ' ' * (colsize - size) + item
    else:
      # Legacy behavior: over-long items are truncated to colsize+1 chars.
      outstr = outstr + item[0:colsize + 1]
  return outstr


def lineincustcols(inlist, colsizes):
  """Return a string of inlist items right-aligned in custom-width columns.

  len(colsizes) must be >= the number of columns in inlist.

  Usage:   lineincustcols(inlist, colsizes)
  Returns: formatted string created from inlist
  """
  outstr = ''
  for i in range(len(inlist)):
    item = makestr(inlist[i])
    size = len(item)
    if size <= colsizes[i]:
      outstr = outstr + ' ' * (colsizes[i] - size) + item
    else:
      # Legacy behavior: over-long items are truncated to width+1 chars.
      outstr = outstr + item[0:colsizes[i] + 1]
  return outstr


def list2string(inlist, delimit=' '):
  """Convert a 1D list to a single delimited string for file output.

  Usage:   list2string(inlist, delimit=' ')
  Returns: the string created from inlist
  """
  # Python 3 port: string.join() was removed; use str.join.
  return delimit.join(makestr(item) for item in inlist)


def makelol(inlist):
  """Convert a 1D list to a 2D list-of-lists, one item per row.

  Useful when you want to use put() to write a 1D list one item per line.

  Usage:   makelol(inlist)
  Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
  """
  return [[item] for item in inlist]


def makestr(x):
  """Return x unchanged if it is already a string, else str(x)."""
  return x if isinstance(x, str) else str(x)


def printcc(lst, extra=2):
  """Print a list of lists in auto-sized columns.

  Column width is the max item size in the column plus 'extra' spaces.
  Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank
  lines, respectively.

  Usage:   printcc(lst, extra=2)
  Returns: None
  """
  if not isinstance(lst[0], (list, tuple)):
    lst = [lst]
  rowstokill = []
  list2print = copy.deepcopy(lst)
  for i in range(len(lst)):
    # 'in tuple' tests by ==, equivalent to the legacy chained comparisons.
    if lst[i] in (['\n'], '\n', 'dashes', '', ['']):
      rowstokill.append(i)
  rowstokill.reverse()  # delete blank rows from the end
  for row in rowstokill:
    del list2print[row]
  maxsize = [0] * len(list2print[0])
  for col in range(len(list2print[0])):
    items = [makestr(item) for item in colex(list2print, col)]
    maxsize[col] = max(len(item) for item in items) + extra
  for row in lst:
    if row in (['\n'], '\n', '', ['']):
      print()
    elif row in (['dashes'], 'dashes'):
      dashes = ['-' * (size - 2) for size in maxsize]
      print(lineincustcols(dashes, maxsize))
    else:
      print(lineincustcols(row, maxsize))
  return None


def printincols(listoflists, colsize):
  """Print a list of lists in fixed-width columns.

  Usage:   printincols(listoflists, colsize)  colsize = integer
  Returns: None
  """
  for row in listoflists:
    print(lineincols(row, colsize))
  return None


def pl(listoflists):
  """Print a list of lists, one row at a time.

  Usage:   pl(listoflists)
  Returns: None
  """
  for row in listoflists:
    if row[-1] == '\n':
      # Row already ends in a newline; emulate py2's trailing-comma print.
      print(row, end=' ')
    else:
      print(row)
  return None


def printl(listoflists):
  """Alias for pl."""
  pl(listoflists)
  return
def replace(inlst, oldval, newval):
  """Replace all occurrences of 'oldval' with 'newval', recursively.

  Usage: replace(inlst, oldval, newval)
  """
  lst = inlst * 1  # shallow copy, preserving the legacy aliasing behavior
  for i in range(len(lst)):
    if not isinstance(lst[i], (list, tuple)):
      if lst[i] == oldval:
        lst[i] = newval
    else:
      lst[i] = replace(lst[i], oldval, newval)
  return lst


def recode(inlist, listmap, cols=None):
  """Map values in inlist to new values via the 2D lookup table listmap.

  listmap rows are [oldvalue, newvalue] pairs.  cols selects which columns
  to recode; None (the default) recodes all columns.

  Usage:   recode(inlist, listmap, cols=None)  listmap = 2D list
  Returns: inlist with the appropriate values replaced with new ones
  """
  lst = copy.deepcopy(inlist)
  # Hoisted out of the loops: the old-values column of the map was
  # recomputed via colex() on every single cell.
  keys = [row[0] for row in listmap]
  if cols is not None:
    if not isinstance(cols, (list, tuple)):
      cols = [cols]
    for col in cols:
      for row in range(len(lst)):
        try:
          idx = keys.index(lst[row][col])
          lst[row][col] = listmap[idx][1]
        except ValueError:
          pass
  else:
    for row in range(len(lst)):
      # BUG FIX: the legacy code iterated columns with range(len(lst))
      # (the row count), not the row's own length — wrong for non-square
      # input.
      for col in range(len(lst[row])):
        try:
          idx = keys.index(lst[row][col])
          lst[row][col] = listmap[idx][1]
        except ValueError:
          pass
  return lst


def remap(listoflists, criterion):
  """Remap values in a 2D list via an expression in 'x'.

  Requires a criterion as a function of 'x' so that the result of
  map(lambda x: criterion, listoflists) is returned.

  Usage:   remap(listoflists, criterion)  criterion = string
  Returns: remapped version of listoflists
  """
  # list(...) wrapper: map() returns an iterator on Python 3.
  return eval('list(map(lambda x: ' + criterion + ', listoflists))')


def roundlist(inlist, digits):
  """Round every float in a 1D or 2D list to 'digits' places.

  Usage:   roundlist(inlist, digits)
  Returns: list with rounded floats
  """
  if isinstance(inlist[0], (int, float)):
    inlist = [inlist]  # promote a 1D list to 2D
  l = inlist * 1
  for i in range(len(l)):
    for j in range(len(l[i])):
      if isinstance(l[i][j], float):
        l[i][j] = round(l[i][j], digits)
  return l


def sortby(listoflists, sortcols):
  """Sort a list of lists on the column(s) specified in sortcols.

  Usage:   sortby(listoflists, sortcols)
  Returns: sorted list, unchanged column ordering
  """
  # Prepend the sort columns, sort, then strip them back off via a colex
  # slice expression.
  newlist = abut(colex(listoflists, sortcols), listoflists)
  newlist.sort()
  try:
    numcols = len(sortcols)
  except TypeError:
    numcols = 1
  crit = '[' + str(numcols) + ':]'
  return colex(newlist, crit)


def unique(inlist):
  """Return all unique items (or unique rows, for a list-of-lists).

  Usage:   unique(inlist)
  Returns: the unique elements (or rows) in inlist, in first-seen order
  """
  uniques = []
  for item in inlist:
    if item not in uniques:
      uniques.append(item)
  return uniques


def duplicates(inlist):
  """Return items duplicated in the FIRST dimension of the passed list.

  An item is reported once per extra occurrence, matching the legacy
  behavior.

  Usage: duplicates(inlist)
  """
  return [inlist[i] for i in range(len(inlist)) if inlist[i] in inlist[i + 1:]]
+ +Usage: nonrepeats (inlist) +""" + nonrepeats = [] + for i in range(len(inlist)): + if inlist.count(inlist[i]) == 1: + nonrepeats.append(inlist[i]) + return nonrepeats + +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== +#=================== PSTAT ARRAY FUNCTIONS ===================== + +try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE + import numpy as N + + def aabut(source, *args): + """ +Like the |Stat abut command. It concatenates two arrays column-wise +and returns the result. CAUTION: If one array is shorter, it will be +repeated until it is as long as the other. + +Usage: aabut (source, args) where args=any # of arrays +Returns: an array as long as the LONGEST array past, source appearing on the + 'left', arrays in <args> attached on the 'right'. 
+""" + if len(source.shape) == 1: + width = 1 + source = N.resize(source, [source.shape[0], width]) + else: + width = source.shape[1] + for addon in args: + if len(addon.shape) == 1: + width = 1 + addon = N.resize(addon, [source.shape[0], width]) + else: + width = source.shape[1] + if len(addon) < len(source): + addon = N.resize(addon, [source.shape[0], addon.shape[1]]) + elif len(source) < len(addon): + source = N.resize(source, [addon.shape[0], source.shape[1]]) + source = N.concatenate((source, addon), 1) + return source + + def acolex(a, indices, axis=1): + """ +Extracts specified indices (a list) from passed array, along passed +axis (column extraction is default). BEWARE: A 1D array is presumed to be a +column-array (and that the whole array will be returned as a column). + +Usage: acolex (a,indices,axis=1) +Returns: the columns of a specified by indices +""" + if type(indices) not in [ListType, TupleType, N.ndarray]: + indices = [indices] + if len(N.shape(a)) == 1: + cols = N.resize(a, [a.shape[0], 1]) + else: + # print a[:3] + cols = N.take(a, indices, axis) +# print cols[:3] + return cols + + def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None): + """ +Averages data in collapsecol, keeping all unique items in keepcols +(using unique, which keeps unique LISTS of column numbers), retaining +the unique sets of values in keepcols, the mean for each. If stderror or +N of the mean are desired, set either or both parameters to 1. 
+ +Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) +Returns: unique 'conditions' specified by the contents of columns specified + by keepcols, abutted with the mean(s) of column(s) specified by + collapsecols +""" + + def acollmean(inarray): + return N.sum(N.ravel(inarray)) + + if type(keepcols) not in [ListType, TupleType, N.ndarray]: + keepcols = [keepcols] + if type(collapsecols) not in [ListType, TupleType, N.ndarray]: + collapsecols = [collapsecols] + + if cfcn == None: + cfcn = acollmean + if keepcols == []: + avgcol = acolex(a, collapsecols) + means = N.sum(avgcol) / float(len(avgcol)) + if fcn1 <> None: + try: + test = fcn1(avgcol) + except: + test = N.array(['N/A'] * len(means)) + means = aabut(means, test) + if fcn2 <> None: + try: + test = fcn2(avgcol) + except: + test = N.array(['N/A'] * len(means)) + means = aabut(means, test) + return means + else: + if type(keepcols) not in [ListType, TupleType, N.ndarray]: + keepcols = [keepcols] + values = colex(a, keepcols) # so that "item" can be appended (below) + uniques = unique(values) # get a LIST, so .sort keeps rows intact + uniques.sort() + newlist = [] + for item in uniques: + if type(item) not in [ListType, TupleType, N.ndarray]: + item = [item] + tmprows = alinexand(a, keepcols, item) + for col in collapsecols: + avgcol = acolex(tmprows, col) + item.append(acollmean(avgcol)) + if fcn1 <> None: + try: + test = fcn1(avgcol) + except: + test = 'N/A' + item.append(test) + if fcn2 <> None: + try: + test = fcn2(avgcol) + except: + test = 'N/A' + item.append(test) + newlist.append(item) + try: + new_a = N.array(newlist) + except TypeError: + new_a = N.array(newlist, 'O') + return new_a + + def adm(a, criterion): + """ +Returns rows from the passed list of lists that meet the criteria in +the passed criterion expression (a string as a function of x). 
+ +Usage: adm (a,criterion) where criterion is like 'x[2]==37' +""" + function = 'filter(lambda x: ' + criterion + ',a)' + lines = eval(function) + try: + lines = N.array(lines) + except: + lines = N.array(lines, dtype='O') + return lines + + def isstring(x): + if type(x) == StringType: + return 1 + else: + return 0 + + def alinexand(a, columnlist, valuelist): + """ +Returns the rows of an array where col (from columnlist) = val +(from valuelist). One value is required for each column in columnlist. + +Usage: alinexand (a,columnlist,valuelist) +Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i +""" + if type(columnlist) not in [ListType, TupleType, N.ndarray]: + columnlist = [columnlist] + if type(valuelist) not in [ListType, TupleType, N.ndarray]: + valuelist = [valuelist] + criterion = '' + for i in range(len(columnlist)): + if type(valuelist[i]) == StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x[' + str(columnlist[ + i]) + ']==' + critval + ' and' + criterion = criterion[0:-3] # remove the "and" after the last crit + return adm(a, criterion) + + def alinexor(a, columnlist, valuelist): + """ +Returns the rows of an array where col (from columnlist) = val (from +valuelist). One value is required for each column in columnlist. +The exception is if either columnlist or valuelist has only 1 value, +in which case that item will be expanded to match the length of the +other list. 
+ +Usage: alinexor (a,columnlist,valuelist) +Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i +""" + if type(columnlist) not in [ListType, TupleType, N.ndarray]: + columnlist = [columnlist] + if type(valuelist) not in [ListType, TupleType, N.ndarray]: + valuelist = [valuelist] + criterion = '' + if len(columnlist) == 1 and len(valuelist) > 1: + columnlist = columnlist * len(valuelist) + elif len(valuelist) == 1 and len(columnlist) > 1: + valuelist = valuelist * len(columnlist) + for i in range(len(columnlist)): + if type(valuelist[i]) == StringType: + critval = '\'' + valuelist[i] + '\'' + else: + critval = str(valuelist[i]) + criterion = criterion + ' x[' + str(columnlist[ + i]) + ']==' + critval + ' or' + criterion = criterion[0:-2] # remove the "or" after the last crit + return adm(a, criterion) + + def areplace(a, oldval, newval): + """ +Replaces all occurrences of oldval with newval in array a. + +Usage: areplace(a,oldval,newval) +""" + return N.where(a == oldval, newval, a) + + def arecode(a, listmap, col='all'): + """ +Remaps the values in an array to a new set of values (useful when +you need to recode data from (e.g.) strings to numbers as most stats +packages require. Can work on SINGLE columns, or 'all' columns at once. 
+@@@BROKEN 2007-11-26 + +Usage: arecode (a,listmap,col='all') +Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1] +""" + ashape = a.shape + if col == 'all': + work = a.ravel() + else: + work = acolex(a, col) + work = work.ravel() + for pair in listmap: + if type(pair[ + 1]) == StringType or work.dtype.char == 'O' or a.dtype.char == 'O': + work = N.array(work, dtype='O') + a = N.array(a, dtype='O') + for i in range(len(work)): + if work[i] == pair[0]: + work[i] = pair[1] + if col == 'all': + return N.reshape(work, ashape) + else: + return N.concatenate( + [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1) + else: # must be a non-Object type array and replacement + work = N.where(work == pair[0], pair[1], work) + return N.concatenate( + [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1) + + def arowcompare(row1, row2): + """ +Compares two rows from an array, regardless of whether it is an +array of numbers or of python objects (which requires the cmp function). +@@@PURPOSE? 2007-11-26 + +Usage: arowcompare(row1,row2) +Returns: an array of equal length containing 1s where the two rows had + identical elements and 0 otherwise +""" + return + if row1.dtype.char == 'O' or row2.dtype == 'O': + cmpvect = N.logical_not( + abs(N.array(map(cmp, row1, row2)))) # cmp fcn gives -1,0,1 + else: + cmpvect = N.equal(row1, row2) + return cmpvect + + def arowsame(row1, row2): + """ +Compares two rows from an array, regardless of whether it is an +array of numbers or of python objects (which requires the cmp function). + +Usage: arowsame(row1,row2) +Returns: 1 if the two rows are identical, 0 otherwise. +""" + cmpval = N.alltrue(arowcompare(row1, row2)) + return cmpval + + def asortrows(a, axis=0): + """ +Sorts an array "by rows". This differs from the Numeric.sort() function, +which sorts elements WITHIN the given axis. Instead, this function keeps +the elements along the given axis intact, but shifts them 'up or down' +relative to one another. 
+ +Usage: asortrows(a,axis=0) +Returns: sorted version of a +""" + return N.sort(a, axis=axis, kind='mergesort') + + def aunique(inarray): + """ +Returns unique items in the FIRST dimension of the passed array. Only +works on arrays NOT including string items. + +Usage: aunique (inarray) +""" + uniques = N.array([inarray[0]]) + if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY + for item in inarray[1:]: + if N.add.reduce(N.equal(uniques, item).ravel()) == 0: + try: + uniques = N.concatenate([uniques, N.array[N.newaxis, :]]) + except TypeError: + uniques = N.concatenate([uniques, N.array([item])]) + else: # IT MUST BE A 2+D ARRAY + if inarray.dtype.char != 'O': # not an Object array + for item in inarray[1:]: + if not N.sum(N.alltrue(N.equal(uniques, item), 1)): + try: + uniques = N.concatenate([uniques, item[N.newaxis, :]]) + except TypeError: # the item to add isn't a list + uniques = N.concatenate([uniques, N.array([item])]) + else: + pass # this item is already in the uniques array + else: # must be an Object array, alltrue/equal functions don't work + for item in inarray[1:]: + newflag = 1 + for unq in uniques: # NOTE: cmp --> 0=same, -1=<, 1=> + test = N.sum(abs(N.array(map(cmp, item, unq)))) + if test == 0: # if item identical to any 1 row in uniques + newflag = 0 # then not a novel item to add + break + if newflag == 1: + try: + uniques = N.concatenate([uniques, item[N.newaxis, :]]) + except TypeError: # the item to add isn't a list + uniques = N.concatenate([uniques, N.array([item])]) + return uniques + + def aduplicates(inarray): + """ +Returns duplicate items in the FIRST dimension of the passed array. Only +works on arrays NOT including string items. 
+ +Usage: aunique (inarray) +""" + inarray = N.array(inarray) + if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY + dups = [] + inarray = inarray.tolist() + for i in range(len(inarray)): + if inarray[i] in inarray[i + 1:]: + dups.append(inarray[i]) + dups = aunique(dups) + else: # IT MUST BE A 2+D ARRAY + dups = [] + aslist = inarray.tolist() + for i in range(len(aslist)): + if aslist[i] in aslist[i + 1:]: + dups.append(aslist[i]) + dups = unique(dups) + dups = N.array(dups) + return dups + +except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs + pass diff --git a/cros_utils/stats.py b/cros_utils/stats.py new file mode 100644 index 00000000..0387a076 --- /dev/null +++ b/cros_utils/stats.py @@ -0,0 +1,4519 @@ +# We did not author this file nor mantain it. Skip linting it. +#pylint: skip-file +# Copyright (c) 1999-2008 Gary Strangman; All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+# +# Comments and/or additions are welcome (send e-mail to: +# strang@nmr.mgh.harvard.edu). +# +"""stats.py module + +(Requires pstat.py module.) + +################################################# +####### Written by: Gary Strangman ########### +####### Last modified: Oct 31, 2008 ########### +################################################# + +A collection of basic statistical functions for python. The function +names appear below. + +IMPORTANT: There are really *3* sets of functions. The first set has an 'l' +prefix, which can be used with list or tuple arguments. The second set has +an 'a' prefix, which can accept NumPy array arguments. These latter +functions are defined only when NumPy is available on the system. The third +type has NO prefix (i.e., has the name that appears below). Functions of +this set are members of a "Dispatch" class, c/o David Ascher. This class +allows different functions to be called depending on the type of the passed +arguments. Thus, stats.mean is a member of the Dispatch class and +stats.mean(range(20)) will call stats.lmean(range(20)) while +stats.mean(Numeric.arange(20)) will call stats.amean(Numeric.arange(20)). +This is a handy way to keep consistent function names when different +argument types require different functions to be called. Having +implementated the Dispatch class, however, means that to get info on +a given function, you must use the REAL function name ... that is +"print stats.lmean.__doc__" or "print stats.amean.__doc__" work fine, +while "print stats.mean.__doc__" will print the doc for the Dispatch +class. NUMPY FUNCTIONS ('a' prefix) generally have more argument options +but should otherwise be consistent with the corresponding list functions. + +Disclaimers: The function list is obviously incomplete and, worse, the +functions are not optimized. All functions have been tested (some more +so than others), but they are far from bulletproof. 
Thus, as with any +free software, no warranty or guarantee is expressed or implied. :-) A +few extra functions that don't appear in the list below can be found by +interested treasure-hunters. These functions don't necessarily have +both list and array versions but were deemed useful + +CENTRAL TENDENCY: geometricmean + harmonicmean + mean + median + medianscore + mode + +MOMENTS: moment + variation + skew + kurtosis + skewtest (for Numpy arrays only) + kurtosistest (for Numpy arrays only) + normaltest (for Numpy arrays only) + +ALTERED VERSIONS: tmean (for Numpy arrays only) + tvar (for Numpy arrays only) + tmin (for Numpy arrays only) + tmax (for Numpy arrays only) + tstdev (for Numpy arrays only) + tsem (for Numpy arrays only) + describe + +FREQUENCY STATS: itemfreq + scoreatpercentile + percentileofscore + histogram + cumfreq + relfreq + +VARIABILITY: obrientransform + samplevar + samplestdev + signaltonoise (for Numpy arrays only) + var + stdev + sterr + sem + z + zs + zmap (for Numpy arrays only) + +TRIMMING FCNS: threshold (for Numpy arrays only) + trimboth + trim1 + round (round all vals to 'n' decimals; Numpy only) + +CORRELATION FCNS: covariance (for Numpy arrays only) + correlation (for Numpy arrays only) + paired + pearsonr + spearmanr + pointbiserialr + kendalltau + linregress + +INFERENTIAL STATS: ttest_1samp + ttest_ind + ttest_rel + chisquare + ks_2samp + mannwhitneyu + ranksums + wilcoxont + kruskalwallish + friedmanchisquare + +PROBABILITY CALCS: chisqprob + erfcc + zprob + ksprob + fprob + betacf + gammln + betai + +ANOVA FUNCTIONS: F_oneway + F_value + +SUPPORT FUNCTIONS: writecc + incr + sign (for Numpy arrays only) + sum + cumsum + ss + summult + sumdiffsquared + square_of_sums + shellsort + rankdata + outputpairedstats + findwithin +""" +## CHANGE LOG: +## =========== +## 09-07-21 ... added capability for getting the 'proportion' out of l/amannwhitneyu (but comment-disabled) +## 08-10-31 ... 
fixed import LinearAlgebra bug before glm fcns +## 07-11-26 ... conversion for numpy started +## 07-05-16 ... added Lin's Concordance Correlation Coefficient (alincc) and acov +## 05-08-21 ... added "Dice's coefficient" +## 04-10-26 ... added ap2t(), an ugly fcn for converting p-vals to T-vals +## 04-04-03 ... added amasslinregress() function to do regression on N-D arrays +## 03-01-03 ... CHANGED VERSION TO 0.6 +## fixed atsem() to properly handle limits=None case +## improved histogram and median functions (estbinwidth) and +## fixed atvar() function (wrong answers for neg numbers?!?) +## 02-11-19 ... fixed attest_ind and attest_rel for div-by-zero Overflows +## 02-05-10 ... fixed lchisqprob indentation (failed when df=even) +## 00-12-28 ... removed aanova() to separate module, fixed licensing to +## match Python License, fixed doc string & imports +## 00-04-13 ... pulled all "global" statements, except from aanova() +## added/fixed lots of documentation, removed io.py dependency +## changed to version 0.5 +## 99-11-13 ... added asign() function +## 99-11-01 ... changed version to 0.4 ... enough incremental changes now +## 99-10-25 ... added acovariance and acorrelation functions +## 99-10-10 ... fixed askew/akurtosis to avoid divide-by-zero errors +## added aglm function (crude, but will be improved) +## 99-10-04 ... upgraded acumsum, ass, asummult, asamplevar, avar, etc. to +## all handle lists of 'dimension's and keepdims +## REMOVED ar0, ar2, ar3, ar4 and replaced them with around +## reinserted fixes for abetai to avoid math overflows +## 99-09-05 ... rewrote achisqprob/aerfcc/aksprob/afprob/abetacf/abetai to +## handle multi-dimensional arrays (whew!) +## 99-08-30 ... fixed l/amoment, l/askew, l/akurtosis per D'Agostino (1990) +## added anormaltest per same reference +## re-wrote azprob to calc arrays of probs all at once +## 99-08-22 ... edited attest_ind printing section so arrays could be rounded +## 99-08-19 ... 
fixed amean and aharmonicmean for non-error(!) overflow on +## short/byte arrays (mean of #s btw 100-300 = -150??) +## 99-08-09 ... fixed asum so that the None case works for Byte arrays +## 99-08-08 ... fixed 7/3 'improvement' to handle t-calcs on N-D arrays +## 99-07-03 ... improved attest_ind, attest_rel (zero-division errortrap) +## 99-06-24 ... fixed bug(?) in attest_ind (n1=a.shape[0]) +## 04/11/99 ... added asignaltonoise, athreshold functions, changed all +## max/min in array section to N.maximum/N.minimum, +## fixed square_of_sums to prevent integer overflow +## 04/10/99 ... !!! Changed function name ... sumsquared ==> square_of_sums +## 03/18/99 ... Added ar0, ar2, ar3 and ar4 rounding functions +## 02/28/99 ... Fixed aobrientransform to return an array rather than a list +## 01/15/99 ... Essentially ceased updating list-versions of functions (!!!) +## 01/13/99 ... CHANGED TO VERSION 0.3 +## fixed bug in a/lmannwhitneyu p-value calculation +## 12/31/98 ... fixed variable-name bug in ldescribe +## 12/19/98 ... fixed bug in findwithin (fcns needed pstat. prefix) +## 12/16/98 ... changed amedianscore to return float (not array) for 1 score +## 12/14/98 ... added atmin and atmax functions +## removed umath from import line (not needed) +## l/ageometricmean modified to reduce chance of overflows (take +## nth root first, then multiply) +## 12/07/98 ... added __version__variable (now 0.2) +## removed all 'stats.' from anova() fcn +## 12/06/98 ... changed those functions (except shellsort) that altered +## arguments in-place ... cumsum, ranksort, ... +## updated (and fixed some) doc-strings +## 12/01/98 ... added anova() function (requires NumPy) +## incorporated Dispatch class +## 11/12/98 ... 
added functionality to amean, aharmonicmean, ageometricmean +## added 'asum' function (added functionality to N.add.reduce) +## fixed both moment and amoment (two errors) +## changed name of skewness and askewness to skew and askew +## fixed (a)histogram (which sometimes counted points <lowerlimit) + +import pstat # required 3rd party module +import math, string, copy # required python modules +from types import * + +__version__ = 0.6 + +############# DISPATCH CODE ############## + + +class Dispatch: + """ +The Dispatch class, care of David Ascher, allows different functions to +be called depending on the argument types. This way, there can be one +function name regardless of the argument type. To access function doc +in stats.py module, prefix the function with an 'l' or 'a' for list or +array arguments, respectively. That is, print stats.lmean.__doc__ or +print stats.amean.__doc__ or whatever. +""" + + def __init__(self, *tuples): + self._dispatch = {} + for func, types in tuples: + for t in types: + if t in self._dispatch.keys(): + raise ValueError, "can't have two dispatches on " + str(t) + self._dispatch[t] = func + self._types = self._dispatch.keys() + + def __call__(self, arg1, *args, **kw): + if type(arg1) not in self._types: + raise TypeError, "don't know how to dispatch %s arguments" % type(arg1) + return apply(self._dispatch[type(arg1)], (arg1,) + args, kw) + +########################################################################## +######################## LIST-BASED FUNCTIONS ######################## +########################################################################## + +### Define these regardless + +#################################### +####### CENTRAL TENDENCY ######### +#################################### + + +def lgeometricmean(inlist): + """ +Calculates the geometric mean of the values in the passed list. +That is: n-th root of (x1 * x2 * ... * xn). Assumes a '1D' list. 
+ +Usage: lgeometricmean(inlist) +""" + mult = 1.0 + one_over_n = 1.0 / len(inlist) + for item in inlist: + mult = mult * pow(item, one_over_n) + return mult + + +def lharmonicmean(inlist): + """ +Calculates the harmonic mean of the values in the passed list. +That is: n / (1/x1 + 1/x2 + ... + 1/xn). Assumes a '1D' list. + +Usage: lharmonicmean(inlist) +""" + sum = 0 + for item in inlist: + sum = sum + 1.0 / item + return len(inlist) / sum + + +def lmean(inlist): + """ +Returns the arithematic mean of the values in the passed list. +Assumes a '1D' list, but will function on the 1st dim of an array(!). + +Usage: lmean(inlist) +""" + sum = 0 + for item in inlist: + sum = sum + item + return sum / float(len(inlist)) + + +def lmedian(inlist, numbins=1000): + """ +Returns the computed median value of a list of numbers, given the +number of bins to use for the histogram (more bins brings the computed value +closer to the median score, default number of bins = 1000). See G.W. +Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics. + +Usage: lmedian (inlist, numbins=1000) +""" + (hist, smallest, binsize, extras) = histogram( + inlist, numbins, [min(inlist), max(inlist)]) # make histog + cumhist = cumsum(hist) # make cumulative histogram + for i in range(len(cumhist)): # get 1st(!) index holding 50%ile score + if cumhist[i] >= len(inlist) / 2.0: + cfbin = i + break + LRL = smallest + binsize * cfbin # get lower read limit of that bin + cfbelow = cumhist[cfbin - 1] + freq = float(hist[cfbin]) # frequency IN the 50%ile bin + median = LRL + ( + (len(inlist) / 2.0 - cfbelow) / float(freq)) * binsize # median formula + return median + + +def lmedianscore(inlist): + """ +Returns the 'middle' score of the passed list. If there is an even +number of scores, the mean of the 2 middle scores is returned. 
+ +Usage: lmedianscore(inlist) +""" + + newlist = copy.deepcopy(inlist) + newlist.sort() + if len(newlist) % 2 == 0: # if even number of scores, average middle 2 + index = len(newlist) / 2 # integer division correct + median = float(newlist[index] + newlist[index - 1]) / 2 + else: + index = len(newlist) / 2 # int divsion gives mid value when count from 0 + median = newlist[index] + return median + + +def lmode(inlist): + """ +Returns a list of the modal (most common) score(s) in the passed +list. If there is more than one such score, all are returned. The +bin-count for the mode(s) is also returned. + +Usage: lmode(inlist) +Returns: bin-count for mode(s), a list of modal value(s) +""" + + scores = pstat.unique(inlist) + scores.sort() + freq = [] + for item in scores: + freq.append(inlist.count(item)) + maxfreq = max(freq) + mode = [] + stillmore = 1 + while stillmore: + try: + indx = freq.index(maxfreq) + mode.append(scores[indx]) + del freq[indx] + del scores[indx] + except ValueError: + stillmore = 0 + return maxfreq, mode + +#################################### +############ MOMENTS ############# +#################################### + + +def lmoment(inlist, moment=1): + """ +Calculates the nth moment about the mean for a sample (defaults to +the 1st moment). Used to calculate coefficients of skewness and kurtosis. + +Usage: lmoment(inlist,moment=1) +Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r) +""" + if moment == 1: + return 0.0 + else: + mn = mean(inlist) + n = len(inlist) + s = 0 + for x in inlist: + s = s + (x - mn)**moment + return s / float(n) + + +def lvariation(inlist): + """ +Returns the coefficient of variation, as defined in CRC Standard +Probability and Statistics, p.6. 
+ +Usage: lvariation(inlist) +""" + return 100.0 * samplestdev(inlist) / float(mean(inlist)) + + +def lskew(inlist): + """ +Returns the skewness of a distribution, as defined in Numerical +Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) + +Usage: lskew(inlist) +""" + return moment(inlist, 3) / pow(moment(inlist, 2), 1.5) + + +def lkurtosis(inlist): + """ +Returns the kurtosis of a distribution, as defined in Numerical +Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) + +Usage: lkurtosis(inlist) +""" + return moment(inlist, 4) / pow(moment(inlist, 2), 2.0) + + +def ldescribe(inlist): + """ +Returns some descriptive statistics of the passed list (assumed to be 1D). + +Usage: ldescribe(inlist) +Returns: n, mean, standard deviation, skew, kurtosis +""" + n = len(inlist) + mm = (min(inlist), max(inlist)) + m = mean(inlist) + sd = stdev(inlist) + sk = skew(inlist) + kurt = kurtosis(inlist) + return n, mm, m, sd, sk, kurt + +#################################### +####### FREQUENCY STATS ########## +#################################### + + +def litemfreq(inlist): + """ +Returns a list of pairs. Each pair consists of one of the scores in inlist +and it's frequency count. Assumes a 1D list is passed. + +Usage: litemfreq(inlist) +Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies) +""" + scores = pstat.unique(inlist) + scores.sort() + freq = [] + for item in scores: + freq.append(inlist.count(item)) + return pstat.abut(scores, freq) + + +def lscoreatpercentile(inlist, percent): + """ +Returns the score at a given percentile relative to the distribution +given by inlist. 
+ +Usage: lscoreatpercentile(inlist,percent) +""" + if percent > 1: + print '\nDividing percent>1 by 100 in lscoreatpercentile().\n' + percent = percent / 100.0 + targetcf = percent * len(inlist) + h, lrl, binsize, extras = histogram(inlist) + cumhist = cumsum(copy.deepcopy(h)) + for i in range(len(cumhist)): + if cumhist[i] >= targetcf: + break + score = binsize * ( + (targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + binsize * i) + return score + + +def lpercentileofscore(inlist, score, histbins=10, defaultlimits=None): + """ +Returns the percentile value of a score relative to the distribution +given by inlist. Formula depends on the values used to histogram the data(!). + +Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None) +""" + + h, lrl, binsize, extras = histogram(inlist, histbins, defaultlimits) + cumhist = cumsum(copy.deepcopy(h)) + i = int((score - lrl) / float(binsize)) + pct = (cumhist[i - 1] + ( + (score - + (lrl + binsize * i)) / float(binsize)) * h[i]) / float(len(inlist)) * 100 + return pct + + +def lhistogram(inlist, numbins=10, defaultreallimits=None, printextras=0): + """ +Returns (i) a list of histogram bin counts, (ii) the smallest value +of the histogram binning, and (iii) the bin width (the last 2 are not +necessarily integers). Default number of bins is 10. If no sequence object +is given for defaultreallimits, the routine picks (usually non-pretty) bins +spanning all the numbers in the inlist. 
+ +Usage: lhistogram (inlist, numbins=10, +defaultreallimits=None,suppressoutput=0) +Returns: list of bin values, lowerreallimit, binsize, extrapoints +""" + if (defaultreallimits <> None): + if type(defaultreallimits) not in [ListType, TupleType] or len( + defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd + lowerreallimit = defaultreallimits + upperreallimit = 1.000001 * max(inlist) + else: # assume both limits given + lowerreallimit = defaultreallimits[0] + upperreallimit = defaultreallimits[1] + binsize = (upperreallimit - lowerreallimit) / float(numbins) + else: # no limits given for histogram, both must be calc'd + estbinwidth = (max(inlist) - + min(inlist)) / float(numbins) + 1e-6 #1=>cover all + binsize = ((max(inlist) - min(inlist) + estbinwidth)) / float(numbins) + lowerreallimit = min(inlist) - binsize / 2 #lower real limit,1st bin + bins = [0] * (numbins) + extrapoints = 0 + for num in inlist: + try: + if (num - lowerreallimit) < 0: + extrapoints = extrapoints + 1 + else: + bintoincrement = int((num - lowerreallimit) / float(binsize)) + bins[bintoincrement] = bins[bintoincrement] + 1 + except: + extrapoints = extrapoints + 1 + if (extrapoints > 0 and printextras == 1): + print '\nPoints outside given histogram range =', extrapoints + return (bins, lowerreallimit, binsize, extrapoints) + + +def lcumfreq(inlist, numbins=10, defaultreallimits=None): + """ +Returns a cumulative frequency histogram, using the histogram function. + +Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None) +Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h, l, b, e = histogram(inlist, numbins, defaultreallimits) + cumhist = cumsum(copy.deepcopy(h)) + return cumhist, l, b, e + + +def lrelfreq(inlist, numbins=10, defaultreallimits=None): + """ +Returns a relative frequency histogram, using the histogram function. 
+ +Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None) +Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h, l, b, e = histogram(inlist, numbins, defaultreallimits) + for i in range(len(h)): + h[i] = h[i] / float(len(inlist)) + return h, l, b, e + +#################################### +##### VARIABILITY FUNCTIONS ###### +#################################### + + +def lobrientransform(*args): + """ +Computes a transform on input data (any number of columns). Used to +test for homogeneity of variance prior to running one-way stats. From +Maxwell and Delaney, p.112. + +Usage: lobrientransform(*args) +Returns: transformed data for use in an ANOVA +""" + TINY = 1e-10 + k = len(args) + n = [0.0] * k + v = [0.0] * k + m = [0.0] * k + nargs = [] + for i in range(k): + nargs.append(copy.deepcopy(args[i])) + n[i] = float(len(nargs[i])) + v[i] = var(nargs[i]) + m[i] = mean(nargs[i]) + for j in range(k): + for i in range(n[j]): + t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j])**2 + t2 = 0.5 * v[j] * (n[j] - 1.0) + t3 = (n[j] - 1.0) * (n[j] - 2.0) + nargs[j][i] = (t1 - t2) / float(t3) + check = 1 + for j in range(k): + if v[j] - mean(nargs[j]) > TINY: + check = 0 + if check <> 1: + raise ValueError, 'Problem in obrientransform.' + else: + return nargs + + +def lsamplevar(inlist): + """ +Returns the variance of the values in the passed list using +N for the denominator (i.e., DESCRIBES the sample variance only). + +Usage: lsamplevar(inlist) +""" + n = len(inlist) + mn = mean(inlist) + deviations = [] + for item in inlist: + deviations.append(item - mn) + return ss(deviations) / float(n) + + +def lsamplestdev(inlist): + """ +Returns the standard deviation of the values in the passed list using +N for the denominator (i.e., DESCRIBES the sample stdev only). 
+ +Usage: lsamplestdev(inlist) +""" + return math.sqrt(samplevar(inlist)) + + +def lcov(x, y, keepdims=0): + """ +Returns the estimated covariance of the values in the passed +array (i.e., N-1). Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). Set keepdims=1 to return an array with the +same number of dimensions as inarray. + +Usage: lcov(x,y,keepdims=0) +""" + + n = len(x) + xmn = mean(x) + ymn = mean(y) + xdeviations = [0] * len(x) + ydeviations = [0] * len(y) + for i in range(len(x)): + xdeviations[i] = x[i] - xmn + ydeviations[i] = y[i] - ymn + ss = 0.0 + for i in range(len(xdeviations)): + ss = ss + xdeviations[i] * ydeviations[i] + return ss / float(n - 1) + + +def lvar(inlist): + """ +Returns the variance of the values in the passed list using N-1 +for the denominator (i.e., for estimating population variance). + +Usage: lvar(inlist) +""" + n = len(inlist) + mn = mean(inlist) + deviations = [0] * len(inlist) + for i in range(len(inlist)): + deviations[i] = inlist[i] - mn + return ss(deviations) / float(n - 1) + + +def lstdev(inlist): + """ +Returns the standard deviation of the values in the passed list +using N-1 in the denominator (i.e., to estimate population stdev). + +Usage: lstdev(inlist) +""" + return math.sqrt(var(inlist)) + + +def lsterr(inlist): + """ +Returns the standard error of the values in the passed list using N-1 +in the denominator (i.e., to estimate population standard error). + +Usage: lsterr(inlist) +""" + return stdev(inlist) / float(math.sqrt(len(inlist))) + + +def lsem(inlist): + """ +Returns the estimated standard error of the mean (sx-bar) of the +values in the passed list. sem = stdev / sqrt(n) + +Usage: lsem(inlist) +""" + sd = stdev(inlist) + n = len(inlist) + return sd / math.sqrt(n) + + +def lz(inlist, score): + """ +Returns the z-score for a given input score, given that score and the +list from which that score came. 
Not appropriate for population calculations. + +Usage: lz(inlist, score) +""" + z = (score - mean(inlist)) / samplestdev(inlist) + return z + + +def lzs(inlist): + """ +Returns a list of z-scores, one for each score in the passed list. + +Usage: lzs(inlist) +""" + zscores = [] + for item in inlist: + zscores.append(z(inlist, item)) + return zscores + +#################################### +####### TRIMMING FUNCTIONS ####### +#################################### + + +def ltrimboth(l, proportiontocut): + """ +Slices off the passed proportion of items from BOTH ends of the passed +list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost' +10% of scores. Assumes list is sorted by magnitude. Slices off LESS if +proportion results in a non-integer slice index (i.e., conservatively +slices off proportiontocut). + +Usage: ltrimboth (l,proportiontocut) +Returns: trimmed version of list l +""" + lowercut = int(proportiontocut * len(l)) + uppercut = len(l) - lowercut + return l[lowercut:uppercut] + + +def ltrim1(l, proportiontocut, tail='right'): + """ +Slices off the passed proportion of items from ONE end of the passed +list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' +10% of scores). Slices off LESS if proportion results in a non-integer +slice index (i.e., conservatively slices off proportiontocut). + +Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left' +Returns: trimmed version of list l +""" + if tail == 'right': + lowercut = 0 + uppercut = len(l) - int(proportiontocut * len(l)) + elif tail == 'left': + lowercut = int(proportiontocut * len(l)) + uppercut = len(l) + return l[lowercut:uppercut] + +#################################### +##### CORRELATION FUNCTIONS ###### +#################################### + + +def lpaired(x, y): + """ +Interactively determines the type of data and then runs the +appropriated statistic for paired group data. 
+ +Usage: lpaired(x,y) +Returns: appropriate statistic name, value, and probability +""" + samples = '' + while samples not in ['i', 'r', 'I', 'R', 'c', 'C']: + print '\nIndependent or related samples, or correlation (i,r,c): ', + samples = raw_input() + + if samples in ['i', 'I', 'r', 'R']: + print '\nComparing variances ...', + # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112 + r = obrientransform(x, y) + f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1)) + if p < 0.05: + vartype = 'unequal, p=' + str(round(p, 4)) + else: + vartype = 'equal' + print vartype + if samples in ['i', 'I']: + if vartype[0] == 'e': + t, p = ttest_ind(x, y, 0) + print '\nIndependent samples t-test: ', round(t, 4), round(p, 4) + else: + if len(x) > 20 or len(y) > 20: + z, p = ranksums(x, y) + print '\nRank Sums test (NONparametric, n>20): ', round(z, 4), round( + p, 4) + else: + u, p = mannwhitneyu(x, y) + print '\nMann-Whitney U-test (NONparametric, ns<20): ', round( + u, 4), round(p, 4) + + else: # RELATED SAMPLES + if vartype[0] == 'e': + t, p = ttest_rel(x, y, 0) + print '\nRelated samples t-test: ', round(t, 4), round(p, 4) + else: + t, p = ranksums(x, y) + print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4) + else: # CORRELATION ANALYSIS + corrtype = '' + while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']: + print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ', + corrtype = raw_input() + if corrtype in ['c', 'C']: + m, b, r, p, see = linregress(x, y) + print '\nLinear regression for continuous variables ...' + lol = [['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'], + [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)] + ] + pstat.printcc(lol) + elif corrtype in ['r', 'R']: + r, p = spearmanr(x, y) + print '\nCorrelation for ranked variables ...' 
+ print "Spearman's r: ", round(r, 4), round(p, 4) + else: # DICHOTOMOUS + r, p = pointbiserialr(x, y) + print '\nAssuming x contains a dichotomous variable ...' + print 'Point Biserial r: ', round(r, 4), round(p, 4) + print '\n\n' + return None + + +def lpearsonr(x, y): + """ +Calculates a Pearson correlation coefficient and the associated +probability value. Taken from Heiman's Basic Statistics for the Behav. +Sci (2nd), p.195. + +Usage: lpearsonr(x,y) where x and y are equal-length lists +Returns: Pearson's r value, two-tailed p-value +""" + TINY = 1.0e-30 + if len(x) <> len(y): + raise ValueError, 'Input values not paired in pearsonr. Aborting.' + n = len(x) + x = map(float, x) + y = map(float, y) + xmean = mean(x) + ymean = mean(y) + r_num = n * (summult(x, y)) - sum(x) * sum(y) + r_den = math.sqrt((n * ss(x) - square_of_sums(x)) * + (n * ss(y) - square_of_sums(y))) + r = (r_num / r_den) # denominator already a float + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = betai(0.5 * df, 0.5, df / float(df + t * t)) + return r, prob + + +def llincc(x, y): + """ +Calculates Lin's concordance correlation coefficient. + +Usage: alincc(x,y) where x, y are equal-length arrays +Returns: Lin's CC +""" + covar = lcov(x, y) * (len(x) - 1) / float(len(x)) # correct denom to n + xvar = lvar(x) * (len(x) - 1) / float(len(x)) # correct denom to n + yvar = lvar(y) * (len(y) - 1) / float(len(y)) # correct denom to n + lincc = (2 * covar) / ((xvar + yvar) + ((amean(x) - amean(y))**2)) + return lincc + + +def lspearmanr(x, y): + """ +Calculates a Spearman rank-order correlation coefficient. Taken +from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. + +Usage: lspearmanr(x,y) where x and y are equal-length lists +Returns: Spearman's r, two-tailed p-value +""" + TINY = 1e-30 + if len(x) <> len(y): + raise ValueError, 'Input values not paired in spearmanr. Aborting.' 
+ n = len(x) + rankx = rankdata(x) + ranky = rankdata(y) + dsq = sumdiffsquared(rankx, ranky) + rs = 1 - 6 * dsq / float(n * (n**2 - 1)) + t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs))) + df = n - 2 + probrs = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float + # probability values for rs are from part 2 of the spearman function in + # Numerical Recipies, p.510. They are close to tables, but not exact. (?) + return rs, probrs + + +def lpointbiserialr(x, y): + """ +Calculates a point-biserial correlation coefficient and the associated +probability value. Taken from Heiman's Basic Statistics for the Behav. +Sci (1st), p.194. + +Usage: lpointbiserialr(x,y) where x,y are equal-length lists +Returns: Point-biserial r, two-tailed p-value +""" + TINY = 1e-30 + if len(x) <> len(y): + raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.' + data = pstat.abut(x, y) + categories = pstat.unique(x) + if len(categories) <> 2: + raise ValueError, 'Exactly 2 categories required for pointbiserialr().' + else: # there are 2 categories, continue + codemap = pstat.abut(categories, range(2)) + recoded = pstat.recode(data, codemap, 0) + x = pstat.linexand(data, 0, categories[0]) + y = pstat.linexand(data, 0, categories[1]) + xmean = mean(pstat.colex(x, 1)) + ymean = mean(pstat.colex(y, 1)) + n = len(data) + adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n))) + rpb = (ymean - xmean) / samplestdev(pstat.colex(data, 1)) * adjust + df = n - 2 + t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY))) + prob = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float + return rpb, prob + + +def lkendalltau(x, y): + """ +Calculates Kendall's tau ... correlation of ordinal data. Adapted +from function kendl1 in Numerical Recipies. 
Needs good test-routine.@@@ + +Usage: lkendalltau(x,y) +Returns: Kendall's tau, two-tailed p-value +""" + n1 = 0 + n2 = 0 + iss = 0 + for j in range(len(x) - 1): + for k in range(j, len(y)): + a1 = x[j] - x[k] + a2 = y[j] - y[k] + aa = a1 * a2 + if (aa): # neither list has a tie + n1 = n1 + 1 + n2 = n2 + 1 + if aa > 0: + iss = iss + 1 + else: + iss = iss - 1 + else: + if (a1): + n1 = n1 + 1 + else: + n2 = n2 + 1 + tau = iss / math.sqrt(n1 * n2) + svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1)) + z = tau / math.sqrt(svar) + prob = erfcc(abs(z) / 1.4142136) + return tau, prob + + +def llinregress(x, y): + """ +Calculates a regression line on x,y pairs. + +Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates +Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate +""" + TINY = 1.0e-20 + if len(x) <> len(y): + raise ValueError, 'Input values not paired in linregress. Aborting.' + n = len(x) + x = map(float, x) + y = map(float, y) + xmean = mean(x) + ymean = mean(y) + r_num = float(n * (summult(x, y)) - sum(x) * sum(y)) + r_den = math.sqrt((n * ss(x) - square_of_sums(x)) * + (n * ss(y) - square_of_sums(y))) + r = r_num / r_den + z = 0.5 * math.log((1.0 + r + TINY) / (1.0 - r + TINY)) + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = betai(0.5 * df, 0.5, df / (df + t * t)) + slope = r_num / float(n * ss(x) - square_of_sums(x)) + intercept = ymean - slope * xmean + sterrest = math.sqrt(1 - r * r) * samplestdev(y) + return slope, intercept, r, prob, sterrest + +#################################### +##### INFERENTIAL STATISTICS ##### +#################################### + + +def lttest_1samp(a, popmean, printit=0, name='Sample', writemode='a'): + """ +Calculates the t-obtained for the independent samples T-test on ONE group +of scores a, given a population mean. If printit=1, results are printed +to the screen. 
If printit='filename', the results are output to 'filename' +using the given writemode (default=append). Returns t-value, and prob. + +Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') +Returns: t-value, two-tailed prob +""" + x = mean(a) + v = var(a) + n = len(a) + df = n - 1 + svar = ((n - 1) * v) / float(df) + t = (x - popmean) / math.sqrt(svar * (1.0 / n)) + prob = betai(0.5 * df, 0.5, float(df) / (df + t * t)) + + if printit <> 0: + statname = 'Single-sample T-test.' + outputpairedstats(printit, writemode, 'Population', '--', popmean, 0, 0, 0, + name, n, x, v, min(a), max(a), statname, t, prob) + return t, prob + + +def lttest_ind(a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'): + """ +Calculates the t-obtained T-test on TWO INDEPENDENT samples of +scores a, and b. From Numerical Recipies, p.483. If printit=1, results +are printed to the screen. If printit='filename', the results are output +to 'filename' using the given writemode (default=append). Returns t-value, +and prob. + +Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a') +Returns: t-value, two-tailed prob +""" + x1 = mean(a) + x2 = mean(b) + v1 = stdev(a)**2 + v2 = stdev(b)**2 + n1 = len(a) + n2 = len(b) + df = n1 + n2 - 2 + svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df) + if not svar: + svar = 1.0e-26 + t = (x1 - x2) / math.sqrt(svar * (1.0 / n1 + 1.0 / n2)) + prob = betai(0.5 * df, 0.5, df / (df + t * t)) + + if printit <> 0: + statname = 'Independent samples T-test.' + outputpairedstats(printit, writemode, name1, n1, x1, v1, min(a), max(a), + name2, n2, x2, v2, min(b), max(b), statname, t, prob) + return t, prob + + +def lttest_rel(a, + b, + printit=0, + name1='Sample1', + name2='Sample2', + writemode='a'): + """ +Calculates the t-obtained T-test on TWO RELATED samples of scores, +a and b. From Numerical Recipies, p.483. If printit=1, results are +printed to the screen. 
If printit='filename', the results are output to +'filename' using the given writemode (default=append). Returns t-value, +and prob. + +Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a') +Returns: t-value, two-tailed prob +""" + if len(a) <> len(b): + raise ValueError, 'Unequal length lists in ttest_rel.' + x1 = mean(a) + x2 = mean(b) + v1 = var(a) + v2 = var(b) + n = len(a) + cov = 0 + for i in range(len(a)): + cov = cov + (a[i] - x1) * (b[i] - x2) + df = n - 1 + cov = cov / float(df) + sd = math.sqrt((v1 + v2 - 2.0 * cov) / float(n)) + t = (x1 - x2) / sd + prob = betai(0.5 * df, 0.5, df / (df + t * t)) + + if printit <> 0: + statname = 'Related samples T-test.' + outputpairedstats(printit, writemode, name1, n, x1, v1, min(a), max(a), + name2, n, x2, v2, min(b), max(b), statname, t, prob) + return t, prob + + +def lchisquare(f_obs, f_exp=None): + """ +Calculates a one-way chi square for list of observed frequencies and returns +the result. If no expected frequencies are given, the total N is assumed to +be equally distributed across all groups. + +Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq. +Returns: chisquare-statistic, associated p-value +""" + k = len(f_obs) # number of groups + if f_exp == None: + f_exp = [sum(f_obs) / float(k)] * len(f_obs) # create k bins with = freq. + chisq = 0 + for i in range(len(f_obs)): + chisq = chisq + (f_obs[i] - f_exp[i])**2 / float(f_exp[i]) + return chisq, chisqprob(chisq, k - 1) + + +def lks_2samp(data1, data2): + """ +Computes the Kolmogorov-Smirnof statistic on 2 samples. From +Numerical Recipies in C, page 493. 
+ +Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions +Returns: KS D-value, associated p-value +""" + j1 = 0 + j2 = 0 + fn1 = 0.0 + fn2 = 0.0 + n1 = len(data1) + n2 = len(data2) + en1 = n1 + en2 = n2 + d = 0.0 + data1.sort() + data2.sort() + while j1 < n1 and j2 < n2: + d1 = data1[j1] + d2 = data2[j2] + if d1 <= d2: + fn1 = (j1) / float(en1) + j1 = j1 + 1 + if d2 <= d1: + fn2 = (j2) / float(en2) + j2 = j2 + 1 + dt = (fn2 - fn1) + if math.fabs(dt) > math.fabs(d): + d = dt + try: + en = math.sqrt(en1 * en2 / float(en1 + en2)) + prob = ksprob((en + 0.12 + 0.11 / en) * abs(d)) + except: + prob = 1.0 + return d, prob + + +def lmannwhitneyu(x, y): + """ +Calculates a Mann-Whitney U statistic on the provided scores and +returns the result. Use only when the n in each condition is < 20 and +you have 2 independent samples of ranks. NOTE: Mann-Whitney U is +significant if the u-obtained is LESS THAN or equal to the critical +value of U found in the tables. Equivalent to Kruskal-Wallis H with +just 2 groups. + +Usage: lmannwhitneyu(data) +Returns: u-statistic, one-tailed p-value (i.e., p(z(U))) +""" + n1 = len(x) + n2 = len(y) + ranked = rankdata(x + y) + rankx = ranked[0:n1] # get the x-ranks + ranky = ranked[n1:] # the rest are y-ranks + u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx) # calc U for x + u2 = n1 * n2 - u1 # remainder is U for y + bigu = max(u1, u2) + smallu = min(u1, u2) + proportion = bigu / float(n1 * n2) + T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores + if T == 0: + raise ValueError, 'All numbers are identical in lmannwhitneyu' + sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0) + z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc + return smallu, 1.0 - zprob(z) #, proportion + + +def ltiecorrect(rankvals): + """ +Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See +Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences. +New York: McGraw-Hill. 
Code adapted from |Stat rankind.c code. + +Usage: ltiecorrect(rankvals) +Returns: T correction factor for U or H +""" + sorted, posn = shellsort(rankvals) + n = len(sorted) + T = 0.0 + i = 0 + while (i < n - 1): + if sorted[i] == sorted[i + 1]: + nties = 1 + while (i < n - 1) and (sorted[i] == sorted[i + 1]): + nties = nties + 1 + i = i + 1 + T = T + nties**3 - nties + i = i + 1 + T = T / float(n**3 - n) + return 1.0 - T + + +def lranksums(x, y): + """ +Calculates the rank sums statistic on the provided scores and +returns the result. Use only when the n in each condition is > 20 and you +have 2 independent samples of ranks. + +Usage: lranksums(x,y) +Returns: a z-statistic, two-tailed p-value +""" + n1 = len(x) + n2 = len(y) + alldata = x + y + ranked = rankdata(alldata) + x = ranked[:n1] + y = ranked[n1:] + s = sum(x) + expected = n1 * (n1 + n2 + 1) / 2.0 + z = (s - expected) / math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0) + prob = 2 * (1.0 - zprob(abs(z))) + return z, prob + + +def lwilcoxont(x, y): + """ +Calculates the Wilcoxon T-test for related samples and returns the +result. A non-parametric T-test. + +Usage: lwilcoxont(x,y) +Returns: a t-statistic, two-tail probability estimate +""" + if len(x) <> len(y): + raise ValueError, 'Unequal N in wilcoxont. Aborting.' + d = [] + for i in range(len(x)): + diff = x[i] - y[i] + if diff <> 0: + d.append(diff) + count = len(d) + absd = map(abs, d) + absranked = rankdata(absd) + r_plus = 0.0 + r_minus = 0.0 + for i in range(len(absd)): + if d[i] < 0: + r_minus = r_minus + absranked[i] + else: + r_plus = r_plus + absranked[i] + wt = min(r_plus, r_minus) + mn = count * (count + 1) * 0.25 + se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0) + z = math.fabs(wt - mn) / se + prob = 2 * (1.0 - zprob(abs(z))) + return wt, prob + + +def lkruskalwallish(*args): + """ +The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more +groups, requiring at least 5 subjects in each group. 
This function +calculates the Kruskal-Wallis H-test for 3 or more independent samples +and returns the result. + +Usage: lkruskalwallish(*args) +Returns: H-statistic (corrected for ties), associated p-value +""" + args = list(args) + n = [0] * len(args) + all = [] + n = map(len, args) + for i in range(len(args)): + all = all + args[i] + ranked = rankdata(all) + T = tiecorrect(ranked) + for i in range(len(args)): + args[i] = ranked[0:n[i]] + del ranked[0:n[i]] + rsums = [] + for i in range(len(args)): + rsums.append(sum(args[i])**2) + rsums[i] = rsums[i] / float(n[i]) + ssbn = sum(rsums) + totaln = sum(n) + h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1) + df = len(args) - 1 + if T == 0: + raise ValueError, 'All numbers are identical in lkruskalwallish' + h = h / float(T) + return h, chisqprob(h, df) + + +def lfriedmanchisquare(*args): + """ +Friedman Chi-Square is a non-parametric, one-way within-subjects +ANOVA. This function calculates the Friedman Chi-square test for repeated +measures and returns the result, along with the associated probability +value. It assumes 3 or more repeated measures. Only 3 levels requires a +minimum of 10 subjects in the study. Four levels requires 5 subjects per +level(??). + +Usage: lfriedmanchisquare(*args) +Returns: chi-square statistic, associated p-value +""" + k = len(args) + if k < 3: + raise ValueError, 'Less than 3 levels. Friedman test not appropriate.' + n = len(args[0]) + data = apply(pstat.abut, tuple(args)) + for i in range(len(data)): + data[i] = rankdata(data[i]) + ssbn = 0 + for i in range(k): + ssbn = ssbn + sum(args[i])**2 + chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1) + return chisq, chisqprob(chisq, k - 1) + +#################################### +#### PROBABILITY CALCULATIONS #### +#################################### + + +def lchisqprob(chisq, df): + """ +Returns the (1-tailed) probability value associated with the provided +chi-square value and df. 
Adapted from chisq.c in Gary Perlman's |Stat. + +Usage: lchisqprob(chisq,df) +""" + BIG = 20.0 + + def ex(x): + BIG = 20.0 + if x < -BIG: + return 0.0 + else: + return math.exp(x) + + if chisq <= 0 or df < 1: + return 1.0 + a = 0.5 * chisq + if df % 2 == 0: + even = 1 + else: + even = 0 + if df > 1: + y = ex(-a) + if even: + s = y + else: + s = 2.0 * zprob(-math.sqrt(chisq)) + if (df > 2): + chisq = 0.5 * (df - 1.0) + if even: + z = 1.0 + else: + z = 0.5 + if a > BIG: + if even: + e = 0.0 + else: + e = math.log(math.sqrt(math.pi)) + c = math.log(a) + while (z <= chisq): + e = math.log(z) + e + s = s + ex(c * z - a - e) + z = z + 1.0 + return s + else: + if even: + e = 1.0 + else: + e = 1.0 / math.sqrt(math.pi) / math.sqrt(a) + c = 0.0 + while (z <= chisq): + e = e * (a / float(z)) + c = c + e + z = z + 1.0 + return (c * y + s) + else: + return s + + +def lerfcc(x): + """ +Returns the complementary error function erfc(x) with fractional +error everywhere less than 1.2e-7. Adapted from Numerical Recipies. + +Usage: lerfcc(x) +""" + z = abs(x) + t = 1.0 / (1.0 + 0.5 * z) + ans = t * math.exp(-z * z - 1.26551223 + t * (1.00002368 + t * ( + 0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * (0.27886807 + t * ( + -1.13520398 + t * (1.48851587 + t * (-0.82215223 + t * 0.17087277))))) + )))) + if x >= 0: + return ans + else: + return 2.0 - ans + + +def lzprob(z): + """ +Returns the area under the normal curve 'to the left of' the given z value. +Thus, + for z<0, zprob(z) = 1-tail probability + for z>0, 1.0-zprob(z) = 1-tail probability + for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability +Adapted from z.c in Gary Perlman's |Stat. 
+ +Usage: lzprob(z) +""" + Z_MAX = 6.0 # maximum meaningful z-value + if z == 0.0: + x = 0.0 + else: + y = 0.5 * math.fabs(z) + if y >= (Z_MAX * 0.5): + x = 1.0 + elif (y < 1.0): + w = y * y + x = (( + ((((((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w - + 0.019198292004) * w + 0.059054035642) * w - 0.151968751364) * w + + 0.319152932694) * w - 0.531923007300) * w + 0.797884560593) * y * 2.0 + else: + y = y - 2.0 + x = ((((((( + ((((((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) * y + - 0.000676904986) * y + 0.001390604284) * y - 0.000794620820) * y + - 0.002034254874) * y + 0.006549791214) * y - 0.010557625006) * y + + 0.011630447319) * y - 0.009279453341) * y + 0.005353579108) * y - + 0.002141268741) * y + 0.000535310849) * y + 0.999936657524 + if z > 0.0: + prob = ((x + 1.0) * 0.5) + else: + prob = ((1.0 - x) * 0.5) + return prob + + +def lksprob(alam): + """ +Computes a Kolmolgorov-Smirnov t-test significance level. Adapted from +Numerical Recipies. + +Usage: lksprob(alam) +""" + fac = 2.0 + sum = 0.0 + termbf = 0.0 + a2 = -2.0 * alam * alam + for j in range(1, 201): + term = fac * math.exp(a2 * j * j) + sum = sum + term + if math.fabs(term) <= (0.001 * termbf) or math.fabs(term) < (1.0e-8 * sum): + return sum + fac = -fac + termbf = math.fabs(term) + return 1.0 # Get here only if fails to converge; was 0.0!! + + +def lfprob(dfnum, dfden, F): + """ +Returns the (1-tailed) significance level (p-value) of an F +statistic given the degrees of freedom for the numerator (dfR-dfF) and +the degrees of freedom for the denominator (dfF). + +Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn +""" + p = betai(0.5 * dfden, 0.5 * dfnum, dfden / float(dfden + dfnum * F)) + return p + + +def lbetacf(a, b, x): + """ +This function evaluates the continued fraction form of the incomplete +Beta function, betai. (Adapted from: Numerical Recipies in C.) 
+ +Usage: lbetacf(a,b,x) +""" + ITMAX = 200 + EPS = 3.0e-7 + + bm = az = am = 1.0 + qab = a + b + qap = a + 1.0 + qam = a - 1.0 + bz = 1.0 - qab * x / qap + for i in range(ITMAX + 1): + em = float(i + 1) + tem = em + em + d = em * (b - em) * x / ((qam + tem) * (a + tem)) + ap = az + d * am + bp = bz + d * bm + d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem)) + app = ap + d * az + bpp = bp + d * bz + aold = az + am = ap / bpp + bm = bp / bpp + az = app / bpp + bz = 1.0 + if (abs(az - aold) < (EPS * abs(az))): + return az + print 'a or b too big, or ITMAX too small in Betacf.' + + +def lgammln(xx): + """ +Returns the gamma function of xx. + Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. +(Adapted from: Numerical Recipies in C.) + +Usage: lgammln(xx) +""" + + coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, 0.120858003e-2, + -0.536382e-5] + x = xx - 1.0 + tmp = x + 5.5 + tmp = tmp - (x + 0.5) * math.log(tmp) + ser = 1.0 + for j in range(len(coeff)): + x = x + 1 + ser = ser + coeff[j] / x + return -tmp + math.log(2.50662827465 * ser) + + +def lbetai(a, b, x): + """ +Returns the incomplete beta function: + + I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt) + +where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma +function of a. The continued fraction formulation is implemented here, +using the betacf function. (Adapted from: Numerical Recipies in C.) 
+ +Usage: lbetai(a,b,x) +""" + if (x < 0.0 or x > 1.0): + raise ValueError, 'Bad x in lbetai' + if (x == 0.0 or x == 1.0): + bt = 0.0 + else: + bt = math.exp(gammln(a + b) - gammln(a) - gammln(b) + a * math.log(x) + b * + math.log(1.0 - x)) + if (x < (a + 1.0) / (a + b + 2.0)): + return bt * betacf(a, b, x) / float(a) + else: + return 1.0 - bt * betacf(b, a, 1.0 - x) / float(b) + +#################################### +####### ANOVA CALCULATIONS ####### +#################################### + + +def lF_oneway(*lists): + """ +Performs a 1-way ANOVA, returning an F-value and probability given +any number of groups. From Heiman, pp.394-7. + +Usage: F_oneway(*lists) where *lists is any number of lists, one per + treatment group +Returns: F value, one-tailed p-value +""" + a = len(lists) # ANOVA on 'a' groups, each in it's own list + means = [0] * a + vars = [0] * a + ns = [0] * a + alldata = [] + tmp = map(N.array, lists) + means = map(amean, tmp) + vars = map(avar, tmp) + ns = map(len, lists) + for i in range(len(lists)): + alldata = alldata + lists[i] + alldata = N.array(alldata) + bign = len(alldata) + sstot = ass(alldata) - (asquare_of_sums(alldata) / float(bign)) + ssbn = 0 + for list in lists: + ssbn = ssbn + asquare_of_sums(N.array(list)) / float(len(list)) + ssbn = ssbn - (asquare_of_sums(alldata) / float(bign)) + sswn = sstot - ssbn + dfbn = a - 1 + dfwn = bign - a + msb = ssbn / float(dfbn) + msw = sswn / float(dfwn) + f = msb / msw + prob = fprob(dfbn, dfwn, f) + return f, prob + + +def lF_value(ER, EF, dfnum, dfden): + """ +Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR-dfF = degrees of freedom of the numerator + dfF = degrees of freedom associated with the denominator/Full model + +Usage: lF_value(ER,EF,dfnum,dfden) +""" + return ((ER - EF) / float(dfnum) / (EF / float(dfden))) + 
+#################################### +######## SUPPORT FUNCTIONS ####### +#################################### + + +def writecc(listoflists, file, writetype='w', extra=2): + """ +Writes a list of lists to a file in columns, customized by the max +size of items within the columns (max size of items in col, +2 characters) +to specified file. File-overwrite is the default. + +Usage: writecc (listoflists,file,writetype='w',extra=2) +Returns: None +""" + if type(listoflists[0]) not in [ListType, TupleType]: + listoflists = [listoflists] + outfile = open(file, writetype) + rowstokill = [] + list2print = copy.deepcopy(listoflists) + for i in range(len(listoflists)): + if listoflists[i] == [ + '\n' + ] or listoflists[i] == '\n' or listoflists[i] == 'dashes': + rowstokill = rowstokill + [i] + rowstokill.reverse() + for row in rowstokill: + del list2print[row] + maxsize = [0] * len(list2print[0]) + for col in range(len(list2print[0])): + items = pstat.colex(list2print, col) + items = map(pstat.makestr, items) + maxsize[col] = max(map(len, items)) + extra + for row in listoflists: + if row == ['\n'] or row == '\n': + outfile.write('\n') + elif row == ['dashes'] or row == 'dashes': + dashes = [0] * len(maxsize) + for j in range(len(maxsize)): + dashes[j] = '-' * (maxsize[j] - 2) + outfile.write(pstat.lineincustcols(dashes, maxsize)) + else: + outfile.write(pstat.lineincustcols(row, maxsize)) + outfile.write('\n') + outfile.close() + return None + + +def lincr(l, cap): # to increment a list up to a max-list of 'cap' + """ +Simulate a counting system from an n-dimensional list. 
+ +Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n +Returns: next set of values for list l, OR -1 (if overflow) +""" + l[0] = l[0] + 1 # e.g., [0,0,0] --> [2,4,3] (=cap) + for i in range(len(l)): + if l[i] > cap[i] and i < len(l) - 1: # if carryover AND not done + l[i] = 0 + l[i + 1] = l[i + 1] + 1 + elif l[i] > cap[i] and i == len( + l) - 1: # overflow past last column, must be finished + l = -1 + return l + + +def lsum(inlist): + """ +Returns the sum of the items in the passed list. + +Usage: lsum(inlist) +""" + s = 0 + for item in inlist: + s = s + item + return s + + +def lcumsum(inlist): + """ +Returns a list consisting of the cumulative sum of the items in the +passed list. + +Usage: lcumsum(inlist) +""" + newlist = copy.deepcopy(inlist) + for i in range(1, len(newlist)): + newlist[i] = newlist[i] + newlist[i - 1] + return newlist + + +def lss(inlist): + """ +Squares each value in the passed list, adds up these squares and +returns the result. + +Usage: lss(inlist) +""" + ss = 0 + for item in inlist: + ss = ss + item * item + return ss + + +def lsummult(list1, list2): + """ +Multiplies elements in list1 and list2, element by element, and +returns the sum of all resulting multiplications. Must provide equal +length lists. + +Usage: lsummult(list1,list2) +""" + if len(list1) <> len(list2): + raise ValueError, 'Lists not equal length in summult.' + s = 0 + for item1, item2 in pstat.abut(list1, list2): + s = s + item1 * item2 + return s + + +def lsumdiffsquared(x, y): + """ +Takes pairwise differences of the values in lists x and y, squares +these differences, and returns the sum of these squares. + +Usage: lsumdiffsquared(x,y) +Returns: sum[(x[i]-y[i])**2] +""" + sds = 0 + for i in range(len(x)): + sds = sds + (x[i] - y[i])**2 + return sds + + +def lsquare_of_sums(inlist): + """ +Adds the values in the passed list, squares the sum, and returns +the result. 
+ +Usage: lsquare_of_sums(inlist) +Returns: sum(inlist[i])**2 +""" + s = sum(inlist) + return float(s) * s + + +def lshellsort(inlist): + """ +Shellsort algorithm. Sorts a 1D-list. + +Usage: lshellsort(inlist) +Returns: sorted-inlist, sorting-index-vector (for original list) +""" + n = len(inlist) + svec = copy.deepcopy(inlist) + ivec = range(n) + gap = n / 2 # integer division needed + while gap > 0: + for i in range(gap, n): + for j in range(i - gap, -1, -gap): + while j >= 0 and svec[j] > svec[j + gap]: + temp = svec[j] + svec[j] = svec[j + gap] + svec[j + gap] = temp + itemp = ivec[j] + ivec[j] = ivec[j + gap] + ivec[j + gap] = itemp + gap = gap / 2 # integer division needed +# svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]] + return svec, ivec + + +def lrankdata(inlist): + """ +Ranks the data in inlist, dealing with ties appropritely. Assumes +a 1D inlist. Adapted from Gary Perlman's |Stat ranksort. + +Usage: lrankdata(inlist) +Returns: a list of length equal to inlist, containing rank scores +""" + n = len(inlist) + svec, ivec = shellsort(inlist) + sumranks = 0 + dupcount = 0 + newlist = [0] * n + for i in range(n): + sumranks = sumranks + i + dupcount = dupcount + 1 + if i == n - 1 or svec[i] <> svec[i + 1]: + averank = sumranks / float(dupcount) + 1 + for j in range(i - dupcount + 1, i + 1): + newlist[ivec[j]] = averank + sumranks = 0 + dupcount = 0 + return newlist + + +def outputpairedstats(fname, writemode, name1, n1, m1, se1, min1, max1, name2, + n2, m2, se2, min2, max2, statname, stat, prob): + """ +Prints or write to a file stats for two groups, using the name, n, +mean, sterr, min and max for each group, as well as the statistic name, +its value, and the associated p-value. 
+ +Usage: outputpairedstats(fname,writemode, + name1,n1,mean1,stderr1,min1,max1, + name2,n2,mean2,stderr2,min2,max2, + statname,stat,prob) +Returns: None +""" + suffix = '' # for *s after the p-value + try: + x = prob.shape + prob = prob[0] + except: + pass + if prob < 0.001: + suffix = ' ***' + elif prob < 0.01: + suffix = ' **' + elif prob < 0.05: + suffix = ' *' + title = [['Name', 'N', 'Mean', 'SD', 'Min', 'Max']] + lofl = title + [[name1, n1, round(m1, 3), round( + math.sqrt(se1), 3), min1, max1], [name2, n2, round(m2, 3), round( + math.sqrt(se2), 3), min2, max2]] + if type(fname) <> StringType or len(fname) == 0: + print + print statname + print + pstat.printcc(lofl) + print + try: + if stat.shape == (): + stat = stat[0] + if prob.shape == (): + prob = prob[0] + except: + pass + print 'Test statistic = ', round(stat, 3), ' p = ', round(prob, 3), suffix + print + else: + file = open(fname, writemode) + file.write('\n' + statname + '\n\n') + file.close() + writecc(lofl, fname, 'a') + file = open(fname, 'a') + try: + if stat.shape == (): + stat = stat[0] + if prob.shape == (): + prob = prob[0] + except: + pass + file.write(pstat.list2string(['\nTest statistic = ', round(stat, 4), + ' p = ', round(prob, 4), suffix, '\n\n'])) + file.close() + return None + + +def lfindwithin(data): + """ +Returns an integer representing a binary vector, where 1=within- +subject factor, 0=between. Input equals the entire data 2D list (i.e., +column 0=random factor, column -1=measured values (those two are skipped). +Note: input data is in |Stat format ... a list of lists ("2D list") with +one row per measured value, first column=subject identifier, last column= +score, one in-between column per factor (these columns contain level +designations on each factor). See also stats.anova.__doc__. 
+ +Usage: lfindwithin(data) data in |Stat format +""" + + numfact = len(data[0]) - 1 + withinvec = 0 + for col in range(1, numfact): + examplelevel = pstat.unique(pstat.colex(data, col))[0] + rows = pstat.linexand(data, col, examplelevel) # get 1 level of this factor + factsubjs = pstat.unique(pstat.colex(rows, 0)) + allsubjs = pstat.unique(pstat.colex(data, 0)) + if len(factsubjs) == len(allsubjs): # fewer Ss than scores on this factor? + withinvec = withinvec + (1 << col) + return withinvec + +######################################################### +######################################################### +####### DISPATCH LISTS AND TUPLES TO ABOVE FCNS ######### +######################################################### +######################################################### + +## CENTRAL TENDENCY: +geometricmean = Dispatch((lgeometricmean, (ListType, TupleType)),) +harmonicmean = Dispatch((lharmonicmean, (ListType, TupleType)),) +mean = Dispatch((lmean, (ListType, TupleType)),) +median = Dispatch((lmedian, (ListType, TupleType)),) +medianscore = Dispatch((lmedianscore, (ListType, TupleType)),) +mode = Dispatch((lmode, (ListType, TupleType)),) + +## MOMENTS: +moment = Dispatch((lmoment, (ListType, TupleType)),) +variation = Dispatch((lvariation, (ListType, TupleType)),) +skew = Dispatch((lskew, (ListType, TupleType)),) +kurtosis = Dispatch((lkurtosis, (ListType, TupleType)),) +describe = Dispatch((ldescribe, (ListType, TupleType)),) + +## FREQUENCY STATISTICS: +itemfreq = Dispatch((litemfreq, (ListType, TupleType)),) +scoreatpercentile = Dispatch((lscoreatpercentile, (ListType, TupleType)),) +percentileofscore = Dispatch((lpercentileofscore, (ListType, TupleType)),) +histogram = Dispatch((lhistogram, (ListType, TupleType)),) +cumfreq = Dispatch((lcumfreq, (ListType, TupleType)),) +relfreq = Dispatch((lrelfreq, (ListType, TupleType)),) + +## VARIABILITY: +obrientransform = Dispatch((lobrientransform, (ListType, TupleType)),) +samplevar = 
Dispatch((lsamplevar, (ListType, TupleType)),) +samplestdev = Dispatch((lsamplestdev, (ListType, TupleType)),) +var = Dispatch((lvar, (ListType, TupleType)),) +stdev = Dispatch((lstdev, (ListType, TupleType)),) +sterr = Dispatch((lsterr, (ListType, TupleType)),) +sem = Dispatch((lsem, (ListType, TupleType)),) +z = Dispatch((lz, (ListType, TupleType)),) +zs = Dispatch((lzs, (ListType, TupleType)),) + +## TRIMMING FCNS: +trimboth = Dispatch((ltrimboth, (ListType, TupleType)),) +trim1 = Dispatch((ltrim1, (ListType, TupleType)),) + +## CORRELATION FCNS: +paired = Dispatch((lpaired, (ListType, TupleType)),) +pearsonr = Dispatch((lpearsonr, (ListType, TupleType)),) +spearmanr = Dispatch((lspearmanr, (ListType, TupleType)),) +pointbiserialr = Dispatch((lpointbiserialr, (ListType, TupleType)),) +kendalltau = Dispatch((lkendalltau, (ListType, TupleType)),) +linregress = Dispatch((llinregress, (ListType, TupleType)),) + +## INFERENTIAL STATS: +ttest_1samp = Dispatch((lttest_1samp, (ListType, TupleType)),) +ttest_ind = Dispatch((lttest_ind, (ListType, TupleType)),) +ttest_rel = Dispatch((lttest_rel, (ListType, TupleType)),) +chisquare = Dispatch((lchisquare, (ListType, TupleType)),) +ks_2samp = Dispatch((lks_2samp, (ListType, TupleType)),) +mannwhitneyu = Dispatch((lmannwhitneyu, (ListType, TupleType)),) +ranksums = Dispatch((lranksums, (ListType, TupleType)),) +tiecorrect = Dispatch((ltiecorrect, (ListType, TupleType)),) +wilcoxont = Dispatch((lwilcoxont, (ListType, TupleType)),) +kruskalwallish = Dispatch((lkruskalwallish, (ListType, TupleType)),) +friedmanchisquare = Dispatch((lfriedmanchisquare, (ListType, TupleType)),) + +## PROBABILITY CALCS: +chisqprob = Dispatch((lchisqprob, (IntType, FloatType)),) +zprob = Dispatch((lzprob, (IntType, FloatType)),) +ksprob = Dispatch((lksprob, (IntType, FloatType)),) +fprob = Dispatch((lfprob, (IntType, FloatType)),) +betacf = Dispatch((lbetacf, (IntType, FloatType)),) +betai = Dispatch((lbetai, (IntType, FloatType)),) +erfcc = 
Dispatch((lerfcc, (IntType, FloatType)),) +gammln = Dispatch((lgammln, (IntType, FloatType)),) + +## ANOVA FUNCTIONS: +F_oneway = Dispatch((lF_oneway, (ListType, TupleType)),) +F_value = Dispatch((lF_value, (ListType, TupleType)),) + +## SUPPORT FUNCTIONS: +incr = Dispatch((lincr, (ListType, TupleType)),) +sum = Dispatch((lsum, (ListType, TupleType)),) +cumsum = Dispatch((lcumsum, (ListType, TupleType)),) +ss = Dispatch((lss, (ListType, TupleType)),) +summult = Dispatch((lsummult, (ListType, TupleType)),) +square_of_sums = Dispatch((lsquare_of_sums, (ListType, TupleType)),) +sumdiffsquared = Dispatch((lsumdiffsquared, (ListType, TupleType)),) +shellsort = Dispatch((lshellsort, (ListType, TupleType)),) +rankdata = Dispatch((lrankdata, (ListType, TupleType)),) +findwithin = Dispatch((lfindwithin, (ListType, TupleType)),) + +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS 
=============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== +#============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== + +try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE + import numpy as N + import numpy.linalg as LA + + ##################################### + ######## ACENTRAL TENDENCY ######## + ##################################### + + + def ageometricmean(inarray, dimension=None, keepdims=0): + """ +Calculates the geometric mean of the values in the passed array. +That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in +the passed array. Use dimension=None to flatten array first. REMEMBER: if +dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and +if dimension is a sequence, it collapses over all specified dimensions. If +keepdims is set to 1, the resulting array will have as many dimensions as +inarray, with only 1 'level' per dim that was collapsed over. 
+ +Usage: ageometricmean(inarray,dimension=None,keepdims=0) +Returns: geometric mean computed over dim(s) listed in dimension +""" + inarray = N.array(inarray, N.float_) + if dimension == None: + inarray = N.ravel(inarray) + size = len(inarray) + mult = N.power(inarray, 1.0 / size) + mult = N.multiply.reduce(mult) + elif type(dimension) in [IntType, FloatType]: + size = inarray.shape[dimension] + mult = N.power(inarray, 1.0 / size) + mult = N.multiply.reduce(mult, dimension) + if keepdims == 1: + shp = list(inarray.shape) + shp[dimension] = 1 + sum = N.reshape(sum, shp) + else: # must be a SEQUENCE of dims to average over + dims = list(dimension) + dims.sort() + dims.reverse() + size = N.array(N.multiply.reduce(N.take(inarray.shape, dims)), N.float_) + mult = N.power(inarray, 1.0 / size) + for dim in dims: + mult = N.multiply.reduce(mult, dim) + if keepdims == 1: + shp = list(inarray.shape) + for dim in dims: + shp[dim] = 1 + mult = N.reshape(mult, shp) + return mult + + def aharmonicmean(inarray, dimension=None, keepdims=0): + """ +Calculates the harmonic mean of the values in the passed array. +That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in +the passed array. Use dimension=None to flatten array first. REMEMBER: if +dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and +if dimension is a sequence, it collapses over all specified dimensions. If +keepdims is set to 1, the resulting array will have as many dimensions as +inarray, with only 1 'level' per dim that was collapsed over. 
+ +Usage: aharmonicmean(inarray,dimension=None,keepdims=0) +Returns: harmonic mean computed over dim(s) in dimension +""" + inarray = inarray.astype(N.float_) + if dimension == None: + inarray = N.ravel(inarray) + size = len(inarray) + s = N.add.reduce(1.0 / inarray) + elif type(dimension) in [IntType, FloatType]: + size = float(inarray.shape[dimension]) + s = N.add.reduce(1.0 / inarray, dimension) + if keepdims == 1: + shp = list(inarray.shape) + shp[dimension] = 1 + s = N.reshape(s, shp) + else: # must be a SEQUENCE of dims to average over + dims = list(dimension) + dims.sort() + nondims = [] + for i in range(len(inarray.shape)): + if i not in dims: + nondims.append(i) + tinarray = N.transpose(inarray, nondims + dims) # put keep-dims first + idx = [0] * len(nondims) + if idx == []: + size = len(N.ravel(inarray)) + s = asum(1.0 / inarray) + if keepdims == 1: + s = N.reshape([s], N.ones(len(inarray.shape))) + else: + idx[0] = -1 + loopcap = N.array(tinarray.shape[0:len(nondims)]) - 1 + s = N.zeros(loopcap + 1, N.float_) + while incr(idx, loopcap) <> -1: + s[idx] = asum(1.0 / tinarray[idx]) + size = N.multiply.reduce(N.take(inarray.shape, dims)) + if keepdims == 1: + shp = list(inarray.shape) + for dim in dims: + shp[dim] = 1 + s = N.reshape(s, shp) + return size / s + + def amean(inarray, dimension=None, keepdims=0): + """ +Calculates the arithmatic mean of the values in the passed array. +That is: 1/n * (x1 + x2 + ... + xn). Defaults to ALL values in the +passed array. Use dimension=None to flatten array first. REMEMBER: if +dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and +if dimension is a sequence, it collapses over all specified dimensions. If +keepdims is set to 1, the resulting array will have as many dimensions as +inarray, with only 1 'level' per dim that was collapsed over. 
+ +Usage: amean(inarray,dimension=None,keepdims=0) +Returns: arithematic mean calculated over dim(s) in dimension +""" + if inarray.dtype in [N.int_, N.short, N.ubyte]: + inarray = inarray.astype(N.float_) + if dimension == None: + inarray = N.ravel(inarray) + sum = N.add.reduce(inarray) + denom = float(len(inarray)) + elif type(dimension) in [IntType, FloatType]: + sum = asum(inarray, dimension) + denom = float(inarray.shape[dimension]) + if keepdims == 1: + shp = list(inarray.shape) + shp[dimension] = 1 + sum = N.reshape(sum, shp) + else: # must be a TUPLE of dims to average over + dims = list(dimension) + dims.sort() + dims.reverse() + sum = inarray * 1.0 + for dim in dims: + sum = N.add.reduce(sum, dim) + denom = N.array(N.multiply.reduce(N.take(inarray.shape, dims)), N.float_) + if keepdims == 1: + shp = list(inarray.shape) + for dim in dims: + shp[dim] = 1 + sum = N.reshape(sum, shp) + return sum / denom + + def amedian(inarray, numbins=1000): + """ +Calculates the COMPUTED median value of an array of numbers, given the +number of bins to use for the histogram (more bins approaches finding the +precise median value of the array; default number of bins = 1000). From +G.W. Heiman's Basic Stats, or CRC Probability & Statistics. +NOTE: THIS ROUTINE ALWAYS uses the entire passed array (flattens it first). + +Usage: amedian(inarray,numbins=1000) +Returns: median calculated over ALL values in inarray +""" + inarray = N.ravel(inarray) + (hist, smallest, binsize, extras) = ahistogram(inarray, numbins, + [min(inarray), max(inarray)]) + cumhist = N.cumsum(hist) # make cumulative histogram + otherbins = N.greater_equal(cumhist, len(inarray) / 2.0) + otherbins = list(otherbins) # list of 0/1s, 1s start at median bin + cfbin = otherbins.index(1) # get 1st(!) index holding 50%ile score + LRL = smallest + binsize * cfbin # get lower read limit of that bin + cfbelow = N.add.reduce(hist[0:cfbin]) # cum. freq. 
below bin + freq = hist[cfbin] # frequency IN the 50%ile bin + median = LRL + ( + (len(inarray) / 2.0 - cfbelow) / float(freq)) * binsize # MEDIAN + return median + + def amedianscore(inarray, dimension=None): + """ +Returns the 'middle' score of the passed array. If there is an even +number of scores, the mean of the 2 middle scores is returned. Can function +with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can +be None, to pre-flatten the array, or else dimension must equal 0). + +Usage: amedianscore(inarray,dimension=None) +Returns: 'middle' score of the array, or the mean of the 2 middle scores +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + inarray = N.sort(inarray, dimension) + if inarray.shape[dimension] % 2 == 0: # if even number of elements + indx = inarray.shape[dimension] / 2 # integer division correct + median = N.asarray(inarray[indx] + inarray[indx - 1]) / 2.0 + else: + indx = inarray.shape[dimension] / 2 # integer division correct + median = N.take(inarray, [indx], dimension) + if median.shape == (1,): + median = median[0] + return median + + def amode(a, dimension=None): + """ +Returns an array of the modal (most common) score in the passed array. +If there is more than one such score, ONLY THE FIRST is returned. +The bin-count for the modal values is also returned. Operates on whole +array (dimension=None), or on a given dimension. 
+ +Usage: amode(a, dimension=None) +Returns: array of bin-counts for mode(s), array of corresponding modal values +""" + + if dimension == None: + a = N.ravel(a) + dimension = 0 + scores = pstat.aunique(N.ravel(a)) # get ALL unique values + testshape = list(a.shape) + testshape[dimension] = 1 + oldmostfreq = N.zeros(testshape) + oldcounts = N.zeros(testshape) + for score in scores: + template = N.equal(a, score) + counts = asum(template, dimension, 1) + mostfrequent = N.where(counts > oldcounts, score, oldmostfreq) + oldcounts = N.where(counts > oldcounts, counts, oldcounts) + oldmostfreq = mostfrequent + return oldcounts, mostfrequent + + def atmean(a, limits=None, inclusive=(1, 1)): + """ +Returns the arithmetic mean of all values in an array, ignoring values +strictly outside the sequence passed to 'limits'. Note: either limit +in the sequence, or the value of limits itself, can be set to None. The +inclusive list/tuple determines whether the lower and upper limiting bounds +(respectively) are open/exclusive (0) or closed/inclusive (1). + +Usage: atmean(a,limits=None,inclusive=(1,1)) +""" + if a.dtype in [N.int_, N.short, N.ubyte]: + a = a.astype(N.float_) + if limits == None: + return mean(a) + assert type(limits) in [ListType, TupleType, N.ndarray + ], 'Wrong type for limits in atmean' + if inclusive[0]: + lowerfcn = N.greater_equal + else: + lowerfcn = N.greater + if inclusive[1]: + upperfcn = N.less_equal + else: + upperfcn = N.less + if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce( + N.ravel(a)): + raise ValueError, 'No array values within given limits (atmean).' 
+ elif limits[0] == None and limits[1] <> None: + mask = upperfcn(a, limits[1]) + elif limits[0] <> None and limits[1] == None: + mask = lowerfcn(a, limits[0]) + elif limits[0] <> None and limits[1] <> None: + mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1]) + s = float(N.add.reduce(N.ravel(a * mask))) + n = float(N.add.reduce(N.ravel(mask))) + return s / n + + def atvar(a, limits=None, inclusive=(1, 1)): + """ +Returns the sample variance of values in an array, (i.e., using N-1), +ignoring values strictly outside the sequence passed to 'limits'. +Note: either limit in the sequence, or the value of limits itself, +can be set to None. The inclusive list/tuple determines whether the lower +and upper limiting bounds (respectively) are open/exclusive (0) or +closed/inclusive (1). ASSUMES A FLAT ARRAY (OR ELSE PREFLATTENS). + +Usage: atvar(a,limits=None,inclusive=(1,1)) +""" + a = a.astype(N.float_) + if limits == None or limits == [None, None]: + return avar(a) + assert type(limits) in [ListType, TupleType, N.ndarray + ], 'Wrong type for limits in atvar' + if inclusive[0]: + lowerfcn = N.greater_equal + else: + lowerfcn = N.greater + if inclusive[1]: + upperfcn = N.less_equal + else: + upperfcn = N.less + if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce( + N.ravel(a)): + raise ValueError, 'No array values within given limits (atvar).' + elif limits[0] == None and limits[1] <> None: + mask = upperfcn(a, limits[1]) + elif limits[0] <> None and limits[1] == None: + mask = lowerfcn(a, limits[0]) + elif limits[0] <> None and limits[1] <> None: + mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1]) + + a = N.compress(mask, a) # squish out excluded values + return avar(a) + + def atmin(a, lowerlimit=None, dimension=None, inclusive=1): + """ +Returns the minimum value of a, along dimension, including only values less +than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None, +all values in the array are used. 
+ +Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1) +""" + if inclusive: + lowerfcn = N.greater + else: + lowerfcn = N.greater_equal + if dimension == None: + a = N.ravel(a) + dimension = 0 + if lowerlimit == None: + lowerlimit = N.minimum.reduce(N.ravel(a)) - 11 + biggest = N.maximum.reduce(N.ravel(a)) + ta = N.where(lowerfcn(a, lowerlimit), a, biggest) + return N.minimum.reduce(ta, dimension) + + def atmax(a, upperlimit, dimension=None, inclusive=1): + """ +Returns the maximum value of a, along dimension, including only values greater +than (or equal to, if inclusive=1) upperlimit. If the limit is set to None, +a limit larger than the max value in the array is used. + +Usage: atmax(a,upperlimit,dimension=None,inclusive=1) +""" + if inclusive: + upperfcn = N.less + else: + upperfcn = N.less_equal + if dimension == None: + a = N.ravel(a) + dimension = 0 + if upperlimit == None: + upperlimit = N.maximum.reduce(N.ravel(a)) + 1 + smallest = N.minimum.reduce(N.ravel(a)) + ta = N.where(upperfcn(a, upperlimit), a, smallest) + return N.maximum.reduce(ta, dimension) + + def atstdev(a, limits=None, inclusive=(1, 1)): + """ +Returns the standard deviation of all values in an array, ignoring values +strictly outside the sequence passed to 'limits'. Note: either limit +in the sequence, or the value of limits itself, can be set to None. The +inclusive list/tuple determines whether the lower and upper limiting bounds +(respectively) are open/exclusive (0) or closed/inclusive (1). + +Usage: atstdev(a,limits=None,inclusive=(1,1)) +""" + return N.sqrt(tvar(a, limits, inclusive)) + + def atsem(a, limits=None, inclusive=(1, 1)): + """ +Returns the standard error of the mean for the values in an array, +(i.e., using N for the denominator), ignoring values strictly outside +the sequence passed to 'limits'. Note: either limit in the sequence, +or the value of limits itself, can be set to None. 
The inclusive list/tuple +determines whether the lower and upper limiting bounds (respectively) are +open/exclusive (0) or closed/inclusive (1). + +Usage: atsem(a,limits=None,inclusive=(1,1)) +""" + sd = tstdev(a, limits, inclusive) + if limits == None or limits == [None, None]: + n = float(len(N.ravel(a))) + limits = [min(a) - 1, max(a) + 1] + assert type(limits) in [ListType, TupleType, N.ndarray + ], 'Wrong type for limits in atsem' + if inclusive[0]: + lowerfcn = N.greater_equal + else: + lowerfcn = N.greater + if inclusive[1]: + upperfcn = N.less_equal + else: + upperfcn = N.less + if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce( + N.ravel(a)): + raise ValueError, 'No array values within given limits (atsem).' + elif limits[0] == None and limits[1] <> None: + mask = upperfcn(a, limits[1]) + elif limits[0] <> None and limits[1] == None: + mask = lowerfcn(a, limits[0]) + elif limits[0] <> None and limits[1] <> None: + mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1]) + term1 = N.add.reduce(N.ravel(a * a * mask)) + n = float(N.add.reduce(N.ravel(mask))) + return sd / math.sqrt(n) + +##################################### +############ AMOMENTS ############# +##################################### + + def amoment(a, moment=1, dimension=None): + """ +Calculates the nth moment about the mean for a sample (defaults to the +1st moment). Generally used to calculate coefficients of skewness and +kurtosis. Dimension can equal None (ravel array first), an integer +(the dimension over which to operate), or a sequence (operate over +multiple dimensions). 
+ +Usage: amoment(a,moment=1,dimension=None) +Returns: appropriate moment along given dimension +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + if moment == 1: + return 0.0 + else: + mn = amean(a, dimension, 1) # 1=keepdims + s = N.power((a - mn), moment) + return amean(s, dimension) + + def avariation(a, dimension=None): + """ +Returns the coefficient of variation, as defined in CRC Standard +Probability and Statistics, p.6. Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). + +Usage: avariation(a,dimension=None) +""" + return 100.0 * asamplestdev(a, dimension) / amean(a, dimension) + + def askew(a, dimension=None): + """ +Returns the skewness of a distribution (normal ==> 0.0; >0 means extra +weight in left tail). Use askewtest() to see if it's close enough. +Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). + +Usage: askew(a, dimension=None) +Returns: skew of vals in a along dimension, returning ZERO where all vals equal +""" + denom = N.power(amoment(a, 2, dimension), 1.5) + zero = N.equal(denom, 0) + if type(denom) == N.ndarray and asum(zero) <> 0: + print 'Number of zeros in askew: ', asum(zero) + denom = denom + zero # prevent divide-by-zero + return N.where(zero, 0, amoment(a, 3, dimension) / denom) + + def akurtosis(a, dimension=None): + """ +Returns the kurtosis of a distribution (normal ==> 3.0; >3 means +heavier in the tails, and usually more peaked). Use akurtosistest() +to see if it's close enough. Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). 
+ +Usage: akurtosis(a,dimension=None) +Returns: kurtosis of values in a along dimension, and ZERO where all vals equal +""" + denom = N.power(amoment(a, 2, dimension), 2) + zero = N.equal(denom, 0) + if type(denom) == N.ndarray and asum(zero) <> 0: + print 'Number of zeros in akurtosis: ', asum(zero) + denom = denom + zero # prevent divide-by-zero + return N.where(zero, 0, amoment(a, 4, dimension) / denom) + + def adescribe(inarray, dimension=None): + """ +Returns several descriptive statistics of the passed array. Dimension +can equal None (ravel array first), an integer (the dimension over +which to operate), or a sequence (operate over multiple dimensions). + +Usage: adescribe(inarray,dimension=None) +Returns: n, (min,max), mean, standard deviation, skew, kurtosis +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + n = inarray.shape[dimension] + mm = (N.minimum.reduce(inarray), N.maximum.reduce(inarray)) + m = amean(inarray, dimension) + sd = astdev(inarray, dimension) + skew = askew(inarray, dimension) + kurt = akurtosis(inarray, dimension) + return n, mm, m, sd, skew, kurt + +##################################### +######## NORMALITY TESTS ########## +##################################### + + def askewtest(a, dimension=None): + """ +Tests whether the skew is significantly different from a normal +distribution. Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). 
+ +Usage: askewtest(a,dimension=None) +Returns: z-score and 2-tail z-probability +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + b2 = askew(a, dimension) + n = float(a.shape[dimension]) + y = b2 * N.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2))) + beta2 = (3.0 * (n * n + 27 * n - 70) * (n + 1) * + (n + 3)) / ((n - 2.0) * (n + 5) * (n + 7) * (n + 9)) + W2 = -1 + N.sqrt(2 * (beta2 - 1)) + delta = 1 / N.sqrt(N.log(N.sqrt(W2))) + alpha = N.sqrt(2 / (W2 - 1)) + y = N.where(y == 0, 1, y) + Z = delta * N.log(y / alpha + N.sqrt((y / alpha)**2 + 1)) + return Z, (1.0 - zprob(Z)) * 2 + + def akurtosistest(a, dimension=None): + """ +Tests whether a dataset has normal kurtosis (i.e., +kurtosis=3(n-1)/(n+1)) Valid only for n>20. Dimension can equal None +(ravel array first), an integer (the dimension over which to operate), +or a sequence (operate over multiple dimensions). + +Usage: akurtosistest(a,dimension=None) +Returns: z-score and 2-tail z-probability, returns 0 for bad pixels +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + n = float(a.shape[dimension]) + if n < 20: + print 'akurtosistest only valid for n>=20 ... 
continuing anyway, n=', n + b2 = akurtosis(a, dimension) + E = 3.0 * (n - 1) / (n + 1) + varb2 = 24.0 * n * (n - 2) * (n - 3) / ((n + 1) * (n + 1) * (n + 3) * + (n + 5)) + x = (b2 - E) / N.sqrt(varb2) + sqrtbeta1 = 6.0 * (n * n - 5 * n + 2) / ((n + 7) * (n + 9)) * N.sqrt( + (6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3))) + A = 6.0 + 8.0 / sqrtbeta1 * (2.0 / sqrtbeta1 + + N.sqrt(1 + 4.0 / (sqrtbeta1**2))) + term1 = 1 - 2 / (9.0 * A) + denom = 1 + x * N.sqrt(2 / (A - 4.0)) + denom = N.where(N.less(denom, 0), 99, denom) + term2 = N.where( + N.equal(denom, 0), term1, N.power( + (1 - 2.0 / A) / denom, 1 / 3.0)) + Z = (term1 - term2) / N.sqrt(2 / (9.0 * A)) + Z = N.where(N.equal(denom, 99), 0, Z) + return Z, (1.0 - zprob(Z)) * 2 + + def anormaltest(a, dimension=None): + """ +Tests whether skew and/OR kurtosis of dataset differs from normal +curve. Can operate over multiple dimensions. Dimension can equal +None (ravel array first), an integer (the dimension over which to +operate), or a sequence (operate over multiple dimensions). + +Usage: anormaltest(a,dimension=None) +Returns: z-score and 2-tail probability +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + s, p = askewtest(a, dimension) + k, p = akurtosistest(a, dimension) + k2 = N.power(s, 2) + N.power(k, 2) + return k2, achisqprob(k2, 2) + +##################################### +###### AFREQUENCY FUNCTIONS ####### +##################################### + + def aitemfreq(a): + """ +Returns a 2D array of item frequencies. Column 1 contains item values, +column 2 contains their respective counts. Assumes a 1D array is passed. +@@@sorting OK? 
+ +Usage: aitemfreq(a) +Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies) +""" + scores = pstat.aunique(a) + scores = N.sort(scores) + freq = N.zeros(len(scores)) + for i in range(len(scores)): + freq[i] = N.add.reduce(N.equal(a, scores[i])) + return N.array(pstat.aabut(scores, freq)) + + def ascoreatpercentile(inarray, percent): + """ +Usage: ascoreatpercentile(inarray,percent) 0<percent<100 +Returns: score at given percentile, relative to inarray distribution +""" + percent = percent / 100.0 + targetcf = percent * len(inarray) + h, lrl, binsize, extras = histogram(inarray) + cumhist = cumsum(h * 1) + for i in range(len(cumhist)): + if cumhist[i] >= targetcf: + break + score = binsize * ( + (targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + binsize * i) + return score + + def apercentileofscore(inarray, score, histbins=10, defaultlimits=None): + """ +Note: result of this function depends on the values used to histogram +the data(!). + +Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None) +Returns: percentile-position of score (0-100) relative to inarray +""" + h, lrl, binsize, extras = histogram(inarray, histbins, defaultlimits) + cumhist = cumsum(h * 1) + i = int((score - lrl) / float(binsize)) + pct = (cumhist[i - 1] + ((score - (lrl + binsize * i)) / float(binsize)) * + h[i]) / float(len(inarray)) * 100 + return pct + + def ahistogram(inarray, numbins=10, defaultlimits=None, printextras=1): + """ +Returns (i) an array of histogram bin counts, (ii) the smallest value +of the histogram binning, and (iii) the bin width (the last 2 are not +necessarily integers). Default number of bins is 10. Defaultlimits +can be None (the routine picks bins spanning all the numbers in the +inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the +following: array of bin values, lowerreallimit, binsize, extrapoints. 
+ +Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1) +Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range) +""" + inarray = N.ravel(inarray) # flatten any >1D arrays + if (defaultlimits <> None): + lowerreallimit = defaultlimits[0] + upperreallimit = defaultlimits[1] + binsize = (upperreallimit - lowerreallimit) / float(numbins) + else: + Min = N.minimum.reduce(inarray) + Max = N.maximum.reduce(inarray) + estbinwidth = float(Max - Min) / float(numbins) + 1e-6 + binsize = (Max - Min + estbinwidth) / float(numbins) + lowerreallimit = Min - binsize / 2.0 #lower real limit,1st bin + bins = N.zeros(numbins) + extrapoints = 0 + for num in inarray: + try: + if (num - lowerreallimit) < 0: + extrapoints = extrapoints + 1 + else: + bintoincrement = int((num - lowerreallimit) / float(binsize)) + bins[bintoincrement] = bins[bintoincrement] + 1 + except: # point outside lower/upper limits + extrapoints = extrapoints + 1 + if (extrapoints > 0 and printextras == 1): + print '\nPoints outside given histogram range =', extrapoints + return (bins, lowerreallimit, binsize, extrapoints) + + def acumfreq(a, numbins=10, defaultreallimits=None): + """ +Returns a cumulative frequency histogram, using the histogram function. +Defaultreallimits can be None (use all data), or a 2-sequence containing +lower and upper limits on values to include. + +Usage: acumfreq(a,numbins=10,defaultreallimits=None) +Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h, l, b, e = histogram(a, numbins, defaultreallimits) + cumhist = cumsum(h * 1) + return cumhist, l, b, e + + def arelfreq(a, numbins=10, defaultreallimits=None): + """ +Returns a relative frequency histogram, using the histogram function. +Defaultreallimits can be None (use all data), or a 2-sequence containing +lower and upper limits on values to include. 
+ +Usage: arelfreq(a,numbins=10,defaultreallimits=None) +Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints +""" + h, l, b, e = histogram(a, numbins, defaultreallimits) + h = N.array(h / float(a.shape[0])) + return h, l, b, e + +##################################### +###### AVARIABILITY FUNCTIONS ##### +##################################### + + def aobrientransform(*args): + """ +Computes a transform on input data (any number of columns). Used to +test for homogeneity of variance prior to running one-way stats. Each +array in *args is one level of a factor. If an F_oneway() run on the +transformed data and found significant, variances are unequal. From +Maxwell and Delaney, p.112. + +Usage: aobrientransform(*args) *args = 1D arrays, one per level of factor +Returns: transformed data for use in an ANOVA +""" + TINY = 1e-10 + k = len(args) + n = N.zeros(k, N.float_) + v = N.zeros(k, N.float_) + m = N.zeros(k, N.float_) + nargs = [] + for i in range(k): + nargs.append(args[i].astype(N.float_)) + n[i] = float(len(nargs[i])) + v[i] = var(nargs[i]) + m[i] = mean(nargs[i]) + for j in range(k): + for i in range(n[j]): + t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j])**2 + t2 = 0.5 * v[j] * (n[j] - 1.0) + t3 = (n[j] - 1.0) * (n[j] - 2.0) + nargs[j][i] = (t1 - t2) / float(t3) + check = 1 + for j in range(k): + if v[j] - mean(nargs[j]) > TINY: + check = 0 + if check <> 1: + raise ValueError, 'Lack of convergence in obrientransform.' + else: + return N.array(nargs) + + def asamplevar(inarray, dimension=None, keepdims=0): + """ +Returns the sample standard deviation of the values in the passed +array (i.e., using N). Dimension can equal None (ravel array first), +an integer (the dimension over which to operate), or a sequence +(operate over multiple dimensions). Set keepdims=1 to return an array +with the same number of dimensions as inarray. 
+ +Usage: asamplevar(inarray,dimension=None,keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + if dimension == 1: + mn = amean(inarray, dimension)[:, N.NewAxis] + else: + mn = amean(inarray, dimension, keepdims=1) + deviations = inarray - mn + if type(dimension) == ListType: + n = 1 + for d in dimension: + n = n * inarray.shape[d] + else: + n = inarray.shape[dimension] + svar = ass(deviations, dimension, keepdims) / float(n) + return svar + + def asamplestdev(inarray, dimension=None, keepdims=0): + """ +Returns the sample standard deviation of the values in the passed +array (i.e., using N). Dimension can equal None (ravel array first), +an integer (the dimension over which to operate), or a sequence +(operate over multiple dimensions). Set keepdims=1 to return an array +with the same number of dimensions as inarray. + +Usage: asamplestdev(inarray,dimension=None,keepdims=0) +""" + return N.sqrt(asamplevar(inarray, dimension, keepdims)) + + def asignaltonoise(instack, dimension=0): + """ +Calculates signal-to-noise. Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). + +Usage: asignaltonoise(instack,dimension=0): +Returns: array containing the value of (mean/stdev) along dimension, + or 0 when stdev=0 +""" + m = mean(instack, dimension) + sd = stdev(instack, dimension) + return N.where(sd == 0, 0, m / sd) + + def acov(x, y, dimension=None, keepdims=0): + """ +Returns the estimated covariance of the values in the passed +array (i.e., N-1). Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). Set keepdims=1 to return an array with the +same number of dimensions as inarray. 
+ +Usage: acov(x,y,dimension=None,keepdims=0) +""" + if dimension == None: + x = N.ravel(x) + y = N.ravel(y) + dimension = 0 + xmn = amean(x, dimension, 1) # keepdims + xdeviations = x - xmn + ymn = amean(y, dimension, 1) # keepdims + ydeviations = y - ymn + if type(dimension) == ListType: + n = 1 + for d in dimension: + n = n * x.shape[d] + else: + n = x.shape[dimension] + covar = N.sum(xdeviations * ydeviations) / float(n - 1) + return covar + + def avar(inarray, dimension=None, keepdims=0): + """ +Returns the estimated population variance of the values in the passed +array (i.e., N-1). Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). Set keepdims=1 to return an array with the +same number of dimensions as inarray. + +Usage: avar(inarray,dimension=None,keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + mn = amean(inarray, dimension, 1) + deviations = inarray - mn + if type(dimension) == ListType: + n = 1 + for d in dimension: + n = n * inarray.shape[d] + else: + n = inarray.shape[dimension] + var = ass(deviations, dimension, keepdims) / float(n - 1) + return var + + def astdev(inarray, dimension=None, keepdims=0): + """ +Returns the estimated population standard deviation of the values in +the passed array (i.e., N-1). Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). Set keepdims=1 to return +an array with the same number of dimensions as inarray. + +Usage: astdev(inarray,dimension=None,keepdims=0) +""" + return N.sqrt(avar(inarray, dimension, keepdims)) + + def asterr(inarray, dimension=None, keepdims=0): + """ +Returns the estimated population standard error of the values in the +passed array (i.e., N-1). 
Dimension can equal None (ravel array +first), an integer (the dimension over which to operate), or a +sequence (operate over multiple dimensions). Set keepdims=1 to return +an array with the same number of dimensions as inarray. + +Usage: asterr(inarray,dimension=None,keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + return astdev(inarray, dimension, + keepdims) / float(N.sqrt(inarray.shape[dimension])) + + def asem(inarray, dimension=None, keepdims=0): + """ +Returns the standard error of the mean (i.e., using N) of the values +in the passed array. Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions). Set keepdims=1 to return an array with the +same number of dimensions as inarray. + +Usage: asem(inarray,dimension=None, keepdims=0) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + if type(dimension) == ListType: + n = 1 + for d in dimension: + n = n * inarray.shape[d] + else: + n = inarray.shape[dimension] + s = asamplestdev(inarray, dimension, keepdims) / N.sqrt(n - 1) + return s + + def az(a, score): + """ +Returns the z-score of a given input score, given thearray from which +that score came. Not appropriate for population calculations, nor for +arrays > 1D. + +Usage: az(a, score) +""" + z = (score - amean(a)) / asamplestdev(a) + return z + + def azs(a): + """ +Returns a 1D array of z-scores, one for each score in the passed array, +computed relative to the passed array. + +Usage: azs(a) +""" + zscores = [] + for item in a: + zscores.append(z(a, item)) + return N.array(zscores) + + def azmap(scores, compare, dimension=0): + """ +Returns an array of z-scores the shape of scores (e.g., [x,y]), compared to +array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0 +of the compare array. 
+ +Usage: azs(scores, compare, dimension=0) +""" + mns = amean(compare, dimension) + sstd = asamplestdev(compare, 0) + return (scores - mns) / sstd + +##################################### +####### ATRIMMING FUNCTIONS ####### +##################################### + +## deleted around() as it's in numpy now + + def athreshold(a, threshmin=None, threshmax=None, newval=0): + """ +Like Numeric.clip() except that values <threshmid or >threshmax are replaced +by newval instead of by threshmin/threshmax (respectively). + +Usage: athreshold(a,threshmin=None,threshmax=None,newval=0) +Returns: a, with values <threshmin or >threshmax replaced with newval +""" + mask = N.zeros(a.shape) + if threshmin <> None: + mask = mask + N.where(a < threshmin, 1, 0) + if threshmax <> None: + mask = mask + N.where(a > threshmax, 1, 0) + mask = N.clip(mask, 0, 1) + return N.where(mask, newval, a) + + def atrimboth(a, proportiontocut): + """ +Slices off the passed proportion of items from BOTH ends of the passed +array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND +'rightmost' 10% of scores. You must pre-sort the array if you want +"proper" trimming. Slices off LESS if proportion results in a +non-integer slice index (i.e., conservatively slices off +proportiontocut). + +Usage: atrimboth (a,proportiontocut) +Returns: trimmed version of array a +""" + lowercut = int(proportiontocut * len(a)) + uppercut = len(a) - lowercut + return a[lowercut:uppercut] + + def atrim1(a, proportiontocut, tail='right'): + """ +Slices off the passed proportion of items from ONE end of the passed +array (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' +10% of scores). Slices off LESS if proportion results in a non-integer +slice index (i.e., conservatively slices off proportiontocut). 
+ +Usage: atrim1(a,proportiontocut,tail='right') or set tail='left' +Returns: trimmed version of array a +""" + if string.lower(tail) == 'right': + lowercut = 0 + uppercut = len(a) - int(proportiontocut * len(a)) + elif string.lower(tail) == 'left': + lowercut = int(proportiontocut * len(a)) + uppercut = len(a) + return a[lowercut:uppercut] + +##################################### +##### ACORRELATION FUNCTIONS ###### +##################################### + + def acovariance(X): + """ +Computes the covariance matrix of a matrix X. Requires a 2D matrix input. + +Usage: acovariance(X) +Returns: covariance matrix of X +""" + if len(X.shape) <> 2: + raise TypeError, 'acovariance requires 2D matrices' + n = X.shape[0] + mX = amean(X, 0) + return N.dot(N.transpose(X), X) / float(n) - N.multiply.outer(mX, mX) + + def acorrelation(X): + """ +Computes the correlation matrix of a matrix X. Requires a 2D matrix input. + +Usage: acorrelation(X) +Returns: correlation matrix of X +""" + C = acovariance(X) + V = N.diagonal(C) + return C / N.sqrt(N.multiply.outer(V, V)) + + def apaired(x, y): + """ +Interactively determines the type of data in x and y, and then runs the +appropriated statistic for paired group data. 
+ +Usage: apaired(x,y) x,y = the two arrays of values to be compared +Returns: appropriate statistic name, value, and probability +""" + samples = '' + while samples not in ['i', 'r', 'I', 'R', 'c', 'C']: + print '\nIndependent or related samples, or correlation (i,r,c): ', + samples = raw_input() + + if samples in ['i', 'I', 'r', 'R']: + print '\nComparing variances ...', + # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112 + r = obrientransform(x, y) + f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1)) + if p < 0.05: + vartype = 'unequal, p=' + str(round(p, 4)) + else: + vartype = 'equal' + print vartype + if samples in ['i', 'I']: + if vartype[0] == 'e': + t, p = ttest_ind(x, y, None, 0) + print '\nIndependent samples t-test: ', round(t, 4), round(p, 4) + else: + if len(x) > 20 or len(y) > 20: + z, p = ranksums(x, y) + print '\nRank Sums test (NONparametric, n>20): ', round( + z, 4), round(p, 4) + else: + u, p = mannwhitneyu(x, y) + print '\nMann-Whitney U-test (NONparametric, ns<20): ', round( + u, 4), round(p, 4) + + else: # RELATED SAMPLES + if vartype[0] == 'e': + t, p = ttest_rel(x, y, 0) + print '\nRelated samples t-test: ', round(t, 4), round(p, 4) + else: + t, p = ranksums(x, y) + print '\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4) + else: # CORRELATION ANALYSIS + corrtype = '' + while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']: + print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ', + corrtype = raw_input() + if corrtype in ['c', 'C']: + m, b, r, p, see = linregress(x, y) + print '\nLinear regression for continuous variables ...' + lol = [ + ['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'], + [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)] + ] + pstat.printcc(lol) + elif corrtype in ['r', 'R']: + r, p = spearmanr(x, y) + print '\nCorrelation for ranked variables ...' 
+ print "Spearman's r: ", round(r, 4), round(p, 4) + else: # DICHOTOMOUS + r, p = pointbiserialr(x, y) + print '\nAssuming x contains a dichotomous variable ...' + print 'Point Biserial r: ', round(r, 4), round(p, 4) + print '\n\n' + return None + + def dices(x, y): + """ +Calculates Dice's coefficient ... (2*number of common terms)/(number of terms in +x + +number of terms in y). Returns a value between 0 (orthogonal) and 1. + +Usage: dices(x,y) +""" + import sets + x = sets.Set(x) + y = sets.Set(y) + common = len(x.intersection(y)) + total = float(len(x) + len(y)) + return 2 * common / total + + def icc(x, y=None, verbose=0): + """ +Calculates intraclass correlation coefficients using simple, Type I sums of +squares. +If only one variable is passed, assumed it's an Nx2 matrix + +Usage: icc(x,y=None,verbose=0) +Returns: icc rho, prob ####PROB IS A GUESS BASED ON PEARSON +""" + TINY = 1.0e-20 + if y: + all = N.concatenate([x, y], 0) + else: + all = x + 0 + x = all[:, 0] + y = all[:, 1] + totalss = ass(all - mean(all)) + pairmeans = (x + y) / 2. + withinss = ass(x - pairmeans) + ass(y - pairmeans) + withindf = float(len(x)) + betwdf = float(len(x) - 1) + withinms = withinss / withindf + betweenms = (totalss - withinss) / betwdf + rho = (betweenms - withinms) / (withinms + betweenms) + t = rho * math.sqrt(betwdf / ((1.0 - rho + TINY) * (1.0 + rho + TINY))) + prob = abetai(0.5 * betwdf, 0.5, betwdf / (betwdf + t * t), verbose) + return rho, prob + + def alincc(x, y): + """ +Calculates Lin's concordance correlation coefficient. 
+ +Usage: alincc(x,y) where x, y are equal-length arrays +Returns: Lin's CC +""" + x = N.ravel(x) + y = N.ravel(y) + covar = acov(x, y) * (len(x) - 1) / float(len(x)) # correct denom to n + xvar = avar(x) * (len(x) - 1) / float(len(x)) # correct denom to n + yvar = avar(y) * (len(y) - 1) / float(len(y)) # correct denom to n + lincc = (2 * covar) / ((xvar + yvar) + ((amean(x) - amean(y))**2)) + return lincc + + def apearsonr(x, y, verbose=1): + """ +Calculates a Pearson correlation coefficient and returns p. Taken +from Heiman's Basic Statistics for the Behav. Sci (2nd), p.195. + +Usage: apearsonr(x,y,verbose=1) where x,y are equal length arrays +Returns: Pearson's r, two-tailed p-value +""" + TINY = 1.0e-20 + n = len(x) + xmean = amean(x) + ymean = amean(y) + r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y) + r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) * + (n * ass(y) - asquare_of_sums(y))) + r = (r_num / r_den) + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = abetai(0.5 * df, 0.5, df / (df + t * t), verbose) + return r, prob + + def aspearmanr(x, y): + """ +Calculates a Spearman rank-order correlation coefficient. Taken +from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. + +Usage: aspearmanr(x,y) where x,y are equal-length arrays +Returns: Spearman's r, two-tailed p-value +""" + TINY = 1e-30 + n = len(x) + rankx = rankdata(x) + ranky = rankdata(y) + dsq = N.add.reduce((rankx - ranky)**2) + rs = 1 - 6 * dsq / float(n * (n**2 - 1)) + t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs))) + df = n - 2 + probrs = abetai(0.5 * df, 0.5, df / (df + t * t)) + # probability values for rs are from part 2 of the spearman function in + # Numerical Recipies, p.510. They close to tables, but not exact.(?) + return rs, probrs + + def apointbiserialr(x, y): + """ +Calculates a point-biserial correlation coefficient and the associated +probability value. 
Taken from Heiman's Basic Statistics for the Behav. +Sci (1st), p.194. + +Usage: apointbiserialr(x,y) where x,y are equal length arrays +Returns: Point-biserial r, two-tailed p-value +""" + TINY = 1e-30 + categories = pstat.aunique(x) + data = pstat.aabut(x, y) + if len(categories) <> 2: + raise ValueError, ('Exactly 2 categories required (in x) for ' + 'pointbiserialr().') + else: # there are 2 categories, continue + codemap = pstat.aabut(categories, N.arange(2)) + recoded = pstat.arecode(data, codemap, 0) + x = pstat.alinexand(data, 0, categories[0]) + y = pstat.alinexand(data, 0, categories[1]) + xmean = amean(pstat.acolex(x, 1)) + ymean = amean(pstat.acolex(y, 1)) + n = len(data) + adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n))) + rpb = (ymean - xmean) / asamplestdev(pstat.acolex(data, 1)) * adjust + df = n - 2 + t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY))) + prob = abetai(0.5 * df, 0.5, df / (df + t * t)) + return rpb, prob + + def akendalltau(x, y): + """ +Calculates Kendall's tau ... correlation of ordinal data. Adapted +from function kendl1 in Numerical Recipies. Needs good test-cases.@@@ + +Usage: akendalltau(x,y) +Returns: Kendall's tau, two-tailed p-value +""" + n1 = 0 + n2 = 0 + iss = 0 + for j in range(len(x) - 1): + for k in range(j, len(y)): + a1 = x[j] - x[k] + a2 = y[j] - y[k] + aa = a1 * a2 + if (aa): # neither array has a tie + n1 = n1 + 1 + n2 = n2 + 1 + if aa > 0: + iss = iss + 1 + else: + iss = iss - 1 + else: + if (a1): + n1 = n1 + 1 + else: + n2 = n2 + 1 + tau = iss / math.sqrt(n1 * n2) + svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1)) + z = tau / math.sqrt(svar) + prob = erfcc(abs(z) / 1.4142136) + return tau, prob + + def alinregress(*args): + """ +Calculates a regression line on two arrays, x and y, corresponding to x,y +pairs. If a single 2D array is passed, alinregress finds dim with 2 levels +and splits data into x,y pairs along that dim. 
+ +Usage: alinregress(*args) args=2 equal-length arrays, or one 2D array +Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n +""" + TINY = 1.0e-20 + if len(args) == 1: # more than 1D array? + args = args[0] + if len(args) == 2: + x = args[0] + y = args[1] + else: + x = args[:, 0] + y = args[:, 1] + else: + x = args[0] + y = args[1] + n = len(x) + xmean = amean(x) + ymean = amean(y) + r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y) + r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) * + (n * ass(y) - asquare_of_sums(y))) + r = r_num / r_den + z = 0.5 * math.log((1.0 + r + TINY) / (1.0 - r + TINY)) + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = abetai(0.5 * df, 0.5, df / (df + t * t)) + slope = r_num / (float(n) * ass(x) - asquare_of_sums(x)) + intercept = ymean - slope * xmean + sterrest = math.sqrt(1 - r * r) * asamplestdev(y) + return slope, intercept, r, prob, sterrest, n + + def amasslinregress(*args): + """ +Calculates a regression line on one 1D array (x) and one N-D array (y). + +Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate, n +""" + TINY = 1.0e-20 + if len(args) == 1: # more than 1D array? 
+ args = args[0] + if len(args) == 2: + x = N.ravel(args[0]) + y = args[1] + else: + x = N.ravel(args[:, 0]) + y = args[:, 1] + else: + x = args[0] + y = args[1] + x = x.astype(N.float_) + y = y.astype(N.float_) + n = len(x) + xmean = amean(x) + ymean = amean(y, 0) + shp = N.ones(len(y.shape)) + shp[0] = len(x) + x.shape = shp + print x.shape, y.shape + r_num = n * (N.add.reduce(x * y, 0)) - N.add.reduce(x) * N.add.reduce(y, 0) + r_den = N.sqrt((n * ass(x) - asquare_of_sums(x)) * + (n * ass(y, 0) - asquare_of_sums(y, 0))) + zerodivproblem = N.equal(r_den, 0) + r_den = N.where(zerodivproblem, 1, r_den + ) # avoid zero-division in 1st place + r = r_num / r_den # need to do this nicely for matrix division + r = N.where(zerodivproblem, 0.0, r) + z = 0.5 * N.log((1.0 + r + TINY) / (1.0 - r + TINY)) + df = n - 2 + t = r * N.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = abetai(0.5 * df, 0.5, df / (df + t * t)) + + ss = float(n) * ass(x) - asquare_of_sums(x) + s_den = N.where(ss == 0, 1, ss) # avoid zero-division in 1st place + slope = r_num / s_den + intercept = ymean - slope * xmean + sterrest = N.sqrt(1 - r * r) * asamplestdev(y, 0) + return slope, intercept, r, prob, sterrest, n + +##################################### +##### AINFERENTIAL STATISTICS ##### +##################################### + + def attest_1samp(a, popmean, printit=0, name='Sample', writemode='a'): + """ +Calculates the t-obtained for the independent samples T-test on ONE group +of scores a, given a population mean. If printit=1, results are printed +to the screen. If printit='filename', the results are output to 'filename' +using the given writemode (default=append). Returns t-value, and prob. 
+ +Usage: attest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') +Returns: t-value, two-tailed prob +""" + if type(a) != N.ndarray: + a = N.array(a) + x = amean(a) + v = avar(a) + n = len(a) + df = n - 1 + svar = ((n - 1) * v) / float(df) + t = (x - popmean) / math.sqrt(svar * (1.0 / n)) + prob = abetai(0.5 * df, 0.5, df / (df + t * t)) + + if printit <> 0: + statname = 'Single-sample T-test.' + outputpairedstats(printit, writemode, 'Population', '--', popmean, 0, 0, + 0, name, n, x, v, N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), statname, t, prob) + return t, prob + + def attest_ind(a, + b, + dimension=None, + printit=0, + name1='Samp1', + name2='Samp2', + writemode='a'): + """ +Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores +a, and b. From Numerical Recipies, p.483. If printit=1, results are +printed to the screen. If printit='filename', the results are output +to 'filename' using the given writemode (default=append). Dimension +can equal None (ravel array first), or an integer (the dimension over +which to operate on a and b). + +Usage: attest_ind (a,b,dimension=None,printit=0, + Name1='Samp1',Name2='Samp2',writemode='a') +Returns: t-value, two-tailed p-value +""" + if dimension == None: + a = N.ravel(a) + b = N.ravel(b) + dimension = 0 + x1 = amean(a, dimension) + x2 = amean(b, dimension) + v1 = avar(a, dimension) + v2 = avar(b, dimension) + n1 = a.shape[dimension] + n2 = b.shape[dimension] + df = n1 + n2 - 2 + svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df) + zerodivproblem = N.equal(svar, 0) + svar = N.where(zerodivproblem, 1, svar) # avoid zero-division in 1st place + t = (x1 - x2) / N.sqrt(svar * + (1.0 / n1 + 1.0 / n2)) # N-D COMPUTATION HERE!!!!!! 
+ t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0 + probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) + + if type(t) == N.ndarray: + probs = N.reshape(probs, t.shape) + if probs.shape == (1,): + probs = probs[0] + + if printit <> 0: + if type(t) == N.ndarray: + t = t[0] + if type(probs) == N.ndarray: + probs = probs[0] + statname = 'Independent samples T-test.' + outputpairedstats(printit, writemode, name1, n1, x1, v1, + N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), name2, n2, x2, v2, + N.minimum.reduce(N.ravel(b)), + N.maximum.reduce(N.ravel(b)), statname, t, probs) + return + return t, probs + + def ap2t(pval, df): + """ +Tries to compute a t-value from a p-value (or pval array) and associated df. +SLOW for large numbers of elements(!) as it re-computes p-values 20 times +(smaller step-sizes) at which point it decides it's done. Keeps the signs +of the input array. Returns 1000 (or -1000) if t>100. + +Usage: ap2t(pval,df) +Returns: an array of t-values with the shape of pval + """ + pval = N.array(pval) + signs = N.sign(pval) + pval = abs(pval) + t = N.ones(pval.shape, N.float_) * 50 + step = N.ones(pval.shape, N.float_) * 25 + print 'Initial ap2t() prob calc' + prob = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) + print 'ap2t() iter: ', + for i in range(10): + print i, ' ', + t = N.where(pval < prob, t + step, t - step) + prob = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) + step = step / 2 + print + # since this is an ugly hack, we get ugly boundaries + t = N.where(t > 99.9, 1000, t) # hit upper-boundary + t = t + signs + return t #, prob, pval + + def attest_rel(a, + b, + dimension=None, + printit=0, + name1='Samp1', + name2='Samp2', + writemode='a'): + """ +Calculates the t-obtained T-test on TWO RELATED samples of scores, a +and b. From Numerical Recipies, p.483. If printit=1, results are +printed to the screen. 
If printit='filename', the results are output +to 'filename' using the given writemode (default=append). Dimension +can equal None (ravel array first), or an integer (the dimension over +which to operate on a and b). + +Usage: attest_rel(a,b,dimension=None,printit=0, + name1='Samp1',name2='Samp2',writemode='a') +Returns: t-value, two-tailed p-value +""" + if dimension == None: + a = N.ravel(a) + b = N.ravel(b) + dimension = 0 + if len(a) <> len(b): + raise ValueError, 'Unequal length arrays.' + x1 = amean(a, dimension) + x2 = amean(b, dimension) + v1 = avar(a, dimension) + v2 = avar(b, dimension) + n = a.shape[dimension] + df = float(n - 1) + d = (a - b).astype('d') + + denom = N.sqrt( + (n * N.add.reduce(d * d, dimension) - N.add.reduce(d, dimension)**2) / + df) + zerodivproblem = N.equal(denom, 0) + denom = N.where(zerodivproblem, 1, denom + ) # avoid zero-division in 1st place + t = N.add.reduce(d, dimension) / denom # N-D COMPUTATION HERE!!!!!! + t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0 + probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) + if type(t) == N.ndarray: + probs = N.reshape(probs, t.shape) + if probs.shape == (1,): + probs = probs[0] + + if printit <> 0: + statname = 'Related samples T-test.' + outputpairedstats(printit, writemode, name1, n, x1, v1, + N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), name2, n, x2, v2, + N.minimum.reduce(N.ravel(b)), + N.maximum.reduce(N.ravel(b)), statname, t, probs) + return + return t, probs + + def achisquare(f_obs, f_exp=None): + """ +Calculates a one-way chi square for array of observed frequencies and returns +the result. If no expected frequencies are given, the total N is assumed to +be equally distributed across all groups. +@@@NOT RIGHT?? + +Usage: achisquare(f_obs, f_exp=None) f_obs = array of observed cell freq. 
+Returns: chisquare-statistic, associated p-value +""" + + k = len(f_obs) + if f_exp == None: + f_exp = N.array([sum(f_obs) / float(k)] * len(f_obs), N.float_) + f_exp = f_exp.astype(N.float_) + chisq = N.add.reduce((f_obs - f_exp)**2 / f_exp) + return chisq, achisqprob(chisq, k - 1) + + def aks_2samp(data1, data2): + """ +Computes the Kolmogorov-Smirnof statistic on 2 samples. Modified from +Numerical Recipies in C, page 493. Returns KS D-value, prob. Not ufunc- +like. + +Usage: aks_2samp(data1,data2) where data1 and data2 are 1D arrays +Returns: KS D-value, p-value +""" + j1 = 0 # N.zeros(data1.shape[1:]) TRIED TO MAKE THIS UFUNC-LIKE + j2 = 0 # N.zeros(data2.shape[1:]) + fn1 = 0.0 # N.zeros(data1.shape[1:],N.float_) + fn2 = 0.0 # N.zeros(data2.shape[1:],N.float_) + n1 = data1.shape[0] + n2 = data2.shape[0] + en1 = n1 * 1 + en2 = n2 * 1 + d = N.zeros(data1.shape[1:], N.float_) + data1 = N.sort(data1, 0) + data2 = N.sort(data2, 0) + while j1 < n1 and j2 < n2: + d1 = data1[j1] + d2 = data2[j2] + if d1 <= d2: + fn1 = (j1) / float(en1) + j1 = j1 + 1 + if d2 <= d1: + fn2 = (j2) / float(en2) + j2 = j2 + 1 + dt = (fn2 - fn1) + if abs(dt) > abs(d): + d = dt +# try: + en = math.sqrt(en1 * en2 / float(en1 + en2)) + prob = aksprob((en + 0.12 + 0.11 / en) * N.fabs(d)) + # except: + # prob = 1.0 + return d, prob + + def amannwhitneyu(x, y): + """ +Calculates a Mann-Whitney U statistic on the provided scores and +returns the result. Use only when the n in each condition is < 20 and +you have 2 independent samples of ranks. REMEMBER: Mann-Whitney U is +significant if the u-obtained is LESS THAN or equal to the critical +value of U. 
+ +Usage: amannwhitneyu(x,y) where x,y are arrays of values for 2 conditions +Returns: u-statistic, one-tailed p-value (i.e., p(z(U))) +""" + n1 = len(x) + n2 = len(y) + ranked = rankdata(N.concatenate((x, y))) + rankx = ranked[0:n1] # get the x-ranks + ranky = ranked[n1:] # the rest are y-ranks + u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx) # calc U for x + u2 = n1 * n2 - u1 # remainder is U for y + bigu = max(u1, u2) + smallu = min(u1, u2) + proportion = bigu / float(n1 * n2) + T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores + if T == 0: + raise ValueError, 'All numbers are identical in amannwhitneyu' + sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0) + z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc + return smallu, 1.0 - azprob(z), proportion + + def atiecorrect(rankvals): + """ +Tie-corrector for ties in Mann Whitney U and Kruskal Wallis H tests. +See Siegel, S. (1956) Nonparametric Statistics for the Behavioral +Sciences. New York: McGraw-Hill. Code adapted from |Stat rankind.c +code. + +Usage: atiecorrect(rankvals) +Returns: T correction factor for U or H +""" + sorted, posn = ashellsort(N.array(rankvals)) + n = len(sorted) + T = 0.0 + i = 0 + while (i < n - 1): + if sorted[i] == sorted[i + 1]: + nties = 1 + while (i < n - 1) and (sorted[i] == sorted[i + 1]): + nties = nties + 1 + i = i + 1 + T = T + nties**3 - nties + i = i + 1 + T = T / float(n**3 - n) + return 1.0 - T + + def aranksums(x, y): + """ +Calculates the rank sums statistic on the provided scores and returns +the result. 
+ +Usage: aranksums(x,y) where x,y are arrays of values for 2 conditions +Returns: z-statistic, two-tailed p-value +""" + n1 = len(x) + n2 = len(y) + alldata = N.concatenate((x, y)) + ranked = arankdata(alldata) + x = ranked[:n1] + y = ranked[n1:] + s = sum(x) + expected = n1 * (n1 + n2 + 1) / 2.0 + z = (s - expected) / math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0) + prob = 2 * (1.0 - azprob(abs(z))) + return z, prob + + def awilcoxont(x, y): + """ +Calculates the Wilcoxon T-test for related samples and returns the +result. A non-parametric T-test. + +Usage: awilcoxont(x,y) where x,y are equal-length arrays for 2 conditions +Returns: t-statistic, two-tailed p-value +""" + if len(x) <> len(y): + raise ValueError, 'Unequal N in awilcoxont. Aborting.' + d = x - y + d = N.compress(N.not_equal(d, 0), d) # Keep all non-zero differences + count = len(d) + absd = abs(d) + absranked = arankdata(absd) + r_plus = 0.0 + r_minus = 0.0 + for i in range(len(absd)): + if d[i] < 0: + r_minus = r_minus + absranked[i] + else: + r_plus = r_plus + absranked[i] + wt = min(r_plus, r_minus) + mn = count * (count + 1) * 0.25 + se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0) + z = math.fabs(wt - mn) / se + z = math.fabs(wt - mn) / se + prob = 2 * (1.0 - zprob(abs(z))) + return wt, prob + + def akruskalwallish(*args): + """ +The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more +groups, requiring at least 5 subjects in each group. This function +calculates the Kruskal-Wallis H and associated p-value for 3 or more +independent samples. 
+ +Usage: akruskalwallish(*args) args are separate arrays for 3+ conditions +Returns: H-statistic (corrected for ties), associated p-value +""" + assert len(args) == 3, 'Need at least 3 groups in stats.akruskalwallish()' + args = list(args) + n = [0] * len(args) + n = map(len, args) + all = [] + for i in range(len(args)): + all = all + args[i].tolist() + ranked = rankdata(all) + T = tiecorrect(ranked) + for i in range(len(args)): + args[i] = ranked[0:n[i]] + del ranked[0:n[i]] + rsums = [] + for i in range(len(args)): + rsums.append(sum(args[i])**2) + rsums[i] = rsums[i] / float(n[i]) + ssbn = sum(rsums) + totaln = sum(n) + h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1) + df = len(args) - 1 + if T == 0: + raise ValueError, 'All numbers are identical in akruskalwallish' + h = h / float(T) + return h, chisqprob(h, df) + + def afriedmanchisquare(*args): + """ +Friedman Chi-Square is a non-parametric, one-way within-subjects +ANOVA. This function calculates the Friedman Chi-square test for +repeated measures and returns the result, along with the associated +probability value. It assumes 3 or more repeated measures. Only 3 +levels requires a minimum of 10 subjects in the study. Four levels +requires 5 subjects per level(??). + +Usage: afriedmanchisquare(*args) args are separate arrays for 2+ conditions +Returns: chi-square statistic, associated p-value +""" + k = len(args) + if k < 3: + raise ValueError, ('\nLess than 3 levels. 
Friedman test not ' + 'appropriate.\n') + n = len(args[0]) + data = apply(pstat.aabut, args) + data = data.astype(N.float_) + for i in range(len(data)): + data[i] = arankdata(data[i]) + ssbn = asum(asum(args, 1)**2) + chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1) + return chisq, achisqprob(chisq, k - 1) + +##################################### +#### APROBABILITY CALCULATIONS #### +##################################### + + def achisqprob(chisq, df): + """ +Returns the (1-tail) probability value associated with the provided chi-square +value and df. Heavily modified from chisq.c in Gary Perlman's |Stat. Can +handle multiple dimensions. + +Usage: achisqprob(chisq,df) chisq=chisquare stat., df=degrees of freedom +""" + BIG = 200.0 + + def ex(x): + BIG = 200.0 + exponents = N.where(N.less(x, -BIG), -BIG, x) + return N.exp(exponents) + + if type(chisq) == N.ndarray: + arrayflag = 1 + else: + arrayflag = 0 + chisq = N.array([chisq]) + if df < 1: + return N.ones(chisq.shape, N.float) + probs = N.zeros(chisq.shape, N.float_) + probs = N.where( + N.less_equal(chisq, 0), 1.0, probs) # set prob=1 for chisq<0 + a = 0.5 * chisq + if df > 1: + y = ex(-a) + if df % 2 == 0: + even = 1 + s = y * 1 + s2 = s * 1 + else: + even = 0 + s = 2.0 * azprob(-N.sqrt(chisq)) + s2 = s * 1 + if (df > 2): + chisq = 0.5 * (df - 1.0) + if even: + z = N.ones(probs.shape, N.float_) + else: + z = 0.5 * N.ones(probs.shape, N.float_) + if even: + e = N.zeros(probs.shape, N.float_) + else: + e = N.log(N.sqrt(N.pi)) * N.ones(probs.shape, N.float_) + c = N.log(a) + mask = N.zeros(probs.shape) + a_big = N.greater(a, BIG) + a_big_frozen = -1 * N.ones(probs.shape, N.float_) + totalelements = N.multiply.reduce(N.array(probs.shape)) + while asum(mask) <> totalelements: + e = N.log(z) + e + s = s + ex(c * z - a - e) + z = z + 1.0 + # print z, e, s + newmask = N.greater(z, chisq) + a_big_frozen = N.where(newmask * N.equal(mask, 0) * a_big, s, + a_big_frozen) + mask = N.clip(newmask + mask, 0, 1) + if 
even: + z = N.ones(probs.shape, N.float_) + e = N.ones(probs.shape, N.float_) + else: + z = 0.5 * N.ones(probs.shape, N.float_) + e = 1.0 / N.sqrt(N.pi) / N.sqrt(a) * N.ones(probs.shape, N.float_) + c = 0.0 + mask = N.zeros(probs.shape) + a_notbig_frozen = -1 * N.ones(probs.shape, N.float_) + while asum(mask) <> totalelements: + e = e * (a / z.astype(N.float_)) + c = c + e + z = z + 1.0 + # print '#2', z, e, c, s, c*y+s2 + newmask = N.greater(z, chisq) + a_notbig_frozen = N.where(newmask * N.equal(mask, 0) * (1 - a_big), + c * y + s2, a_notbig_frozen) + mask = N.clip(newmask + mask, 0, 1) + probs = N.where( + N.equal(probs, 1), 1, N.where( + N.greater(a, BIG), a_big_frozen, a_notbig_frozen)) + return probs + else: + return s + + def aerfcc(x): + """ +Returns the complementary error function erfc(x) with fractional error +everywhere less than 1.2e-7. Adapted from Numerical Recipies. Can +handle multiple dimensions. + +Usage: aerfcc(x) +""" + z = abs(x) + t = 1.0 / (1.0 + 0.5 * z) + ans = t * N.exp(-z * z - 1.26551223 + t * (1.00002368 + t * ( + 0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * ( + 0.27886807 + t * (-1.13520398 + t * (1.48851587 + t * ( + -0.82215223 + t * 0.17087277))))))))) + return N.where(N.greater_equal(x, 0), ans, 2.0 - ans) + + def azprob(z): + """ +Returns the area under the normal curve 'to the left of' the given z value. +Thus, + for z<0, zprob(z) = 1-tail probability + for z>0, 1.0-zprob(z) = 1-tail probability + for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability +Adapted from z.c in Gary Perlman's |Stat. Can handle multiple dimensions. 
+ +Usage: azprob(z) where z is a z-value +""" + + def yfunc(y): + x = ((((((( + ((((((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) * y + - 0.000676904986) * y + 0.001390604284) * y - 0.000794620820) * y + - 0.002034254874) * y + 0.006549791214) * y - 0.010557625006) * y + + 0.011630447319) * y - 0.009279453341) * y + 0.005353579108) * y - + 0.002141268741) * y + 0.000535310849) * y + 0.999936657524 + return x + + def wfunc(w): + x = ((((((((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w + - 0.019198292004) * w + 0.059054035642) * w - 0.151968751364) * + w + 0.319152932694) * w - 0.531923007300) * w + + 0.797884560593) * N.sqrt(w) * 2.0 + return x + + Z_MAX = 6.0 # maximum meaningful z-value + x = N.zeros(z.shape, N.float_) # initialize + y = 0.5 * N.fabs(z) + x = N.where(N.less(y, 1.0), wfunc(y * y), yfunc(y - 2.0)) # get x's + x = N.where(N.greater(y, Z_MAX * 0.5), 1.0, x) # kill those with big Z + prob = N.where(N.greater(z, 0), (x + 1) * 0.5, (1 - x) * 0.5) + return prob + + def aksprob(alam): + """ +Returns the probability value for a K-S statistic computed via ks_2samp. +Adapted from Numerical Recipies. Can handle multiple dimensions. + +Usage: aksprob(alam) +""" + if type(alam) == N.ndarray: + frozen = -1 * N.ones(alam.shape, N.float64) + alam = alam.astype(N.float64) + arrayflag = 1 + else: + frozen = N.array(-1.) 
+ alam = N.array(alam, N.float64) + arrayflag = 1 + mask = N.zeros(alam.shape) + fac = 2.0 * N.ones(alam.shape, N.float_) + sum = N.zeros(alam.shape, N.float_) + termbf = N.zeros(alam.shape, N.float_) + a2 = N.array(-2.0 * alam * alam, N.float64) + totalelements = N.multiply.reduce(N.array(mask.shape)) + for j in range(1, 201): + if asum(mask) == totalelements: + break + exponents = (a2 * j * j) + overflowmask = N.less(exponents, -746) + frozen = N.where(overflowmask, 0, frozen) + mask = mask + overflowmask + term = fac * N.exp(exponents) + sum = sum + term + newmask = N.where( + N.less_equal( + abs(term), (0.001 * termbf)) + N.less( + abs(term), 1.0e-8 * sum), 1, 0) + frozen = N.where(newmask * N.equal(mask, 0), sum, frozen) + mask = N.clip(mask + newmask, 0, 1) + fac = -fac + termbf = abs(term) + if arrayflag: + return N.where( + N.equal(frozen, -1), 1.0, frozen) # 1.0 if doesn't converge + else: + return N.where( + N.equal(frozen, -1), 1.0, frozen)[0] # 1.0 if doesn't converge + + def afprob(dfnum, dfden, F): + """ +Returns the 1-tailed significance level (p-value) of an F statistic +given the degrees of freedom for the numerator (dfR-dfF) and the degrees +of freedom for the denominator (dfF). Can handle multiple dims for F. + +Usage: afprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn +""" + if type(F) == N.ndarray: + return abetai(0.5 * dfden, 0.5 * dfnum, dfden / (1.0 * dfden + dfnum * F)) + else: + return abetai(0.5 * dfden, 0.5 * dfnum, dfden / float(dfden + dfnum * F)) + + def abetacf(a, b, x, verbose=1): + """ +Evaluates the continued fraction form of the incomplete Beta function, +betai. (Adapted from: Numerical Recipies in C.) Can handle multiple +dimensions for x. 
+ +Usage: abetacf(a,b,x,verbose=1) +""" + ITMAX = 200 + EPS = 3.0e-7 + + arrayflag = 1 + if type(x) == N.ndarray: + frozen = N.ones(x.shape, + N.float_) * -1 #start out w/ -1s, should replace all + else: + arrayflag = 0 + frozen = N.array([-1]) + x = N.array([x]) + mask = N.zeros(x.shape) + bm = az = am = 1.0 + qab = a + b + qap = a + 1.0 + qam = a - 1.0 + bz = 1.0 - qab * x / qap + for i in range(ITMAX + 1): + if N.sum(N.ravel(N.equal(frozen, -1))) == 0: + break + em = float(i + 1) + tem = em + em + d = em * (b - em) * x / ((qam + tem) * (a + tem)) + ap = az + d * am + bp = bz + d * bm + d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem)) + app = ap + d * az + bpp = bp + d * bz + aold = az * 1 + am = ap / bpp + bm = bp / bpp + az = app / bpp + bz = 1.0 + newmask = N.less(abs(az - aold), EPS * abs(az)) + frozen = N.where(newmask * N.equal(mask, 0), az, frozen) + mask = N.clip(mask + newmask, 0, 1) + noconverge = asum(N.equal(frozen, -1)) + if noconverge <> 0 and verbose: + print 'a or b too big, or ITMAX too small in Betacf for ', noconverge, ' elements' + if arrayflag: + return frozen + else: + return frozen[0] + + def agammln(xx): + """ +Returns the gamma function of xx. + Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. +Adapted from: Numerical Recipies in C. Can handle multiple dims ... but +probably doesn't normally have to. + +Usage: agammln(xx) +""" + coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, + 0.120858003e-2, -0.536382e-5] + x = xx - 1.0 + tmp = x + 5.5 + tmp = tmp - (x + 0.5) * N.log(tmp) + ser = 1.0 + for j in range(len(coeff)): + x = x + 1 + ser = ser + coeff[j] / x + return -tmp + N.log(2.50662827465 * ser) + + def abetai(a, b, x, verbose=1): + """ +Returns the incomplete beta function: + + I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt) + +where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma +function of a. 
The continued fraction formulation is implemented +here, using the betacf function. (Adapted from: Numerical Recipies in +C.) Can handle multiple dimensions. + +Usage: abetai(a,b,x,verbose=1) +""" + TINY = 1e-15 + if type(a) == N.ndarray: + if asum(N.less(x, 0) + N.greater(x, 1)) <> 0: + raise ValueError, 'Bad x in abetai' + x = N.where(N.equal(x, 0), TINY, x) + x = N.where(N.equal(x, 1.0), 1 - TINY, x) + + bt = N.where(N.equal(x, 0) + N.equal(x, 1), 0, -1) + exponents = (gammln(a + b) - gammln(a) - gammln(b) + a * N.log(x) + b * + N.log(1.0 - x)) + # 746 (below) is the MAX POSSIBLE BEFORE OVERFLOW + exponents = N.where(N.less(exponents, -740), -740, exponents) + bt = N.exp(exponents) + if type(x) == N.ndarray: + ans = N.where( + N.less(x, (a + 1) / (a + b + 2.0)), bt * abetacf(a, b, x, verbose) / + float(a), 1.0 - bt * abetacf(b, a, 1.0 - x, verbose) / float(b)) + else: + if x < (a + 1) / (a + b + 2.0): + ans = bt * abetacf(a, b, x, verbose) / float(a) + else: + ans = 1.0 - bt * abetacf(b, a, 1.0 - x, verbose) / float(b) + return ans + +##################################### +####### AANOVA CALCULATIONS ####### +##################################### + + import numpy.linalg, operator + LA = numpy.linalg + + def aglm(data, para): + """ +Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken +from: + Peterson et al. Statistical limitations in functional neuroimaging + I. Non-inferential methods and statistical models. Phil Trans Royal Soc + Lond B 354: 1239-1260. + +Usage: aglm(data,para) +Returns: statistic, p-value ??? +""" + if len(para) <> len(data): + print 'data and para must be same length in aglm' + return + n = len(para) + p = pstat.aunique(para) + x = N.zeros((n, len(p))) # design matrix + for l in range(len(p)): + x[:, l] = N.equal(para, p[l]) + b = N.dot( + N.dot( + LA.inv(N.dot( + N.transpose(x), x)), # i.e., b=inv(X'X)X'Y + N.transpose(x)), + data) + diffs = (data - N.dot(x, b)) + s_sq = 1. 
/ (n - len(p)) * N.dot(N.transpose(diffs), diffs) + + if len(p) == 2: # ttest_ind + c = N.array([1, -1]) + df = n - 2 + fact = asum(1.0 / asum(x, 0)) # i.e., 1/n1 + 1/n2 + 1/n3 ... + t = N.dot(c, b) / N.sqrt(s_sq * fact) + probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) + return t, probs + + def aF_oneway(*args): + """ +Performs a 1-way ANOVA, returning an F-value and probability given +any number of groups. From Heiman, pp.394-7. + +Usage: aF_oneway (*args) where *args is 2 or more arrays, one per + treatment group +Returns: f-value, probability +""" + na = len(args) # ANOVA on 'na' groups, each in it's own array + means = [0] * na + vars = [0] * na + ns = [0] * na + alldata = [] + tmp = map(N.array, args) + means = map(amean, tmp) + vars = map(avar, tmp) + ns = map(len, args) + alldata = N.concatenate(args) + bign = len(alldata) + sstot = ass(alldata) - (asquare_of_sums(alldata) / float(bign)) + ssbn = 0 + for a in args: + ssbn = ssbn + asquare_of_sums(N.array(a)) / float(len(a)) + ssbn = ssbn - (asquare_of_sums(alldata) / float(bign)) + sswn = sstot - ssbn + dfbn = na - 1 + dfwn = bign - na + msb = ssbn / float(dfbn) + msw = sswn / float(dfwn) + f = msb / msw + prob = fprob(dfbn, dfwn, f) + return f, prob + + def aF_value(ER, EF, dfR, dfF): + """ +Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR = degrees of freedom the Restricted model + dfF = degrees of freedom associated with the Restricted model +""" + return ((ER - EF) / float(dfR - dfF) / (EF / float(dfF))) + + def outputfstats(Enum, Eden, dfnum, dfden, f, prob): + Enum = round(Enum, 3) + Eden = round(Eden, 3) + dfnum = round(Enum, 3) + dfden = round(dfden, 3) + f = round(f, 3) + prob = round(prob, 3) + suffix = '' # for *s after the p-value + if prob < 0.001: + suffix = ' ***' + elif prob < 0.01: + suffix = ' **' + elif prob < 0.05: + suffix = ' *' + title 
= [['EF/ER', 'DF', 'Mean Square', 'F-value', 'prob', '']] + lofl = title + [[Enum, dfnum, round(Enum / float(dfnum), 3), f, prob, suffix + ], [Eden, dfden, round(Eden / float(dfden), 3), '', '', '']] + pstat.printcc(lofl) + return + + def F_value_multivariate(ER, EF, dfnum, dfden): + """ +Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR = degrees of freedom the Restricted model + dfF = degrees of freedom associated with the Restricted model +where ER and EF are matrices from a multivariate F calculation. +""" + if type(ER) in [IntType, FloatType]: + ER = N.array([[ER]]) + if type(EF) in [IntType, FloatType]: + EF = N.array([[EF]]) + n_um = (LA.det(ER) - LA.det(EF)) / float(dfnum) + d_en = LA.det(EF) / float(dfden) + return n_um / d_en + +##################################### +####### ASUPPORT FUNCTIONS ######## +##################################### + + def asign(a): + """ +Usage: asign(a) +Returns: array shape of a, with -1 where a<0 and +1 where a>=0 +""" + a = N.asarray(a) + if ((type(a) == type(1.4)) or (type(a) == type(1))): + return a - a - N.less(a, 0) + N.greater(a, 0) + else: + return N.zeros(N.shape(a)) - N.less(a, 0) + N.greater(a, 0) + + def asum(a, dimension=None, keepdims=0): + """ +An alternative to the Numeric.add.reduce function, which allows one to +(1) collapse over multiple dimensions at once, and/or (2) to retain +all dimensions in the original array (squashing one down to size. +Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). If keepdims=1, the resulting array will have as many +dimensions as the input array. 
+ +Usage: asum(a, dimension=None, keepdims=0) +Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1 +""" + if type(a) == N.ndarray and a.dtype in [N.int_, N.short, N.ubyte]: + a = a.astype(N.float_) + if dimension == None: + s = N.sum(N.ravel(a)) + elif type(dimension) in [IntType, FloatType]: + s = N.add.reduce(a, dimension) + if keepdims == 1: + shp = list(a.shape) + shp[dimension] = 1 + s = N.reshape(s, shp) + else: # must be a SEQUENCE of dims to sum over + dims = list(dimension) + dims.sort() + dims.reverse() + s = a * 1.0 + for dim in dims: + s = N.add.reduce(s, dim) + if keepdims == 1: + shp = list(a.shape) + for dim in dims: + shp[dim] = 1 + s = N.reshape(s, shp) + return s + + def acumsum(a, dimension=None): + """ +Returns an array consisting of the cumulative sum of the items in the +passed array. Dimension can equal None (ravel array first), an +integer (the dimension over which to operate), or a sequence (operate +over multiple dimensions, but this last one just barely makes sense). + +Usage: acumsum(a,dimension=None) +""" + if dimension == None: + a = N.ravel(a) + dimension = 0 + if type(dimension) in [ListType, TupleType, N.ndarray]: + dimension = list(dimension) + dimension.sort() + dimension.reverse() + for d in dimension: + a = N.add.accumulate(a, d) + return a + else: + return N.add.accumulate(a, dimension) + + def ass(inarray, dimension=None, keepdims=0): + """ +Squares each value in the passed array, adds these squares & returns +the result. Unfortunate function name. :-) Defaults to ALL values in +the array. Dimension can equal None (ravel array first), an integer +(the dimension over which to operate), or a sequence (operate over +multiple dimensions). Set keepdims=1 to maintain the original number +of dimensions. 
+ +Usage: ass(inarray, dimension=None, keepdims=0) +Returns: sum-along-'dimension' for (inarray*inarray) +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + return asum(inarray * inarray, dimension, keepdims) + + def asummult(array1, array2, dimension=None, keepdims=0): + """ +Multiplies elements in array1 and array2, element by element, and +returns the sum (along 'dimension') of all resulting multiplications. +Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). A trivial function, but included for completeness. + +Usage: asummult(array1,array2,dimension=None,keepdims=0) +""" + if dimension == None: + array1 = N.ravel(array1) + array2 = N.ravel(array2) + dimension = 0 + return asum(array1 * array2, dimension, keepdims) + + def asquare_of_sums(inarray, dimension=None, keepdims=0): + """ +Adds the values in the passed array, squares that sum, and returns the +result. Dimension can equal None (ravel array first), an integer (the +dimension over which to operate), or a sequence (operate over multiple +dimensions). If keepdims=1, the returned array will have the same +NUMBER of dimensions as the original. + +Usage: asquare_of_sums(inarray, dimension=None, keepdims=0) +Returns: the square of the sum over dim(s) in dimension +""" + if dimension == None: + inarray = N.ravel(inarray) + dimension = 0 + s = asum(inarray, dimension, keepdims) + if type(s) == N.ndarray: + return s.astype(N.float_) * s + else: + return float(s) * s + + def asumdiffsquared(a, b, dimension=None, keepdims=0): + """ +Takes pairwise differences of the values in arrays a and b, squares +these differences, and returns the sum of these squares. Dimension +can equal None (ravel array first), an integer (the dimension over +which to operate), or a sequence (operate over multiple dimensions). 
+keepdims=1 means the return shape = len(a.shape) = len(b.shape) + +Usage: asumdiffsquared(a,b) +Returns: sum[ravel(a-b)**2] +""" + if dimension == None: + inarray = N.ravel(a) + dimension = 0 + return asum((a - b)**2, dimension, keepdims) + + def ashellsort(inarray): + """ +Shellsort algorithm. Sorts a 1D-array. + +Usage: ashellsort(inarray) +Returns: sorted-inarray, sorting-index-vector (for original array) +""" + n = len(inarray) + svec = inarray * 1.0 + ivec = range(n) + gap = n / 2 # integer division needed + while gap > 0: + for i in range(gap, n): + for j in range(i - gap, -1, -gap): + while j >= 0 and svec[j] > svec[j + gap]: + temp = svec[j] + svec[j] = svec[j + gap] + svec[j + gap] = temp + itemp = ivec[j] + ivec[j] = ivec[j + gap] + ivec[j + gap] = itemp + gap = gap / 2 # integer division needed +# svec is now sorted input vector, ivec has the order svec[i] = vec[ivec[i]] + return svec, ivec + + def arankdata(inarray): + """ +Ranks the data in inarray, dealing with ties appropritely. Assumes +a 1D inarray. Adapted from Gary Perlman's |Stat ranksort. + +Usage: arankdata(inarray) +Returns: array of length equal to inarray, containing rank scores +""" + n = len(inarray) + svec, ivec = ashellsort(inarray) + sumranks = 0 + dupcount = 0 + newarray = N.zeros(n, N.float_) + for i in range(n): + sumranks = sumranks + i + dupcount = dupcount + 1 + if i == n - 1 or svec[i] <> svec[i + 1]: + averank = sumranks / float(dupcount) + 1 + for j in range(i - dupcount + 1, i + 1): + newarray[ivec[j]] = averank + sumranks = 0 + dupcount = 0 + return newarray + + def afindwithin(data): + """ +Returns a binary vector, 1=within-subject factor, 0=between. Input +equals the entire data array (i.e., column 1=random factor, last +column = measured values. 
+ +Usage: afindwithin(data) data in |Stat format +""" + numfact = len(data[0]) - 2 + withinvec = [0] * numfact + for col in range(1, numfact + 1): + rows = pstat.linexand(data, col, pstat.unique(pstat.colex(data, 1))[0] + ) # get 1 level of this factor + if len(pstat.unique(pstat.colex(rows, 0))) < len( + rows): # if fewer subjects than scores on this factor + withinvec[col - 1] = 1 + return withinvec + + ######################################################### + ######################################################### + ###### RE-DEFINE DISPATCHES TO INCLUDE ARRAYS ######### + ######################################################### + ######################################################### + + ## CENTRAL TENDENCY: + geometricmean = Dispatch( + (lgeometricmean, (ListType, TupleType)), (ageometricmean, (N.ndarray,))) + harmonicmean = Dispatch( + (lharmonicmean, (ListType, TupleType)), (aharmonicmean, (N.ndarray,))) + mean = Dispatch((lmean, (ListType, TupleType)), (amean, (N.ndarray,))) + median = Dispatch((lmedian, (ListType, TupleType)), (amedian, (N.ndarray,))) + medianscore = Dispatch( + (lmedianscore, (ListType, TupleType)), (amedianscore, (N.ndarray,))) + mode = Dispatch((lmode, (ListType, TupleType)), (amode, (N.ndarray,))) + tmean = Dispatch((atmean, (N.ndarray,))) + tvar = Dispatch((atvar, (N.ndarray,))) + tstdev = Dispatch((atstdev, (N.ndarray,))) + tsem = Dispatch((atsem, (N.ndarray,))) + + ## VARIATION: + moment = Dispatch((lmoment, (ListType, TupleType)), (amoment, (N.ndarray,))) + variation = Dispatch( + (lvariation, (ListType, TupleType)), (avariation, (N.ndarray,))) + skew = Dispatch((lskew, (ListType, TupleType)), (askew, (N.ndarray,))) + kurtosis = Dispatch( + (lkurtosis, (ListType, TupleType)), (akurtosis, (N.ndarray,))) + describe = Dispatch( + (ldescribe, (ListType, TupleType)), (adescribe, (N.ndarray,))) + + ## DISTRIBUTION TESTS + + skewtest = Dispatch( + (askewtest, (ListType, TupleType)), (askewtest, (N.ndarray,))) + kurtosistest = 
Dispatch( + (akurtosistest, (ListType, TupleType)), (akurtosistest, (N.ndarray,))) + normaltest = Dispatch( + (anormaltest, (ListType, TupleType)), (anormaltest, (N.ndarray,))) + + ## FREQUENCY STATS: + itemfreq = Dispatch( + (litemfreq, (ListType, TupleType)), (aitemfreq, (N.ndarray,))) + scoreatpercentile = Dispatch( + (lscoreatpercentile, (ListType, TupleType)), (ascoreatpercentile, + (N.ndarray,))) + percentileofscore = Dispatch( + (lpercentileofscore, (ListType, TupleType)), (apercentileofscore, + (N.ndarray,))) + histogram = Dispatch( + (lhistogram, (ListType, TupleType)), (ahistogram, (N.ndarray,))) + cumfreq = Dispatch( + (lcumfreq, (ListType, TupleType)), (acumfreq, (N.ndarray,))) + relfreq = Dispatch( + (lrelfreq, (ListType, TupleType)), (arelfreq, (N.ndarray,))) + + ## VARIABILITY: + obrientransform = Dispatch( + (lobrientransform, (ListType, TupleType)), (aobrientransform, + (N.ndarray,))) + samplevar = Dispatch( + (lsamplevar, (ListType, TupleType)), (asamplevar, (N.ndarray,))) + samplestdev = Dispatch( + (lsamplestdev, (ListType, TupleType)), (asamplestdev, (N.ndarray,))) + signaltonoise = Dispatch((asignaltonoise, (N.ndarray,)),) + var = Dispatch((lvar, (ListType, TupleType)), (avar, (N.ndarray,))) + stdev = Dispatch((lstdev, (ListType, TupleType)), (astdev, (N.ndarray,))) + sterr = Dispatch((lsterr, (ListType, TupleType)), (asterr, (N.ndarray,))) + sem = Dispatch((lsem, (ListType, TupleType)), (asem, (N.ndarray,))) + z = Dispatch((lz, (ListType, TupleType)), (az, (N.ndarray,))) + zs = Dispatch((lzs, (ListType, TupleType)), (azs, (N.ndarray,))) + + ## TRIMMING FCNS: + threshold = Dispatch((athreshold, (N.ndarray,)),) + trimboth = Dispatch( + (ltrimboth, (ListType, TupleType)), (atrimboth, (N.ndarray,))) + trim1 = Dispatch((ltrim1, (ListType, TupleType)), (atrim1, (N.ndarray,))) + + ## CORRELATION FCNS: + paired = Dispatch((lpaired, (ListType, TupleType)), (apaired, (N.ndarray,))) + lincc = Dispatch((llincc, (ListType, TupleType)), (alincc, 
(N.ndarray,))) + pearsonr = Dispatch( + (lpearsonr, (ListType, TupleType)), (apearsonr, (N.ndarray,))) + spearmanr = Dispatch( + (lspearmanr, (ListType, TupleType)), (aspearmanr, (N.ndarray,))) + pointbiserialr = Dispatch( + (lpointbiserialr, (ListType, TupleType)), (apointbiserialr, (N.ndarray,))) + kendalltau = Dispatch( + (lkendalltau, (ListType, TupleType)), (akendalltau, (N.ndarray,))) + linregress = Dispatch( + (llinregress, (ListType, TupleType)), (alinregress, (N.ndarray,))) + + ## INFERENTIAL STATS: + ttest_1samp = Dispatch( + (lttest_1samp, (ListType, TupleType)), (attest_1samp, (N.ndarray,))) + ttest_ind = Dispatch( + (lttest_ind, (ListType, TupleType)), (attest_ind, (N.ndarray,))) + ttest_rel = Dispatch( + (lttest_rel, (ListType, TupleType)), (attest_rel, (N.ndarray,))) + chisquare = Dispatch( + (lchisquare, (ListType, TupleType)), (achisquare, (N.ndarray,))) + ks_2samp = Dispatch( + (lks_2samp, (ListType, TupleType)), (aks_2samp, (N.ndarray,))) + mannwhitneyu = Dispatch( + (lmannwhitneyu, (ListType, TupleType)), (amannwhitneyu, (N.ndarray,))) + tiecorrect = Dispatch( + (ltiecorrect, (ListType, TupleType)), (atiecorrect, (N.ndarray,))) + ranksums = Dispatch( + (lranksums, (ListType, TupleType)), (aranksums, (N.ndarray,))) + wilcoxont = Dispatch( + (lwilcoxont, (ListType, TupleType)), (awilcoxont, (N.ndarray,))) + kruskalwallish = Dispatch( + (lkruskalwallish, (ListType, TupleType)), (akruskalwallish, (N.ndarray,))) + friedmanchisquare = Dispatch( + (lfriedmanchisquare, (ListType, TupleType)), (afriedmanchisquare, + (N.ndarray,))) + + ## PROBABILITY CALCS: + chisqprob = Dispatch( + (lchisqprob, (IntType, FloatType)), (achisqprob, (N.ndarray,))) + zprob = Dispatch((lzprob, (IntType, FloatType)), (azprob, (N.ndarray,))) + ksprob = Dispatch((lksprob, (IntType, FloatType)), (aksprob, (N.ndarray,))) + fprob = Dispatch((lfprob, (IntType, FloatType)), (afprob, (N.ndarray,))) + betacf = Dispatch((lbetacf, (IntType, FloatType)), (abetacf, (N.ndarray,))) + betai = 
Dispatch((lbetai, (IntType, FloatType)), (abetai, (N.ndarray,))) + erfcc = Dispatch((lerfcc, (IntType, FloatType)), (aerfcc, (N.ndarray,))) + gammln = Dispatch((lgammln, (IntType, FloatType)), (agammln, (N.ndarray,))) + + ## ANOVA FUNCTIONS: + F_oneway = Dispatch( + (lF_oneway, (ListType, TupleType)), (aF_oneway, (N.ndarray,))) + F_value = Dispatch( + (lF_value, (ListType, TupleType)), (aF_value, (N.ndarray,))) + + ## SUPPORT FUNCTIONS: + incr = Dispatch((lincr, (ListType, TupleType, N.ndarray)),) + sum = Dispatch((lsum, (ListType, TupleType)), (asum, (N.ndarray,))) + cumsum = Dispatch((lcumsum, (ListType, TupleType)), (acumsum, (N.ndarray,))) + ss = Dispatch((lss, (ListType, TupleType)), (ass, (N.ndarray,))) + summult = Dispatch( + (lsummult, (ListType, TupleType)), (asummult, (N.ndarray,))) + square_of_sums = Dispatch( + (lsquare_of_sums, (ListType, TupleType)), (asquare_of_sums, (N.ndarray,))) + sumdiffsquared = Dispatch( + (lsumdiffsquared, (ListType, TupleType)), (asumdiffsquared, (N.ndarray,))) + shellsort = Dispatch( + (lshellsort, (ListType, TupleType)), (ashellsort, (N.ndarray,))) + rankdata = Dispatch( + (lrankdata, (ListType, TupleType)), (arankdata, (N.ndarray,))) + findwithin = Dispatch( + (lfindwithin, (ListType, TupleType)), (afindwithin, (N.ndarray,))) + +###################### END OF NUMERIC FUNCTION BLOCK ##################### + +###################### END OF STATISTICAL FUNCTIONS ###################### + +except ImportError: + pass diff --git a/cros_utils/tabulator.py b/cros_utils/tabulator.py new file mode 100644 index 00000000..2c26ccad --- /dev/null +++ b/cros_utils/tabulator.py @@ -0,0 +1,1248 @@ +# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Table generating, analyzing and printing functions. + +This defines several classes that are used to generate, analyze and print +tables. 
+ +Example usage: + + from cros_utils import tabulator + + data = [["benchmark1", "33", "44"],["benchmark2", "44", "33"]] + tabulator.GetSimpleTable(data) + +You could also use it to generate more complex tables with analysis such as +p-values, custom colors, etc. Tables are generated by TableGenerator and +analyzed/formatted by TableFormatter. TableFormatter can take in a list of +columns with custom result computation and coloring, and will compare values in +each row according to taht scheme. Here is a complex example on printing a +table: + + from cros_utils import tabulator + + runs = [[{"k1": "10", "k2": "12", "k5": "40", "k6": "40", + "ms_1": "20", "k7": "FAIL", "k8": "PASS", "k9": "PASS", + "k10": "0"}, + {"k1": "13", "k2": "14", "k3": "15", "ms_1": "10", "k8": "PASS", + "k9": "FAIL", "k10": "0"}], + [{"k1": "50", "k2": "51", "k3": "52", "k4": "53", "k5": "35", "k6": + "45", "ms_1": "200", "ms_2": "20", "k7": "FAIL", "k8": "PASS", "k9": + "PASS"}]] + labels = ["vanilla", "modified"] + tg = TableGenerator(runs, labels, TableGenerator.SORT_BY_VALUES_DESC) + table = tg.GetTable() + columns = [Column(LiteralResult(), + Format(), + "Literal"), + Column(AmeanResult(), + Format()), + Column(StdResult(), + Format()), + Column(CoeffVarResult(), + CoeffVarFormat()), + Column(NonEmptyCountResult(), + Format()), + Column(AmeanRatioResult(), + PercentFormat()), + Column(AmeanRatioResult(), + RatioFormat()), + Column(GmeanRatioResult(), + RatioFormat()), + Column(PValueResult(), + PValueFormat()), + ] + tf = TableFormatter(table, columns) + cell_table = tf.GetCellTable() + tp = TablePrinter(cell_table, out_to) + print tp.Print() + +""" + +from __future__ import print_function + +import getpass +import math +import sys +import numpy + +import colortrans +from email_sender import EmailSender +import misc + + +def _AllFloat(values): + return all([misc.IsFloat(v) for v in values]) + + +def _GetFloats(values): + return [float(v) for v in values] + + +def _StripNone(results): 
+ res = [] + for result in results: + if result is not None: + res.append(result) + return res + + +class TableGenerator(object): + """Creates a table from a list of list of dicts. + + The main public function is called GetTable(). + """ + SORT_BY_KEYS = 0 + SORT_BY_KEYS_DESC = 1 + SORT_BY_VALUES = 2 + SORT_BY_VALUES_DESC = 3 + + MISSING_VALUE = 'x' + + def __init__(self, d, l, sort=SORT_BY_KEYS, key_name='keys'): + self._runs = d + self._labels = l + self._sort = sort + self._key_name = key_name + + def _AggregateKeys(self): + keys = set([]) + for run_list in self._runs: + for run in run_list: + keys = keys.union(run.keys()) + return keys + + def _GetHighestValue(self, key): + values = [] + for run_list in self._runs: + for run in run_list: + if key in run: + values.append(run[key]) + values = _StripNone(values) + if _AllFloat(values): + values = _GetFloats(values) + return max(values) + + def _GetLowestValue(self, key): + values = [] + for run_list in self._runs: + for run in run_list: + if key in run: + values.append(run[key]) + values = _StripNone(values) + if _AllFloat(values): + values = _GetFloats(values) + return min(values) + + def _SortKeys(self, keys): + if self._sort == self.SORT_BY_KEYS: + return sorted(keys) + elif self._sort == self.SORT_BY_VALUES: + # pylint: disable=unnecessary-lambda + return sorted(keys, key=lambda x: self._GetLowestValue(x)) + elif self._sort == self.SORT_BY_VALUES_DESC: + # pylint: disable=unnecessary-lambda + return sorted(keys, key=lambda x: self._GetHighestValue(x), reverse=True) + else: + assert 0, 'Unimplemented sort %s' % self._sort + + def _GetKeys(self): + keys = self._AggregateKeys() + return self._SortKeys(keys) + + def GetTable(self, number_of_rows=sys.maxint): + """Returns a table from a list of list of dicts. + + The list of list of dicts is passed into the constructor of TableGenerator. + This method converts that into a canonical list of lists which represents a + table of values. 
+ + Args: + number_of_rows: Maximum number of rows to return from the table. + + Returns: + A list of lists which is the table. + + Example: + We have the following runs: + [[{"k1": "v1", "k2": "v2"}, {"k1": "v3"}], + [{"k1": "v4", "k4": "v5"}]] + and the following labels: + ["vanilla", "modified"] + it will return: + [["Key", "vanilla", "modified"] + ["k1", ["v1", "v3"], ["v4"]] + ["k2", ["v2"], []] + ["k4", [], ["v5"]]] + The returned table can then be processed further by other classes in this + module. + """ + keys = self._GetKeys() + header = [self._key_name] + self._labels + table = [header] + rows = 0 + for k in keys: + row = [k] + unit = None + for run_list in self._runs: + v = [] + for run in run_list: + if k in run: + if type(run[k]) is list: + val = run[k][0] + unit = run[k][1] + else: + val = run[k] + v.append(val) + else: + v.append(None) + row.append(v) + # If we got a 'unit' value, append the units name to the key name. + if unit: + keyname = row[0] + ' (%s) ' % unit + row[0] = keyname + table.append(row) + rows += 1 + if rows == number_of_rows: + break + return table + + +class Result(object): + """A class that respresents a single result. + + This single result is obtained by condensing the information from a list of + runs and a list of baseline runs. 
+ """ + + def __init__(self): + pass + + def _AllStringsSame(self, values): + values_set = set(values) + return len(values_set) == 1 + + def NeedsBaseline(self): + return False + + # pylint: disable=unused-argument + def _Literal(self, cell, values, baseline_values): + cell.value = ' '.join([str(v) for v in values]) + + def _ComputeFloat(self, cell, values, baseline_values): + self._Literal(cell, values, baseline_values) + + def _ComputeString(self, cell, values, baseline_values): + self._Literal(cell, values, baseline_values) + + def _InvertIfLowerIsBetter(self, cell): + pass + + def _GetGmean(self, values): + if not values: + return float('nan') + if any([v < 0 for v in values]): + return float('nan') + if any([v == 0 for v in values]): + return 0.0 + log_list = [math.log(v) for v in values] + gmean_log = sum(log_list) / len(log_list) + return math.exp(gmean_log) + + def Compute(self, cell, values, baseline_values): + """Compute the result given a list of values and baseline values. + + Args: + cell: A cell data structure to populate. + values: List of values. + baseline_values: List of baseline values. Can be none if this is the + baseline itself. 
+ """ + all_floats = True + values = _StripNone(values) + if not values: + cell.value = '' + return + if _AllFloat(values): + float_values = _GetFloats(values) + else: + all_floats = False + if baseline_values: + baseline_values = _StripNone(baseline_values) + if baseline_values: + if _AllFloat(baseline_values): + float_baseline_values = _GetFloats(baseline_values) + else: + all_floats = False + else: + if self.NeedsBaseline(): + cell.value = '' + return + float_baseline_values = None + if all_floats: + self._ComputeFloat(cell, float_values, float_baseline_values) + self._InvertIfLowerIsBetter(cell) + else: + self._ComputeString(cell, values, baseline_values) + + +class LiteralResult(Result): + """A literal result.""" + + def __init__(self, iteration=0): + super(LiteralResult, self).__init__() + self.iteration = iteration + + def Compute(self, cell, values, baseline_values): + try: + cell.value = values[self.iteration] + except IndexError: + cell.value = '-' + + +class NonEmptyCountResult(Result): + """A class that counts the number of non-empty results. + + The number of non-empty values will be stored in the cell. + """ + + def Compute(self, cell, values, baseline_values): + """Put the number of non-empty values in the cell result. + + Args: + cell: Put the result in cell.value. + values: A list of values for the row. + baseline_values: A list of baseline values for the row. 
+ """ + cell.value = len(_StripNone(values)) + if not baseline_values: + return + base_value = len(_StripNone(baseline_values)) + if cell.value == base_value: + return + f = ColorBoxFormat() + len_values = len(values) + len_baseline_values = len(baseline_values) + tmp_cell = Cell() + tmp_cell.value = 1.0 + (float(cell.value - base_value) / + (max(len_values, len_baseline_values))) + f.Compute(tmp_cell) + cell.bgcolor = tmp_cell.bgcolor + + +class StringMeanResult(Result): + """Mean of string values.""" + + def _ComputeString(self, cell, values, baseline_values): + if self._AllStringsSame(values): + cell.value = str(values[0]) + else: + cell.value = '?' + + +class AmeanResult(StringMeanResult): + """Arithmetic mean.""" + + def _ComputeFloat(self, cell, values, baseline_values): + cell.value = numpy.mean(values) + + +class RawResult(Result): + """Raw result.""" + pass + + +class MinResult(Result): + """Minimum.""" + + def _ComputeFloat(self, cell, values, baseline_values): + cell.value = min(values) + + def _ComputeString(self, cell, values, baseline_values): + if values: + cell.value = min(values) + else: + cell.value = '' + + +class MaxResult(Result): + """Maximum.""" + + def _ComputeFloat(self, cell, values, baseline_values): + cell.value = max(values) + + def _ComputeString(self, cell, values, baseline_values): + if values: + cell.value = max(values) + else: + cell.value = '' + + +class NumericalResult(Result): + """Numerical result.""" + + def _ComputeString(self, cell, values, baseline_values): + cell.value = '?' 
+ + +class StdResult(NumericalResult): + """Standard deviation.""" + + def _ComputeFloat(self, cell, values, baseline_values): + cell.value = numpy.std(values) + + +class CoeffVarResult(NumericalResult): + """Standard deviation / Mean""" + + def _ComputeFloat(self, cell, values, baseline_values): + if numpy.mean(values) != 0.0: + noise = numpy.abs(numpy.std(values) / numpy.mean(values)) + else: + noise = 0.0 + cell.value = noise + + +class ComparisonResult(Result): + """Same or Different.""" + + def NeedsBaseline(self): + return True + + def _ComputeString(self, cell, values, baseline_values): + value = None + baseline_value = None + if self._AllStringsSame(values): + value = values[0] + if self._AllStringsSame(baseline_values): + baseline_value = baseline_values[0] + if value is not None and baseline_value is not None: + if value == baseline_value: + cell.value = 'SAME' + else: + cell.value = 'DIFFERENT' + else: + cell.value = '?' + + +class PValueResult(ComparisonResult): + """P-value.""" + + def _ComputeFloat(self, cell, values, baseline_values): + if len(values) < 2 or len(baseline_values) < 2: + cell.value = float('nan') + return + import stats + _, cell.value = stats.lttest_ind(values, baseline_values) + + def _ComputeString(self, cell, values, baseline_values): + return float('nan') + + +class KeyAwareComparisonResult(ComparisonResult): + """Automatic key aware comparison.""" + + def _IsLowerBetter(self, key): + # TODO(llozano): Trying to guess direction by looking at the name of the + # test does not seem like a good idea. Test frameworks should provide this + # info explicitly. I believe Telemetry has this info. Need to find it out. + # + # Below are some test names for which we are not sure what the + # direction is. + # + # For these we dont know what the direction is. 
But, since we dont + # specify anything, crosperf will assume higher is better: + # --percent_impl_scrolled--percent_impl_scrolled--percent + # --solid_color_tiles_analyzed--solid_color_tiles_analyzed--count + # --total_image_cache_hit_count--total_image_cache_hit_count--count + # --total_texture_upload_time_by_url + # + # About these we are doubtful but we made a guess: + # --average_num_missing_tiles_by_url--*--units (low is good) + # --experimental_mean_frame_time_by_url--*--units (low is good) + # --experimental_median_frame_time_by_url--*--units (low is good) + # --texture_upload_count--texture_upload_count--count (high is good) + # --total_deferred_image_decode_count--count (low is good) + # --total_tiles_analyzed--total_tiles_analyzed--count (high is good) + lower_is_better_keys = ['milliseconds', 'ms_', 'seconds_', 'KB', 'rdbytes', + 'wrbytes', 'dropped_percent', '(ms)', '(seconds)', + '--ms', '--average_num_missing_tiles', + '--experimental_jank', '--experimental_mean_frame', + '--experimental_median_frame_time', + '--total_deferred_image_decode_count', '--seconds'] + + return any([l in key for l in lower_is_better_keys]) + + def _InvertIfLowerIsBetter(self, cell): + if self._IsLowerBetter(cell.name): + if cell.value: + cell.value = 1.0 / cell.value + + +class AmeanRatioResult(KeyAwareComparisonResult): + """Ratio of arithmetic means of values vs. baseline values.""" + + def _ComputeFloat(self, cell, values, baseline_values): + if numpy.mean(baseline_values) != 0: + cell.value = numpy.mean(values) / numpy.mean(baseline_values) + elif numpy.mean(values) != 0: + cell.value = 0.00 + # cell.value = 0 means the values and baseline_values have big difference + else: + cell.value = 1.00 + # no difference if both values and baseline_values are 0 + + +class GmeanRatioResult(KeyAwareComparisonResult): + """Ratio of geometric means of values vs. 
baseline values.""" + + def _ComputeFloat(self, cell, values, baseline_values): + if self._GetGmean(baseline_values) != 0: + cell.value = self._GetGmean(values) / self._GetGmean(baseline_values) + elif self._GetGmean(values) != 0: + cell.value = 0.00 + else: + cell.value = 1.00 + + +class Color(object): + """Class that represents color in RGBA format.""" + + def __init__(self, r=0, g=0, b=0, a=0): + self.r = r + self.g = g + self.b = b + self.a = a + + def __str__(self): + return 'r: %s g: %s: b: %s: a: %s' % (self.r, self.g, self.b, self.a) + + def Round(self): + """Round RGBA values to the nearest integer.""" + self.r = int(self.r) + self.g = int(self.g) + self.b = int(self.b) + self.a = int(self.a) + + def GetRGB(self): + """Get a hex representation of the color.""" + return '%02x%02x%02x' % (self.r, self.g, self.b) + + @classmethod + def Lerp(cls, ratio, a, b): + """Perform linear interpolation between two colors. + + Args: + ratio: The ratio to use for linear polation. + a: The first color object (used when ratio is 0). + b: The second color object (used when ratio is 1). + + Returns: + Linearly interpolated color. + """ + ret = cls() + ret.r = (b.r - a.r) * ratio + a.r + ret.g = (b.g - a.g) * ratio + a.g + ret.b = (b.b - a.b) * ratio + a.b + ret.a = (b.a - a.a) * ratio + a.a + return ret + + +class Format(object): + """A class that represents the format of a column.""" + + def __init__(self): + pass + + def Compute(self, cell): + """Computes the attributes of a cell based on its value. + + Attributes typically are color, width, etc. + + Args: + cell: The cell whose attributes are to be populated. 
+ """ + if cell.value is None: + cell.string_value = '' + if isinstance(cell.value, float): + self._ComputeFloat(cell) + else: + self._ComputeString(cell) + + def _ComputeFloat(self, cell): + cell.string_value = '{0:.2f}'.format(cell.value) + + def _ComputeString(self, cell): + cell.string_value = str(cell.value) + + def _GetColor(self, value, low, mid, high, power=6, mid_value=1.0): + min_value = 0.0 + max_value = 2.0 + if math.isnan(value): + return mid + if value > mid_value: + value = max_value - mid_value / value + + return self._GetColorBetweenRange(value, min_value, mid_value, max_value, + low, mid, high, power) + + def _GetColorBetweenRange(self, value, min_value, mid_value, max_value, + low_color, mid_color, high_color, power): + assert value <= max_value + assert value >= min_value + if value > mid_value: + value = (max_value - value) / (max_value - mid_value) + value **= power + ret = Color.Lerp(value, high_color, mid_color) + else: + value = (value - min_value) / (mid_value - min_value) + value **= power + ret = Color.Lerp(value, low_color, mid_color) + ret.Round() + return ret + + +class PValueFormat(Format): + """Formatting for p-value.""" + + def _ComputeFloat(self, cell): + cell.string_value = '%0.2f' % float(cell.value) + if float(cell.value) < 0.05: + cell.bgcolor = self._GetColor(cell.value, + Color(255, 255, 0, 0), + Color(255, 255, 255, 0), + Color(255, 255, 255, 0), + mid_value=0.05, + power=1) + + +class StorageFormat(Format): + """Format the cell as a storage number. + + Example: + If the cell contains a value of 1024, the string_value will be 1.0K. 
+ """ + + def _ComputeFloat(self, cell): + base = 1024 + suffices = ['K', 'M', 'G'] + v = float(cell.value) + current = 0 + while v >= base**(current + 1) and current < len(suffices): + current += 1 + + if current: + divisor = base**current + cell.string_value = '%1.1f%s' % ((v / divisor), suffices[current - 1]) + else: + cell.string_value = str(cell.value) + + +class CoeffVarFormat(Format): + """Format the cell as a percent. + + Example: + If the cell contains a value of 1.5, the string_value will be +150%. + """ + + def _ComputeFloat(self, cell): + cell.string_value = '%1.1f%%' % (float(cell.value) * 100) + cell.color = self._GetColor(cell.value, + Color(0, 255, 0, 0), + Color(0, 0, 0, 0), + Color(255, 0, 0, 0), + mid_value=0.02, + power=1) + + +class PercentFormat(Format): + """Format the cell as a percent. + + Example: + If the cell contains a value of 1.5, the string_value will be +50%. + """ + + def _ComputeFloat(self, cell): + cell.string_value = '%+1.1f%%' % ((float(cell.value) - 1) * 100) + cell.color = self._GetColor(cell.value, Color(255, 0, 0, 0), + Color(0, 0, 0, 0), Color(0, 255, 0, 0)) + + +class RatioFormat(Format): + """Format the cell as a ratio. + + Example: + If the cell contains a value of 1.5642, the string_value will be 1.56. + """ + + def _ComputeFloat(self, cell): + cell.string_value = '%+1.1f%%' % ((cell.value - 1) * 100) + cell.color = self._GetColor(cell.value, Color(255, 0, 0, 0), + Color(0, 0, 0, 0), Color(0, 255, 0, 0)) + + +class ColorBoxFormat(Format): + """Format the cell as a color box. + + Example: + If the cell contains a value of 1.5, it will get a green color. + If the cell contains a value of 0.5, it will get a red color. + The intensity of the green/red will be determined by how much above or below + 1.0 the value is. 
+ """ + + def _ComputeFloat(self, cell): + cell.string_value = '--' + bgcolor = self._GetColor(cell.value, Color(255, 0, 0, 0), + Color(255, 255, 255, 0), Color(0, 255, 0, 0)) + cell.bgcolor = bgcolor + cell.color = bgcolor + + +class Cell(object): + """A class to represent a cell in a table. + + Attributes: + value: The raw value of the cell. + color: The color of the cell. + bgcolor: The background color of the cell. + string_value: The string value of the cell. + suffix: A string suffix to be attached to the value when displaying. + prefix: A string prefix to be attached to the value when displaying. + color_row: Indicates whether the whole row is to inherit this cell's color. + bgcolor_row: Indicates whether the whole row is to inherit this cell's + bgcolor. + width: Optional specifier to make a column narrower than the usual width. + The usual width of a column is the max of all its cells widths. + colspan: Set the colspan of the cell in the HTML table, this is used for + table headers. Default value is 1. + name: the test name of the cell. + header: Whether this is a header in html. + """ + + def __init__(self): + self.value = None + self.color = None + self.bgcolor = None + self.string_value = None + self.suffix = None + self.prefix = None + # Entire row inherits this color. + self.color_row = False + self.bgcolor_row = False + self.width = None + self.colspan = 1 + self.name = None + self.header = False + + def __str__(self): + l = [] + l.append('value: %s' % self.value) + l.append('string_value: %s' % self.string_value) + return ' '.join(l) + + +class Column(object): + """Class representing a column in a table. + + Attributes: + result: an object of the Result class. + fmt: an object of the Format class. + """ + + def __init__(self, result, fmt, name=''): + self.result = result + self.fmt = fmt + self.name = name + + +# Takes in: +# ["Key", "Label1", "Label2"] +# ["k", ["v", "v2"], [v3]] +# etc. +# Also takes in a format string. 
# Returns a table like:
# ["Key", "Label1", "Label2"]
# ["k", avg("v", "v2"), stddev("v", "v2"), etc.]]
# according to format string
class TableFormatter(object):
  """Class to convert a plain table into a cell-table.

  This class takes in a table generated by TableGenerator and a list of column
  formats to apply to the table and returns a table of cells.
  """

  def __init__(self, table, columns):
    """The constructor takes in a table and a list of columns.

    Args:
      table: A list of lists of values.
      columns: A list of column containing what to produce and how to format
        it.
    """
    self._table = table
    self._columns = columns
    self._table_columns = []
    self._out_table = []

  def GenerateCellTable(self, table_type):
    """Convert the raw table into a table of Cell objects."""
    row_index = 0
    all_failed = False

    for row in self._table[1:]:
      # It does not make sense to put retval in the summary table.
      if str(row[0]) == 'retval' and table_type == 'summary':
        # Check to see if any runs passed, and update all_failed.
        all_failed = True
        for values in row[1:]:
          if 0 in values:
            all_failed = False
        continue
      key = Cell()
      key.string_value = str(row[0])
      out_row = [key]
      baseline = None
      for values in row[1:]:
        for column in self._columns:
          cell = Cell()
          cell.name = key.string_value
          if column.result.NeedsBaseline():
            # Baseline-relative columns are only emitted once a baseline
            # exists, so the baseline label gets fewer cells (matching the
            # colspans computed in AddLabelName).
            if baseline is not None:
              column.result.Compute(cell, values, baseline)
              column.fmt.Compute(cell)
              out_row.append(cell)
              if not row_index:
                self._table_columns.append(column)
          else:
            column.result.Compute(cell, values, baseline)
            column.fmt.Compute(cell)
            out_row.append(cell)
            if not row_index:
              self._table_columns.append(column)

        if baseline is None:
          # The first label encountered becomes the baseline.
          baseline = values
      self._out_table.append(out_row)
      row_index += 1

    # If this is a summary table, and the only row in it is 'retval', and
    # all the test runs failed, we need to a 'Results' row to the output
    # table.
    if table_type == 'summary' and all_failed and len(self._table) == 2:
      labels_row = self._table[0]
      key = Cell()
      key.string_value = 'Results'
      out_row = [key]
      baseline = None
      for _ in labels_row[1:]:
        for column in self._columns:
          cell = Cell()
          cell.name = key.string_value
          column.result.Compute(cell, ['Fail'], baseline)
          column.fmt.Compute(cell)
          out_row.append(cell)
          if not row_index:
            self._table_columns.append(column)
      self._out_table.append(out_row)

  def AddColumnName(self):
    """Generate Column name at the top of table."""
    key = Cell()
    key.header = True
    key.string_value = 'Keys'
    header = [key]
    for column in self._table_columns:
      cell = Cell()
      cell.header = True
      if column.name:
        cell.string_value = column.name
      else:
        # Derive a readable name from the class names, e.g.
        # AmeanResult + PercentFormat -> "Amean Percent".
        result_name = column.result.__class__.__name__
        format_name = column.fmt.__class__.__name__

        cell.string_value = '%s %s' % (result_name.replace('Result', ''),
                                       format_name.replace('Format', ''))

      header.append(cell)

    self._out_table = [header] + self._out_table

  def AddHeader(self, s):
    """Put additional string on the top of the table."""
    cell = Cell()
    cell.header = True
    cell.string_value = str(s)
    header = [cell]
    # Span the header cell over the widest row of the table.
    colspan = max(1, max(len(row) for row in self._table))
    cell.colspan = colspan
    self._out_table = [header] + self._out_table

  def GetPassesAndFails(self, values):
    """Count zero (pass) and non-zero (fail) entries in values."""
    passes = 0
    fails = 0
    for val in values:
      if val == 0:
        passes += 1
      else:
        fails += 1
    return passes, fails

  def AddLabelName(self):
    """Put label on the top of the table."""
    top_header = []
    base_colspan = len([c for c in self._columns
                        if not c.result.NeedsBaseline()])
    compare_colspan = len(self._columns)
    # Find the row with the key 'retval', if it exists. This
    # will be used to calculate the number of iterations that passed and
    # failed for each image label.
    retval_row = None
    for row in self._table:
      if row[0] == 'retval':
        retval_row = row
    # The label is organized as follows
    # "keys" label_base, label_comparison1, label_comparison2
    # The first cell has colspan 1, the second is base_colspan
    # The others are compare_colspan
    column_position = 0
    for label in self._table[0]:
      cell = Cell()
      cell.header = True
      # Put the number of pass/fail iterations in the image label header.
      if column_position > 0 and retval_row:
        retval_values = retval_row[column_position]
        if type(retval_values) is list:
          passes, fails = self.GetPassesAndFails(retval_values)
          cell.string_value = str(label) + ' (pass:%d fail:%d)' % (passes,
                                                                   fails)
        else:
          cell.string_value = str(label)
      else:
        cell.string_value = str(label)
      if top_header:
        cell.colspan = base_colspan
      if len(top_header) > 1:
        cell.colspan = compare_colspan
      top_header.append(cell)
      column_position += 1
    self._out_table = [top_header] + self._out_table

  def _PrintOutTable(self):
    # Debug helper: dump the cell table to stdout.
    o = ''
    for row in self._out_table:
      for cell in row:
        o += str(cell) + ' '
      o += '\n'
    print(o)

  def GetCellTable(self, table_type='full', headers=True):
    """Function to return a table of cells.

    The table (list of lists) is converted into a table of cells by this
    function.

    Args:
      table_type: Can be 'full' or 'summary'
      headers: A boolean saying whether we want default headers

    Returns:
      A table of cells with each cell having the properties and string values
      as requiested by the columns passed in the constructor.
    """
    # Generate the cell table, creating a list of dynamic columns on the fly.
    if not self._out_table:
      self.GenerateCellTable(table_type)
    if headers:
      self.AddColumnName()
      self.AddLabelName()
    return self._out_table


class TablePrinter(object):
  """Class to print a cell table to the console, file or html."""
  PLAIN = 0
  CONSOLE = 1
  HTML = 2
  TSV = 3
  EMAIL = 4

  def __init__(self, table, output_type):
    """Constructor that stores the cell table and output type."""
    self._table = table
    self._output_type = output_type
    self._row_styles = []
    self._column_styles = []

  # Compute whole-table properties like max-size, etc.
  def _ComputeStyle(self):
    self._row_styles = []
    for row in self._table:
      row_style = Cell()
      for cell in row:
        if cell.color_row:
          assert cell.color, 'Cell color not set but color_row set!'
          assert not row_style.color, 'Multiple row_style.colors found!'
          row_style.color = cell.color
        if cell.bgcolor_row:
          assert cell.bgcolor, 'Cell bgcolor not set but bgcolor_row set!'
          assert not row_style.bgcolor, 'Multiple row_style.bgcolors found!'
          row_style.bgcolor = cell.bgcolor
      self._row_styles.append(row_style)

    self._column_styles = []
    if len(self._table) < 2:
      return

    for i in range(max(len(row) for row in self._table)):
      column_style = Cell()
      for row in self._table:
        # Rows containing colspan'd cells do not contribute to column widths.
        if not any([cell.colspan != 1 for cell in row]):
          column_style.width = max(column_style.width,
                                   len(row[i].string_value))
      self._column_styles.append(column_style)

  def _GetBGColorFix(self, color):
    """Return (prefix, suffix) strings that set the background color."""
    if self._output_type == self.CONSOLE:
      rgb = color.GetRGB()
      prefix, _ = colortrans.rgb2short(rgb)
      # pylint: disable=anomalous-backslash-in-string
      prefix = '\033[48;5;%sm' % prefix
      suffix = '\033[0m'
    elif self._output_type in [self.EMAIL, self.HTML]:
      rgb = color.GetRGB()
      prefix = ("<FONT style=\"BACKGROUND-COLOR:#{0}\">".format(rgb))
      suffix = '</FONT>'
    elif self._output_type in [self.PLAIN, self.TSV]:
      prefix = ''
      suffix = ''
    return prefix, suffix

  def _GetColorFix(self, color):
    """Return (prefix, suffix) strings that set the foreground color."""
    if self._output_type == self.CONSOLE:
      rgb = color.GetRGB()
      prefix, _ = colortrans.rgb2short(rgb)
      # pylint: disable=anomalous-backslash-in-string
      prefix = '\033[38;5;%sm' % prefix
      suffix = '\033[0m'
    elif self._output_type in [self.EMAIL, self.HTML]:
      rgb = color.GetRGB()
      prefix = '<FONT COLOR=#{0}>'.format(rgb)
      suffix = '</FONT>'
    elif self._output_type in [self.PLAIN, self.TSV]:
      prefix = ''
      suffix = ''
    return prefix, suffix

  def Print(self):
    """Print the table to a console, html, etc.

    Returns:
      A string that contains the desired representation of the table.
    """
    self._ComputeStyle()
    return self._GetStringValue()

  def _GetCellValue(self, i, j):
    """Render cell (i, j) with colors, padding and markup applied."""
    cell = self._table[i][j]
    out = cell.string_value
    raw_width = len(out)

    if cell.color:
      p, s = self._GetColorFix(cell.color)
      out = '%s%s%s' % (p, out, s)

    if cell.bgcolor:
      p, s = self._GetBGColorFix(cell.bgcolor)
      out = '%s%s%s' % (p, out, s)

    if self._output_type in [self.PLAIN, self.CONSOLE, self.EMAIL]:
      if cell.width:
        width = cell.width
      else:
        if self._column_styles:
          width = self._column_styles[j].width
        else:
          width = len(cell.string_value)
      if cell.colspan > 1:
        # A spanning cell is as wide as the columns it covers.
        width = 0
        start = 0
        for k in range(j):
          start += self._table[i][k].colspan
        for k in range(cell.colspan):
          width += self._column_styles[start + k].width
      if width > raw_width:
        # Left-pad with spaces, sized against the uncolored text.
        padding = ('%' + str(width - raw_width) + 's') % ''
        out = padding + out

    if self._output_type == self.HTML:
      tag = 'th' if cell.header else 'td'
      out = "<{0} colspan = \"{2}\"> {1} </{0}>".format(tag, out, cell.colspan)

    return out

  def _GetHorizontalSeparator(self):
    if self._output_type in [self.CONSOLE, self.PLAIN, self.EMAIL]:
      return ' '
    if self._output_type == self.HTML:
      return ''
    if self._output_type == self.TSV:
      return '\t'

  def _GetVerticalSeparator(self):
    if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
      return '\n'
    if self._output_type == self.HTML:
      return '</tr>\n<tr>'

  def _GetPrefix(self):
    if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
      return ''
    if self._output_type == self.HTML:
      return "<p></p><table id=\"box-table-a\">\n<tr>"

  def _GetSuffix(self):
    if self._output_type in [self.PLAIN, self.CONSOLE, self.TSV, self.EMAIL]:
      return ''
    if self._output_type == self.HTML:
      return '</tr>\n</table>'

  def _GetStringValue(self):
    """Assemble the full table string from prefix, cells and suffix."""
    o = ''
    o += self._GetPrefix()
    for i in range(len(self._table)):
      row = self._table[i]
      # Apply row color and bgcolor.
      p = s = bgp = bgs = ''
      if self._row_styles[i].bgcolor:
        bgp, bgs = self._GetBGColorFix(self._row_styles[i].bgcolor)
      if self._row_styles[i].color:
        p, s = self._GetColorFix(self._row_styles[i].color)
      o += p + bgp
      for j in range(len(row)):
        o += self._GetCellValue(i, j) + self._GetHorizontalSeparator()
      o += s + bgs
      o += self._GetVerticalSeparator()
    o += self._GetSuffix()
    return o


# Some common drivers
def GetSimpleTable(table, out_to=TablePrinter.CONSOLE):
  """Prints a simple table.

  This is used by code that has a very simple list-of-lists and wants to
  produce a table with ameans, a percentage ratio of ameans and a colorbox.

  Args:
    table: a list of lists.
    out_to: specify the fomat of output. Currently it supports HTML and
      CONSOLE.

  Returns:
    A string version of the table that can be printed to the console.

  Example:
    GetSimpleConsoleTable([["binary", "b1", "b2"],["size", "300", "400"]])
    will produce a colored table that can be printed to the console.
  """
  columns = [
      Column(AmeanResult(), Format()),
      Column(AmeanRatioResult(), PercentFormat()),
      Column(AmeanRatioResult(), ColorBoxFormat()),
  ]
  # Wrap each scalar into a one-element list, the shape TableFormatter
  # expects.
  our_table = [table[0]]
  for row in table[1:]:
    our_row = [row[0]]
    for v in row[1:]:
      our_row.append([v])
    our_table.append(our_row)

  tf = TableFormatter(our_table, columns)
  cell_table = tf.GetCellTable()
  tp = TablePrinter(cell_table, out_to)
  return tp.Print()


# pylint: disable=redefined-outer-name
def GetComplexTable(runs, labels, out_to=TablePrinter.CONSOLE):
  """Prints a complex table.

  This can be used to generate a table with arithmetic mean, standard
  deviation, coefficient of variation, p-values, etc.

  Args:
    runs: A list of lists with data to tabulate.
    labels: A list of labels that correspond to the runs.
    out_to: specifies the format of the table (example CONSOLE or HTML).

  Returns:
    A string table that can be printed to the console or put in an HTML file.
  """
  tg = TableGenerator(runs, labels, TableGenerator.SORT_BY_VALUES_DESC)
  table = tg.GetTable()
  columns = [Column(LiteralResult(), Format(), 'Literal'),
             Column(AmeanResult(), Format()),
             Column(StdResult(), Format()),
             Column(CoeffVarResult(), CoeffVarFormat()),
             Column(NonEmptyCountResult(), Format()),
             Column(AmeanRatioResult(), PercentFormat()),
             Column(AmeanRatioResult(), RatioFormat()),
             Column(GmeanRatioResult(), RatioFormat()),
             Column(PValueResult(), PValueFormat())]
  tf = TableFormatter(table, columns)
  cell_table = tf.GetCellTable()
  tp = TablePrinter(cell_table, out_to)
  return tp.Print()


if __name__ == '__main__':
  # Run a few small tests here.
  runs = [[{'k1': '10', 'k2': '12', 'k5': '40', 'k6': '40',
            'ms_1': '20', 'k7': 'FAIL', 'k8': 'PASS', 'k9': 'PASS',
            'k10': '0'},
           {'k1': '13', 'k2': '14', 'k3': '15', 'ms_1': '10',
            'k8': 'PASS', 'k9': 'FAIL', 'k10': '0'}],
          [{'k1': '50', 'k2': '51', 'k3': '52', 'k4': '53',
            'k5': '35', 'k6': '45', 'ms_1': '200', 'ms_2': '20',
            'k7': 'FAIL', 'k8': 'PASS', 'k9': 'PASS'}]]
  labels = ['vanilla', 'modified']
  t = GetComplexTable(runs, labels, TablePrinter.CONSOLE)
  print(t)
  email = GetComplexTable(runs, labels, TablePrinter.EMAIL)

  runs = [[{'k1': '1'}, {'k1': '1.1'}, {'k1': '1.2'}],
          [{'k1': '5'}, {'k1': '5.1'}, {'k1': '5.2'}]]
  t = GetComplexTable(runs, labels, TablePrinter.CONSOLE)
  print(t)

  simple_table = [
      ['binary', 'b1', 'b2', 'b3'],
      ['size', 100, 105, 108],
      ['rodata', 100, 80, 70],
      ['data', 100, 100, 100],
      ['debug', 100, 140, 60],
  ]
  t = GetSimpleTable(simple_table)
  print(t)
  email += GetSimpleTable(simple_table, TablePrinter.HTML)
  email_to = [getpass.getuser()]
  email = "<pre style='font-size: 13px'>%s</pre>" % email
  EmailSender().SendEmail(email_to, 'SimpleTableTest', email, msg_type='html')
# ==== end of cros_utils/tabulator.py; diff continues with tabulator_test.py ====
00000000..21cd1e73 --- /dev/null +++ b/cros_utils/tabulator_test.py @@ -0,0 +1,141 @@ +# Copyright 2012 Google Inc. All Rights Reserved. +"""Tests for the tabulator module.""" + +from __future__ import print_function + +__author__ = 'asharif@google.com (Ahmad Sharif)' + +# System modules +import unittest + +# Local modules +import tabulator + + +class TabulatorTest(unittest.TestCase): + """Tests for the Tabulator class.""" + + def testResult(self): + table = ['k1', ['1', '3'], ['55']] + result = tabulator.Result() + cell = tabulator.Cell() + result.Compute(cell, table[2], table[1]) + expected = ' '.join([str(float(v)) for v in table[2]]) + self.assertTrue(cell.value == expected) + + result = tabulator.AmeanResult() + cell = tabulator.Cell() + result.Compute(cell, table[2], table[1]) + self.assertTrue(cell.value == float(table[2][0])) + + def testStringMean(self): + smr = tabulator.StringMeanResult() + cell = tabulator.Cell() + value = 'PASS' + values = [value for _ in range(3)] + smr.Compute(cell, values, None) + self.assertTrue(cell.value == value) + values.append('FAIL') + smr.Compute(cell, values, None) + self.assertTrue(cell.value == '?') + + def testStorageFormat(self): + sf = tabulator.StorageFormat() + cell = tabulator.Cell() + base = 1024.0 + cell.value = base + sf.Compute(cell) + self.assertTrue(cell.string_value == '1.0K') + cell.value = base**2 + sf.Compute(cell) + self.assertTrue(cell.string_value == '1.0M') + cell.value = base**3 + sf.Compute(cell) + self.assertTrue(cell.string_value == '1.0G') + + def testLerp(self): + c1 = tabulator.Color(0, 0, 0, 0) + c2 = tabulator.Color(255, 0, 0, 0) + c3 = tabulator.Color.Lerp(0.5, c1, c2) + self.assertTrue(c3.r == 127.5) + self.assertTrue(c3.g == 0) + self.assertTrue(c3.b == 0) + self.assertTrue(c3.a == 0) + c3.Round() + self.assertTrue(c3.r == 127) + + def testGmean(self): + a = [1.0e+308] * 3 + # pylint: disable=protected-access + b = tabulator.Result()._GetGmean(a) + self.assertTrue(b >= 0.99e+308 and b <= 
1.01e+308) + + def testTableGenerator(self): + runs = [[{'k1': '10', + 'k2': '12'}, {'k1': '13', + 'k2': '14', + 'k3': '15'}], [{'k1': '50', + 'k2': '51', + 'k3': '52', + 'k4': '53'}]] + labels = ['vanilla', 'modified'] + tg = tabulator.TableGenerator(runs, labels) + table = tg.GetTable() + header = table.pop(0) + + self.assertTrue(header == ['keys', 'vanilla', 'modified']) + row = table.pop(0) + self.assertTrue(row == ['k1', ['10', '13'], ['50']]) + row = table.pop(0) + self.assertTrue(row == ['k2', ['12', '14'], ['51']]) + row = table.pop(0) + self.assertTrue(row == ['k3', [None, '15'], ['52']]) + row = table.pop(0) + self.assertTrue(row == ['k4', [None, None], ['53']]) + + table = tg.GetTable() + columns = [ + tabulator.Column(tabulator.AmeanResult(), tabulator.Format()), + tabulator.Column(tabulator.AmeanRatioResult(), + tabulator.PercentFormat()), + ] + tf = tabulator.TableFormatter(table, columns) + table = tf.GetCellTable() + self.assertTrue(table) + + def testColspan(self): + simple_table = [ + ['binary', 'b1', 'b2', 'b3'], + ['size', 100, 105, 108], + ['rodata', 100, 80, 70], + ['data', 100, 100, 100], + ['debug', 100, 140, 60], + ] + columns = [ + tabulator.Column(tabulator.AmeanResult(), tabulator.Format()), + tabulator.Column(tabulator.MinResult(), tabulator.Format()), + tabulator.Column(tabulator.AmeanRatioResult(), + tabulator.PercentFormat()), + tabulator.Column(tabulator.AmeanRatioResult(), + tabulator.ColorBoxFormat()), + ] + our_table = [simple_table[0]] + for row in simple_table[1:]: + our_row = [row[0]] + for v in row[1:]: + our_row.append([v]) + our_table.append(our_row) + + tf = tabulator.TableFormatter(our_table, columns) + cell_table = tf.GetCellTable() + self.assertTrue(cell_table[0][0].colspan == 1) + self.assertTrue(cell_table[0][1].colspan == 2) + self.assertTrue(cell_table[0][2].colspan == 4) + self.assertTrue(cell_table[0][3].colspan == 4) + for row in cell_table[1:]: + for cell in row: + self.assertTrue(cell.colspan == 1) + + +if 
__name__ == '__main__': + unittest.main() diff --git a/cros_utils/timeline.py b/cros_utils/timeline.py new file mode 100644 index 00000000..873aaa30 --- /dev/null +++ b/cros_utils/timeline.py @@ -0,0 +1,52 @@ +# Copyright 2012 Google Inc. All Rights Reserved. +# +"""Tools for recording and reporting timeline of benchmark_run.""" + +from __future__ import print_function + +__author__ = 'yunlian@google.com (Yunlian Jiang)' + +import time + + +class Event(object): + """One event on the timeline.""" + + def __init__(self, name='', cur_time=0): + self.name = name + self.timestamp = cur_time + + +class Timeline(object): + """Use a dict to store the timeline.""" + + def __init__(self): + self.events = [] + + def Record(self, event): + for e in self.events: + assert e.name != event, ('The event {0} is already recorded.' + .format(event)) + cur_event = Event(name=event, cur_time=time.time()) + self.events.append(cur_event) + + def GetEvents(self): + return ([e.name for e in self.events]) + + def GetEventDict(self): + tl = {} + for e in self.events: + tl[e.name] = e.timestamp + return tl + + def GetEventTime(self, event): + for e in self.events: + if e.name == event: + return e.timestamp + raise IndexError, 'The event {0} is not recorded'.format(event) + + def GetLastEventTime(self): + return self.events[-1].timestamp + + def GetLastEvent(self): + return self.events[-1].name diff --git a/cros_utils/timeline_test.py b/cros_utils/timeline_test.py new file mode 100644 index 00000000..c93a1274 --- /dev/null +++ b/cros_utils/timeline_test.py @@ -0,0 +1,57 @@ +# Copyright 2012 Google Inc. All Rights Reserved. 
+"""Tests for time_line.py.""" + +from __future__ import print_function + +__author__ = 'yunlian@google.com (Yunlian Jiang)' + +import time +import unittest + +import timeline + + +class TimeLineTest(unittest.TestCase): + """Tests for the Timeline class.""" + + def testRecord(self): + tl = timeline.Timeline() + tl.Record('A') + t = time.time() + t1 = tl.events[0].timestamp + self.assertEqual(int(t1 - t), 0) + self.assertRaises(AssertionError, tl.Record, 'A') + + def testGetEvents(self): + tl = timeline.Timeline() + tl.Record('A') + e = tl.GetEvents() + self.assertEqual(e, ['A']) + tl.Record('B') + e = tl.GetEvents() + self.assertEqual(e, ['A', 'B']) + + def testGetEventTime(self): + tl = timeline.Timeline() + tl.Record('A') + t = time.time() + t1 = tl.GetEventTime('A') + self.assertEqual(int(t1 - t), 0) + self.assertRaises(IndexError, tl.GetEventTime, 'B') + + def testGetLastEventTime(self): + tl = timeline.Timeline() + self.assertRaises(IndexError, tl.GetLastEventTime) + tl.Record('A') + t = time.time() + t1 = tl.GetLastEventTime() + self.assertEqual(int(t1 - t), 0) + time.sleep(2) + tl.Record('B') + t = time.time() + t1 = tl.GetLastEventTime() + self.assertEqual(int(t1 - t), 0) + + +if __name__ == '__main__': + unittest.main() |