aboutsummaryrefslogtreecommitdiff
path: root/catapult/devil/devil/android/md5sum.py
diff options
context:
space:
mode:
Diffstat (limited to 'catapult/devil/devil/android/md5sum.py')
-rw-r--r-- catapult/devil/devil/android/md5sum.py | 59
1 files changed, 34 insertions, 25 deletions
diff --git a/catapult/devil/devil/android/md5sum.py b/catapult/devil/devil/android/md5sum.py
index f5b6f3cf..8adf4ef7 100644
--- a/catapult/devil/devil/android/md5sum.py
+++ b/catapult/devil/devil/android/md5sum.py
@@ -2,9 +2,11 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+import base64
+import gzip
import os
-import posixpath
import re
+import StringIO
from devil import devil_env
from devil.android import device_errors
@@ -13,7 +15,16 @@ from devil.utils import cmd_helper
MD5SUM_DEVICE_LIB_PATH = '/data/local/tmp/md5sum'
MD5SUM_DEVICE_BIN_PATH = MD5SUM_DEVICE_LIB_PATH + '/md5sum_bin'
-_STARTS_WITH_CHECKSUM_RE = re.compile(r'^\s*[0-9a-fA-F]{32}\s+')
+_STARTS_WITH_CHECKSUM_RE = re.compile(r'^[0-9a-fA-F]{16}$')
+
+# We need to cap how many paths we send to the md5sum binaries at once because
+# the ARG_MAX on Android devices is relatively small, typically 131072 bytes.
+# However, the more paths we use per invocation, the lower the overhead of
+# starting processes, so we want to maximize this number, but we can't compute
+# it exactly as we don't know how well our paths will compress.
+# 5000 is experimentally determined to be reasonable. 10000 fails, and 7500
+# works with existing usage, so 5000 seems like a pretty safe compromise.
+_MAX_PATHS_PER_INVOCATION = 5000
def CalculateHostMd5Sums(paths):
@@ -29,14 +40,22 @@ def CalculateHostMd5Sums(paths):
"""
if isinstance(paths, basestring):
paths = [paths]
+ paths = list(paths)
md5sum_bin_host_path = devil_env.config.FetchPath('md5sum_host')
if not os.path.exists(md5sum_bin_host_path):
raise IOError('File not built: %s' % md5sum_bin_host_path)
- out = cmd_helper.GetCmdOutput(
- [md5sum_bin_host_path] + [os.path.realpath(p) for p in paths])
+ out = ""
+ for i in range(0, len(paths), _MAX_PATHS_PER_INVOCATION):
+ mem_file = StringIO.StringIO()
+ compressed = gzip.GzipFile(fileobj=mem_file, mode="wb")
+ compressed.write(";".join(
+ [os.path.realpath(p) for p in paths[i:i+_MAX_PATHS_PER_INVOCATION]]))
+ compressed.close()
+ compressed_paths = base64.b64encode(mem_file.getvalue())
+ out += cmd_helper.GetCmdOutput([md5sum_bin_host_path, "-gz", compressed_paths])
- return _ParseMd5SumOutput(out.splitlines())
+ return dict(zip(paths, out.splitlines()))
def CalculateDeviceMd5Sums(paths, device):
@@ -55,7 +74,6 @@ def CalculateDeviceMd5Sums(paths, device):
if isinstance(paths, basestring):
paths = [paths]
- # Allow generators
paths = list(paths)
md5sum_dist_path = devil_env.config.FetchPath('md5sum_device', device=device)
@@ -78,16 +96,13 @@ def CalculateDeviceMd5Sums(paths, device):
md5sum_script += '! [[ $(ls -l $a) = *%d* ]]&&exit 2;' % md5sum_file_size
# Make sure it can find libbase.so
md5sum_script += 'export LD_LIBRARY_PATH=%s;' % MD5SUM_DEVICE_LIB_PATH
- if len(paths) > 1:
- prefix = posixpath.commonprefix(paths)
- if len(prefix) > 4:
- md5sum_script += 'p="%s";' % prefix
- paths = ['$p"%s"' % p[len(prefix):] for p in paths]
-
- md5sum_script += ';'.join('$a %s' % p for p in paths)
- # Don't fail the script if the last md5sum fails (due to file not found)
- # Note: ":" is equivalent to "true".
- md5sum_script += ';:'
+ for i in range(0, len(paths), _MAX_PATHS_PER_INVOCATION):
+ mem_file = StringIO.StringIO()
+ compressed = gzip.GzipFile(fileobj=mem_file, mode="wb")
+ compressed.write(";".join(paths[i:i+_MAX_PATHS_PER_INVOCATION]))
+ compressed.close()
+ compressed_paths = base64.b64encode(mem_file.getvalue())
+ md5sum_script += '$a -gz %s;' % compressed_paths
try:
out = device.RunShellCommand(
md5sum_script, shell=True, check_return=True, large_output=True)
@@ -99,7 +114,8 @@ def CalculateDeviceMd5Sums(paths, device):
# to re-push as non-root causes the push command to report success, but
# actually fail. So, wipe the directory first.
device.RunShellCommand(['rm', '-rf', MD5SUM_DEVICE_LIB_PATH],
- as_root=True, check_return=True)
+ as_root=True,
+ check_return=True)
if os.path.isdir(md5sum_dist_path):
device.adb.Push(md5sum_dist_path, MD5SUM_DEVICE_LIB_PATH)
else:
@@ -112,11 +128,4 @@ def CalculateDeviceMd5Sums(paths, device):
else:
raise
- return _ParseMd5SumOutput(out)
-
-
-def _ParseMd5SumOutput(out):
- hash_and_path = (l.split(None, 1) for l in out
- if l and _STARTS_WITH_CHECKSUM_RE.match(l))
- return dict((p, h) for h, p in hash_and_path)
-
+ return dict(zip(paths, [l for l in out if _STARTS_WITH_CHECKSUM_RE.match(l)]))