aboutsummaryrefslogtreecommitdiff
path: root/catapult/devil/devil/android/md5sum.py
blob: 8adf4ef700f0c9b2bcec66818b68bca490c1cac3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import base64
import gzip
import os
import re
import StringIO

from devil import devil_env
from devil.android import device_errors
from devil.utils import cmd_helper

# Directory on the device that holds the md5sum binary. It is also exported as
# LD_LIBRARY_PATH when the binary is run, and is wiped before a (re-)push.
MD5SUM_DEVICE_LIB_PATH = '/data/local/tmp/md5sum'
# Location of the md5sum executable on the device.
MD5SUM_DEVICE_BIN_PATH = MD5SUM_DEVICE_LIB_PATH + '/md5sum_bin'

# Matches a line consisting solely of 16 hex digits. Used to pick the checksum
# lines out of the device shell output.
_STARTS_WITH_CHECKSUM_RE = re.compile(r'^[0-9a-fA-F]{16}$')

# We need to cap how many paths we send to the md5_sum binaries at once because
# the ARG_MAX on Android devices is relatively small, typically 131072 bytes.
# However, the more paths we use per invocation, the lower the overhead of
# starting processes, so we want to maximize this number, but we can't compute
# it exactly as we don't know how well our paths will compress.
# 5000 is experimentally determined to be reasonable. 10000 fails, and 7500
# works with existing usage, so 5000 seems like a pretty safe compromise.
_MAX_PATHS_PER_INVOCATION = 5000


def CalculateHostMd5Sums(paths):
  """Calculates the MD5 sum value for all items in |paths|.

  Each path is resolved with os.path.realpath and handed to the host md5sum
  binary in batches of at most _MAX_PATHS_PER_INVOCATION paths. A batch is
  passed as a single "-gz" argument: the ';'-joined paths, gzipped and then
  base64-encoded.

  NOTE(review): the result pairs |paths| with the binary's output lines by
  position, so this assumes the binary prints exactly one line per path, in
  order. How directories are handled is decided inside the binary and is not
  visible from this function -- confirm before passing directories.

  Args:
    paths: A single host path, or an iterable of host paths, to md5sum.
  Returns:
    A dict mapping each entry of |paths| (as given, not the resolved path) to
    the corresponding line of the md5sum binary's output. Empty if |paths| is
    empty.
  Raises:
    IOError: If the host md5sum binary has not been built.
  """
  # Nothing to hash, so don't require the binary to exist. This mirrors the
  # early return in CalculateDeviceMd5Sums.
  if not paths:
    return {}

  if isinstance(paths, basestring):
    paths = [paths]
  paths = list(paths)

  md5sum_bin_host_path = devil_env.config.FetchPath('md5sum_host')
  if not os.path.exists(md5sum_bin_host_path):
    raise IOError('File not built: %s' % md5sum_bin_host_path)

  checksums = []
  for start in range(0, len(paths), _MAX_PATHS_PER_INVOCATION):
    batch = paths[start:start + _MAX_PATHS_PER_INVOCATION]
    mem_file = StringIO.StringIO()
    with gzip.GzipFile(fileobj=mem_file, mode='wb') as compressed:
      compressed.write(';'.join(os.path.realpath(p) for p in batch))
    # The gzip stream must be closed (end of the with block) before reading
    # the buffer, otherwise the trailer is missing.
    compressed_paths = base64.b64encode(mem_file.getvalue())
    output = cmd_helper.GetCmdOutput(
        [md5sum_bin_host_path, '-gz', compressed_paths])
    # Split per batch so that an output lacking a trailing newline cannot get
    # glued onto the first line of the next batch.
    checksums.extend(output.splitlines())

  return dict(zip(paths, checksums))


def CalculateDeviceMd5Sums(paths, device):
  """Calculates the MD5 sum value for all items in |paths| on |device|.

  Builds one shell script that verifies the on-device md5sum binary is present
  and matches the local build (by file size), then invokes it once per batch
  of at most _MAX_PATHS_PER_INVOCATION paths. If the script exits with status
  2 (binary missing or stale), the binary is pushed and the script is run a
  second time.

  NOTE(review): the result pairs |paths| with the checksum lines of the output
  by position, so this assumes the binary prints exactly one checksum line per
  path, in order. How directories are handled is decided inside the binary and
  is not visible from this function -- confirm before passing directories.

  Args:
    paths: A single device path, or an iterable of device paths, to md5sum.
    device: The device to run on. It is passed to devil_env.config.FetchPath
      to locate the md5sum build, and must provide RunShellCommand and
      adb.Push.
  Returns:
    A dict mapping each entry of |paths| to the corresponding checksum line
    (a line made up solely of 16 hex digits) of the script output. Empty if
    |paths| is empty.
  Raises:
    IOError: If the md5sum device binary has not been built.
    device_errors.AdbShellCommandFailedError: If the script fails with a
      status other than 2, or fails again after the binary was pushed.
  """
  if not paths:
    return {}

  if isinstance(paths, basestring):
    paths = [paths]
  paths = list(paths)

  md5sum_dist_path = devil_env.config.FetchPath('md5sum_device', device=device)

  # The dist path is either a directory containing md5sum_bin (plus whatever
  # else gets pushed with it) or the binary itself.
  if os.path.isdir(md5sum_dist_path):
    md5sum_dist_bin_path = os.path.join(md5sum_dist_path, 'md5sum_bin')
  else:
    md5sum_dist_bin_path = md5sum_dist_path

  # Check the binary itself rather than the dist path: a dist directory that
  # lacks md5sum_bin would otherwise surface as a bare OSError from getsize().
  if not os.path.exists(md5sum_dist_bin_path):
    raise IOError('File not built: %s' % md5sum_dist_bin_path)
  md5sum_file_size = os.path.getsize(md5sum_dist_bin_path)

  # For better performance, make the script as small as possible to try and
  # avoid needing to write to an intermediary file (which RunShellCommand will
  # do if necessary).
  md5sum_script = 'a=%s;' % MD5SUM_DEVICE_BIN_PATH
  # Check if the binary is missing or has changed (using its file size as an
  # indicator), and trigger a (re-)push via the exit code.
  md5sum_script += '! [[ $(ls -l $a) = *%d* ]]&&exit 2;' % md5sum_file_size
  # Make sure it can find libbase.so
  md5sum_script += 'export LD_LIBRARY_PATH=%s;' % MD5SUM_DEVICE_LIB_PATH
  for start in range(0, len(paths), _MAX_PATHS_PER_INVOCATION):
    batch = paths[start:start + _MAX_PATHS_PER_INVOCATION]
    mem_file = StringIO.StringIO()
    with gzip.GzipFile(fileobj=mem_file, mode='wb') as compressed:
      compressed.write(';'.join(batch))
    # The gzip stream must be closed (end of the with block) before reading
    # the buffer, otherwise the trailer is missing.
    compressed_paths = base64.b64encode(mem_file.getvalue())
    md5sum_script += '$a -gz %s;' % compressed_paths

  try:
    out = device.RunShellCommand(
        md5sum_script, shell=True, check_return=True, large_output=True)
  except device_errors.AdbShellCommandFailedError as e:
    # Push the binary only if it is found to not exist
    # (faster than checking up-front).
    if e.status == 2:
      # If files were previously pushed as root (adbd running as root), trying
      # to re-push as non-root causes the push command to report success, but
      # actually fail. So, wipe the directory first.
      device.RunShellCommand(['rm', '-rf', MD5SUM_DEVICE_LIB_PATH],
                             as_root=True,
                             check_return=True)
      if os.path.isdir(md5sum_dist_path):
        device.adb.Push(md5sum_dist_path, MD5SUM_DEVICE_LIB_PATH)
      else:
        mkdir_cmd = 'a=%s;[[ -e $a ]] || mkdir $a' % MD5SUM_DEVICE_LIB_PATH
        device.RunShellCommand(mkdir_cmd, shell=True, check_return=True)
        device.adb.Push(md5sum_dist_bin_path, MD5SUM_DEVICE_BIN_PATH)

      out = device.RunShellCommand(
          md5sum_script, shell=True, check_return=True, large_output=True)
    else:
      raise

  # Keep only the checksum lines; anything else in the output is dropped.
  checksums = [l for l in out if _STARTS_WITH_CHECKSUM_RE.match(l)]
  return dict(zip(paths, checksums))