summaryrefslogtreecommitdiff
path: root/vndk/tools/image-diff-tool/diff.py
blob: d12609f94e7e06230e9e0f4feae561724e420f39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import collections
import fnmatch
import hashlib
import os
import pathlib
import subprocess
import tempfile
import zipfile
def silent_call(cmd):
  """Run *cmd* with all output discarded.

  Args:
    cmd: Command argument list for subprocess.

  Returns:
    True if the command could be launched and exited with status 0;
    False if it exited non-zero or could not be launched at all (e.g.
    llvm-strip / llvm-objdump / unzip is not installed). Callers use the
    False case to fall back to plain hashing, so a missing tool must not
    raise.
  """
  try:
    return subprocess.call(
        cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0
  except OSError:
    # subprocess.call raises (FileNotFoundError et al.) when the
    # executable is absent; treat that the same as a failed invocation.
    return False
def sha1sum(f):
  """Return the SHA-1 hex digest of file *f*.

  Reads in fixed-size chunks so large image artifacts are hashed without
  loading the whole file into memory.
  """
  digest = hashlib.sha1()
  with open(f, "rb") as fin:
    for chunk in iter(lambda: fin.read(65536), b""):
      digest.update(chunk)
  return digest.hexdigest()
def sha1sum_without_signing_key(filepath):
  """Return a SHA-1 digest of an APK that ignores its signing data.

  Entries under META-INF/ (signature files) are skipped so two APKs that
  differ only in signing key hash identically. The digest covers both the
  content hash and the name of every remaining entry, in sorted order.
  """
  # Use a context manager so the archive handle is always released.
  with zipfile.ZipFile(filepath) as apk:
    parts = []
    for name in sorted(apk.namelist()):
      if name.startswith("META-INF/"):
        continue
      parts.append(hashlib.sha1(apk.read(name)).hexdigest())
      parts.append(name)
  return hashlib.sha1(",".join(parts).encode()).hexdigest()
def strip_and_sha1sum(filepath):
  """Strip an ELF file and calculate the SHA-1 hash of the result.

  Stripping removes all symbols and the .note.gnu.build-id section (while
  keeping .ARM.attributes), so binaries that differ only in build id hash
  identically. If llvm-strip fails or is unavailable, the original file
  is hashed instead.

  Args:
    filepath: Path to the ELF file.

  Returns:
    SHA-1 hex digest string.
  """
  tmp_filepath = filepath + ".tmp.no-build-id"
  llvm_strip = [
      "llvm-strip", "--strip-all", "--keep-section=.ARM.attributes",
      "--remove-section=.note.gnu.build-id", filepath, "-o", tmp_filepath
  ]
  # NOTE: the original code had an unreachable `return sha1sum(filepath)`
  # after this try/finally and a single-use lambda around silent_call;
  # both removed.
  try:
    if silent_call(llvm_strip):
      return sha1sum(tmp_filepath)
    return sha1sum(filepath)
  finally:
    # Always remove the stripped temporary copy, even if hashing raised.
    if os.path.exists(tmp_filepath):
      os.remove(tmp_filepath)
def make_filter_from_allowlists(allowlists, all_targets):
  """Creates a callable filter from a list of allowlist files.

  An allowlist contains pathname patterns and/or skipped-line entries.
  Pathname matching is case insensitive, and lines starting with # are
  treated as comments.

  Examples:
    Ignore the file "system/build.prop":
      SYSTEM/build.prop
    Ignore all txt files:
      *.txt
    Ignore everything under "system/dontcare/":
      SYSTEM/dontcare/*
    Ignore lines prefixed with pat1 or pat2 inside "system/build.prop":
      SYSTEM/build.prop=pat1 pat2

  Args:
    allowlists: A list of allowlist filenames.
    all_targets: A list of targets to compare.

  Returns:
    A callable that takes a file pathname and returns True when the file
    is skipped by the allowlists, False otherwise.
  """
  ignored_patterns = set()
  ignored_prefixes = collections.defaultdict(list)
  for list_path in allowlists:
    if not os.path.isfile(list_path):
      continue
    with open(list_path, "rb") as list_file:
      for raw_line in list_file:
        entry = raw_line.strip().decode()
        if entry.startswith("#"):
          continue
        if entry.endswith("\\"):
          entry = entry.rstrip("\\")
        if "=" in entry:
          # "<pathname>=<prefix> <prefix> ..." form: diff ignoring lines.
          name, rest = entry.split("=", 1)
          prefixes = rest.split()
          if prefixes:
            ignored_prefixes[name.lower()].extend(prefixes)
        elif entry:
          # Plain pathname/glob form: skip the file entirely.
          ignored_patterns.add(entry.lower())

  def same_modulo_prefixes(filename, prefixes):
    """Compares the file across targets while dropping prefixed lines.

    Args:
      filename: File to compare among each target.
      prefixes: Lines starting with any of these prefixes are ignored.

    Returns:
      True if the file is present and identical in every target.
    """
    digests = []
    for target in all_targets:
      full_path = os.path.join(target, filename)
      if not os.path.isfile(full_path):
        return False
      digest = hashlib.sha1()
      with open(full_path, "rb") as f:
        for raw in f:
          if any(raw.decode().startswith(p) for p in prefixes):
            continue
          digest.update(raw)
      digests.append(digest.hexdigest())
    return len(digests) == len(all_targets) and len(set(digests)) == 1

  def allowlist_filter(filename):
    key = filename.lower()
    if any(fnmatch.fnmatch(key, pattern) for pattern in ignored_patterns):
      return True
    prefixes = ignored_prefixes.get(key)
    if prefixes is not None:
      return same_modulo_prefixes(filename, prefixes)
    return False

  return allowlist_filter
def main(all_targets,
         search_paths,
         allowlists,
         ignore_signing_key=False,
         list_only=False):
  """Compares artifacts across targets and writes three CSV reports.

  Args:
    all_targets: Target directories to compare.
    search_paths: Subdirectories, relative to each target, to scan.
    allowlists: Allowlist filenames (see make_filter_from_allowlists).
    ignore_signing_key: If True, hash .apk files ignoring META-INF/.
    list_only: If True, compare file lists only and skip hashing.

  Side effects:
    Writes common.csv, diff.csv and allowlisted_diff.csv into the current
    working directory.
  """

  def run(path):
    # llvm-objdump succeeds on object/ELF files; those are stripped of
    # build ids before hashing so build-id-only differences don't count.
    if silent_call(["llvm-objdump", "-a", path]):
      return strip_and_sha1sum(path)
    if path.endswith(".apk") and ignore_signing_key:
      return sha1sum_without_signing_key(path)
    return sha1sum(path)

  # artifact_sha1_target_map[filename][sha1] = list of targets
  artifact_sha1_target_map = collections.defaultdict(
      lambda: collections.defaultdict(list))
  for target in all_targets:
    paths = []
    for search_path in search_paths:
      for path in pathlib.Path(target, search_path).glob("**/*"):
        # Skip directories and broken symlinks (exists() follows links).
        if path.exists() and not path.is_dir():
          paths.append((str(path), str(path.relative_to(target))))
    target_basename = os.path.basename(os.path.normpath(target))
    for path, filename in paths:
      # In list-only mode a dummy digest groups every occurrence of a
      # filename under one key; pretty_print never slices it then.
      sha1 = 0 if list_only else run(path)
      artifact_sha1_target_map[filename][sha1].append(target_basename)

  def pretty_print(sha1, filename, targets, exclude_sha1):
    if exclude_sha1:
      return "{}, {}\n".format(filename, ";".join(targets))
    return "{}, {}, {}\n".format(filename, sha1[:10], ";".join(targets))

  def is_common(sha1_target_map):
    # A file is "common" when there is exactly one digest and every
    # target produced it.
    if len(sha1_target_map) != 1:
      return False
    targets = next(iter(sha1_target_map.values()))
    return len(targets) == len(all_targets)

  allowlist_filter = make_filter_from_allowlists(allowlists, all_targets)
  common = []
  diff = []
  allowlisted_diff = []
  for filename, sha1_target_map in artifact_sha1_target_map.items():
    if is_common(sha1_target_map):
      bucket = common
    elif allowlist_filter(filename):
      bucket = allowlisted_diff
    else:
      bucket = diff
    for sha1, targets in sha1_target_map.items():
      bucket.append(pretty_print(sha1, filename, targets, list_only))

  header = "filename, targets\n" if list_only else "filename, sha1sum, targets\n"
  for report, rows in (("common.csv", common), ("diff.csv", diff),
                       ("allowlisted_diff.csv", allowlisted_diff)):
    with open(report, "w") as fout:
      fout.write(header)
      fout.writelines(sorted(rows))
def main_with_zip(extracted_paths, main_args):
  """Extracts each target's zip images, then runs the comparison.

  For every target directory, every *.zip inside it is unzipped into the
  corresponding entry of extracted_paths — restricted to the configured
  search paths — and main() is invoked on the extracted trees.
  """
  wanted_members = [os.path.join(s, "*") for s in main_args.search_path]
  for origin_path, tmp_path in zip(main_args.target, extracted_paths):
    cmd = ["unzip", "-qd", tmp_path, os.path.join(origin_path, "*.zip")]
    cmd += wanted_members
    subprocess.call(cmd)
  main(
      extracted_paths,
      main_args.search_path,
      main_args.allowlist,
      main_args.ignore_signing_key,
      list_only=main_args.list_only)
if __name__ == "__main__":
  # Command-line entry point: parse arguments, validate, and dispatch to
  # either the direct or the unzip-first comparison path.
  arg_parser = argparse.ArgumentParser(
      prog="compare_images",
      usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u] [-p] [-w allowlist1] [-w allowlist2]"
  )
  arg_parser.add_argument("-t", "--target", nargs="+", required=True)
  arg_parser.add_argument("-s", "--search_path", nargs="+", required=True)
  arg_parser.add_argument("-i", "--ignore_signing_key", action="store_true")
  arg_parser.add_argument(
      "-l",
      "--list_only",
      action="store_true",
      help="Compare file list only and ignore SHA-1 diff")
  arg_parser.add_argument("-u", "--unzip", action="store_true")
  arg_parser.add_argument(
      "-p", "--preserve_extracted_files", action="store_true")
  arg_parser.add_argument("-w", "--allowlist", action="append", default=[])
  args = arg_parser.parse_args()

  if len(args.target) < 2:
    arg_parser.error("The number of targets has to be at least two.")

  if not args.unzip:
    main(
        args.target,
        args.search_path,
        args.allowlist,
        args.ignore_signing_key,
        list_only=args.list_only)
  elif args.preserve_extracted_files:
    # Extract next to the inputs and keep the extracted trees afterwards.
    main_with_zip(args.target, args)
  else:
    # Extract into a throwaway directory mirroring the target names.
    with tempfile.TemporaryDirectory() as tmpdir:
      scratch_targets = [os.path.join(tmpdir, t) for t in args.target]
      for p in scratch_targets:
        os.makedirs(p)
      main_with_zip(scratch_targets, args)