aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Anas Sulaiman <mrahs@google.com> 2024-02-06 19:09:44 +0000
committer Anas Sulaiman <mrahs@google.com> 2024-02-07 21:57:41 +0000
commit c755fdbe8e24f3f4f647c88fd65919df66bfe210 (patch)
tree 4eb59c7dacec623836fba4bdcd7672e068ad677d /python
parent 55b7ba0645406b4461b3a953ccf9e707e95644aa (diff)
download soong-c755fdbe8e24f3f4f647c88fd65919df66bfe210.tar.gz
Fix non-deterministic python compilation
This CL pins the timestamps of zip entries to a hardcoded value so that the zip archives are deterministic across checkouts, not just across incremental builds. The fixed date is the same one used by soong_zip.

Bug: b/322788229
Test: Ran a few builds and verified that the mdate of the zipped files is fixed and that the generated zip files have identical hashes.
Change-Id: I3cd6fcf559d4d83d8813c93319b46e267ae64a2b
Diffstat (limited to 'python')
-rw-r--r-- python/scripts/precompile_python.py 21
1 file changed, 12 insertions, 9 deletions
diff --git a/python/scripts/precompile_python.py b/python/scripts/precompile_python.py
index 80e7c76a1..aa1a5df53 100644
--- a/python/scripts/precompile_python.py
+++ b/python/scripts/precompile_python.py
@@ -24,7 +24,12 @@ import zipfile
# This file needs to support both python 2 and 3.
-def process_one_file(info, infile, outzip):
+def process_one_file(name, infile, outzip):
+ # Create a ZipInfo instance with a fixed date to ensure a deterministic output.
+ # Date was chosen to be the same as
+ # https://cs.android.com/android/platform/superproject/main/+/main:build/soong/jar/jar.go;l=36;drc=2863e4535eb65e15f955dc8ed48fa99b1d2a1db5
+ info = zipfile.ZipInfo(filename=name, date_time=(2008, 1, 1, 0, 0, 0))
+
if not info.filename.endswith('.py'):
outzip.writestr(info, infile.read())
return
@@ -37,17 +42,15 @@ def process_one_file(info, infile, outzip):
with tempfile.NamedTemporaryFile(prefix="Soong_precompile_", delete=False) as tmp:
out_name = tmp.name
try:
- # Ensure deterministic pyc by using the hash rather than timestamp.
- # This is required to improve caching in accelerated builds.
- # Only works on Python 3.7+ (see https://docs.python.org/3/library/py_compile.html#py_compile.PycInvalidationMode)
- # which should cover most updated branches and developer machines.
+ # Ensure a deterministic .pyc output by using the hash rather than the timestamp.
+ # Only works on Python 3.7+
+ # See https://docs.python.org/3/library/py_compile.html#py_compile.PycInvalidationMode
if sys.version_info >= (3, 7):
py_compile.compile(in_name, out_name, info.filename, doraise=True, invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH)
else:
py_compile.compile(in_name, out_name, info.filename, doraise=True)
with open(out_name, 'rb') as f:
info.filename = info.filename + 'c'
- # Use ZipInfo rather than str to reuse timestamps for deterministic zip files.
outzip.writestr(info, f.read())
finally:
os.remove(in_name)
@@ -62,9 +65,9 @@ def main():
with open(args.dst_zip, 'wb') as outf, open(args.src_zip, 'rb') as inf:
with zipfile.ZipFile(outf, mode='w') as outzip, zipfile.ZipFile(inf, mode='r') as inzip:
- for info in inzip.infolist():
- with inzip.open(info.filename, mode='r') as inzipf:
- process_one_file(info, inzipf, outzip)
+ for name in inzip.namelist():
+ with inzip.open(name, mode='r') as inzipf:
+ process_one_file(name, inzipf, outzip)
if __name__ == "__main__":