// Copyright 2022 The ChromiumOS Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. use std::collections::{BTreeMap, BTreeSet}; use std::fs::{copy, File}; use std::io::{BufRead, BufReader, Read, Write}; use std::path::{Path, PathBuf}; use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; /// JSON serde struct. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct PatchDictSchema { pub metadata: Option>, #[serde(default, skip_serializing_if = "BTreeSet::is_empty")] pub platforms: BTreeSet, pub rel_patch_path: String, pub version_range: Option, } #[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct VersionRange { pub from: Option, pub until: Option, } impl PatchDictSchema { /// Return the first version this patch applies to. pub fn get_from_version(&self) -> Option { self.version_range.and_then(|x| x.from) } /// Return the version after the last version this patch /// applies to. pub fn get_until_version(&self) -> Option { self.version_range.and_then(|x| x.until) } } /// Struct to keep track of patches and their relative paths. #[derive(Debug, Clone)] pub struct PatchCollection { pub patches: Vec, pub workdir: PathBuf, } impl PatchCollection { /// Create a `PatchCollection` from a PATCHES. pub fn parse_from_file(json_file: &Path) -> Result { Ok(Self { patches: serde_json::from_reader(File::open(json_file)?)?, workdir: json_file .parent() .ok_or_else(|| anyhow!("failed to get json_file parent"))? .to_path_buf(), }) } /// Create a `PatchCollection` from a string literal and a workdir. pub fn parse_from_str(workdir: PathBuf, contents: &str) -> Result { Ok(Self { patches: serde_json::from_str(contents).context("parsing from str")?, workdir, }) } /// Copy this collection with patches filtered by given criterion. pub fn filter_patches(&self, f: impl FnMut(&PatchDictSchema) -> bool) -> Self { Self { patches: self.patches.iter().cloned().filter(f).collect(), workdir: self.workdir.clone(), } } /// Map over the patches. pub fn map_patches(&self, f: impl FnMut(&PatchDictSchema) -> PatchDictSchema) -> Self { Self { patches: self.patches.iter().map(f).collect(), workdir: self.workdir.clone(), } } /// Return true if the collection is tracking any patches. pub fn is_empty(&self) -> bool { self.patches.is_empty() } /// Compute the set-set subtraction, returning a new `PatchCollection` which /// keeps the minuend's workdir. pub fn subtract(&self, subtrahend: &Self) -> Result { let mut new_patches = Vec::new(); // This is O(n^2) when it could be much faster, but n is always going to be less // than 1k and speed is not important here. for our_patch in &self.patches { let found_in_sub = subtrahend.patches.iter().any(|sub_patch| { let hash1 = subtrahend .hash_from_rel_patch(sub_patch) .expect("getting hash from subtrahend patch"); let hash2 = self .hash_from_rel_patch(our_patch) .expect("getting hash from our patch"); hash1 == hash2 }); if !found_in_sub { new_patches.push(our_patch.clone()); } } Ok(Self { patches: new_patches, workdir: self.workdir.clone(), }) } pub fn union(&self, other: &Self) -> Result { self.union_helper( other, |p| self.hash_from_rel_patch(p), |p| other.hash_from_rel_patch(p), ) } /// Vec of every PatchDictSchema with differing /// version ranges but the same rel_patch_paths. fn version_range_diffs(&self, other: &Self) -> Vec<(String, Option)> { let other_map: BTreeMap<_, _> = other .patches .iter() .map(|p| (p.rel_patch_path.clone(), p)) .collect(); self.patches .iter() .filter_map(|ours| match other_map.get(&ours.rel_patch_path) { Some(theirs) => { if ours.get_from_version() != theirs.get_from_version() || ours.get_until_version() != theirs.get_until_version() { Some((ours.rel_patch_path.clone(), ours.version_range)) } else { None } } _ => None, }) .collect() } /// Given a vector of tuples with (rel_patch_path, Option), replace /// all version ranges in this collection with a matching one in the new_versions parameter. pub fn update_version_ranges(&self, new_versions: &[(String, Option)]) -> Self { // new_versions should be really tiny (len() <= 2 for the most part), so // the overhead of O(1) lookups is not worth it. let get_updated_version = |rel_patch_path: &str| -> Option> { // The first Option indicates whether we are updating it at all. // The second Option indicates we can update it with None. new_versions .iter() .find(|i| i.0 == rel_patch_path) .map(|x| x.1) }; let cloned_patches = self .patches .iter() .map(|p| match get_updated_version(&p.rel_patch_path) { Some(version_range) => PatchDictSchema { version_range, ..p.clone() }, _ => p.clone(), }) .collect(); Self { workdir: self.workdir.clone(), patches: cloned_patches, } } fn union_helper( &self, other: &Self, our_hash_f: impl Fn(&PatchDictSchema) -> Result, their_hash_f: impl Fn(&PatchDictSchema) -> Result, ) -> Result { // 1. For all our patches: // a. If there exists a matching patch hash from `other`: // i. Create a new patch with merged platform info, // ii. add the new patch to our new collection. // iii. Mark the other patch as "merged" // b. Otherwise, copy our patch to the new collection // 2. For all unmerged patches from the `other` // a. Copy their patch into the new collection let mut combined_patches = Vec::new(); let mut other_merged = vec![false; other.patches.len()]; // 1. for p in &self.patches { let our_hash = our_hash_f(p)?; let mut found = false; // a. for (idx, merged) in other_merged.iter_mut().enumerate() { if !*merged { let other_p = &other.patches[idx]; let their_hash = their_hash_f(other_p)?; if our_hash == their_hash { // i. let new_platforms = p.platforms.union(&other_p.platforms).cloned().collect(); // ii. combined_patches.push(PatchDictSchema { rel_patch_path: p.rel_patch_path.clone(), platforms: new_platforms, metadata: p.metadata.clone(), version_range: p.version_range, }); // iii. *merged = true; found = true; break; } } } // b. if !found { combined_patches.push(p.clone()); } } // 2. // Add any remaining, other-only patches. for (idx, merged) in other_merged.iter().enumerate() { if !*merged { combined_patches.push(other.patches[idx].clone()); } } Ok(Self { workdir: self.workdir.clone(), patches: combined_patches, }) } /// Copy all patches from this collection into another existing collection, and write that /// to the existing collection's file. pub fn transpose_write(&self, existing_collection: &mut Self) -> Result<()> { for p in &self.patches { let original_file_path = self.workdir.join(&p.rel_patch_path); let copy_file_path = existing_collection.workdir.join(&p.rel_patch_path); copy_create_parents(&original_file_path, ©_file_path)?; existing_collection.patches.push(p.clone()); } existing_collection.write_patches_json("PATCHES.json") } /// Write out the patch collection contents to a PATCHES.json file. fn write_patches_json(&self, filename: &str) -> Result<()> { let write_path = self.workdir.join(filename); let mut new_patches_file = File::create(&write_path) .with_context(|| format!("writing to {}", write_path.display()))?; new_patches_file.write_all(self.serialize_patches()?.as_bytes())?; Ok(()) } pub fn serialize_patches(&self) -> Result { let mut serialization_buffer = Vec::::new(); // Four spaces to indent json serialization. let mut serializer = serde_json::Serializer::with_formatter( &mut serialization_buffer, serde_json::ser::PrettyFormatter::with_indent(b" "), ); self.patches .serialize(&mut serializer) .context("serializing patches to JSON")?; // Append a newline at the end if not present. This is necessary to get // past some pre-upload hooks. if serialization_buffer.last() != Some(&b'\n') { serialization_buffer.push(b'\n'); } Ok(std::str::from_utf8(&serialization_buffer)?.to_string()) } /// Return whether a given patch actually exists on the file system. pub fn patch_exists(&self, patch: &PatchDictSchema) -> bool { self.workdir.join(&patch.rel_patch_path).exists() } fn hash_from_rel_patch(&self, patch: &PatchDictSchema) -> Result { hash_from_patch_path(&self.workdir.join(&patch.rel_patch_path)) } } impl std::fmt::Display for PatchCollection { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { for (i, p) in self.patches.iter().enumerate() { let title = p .metadata .as_ref() .and_then(|x| x.get("title")) .and_then(serde_json::Value::as_str) .unwrap_or("[No Title]"); let path = self.workdir.join(&p.rel_patch_path); writeln!(f, "* {}", title)?; if i == self.patches.len() - 1 { write!(f, " {}", path.display())?; } else { writeln!(f, " {}", path.display())?; } } Ok(()) } } /// Represents information which changed between now and an old version of a PATCHES.json file. pub struct PatchTemporalDiff { pub cur_collection: PatchCollection, pub new_patches: PatchCollection, // Store version_updates as a vec, not a map, as it's likely to be very small (<=2), // and the overhead of using a O(1) look up structure isn't worth it. pub version_updates: Vec<(String, Option)>, } /// Generate a PatchCollection incorporating only the diff between current patches and old patch /// contents. pub fn new_patches( patches_path: &Path, old_patch_contents: &str, platform: &str, ) -> Result { // Set up the current patch collection. let cur_collection = PatchCollection::parse_from_file(patches_path) .with_context(|| format!("parsing {} PATCHES.json", platform))?; let cur_collection = filter_patches_by_platform(&cur_collection, platform); let cur_collection = cur_collection.filter_patches(|p| cur_collection.patch_exists(p)); // Set up the old patch collection. let old_collection = PatchCollection::parse_from_str( patches_path.parent().unwrap().to_path_buf(), old_patch_contents, )?; let old_collection = old_collection.filter_patches(|p| old_collection.patch_exists(p)); // Set up the differential values let version_updates = cur_collection.version_range_diffs(&old_collection); let new_patches: PatchCollection = cur_collection.subtract(&old_collection)?; let new_patches = new_patches.map_patches(|p| { let mut platforms = BTreeSet::new(); platforms.extend(["android".to_string(), "chromiumos".to_string()]); PatchDictSchema { platforms: platforms.union(&p.platforms).cloned().collect(), ..p.to_owned() } }); Ok(PatchTemporalDiff { cur_collection, new_patches, version_updates, }) } /// Create a new collection with only the patches that apply to the /// given platform. /// /// If there's no platform listed, the patch should still apply if the patch file exists. pub fn filter_patches_by_platform(collection: &PatchCollection, platform: &str) -> PatchCollection { collection.filter_patches(|p| { p.platforms.contains(platform) || (p.platforms.is_empty() && collection.patch_exists(p)) }) } /// Get the hash from the patch file contents. /// /// Not every patch file actually contains its own hash, /// we must compute the hash ourselves when it's not found. fn hash_from_patch(patch_contents: impl Read) -> Result { let mut reader = BufReader::new(patch_contents); let mut buf = String::new(); reader.read_line(&mut buf)?; let mut first_line_iter = buf.trim().split(' ').fuse(); let (fst_word, snd_word) = (first_line_iter.next(), first_line_iter.next()); if let (Some("commit" | "From"), Some(hash_str)) = (fst_word, snd_word) { // If the first line starts with either "commit" or "From", the following // text is almost certainly a commit hash. Ok(hash_str.to_string()) } else { // This is an annoying case where the patch isn't actually a commit. // So we'll hash the entire file, and hope that's sufficient. let mut hasher = Sha256::new(); hasher.update(&buf); // Have to hash the first line. reader.read_to_string(&mut buf)?; hasher.update(buf); // Hash the rest of the file. let sha = hasher.finalize(); Ok(format!("{:x}", &sha)) } } fn hash_from_patch_path(patch: &Path) -> Result { let f = File::open(patch).with_context(|| format!("opening patch file {}", patch.display()))?; hash_from_patch(f) } /// Copy a file from one path to another, and create any parent /// directories along the way. fn copy_create_parents(from: &Path, to: &Path) -> Result<()> { let to_parent = to .parent() .with_context(|| format!("getting parent of {}", to.display()))?; if !to_parent.exists() { std::fs::create_dir_all(to_parent)?; } copy(&from, &to) .with_context(|| format!("copying file from {} to {}", &from.display(), &to.display()))?; Ok(()) } #[cfg(test)] mod test { use super::*; /// Test we can extract the hash from patch files. #[test] fn test_hash_from_patch() { // Example git patch from Gerrit let desired_hash = "004be4037e1e9c6092323c5c9268acb3ecf9176c"; let test_file_contents = "commit 004be4037e1e9c6092323c5c9268acb3ecf9176c\n\ Author: An Author \n\ Date: Thu Aug 6 12:34:16 2020 -0700"; assert_eq!( &hash_from_patch(test_file_contents.as_bytes()).unwrap(), desired_hash ); // Example git patch from upstream let desired_hash = "6f85225ef3791357f9b1aa097b575b0a2b0dff48"; let test_file_contents = "From 6f85225ef3791357f9b1aa097b575b0a2b0dff48\n\ Mon Sep 17 00:00:00 2001\n\ From: Another Author \n\ Date: Wed, 18 Aug 2021 15:03:03 -0700"; assert_eq!( &hash_from_patch(test_file_contents.as_bytes()).unwrap(), desired_hash ); } #[test] fn test_union() { let patch1 = PatchDictSchema { rel_patch_path: "a".into(), metadata: None, platforms: BTreeSet::from(["x".into()]), version_range: Some(VersionRange { from: Some(0), until: Some(1), }), }; let patch2 = PatchDictSchema { rel_patch_path: "b".into(), platforms: BTreeSet::from(["x".into(), "y".into()]), ..patch1.clone() }; let patch3 = PatchDictSchema { platforms: BTreeSet::from(["z".into(), "x".into()]), ..patch1.clone() }; let collection1 = PatchCollection { workdir: PathBuf::new(), patches: vec![patch1, patch2], }; let collection2 = PatchCollection { workdir: PathBuf::new(), patches: vec![patch3], }; let union = collection1 .union_helper( &collection2, |p| Ok(p.rel_patch_path.to_string()), |p| Ok(p.rel_patch_path.to_string()), ) .expect("could not create union"); assert_eq!(union.patches.len(), 2); assert_eq!( union.patches[0].platforms.iter().collect::>(), vec!["x", "z"] ); assert_eq!( union.patches[1].platforms.iter().collect::>(), vec!["x", "y"] ); } #[test] fn test_union_empties() { let patch1 = PatchDictSchema { rel_patch_path: "a".into(), metadata: None, platforms: Default::default(), version_range: Some(VersionRange { from: Some(0), until: Some(1), }), }; let collection1 = PatchCollection { workdir: PathBuf::new(), patches: vec![patch1.clone()], }; let collection2 = PatchCollection { workdir: PathBuf::new(), patches: vec![patch1], }; let union = collection1 .union_helper( &collection2, |p| Ok(p.rel_patch_path.to_string()), |p| Ok(p.rel_patch_path.to_string()), ) .expect("could not create union"); assert_eq!(union.patches.len(), 1); assert_eq!(union.patches[0].platforms.len(), 0); } #[test] fn test_version_differentials() { let fixture = version_range_fixture(); let diff = fixture[0].version_range_diffs(&fixture[1]); assert_eq!(diff.len(), 1); assert_eq!( &diff, &[( "a".to_string(), Some(VersionRange { from: Some(0), until: Some(1) }) )] ); let diff = fixture[1].version_range_diffs(&fixture[2]); assert_eq!(diff.len(), 0); } #[test] fn test_version_updates() { let fixture = version_range_fixture(); let collection = fixture[0].update_version_ranges(&[("a".into(), None)]); assert_eq!(collection.patches[0].version_range, None); assert_eq!(collection.patches[1], fixture[1].patches[1]); let new_version_range = Some(VersionRange { from: Some(42), until: Some(43), }); let collection = fixture[0].update_version_ranges(&[("a".into(), new_version_range)]); assert_eq!(collection.patches[0].version_range, new_version_range); assert_eq!(collection.patches[1], fixture[1].patches[1]); } fn version_range_fixture() -> Vec { let patch1 = PatchDictSchema { rel_patch_path: "a".into(), metadata: None, platforms: Default::default(), version_range: Some(VersionRange { from: Some(0), until: Some(1), }), }; let patch1_updated = PatchDictSchema { version_range: Some(VersionRange { from: Some(0), until: Some(3), }), ..patch1.clone() }; let patch2 = PatchDictSchema { rel_patch_path: "b".into(), ..patch1.clone() }; let collection1 = PatchCollection { workdir: PathBuf::new(), patches: vec![patch1, patch2.clone()], }; let collection2 = PatchCollection { workdir: PathBuf::new(), patches: vec![patch1_updated, patch2.clone()], }; let collection3 = PatchCollection { workdir: PathBuf::new(), patches: vec![patch2], }; vec![collection1, collection2, collection3] } }