aboutsummaryrefslogtreecommitdiff
path: root/generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java
diff options
context:
space:
mode:
Diffstat (limited to 'generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java')
-rw-r--r--generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java391
1 files changed, 391 insertions, 0 deletions
diff --git a/generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java b/generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java
new file mode 100644
index 0000000..43e7898
--- /dev/null
+++ b/generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java
@@ -0,0 +1,391 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.archivepatcher.generator.bsdiff;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+import java.nio.charset.StandardCharsets;
+
+// TODO(andrewhayden) clean up the various generatePatch(...) methods, there are too many.
+
+/**
+ * A helper class that handles the main BsDiff I/O and patch generation, by calling into the main
+ * algorithm implementation in {@link BsDiff}
+ */
+public class BsDiffPatchWriter {
+
+ static final int DEFAULT_MINIMUM_MATCH_LENGTH = 16;
+
+ /**
+ * Write a patch entry.
+ *
+ * @param newData
+ * @param oldData
+ * @param newPosition the first byte in |newData| to write either raw or as a diff against
+ * |oldData|.
+ * @param oldPosition the first byte in |oldData| to diff against |newData|. Ignored if
+ * |diffLength| is empty.
+ * @param diffLength the number of bytes to diff between |newData| and |oldData| starting at
+ * |newPosition| and |oldPosition| respectively.
+ * @param extraLength the number of bytes from |newData| to write starting at |newPosition +
+ * diffLength|.
+ * @param oldPositionOffsetForNextEntry the offset between |oldPosition| for the next entry and
+ * |oldPosition| + |diffLength| for this entry.
+ * @param outputStream the output stream to write the patch entry to.
+ * @throws IOException if unable to read or write data
+ */
+ private static void writeEntry(
+ RandomAccessObject newData,
+ RandomAccessObject oldData,
+ int newPosition,
+ int oldPosition,
+ int diffLength,
+ int extraLength,
+ int oldPositionOffsetForNextEntry,
+ OutputStream outputStream)
+ throws IOException {
+ // Write control data
+ BsUtil.writeFormattedLong(diffLength, outputStream);
+ BsUtil.writeFormattedLong(extraLength, outputStream);
+ BsUtil.writeFormattedLong(oldPositionOffsetForNextEntry, outputStream);
+
+ newData.seek(newPosition);
+ oldData.seek(oldPosition);
+ // Write diff data
+ for (int i = 0; i < diffLength; ++i) {
+ // TODO(hartmanng): test using a small buffer to insulate read() calls (and write() for that
+ // matter).
+ outputStream.write(newData.readUnsignedByte() - oldData.readUnsignedByte());
+ }
+
+ if (extraLength > 0) {
+ // This seek will throw an IOException sometimes, if we try to seek to the byte after
+ // the end of the RandomAccessObject.
+ newData.seek(newPosition + diffLength);
+ // Write extra data
+ for (int i = 0; i < extraLength; ++i) {
+ // TODO(hartmanng): same as above - test buffering readByte().
+ outputStream.write(newData.readByte());
+ }
+ }
+ }
+
+ /**
+ * Generate a BsDiff patch given a Matcher.
+ *
+ * @param oldData the old blob
+ * @param newData the new blob
+ * @param matcher a Matcher to find binary matches between oldData and newData
+ * @param outputStream the outputStream for the new generated patch
+ * @throws IOException if unable to read or write data
+ * @throws InterruptedException if any thread interrupts this thread
+ */
+ // Visible for testing only
+ static void generatePatchWithMatcher(
+ RandomAccessObject oldData,
+ RandomAccessObject newData,
+ Matcher matcher,
+ OutputStream outputStream)
+ throws IOException, InterruptedException {
+ // Compute the differences, writing ctrl as we go
+ int lastNewPosition = 0;
+ int lastOldPosition = 0;
+
+ int newPosition = 0;
+ int oldPosition = 0;
+ while (newPosition < newData.length()) {
+ if (Thread.interrupted()) {
+ throw new InterruptedException();
+ }
+ Matcher.NextMatch nextMatch = matcher.next();
+ if (nextMatch.didFindMatch) {
+ newPosition = nextMatch.newPosition;
+ oldPosition = nextMatch.oldPosition;
+ } else {
+ newPosition = (int) newData.length();
+ }
+
+ // Extend the current match (|newPosition|, |oldPosition|) backward such that 50% of the bytes
+ // match. We have written diff / extra data up till |lastNewPosition| so we cannot extend
+ // further back than |lastNewPosition|.
+ int backwardExtension = 0;
+ if (newPosition < newData.length()) {
+ int score = 0;
+ int bestScore = 0;
+ for (int i = 1; newPosition - i >= lastNewPosition && oldPosition >= i; ++i) {
+ oldData.seek(oldPosition - i);
+ newData.seek(newPosition - i);
+ if (oldData.readByte() == newData.readByte()) {
+ ++score;
+ } else {
+ --score;
+ }
+
+ if (score > bestScore) {
+ bestScore = score;
+ backwardExtension = i;
+ }
+ }
+ }
+
+ // Extend the previous match (|lastNewPosition|, |lastOldPosition|) forward such that 50% of
+ // the bytes match. (|lastNewPosition|, |lastOldPosition|) were extended backward in the
+ // previous iteration of the loop.
+ int forwardExtension = 0;
+ {
+ int score = 0;
+ int bestScore = 0;
+ oldData.seek(lastOldPosition);
+ newData.seek(lastNewPosition);
+ for (int i = 0;
+ lastNewPosition + i < newPosition && lastOldPosition + i < oldData.length();
+ ++i) {
+ if (oldData.readByte() == newData.readByte()) {
+ ++score;
+ } else {
+ --score;
+ }
+ if (score > bestScore) {
+ bestScore = score;
+ forwardExtension = i + 1;
+ }
+ }
+ }
+
+ // Adjust |backwardExtension| and |forwardExtension| such that the extended matches do
+ // not intersect in |newData|. They can intersect in |oldData|.
+ int overlap = (lastNewPosition + forwardExtension) - (newPosition - backwardExtension);
+ if (overlap > 0) {
+ int score = 0;
+ int bestScore = 0;
+ int backwardExtensionDecrement = 0;
+ for (int i = 0; i < overlap; ++i) {
+ newData.seek(lastNewPosition + forwardExtension - overlap + i);
+ oldData.seek(lastOldPosition + forwardExtension - overlap + i);
+ if (newData.readByte() == oldData.readByte()) {
+ ++score;
+ }
+
+ newData.seek(newPosition - backwardExtension + i);
+ oldData.seek(oldPosition - backwardExtension + i);
+ if (newData.readByte() == oldData.readByte()) {
+ --score;
+ }
+ if (score > bestScore) {
+ bestScore = score;
+ backwardExtensionDecrement = i + 1;
+ }
+ }
+ forwardExtension -= overlap - backwardExtensionDecrement;
+ backwardExtension -= backwardExtensionDecrement;
+ }
+
+ // Write an entry with:
+ // - The diff between |newData| and |oldData| for the previous extended match:
+ // oldData[lastOldPosition ... lastOldPosition + forwardExtension - 1] and
+ // newData[lastNewPosition ... lastNewPosition + forwardExtension - 1].
+ // - The bytes in |newData| between |lastNewPosition| and |newPosition| which are part of
+ // neither the previous extended match or the new extended match:
+ // newData[lastNewPosition + forwardExtension ... newPosition - backwardExtension - 1]
+
+ int oldPositionOffset = 0;
+ if (newPosition < newData.length()) {
+ // The offset from the byte after the last byte of the previous match in |newData| to the
+ // first byte of the new match in |oldData|.
+ oldPositionOffset =
+ (oldPosition - backwardExtension) - (lastOldPosition + forwardExtension);
+ }
+
+ // The number of bytes in |newData| between |lastNewPosition| and |newPosition| which are part
+ // of neither the previous extended match or the new extended match.
+ int newNoMatchLength =
+ (newPosition - backwardExtension) - (lastNewPosition + forwardExtension);
+
+ writeEntry(
+ newData,
+ oldData,
+ lastNewPosition,
+ lastOldPosition,
+ forwardExtension,
+ newNoMatchLength,
+ oldPositionOffset,
+ outputStream);
+
+ lastNewPosition = newPosition - backwardExtension;
+ lastOldPosition = oldPosition - backwardExtension;
+ }
+ }
+
+ /**
+ * Generate a diff between the old data and the new, writing to the specified stream. Uses {@link
+ * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length.
+ *
+ * @param oldData the old data
+ * @param newData the new data
+ * @param outputStream where output should be written
+ * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff
+ * @throws IOException if unable to read or write data
+ * @throws InterruptedException if any thread interrupts this thread
+ */
+ public static void generatePatch(
+ final RandomAccessObject oldData,
+ final RandomAccessObject newData,
+ final OutputStream outputStream,
+ final RandomAccessObjectFactory randomAccessObjectFactory)
+ throws IOException, InterruptedException {
+ generatePatch(
+ oldData, newData, outputStream, randomAccessObjectFactory, DEFAULT_MINIMUM_MATCH_LENGTH);
+ }
+
+ /**
+ * Generate a diff between the old data and the new, writing to the specified stream. Uses
+ * in-memory byte array storage for ancillary allocations and {@link
+ * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length.
+ *
+ * @param oldData the old data
+ * @param newData the new data
+ * @param outputStream where output should be written
+ * @throws IOException if unable to read or write data
+ * @throws InterruptedException if any thread interrupts this thread
+ */
+ public static void generatePatch(
+ final byte[] oldData, final byte[] newData, final OutputStream outputStream)
+ throws IOException, InterruptedException {
+ generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH);
+ }
+
+ /**
+ * Generate a diff between the old data and the new, writing to the specified stream. Uses
+ * in-memory byte array storage for ancillary allocations.
+ *
+ * @param oldData the old data
+ * @param newData the new data
+ * @param outputStream where output should be written
+ * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the
+ * oldData and newData. This can have a significant effect on both the generated patch size
+ * and the amount of time and memory required to apply the patch.
+ * @throws IOException if unable to read or write data
+ * @throws InterruptedException if any thread interrupts this thread
+ */
+ public static void generatePatch(
+ final byte[] oldData,
+ final byte[] newData,
+ final OutputStream outputStream,
+ final int minimumMatchLength)
+ throws IOException, InterruptedException {
+ try (RandomAccessObject oldDataRAO =
+ new RandomAccessObject.RandomAccessByteArrayObject(oldData);
+ RandomAccessObject newDataRAO =
+ new RandomAccessObject.RandomAccessByteArrayObject(newData); ) {
+ generatePatch(
+ oldDataRAO,
+ newDataRAO,
+ outputStream,
+ new RandomAccessObjectFactory.RandomAccessByteArrayObjectFactory(),
+ minimumMatchLength);
+ }
+ }
+
+ /**
+ * Generate a diff between the old data and the new, writing to the specified stream. Uses
+ * file-based storage for ancillary operations and {@link #DEFAULT_MINIMUM_MATCH_LENGTH} as the
+ * match length.
+ *
+ * @param oldData a file containing the old data
+ * @param newData a file containing the new data
+ * @param outputStream where output should be written
+ * @throws IOException if unable to read or write data
+ * @throws InterruptedException if any thread interrupts this thread
+ */
+ public static void generatePatch(
+ final File oldData, final File newData, final OutputStream outputStream)
+ throws IOException, InterruptedException {
+ generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH);
+ }
+
+ /**
+ * Generate a diff between the old data and the new, writing to the specified stream. Uses
+ * file-based storage for ancillary allocations.
+ *
+ * @param oldData a file containing the old data
+ * @param newData a file containing the new data
+ * @param outputStream where output should be written
+ * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the
+ * oldData and newData. This can have a significant effect on both the generated patch size
+ * and the amount of time and memory required to apply the patch.
+ * @throws IOException if unable to read or write data
+ * @throws InterruptedException if any thread interrupts this thread
+ */
+ public static void generatePatch(
+ final File oldData,
+ final File newData,
+ final OutputStream outputStream,
+ final int minimumMatchLength)
+ throws IOException, InterruptedException {
+ try (RandomAccessFile oldDataRAF = new RandomAccessFile(oldData, "r");
+ RandomAccessFile newDataRAF = new RandomAccessFile(newData, "r");
+ RandomAccessObject oldDataRAO =
+ new RandomAccessObject.RandomAccessMmapObject(oldDataRAF, "r");
+ RandomAccessObject newDataRAO =
+ new RandomAccessObject.RandomAccessMmapObject(newDataRAF, "r"); ) {
+ generatePatch(
+ oldDataRAO,
+ newDataRAO,
+ outputStream,
+ new RandomAccessObjectFactory.RandomAccessMmapObjectFactory("rw"),
+ minimumMatchLength);
+ }
+
+ // Due to a bug in the JVM (http://bugs.java.com/view_bug.do?bug_id=6417205), we need to call
+ // gc() and runFinalization() explicitly to get rid of any MappedByteBuffers we may have used
+ // during patch generation.
+ System.gc();
+ System.runFinalization();
+ }
+
+ /**
+ * Generate a diff between the old data and the new, writing to the specified stream.
+ *
+ * @param oldData the old data
+ * @param newData the new data
+ * @param outputStream where output should be written
+ * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff
+ * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the
+ * oldData and newData. This can have a significant effect on both the generated patch size
+ * and the amount of time and memory required to apply the patch.
+ * @throws IOException if unable to read or write data
+ * @throws InterruptedException if any thread interrupts this thread
+ */
+ public static void generatePatch(
+ final RandomAccessObject oldData,
+ final RandomAccessObject newData,
+ final OutputStream outputStream,
+ final RandomAccessObjectFactory randomAccessObjectFactory,
+ final int minimumMatchLength)
+ throws IOException, InterruptedException {
+ // Write header (signature + new file length)
+ outputStream.write("ENDSLEY/BSDIFF43".getBytes(StandardCharsets.US_ASCII));
+ BsUtil.writeFormattedLong(newData.length(), outputStream);
+
+ // Do the suffix search.
+ try (final RandomAccessObject groupArray =
+ new DivSuffixSorter(randomAccessObjectFactory).suffixSort(oldData)) {
+ BsDiffMatcher matcher = new BsDiffMatcher(oldData, newData, groupArray, minimumMatchLength);
+ generatePatchWithMatcher(oldData, newData, matcher, outputStream);
+ }
+ }
+}