diff options
Diffstat (limited to 'generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java')
-rw-r--r-- | generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java b/generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java new file mode 100644 index 0000000..43e7898 --- /dev/null +++ b/generator/src/main/java/com/google/archivepatcher/generator/bsdiff/BsDiffPatchWriter.java @@ -0,0 +1,391 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.archivepatcher.generator.bsdiff; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.RandomAccessFile; +import java.nio.charset.StandardCharsets; + +// TODO(andrewhayden) clean up the various generatePatch(...) methods, there are too many. + +/** + * A helper class that handles the main BsDiff I/O and patch generation, by calling into the main + * algorithm implementation in {@link BsDiff} + */ +public class BsDiffPatchWriter { + + static final int DEFAULT_MINIMUM_MATCH_LENGTH = 16; + + /** + * Write a patch entry. + * + * @param newData + * @param oldData + * @param newPosition the first byte in |newData| to write either raw or as a diff against + * |oldData|. + * @param oldPosition the first byte in |oldData| to diff against |newData|. Ignored if + * |diffLength| is empty. + * @param diffLength the number of bytes to diff between |newData| and |oldData| starting at + * |newPosition| and |oldPosition| respectively. + * @param extraLength the number of bytes from |newData| to write starting at |newPosition + + * diffLength|. + * @param oldPositionOffsetForNextEntry the offset between |oldPosition| for the next entry and + * |oldPosition| + |diffLength| for this entry. + * @param outputStream the output stream to write the patch entry to. + * @throws IOException if unable to read or write data + */ + private static void writeEntry( + RandomAccessObject newData, + RandomAccessObject oldData, + int newPosition, + int oldPosition, + int diffLength, + int extraLength, + int oldPositionOffsetForNextEntry, + OutputStream outputStream) + throws IOException { + // Write control data + BsUtil.writeFormattedLong(diffLength, outputStream); + BsUtil.writeFormattedLong(extraLength, outputStream); + BsUtil.writeFormattedLong(oldPositionOffsetForNextEntry, outputStream); + + newData.seek(newPosition); + oldData.seek(oldPosition); + // Write diff data + for (int i = 0; i < diffLength; ++i) { + // TODO(hartmanng): test using a small buffer to insulate read() calls (and write() for that + // matter). + outputStream.write(newData.readUnsignedByte() - oldData.readUnsignedByte()); + } + + if (extraLength > 0) { + // This seek will throw an IOException sometimes, if we try to seek to the byte after + // the end of the RandomAccessObject. + newData.seek(newPosition + diffLength); + // Write extra data + for (int i = 0; i < extraLength; ++i) { + // TODO(hartmanng): same as above - test buffering readByte(). + outputStream.write(newData.readByte()); + } + } + } + + /** + * Generate a BsDiff patch given a Matcher. + * + * @param oldData the old blob + * @param newData the new blob + * @param matcher a Matcher to find binary matches between oldData and newData + * @param outputStream the outputStream for the new generated patch + * @throws IOException if unable to read or write data + * @throws InterruptedException if any thread interrupts this thread + */ + // Visible for testing only + static void generatePatchWithMatcher( + RandomAccessObject oldData, + RandomAccessObject newData, + Matcher matcher, + OutputStream outputStream) + throws IOException, InterruptedException { + // Compute the differences, writing ctrl as we go + int lastNewPosition = 0; + int lastOldPosition = 0; + + int newPosition = 0; + int oldPosition = 0; + while (newPosition < newData.length()) { + if (Thread.interrupted()) { + throw new InterruptedException(); + } + Matcher.NextMatch nextMatch = matcher.next(); + if (nextMatch.didFindMatch) { + newPosition = nextMatch.newPosition; + oldPosition = nextMatch.oldPosition; + } else { + newPosition = (int) newData.length(); + } + + // Extend the current match (|newPosition|, |oldPosition|) backward such that 50% of the bytes + // match. We have written diff / extra data up till |lastNewPosition| so we cannot extend + // further back than |lastNewPosition|. + int backwardExtension = 0; + if (newPosition < newData.length()) { + int score = 0; + int bestScore = 0; + for (int i = 1; newPosition - i >= lastNewPosition && oldPosition >= i; ++i) { + oldData.seek(oldPosition - i); + newData.seek(newPosition - i); + if (oldData.readByte() == newData.readByte()) { + ++score; + } else { + --score; + } + + if (score > bestScore) { + bestScore = score; + backwardExtension = i; + } + } + } + + // Extend the previous match (|lastNewPosition|, |lastOldPosition|) forward such that 50% of + // the bytes match. (|lastNewPosition|, |lastOldPosition|) were extended backward in the + // previous iteration of the loop. + int forwardExtension = 0; + { + int score = 0; + int bestScore = 0; + oldData.seek(lastOldPosition); + newData.seek(lastNewPosition); + for (int i = 0; + lastNewPosition + i < newPosition && lastOldPosition + i < oldData.length(); + ++i) { + if (oldData.readByte() == newData.readByte()) { + ++score; + } else { + --score; + } + if (score > bestScore) { + bestScore = score; + forwardExtension = i + 1; + } + } + } + + // Adjust |backwardExtension| and |forwardExtension| such that the extended matches do + // not intersect in |newData|. They can intersect in |oldData|. + int overlap = (lastNewPosition + forwardExtension) - (newPosition - backwardExtension); + if (overlap > 0) { + int score = 0; + int bestScore = 0; + int backwardExtensionDecrement = 0; + for (int i = 0; i < overlap; ++i) { + newData.seek(lastNewPosition + forwardExtension - overlap + i); + oldData.seek(lastOldPosition + forwardExtension - overlap + i); + if (newData.readByte() == oldData.readByte()) { + ++score; + } + + newData.seek(newPosition - backwardExtension + i); + oldData.seek(oldPosition - backwardExtension + i); + if (newData.readByte() == oldData.readByte()) { + --score; + } + if (score > bestScore) { + bestScore = score; + backwardExtensionDecrement = i + 1; + } + } + forwardExtension -= overlap - backwardExtensionDecrement; + backwardExtension -= backwardExtensionDecrement; + } + + // Write an entry with: + // - The diff between |newData| and |oldData| for the previous extended match: + // oldData[lastOldPosition ... lastOldPosition + forwardExtension - 1] and + // newData[lastNewPosition ... lastNewPosition + forwardExtension - 1]. + // - The bytes in |newData| between |lastNewPosition| and |newPosition| which are part of + // neither the previous extended match or the new extended match: + // newData[lastNewPosition + forwardExtension ... newPosition - backwardExtension - 1] + + int oldPositionOffset = 0; + if (newPosition < newData.length()) { + // The offset from the byte after the last byte of the previous match in |newData| to the + // first byte of the new match in |oldData|. + oldPositionOffset = + (oldPosition - backwardExtension) - (lastOldPosition + forwardExtension); + } + + // The number of bytes in |newData| between |lastNewPosition| and |newPosition| which are part + // of neither the previous extended match or the new extended match. + int newNoMatchLength = + (newPosition - backwardExtension) - (lastNewPosition + forwardExtension); + + writeEntry( + newData, + oldData, + lastNewPosition, + lastOldPosition, + forwardExtension, + newNoMatchLength, + oldPositionOffset, + outputStream); + + lastNewPosition = newPosition - backwardExtension; + lastOldPosition = oldPosition - backwardExtension; + } + } + + /** + * Generate a diff between the old data and the new, writing to the specified stream. Uses {@link + * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length. + * + * @param oldData the old data + * @param newData the new data + * @param outputStream where output should be written + * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff + * @throws IOException if unable to read or write data + * @throws InterruptedException if any thread interrupts this thread + */ + public static void generatePatch( + final RandomAccessObject oldData, + final RandomAccessObject newData, + final OutputStream outputStream, + final RandomAccessObjectFactory randomAccessObjectFactory) + throws IOException, InterruptedException { + generatePatch( + oldData, newData, outputStream, randomAccessObjectFactory, DEFAULT_MINIMUM_MATCH_LENGTH); + } + + /** + * Generate a diff between the old data and the new, writing to the specified stream. Uses + * in-memory byte array storage for ancillary allocations and {@link + * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length. + * + * @param oldData the old data + * @param newData the new data + * @param outputStream where output should be written + * @throws IOException if unable to read or write data + * @throws InterruptedException if any thread interrupts this thread + */ + public static void generatePatch( + final byte[] oldData, final byte[] newData, final OutputStream outputStream) + throws IOException, InterruptedException { + generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH); + } + + /** + * Generate a diff between the old data and the new, writing to the specified stream. Uses + * in-memory byte array storage for ancillary allocations. + * + * @param oldData the old data + * @param newData the new data + * @param outputStream where output should be written + * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the + * oldData and newData. This can have a significant effect on both the generated patch size + * and the amount of time and memory required to apply the patch. + * @throws IOException if unable to read or write data + * @throws InterruptedException if any thread interrupts this thread + */ + public static void generatePatch( + final byte[] oldData, + final byte[] newData, + final OutputStream outputStream, + final int minimumMatchLength) + throws IOException, InterruptedException { + try (RandomAccessObject oldDataRAO = + new RandomAccessObject.RandomAccessByteArrayObject(oldData); + RandomAccessObject newDataRAO = + new RandomAccessObject.RandomAccessByteArrayObject(newData); ) { + generatePatch( + oldDataRAO, + newDataRAO, + outputStream, + new RandomAccessObjectFactory.RandomAccessByteArrayObjectFactory(), + minimumMatchLength); + } + } + + /** + * Generate a diff between the old data and the new, writing to the specified stream. Uses + * file-based storage for ancillary operations and {@link #DEFAULT_MINIMUM_MATCH_LENGTH} as the + * match length. + * + * @param oldData a file containing the old data + * @param newData a file containing the new data + * @param outputStream where output should be written + * @throws IOException if unable to read or write data + * @throws InterruptedException if any thread interrupts this thread + */ + public static void generatePatch( + final File oldData, final File newData, final OutputStream outputStream) + throws IOException, InterruptedException { + generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH); + } + + /** + * Generate a diff between the old data and the new, writing to the specified stream. Uses + * file-based storage for ancillary allocations. + * + * @param oldData a file containing the old data + * @param newData a file containing the new data + * @param outputStream where output should be written + * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the + * oldData and newData. This can have a significant effect on both the generated patch size + * and the amount of time and memory required to apply the patch. + * @throws IOException if unable to read or write data + * @throws InterruptedException if any thread interrupts this thread + */ + public static void generatePatch( + final File oldData, + final File newData, + final OutputStream outputStream, + final int minimumMatchLength) + throws IOException, InterruptedException { + try (RandomAccessFile oldDataRAF = new RandomAccessFile(oldData, "r"); + RandomAccessFile newDataRAF = new RandomAccessFile(newData, "r"); + RandomAccessObject oldDataRAO = + new RandomAccessObject.RandomAccessMmapObject(oldDataRAF, "r"); + RandomAccessObject newDataRAO = + new RandomAccessObject.RandomAccessMmapObject(newDataRAF, "r"); ) { + generatePatch( + oldDataRAO, + newDataRAO, + outputStream, + new RandomAccessObjectFactory.RandomAccessMmapObjectFactory("rw"), + minimumMatchLength); + } + + // Due to a bug in the JVM (http://bugs.java.com/view_bug.do?bug_id=6417205), we need to call + // gc() and runFinalization() explicitly to get rid of any MappedByteBuffers we may have used + // during patch generation. + System.gc(); + System.runFinalization(); + } + + /** + * Generate a diff between the old data and the new, writing to the specified stream. + * + * @param oldData the old data + * @param newData the new data + * @param outputStream where output should be written + * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff + * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the + * oldData and newData. This can have a significant effect on both the generated patch size + * and the amount of time and memory required to apply the patch. + * @throws IOException if unable to read or write data + * @throws InterruptedException if any thread interrupts this thread + */ + public static void generatePatch( + final RandomAccessObject oldData, + final RandomAccessObject newData, + final OutputStream outputStream, + final RandomAccessObjectFactory randomAccessObjectFactory, + final int minimumMatchLength) + throws IOException, InterruptedException { + // Write header (signature + new file length) + outputStream.write("ENDSLEY/BSDIFF43".getBytes(StandardCharsets.US_ASCII)); + BsUtil.writeFormattedLong(newData.length(), outputStream); + + // Do the suffix search. + try (final RandomAccessObject groupArray = + new DivSuffixSorter(randomAccessObjectFactory).suffixSort(oldData)) { + BsDiffMatcher matcher = new BsDiffMatcher(oldData, newData, groupArray, minimumMatchLength); + generatePatchWithMatcher(oldData, newData, matcher, outputStream); + } + } +} |