summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Deymo <deymo@google.com>2015-10-13 02:15:31 -0700
committerAlex Deymo <deymo@google.com>2015-10-23 20:09:11 -0700
commit03f1debab429e673ba5e9e317c5a04e36e850cef (patch)
treee2c288b30f3515aaee669ea377fb3214a02468cf
parent20891f9c246ec36e6c148579522ac00051b64457 (diff)
downloadbsdiff-03f1debab429e673ba5e9e317c5a04e36e850cef.tar.gz
bspatch: Use a C++ interface for file access.
This patch moves the exfile.cc implementation to a C++ class with an abstract interface of a file. The implementation of exfile.cc, renamed to extents_file.cc, now uses C++ STL classes and has unittests to test its core functionality. Bug: 24478450 Test: Unittests added. make all test -j5 && ./unittests Change-Id: I8d8f07150ad2ea465c55b5178ca9fbab49185eea
-rw-r--r--Android.mk11
-rw-r--r--Makefile19
-rw-r--r--exfile.cc410
-rw-r--r--exfile.h59
-rw-r--r--extents.h5
-rw-r--r--extents_file.cc111
-rw-r--r--extents_file.h91
-rw-r--r--extents_file_unittest.cc176
-rw-r--r--file.cc94
-rw-r--r--file.h38
-rw-r--r--file_interface.h44
11 files changed, 579 insertions, 479 deletions
diff --git a/Android.mk b/Android.mk
index 2f16f2c..cec06c6 100644
--- a/Android.mk
+++ b/Android.mk
@@ -21,6 +21,12 @@ bsdiff_common_cflags := \
-Wextra \
-Wno-unused-parameter
+bspatch_src_files := \
+ bspatch.cc \
+ extents.cc \
+ extents_file.cc \
+ file.cc
+
include $(CLEAR_VARS)
LOCAL_MODULE := bsdiff
LOCAL_CPP_EXTENSION := .cc
@@ -39,9 +45,8 @@ include $(CLEAR_VARS)
LOCAL_MODULE := bspatch
LOCAL_CPP_EXTENSION := .cc
LOCAL_SRC_FILES := \
- bspatch.cc \
- bspatch_main.cc \
- extents.cc
+ $(bspatch_src_files) \
+ bspatch_main.cc
LOCAL_CFLAGS := $(bsdiff_common_cflags)
LOCAL_C_INCLUDES += external/bzip2
LOCAL_STATIC_LIBRARIES := libbz
diff --git a/Makefile b/Makefile
index f80fa32..5d1ee39 100644
--- a/Makefile
+++ b/Makefile
@@ -32,12 +32,14 @@ BSDIFF_OBJS = \
BSPATCH_LIBS = -lbz2
BSPATCH_OBJS = \
bspatch.o \
- exfile.o \
- extents.o
+ extents.o \
+ extents_file.o \
+ file.o
-UNITTEST_LIBS = -lgtest
+UNITTEST_LIBS = -lgmock -lgtest
UNITTEST_OBJS = \
bsdiff_unittest.o \
+ extents_file_unittest.o \
test_utils.o \
testrunner.o
@@ -49,16 +51,21 @@ bspatch: LDLIBS += $(BSPATCH_LIBS)
unittests: LDLIBS += $(BSDIFF_LIBS) $(BSPATCH_LIBS) $(UNITTEST_LIBS)
unittests: $(BSPATCH_OBJS) $(BSDIFF_OBJS) $(UNITTEST_OBJS)
+
+unittests bsdiff bspatch:
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ $(LDLIBS)
# Source file dependencies.
bsdiff.o: bsdiff.cc
bsdiff_main.o: bsdiff_main.cc bsdiff.h
bsdiff_unittest.o: bsdiff_unittest.cc bsdiff.h test_utils.h
-bspatch.o: bspatch.cc exfile.h extents.h
+bspatch.o: bspatch.cc extents.h extents_file.h file_interface.h
bspatch_main.o: bspatch_main.cc bspatch.h
-exfile.o: exfile.cc exfile.h
-extents.o: extents.cc extents.h exfile.h
+extents.o: extents.cc extents.h extents_file.h file_interface.h
+extents_file.o: extents_file.cc extents_file.h file_interface.h
+extents_file_unittest.o: extents_file_unittest.cc extents_file.h \
+ file_interface.h
+file.o: file.cc file.h file_interface.h
testrunner.o: testrunner.cc
test_utils.o: test_utils.cc test_utils.h
diff --git a/exfile.cc b/exfile.cc
deleted file mode 100644
index 806424e..0000000
--- a/exfile.cc
+++ /dev/null
@@ -1,410 +0,0 @@
-// Copyright 2015 The Chromium OS Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "exfile.h"
-
-#include <assert.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-/*
- * Extent files implementation. Some things worth noting:
- *
- * - We are using glibc's buffered FILE objects for the underlying file I/O;
- * this contributes to improved performance, especially with badly fragmented
- * extents. However, the FILE handle we return to the caller is decidedly
- * unbuffered: making it buffered too seems superfluous, causing excess data
- * copying and memory use.
- *
- * - We maintain the "logical" file position separately from the "physical"
- * (underlying) file position. The latter is updated lazily whenever actual
- * file I/O is about to be performed.
- *
- * - The logical position of an extent file is internally represented by the
- * current extent index (curr_ex_idx) and the position within the current
- * extent (curr_ex_pos), as well as an absolute logical position (curr_pos).
- * In general, curr_pos should equal the total length of all extents prior to
- * curr_ex_idx, plus curr_ex_pos. Also, curr_ex_idx may range between 0 and
- * the total extent count; if it is exactly the latter, then curr_ex_pos must
- * be zero, representing the fact that the we are at the logical end of the
- * file. Otherwise, curr_ex_pos may range between 0 and the length of the
- * current extent; if it is exactly the latter, then this is equivalent to
- * position zero on the next extent. All functions should honor this
- * duality.
- *
- * - Seeking is done efficiently at O(log(D)), where D is the
- * number of extents between the current position and the new one. This seems
- * like a good midway for supporting both sequential and random access.
- */
-
-
-#define TRUE 1
-#define FALSE 0
-
-#define arraysize(x) (sizeof(x) / sizeof(*(x)))
-
-
-/* Extent prefix length. */
-typedef struct {
- size_t prec; /* total length of preceding extents */
- size_t total; /* total length including current extent */
-} prefix_len_t;
-
-/* Extent file logical modes. Used as index to the mapping from logical modes
- * to open(2) and fopen(3) modes below. */
-typedef enum {
- EXFILE_MODE_RO,
- EXFILE_MODE_WO,
- EXFILE_MODE_RW,
- EXFILE_MODE_MAX /* sentinel */
-} exfile_mode_t;
-
-/* An extent file control object (aka "cookie"). */
-typedef struct {
- int fd; /* underlying file descriptor */
- size_t ex_count; /* number of extents (non-zero) */
- ex_t* ex_arr; /* array of extents */
- prefix_len_t* prefix_len_arr; /* total lengths of extent prefixes */
- void (*ex_free)(void*); /* extent array free function */
- size_t total_ex_len; /* total length of all extents (constant) */
- off_t curr_file_pos; /* current underlying file position */
- size_t curr_ex_idx; /* current extent index */
- size_t curr_ex_pos; /* current position within extent */
- off_t curr_pos; /* current logical file position */
-} exfile_t;
-
-
-/* Mapping from fopen(3) modes to extent file logical modes. */
-static const struct {
- const char* fopen_mode;
- exfile_mode_t mode;
-} fopen_mode_to_mode[] = {
- {"r", EXFILE_MODE_RO},
- {"r+", EXFILE_MODE_RW},
- {"w", EXFILE_MODE_WO},
- {"w+", EXFILE_MODE_RW},
-};
-
-
-/* Mapping from extent file logical modes to open(2) flags. */
-static const int mode_to_open_flags[EXFILE_MODE_MAX] = {
- O_RDONLY,
- O_WRONLY,
- O_RDWR,
-};
-
-
-/* Searches an array |ex_arr| of |ex_count| extents and returns the index of
- * the extent that contains the location |pos|. Uses an array |prefix_len_arr|
- * of corresponding prefix lengths. The total complexity is O(log(D)), where D
- * is the distance between the returned extent index and |init_ex_idx|. */
-static size_t ex_arr_search(size_t ex_count,
- const ex_t* ex_arr,
- const prefix_len_t* prefix_len_arr,
- size_t pos,
- size_t init_ex_idx) {
- assert(ex_arr && ex_count);
- const ssize_t last_ex_idx = ex_count - 1;
- assert(init_ex_idx <= ex_count);
- assert(pos < prefix_len_arr[last_ex_idx].total);
- if (init_ex_idx == ex_count)
- init_ex_idx = last_ex_idx; /* adjustment for purposes of the search below */
-
- /* First, search in exponentially increasing leaps from the current extent,
- * until an interval bounding the target position was obtained. Here i and j
- * are the left and right (inclusive) index boundaries, respectively. */
- ssize_t i = init_ex_idx;
- ssize_t j = i;
- size_t leap = 1;
- /* Go left, as needed. */
- while (i > 0 && pos < prefix_len_arr[i].prec) {
- j = i - 1;
- if ((i -= leap) < 0)
- i = 0;
- leap <<= 1;
- }
- /* Go right, as needed. */
- while (j < last_ex_idx && pos >= prefix_len_arr[j].total) {
- i = j + 1;
- if ((j += leap) > last_ex_idx)
- j = last_ex_idx;
- leap <<= 1;
- }
-
- /* Then, perform a binary search between i and j. */
- size_t k = 0;
- while (1) {
- k = (i + j) / 2;
- if (pos < prefix_len_arr[k].prec)
- j = k - 1;
- else if (pos >= prefix_len_arr[k].total)
- i = k + 1;
- else
- break;
- }
-
- return k;
-}
-
-/* Performs I/O operations (either read or write) on an extent file, advancing
- * through consecutive extents and updating the logical/physical file position
- * as we go. */
-static ssize_t exfile_io(exfile_t* xf, int do_read, char* buf, size_t size) {
- if (xf->curr_ex_idx == xf->ex_count)
- return 0; /* end-of-extent-file */
-
- /* Reading or writing? */
- typedef ssize_t(io_func_t)(int, void*, size_t);
- io_func_t* io_func;
- ssize_t error_ret_val;
- if (do_read) {
- io_func = read;
- error_ret_val = -1;
- } else {
- io_func = (io_func_t*)write;
- error_ret_val = 0; /* must not return a negative value when writing */
- }
-
- /* Start processing data along extents. */
- const ex_t* curr_ex = xf->ex_arr + xf->curr_ex_idx;
- assert(curr_ex->len >= xf->curr_ex_pos);
- size_t curr_ex_rem_len = curr_ex->len - xf->curr_ex_pos;
- ssize_t total_bytes = 0;
- while (size) {
- /* Advance to the next extent of non-zero length. */
- while (curr_ex_rem_len == 0) {
- xf->curr_ex_idx++;
- xf->curr_ex_pos = 0;
- if (xf->curr_ex_idx == xf->ex_count)
- return total_bytes; /* end-of-extent-file */
- curr_ex++;
- curr_ex_rem_len = curr_ex->len;
- }
-
- const int is_real_ex = (curr_ex->off >= 0);
-
- /* Seek to the correct file position, as necessary. */
- if (is_real_ex) {
- const off_t file_pos = curr_ex->off + xf->curr_ex_pos;
- if (xf->curr_file_pos != file_pos) {
- if (lseek(xf->fd, file_pos, SEEK_SET) == (off_t)-1) {
- xf->curr_file_pos = -1; /* unknown file position */
- return total_bytes ? total_bytes : error_ret_val;
- }
- xf->curr_file_pos = file_pos;
- }
- }
-
- /* Process data to the end of the current extent or the requested
- * count, whichever is smaller. */
- size_t io_count = (size < curr_ex_rem_len ? size : curr_ex_rem_len);
- ssize_t io_bytes = io_count;
- if (is_real_ex)
- io_bytes = io_func(xf->fd, buf, io_count);
- else if (do_read)
- memset(buf, 0, io_count);
-
- /* Stop on error. */
- if (io_bytes < 0) {
- if (total_bytes == 0)
- total_bytes = error_ret_val;
- break;
- }
-
- /* Update read state. */
- total_bytes += io_bytes;
- if (is_real_ex)
- xf->curr_file_pos += io_bytes;
- xf->curr_ex_pos += io_bytes;
- xf->curr_pos += io_bytes;
-
- /* If we didn't read the whole extent, finish; delegate handling of
- * partial read/write back to the caller. */
- if ((curr_ex_rem_len -= io_bytes) > 0)
- break;
-
- /* Update total count and buffer pointer. */
- size -= io_bytes;
- buf += io_bytes;
- }
-
- return total_bytes;
-}
-
-/* Reads up to |size| bytes from an extent file into |buf|. This implements the
- * cookie_read_function_t interface and is a thin wrapper around exfile_io()
- * (see above). Returns the number of bytes read, or -1 on error. */
-static ssize_t exfile_read(void* cookie, char* buf, size_t size) {
- return exfile_io((exfile_t*)cookie, TRUE, buf, size);
-}
-
-/* Writes up to |size| bytes from |buf| to an extent file. This implements the
- * cookie_write_function_t interface and is a thin wrapper around exfile_io()
- * (see above). Returns the number of bytes written; must NOT return a negative
- * value. */
-static ssize_t exfile_write(void* cookie, const char* buf, size_t size) {
- return exfile_io((exfile_t*)cookie, FALSE, (char*)buf, size);
-}
-
-/* Performs seek on an extent file, repositioning it to the value of |*pos_p|
- * according to |whence|. This implements the cookie_seek_function_t interface.
- * On success, stores the resulting logical position measured in bytes along
- * contiguous extents into |*pos_p| and returns 0; otherwise returns -1. */
-static int exfile_seek(void* cookie, off64_t* pos_p, int whence) {
- exfile_t* xf = (exfile_t*)cookie;
-
- /* Compute the absolute logical target position. */
- off64_t new_pos = *pos_p;
- if (whence == SEEK_CUR)
- new_pos += xf->curr_pos;
- else if (whence == SEEK_END)
- new_pos += xf->total_ex_len;
-
- /* Ensure that the target position is valid. Note that repositioning the
- * file right past the last extent is considered valid, in line with normal
- * seek behavior, although no write (nor read) can be performed there. */
- if (new_pos < 0 || new_pos > (off64_t)xf->total_ex_len)
- return -1;
-
- if (new_pos != (off64_t)xf->curr_pos) {
- /* Find the extend that contains the requested logical position; handle
- * special case upfront, for efficiency. */
- size_t new_ex_idx = 0;
- if (new_pos == (off64_t)xf->total_ex_len)
- new_ex_idx = xf->ex_count;
- else if (new_pos)
- new_ex_idx = ex_arr_search(xf->ex_count, xf->ex_arr, xf->prefix_len_arr,
- new_pos, xf->curr_ex_idx);
-
- /* Set the logical position markers. */
- xf->curr_ex_idx = new_ex_idx;
- xf->curr_ex_pos =
- (new_ex_idx < xf->ex_count
- ? (size_t)(new_pos - xf->prefix_len_arr[new_ex_idx].prec)
- : 0);
- xf->curr_pos = (off_t)new_pos;
- }
-
- *pos_p = new_pos;
- return 0;
-}
-
-/* Closes an open extent file. This implements the cookie_close_function_t
- * interface. Always returns 0 (success). */
-static int exfile_close(void* cookie) {
- exfile_t* xf = (exfile_t*)cookie;
- if (xf) {
- if (xf->fd >= 0)
- close(xf->fd);
- free(xf->prefix_len_arr);
- if (xf->ex_free)
- xf->ex_free(xf->ex_arr);
- free(xf);
- }
- return 0;
-}
-
-static const cookie_io_functions_t cookie_io_funcs = {
- .read = exfile_read,
- .write = exfile_write,
- .seek = exfile_seek,
- .close = exfile_close,
-};
-
-static FILE* exfile_open(int fd,
- const char* path,
- const char* fopen_mode,
- ex_t* ex_arr,
- size_t ex_count,
- void (*ex_free)(void*)) {
- /* Argument sanity check. */
- if (!(ex_arr && ex_count && (fd >= 0 || path) && (fd < 0 || !path)))
- return NULL;
-
- /* Validate mode argument. */
- exfile_mode_t mode = EXFILE_MODE_MAX;
- size_t i;
- for (i = 0; i < arraysize(fopen_mode_to_mode); i++)
- if (!strcmp(fopen_mode_to_mode[i].fopen_mode, fopen_mode)) {
- mode = fopen_mode_to_mode[i].mode;
- break;
- }
- if (mode == EXFILE_MODE_MAX)
- return NULL;
-
- /* Open the underlying file, if not already provided. */
- int do_close_fd = FALSE;
- if (fd < 0) {
- if ((fd = open(path, mode_to_open_flags[mode])) < 0)
- return NULL;
- do_close_fd = TRUE;
- }
-
- /* Allocate memory and open file streams, for both the underlying file and
- * the handle returned to the caller. */
- exfile_t* xf = NULL;
- prefix_len_t* prefix_len_arr = NULL;
- FILE* handle = NULL;
- if (!((xf = (exfile_t*)calloc(1, sizeof(exfile_t))) &&
- (prefix_len_arr =
- (prefix_len_t*)malloc(sizeof(prefix_len_t) * ex_count)) &&
- (handle = fopencookie(xf, fopen_mode, cookie_io_funcs)))) {
- /* If a file was opened above, close it. */
- if (do_close_fd)
- close(fd);
- if (xf)
- xf->fd = -1; /* invalidate prior to calling exfile_close() */
-
- free(prefix_len_arr);
- if (handle)
- fclose(handle); /* will call exfile_close already */
- else
- exfile_close(xf);
- return NULL;
- }
-
- /* Compute the prefix lengths. */
- size_t prefix_len = 0;
- for (i = 0; i < ex_count; i++) {
- prefix_len_arr[i].prec = prefix_len;
- prefix_len += ex_arr[i].len;
- prefix_len_arr[i].total = prefix_len;
- }
-
- /* Configure control object, including physical/logical file position. */
- xf->fd = fd;
- xf->ex_count = ex_count;
- xf->ex_arr = ex_arr;
- xf->prefix_len_arr = prefix_len_arr;
- xf->ex_free = ex_free;
- xf->total_ex_len = prefix_len_arr[ex_count - 1].total;
- xf->curr_file_pos = lseek(fd, 0, SEEK_CUR);
- xf->curr_ex_idx = 0;
- xf->curr_ex_pos = 0;
- xf->curr_pos = 0;
-
- /* Return the external stream handle. */
- return handle;
-}
-
-FILE* exfile_fopen(const char* path,
- const char* fopen_mode,
- ex_t* ex_arr,
- size_t ex_count,
- void (*ex_free)(void*)) {
- return exfile_open(-1, path, fopen_mode, ex_arr, ex_count, ex_free);
-}
-
-FILE* exfile_fdopen(int fd,
- const char* fopen_mode,
- ex_t* ex_arr,
- size_t ex_count,
- void (*ex_free)(void*)) {
- return exfile_open(fd, NULL, fopen_mode, ex_arr, ex_count, ex_free);
-}
diff --git a/exfile.h b/exfile.h
deleted file mode 100644
index e109f04..0000000
--- a/exfile.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2015 The Chromium OS Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef _BSDIFF_EXFILE_H_
-#define _BSDIFF_EXFILE_H_
-
-#include <stdio.h>
-
-/*
- * Extent files.
- *
- * This modules provides a familiar interface for handling files through an
- * indirection layer of extents, which are contiguous chunks of variable length
- * at arbitrary offsets within a file. Once an extent file handle is obtained,
- * users may read, write and seek as they do with ordinary files, having the I/O
- * with the underlying file done for them by the extent file implementation. The
- * implementation supports "sparse extents", which are assumed to contain zeros
- * but otherwise have no actual representation in the underlying file; these are
- * denoted by negative offset values.
- *
- * Unlike ordinary files, the size of an extent file is fixed; it is not
- * truncated on open, nor is writing past the extent span allowed. Also, writing
- * to a sparse extent has no effect and will not raise an error.
- */
-
-
-/* An extent, defined by an offset and a length. */
-typedef struct {
- off_t off; /* the extent offset; negative indicates a sparse extent */
- size_t len; /* the extent length */
-} ex_t;
-
-
-/* Opens a file |path| with use mode |fopen_mode| for use with an array of
- * extents |ex_arr| of length |ex_count|. The mode string can be either of "r"
- * (read-only), "w" (write-only) or "r+"/"w+" (read-write); the underlying file
- * is neither created (if not present) nor truncated (if present) when opened
- * for writing. The function |ex_free|, if not NULL, will be called to
- * deallocate the extent array once the file object is closed. Returns a FILE
- * pointer that can be used with ordinary stream functions (e.g. fread), or
- * NULL if opening the file has failed. */
-FILE* exfile_fopen(const char* path,
- const char* fopen_mode,
- ex_t* ex_arr,
- size_t ex_count,
- void (*ex_free)(void*));
-
-/* Associates an extent file stream with an already open file descriptor |fd|.
- * The |fopen_mode| argument is as decribed above and must be compatible with
- * the mode of |fd|. All other arguments, behaviors and return values are as
- * those of exfile_fopen (see above). */
-FILE* exfile_fdopen(int fd,
- const char* fopen_mode,
- ex_t* ex_arr,
- size_t ex_count,
- void (*ex_free)(void*));
-
-#endif /* _BSDIFF_EXFILE_H_ */
diff --git a/extents.h b/extents.h
index d69b65f..af976e0 100644
--- a/extents.h
+++ b/extents.h
@@ -5,8 +5,9 @@
#ifndef _BSDIFF_EXTENTS_H_
#define _BSDIFF_EXTENTS_H_
-#include "exfile.h"
+#include "extents_file.h"
+namespace bsdiff {
/* Parses a string representation |ex_str| and populates an array |ex_arr|
* consisting of |*ex_count_p| extents. The string is expected to be a
@@ -23,4 +24,6 @@
* deallocated with free(3). */
ex_t* extents_parse(const char* ex_str, ex_t* ex_arr, size_t* ex_count_p);
+} // namespace bsdiff
+
#endif /* _BSDIFF_EXTENTS_H_ */
diff --git a/extents_file.cc b/extents_file.cc
new file mode 100644
index 0000000..fc4a10d
--- /dev/null
+++ b/extents_file.cc
@@ -0,0 +1,111 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "extents_file.h"
+
+#include <string.h>
+
+#include <algorithm>
+
+// Extent files implementation extending FileInterface.
+//
+// This class allows to map linear positions in a file to a list of regions in
+// another file. All the reads and writes are unbuffered, passed directly to the
+// underlying file. Seeking is done in O(log(N)), where N is the number of
+// extents in the file, but sequential reads jump to the next extent in O(1).
+
+namespace bsdiff {
+
+ExtentsFile::ExtentsFile(std::unique_ptr<FileInterface> file,
+ const std::vector<ex_t>& extents)
+ : file_(std::move(file)), extents_(extents) {
+ acc_len_.reserve(extents.size());
+ for (const ex_t& extent : extents) {
+ acc_len_.push_back(total_ex_len_);
+ total_ex_len_ += extent.len;
+ }
+}
+
+ExtentsFile::~ExtentsFile() {
+ Close();
+}
+
+bool ExtentsFile::Read(void* buf, size_t count, size_t* bytes_read) {
+ return IOOperation(&FileInterface::Read, buf, count, bytes_read);
+}
+
+
+bool ExtentsFile::Write(const void* buf, size_t count, size_t* bytes_written) {
+ return IOOperation(&FileInterface::Write, buf, count, bytes_written);
+}
+
+bool ExtentsFile::Seek(off_t pos) {
+ if (pos < 0 || static_cast<uint64_t>(pos) > total_ex_len_)
+ return false;
+ if (acc_len_.empty())
+ return true;
+ // Note that the first element of acc_len_ is always 0, and pos is at least 0,
+ // so the upper_bound will never return acc_len_.begin().
+ curr_pos_ = pos;
+ curr_ex_idx_ = std::upper_bound(acc_len_.begin(), acc_len_.end(), pos) -
+ acc_len_.begin();
+ // We handle the corner case where |pos| is the size of all the extents by
+ // leaving the value of curr_ex_idx_ the same way AdvancePos(0) would leave it
+ // after the seek.
+ if (curr_pos_ < total_ex_len_)
+ curr_ex_idx_--;
+ return true;
+}
+
+bool ExtentsFile::Close() {
+ return file_->Close();
+}
+
+void ExtentsFile::AdvancePos(uint64_t size) {
+ curr_pos_ += size;
+ for (; curr_ex_idx_ < extents_.size(); curr_ex_idx_++) {
+ if (curr_pos_ < acc_len_[curr_ex_idx_] + extents_[curr_ex_idx_].len)
+ return;
+ }
+ return;
+}
+
+template <typename T>
+bool ExtentsFile::IOOperation(bool (FileInterface::*io_op)(T*, size_t, size_t*),
+ T* buf,
+ size_t count,
+ size_t* bytes_processed) {
+ bool result = true;
+ size_t processed = 0;
+ AdvancePos(0);
+ while (count > 0 && curr_ex_idx_ < extents_.size()) {
+ const ex_t& ex = extents_[curr_ex_idx_];
+ size_t chunk_size = std::min(static_cast<uint64_t>(count), ex.len);
+ size_t chunk_processed = 0;
+ if (ex.off < 0) {
+ chunk_processed = chunk_size;
+ } else {
+ uint64_t file_pos = ex.off + (curr_pos_ - acc_len_[curr_ex_idx_]);
+ if (!file_->Seek(file_pos) ||
+ !((file_.get()->*io_op))(buf, chunk_size, &chunk_processed)) {
+ processed += chunk_processed;
+ result = processed > 0;
+ break;
+ }
+ }
+ processed += chunk_processed;
+ count -= chunk_processed;
+ // T can be either const void* or void*. We const_cast it back to void* and
+ // then char* to do the arithmetic operation, but |buf| will continue to be
+ // const void* if it was defined that way.
+ buf = static_cast<char*>(const_cast<void*>(buf)) + chunk_processed;
+ AdvancePos(chunk_processed);
+ if (!chunk_processed)
+ break;
+ }
+ *bytes_processed = processed;
+ return result;
+}
+
+} // namespace bsdiff
diff --git a/extents_file.h b/extents_file.h
new file mode 100644
index 0000000..a8007dd
--- /dev/null
+++ b/extents_file.h
@@ -0,0 +1,91 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef _BSDIFF_EXTENTS_FILE_H_
+#define _BSDIFF_EXTENTS_FILE_H_
+
+#include <stdio.h>
+
+#include <memory>
+#include <vector>
+
+#include "file_interface.h"
+
+/*
+ * Extent files.
+ *
+ * This modules provides a familiar interface for handling files through an
+ * indirection layer of extents, which are contiguous chunks of variable length
+ * at arbitrary offsets within a file. Once an extent file handle is obtained,
+ * users may read, write and seek as they do with ordinary files, having the I/O
+ * with the underlying file done for them by the extent file implementation. The
+ * implementation supports "sparse extents", which are assumed to contain zeros
+ * but otherwise have no actual representation in the underlying file; these are
+ * denoted by negative offset values.
+ *
+ * Unlike ordinary files, the size of an extent file is fixed; it is not
+ * truncated on open, nor is writing past the extent span allowed. Also, writing
+ * to a sparse extent has no effect and will not raise an error.
+ */
+
+namespace bsdiff {
+
+/* An extent, defined by an offset and a length. */
+struct ex_t {
+ off_t off; // the extent offset; negative indicates a sparse extent.
+ uint64_t len; // the extent length.
+};
+
+class ExtentsFile : public FileInterface {
+ public:
+ // Creates an ExtentsFile based on the underlying |file| passed. The positions
+ // in the ExtentsFile will be linearly mapped to the extents provided in
+ // |extents|. The created ExtentsFile takes ownership of the |file| will close
+ // it on destruction.
+ ExtentsFile(std::unique_ptr<FileInterface> file,
+ const std::vector<ex_t>& extents);
+
+ ~ExtentsFile() override;
+
+ // FileInterface overrides.
+ bool Read(void* buf, size_t count, size_t* bytes_read) override;
+ bool Write(const void* buf, size_t count, size_t* bytes_written) override;
+ bool Seek(off_t pos) override;
+ bool Close() override;
+
+ private:
+ void AdvancePos(uint64_t size);
+
+ // Performs an I/O operation (either read or write). This template shares the
+ // code for both Read() and Write() implementations.
+ template <typename T>
+ bool IOOperation(bool (FileInterface::*io_op)(T*, size_t, size_t*),
+ T* buf,
+ size_t count,
+ size_t* bytes_processed);
+
+ // The underlying FileInterace instance.
+ std::unique_ptr<FileInterface> file_;
+
+ // The list of extents mapping this instance to |file_|.
+ const std::vector<ex_t> extents_;
+
+ // The accumulated length of the extents. The i-th element contains the sum of
+ // the length of all the extents from 0 up to but not including the i-th
+ // extent. This reduces the complexity for random-access Seek() calls.
+ std::vector<uint64_t> acc_len_;
+
+ // Current extent index.
+ size_t curr_ex_idx_{0};
+
+ // Current logical file position.
+ uint64_t curr_pos_{0};
+
+ // Total length of all extents (constant).
+ uint64_t total_ex_len_{0};
+};
+
+} // namespace bsdiff
+
+#endif // _BSDIFF_EXTENTS_FILE_H_
diff --git a/extents_file_unittest.cc b/extents_file_unittest.cc
new file mode 100644
index 0000000..206328c
--- /dev/null
+++ b/extents_file_unittest.cc
@@ -0,0 +1,176 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "extents_file.h"
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <string>
+#include <vector>
+
+#include "file_interface.h"
+
+using std::string;
+using std::vector;
+using testing::AnyNumber;
+using testing::StrictMock;
+using testing::Return;
+using testing::InSequence;
+using testing::_;
+
+namespace bsdiff {
+
+// Mock class for the underlying file interface.
+class MockFile : public FileInterface {
+ public:
+ MOCK_METHOD3(Read, bool(void*, size_t, size_t*));
+ MOCK_METHOD3(Write, bool(const void*, size_t, size_t*));
+ MOCK_METHOD1(Seek, bool(off_t));
+ MOCK_METHOD0(Close, bool());
+};
+
+ACTION(SucceedIO) {
+ // Check that arg1 (count) can be converted
+ *arg2 = arg1;
+ return true;
+}
+
+ACTION_P(SucceedPartialIO, bytes) {
+ // Check that arg1 (count) can be converted
+ *arg2 = bytes;
+ return true;
+}
+
+class ExtentsFileTest : public testing::Test {
+ protected:
+ void SetUp() {
+ mock_file_ = new StrictMock<MockFile>();
+ mock_file_ptr_.reset(mock_file_);
+ // The destructor of the ExtentsFile will call Close once.
+ EXPECT_CALL(*mock_file_, Close()).WillOnce(Return(true));
+ }
+
+ // Pointer to the underlying File owned by the ExtentsFile under test. This
+ // pointer is invalidated whenever the ExtentsFile is destroyed.
+ StrictMock<MockFile>* mock_file_;
+ std::unique_ptr<FileInterface> mock_file_ptr_;
+};
+
+TEST_F(ExtentsFileTest, DestructorCloses) {
+ ExtentsFile file(std::move(mock_file_ptr_), {});
+}
+
+TEST_F(ExtentsFileTest, CloseIsForwarded) {
+ ExtentsFile file(std::move(mock_file_ptr_), {});
+ EXPECT_TRUE(file.Close());
+ EXPECT_CALL(*mock_file_, Close()).WillOnce(Return(false));
+}
+
+TEST_F(ExtentsFileTest, SeekToRightOffsets) {
+ ExtentsFile file(std::move(mock_file_ptr_),
+ {ex_t{10, 5}, ex_t{20, 5}, {25, 2}});
+ vector<std::pair<off_t, off_t>> tests = {
+ // Seek to the beginning of the file.
+ {0, 10},
+ // Seek to the middle of a extent.
+ {3, 13},
+ {11, 26},
+ // Seek to the extent boundary.
+ {5, 20}, // Seeks to the first byte in the second extent.
+ {10, 25},
+ };
+ for (const auto& offset_pair : tests) {
+ // We use a failing Read() call to trigger the actual seek call to the
+ // underlying file.
+ EXPECT_CALL(*mock_file_, Seek(offset_pair.second)).WillOnce(Return(true));
+ EXPECT_CALL(*mock_file_, Read(_, _, _)).WillOnce(Return(false));
+
+ EXPECT_TRUE(file.Seek(offset_pair.first));
+ size_t bytes_read;
+ EXPECT_FALSE(file.Read(nullptr, 1, &bytes_read));
+ }
+
+ // Seeking to the end of the file is ok, but not past it.
+ EXPECT_TRUE(file.Seek(12));
+ EXPECT_FALSE(file.Seek(13));
+
+ EXPECT_FALSE(file.Seek(-1));
+}
+
+TEST_F(ExtentsFileTest, ReadAcrossAllExtents) {
+ ExtentsFile file(std::move(mock_file_ptr_),
+ {ex_t{10, 5}, ex_t{20, 7}, {27, 3}});
+ InSequence s;
+ char* buf = reinterpret_cast<char*>(0x1234);
+
+ EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true));
+ EXPECT_CALL(*mock_file_, Read(buf, 5, _)).WillOnce(SucceedIO());
+ EXPECT_CALL(*mock_file_, Seek(20)).WillOnce(Return(true));
+ EXPECT_CALL(*mock_file_, Read(buf + 5, 7, _)).WillOnce(SucceedIO());
+ EXPECT_CALL(*mock_file_, Seek(27)).WillOnce(Return(true));
+ EXPECT_CALL(*mock_file_, Read(buf + 12, 3, _)).WillOnce(SucceedIO());
+
+ // FileExtents::Read() should read everything in one shot, by reading all
+ // the little chunks. Note that it doesn't attempt to read past the end of the
+ // FileExtents.
+ size_t bytes_read = 0;
+ EXPECT_TRUE(file.Read(buf, 100, &bytes_read));
+ EXPECT_EQ(15U, bytes_read);
+}
+
+TEST_F(ExtentsFileTest, ReadSmallChunks) {
+ ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}});
+ InSequence s;
+ char* buf = reinterpret_cast<char*>(0x1234);
+
+ EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true));
+ EXPECT_CALL(*mock_file_, Read(buf, 1, _)).WillOnce(SucceedIO());
+ EXPECT_CALL(*mock_file_, Seek(20)).WillOnce(Return(true));
+ // We expect to read only part of the second extent.
+ EXPECT_CALL(*mock_file_, Read(buf + 1, 1, _)).WillOnce(SucceedIO());
+
+ size_t bytes_read = 0;
+ EXPECT_TRUE(file.Read(buf, 2, &bytes_read));
+ EXPECT_EQ(2U, bytes_read);
+}
+
+TEST_F(ExtentsFileTest, ReadFailureFails) {
+ ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}});
+ EXPECT_CALL(*mock_file_, Seek(_))
+ .Times(AnyNumber())
+ .WillRepeatedly(Return(true));
+ EXPECT_CALL(*mock_file_, Read(_, 1, _)).WillOnce(SucceedIO());
+ // A second read that fails will succeed if there was partial data read.
+ EXPECT_CALL(*mock_file_, Read(_, 10, _)).WillOnce(Return(false));
+
+ size_t bytes_read = 0;
+ EXPECT_TRUE(file.Read(nullptr, 100, &bytes_read));
+ EXPECT_EQ(1U, bytes_read);
+}
+
+TEST_F(ExtentsFileTest, ReadFails) {
+ ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}});
+ EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true));
+ EXPECT_CALL(*mock_file_, Read(_, 1, _)).WillOnce(Return(false));
+ size_t bytes_read;
+ EXPECT_FALSE(file.Read(nullptr, 1, &bytes_read));
+}
+
+TEST_F(ExtentsFileTest, ReadPartialReadsAndEOF) {
+ ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}});
+ EXPECT_CALL(*mock_file_, Seek(_))
+ .Times(AnyNumber())
+ .WillRepeatedly(Return(true));
+ char* buf = reinterpret_cast<char*>(0x1234);
+ InSequence s;
+ EXPECT_CALL(*mock_file_, Read(buf, 1, _)).WillOnce(SucceedIO());
+ EXPECT_CALL(*mock_file_, Read(buf + 1, _, _)).WillOnce(SucceedPartialIO(3));
+ EXPECT_CALL(*mock_file_, Read(buf + 4, _, _)).WillOnce(SucceedPartialIO(0));
+
+ size_t bytes_read = 0;
+ EXPECT_TRUE(file.Read(buf, 100, &bytes_read));
+ EXPECT_EQ(4U, bytes_read);
+}
+
+} // namespace bsdiff
diff --git a/file.cc b/file.cc
new file mode 100644
index 0000000..972273a
--- /dev/null
+++ b/file.cc
@@ -0,0 +1,94 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "file.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// TEMP_FAILURE_RETRY is defined by some versions of <unistd.h>.
+#ifndef TEMP_FAILURE_RETRY
+#include <utils/Compat.h>
+#endif
+
+#include <algorithm>
+
+namespace bsdiff {
+
+std::unique_ptr<File> File::FOpen(const char* pathname, int flags) {
+ int fd = TEMP_FAILURE_RETRY(open(pathname, flags));
+ if (fd < 0)
+ return std::unique_ptr<File>();
+ return std::unique_ptr<File>(new File(fd));
+}
+
+File::~File() {
+ Close();
+}
+
+bool File::Read(void* buf, size_t count, size_t* bytes_read) {
+ if (fd_ < 0) {
+ errno = EBADF;
+ return false;
+ }
+ ssize_t rc = TEMP_FAILURE_RETRY(read(fd_, buf, count));
+ if (rc == -1)
+ return false;
+ *bytes_read = static_cast<size_t>(rc);
+ return true;
+}
+
+bool File::Write(const void* buf, size_t count, size_t* bytes_written) {
+ if (fd_ < 0) {
+ errno = EBADF;
+ return false;
+ }
+ ssize_t rc = TEMP_FAILURE_RETRY(write(fd_, buf, count));
+ if (rc == -1)
+ return false;
+ *bytes_written = static_cast<size_t>(rc);
+ return true;
+}
+
+bool File::Seek(off_t pos) {
+ if (fd_ < 0) {
+ errno = EBADF;
+ return false;
+ }
+ // fseek() uses a long value for the offset which could be smaller than off_t.
+ if (pos > std::numeric_limits<long>::max()) {
+ errno = EOVERFLOW;
+ return false;
+ }
+ off_t newpos = lseek(fd_, pos, SEEK_SET) == pos;
+ if (newpos < 0)
+ return false;
+ if (newpos != pos) {
+ errno = EINVAL;
+ return false;
+ }
+ return true;
+}
+
+bool File::Close() {
+ if (fd_ < 0) {
+ errno = EBADF;
+ return false;
+ }
+ bool success = close(fd_) == 0;
+ if (!success && errno == EINTR)
+ success = true;
+ fd_ = -1;
+ return success;
+}
+
+File::File(int fd)
+ : fd_(fd) {}
+
+} // namespace bsdiff
diff --git a/file.h b/file.h
new file mode 100644
index 0000000..00cd638
--- /dev/null
+++ b/file.h
@@ -0,0 +1,38 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef _BSDIFF_FILE_H_
+#define _BSDIFF_FILE_H_
+
+#include <memory>
+
+#include "file_interface.h"
+
+namespace bsdiff {
+
+class File : public FileInterface {
+ public:
+ // Opens a file |pathname| with flags |flags| as defined by open(2). In case
+ // of error, an empty unique_ptr is returned and errno is set accordingly.
+ static std::unique_ptr<File> FOpen(const char* pathname, int flags);
+
+ ~File() override;
+
+ // FileInterface overrides.
+ bool Read(void* buf, size_t count, size_t* bytes_read) override;
+ bool Write(const void* buf, size_t count, size_t* bytes_written) override;
+ bool Seek(off_t pos) override;
+ bool Close() override;
+
+ private:
+ // Creates the File instance for the |fd|. Takes ownership of the file
+ // descriptor.
+ File(int fd);
+
+ int fd_;
+};
+
+} // namespace bsdiff
+
+#endif // _BSDIFF_FILE_H_
diff --git a/file_interface.h b/file_interface.h
new file mode 100644
index 0000000..b32115a
--- /dev/null
+++ b/file_interface.h
@@ -0,0 +1,44 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef _BSDIFF_FILE_INTERFACE_H_
+#define _BSDIFF_FILE_INTERFACE_H_
+
+#include <sys/types.h>
+
+namespace bsdiff {
+
+class FileInterface {
+ public:
+ virtual ~FileInterface() = default;
+
+ // Reads synchronously from the current file position up to |count| bytes into
+ // the passed |buf| buffer. On success, stores in |bytes_read| how many bytes
+ // were actually read from the file. In case of error returns false. This
+ // method may read less than |count| bytes even if there's no error. If the
+ // end of file is reached, 0 bytes will be read and this methods returns true.
+ virtual bool Read(void* buf, size_t count, size_t* bytes_read) = 0;
+
+ // Writes synchronously up to |count| bytes from to passed |buf| buffer to
+ // the file. On success, stores in |bytes_written| how many bytes
+ // were actually written to the file. This method may write less than |count|
+ // bytes and return successfully, or even write 0 bytes if there's no more
+ // space left on the device. Returns whether the write succeeded.
+ virtual bool Write(const void* buf, size_t count, size_t* bytes_written) = 0;
+
+ // Change the current file position to |pos| bytes from the beginning of the
+ // file. Return whether the seek succeeded.
+ virtual bool Seek(off_t pos) = 0;
+
+ // Closes the file and flushes any cached data. Returns whether the close
+ // succeeded.
+ virtual bool Close() = 0;
+
+ protected:
+ FileInterface() = default;
+};
+
+} // namespace bsdiff
+
+#endif // _BSDIFF_FILE_INTERFACE_H_