diff options
author | Alex Deymo <deymo@google.com> | 2015-10-13 02:15:31 -0700 |
---|---|---|
committer | Alex Deymo <deymo@google.com> | 2015-10-23 20:09:11 -0700 |
commit | 03f1debab429e673ba5e9e317c5a04e36e850cef (patch) | |
tree | e2c288b30f3515aaee669ea377fb3214a02468cf | |
parent | 20891f9c246ec36e6c148579522ac00051b64457 (diff) | |
download | bsdiff-03f1debab429e673ba5e9e317c5a04e36e850cef.tar.gz |
bspatch: Use a C++ interface for file access.
This patch moves the exfile.cc implementation to a C++ class with an
abstract interface of a file. The implementation of exfile.cc, renamed
to extents_file.cc, now uses C++ STL classes and has unittests to test
its core functionality.
Bug: 24478450
Test: Unittests added. make all test -j5 && ./unittests
Change-Id: I8d8f07150ad2ea465c55b5178ca9fbab49185eea
-rw-r--r-- | Android.mk | 11 | ||||
-rw-r--r-- | Makefile | 19 | ||||
-rw-r--r-- | exfile.cc | 410 | ||||
-rw-r--r-- | exfile.h | 59 | ||||
-rw-r--r-- | extents.h | 5 | ||||
-rw-r--r-- | extents_file.cc | 111 | ||||
-rw-r--r-- | extents_file.h | 91 | ||||
-rw-r--r-- | extents_file_unittest.cc | 176 | ||||
-rw-r--r-- | file.cc | 94 | ||||
-rw-r--r-- | file.h | 38 | ||||
-rw-r--r-- | file_interface.h | 44 |
11 files changed, 579 insertions, 479 deletions
@@ -21,6 +21,12 @@ bsdiff_common_cflags := \ -Wextra \ -Wno-unused-parameter +bspatch_src_files := \ + bspatch.cc \ + extents.cc \ + extents_file.cc \ + file.cc + include $(CLEAR_VARS) LOCAL_MODULE := bsdiff LOCAL_CPP_EXTENSION := .cc @@ -39,9 +45,8 @@ include $(CLEAR_VARS) LOCAL_MODULE := bspatch LOCAL_CPP_EXTENSION := .cc LOCAL_SRC_FILES := \ - bspatch.cc \ - bspatch_main.cc \ - extents.cc + $(bspatch_src_files) \ + bspatch_main.cc LOCAL_CFLAGS := $(bsdiff_common_cflags) LOCAL_C_INCLUDES += external/bzip2 LOCAL_STATIC_LIBRARIES := libbz @@ -32,12 +32,14 @@ BSDIFF_OBJS = \ BSPATCH_LIBS = -lbz2 BSPATCH_OBJS = \ bspatch.o \ - exfile.o \ - extents.o + extents.o \ + extents_file.o \ + file.o -UNITTEST_LIBS = -lgtest +UNITTEST_LIBS = -lgmock -lgtest UNITTEST_OBJS = \ bsdiff_unittest.o \ + extents_file_unittest.o \ test_utils.o \ testrunner.o @@ -49,16 +51,21 @@ bspatch: LDLIBS += $(BSPATCH_LIBS) unittests: LDLIBS += $(BSDIFF_LIBS) $(BSPATCH_LIBS) $(UNITTEST_LIBS) unittests: $(BSPATCH_OBJS) $(BSDIFF_OBJS) $(UNITTEST_OBJS) + +unittests bsdiff bspatch: $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ $(LDLIBS) # Source file dependencies. bsdiff.o: bsdiff.cc bsdiff_main.o: bsdiff_main.cc bsdiff.h bsdiff_unittest.o: bsdiff_unittest.cc bsdiff.h test_utils.h -bspatch.o: bspatch.cc exfile.h extents.h +bspatch.o: bspatch.cc extents.h extents_file.h file_interface.h bspatch_main.o: bspatch_main.cc bspatch.h -exfile.o: exfile.cc exfile.h -extents.o: extents.cc extents.h exfile.h +extents.o: extents.cc extents.h extents_file.h file_interface.h +extents_file.o: extents_file.cc extents_file.h file_interface.h +extents_file_unittest.o: extents_file_unittest.cc extents_file.h \ + file_interface.h +file.o: file.cc file.h file_interface.h testrunner.o: testrunner.cc test_utils.o: test_utils.cc test_utils.h diff --git a/exfile.cc b/exfile.cc deleted file mode 100644 index 806424e..0000000 --- a/exfile.cc +++ /dev/null @@ -1,410 +0,0 @@ -// Copyright 2015 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "exfile.h" - -#include <assert.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -/* - * Extent files implementation. Some things worth noting: - * - * - We are using glibc's buffered FILE objects for the underlying file I/O; - * this contributes to improved performance, especially with badly fragmented - * extents. However, the FILE handle we return to the caller is decidedly - * unbuffered: making it buffered too seems superfluous, causing excess data - * copying and memory use. - * - * - We maintain the "logical" file position separately from the "physical" - * (underlying) file position. The latter is updated lazily whenever actual - * file I/O is about to be performed. - * - * - The logical position of an extent file is internally represented by the - * current extent index (curr_ex_idx) and the position within the current - * extent (curr_ex_pos), as well as an absolute logical position (curr_pos). - * In general, curr_pos should equal the total length of all extents prior to - * curr_ex_idx, plus curr_ex_pos. Also, curr_ex_idx may range between 0 and - * the total extent count; if it is exactly the latter, then curr_ex_pos must - * be zero, representing the fact that the we are at the logical end of the - * file. Otherwise, curr_ex_pos may range between 0 and the length of the - * current extent; if it is exactly the latter, then this is equivalent to - * position zero on the next extent. All functions should honor this - * duality. - * - * - Seeking is done efficiently at O(log(D)), where D is the - * number of extents between the current position and the new one. This seems - * like a good midway for supporting both sequential and random access. - */ - - -#define TRUE 1 -#define FALSE 0 - -#define arraysize(x) (sizeof(x) / sizeof(*(x))) - - -/* Extent prefix length. */ -typedef struct { - size_t prec; /* total length of preceding extents */ - size_t total; /* total length including current extent */ -} prefix_len_t; - -/* Extent file logical modes. Used as index to the mapping from logical modes - * to open(2) and fopen(3) modes below. */ -typedef enum { - EXFILE_MODE_RO, - EXFILE_MODE_WO, - EXFILE_MODE_RW, - EXFILE_MODE_MAX /* sentinel */ -} exfile_mode_t; - -/* An extent file control object (aka "cookie"). */ -typedef struct { - int fd; /* underlying file descriptor */ - size_t ex_count; /* number of extents (non-zero) */ - ex_t* ex_arr; /* array of extents */ - prefix_len_t* prefix_len_arr; /* total lengths of extent prefixes */ - void (*ex_free)(void*); /* extent array free function */ - size_t total_ex_len; /* total length of all extents (constant) */ - off_t curr_file_pos; /* current underlying file position */ - size_t curr_ex_idx; /* current extent index */ - size_t curr_ex_pos; /* current position within extent */ - off_t curr_pos; /* current logical file position */ -} exfile_t; - - -/* Mapping from fopen(3) modes to extent file logical modes. */ -static const struct { - const char* fopen_mode; - exfile_mode_t mode; -} fopen_mode_to_mode[] = { - {"r", EXFILE_MODE_RO}, - {"r+", EXFILE_MODE_RW}, - {"w", EXFILE_MODE_WO}, - {"w+", EXFILE_MODE_RW}, -}; - - -/* Mapping from extent file logical modes to open(2) flags. */ -static const int mode_to_open_flags[EXFILE_MODE_MAX] = { - O_RDONLY, - O_WRONLY, - O_RDWR, -}; - - -/* Searches an array |ex_arr| of |ex_count| extents and returns the index of - * the extent that contains the location |pos|. Uses an array |prefix_len_arr| - * of corresponding prefix lengths. The total complexity is O(log(D)), where D - * is the distance between the returned extent index and |init_ex_idx|. */ -static size_t ex_arr_search(size_t ex_count, - const ex_t* ex_arr, - const prefix_len_t* prefix_len_arr, - size_t pos, - size_t init_ex_idx) { - assert(ex_arr && ex_count); - const ssize_t last_ex_idx = ex_count - 1; - assert(init_ex_idx <= ex_count); - assert(pos < prefix_len_arr[last_ex_idx].total); - if (init_ex_idx == ex_count) - init_ex_idx = last_ex_idx; /* adjustment for purposes of the search below */ - - /* First, search in exponentially increasing leaps from the current extent, - * until an interval bounding the target position was obtained. Here i and j - * are the left and right (inclusive) index boundaries, respectively. */ - ssize_t i = init_ex_idx; - ssize_t j = i; - size_t leap = 1; - /* Go left, as needed. */ - while (i > 0 && pos < prefix_len_arr[i].prec) { - j = i - 1; - if ((i -= leap) < 0) - i = 0; - leap <<= 1; - } - /* Go right, as needed. */ - while (j < last_ex_idx && pos >= prefix_len_arr[j].total) { - i = j + 1; - if ((j += leap) > last_ex_idx) - j = last_ex_idx; - leap <<= 1; - } - - /* Then, perform a binary search between i and j. */ - size_t k = 0; - while (1) { - k = (i + j) / 2; - if (pos < prefix_len_arr[k].prec) - j = k - 1; - else if (pos >= prefix_len_arr[k].total) - i = k + 1; - else - break; - } - - return k; -} - -/* Performs I/O operations (either read or write) on an extent file, advancing - * through consecutive extents and updating the logical/physical file position - * as we go. */ -static ssize_t exfile_io(exfile_t* xf, int do_read, char* buf, size_t size) { - if (xf->curr_ex_idx == xf->ex_count) - return 0; /* end-of-extent-file */ - - /* Reading or writing? */ - typedef ssize_t(io_func_t)(int, void*, size_t); - io_func_t* io_func; - ssize_t error_ret_val; - if (do_read) { - io_func = read; - error_ret_val = -1; - } else { - io_func = (io_func_t*)write; - error_ret_val = 0; /* must not return a negative value when writing */ - } - - /* Start processing data along extents. */ - const ex_t* curr_ex = xf->ex_arr + xf->curr_ex_idx; - assert(curr_ex->len >= xf->curr_ex_pos); - size_t curr_ex_rem_len = curr_ex->len - xf->curr_ex_pos; - ssize_t total_bytes = 0; - while (size) { - /* Advance to the next extent of non-zero length. */ - while (curr_ex_rem_len == 0) { - xf->curr_ex_idx++; - xf->curr_ex_pos = 0; - if (xf->curr_ex_idx == xf->ex_count) - return total_bytes; /* end-of-extent-file */ - curr_ex++; - curr_ex_rem_len = curr_ex->len; - } - - const int is_real_ex = (curr_ex->off >= 0); - - /* Seek to the correct file position, as necessary. */ - if (is_real_ex) { - const off_t file_pos = curr_ex->off + xf->curr_ex_pos; - if (xf->curr_file_pos != file_pos) { - if (lseek(xf->fd, file_pos, SEEK_SET) == (off_t)-1) { - xf->curr_file_pos = -1; /* unknown file position */ - return total_bytes ? total_bytes : error_ret_val; - } - xf->curr_file_pos = file_pos; - } - } - - /* Process data to the end of the current extent or the requested - * count, whichever is smaller. */ - size_t io_count = (size < curr_ex_rem_len ? size : curr_ex_rem_len); - ssize_t io_bytes = io_count; - if (is_real_ex) - io_bytes = io_func(xf->fd, buf, io_count); - else if (do_read) - memset(buf, 0, io_count); - - /* Stop on error. */ - if (io_bytes < 0) { - if (total_bytes == 0) - total_bytes = error_ret_val; - break; - } - - /* Update read state. */ - total_bytes += io_bytes; - if (is_real_ex) - xf->curr_file_pos += io_bytes; - xf->curr_ex_pos += io_bytes; - xf->curr_pos += io_bytes; - - /* If we didn't read the whole extent, finish; delegate handling of - * partial read/write back to the caller. */ - if ((curr_ex_rem_len -= io_bytes) > 0) - break; - - /* Update total count and buffer pointer. */ - size -= io_bytes; - buf += io_bytes; - } - - return total_bytes; -} - -/* Reads up to |size| bytes from an extent file into |buf|. This implements the - * cookie_read_function_t interface and is a thin wrapper around exfile_io() - * (see above). Returns the number of bytes read, or -1 on error. */ -static ssize_t exfile_read(void* cookie, char* buf, size_t size) { - return exfile_io((exfile_t*)cookie, TRUE, buf, size); -} - -/* Writes up to |size| bytes from |buf| to an extent file. This implements the - * cookie_write_function_t interface and is a thin wrapper around exfile_io() - * (see above). Returns the number of bytes written; must NOT return a negative - * value. */ -static ssize_t exfile_write(void* cookie, const char* buf, size_t size) { - return exfile_io((exfile_t*)cookie, FALSE, (char*)buf, size); -} - -/* Performs seek on an extent file, repositioning it to the value of |*pos_p| - * according to |whence|. This implements the cookie_seek_function_t interface. - * On success, stores the resulting logical position measured in bytes along - * contiguous extents into |*pos_p| and returns 0; otherwise returns -1. */ -static int exfile_seek(void* cookie, off64_t* pos_p, int whence) { - exfile_t* xf = (exfile_t*)cookie; - - /* Compute the absolute logical target position. */ - off64_t new_pos = *pos_p; - if (whence == SEEK_CUR) - new_pos += xf->curr_pos; - else if (whence == SEEK_END) - new_pos += xf->total_ex_len; - - /* Ensure that the target position is valid. Note that repositioning the - * file right past the last extent is considered valid, in line with normal - * seek behavior, although no write (nor read) can be performed there. */ - if (new_pos < 0 || new_pos > (off64_t)xf->total_ex_len) - return -1; - - if (new_pos != (off64_t)xf->curr_pos) { - /* Find the extend that contains the requested logical position; handle - * special case upfront, for efficiency. */ - size_t new_ex_idx = 0; - if (new_pos == (off64_t)xf->total_ex_len) - new_ex_idx = xf->ex_count; - else if (new_pos) - new_ex_idx = ex_arr_search(xf->ex_count, xf->ex_arr, xf->prefix_len_arr, - new_pos, xf->curr_ex_idx); - - /* Set the logical position markers. */ - xf->curr_ex_idx = new_ex_idx; - xf->curr_ex_pos = - (new_ex_idx < xf->ex_count - ? (size_t)(new_pos - xf->prefix_len_arr[new_ex_idx].prec) - : 0); - xf->curr_pos = (off_t)new_pos; - } - - *pos_p = new_pos; - return 0; -} - -/* Closes an open extent file. This implements the cookie_close_function_t - * interface. Always returns 0 (success). */ -static int exfile_close(void* cookie) { - exfile_t* xf = (exfile_t*)cookie; - if (xf) { - if (xf->fd >= 0) - close(xf->fd); - free(xf->prefix_len_arr); - if (xf->ex_free) - xf->ex_free(xf->ex_arr); - free(xf); - } - return 0; -} - -static const cookie_io_functions_t cookie_io_funcs = { - .read = exfile_read, - .write = exfile_write, - .seek = exfile_seek, - .close = exfile_close, -}; - -static FILE* exfile_open(int fd, - const char* path, - const char* fopen_mode, - ex_t* ex_arr, - size_t ex_count, - void (*ex_free)(void*)) { - /* Argument sanity check. */ - if (!(ex_arr && ex_count && (fd >= 0 || path) && (fd < 0 || !path))) - return NULL; - - /* Validate mode argument. */ - exfile_mode_t mode = EXFILE_MODE_MAX; - size_t i; - for (i = 0; i < arraysize(fopen_mode_to_mode); i++) - if (!strcmp(fopen_mode_to_mode[i].fopen_mode, fopen_mode)) { - mode = fopen_mode_to_mode[i].mode; - break; - } - if (mode == EXFILE_MODE_MAX) - return NULL; - - /* Open the underlying file, if not already provided. */ - int do_close_fd = FALSE; - if (fd < 0) { - if ((fd = open(path, mode_to_open_flags[mode])) < 0) - return NULL; - do_close_fd = TRUE; - } - - /* Allocate memory and open file streams, for both the underlying file and - * the handle returned to the caller. */ - exfile_t* xf = NULL; - prefix_len_t* prefix_len_arr = NULL; - FILE* handle = NULL; - if (!((xf = (exfile_t*)calloc(1, sizeof(exfile_t))) && - (prefix_len_arr = - (prefix_len_t*)malloc(sizeof(prefix_len_t) * ex_count)) && - (handle = fopencookie(xf, fopen_mode, cookie_io_funcs)))) { - /* If a file was opened above, close it. */ - if (do_close_fd) - close(fd); - if (xf) - xf->fd = -1; /* invalidate prior to calling exfile_close() */ - - free(prefix_len_arr); - if (handle) - fclose(handle); /* will call exfile_close already */ - else - exfile_close(xf); - return NULL; - } - - /* Compute the prefix lengths. */ - size_t prefix_len = 0; - for (i = 0; i < ex_count; i++) { - prefix_len_arr[i].prec = prefix_len; - prefix_len += ex_arr[i].len; - prefix_len_arr[i].total = prefix_len; - } - - /* Configure control object, including physical/logical file position. */ - xf->fd = fd; - xf->ex_count = ex_count; - xf->ex_arr = ex_arr; - xf->prefix_len_arr = prefix_len_arr; - xf->ex_free = ex_free; - xf->total_ex_len = prefix_len_arr[ex_count - 1].total; - xf->curr_file_pos = lseek(fd, 0, SEEK_CUR); - xf->curr_ex_idx = 0; - xf->curr_ex_pos = 0; - xf->curr_pos = 0; - - /* Return the external stream handle. */ - return handle; -} - -FILE* exfile_fopen(const char* path, - const char* fopen_mode, - ex_t* ex_arr, - size_t ex_count, - void (*ex_free)(void*)) { - return exfile_open(-1, path, fopen_mode, ex_arr, ex_count, ex_free); -} - -FILE* exfile_fdopen(int fd, - const char* fopen_mode, - ex_t* ex_arr, - size_t ex_count, - void (*ex_free)(void*)) { - return exfile_open(fd, NULL, fopen_mode, ex_arr, ex_count, ex_free); -} diff --git a/exfile.h b/exfile.h deleted file mode 100644 index e109f04..0000000 --- a/exfile.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2015 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef _BSDIFF_EXFILE_H_ -#define _BSDIFF_EXFILE_H_ - -#include <stdio.h> - -/* - * Extent files. - * - * This modules provides a familiar interface for handling files through an - * indirection layer of extents, which are contiguous chunks of variable length - * at arbitrary offsets within a file. Once an extent file handle is obtained, - * users may read, write and seek as they do with ordinary files, having the I/O - * with the underlying file done for them by the extent file implementation. The - * implementation supports "sparse extents", which are assumed to contain zeros - * but otherwise have no actual representation in the underlying file; these are - * denoted by negative offset values. - * - * Unlike ordinary files, the size of an extent file is fixed; it is not - * truncated on open, nor is writing past the extent span allowed. Also, writing - * to a sparse extent has no effect and will not raise an error. - */ - - -/* An extent, defined by an offset and a length. */ -typedef struct { - off_t off; /* the extent offset; negative indicates a sparse extent */ - size_t len; /* the extent length */ -} ex_t; - - -/* Opens a file |path| with use mode |fopen_mode| for use with an array of - * extents |ex_arr| of length |ex_count|. The mode string can be either of "r" - * (read-only), "w" (write-only) or "r+"/"w+" (read-write); the underlying file - * is neither created (if not present) nor truncated (if present) when opened - * for writing. The function |ex_free|, if not NULL, will be called to - * deallocate the extent array once the file object is closed. Returns a FILE - * pointer that can be used with ordinary stream functions (e.g. fread), or - * NULL if opening the file has failed. */ -FILE* exfile_fopen(const char* path, - const char* fopen_mode, - ex_t* ex_arr, - size_t ex_count, - void (*ex_free)(void*)); - -/* Associates an extent file stream with an already open file descriptor |fd|. - * The |fopen_mode| argument is as decribed above and must be compatible with - * the mode of |fd|. All other arguments, behaviors and return values are as - * those of exfile_fopen (see above). */ -FILE* exfile_fdopen(int fd, - const char* fopen_mode, - ex_t* ex_arr, - size_t ex_count, - void (*ex_free)(void*)); - -#endif /* _BSDIFF_EXFILE_H_ */ @@ -5,8 +5,9 @@ #ifndef _BSDIFF_EXTENTS_H_ #define _BSDIFF_EXTENTS_H_ -#include "exfile.h" +#include "extents_file.h" +namespace bsdiff { /* Parses a string representation |ex_str| and populates an array |ex_arr| * consisting of |*ex_count_p| extents. The string is expected to be a @@ -23,4 +24,6 @@ * deallocated with free(3). */ ex_t* extents_parse(const char* ex_str, ex_t* ex_arr, size_t* ex_count_p); +} // namespace bsdiff + #endif /* _BSDIFF_EXTENTS_H_ */ diff --git a/extents_file.cc b/extents_file.cc new file mode 100644 index 0000000..fc4a10d --- /dev/null +++ b/extents_file.cc @@ -0,0 +1,111 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extents_file.h" + +#include <string.h> + +#include <algorithm> + +// Extent files implementation extending FileInterface. +// +// This class allows to map linear positions in a file to a list of regions in +// another file. All the reads and writes are unbuffered, passed directly to the +// underlying file. Seeking is done in O(log(N)), where N is the number of +// extents in the file, but sequential reads jump to the next extent in O(1). + +namespace bsdiff { + +ExtentsFile::ExtentsFile(std::unique_ptr<FileInterface> file, + const std::vector<ex_t>& extents) + : file_(std::move(file)), extents_(extents) { + acc_len_.reserve(extents.size()); + for (const ex_t& extent : extents) { + acc_len_.push_back(total_ex_len_); + total_ex_len_ += extent.len; + } +} + +ExtentsFile::~ExtentsFile() { + Close(); +} + +bool ExtentsFile::Read(void* buf, size_t count, size_t* bytes_read) { + return IOOperation(&FileInterface::Read, buf, count, bytes_read); +} + + +bool ExtentsFile::Write(const void* buf, size_t count, size_t* bytes_written) { + return IOOperation(&FileInterface::Write, buf, count, bytes_written); +} + +bool ExtentsFile::Seek(off_t pos) { + if (pos < 0 || static_cast<uint64_t>(pos) > total_ex_len_) + return false; + if (acc_len_.empty()) + return true; + // Note that the first element of acc_len_ is always 0, and pos is at least 0, + // so the upper_bound will never return acc_len_.begin(). + curr_pos_ = pos; + curr_ex_idx_ = std::upper_bound(acc_len_.begin(), acc_len_.end(), pos) - + acc_len_.begin(); + // We handle the corner case where |pos| is the size of all the extents by + // leaving the value of curr_ex_idx_ the same way AdvancePos(0) would leave it + // after the seek. + if (curr_pos_ < total_ex_len_) + curr_ex_idx_--; + return true; +} + +bool ExtentsFile::Close() { + return file_->Close(); +} + +void ExtentsFile::AdvancePos(uint64_t size) { + curr_pos_ += size; + for (; curr_ex_idx_ < extents_.size(); curr_ex_idx_++) { + if (curr_pos_ < acc_len_[curr_ex_idx_] + extents_[curr_ex_idx_].len) + return; + } + return; +} + +template <typename T> +bool ExtentsFile::IOOperation(bool (FileInterface::*io_op)(T*, size_t, size_t*), + T* buf, + size_t count, + size_t* bytes_processed) { + bool result = true; + size_t processed = 0; + AdvancePos(0); + while (count > 0 && curr_ex_idx_ < extents_.size()) { + const ex_t& ex = extents_[curr_ex_idx_]; + size_t chunk_size = std::min(static_cast<uint64_t>(count), ex.len); + size_t chunk_processed = 0; + if (ex.off < 0) { + chunk_processed = chunk_size; + } else { + uint64_t file_pos = ex.off + (curr_pos_ - acc_len_[curr_ex_idx_]); + if (!file_->Seek(file_pos) || + !((file_.get()->*io_op))(buf, chunk_size, &chunk_processed)) { + processed += chunk_processed; + result = processed > 0; + break; + } + } + processed += chunk_processed; + count -= chunk_processed; + // T can be either const void* or void*. We const_cast it back to void* and + // then char* to do the arithmetic operation, but |buf| will continue to be + // const void* if it was defined that way. + buf = static_cast<char*>(const_cast<void*>(buf)) + chunk_processed; + AdvancePos(chunk_processed); + if (!chunk_processed) + break; + } + *bytes_processed = processed; + return result; +} + +} // namespace bsdiff diff --git a/extents_file.h b/extents_file.h new file mode 100644 index 0000000..a8007dd --- /dev/null +++ b/extents_file.h @@ -0,0 +1,91 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef _BSDIFF_EXTENTS_FILE_H_ +#define _BSDIFF_EXTENTS_FILE_H_ + +#include <stdio.h> + +#include <memory> +#include <vector> + +#include "file_interface.h" + +/* + * Extent files. + * + * This modules provides a familiar interface for handling files through an + * indirection layer of extents, which are contiguous chunks of variable length + * at arbitrary offsets within a file. Once an extent file handle is obtained, + * users may read, write and seek as they do with ordinary files, having the I/O + * with the underlying file done for them by the extent file implementation. The + * implementation supports "sparse extents", which are assumed to contain zeros + * but otherwise have no actual representation in the underlying file; these are + * denoted by negative offset values. + * + * Unlike ordinary files, the size of an extent file is fixed; it is not + * truncated on open, nor is writing past the extent span allowed. Also, writing + * to a sparse extent has no effect and will not raise an error. + */ + +namespace bsdiff { + +/* An extent, defined by an offset and a length. */ +struct ex_t { + off_t off; // the extent offset; negative indicates a sparse extent. + uint64_t len; // the extent length. +}; + +class ExtentsFile : public FileInterface { + public: + // Creates an ExtentsFile based on the underlying |file| passed. The positions + // in the ExtentsFile will be linearly mapped to the extents provided in + // |extents|. The created ExtentsFile takes ownership of the |file| will close + // it on destruction. + ExtentsFile(std::unique_ptr<FileInterface> file, + const std::vector<ex_t>& extents); + + ~ExtentsFile() override; + + // FileInterface overrides. + bool Read(void* buf, size_t count, size_t* bytes_read) override; + bool Write(const void* buf, size_t count, size_t* bytes_written) override; + bool Seek(off_t pos) override; + bool Close() override; + + private: + void AdvancePos(uint64_t size); + + // Performs an I/O operation (either read or write). This template shares the + // code for both Read() and Write() implementations. + template <typename T> + bool IOOperation(bool (FileInterface::*io_op)(T*, size_t, size_t*), + T* buf, + size_t count, + size_t* bytes_processed); + + // The underlying FileInterace instance. + std::unique_ptr<FileInterface> file_; + + // The list of extents mapping this instance to |file_|. + const std::vector<ex_t> extents_; + + // The accumulated length of the extents. The i-th element contains the sum of + // the length of all the extents from 0 up to but not including the i-th + // extent. This reduces the complexity for random-access Seek() calls. + std::vector<uint64_t> acc_len_; + + // Current extent index. + size_t curr_ex_idx_{0}; + + // Current logical file position. + uint64_t curr_pos_{0}; + + // Total length of all extents (constant). + uint64_t total_ex_len_{0}; +}; + +} // namespace bsdiff + +#endif // _BSDIFF_EXTENTS_FILE_H_ diff --git a/extents_file_unittest.cc b/extents_file_unittest.cc new file mode 100644 index 0000000..206328c --- /dev/null +++ b/extents_file_unittest.cc @@ -0,0 +1,176 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extents_file.h" + +#include <gtest/gtest.h> +#include <gmock/gmock.h> +#include <string> +#include <vector> + +#include "file_interface.h" + +using std::string; +using std::vector; +using testing::AnyNumber; +using testing::StrictMock; +using testing::Return; +using testing::InSequence; +using testing::_; + +namespace bsdiff { + +// Mock class for the underlying file interface. +class MockFile : public FileInterface { + public: + MOCK_METHOD3(Read, bool(void*, size_t, size_t*)); + MOCK_METHOD3(Write, bool(const void*, size_t, size_t*)); + MOCK_METHOD1(Seek, bool(off_t)); + MOCK_METHOD0(Close, bool()); +}; + +ACTION(SucceedIO) { + // Check that arg1 (count) can be converted + *arg2 = arg1; + return true; +} + +ACTION_P(SucceedPartialIO, bytes) { + // Check that arg1 (count) can be converted + *arg2 = bytes; + return true; +} + +class ExtentsFileTest : public testing::Test { + protected: + void SetUp() { + mock_file_ = new StrictMock<MockFile>(); + mock_file_ptr_.reset(mock_file_); + // The destructor of the ExtentsFile will call Close once. + EXPECT_CALL(*mock_file_, Close()).WillOnce(Return(true)); + } + + // Pointer to the underlying File owned by the ExtentsFile under test. This + // pointer is invalidated whenever the ExtentsFile is destroyed. + StrictMock<MockFile>* mock_file_; + std::unique_ptr<FileInterface> mock_file_ptr_; +}; + +TEST_F(ExtentsFileTest, DestructorCloses) { + ExtentsFile file(std::move(mock_file_ptr_), {}); +} + +TEST_F(ExtentsFileTest, CloseIsForwarded) { + ExtentsFile file(std::move(mock_file_ptr_), {}); + EXPECT_TRUE(file.Close()); + EXPECT_CALL(*mock_file_, Close()).WillOnce(Return(false)); +} + +TEST_F(ExtentsFileTest, SeekToRightOffsets) { + ExtentsFile file(std::move(mock_file_ptr_), + {ex_t{10, 5}, ex_t{20, 5}, {25, 2}}); + vector<std::pair<off_t, off_t>> tests = { + // Seek to the beginning of the file. + {0, 10}, + // Seek to the middle of a extent. + {3, 13}, + {11, 26}, + // Seek to the extent boundary. + {5, 20}, // Seeks to the first byte in the second extent. + {10, 25}, + }; + for (const auto& offset_pair : tests) { + // We use a failing Read() call to trigger the actual seek call to the + // underlying file. + EXPECT_CALL(*mock_file_, Seek(offset_pair.second)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(_, _, _)).WillOnce(Return(false)); + + EXPECT_TRUE(file.Seek(offset_pair.first)); + size_t bytes_read; + EXPECT_FALSE(file.Read(nullptr, 1, &bytes_read)); + } + + // Seeking to the end of the file is ok, but not past it. + EXPECT_TRUE(file.Seek(12)); + EXPECT_FALSE(file.Seek(13)); + + EXPECT_FALSE(file.Seek(-1)); +} + +TEST_F(ExtentsFileTest, ReadAcrossAllExtents) { + ExtentsFile file(std::move(mock_file_ptr_), + {ex_t{10, 5}, ex_t{20, 7}, {27, 3}}); + InSequence s; + char* buf = reinterpret_cast<char*>(0x1234); + + EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf, 5, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Seek(20)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf + 5, 7, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Seek(27)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf + 12, 3, _)).WillOnce(SucceedIO()); + + // FileExtents::Read() should read everything in one shot, by reading all + // the little chunks. Note that it doesn't attempt to read past the end of the + // FileExtents. + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(buf, 100, &bytes_read)); + EXPECT_EQ(15U, bytes_read); +} + +TEST_F(ExtentsFileTest, ReadSmallChunks) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + InSequence s; + char* buf = reinterpret_cast<char*>(0x1234); + + EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf, 1, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Seek(20)).WillOnce(Return(true)); + // We expect to read only part of the second extent. + EXPECT_CALL(*mock_file_, Read(buf + 1, 1, _)).WillOnce(SucceedIO()); + + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(buf, 2, &bytes_read)); + EXPECT_EQ(2U, bytes_read); +} + +TEST_F(ExtentsFileTest, ReadFailureFails) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + EXPECT_CALL(*mock_file_, Seek(_)) + .Times(AnyNumber()) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*mock_file_, Read(_, 1, _)).WillOnce(SucceedIO()); + // A second read that fails will succeed if there was partial data read. + EXPECT_CALL(*mock_file_, Read(_, 10, _)).WillOnce(Return(false)); + + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(nullptr, 100, &bytes_read)); + EXPECT_EQ(1U, bytes_read); +} + +TEST_F(ExtentsFileTest, ReadFails) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(_, 1, _)).WillOnce(Return(false)); + size_t bytes_read; + EXPECT_FALSE(file.Read(nullptr, 1, &bytes_read)); +} + +TEST_F(ExtentsFileTest, ReadPartialReadsAndEOF) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + EXPECT_CALL(*mock_file_, Seek(_)) + .Times(AnyNumber()) + .WillRepeatedly(Return(true)); + char* buf = reinterpret_cast<char*>(0x1234); + InSequence s; + EXPECT_CALL(*mock_file_, Read(buf, 1, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Read(buf + 1, _, _)).WillOnce(SucceedPartialIO(3)); + EXPECT_CALL(*mock_file_, Read(buf + 4, _, _)).WillOnce(SucceedPartialIO(0)); + + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(buf, 100, &bytes_read)); + EXPECT_EQ(4U, bytes_read); +} + +} // namespace bsdiff @@ -0,0 +1,94 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "file.h" + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +// TEMP_FAILURE_RETRY is defined by some versions of <unistd.h>. +#ifndef TEMP_FAILURE_RETRY +#include <utils/Compat.h> +#endif + +#include <algorithm> + +namespace bsdiff { + +std::unique_ptr<File> File::FOpen(const char* pathname, int flags) { + int fd = TEMP_FAILURE_RETRY(open(pathname, flags)); + if (fd < 0) + return std::unique_ptr<File>(); + return std::unique_ptr<File>(new File(fd)); +} + +File::~File() { + Close(); +} + +bool File::Read(void* buf, size_t count, size_t* bytes_read) { + if (fd_ < 0) { + errno = EBADF; + return false; + } + ssize_t rc = TEMP_FAILURE_RETRY(read(fd_, buf, count)); + if (rc == -1) + return false; + *bytes_read = static_cast<size_t>(rc); + return true; +} + +bool File::Write(const void* buf, size_t count, size_t* bytes_written) { + if (fd_ < 0) { + errno = EBADF; + return false; + } + ssize_t rc = TEMP_FAILURE_RETRY(write(fd_, buf, count)); + if (rc == -1) + return false; + *bytes_written = static_cast<size_t>(rc); + return true; +} + +bool File::Seek(off_t pos) { + if (fd_ < 0) { + errno = EBADF; + return false; + } + // fseek() uses a long value for the offset which could be smaller than off_t. + if (pos > std::numeric_limits<long>::max()) { + errno = EOVERFLOW; + return false; + } + off_t newpos = lseek(fd_, pos, SEEK_SET) == pos; + if (newpos < 0) + return false; + if (newpos != pos) { + errno = EINVAL; + return false; + } + return true; +} + +bool File::Close() { + if (fd_ < 0) { + errno = EBADF; + return false; + } + bool success = close(fd_) == 0; + if (!success && errno == EINTR) + success = true; + fd_ = -1; + return success; +} + +File::File(int fd) + : fd_(fd) {} + +} // namespace bsdiff @@ -0,0 +1,38 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef _BSDIFF_FILE_H_ +#define _BSDIFF_FILE_H_ + +#include <memory> + +#include "file_interface.h" + +namespace bsdiff { + +class File : public FileInterface { + public: + // Opens a file |pathname| with flags |flags| as defined by open(2). In case + // of error, an empty unique_ptr is returned and errno is set accordingly. + static std::unique_ptr<File> FOpen(const char* pathname, int flags); + + ~File() override; + + // FileInterface overrides. + bool Read(void* buf, size_t count, size_t* bytes_read) override; + bool Write(const void* buf, size_t count, size_t* bytes_written) override; + bool Seek(off_t pos) override; + bool Close() override; + + private: + // Creates the File instance for the |fd|. Takes ownership of the file + // descriptor. + File(int fd); + + int fd_; +}; + +} // namespace bsdiff + +#endif // _BSDIFF_FILE_H_ diff --git a/file_interface.h b/file_interface.h new file mode 100644 index 0000000..b32115a --- /dev/null +++ b/file_interface.h @@ -0,0 +1,44 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef _BSDIFF_FILE_INTERFACE_H_ +#define _BSDIFF_FILE_INTERFACE_H_ + +#include <sys/types.h> + +namespace bsdiff { + +class FileInterface { + public: + virtual ~FileInterface() = default; + + // Reads synchronously from the current file position up to |count| bytes into + // the passed |buf| buffer. On success, stores in |bytes_read| how many bytes + // were actually read from the file. In case of error returns false. This + // method may read less than |count| bytes even if there's no error. If the + // end of file is reached, 0 bytes will be read and this methods returns true. + virtual bool Read(void* buf, size_t count, size_t* bytes_read) = 0; + + // Writes synchronously up to |count| bytes from to passed |buf| buffer to + // the file. On success, stores in |bytes_written| how many bytes + // were actually written to the file. This method may write less than |count| + // bytes and return successfully, or even write 0 bytes if there's no more + // space left on the device. Returns whether the write succeeded. + virtual bool Write(const void* buf, size_t count, size_t* bytes_written) = 0; + + // Change the current file position to |pos| bytes from the beginning of the + // file. Return whether the seek succeeded. + virtual bool Seek(off_t pos) = 0; + + // Closes the file and flushes any cached data. Returns whether the close + // succeeded. + virtual bool Close() = 0; + + protected: + FileInterface() = default; +}; + +} // namespace bsdiff + +#endif // _BSDIFF_FILE_INTERFACE_H_ |