diff options
author | Alex Deymo <deymo@google.com> | 2015-10-27 00:44:54 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2015-10-27 00:44:54 +0000 |
commit | 6b3c93e10a19c1e4b0baa33967c75e846c1367c1 (patch) | |
tree | d3296fc27b358930fcffd969d7b35f6fd11701ee | |
parent | c6aa41e37a19fc3a68ff4c8d1268e59aa8b26f54 (diff) | |
parent | f85d74cca542f9b3e6d7427fe3cf52000af85e5a (diff) | |
download | bsdiff-6b3c93e10a19c1e4b0baa33967c75e846c1367c1.tar.gz |
Merge changes Ib9f70e57,If6837f25,I45ec3d45,I9aab80b3,I8d8f0715, ...
am: f85d74cca5
* commit 'f85d74cca542f9b3e6d7427fe3cf52000af85e5a':
Build unittests on Android.
bspatch: Re-enable extents support.
bspatch: Ran clang-format on bspatch.cc.
FileInterface: Implement GetSize().
bspatch: Use a C++ interface for file access.
Convert C sources to C++.
Ran clang-format on all the Chromium OS sources.
-rw-r--r-- | .clang-format | 4 | ||||
-rw-r--r-- | Android.mk | 62 | ||||
-rw-r--r-- | Makefile | 29 | ||||
-rw-r--r-- | bsdiff.cc (renamed from bsdiff.c) | 69 | ||||
-rw-r--r-- | bsdiff.h | 20 | ||||
-rw-r--r-- | bsdiff_main.c | 15 | ||||
-rw-r--r-- | bsdiff_main.cc | 14 | ||||
-rw-r--r-- | bsdiff_unittest.cc | 8 | ||||
-rw-r--r-- | bspatch.c | 281 | ||||
-rw-r--r-- | bspatch.cc | 288 | ||||
-rw-r--r-- | bspatch.h | 26 | ||||
-rw-r--r-- | bspatch_main.cc (renamed from bspatch_main.c) | 10 | ||||
-rw-r--r-- | exfile.c | 417 | ||||
-rw-r--r-- | exfile.h | 53 | ||||
-rw-r--r-- | extents.c | 130 | ||||
-rw-r--r-- | extents.cc | 113 | ||||
-rw-r--r-- | extents.h | 29 | ||||
-rw-r--r-- | extents_file.cc | 116 | ||||
-rw-r--r-- | extents_file.h | 92 | ||||
-rw-r--r-- | extents_file_unittest.cc | 185 | ||||
-rw-r--r-- | extents_unittest.cc | 56 | ||||
-rw-r--r-- | file.cc | 130 | ||||
-rw-r--r-- | file.h | 39 | ||||
-rw-r--r-- | file_interface.h | 48 | ||||
-rw-r--r-- | test_utils.cc | 5 | ||||
-rw-r--r-- | test_utils.h | 12 | ||||
-rw-r--r-- | testrunner.cc | 2 |
27 files changed, 1246 insertions, 1007 deletions
diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..998f530 --- /dev/null +++ b/.clang-format @@ -0,0 +1,4 @@ +BasedOnStyle: Chromium + +MaxEmptyLinesToKeep: 2 +AllowShortBlocksOnASingleLine: true @@ -14,6 +14,7 @@ LOCAL_PATH := $(call my-dir) +# Common project flags. bsdiff_common_cflags := \ -D_FILE_OFFSET_BITS=64 \ -Wall \ @@ -21,26 +22,67 @@ bsdiff_common_cflags := \ -Wextra \ -Wno-unused-parameter +bsdiff_common_static_libs := \ + libbz + +bsdiff_common_unittests := \ + bsdiff_unittest.cc \ + extents_file_unittest.cc \ + extents_unittest.cc \ + test_utils.cc + +# "bsdiff" program. +bsdiff_shared_libs := \ + libdivsufsort64 \ + libdivsufsort + +bsdiff_src_files := \ + bsdiff.cc + +# "bspatch" program. +bspatch_src_files := \ + bspatch.cc \ + extents.cc \ + extents_file.cc \ + file.cc + include $(CLEAR_VARS) LOCAL_MODULE := bsdiff +LOCAL_CPP_EXTENSION := .cc LOCAL_SRC_FILES := \ - bsdiff.c \ - bsdiff_main.c + $(bsdiff_src_files) \ + bsdiff_main.cc LOCAL_CFLAGS := $(bsdiff_common_cflags) LOCAL_C_INCLUDES += external/bzip2 -LOCAL_STATIC_LIBRARIES := libbz -LOCAL_SHARED_LIBRARIES := \ - libdivsufsort64 \ - libdivsufsort +LOCAL_STATIC_LIBRARIES := $(bsdiff_common_static_libs) +LOCAL_SHARED_LIBRARIES := $(bsdiff_shared_libs) include $(BUILD_HOST_EXECUTABLE) include $(CLEAR_VARS) LOCAL_MODULE := bspatch +LOCAL_CPP_EXTENSION := .cc +LOCAL_SRC_FILES := \ + $(bspatch_src_files) \ + bspatch_main.cc +LOCAL_CFLAGS := $(bsdiff_common_cflags) +LOCAL_C_INCLUDES += external/bzip2 +LOCAL_STATIC_LIBRARIES := $(bsdiff_common_static_libs) +include $(BUILD_HOST_EXECUTABLE) + +include $(CLEAR_VARS) +LOCAL_MODULE := bsdiff_unittest +LOCAL_MODULE_TAGS := debug tests +LOCAL_CPP_EXTENSION := .cc LOCAL_SRC_FILES := \ - bspatch.c \ - bspatch_main.c \ - extents.c + $(bsdiff_src_files) \ + $(bspatch_src_files) \ + $(bsdiff_common_unittests) \ + testrunner.cc LOCAL_CFLAGS := $(bsdiff_common_cflags) LOCAL_C_INCLUDES += external/bzip2 -LOCAL_STATIC_LIBRARIES := libbz +LOCAL_STATIC_LIBRARIES := \ + $(bsdiff_common_static_libs) \ + libgtest_host \ + libgmock_host +LOCAL_SHARED_LIBRARIES := $(bsdiff_shared_libs) include $(BUILD_HOST_EXECUTABLE) @@ -32,12 +32,15 @@ BSDIFF_OBJS = \ BSPATCH_LIBS = -lbz2 BSPATCH_OBJS = \ bspatch.o \ - exfile.o \ - extents.o + extents.o \ + extents_file.o \ + file.o -UNITTEST_LIBS = -lgtest +UNITTEST_LIBS = -lgmock -lgtest UNITTEST_OBJS = \ bsdiff_unittest.o \ + extents_file_unittest.o \ + extents_unittest.o \ test_utils.o \ testrunner.o @@ -49,16 +52,24 @@ bspatch: LDLIBS += $(BSPATCH_LIBS) unittests: LDLIBS += $(BSDIFF_LIBS) $(BSPATCH_LIBS) $(UNITTEST_LIBS) unittests: $(BSPATCH_OBJS) $(BSDIFF_OBJS) $(UNITTEST_OBJS) + +unittests bsdiff bspatch: $(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^ $(LDLIBS) # Source file dependencies. -bsdiff.o: bsdiff.c -bsdiff_main.o: bsdiff_main.c bsdiff.h +bsdiff.o: bsdiff.cc +bsdiff_main.o: bsdiff_main.cc bsdiff.h bsdiff_unittest.o: bsdiff_unittest.cc bsdiff.h test_utils.h -bspatch.o: bspatch.c exfile.h extents.h -bspatch_main.o: bspatch_main.c bspatch.h -exfile.o: exfile.c exfile.h -extents.o: extents.c extents.h exfile.h +bspatch.o: bspatch.cc bspatch.h extents.h extents_file.h file_interface.h \ + file.h +bspatch_main.o: bspatch_main.cc bspatch.h +extents.o: extents.cc extents.h extents_file.h file_interface.h +extents_file.o: extents_file.cc extents_file.h file_interface.h +extents_file_unittest.o: extents_file_unittest.cc extents_file.h \ + file_interface.h +extents_unittest.o: extents_unittest.cc extents.h extents_file.h \ + file_interface.h +file.o: file.cc file.h file_interface.h testrunner.o: testrunner.cc test_utils.o: test_utils.cc test_utils.h @@ -38,6 +38,8 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 #include <string.h> #include <unistd.h> +#include <algorithm> + #if _FILE_OFFSET_BITS == 64 #include "divsufsort64.h" #define saidx_t saidx64_t @@ -46,26 +48,26 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 #include "divsufsort.h" #endif -#define MIN(x,y) (((x)<(y)) ? (x) : (y)) +namespace bsdiff { -static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) -{ +static off_t matchlen(u_char *old, off_t oldsize, u_char *new_buf, + off_t newsize) { off_t i; for(i=0;(i<oldsize)&&(i<newsize);i++) - if(old[i]!=new[i]) break; + if(old[i]!=new_buf[i]) break; return i; } static off_t search(saidx_t *I,u_char *old,off_t oldsize, - u_char *new,off_t newsize,off_t st,off_t en,off_t *pos) + u_char *new_buf,off_t newsize,off_t st,off_t en,off_t *pos) { off_t x,y; if(en-st<2) { - x=matchlen(old+I[st],oldsize-I[st],new,newsize); - y=matchlen(old+I[en],oldsize-I[en],new,newsize); + x=matchlen(old+I[st],oldsize-I[st],new_buf,newsize); + y=matchlen(old+I[en],oldsize-I[en],new_buf,newsize); if(x>y) { *pos=I[st]; @@ -77,10 +79,10 @@ static off_t search(saidx_t *I,u_char *old,off_t oldsize, }; x=st+(en-st)/2; - if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<=0) { - return search(I,old,oldsize,new,newsize,x,en,pos); + if(memcmp(old+I[x],new_buf,std::min(oldsize-I[x],newsize))<=0) { + return search(I,old,oldsize,new_buf,newsize,x,en,pos); } else { - return search(I,old,oldsize,new,newsize,st,x,pos); + return search(I,old,oldsize,new_buf,newsize,st,x,pos); }; } @@ -105,7 +107,7 @@ static void offtout(off_t x,u_char *buf) int bsdiff(const char* old_filename, const char* new_filename, const char* patch_filename) { int fd; - u_char *old,*new; + u_char *old_buf,*new_buf; off_t oldsize,newsize; saidx_t *I; off_t scan,pos=0,len; @@ -126,26 +128,27 @@ int bsdiff(const char* old_filename, const char* new_filename, that we never try to malloc(0) and get a NULL pointer */ if(((fd=open(old_filename,O_RDONLY,0))<0) || ((oldsize=lseek(fd,0,SEEK_END))==-1) || - ((old=malloc(oldsize+1))==NULL) || + ((old_buf=static_cast<u_char*>(malloc(oldsize+1)))==NULL) || (lseek(fd,0,SEEK_SET)!=0) || - (read(fd,old,oldsize)!=oldsize) || + (read(fd,old_buf,oldsize)!=oldsize) || (close(fd)==-1)) err(1,"%s",old_filename); - if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL); + if((I=static_cast<saidx_t*>(malloc((oldsize+1)*sizeof(saidx_t))))==NULL) + err(1,NULL); - if(divsufsort(old, I, oldsize)) err(1, "divsufsort"); + if(divsufsort(old_buf, I, oldsize)) err(1, "divsufsort"); /* Allocate newsize+1 bytes instead of newsize bytes to ensure that we never try to malloc(0) and get a NULL pointer */ if(((fd=open(new_filename,O_RDONLY,0))<0) || ((newsize=lseek(fd,0,SEEK_END))==-1) || - ((new=malloc(newsize+1))==NULL) || + ((new_buf = static_cast<u_char*>(malloc(newsize+1)))==NULL) || (lseek(fd,0,SEEK_SET)!=0) || - (read(fd,new,newsize)!=newsize) || + (read(fd,new_buf,newsize)!=newsize) || (close(fd)==-1)) err(1,"%s",new_filename); - if(((db=malloc(newsize+1))==NULL) || - ((eb=malloc(newsize+1))==NULL)) err(1,NULL); + if(((db=static_cast<u_char*>(malloc(newsize+1)))==NULL) || + ((eb=static_cast<u_char*>(malloc(newsize+1)))==NULL)) err(1,NULL); dblen=0; eblen=0; @@ -189,19 +192,19 @@ int bsdiff(const char* old_filename, const char* new_filename, prev_oldscore=oldscore; prev_pos=pos; - len=search(I,old,oldsize,new+scan,newsize-scan, + len=search(I,old_buf,oldsize,new_buf+scan,newsize-scan, 0,oldsize,&pos); for(;scsc<scan+len;scsc++) if((scsc+lastoffset<oldsize) && - (old[scsc+lastoffset] == new[scsc])) + (old_buf[scsc+lastoffset] == new_buf[scsc])) oldscore++; if(((len==oldscore) && (len!=0)) || (len>oldscore+8)) break; if((scan+lastoffset<oldsize) && - (old[scan+lastoffset] == new[scan])) + (old_buf[scan+lastoffset] == new_buf[scan])) oldscore--; const off_t fuzz = 8; @@ -219,7 +222,7 @@ int bsdiff(const char* old_filename, const char* new_filename, if((len!=oldscore) || (scan==newsize)) { s=0;Sf=0;lenf=0; for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) { - if(old[lastpos+i]==new[lastscan+i]) s++; + if(old_buf[lastpos+i]==new_buf[lastscan+i]) s++; i++; if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }; }; @@ -228,7 +231,7 @@ int bsdiff(const char* old_filename, const char* new_filename, if(scan<newsize) { s=0;Sb=0; for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { - if(old[pos-i]==new[scan-i]) s++; + if(old_buf[pos-i]==new_buf[scan-i]) s++; if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }; }; }; @@ -237,10 +240,10 @@ int bsdiff(const char* old_filename, const char* new_filename, overlap=(lastscan+lenf)-(scan-lenb); s=0;Ss=0;lens=0; for(i=0;i<overlap;i++) { - if(new[lastscan+lenf-overlap+i]== - old[lastpos+lenf-overlap+i]) s++; - if(new[scan-lenb+i]== - old[pos-lenb+i]) s--; + if(new_buf[lastscan+lenf-overlap+i]== + old_buf[lastpos+lenf-overlap+i]) s++; + if(new_buf[scan-lenb+i]== + old_buf[pos-lenb+i]) s--; if(s>Ss) { Ss=s; lens=i+1; }; }; @@ -249,9 +252,9 @@ int bsdiff(const char* old_filename, const char* new_filename, }; for(i=0;i<lenf;i++) - db[dblen+i]=new[lastscan+i]-old[lastpos+i]; + db[dblen+i]=new_buf[lastscan+i]-old_buf[lastpos+i]; for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) - eb[eblen+i]=new[lastscan+lenf+i]; + eb[eblen+i]=new_buf[lastscan+lenf+i]; dblen+=lenf; eblen+=(scan-lenb)-(lastscan+lenf); @@ -322,8 +325,10 @@ int bsdiff(const char* old_filename, const char* new_filename, free(db); free(eb); free(I); - free(old); - free(new); + free(old_buf); + free(new_buf); return 0; } + +} // namespace bsdiff @@ -1,20 +1,16 @@ -/* Copyright 2015 The Chromium OS Authors. All rights reserved. - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. #ifndef _BSDIFF_BSDIFF_H_ #define _BSDIFF_BSDIFF_H_ -#ifdef __cplusplus -extern "C" { -#endif +namespace bsdiff { -int bsdiff(const char* old_filename, const char* new_filename, +int bsdiff(const char* old_filename, + const char* new_filename, const char* patch_filename); -#ifdef __cplusplus -} -#endif +} // namespace bsdiff -#endif /* _BSDIFF_BSDIFF_H_ */ +#endif // _BSDIFF_BSDIFF_H_ diff --git a/bsdiff_main.c b/bsdiff_main.c deleted file mode 100644 index 49491bf..0000000 --- a/bsdiff_main.c +++ /dev/null @@ -1,15 +0,0 @@ -/* Copyright 2015 The Chromium OS Authors. All rights reserved. - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include <err.h> - -#include "bsdiff.h" - -int main(int argc, char *argv[]) { - if (argc!=4) - errx(1, "usage: %s oldfile newfile patchfile\n", argv[0]); - - return bsdiff(argv[1], argv[2], argv[3]); -} diff --git a/bsdiff_main.cc b/bsdiff_main.cc new file mode 100644 index 0000000..db3026c --- /dev/null +++ b/bsdiff_main.cc @@ -0,0 +1,14 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <err.h> + +#include "bsdiff.h" + +int main(int argc, char* argv[]) { + if (argc != 4) + errx(1, "usage: %s oldfile newfile patchfile\n", argv[0]); + + return bsdiff::bsdiff(argv[1], argv[2], argv[3]); +} diff --git a/bsdiff_unittest.cc b/bsdiff_unittest.cc index 9b4cd53..b3aae3e 100644 --- a/bsdiff_unittest.cc +++ b/bsdiff_unittest.cc @@ -14,6 +14,8 @@ using std::string; using std::vector; using test_utils::BsdiffPatchFile; +namespace bsdiff { + class BsdiffTest : public testing::Test { protected: BsdiffTest() @@ -40,7 +42,7 @@ TEST_F(BsdiffTest, EqualEmptyFiles) { // An empty bz2 file will have 14 bytes. EXPECT_EQ(14, patch.diff_len); - EXPECT_EQ(14, patch.extra_len); + EXPECT_EQ(14U, patch.extra_len); } TEST_F(BsdiffTest, EqualSmallFiles) { @@ -56,5 +58,7 @@ TEST_F(BsdiffTest, EqualSmallFiles) { // An empty bz2 file will have 14 bytes. EXPECT_EQ(14, patch.diff_len); - EXPECT_EQ(14, patch.extra_len); + EXPECT_EQ(14U, patch.extra_len); } + +} // namespace bsdiff diff --git a/bspatch.c b/bspatch.c deleted file mode 100644 index 3bd3f0e..0000000 --- a/bspatch.c +++ /dev/null @@ -1,281 +0,0 @@ -/*- - * Copyright 2003-2005 Colin Percival - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted providing that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#if 0 -__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $"); -#endif - -#include <bzlib.h> -#include <err.h> -#include <fcntl.h> -#include <inttypes.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> // android - -#include "extents.h" - - -static off_t offtin(u_char *buf) -{ - off_t y; - - y=buf[7]&0x7F; - y=y*256;y+=buf[6]; - y=y*256;y+=buf[5]; - y=y*256;y+=buf[4]; - y=y*256;y+=buf[3]; - y=y*256;y+=buf[2]; - y=y*256;y+=buf[1]; - y=y*256;y+=buf[0]; - - if(buf[7]&0x80) y=-y; - - return y; -} - -/* TODO(deymo): Re-enable exfile.h once we can build it for the -target and mac. */ -#if 0 -/* Parses an extent string ex_str, returning a pointer to a newly allocated - * array of extents. The number of extents is stored in ex_count_p (if - * provided). */ -static ex_t *parse_extent_str(const char *ex_str, size_t *ex_count_p) -{ - size_t ex_count = (size_t)-1; - ex_t *ex_arr = extents_parse(ex_str, NULL, &ex_count); - if (!ex_arr) - errx(1, (ex_count == (size_t)-1 ? - "error parsing extents" : - "error allocating extent array")); - if (ex_count_p) - *ex_count_p = ex_count; - return ex_arr; -} -#endif - -int bspatch( - const char* old_filename, const char* new_filename, - const char* patch_filename, - const char* old_extents, const char* new_extents) { - FILE * f, * cpf, * dpf, * epf; - BZFILE * cpfbz2, * dpfbz2, * epfbz2; - int cbz2err, dbz2err, ebz2err; - FILE *old_file = NULL, *new_file = NULL; - ssize_t oldsize,newsize; - ssize_t bzctrllen,bzdatalen; - u_char header[32],buf[8]; - u_char *new; - off_t oldpos,newpos; - off_t ctrl[3]; - off_t lenread; - off_t i, j; - - int using_extents = (old_extents != NULL || new_extents != NULL); - - /* Open patch file */ - if ((f = fopen(patch_filename, "r")) == NULL) - err(1, "fopen(%s)", patch_filename); - - /* - File format: - 0 8 "BSDIFF40" - 8 8 X - 16 8 Y - 24 8 sizeof(new_filename) - 32 X bzip2(control block) - 32+X Y bzip2(diff block) - 32+X+Y ??? bzip2(extra block) - with control block a set of triples (x,y,z) meaning "add x bytes - from oldfile to x bytes from the diff block; copy y bytes from the - extra block; seek forwards in oldfile by z bytes". - */ - - /* Read header */ - if (fread(header, 1, 32, f) < 32) { - if (feof(f)) - errx(1, "Corrupt patch\n"); - err(1, "fread(%s)", patch_filename); - } - - /* Check for appropriate magic */ - if (memcmp(header, "BSDIFF40", 8) != 0) - errx(1, "Corrupt patch\n"); - - /* Read lengths from header */ - bzctrllen=offtin(header+8); - bzdatalen=offtin(header+16); - newsize=offtin(header+24); - if((bzctrllen<0) || (bzdatalen<0) || (newsize<0)) - errx(1,"Corrupt patch\n"); - - /* Close patch file and re-open it via libbzip2 at the right places */ - if (fclose(f)) - err(1, "fclose(%s)", patch_filename); - if ((cpf = fopen(patch_filename, "r")) == NULL) - err(1, "fopen(%s)", patch_filename); - if (fseek(cpf, 32, SEEK_SET)) - err(1, "fseeko(%s, %lld)", patch_filename, - (long long)32); - if ((cpfbz2 = BZ2_bzReadOpen(&cbz2err, cpf, 0, 0, NULL, 0)) == NULL) - errx(1, "BZ2_bzReadOpen, bz2err = %d", cbz2err); - if ((dpf = fopen(patch_filename, "r")) == NULL) - err(1, "fopen(%s)", patch_filename); - if (fseek(dpf, 32 + bzctrllen, SEEK_SET)) - err(1, "fseeko(%s, %lld)", patch_filename, - (long long)(32 + bzctrllen)); - if ((dpfbz2 = BZ2_bzReadOpen(&dbz2err, dpf, 0, 0, NULL, 0)) == NULL) - errx(1, "BZ2_bzReadOpen, bz2err = %d", dbz2err); - if ((epf = fopen(patch_filename, "r")) == NULL) - err(1, "fopen(%s)", patch_filename); - if (fseek(epf, 32 + bzctrllen + bzdatalen, SEEK_SET)) - err(1, "fseeko(%s, %lld)", patch_filename, - (long long)(32 + bzctrllen + bzdatalen)); - if ((epfbz2 = BZ2_bzReadOpen(&ebz2err, epf, 0, 0, NULL, 0)) == NULL) - errx(1, "BZ2_bzReadOpen, bz2err = %d", ebz2err); - - /* Open input file for reading. */ - if (using_extents) { - /* TODO(deymo): Re-enable exfile.h once we can build it for the - target and mac. */ - errx(1, "Extent support is disabled.\n"); -#if 0 - size_t ex_count = 0; - ex_t *ex_arr = parse_extent_str(old_extents, &ex_count); - old_file = exfile_fopen(old_filename, "r", ex_arr, ex_count, - free); -#endif - } else { - old_file = fopen(old_filename, "r"); - } - if (!old_file || - fseek(old_file, 0, SEEK_END) != 0 || - (oldsize = ftell(old_file)) < 0 || - fseek(old_file, 0, SEEK_SET) != 0) - err(1, "cannot obtain the size of %s", old_filename); - off_t old_file_pos = 0; - - if((new=malloc(newsize+1))==NULL) err(1,NULL); - - oldpos=0;newpos=0; - while(newpos<newsize) { - /* Read control data */ - for(i=0;i<=2;i++) { - lenread = BZ2_bzRead(&cbz2err, cpfbz2, buf, 8); - if ((lenread < 8) || ((cbz2err != BZ_OK) && - (cbz2err != BZ_STREAM_END))) - errx(1, "Corrupt patch\n"); - ctrl[i]=offtin(buf); - }; - - // android local change (start) - if (ctrl[0]<0||ctrl[1]<0) - errx(1,"Corrupt patch\n"); - // android local change (end) - - /* Sanity-check */ - if(newpos+ctrl[0]>newsize) - errx(1,"Corrupt patch\n"); - - /* Read diff string */ - lenread = BZ2_bzRead(&dbz2err, dpfbz2, new + newpos, ctrl[0]); - if ((lenread < ctrl[0]) || - ((dbz2err != BZ_OK) && (dbz2err != BZ_STREAM_END))) - errx(1, "Corrupt patch\n"); - - /* Add old data to diff string. It is enough to fseek once, at - * the beginning of the sequence, to avoid unnecessary - * overhead. */ - j = newpos; - if ((i = oldpos) < 0) { - j -= i; - i = 0; - } - if (i != old_file_pos && fseek(old_file, i, SEEK_SET) < 0) - err(1, "error seeking input file to offset %" PRIdMAX, - (intmax_t)i); - if ((old_file_pos = oldpos + ctrl[0]) > oldsize) - old_file_pos = oldsize; - while (i++ < old_file_pos) { - u_char c; - if (fread(&c, 1, 1, old_file) != 1) - err(1, "error reading from input file"); - new[j++] += c; - } - - /* Adjust pointers */ - newpos+=ctrl[0]; - oldpos+=ctrl[0]; - - /* Sanity-check */ - if(newpos+ctrl[1]>newsize) - errx(1,"Corrupt patch\n"); - - /* Read extra string */ - lenread = BZ2_bzRead(&ebz2err, epfbz2, new + newpos, ctrl[1]); - if ((lenread < ctrl[1]) || - ((ebz2err != BZ_OK) && (ebz2err != BZ_STREAM_END))) - errx(1, "Corrupt patch\n"); - - /* Adjust pointers */ - newpos+=ctrl[1]; - oldpos+=ctrl[2]; - }; - - /* Close input file. */ - fclose(old_file); - - /* Clean up the bzip2 reads */ - BZ2_bzReadClose(&cbz2err, cpfbz2); - BZ2_bzReadClose(&dbz2err, dpfbz2); - BZ2_bzReadClose(&ebz2err, epfbz2); - if (fclose(cpf) || fclose(dpf) || fclose(epf)) - err(1, "fclose(%s)", patch_filename); - - /* Write the new file */ - if (using_extents) { - /* TODO(deymo): Re-enable exfile.h once we can build it for the - target and mac. */ -#if 0 - size_t ex_count = 0; - ex_t *ex_arr = parse_extent_str(new_extents, &ex_count); - new_file = exfile_fopen(new_filename, "w", ex_arr, ex_count, - free); -#endif - } else { - new_file = fopen(new_filename, "w"); - } - if (!new_file || - fwrite(new, 1, newsize, new_file) != (size_t)newsize || - fclose(new_file) == EOF) - err(1,"%s",new_filename); - - free(new); - - return 0; -} diff --git a/bspatch.cc b/bspatch.cc new file mode 100644 index 0000000..3dfd59f --- /dev/null +++ b/bspatch.cc @@ -0,0 +1,288 @@ +/*- + * Copyright 2003-2005 Colin Percival + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if 0 +__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $"); +#endif + +#include "bspatch.h" + +#include <bzlib.h> +#include <err.h> +#include <fcntl.h> +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> + +#include <algorithm> +#include <memory> +#include <limits> + +#include "extents.h" +#include "extents_file.h" +#include "file.h" +#include "file_interface.h" + +namespace { + +int64_t ParseInt64(u_char* buf) { + int64_t y; + + y = buf[7] & 0x7F; + y = y * 256; + y += buf[6]; + y = y * 256; + y += buf[5]; + y = y * 256; + y += buf[4]; + y = y * 256; + y += buf[3]; + y = y * 256; + y += buf[2]; + y = y * 256; + y += buf[1]; + y = y * 256; + y += buf[0]; + + if (buf[7] & 0x80) + y = -y; + + return y; +} + +} // namespace + +namespace bsdiff { + +int bspatch( + const char* old_filename, const char* new_filename, + const char* patch_filename, + const char* old_extents, const char* new_extents) { + FILE* f, *cpf, *dpf, *epf; + BZFILE* cpfbz2, *dpfbz2, *epfbz2; + int cbz2err, dbz2err, ebz2err; + ssize_t bzctrllen, bzdatalen; + u_char header[32], buf[8]; + u_char* new_buf; + off_t ctrl[3]; + off_t lenread; + + int using_extents = (old_extents != NULL || new_extents != NULL); + + // Open patch file. + if ((f = fopen(patch_filename, "r")) == NULL) + err(1, "fopen(%s)", patch_filename); + + // File format: + // 0 8 "BSDIFF40" + // 8 8 X + // 16 8 Y + // 24 8 sizeof(new_filename) + // 32 X bzip2(control block) + // 32+X Y bzip2(diff block) + // 32+X+Y ??? bzip2(extra block) + // with control block a set of triples (x,y,z) meaning "add x bytes + // from oldfile to x bytes from the diff block; copy y bytes from the + // extra block; seek forwards in oldfile by z bytes". + + // Read header. + if (fread(header, 1, 32, f) < 32) { + if (feof(f)) + errx(1, "Corrupt patch\n"); + err(1, "fread(%s)", patch_filename); + } + + // Check for appropriate magic. + if (memcmp(header, "BSDIFF40", 8) != 0) + errx(1, "Corrupt patch\n"); + + // Read lengths from header. + uint64_t oldsize, newsize; + bzctrllen = ParseInt64(header + 8); + bzdatalen = ParseInt64(header + 16); + int64_t signed_newsize = ParseInt64(header + 24); + newsize = signed_newsize; + if ((bzctrllen < 0) || (bzdatalen < 0) || (signed_newsize < 0)) + errx(1, "Corrupt patch\n"); + + // Close patch file and re-open it via libbzip2 at the right places. + if (fclose(f)) + err(1, "fclose(%s)", patch_filename); + if ((cpf = fopen(patch_filename, "r")) == NULL) + err(1, "fopen(%s)", patch_filename); + if (fseek(cpf, 32, SEEK_SET)) + err(1, "fseeko(%s, %lld)", patch_filename, (long long)32); + if ((cpfbz2 = BZ2_bzReadOpen(&cbz2err, cpf, 0, 0, NULL, 0)) == NULL) + errx(1, "BZ2_bzReadOpen, bz2err = %d", cbz2err); + if ((dpf = fopen(patch_filename, "r")) == NULL) + err(1, "fopen(%s)", patch_filename); + if (fseek(dpf, 32 + bzctrllen, SEEK_SET)) + err(1, "fseeko(%s, %lld)", patch_filename, (long long)(32 + bzctrllen)); + if ((dpfbz2 = BZ2_bzReadOpen(&dbz2err, dpf, 0, 0, NULL, 0)) == NULL) + errx(1, "BZ2_bzReadOpen, bz2err = %d", dbz2err); + if ((epf = fopen(patch_filename, "r")) == NULL) + err(1, "fopen(%s)", patch_filename); + if (fseek(epf, 32 + bzctrllen + bzdatalen, SEEK_SET)) + err(1, "fseeko(%s, %lld)", patch_filename, + (long long)(32 + bzctrllen + bzdatalen)); + if ((epfbz2 = BZ2_bzReadOpen(&ebz2err, epf, 0, 0, NULL, 0)) == NULL) + errx(1, "BZ2_bzReadOpen, bz2err = %d", ebz2err); + + // Open input file for reading. + std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY); + if (!old_file) + err(1, "Error opening the old filename"); + + if (using_extents) { + std::vector<ex_t> parsed_old_extents; + if (!ParseExtentStr(old_extents, &parsed_old_extents)) + errx(1, "Error parsing the old extents"); + old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents)); + } + + if (!old_file->GetSize(&oldsize)) + err(1, "cannot obtain the size of %s", old_filename); + uint64_t old_file_pos = 0; + + if ((new_buf = static_cast<u_char*>(malloc(newsize + 1))) == NULL) + err(1, NULL); + + // The oldpos can be negative, but the new pos is only incremented linearly. + int64_t oldpos = 0; + uint64_t newpos = 0; + while (newpos < newsize) { + int64_t i, j; + // Read control data. + for (i = 0; i <= 2; i++) { + lenread = BZ2_bzRead(&cbz2err, cpfbz2, buf, 8); + if ((lenread < 8) || ((cbz2err != BZ_OK) && (cbz2err != BZ_STREAM_END))) + errx(1, "Corrupt patch\n"); + ctrl[i] = ParseInt64(buf); + }; + + // Sanity-check. + if (ctrl[0] < 0 || ctrl[1] < 0) + errx(1, "Corrupt patch\n"); + + // Sanity-check. + if (newpos + ctrl[0] > newsize) + errx(1, "Corrupt patch\n"); + + // Read diff string. + lenread = BZ2_bzRead(&dbz2err, dpfbz2, new_buf + newpos, ctrl[0]); + if ((lenread < ctrl[0]) || + ((dbz2err != BZ_OK) && (dbz2err != BZ_STREAM_END))) + errx(1, "Corrupt patch\n"); + + // Add old data to diff string. It is enough to fseek once, at + // the beginning of the sequence, to avoid unnecessary overhead. + j = newpos; + if ((i = oldpos) < 0) { + j -= i; + i = 0; + } + // We just checked that |i| is not negative. + if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i)) + err(1, "error seeking input file to offset %" PRId64, i); + if ((old_file_pos = oldpos + ctrl[0]) > oldsize) + old_file_pos = oldsize; + + uint64_t chunk_size = old_file_pos - i; + while (chunk_size > 0) { + size_t read_bytes; + size_t bytes_to_read = std::min( + chunk_size, + static_cast<uint64_t>(std::numeric_limits<size_t>::max())); + if (!old_file->Read(new_buf + j, bytes_to_read, &read_bytes)) { + err(1, "error reading from input file"); + } + if (!read_bytes) + errx(1, "EOF reached while reading from input file"); + j += read_bytes; + chunk_size -= read_bytes; + } + + // Adjust pointers. + newpos += ctrl[0]; + oldpos += ctrl[0]; + + // Sanity-check. + if (newpos + ctrl[1] > newsize) + errx(1, "Corrupt patch\n"); + + // Read extra string. + lenread = BZ2_bzRead(&ebz2err, epfbz2, new_buf + newpos, ctrl[1]); + if ((lenread < ctrl[1]) || + ((ebz2err != BZ_OK) && (ebz2err != BZ_STREAM_END))) + errx(1, "Corrupt patch\n"); + + // Adjust pointers. + newpos += ctrl[1]; + oldpos += ctrl[2]; + }; + + // Close input file. + old_file->Close(); + + // Clean up the bzip2 reads. + BZ2_bzReadClose(&cbz2err, cpfbz2); + BZ2_bzReadClose(&dbz2err, dpfbz2); + BZ2_bzReadClose(&ebz2err, epfbz2); + if (fclose(cpf) || fclose(dpf) || fclose(epf)) + err(1, "fclose(%s)", patch_filename); + + // Write the new file. + std::unique_ptr<FileInterface> new_file = + File::FOpen(new_filename, O_CREAT | O_WRONLY); + if (!new_file) + err(1, "Error opening the new filename %s", new_filename); + + if (using_extents) { + std::vector<ex_t> parsed_new_extents; + if (!ParseExtentStr(new_extents, &parsed_new_extents)) + errx(1, "Error parsing the new extents"); + new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents)); + } + + while (newsize > 0) { + size_t bytes_written; + if (!new_file->Write(new_buf, newsize, &bytes_written)) + err(1, "Error writing new file %s", new_filename); + newsize -= bytes_written; + new_buf += bytes_written; + } + + if (!new_file->Close()) + err(1, "Error closing new file %s", new_filename); + + free(new_buf); + + return 0; +} + +} // namespace bsdiff @@ -1,22 +1,18 @@ -/* Copyright 2015 The Chromium OS Authors. All rights reserved. - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. #ifndef _BSDIFF_BSPATCH_H_ #define _BSDIFF_BSPATCH_H_ -#ifdef __cplusplus -extern "C" { -#endif +namespace bsdiff { -int bspatch( - const char* old_filename, const char* new_filename, - const char* patch_filename, - const char* old_extents, const char* new_extents); +int bspatch(const char* old_filename, + const char* new_filename, + const char* patch_filename, + const char* old_extents, + const char* new_extents); -#ifdef __cplusplus -} -#endif +} // namespace bsdiff -#endif /* _BSDIFF_BSPATCH_H_ */ +#endif // _BSDIFF_BSPATCH_H_ diff --git a/bspatch_main.c b/bspatch_main.cc index a5442e9..51ceebd 100644 --- a/bspatch_main.c +++ b/bspatch_main.cc @@ -7,11 +7,11 @@ #include "bspatch.h" -#define USAGE_TEMPLATE_STR \ - "usage: %s oldfile newfile patchfile [old-extents new-extents]\n" \ - "with extents taking the form \"off_1:len_1,...,off_n:len_n\"\n" +#define USAGE_TEMPLATE_STR \ + "usage: %s oldfile newfile patchfile [old-extents new-extents]\n" \ + "with extents taking the form \"off_1:len_1,...,off_n:len_n\"\n" -int main(int argc, char * argv[]) { +int main(int argc, char* argv[]) { const char* old_extents = NULL; const char* new_extents = NULL; @@ -23,5 +23,5 @@ int main(int argc, char * argv[]) { new_extents = argv[5]; } - return bspatch(argv[1], argv[2], argv[3], old_extents, new_extents); + return bsdiff::bspatch(argv[1], argv[2], argv[3], old_extents, new_extents); } diff --git a/exfile.c b/exfile.c deleted file mode 100644 index 945d66f..0000000 --- a/exfile.c +++ /dev/null @@ -1,417 +0,0 @@ -// Copyright 2015 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#define _GNU_SOURCE -#include "exfile.h" - -#include <assert.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -/* - * Extent files implementation. Some things worth noting: - * - * - We are using glibc's buffered FILE objects for the underlying file I/O; - * this contributes to improved performance, especially with badly fragmented - * extents. However, the FILE handle we return to the caller is decidedly - * unbuffered: making it buffered too seems superfluous, causing excess data - * copying and memory use. - * - * - We maintain the "logical" file position separately from the "physical" - * (underlying) file position. The latter is updated lazily whenever actual - * file I/O is about to be performed. - * - * - The logical position of an extent file is internally represented by the - * current extent index (curr_ex_idx) and the position within the current - * extent (curr_ex_pos), as well as an absolute logical position (curr_pos). - * In general, curr_pos should equal the total length of all extents prior to - * curr_ex_idx, plus curr_ex_pos. Also, curr_ex_idx may range between 0 and - * the total extent count; if it is exactly the latter, then curr_ex_pos must - * be zero, representing the fact that the we are at the logical end of the - * file. Otherwise, curr_ex_pos may range between 0 and the length of the - * current extent; if it is exactly the latter, then this is equivalent to - * position zero on the next extent. All functions should honor this - * duality. - * - * - Seeking is done efficiently at O(log(D)), where D is the - * number of extents between the current position and the new one. This seems - * like a good midway for supporting both sequential and random access. - */ - - -#define TRUE 1 -#define FALSE 0 - -#define arraysize(x) (sizeof(x) / sizeof(*(x))) - - -/* Extent prefix length. */ -typedef struct { - size_t prec; /* total length of preceding extents */ - size_t total; /* total length including current extent */ -} prefix_len_t; - -/* Extent file logical modes. Used as index to the mapping from logical modes - * to open(2) and fopen(3) modes below. */ -typedef enum { - EXFILE_MODE_RO, - EXFILE_MODE_WO, - EXFILE_MODE_RW, - EXFILE_MODE_MAX /* sentinel */ -} exfile_mode_t; - -/* An extent file control object (aka "cookie"). */ -typedef struct { - int fd; /* underlying file descriptor */ - size_t ex_count; /* number of extents (non-zero) */ - ex_t *ex_arr; /* array of extents */ - prefix_len_t *prefix_len_arr; /* total lengths of extent prefixes */ - void (*ex_free)(void *); /* extent array free function */ - size_t total_ex_len; /* total length of all extents (constant) */ - off_t curr_file_pos; /* current underlying file position */ - size_t curr_ex_idx; /* current extent index */ - size_t curr_ex_pos; /* current position within extent */ - off_t curr_pos; /* current logical file position */ -} exfile_t; - - -/* Mapping from fopen(3) modes to extent file logical modes. */ -static const struct { - const char *fopen_mode; - exfile_mode_t mode; -} fopen_mode_to_mode[] = { - {"r", EXFILE_MODE_RO}, - {"r+", EXFILE_MODE_RW}, - {"w", EXFILE_MODE_WO}, - {"w+", EXFILE_MODE_RW}, -}; - - -/* Mapping from extent file logical modes to open(2) flags. */ -static const int mode_to_open_flags[EXFILE_MODE_MAX] = { - O_RDONLY, - O_WRONLY, - O_RDWR, -}; - - -/* Searches an array |ex_arr| of |ex_count| extents and returns the index of - * the extent that contains the location |pos|. Uses an array |prefix_len_arr| - * of corresponding prefix lengths. The total complexity is O(log(D)), where D - * is the distance between the returned extent index and |init_ex_idx|. */ -static size_t -ex_arr_search(size_t ex_count, const ex_t *ex_arr, - const prefix_len_t *prefix_len_arr, size_t pos, - size_t init_ex_idx) -{ - assert(ex_arr && ex_count); - const ssize_t last_ex_idx = ex_count - 1; - assert(init_ex_idx <= ex_count); - assert(pos < prefix_len_arr[last_ex_idx].total); - if (init_ex_idx == ex_count) - init_ex_idx = last_ex_idx; /* adjustment for purposes of the search below */ - - /* First, search in exponentially increasing leaps from the current extent, - * until an interval bounding the target position was obtained. Here i and j - * are the left and right (inclusive) index boundaries, respectively. */ - ssize_t i = init_ex_idx; - ssize_t j = i; - size_t leap = 1; - /* Go left, as needed. */ - while (i > 0 && pos < prefix_len_arr[i].prec) { - j = i - 1; - if ((i -= leap) < 0) - i = 0; - leap <<= 1; - } - /* Go right, as needed. */ - while (j < last_ex_idx && pos >= prefix_len_arr[j].total) { - i = j + 1; - if ((j += leap) > last_ex_idx) - j = last_ex_idx; - leap <<= 1; - } - - /* Then, perform a binary search between i and j. */ - size_t k = 0; - while (1) { - k = (i + j) / 2; - if (pos < prefix_len_arr[k].prec) - j = k - 1; - else if (pos >= prefix_len_arr[k].total) - i = k + 1; - else - break; - } - - return k; -} - -/* Performs I/O operations (either read or write) on an extent file, advancing - * through consecutive extents and updating the logical/physical file position - * as we go. */ -static ssize_t -exfile_io(exfile_t *xf, int do_read, char *buf, size_t size) -{ - if (xf->curr_ex_idx == xf->ex_count) - return 0; /* end-of-extent-file */ - - /* Reading or writing? */ - typedef ssize_t (io_func_t)(int, void *, size_t); - io_func_t *io_func; - ssize_t error_ret_val; - if (do_read) { - io_func = read; - error_ret_val = -1; - } else { - io_func = (io_func_t *)write; - error_ret_val = 0; /* must not return a negative value when writing */ - } - - /* Start processing data along extents. */ - const ex_t *curr_ex = xf->ex_arr + xf->curr_ex_idx; - assert(curr_ex->len >= xf->curr_ex_pos); - size_t curr_ex_rem_len = curr_ex->len - xf->curr_ex_pos; - ssize_t total_bytes = 0; - while (size) { - /* Advance to the next extent of non-zero length. */ - while (curr_ex_rem_len == 0) { - xf->curr_ex_idx++; - xf->curr_ex_pos = 0; - if (xf->curr_ex_idx == xf->ex_count) - return total_bytes; /* end-of-extent-file */ - curr_ex++; - curr_ex_rem_len = curr_ex->len; - } - - const int is_real_ex = (curr_ex->off >= 0); - - /* Seek to the correct file position, as necessary. */ - if (is_real_ex) { - const off_t file_pos = curr_ex->off + xf->curr_ex_pos; - if (xf->curr_file_pos != file_pos) { - if (lseek(xf->fd, file_pos, SEEK_SET) == (off_t)-1) { - xf->curr_file_pos = -1; /* unknown file position */ - return total_bytes ? total_bytes : error_ret_val; - } - xf->curr_file_pos = file_pos; - } - } - - /* Process data to the end of the current extent or the requested - * count, whichever is smaller. */ - size_t io_count = (size < curr_ex_rem_len ? size : curr_ex_rem_len); - ssize_t io_bytes = io_count; - if (is_real_ex) - io_bytes = io_func(xf->fd, buf, io_count); - else if (do_read) - memset(buf, 0, io_count); - - /* Stop on error. */ - if (io_bytes < 0) { - if (total_bytes == 0) - total_bytes = error_ret_val; - break; - } - - /* Update read state. */ - total_bytes += io_bytes; - if (is_real_ex) - xf->curr_file_pos += io_bytes; - xf->curr_ex_pos += io_bytes; - xf->curr_pos += io_bytes; - - /* If we didn't read the whole extent, finish; delegate handling of - * partial read/write back to the caller. */ - if ((curr_ex_rem_len -= io_bytes) > 0) - break; - - /* Update total count and buffer pointer. */ - size -= io_bytes; - buf += io_bytes; - } - - return total_bytes; -} - -/* Reads up to |size| bytes from an extent file into |buf|. This implements the - * cookie_read_function_t interface and is a thin wrapper around exfile_io() - * (see above). Returns the number of bytes read, or -1 on error. */ -static ssize_t -exfile_read(void *cookie, char *buf, size_t size) -{ - return exfile_io((exfile_t *)cookie, TRUE, buf, size); -} - -/* Writes up to |size| bytes from |buf| to an extent file. This implements the - * cookie_write_function_t interface and is a thin wrapper around exfile_io() - * (see above). Returns the number of bytes written; must NOT return a negative - * value. */ -static ssize_t -exfile_write(void *cookie, const char *buf, size_t size) -{ - return exfile_io((exfile_t *)cookie, FALSE, (char *)buf, size); -} - -/* Performs seek on an extent file, repositioning it to the value of |*pos_p| - * according to |whence|. This implements the cookie_seek_function_t interface. - * On success, stores the resulting logical position measured in bytes along - * contiguous extents into |*pos_p| and returns 0; otherwise returns -1. */ -static int -exfile_seek(void *cookie, off64_t *pos_p, int whence) -{ - exfile_t *xf = (exfile_t *)cookie; - - /* Compute the absolute logical target position. */ - off64_t new_pos = *pos_p; - if (whence == SEEK_CUR) - new_pos += xf->curr_pos; - else if (whence == SEEK_END) - new_pos += xf->total_ex_len; - - /* Ensure that the target position is valid. Note that repositioning the - * file right past the last extent is considered valid, in line with normal - * seek behavior, although no write (nor read) can be performed there. */ - if (new_pos < 0 || new_pos > (off64_t)xf->total_ex_len) - return -1; - - if (new_pos != (off64_t)xf->curr_pos) { - /* Find the extend that contains the requested logical position; handle - * special case upfront, for efficiency. */ - size_t new_ex_idx = 0; - if (new_pos == (off64_t)xf->total_ex_len) - new_ex_idx = xf->ex_count; - else if (new_pos) - new_ex_idx = ex_arr_search(xf->ex_count, xf->ex_arr, - xf->prefix_len_arr, new_pos, - xf->curr_ex_idx); - - /* Set the logical position markers. */ - xf->curr_ex_idx = new_ex_idx; - xf->curr_ex_pos = - (new_ex_idx < xf->ex_count ? - (size_t)(new_pos - xf->prefix_len_arr[new_ex_idx].prec) : 0); - xf->curr_pos = (off_t)new_pos; - } - - *pos_p = new_pos; - return 0; -} - -/* Closes an open extent file. This implements the cookie_close_function_t - * interface. Always returns 0 (success). */ -static int -exfile_close(void *cookie) -{ - exfile_t *xf = (exfile_t *)cookie; - if (xf) { - if (xf->fd >= 0) - close(xf->fd); - free(xf->prefix_len_arr); - if (xf->ex_free) - xf->ex_free(xf->ex_arr); - free(xf); - } - return 0; -} - -static const cookie_io_functions_t cookie_io_funcs = { - .read = exfile_read, - .write = exfile_write, - .seek = exfile_seek, - .close = exfile_close, -}; - -static FILE * -exfile_open(int fd, const char *path, const char *fopen_mode, ex_t *ex_arr, - size_t ex_count, void (*ex_free)(void *)) -{ - /* Argument sanity check. */ - if (!(ex_arr && ex_count && (fd >= 0 || path) && (fd < 0 || !path))) - return NULL; - - /* Validate mode argument. */ - exfile_mode_t mode = EXFILE_MODE_MAX; - size_t i; - for (i = 0; i < arraysize(fopen_mode_to_mode); i++) - if (!strcmp(fopen_mode_to_mode[i].fopen_mode, fopen_mode)) { - mode = fopen_mode_to_mode[i].mode; - break; - } - if (mode == EXFILE_MODE_MAX) - return NULL; - - /* Open the underlying file, if not already provided. */ - int do_close_fd = FALSE; - if (fd < 0) { - if ((fd = open(path, mode_to_open_flags[mode])) < 0) - return NULL; - do_close_fd = TRUE; - } - - /* Allocate memory and open file streams, for both the underlying file and - * the handle returned to the caller. */ - exfile_t *xf = NULL; - prefix_len_t *prefix_len_arr = NULL; - FILE *handle = NULL; - if (!((xf = (exfile_t *)calloc(1, sizeof(exfile_t))) && - (prefix_len_arr = - (prefix_len_t *)malloc(sizeof(prefix_len_t) * ex_count)) && - (handle = fopencookie(xf, fopen_mode, cookie_io_funcs)))) { - /* If a file was opened above, close it. */ - if (do_close_fd) - close(fd); - if (xf) - xf->fd = -1; /* invalidate prior to calling exfile_close() */ - - free(prefix_len_arr); - if (handle) - fclose(handle); /* will call exfile_close already */ - else - exfile_close(xf); - return NULL; - } - - /* Compute the prefix lengths. */ - size_t prefix_len = 0; - for (i = 0; i < ex_count; i++) { - prefix_len_arr[i].prec = prefix_len; - prefix_len += ex_arr[i].len; - prefix_len_arr[i].total = prefix_len; - } - - /* Configure control object, including physical/logical file position. */ - xf->fd = fd; - xf->ex_count = ex_count; - xf->ex_arr = ex_arr; - xf->prefix_len_arr = prefix_len_arr; - xf->ex_free = ex_free; - xf->total_ex_len = prefix_len_arr[ex_count - 1].total; - xf->curr_file_pos = lseek(fd, 0, SEEK_CUR); - xf->curr_ex_idx = 0; - xf->curr_ex_pos = 0; - xf->curr_pos = 0; - - /* Return the external stream handle. */ - return handle; -} - -FILE * -exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr, - size_t ex_count, void (*ex_free)(void *)) -{ - return exfile_open(-1, path, fopen_mode, ex_arr, ex_count, ex_free); -} - -FILE * -exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr, - size_t ex_count, void (*ex_free)(void *)) -{ - return exfile_open(fd, NULL, fopen_mode, ex_arr, ex_count, ex_free); -} diff --git a/exfile.h b/exfile.h deleted file mode 100644 index b09e456..0000000 --- a/exfile.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2015 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef _BSDIFF_EXFILE_H_ -#define _BSDIFF_EXFILE_H_ - -#include <stdio.h> - -/* - * Extent files. - * - * This modules provides a familiar interface for handling files through an - * indirection layer of extents, which are contiguous chunks of variable length - * at arbitrary offsets within a file. Once an extent file handle is obtained, - * users may read, write and seek as they do with ordinary files, having the I/O - * with the underlying file done for them by the extent file implementation. The - * implementation supports "sparse extents", which are assumed to contain zeros - * but otherwise have no actual representation in the underlying file; these are - * denoted by negative offset values. - * - * Unlike ordinary files, the size of an extent file is fixed; it is not - * truncated on open, nor is writing past the extent span allowed. Also, writing - * to a sparse extent has no effect and will not raise an error. - */ - - -/* An extent, defined by an offset and a length. */ -typedef struct { - off_t off; /* the extent offset; negative indicates a sparse extent */ - size_t len; /* the extent length */ -} ex_t; - - -/* Opens a file |path| with use mode |fopen_mode| for use with an array of - * extents |ex_arr| of length |ex_count|. The mode string can be either of "r" - * (read-only), "w" (write-only) or "r+"/"w+" (read-write); the underlying file - * is neither created (if not present) nor truncated (if present) when opened - * for writing. The function |ex_free|, if not NULL, will be called to - * deallocate the extent array once the file object is closed. Returns a FILE - * pointer that can be used with ordinary stream functions (e.g. fread), or - * NULL if opening the file has failed. */ -FILE *exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr, - size_t ex_count, void (*ex_free)(void *)); - -/* Associates an extent file stream with an already open file descriptor |fd|. - * The |fopen_mode| argument is as decribed above and must be compatible with - * the mode of |fd|. All other arguments, behaviors and return values are as - * those of exfile_fopen (see above). */ -FILE *exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr, - size_t ex_count, void (*ex_free)(void *)); - -#endif /* _BSDIFF_EXFILE_H_ */ diff --git a/extents.c b/extents.c deleted file mode 100644 index 5f99e85..0000000 --- a/extents.c +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2015 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "extents.h" - -#include <assert.h> -#include <errno.h> -#include <limits.h> -#include <stdint.h> -#include <stdlib.h> - - -#define TRUE 1 -#define FALSE 0 - -/* Minimum/maximum values for arbitrary integer types. */ -#define UNSIGNED_INT_MAX(t) (~((t)0)) -#define SIGNED_INT_MAX(t) ((t)((uintmax_t)UNSIGNED_INT_MAX(t) >> 1)) -#define MAX(a,b) ((a) > (b) ? (a) : (b)) -#define INT_TYPE_MAX(t) MAX(UNSIGNED_INT_MAX(t), SIGNED_INT_MAX(t)) - -/* The maximum accepted value for a given integer type when parsed as a signed - * long long integer. This is defined to be the smaller of the maximum value - * that can be represented by this type and LLONG_MAX. This bound allows us to - * properly check that parsed values do not exceed the capacity of their - * intended store, regardless of how its size relates to that of a signed long - * long integer. Note: this may mean that we are losing the most significant - * bit of an unsigned 64-bit field (e.g. size_t on some platforms), however - * still permitting values up to 2^62, which is more than enough for all - * practical purposes. */ -#define LLONG_MAX_BOUND(s) \ - ((uintmax_t)(s) < (uintmax_t)LLONG_MAX ? (long long)(s) : LLONG_MAX) -#define MAX_ALLOWED(t) LLONG_MAX_BOUND(INT_TYPE_MAX(t)) - -/* Get the type of a struct field. */ -#define FIELD_TYPE(t, f) typeof(((t *)0)->f) - - -/* Reads a long long integer from |s| into |*val_p|. Returns a pointer to the - * character immediately following the specified |delim|, unless (a) parsing - * failed (overflow or no valid digits); (b) the read value is less than - * |min_val| or greater than |max_val|; (c) the delimiter character is not - * |delim|, or the string ends although |may_end| is false. In any of these - * cases, returns NULL. */ -const char * -read_llong(const char *s, long long *val_p, long long min_val, - long long max_val, char delim, int may_end) -{ - assert(val_p); - const char *next_s; - errno = 0; - long long val = strtoll(s, (char **)&next_s, 10); - if (((val == LLONG_MAX || val == LLONG_MIN) && errno == ERANGE) || - next_s == s || val < min_val || val > max_val || - (*next_s ? *next_s != delim : !may_end)) - return NULL; /* bad value or delimiter */ - *val_p = val; - if (*next_s) - next_s++; /* skip delimeter */ - return next_s; -} - - -/* Reads a comma-separated list of "offset:length" extents from |ex_str|. If - * |ex_arr| is NULL, then |ex_count| is ignored and it attempts to parse valid - * extents until the end of the string is reached. Otherwise, stores up to - * |ex_count| extents into |ex_arr|, which must be of at least this size. - * Returns the number of correctly parsed extents, or -1 if a malformed extent - * was found. */ -static ssize_t -extents_read(const char *ex_str, ex_t *ex_arr, size_t ex_count) -{ - size_t i; - size_t last_i = ex_count - 1; - if (!ex_arr) { - ex_count = SIZE_MAX; - last_i = 0; - } - for (i = 0; *ex_str && i < ex_count; i++) { - long long raw_off = 0, raw_len = 0; - if (!((ex_str = read_llong(ex_str, &raw_off, -1, - MAX_ALLOWED(FIELD_TYPE(ex_t, off)), - ':', FALSE)) && - (ex_str = read_llong(ex_str, &raw_len, 1, - MAX_ALLOWED(FIELD_TYPE(ex_t, len)), - ',', i >= last_i)))) - return -1; /* parsing error */ - if (ex_arr) { - ex_arr[i].off = raw_off; - ex_arr[i].len = raw_len; - } - } - return i; -} - - -ex_t * -extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p) -{ - /* Sanity checks: a string must be provided; if an array is provided, an - * array count must be given as well. */ - if (!ex_str || (ex_arr && !ex_count_p)) - return NULL; - - /* Parse string and count extents. */ - ssize_t ret = extents_read(ex_str, NULL, 0); - if (ret < 0) - return NULL; /* parsing error */ - size_t ex_count = (size_t)ret; - - /* Input is good, commit to extent count. */ - if (ex_count_p) { - size_t alloc_ex_count = *ex_count_p; - *ex_count_p = ex_count; - if (ex_arr && alloc_ex_count < ex_count) - return NULL; /* insufficient allocated space */ - } - if (ex_count == 0) - return NULL; /* no extents, nothing to do */ - - /* Allocate extent array, if needed. */ - if (!(ex_arr || (ex_arr = (ex_t *)malloc(sizeof(ex_t) * ex_count)))) - return NULL; /* allocation failed */ - - /* Populate the extent array. */ - extents_read(ex_str, ex_arr, ex_count); - - return ex_arr; -} diff --git a/extents.cc b/extents.cc new file mode 100644 index 0000000..52cf404 --- /dev/null +++ b/extents.cc @@ -0,0 +1,113 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extents.h" + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <stdlib.h> + +#include <algorithm> +#include <limits> + +namespace bsdiff { + +/* The maximum accepted value for a given integer type when parsed as a signed + * long long integer. This is defined to be the smaller of the maximum value + * that can be represented by this type and LLONG_MAX. This bound allows us to + * properly check that parsed values do not exceed the capacity of their + * intended store, regardless of how its size relates to that of a signed long + * long integer. Note: this may mean that we are losing the most significant + * bit of an unsigned 64-bit field (e.g. size_t on some platforms), however + * still permitting values up to 2^62, which is more than enough for all + * practical purposes. */ +#define MAX_ALLOWED(t) \ + (std::min(static_cast<uint64_t>(std::numeric_limits<t>::max()), \ + static_cast<uint64_t>(std::numeric_limits<long long>::max()))) + +/* Get the type of a struct field. */ +#define FIELD_TYPE(t, f) decltype(((t*)0)->f) + + +/* Reads a long long integer from |s| into |*val_p|. Returns a pointer to the + * character immediately following the specified |delim|, unless (a) parsing + * failed (overflow or no valid digits); (b) the read value is less than + * |min_val| or greater than |max_val|; (c) the delimiter character is not + * |delim|, or the string ends although |may_end| is false. In any of these + * cases, returns NULL. */ +const char* read_llong(const char* s, + long long* val_p, + long long min_val, + long long max_val, + char delim, + int may_end) { + assert(val_p); + const char* next_s; + errno = 0; + long long val = strtoll(s, (char**)&next_s, 10); + if (((val == LLONG_MAX || val == LLONG_MIN) && errno == ERANGE) || + next_s == s || val < min_val || val > max_val || + (*next_s ? *next_s != delim : !may_end)) + return NULL; /* bad value or delimiter */ + *val_p = val; + if (*next_s) + next_s++; /* skip delimeter */ + return next_s; +} + + +/* Reads a comma-separated list of "offset:length" extents from |ex_str|. If + * |ex_arr| is NULL, then |ex_count| is ignored and it attempts to parse valid + * extents until the end of the string is reached. Otherwise, stores up to + * |ex_count| extents into |ex_arr|, which must be of at least this size. + * Returns the number of correctly parsed extents, or -1 if a malformed extent + * was found. */ +static ssize_t extents_read(const char* ex_str, ex_t* ex_arr, size_t ex_count) { + size_t i; + size_t last_i = ex_count - 1; + if (!ex_arr) { + ex_count = SIZE_MAX; + last_i = 0; + } + for (i = 0; *ex_str && i < ex_count; i++) { + long long raw_off = 0, raw_len = 0; + if (!((ex_str = + read_llong(ex_str, &raw_off, -1, + MAX_ALLOWED(FIELD_TYPE(ex_t, off)), ':', false)) && + (ex_str = read_llong(ex_str, &raw_len, 1, + MAX_ALLOWED(FIELD_TYPE(ex_t, len)), ',', + i >= last_i)))) + return -1; /* parsing error */ + if (ex_arr) { + ex_arr[i].off = raw_off; + ex_arr[i].len = raw_len; + } + } + return i; +} + + +bool ParseExtentStr(const char* ex_str, std::vector<ex_t>* extents) { + // Sanity check: a string must be provided. + if (!ex_str) + return false; + + /* Parse string and count extents. */ + ssize_t ret = extents_read(ex_str, NULL, 0); + if (ret < 0) + return false; // parsing error. + + // Input is good, commit to extent count. + extents->resize(ret); + if (ret == 0) + return true; // No extents, nothing to do. + + // Populate the extent array. + extents_read(ex_str, extents->data(), extents->size()); + return true; +} + +} // namespace bsdiff @@ -5,22 +5,19 @@ #ifndef _BSDIFF_EXTENTS_H_ #define _BSDIFF_EXTENTS_H_ -#include "exfile.h" +#include <vector> +#include "extents_file.h" -/* Parses a string representation |ex_str| and populates an array |ex_arr| - * consisting of |*ex_count_p| extents. The string is expected to be a - * comma-separated list of pairs of the form "offset:length". An offset may be - * -1 or a non-negative integer; the former indicates a sparse extent - * (consisting of zeros). A length is a positive integer. If |ex_arr| is NULL, - * |*ex_count_p| is ignored and a new array is allocated based on the actual - * number of extents parsed. Upon success, returns a pointer to the populated - * array of extents and stores the actual number of extents at the location - * pointed to be |ex_count_p| (if provided). If the string parses correctly but - * the operation otherwise fails (allocation error, array too small), returns - * NULL but still store the number of parsed extents. Otherwise, returns NULL - * and does not store anything. If a new array was allocated, then it should be - * deallocated with free(3). */ -ex_t *extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p); +namespace bsdiff { -#endif /* _BSDIFF_EXTENTS_H_ */ +// Parses a string representation |ex_str| and populates the vector |extents| +// of ex_t. The string is expected to be a comma-separated list of pairs of the +// form "offset:length". An offset may be -1 or a non-negative integer; the +// former indicates a sparse extent (consisting of zeros). A length is a +// positive integer. Returns whether the parsing was successful. +bool ParseExtentStr(const char* ex_str, std::vector<ex_t>* extents); + +} // namespace bsdiff + +#endif // _BSDIFF_EXTENTS_H_ diff --git a/extents_file.cc b/extents_file.cc new file mode 100644 index 0000000..2d2ea83 --- /dev/null +++ b/extents_file.cc @@ -0,0 +1,116 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extents_file.h" + +#include <string.h> + +#include <algorithm> + +// Extent files implementation extending FileInterface. +// +// This class allows to map linear positions in a file to a list of regions in +// another file. All the reads and writes are unbuffered, passed directly to the +// underlying file. Seeking is done in O(log(N)), where N is the number of +// extents in the file, but sequential reads jump to the next extent in O(1). + +namespace bsdiff { + +ExtentsFile::ExtentsFile(std::unique_ptr<FileInterface> file, + const std::vector<ex_t>& extents) + : file_(std::move(file)), extents_(extents) { + acc_len_.reserve(extents.size()); + for (const ex_t& extent : extents) { + acc_len_.push_back(total_ex_len_); + total_ex_len_ += extent.len; + } +} + +ExtentsFile::~ExtentsFile() { + Close(); +} + +bool ExtentsFile::Read(void* buf, size_t count, size_t* bytes_read) { + return IOOperation(&FileInterface::Read, buf, count, bytes_read); +} + + +bool ExtentsFile::Write(const void* buf, size_t count, size_t* bytes_written) { + return IOOperation(&FileInterface::Write, buf, count, bytes_written); +} + +bool ExtentsFile::Seek(off_t pos) { + if (pos < 0 || static_cast<uint64_t>(pos) > total_ex_len_) + return false; + if (acc_len_.empty()) + return true; + // Note that the first element of acc_len_ is always 0, and pos is at least 0, + // so the upper_bound will never return acc_len_.begin(). + curr_pos_ = pos; + curr_ex_idx_ = std::upper_bound(acc_len_.begin(), acc_len_.end(), pos) - + acc_len_.begin(); + // We handle the corner case where |pos| is the size of all the extents by + // leaving the value of curr_ex_idx_ the same way AdvancePos(0) would leave it + // after the seek. + if (curr_pos_ < total_ex_len_) + curr_ex_idx_--; + return true; +} + +bool ExtentsFile::Close() { + return file_->Close(); +} + +bool ExtentsFile::GetSize(uint64_t* size) { + *size = total_ex_len_; + return true; +} + +void ExtentsFile::AdvancePos(uint64_t size) { + curr_pos_ += size; + for (; curr_ex_idx_ < extents_.size(); curr_ex_idx_++) { + if (curr_pos_ < acc_len_[curr_ex_idx_] + extents_[curr_ex_idx_].len) + return; + } + return; +} + +template <typename T> +bool ExtentsFile::IOOperation(bool (FileInterface::*io_op)(T*, size_t, size_t*), + T* buf, + size_t count, + size_t* bytes_processed) { + bool result = true; + size_t processed = 0; + AdvancePos(0); + while (count > 0 && curr_ex_idx_ < extents_.size()) { + const ex_t& ex = extents_[curr_ex_idx_]; + size_t chunk_size = std::min(static_cast<uint64_t>(count), ex.len); + size_t chunk_processed = 0; + if (ex.off < 0) { + chunk_processed = chunk_size; + } else { + uint64_t file_pos = ex.off + (curr_pos_ - acc_len_[curr_ex_idx_]); + if (!file_->Seek(file_pos) || + !((file_.get()->*io_op))(buf, chunk_size, &chunk_processed)) { + processed += chunk_processed; + result = processed > 0; + break; + } + } + processed += chunk_processed; + count -= chunk_processed; + // T can be either const void* or void*. We const_cast it back to void* and + // then char* to do the arithmetic operation, but |buf| will continue to be + // const void* if it was defined that way. + buf = static_cast<char*>(const_cast<void*>(buf)) + chunk_processed; + AdvancePos(chunk_processed); + if (!chunk_processed) + break; + } + *bytes_processed = processed; + return result; +} + +} // namespace bsdiff diff --git a/extents_file.h b/extents_file.h new file mode 100644 index 0000000..865e2a2 --- /dev/null +++ b/extents_file.h @@ -0,0 +1,92 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef _BSDIFF_EXTENTS_FILE_H_ +#define _BSDIFF_EXTENTS_FILE_H_ + +#include <stdio.h> + +#include <memory> +#include <vector> + +#include "file_interface.h" + +/* + * Extent files. + * + * This modules provides a familiar interface for handling files through an + * indirection layer of extents, which are contiguous chunks of variable length + * at arbitrary offsets within a file. Once an extent file handle is obtained, + * users may read, write and seek as they do with ordinary files, having the I/O + * with the underlying file done for them by the extent file implementation. The + * implementation supports "sparse extents", which are assumed to contain zeros + * but otherwise have no actual representation in the underlying file; these are + * denoted by negative offset values. + * + * Unlike ordinary files, the size of an extent file is fixed; it is not + * truncated on open, nor is writing past the extent span allowed. Also, writing + * to a sparse extent has no effect and will not raise an error. + */ + +namespace bsdiff { + +/* An extent, defined by an offset and a length. */ +struct ex_t { + off_t off; // the extent offset; negative indicates a sparse extent. + uint64_t len; // the extent length. +}; + +class ExtentsFile : public FileInterface { + public: + // Creates an ExtentsFile based on the underlying |file| passed. The positions + // in the ExtentsFile will be linearly mapped to the extents provided in + // |extents|. The created ExtentsFile takes ownership of the |file| will close + // it on destruction. + ExtentsFile(std::unique_ptr<FileInterface> file, + const std::vector<ex_t>& extents); + + ~ExtentsFile() override; + + // FileInterface overrides. + bool Read(void* buf, size_t count, size_t* bytes_read) override; + bool Write(const void* buf, size_t count, size_t* bytes_written) override; + bool Seek(off_t pos) override; + bool Close() override; + bool GetSize(uint64_t* size) override; + + private: + void AdvancePos(uint64_t size); + + // Performs an I/O operation (either read or write). This template shares the + // code for both Read() and Write() implementations. + template <typename T> + bool IOOperation(bool (FileInterface::*io_op)(T*, size_t, size_t*), + T* buf, + size_t count, + size_t* bytes_processed); + + // The underlying FileInterace instance. + std::unique_ptr<FileInterface> file_; + + // The list of extents mapping this instance to |file_|. + const std::vector<ex_t> extents_; + + // The accumulated length of the extents. The i-th element contains the sum of + // the length of all the extents from 0 up to but not including the i-th + // extent. This reduces the complexity for random-access Seek() calls. + std::vector<uint64_t> acc_len_; + + // Current extent index. + size_t curr_ex_idx_{0}; + + // Current logical file position. + uint64_t curr_pos_{0}; + + // Total length of all extents (constant). + uint64_t total_ex_len_{0}; +}; + +} // namespace bsdiff + +#endif // _BSDIFF_EXTENTS_FILE_H_ diff --git a/extents_file_unittest.cc b/extents_file_unittest.cc new file mode 100644 index 0000000..fa9ed22 --- /dev/null +++ b/extents_file_unittest.cc @@ -0,0 +1,185 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extents_file.h" + +#include <gtest/gtest.h> +#include <gmock/gmock.h> +#include <string> +#include <vector> + +#include "file_interface.h" + +using std::string; +using std::vector; +using testing::AnyNumber; +using testing::StrictMock; +using testing::Return; +using testing::InSequence; +using testing::_; + +namespace bsdiff { + +// Mock class for the underlying file interface. +class MockFile : public FileInterface { + public: + MOCK_METHOD3(Read, bool(void*, size_t, size_t*)); + MOCK_METHOD3(Write, bool(const void*, size_t, size_t*)); + MOCK_METHOD1(Seek, bool(off_t)); + MOCK_METHOD0(Close, bool()); + MOCK_METHOD1(GetSize, bool(uint64_t*)); +}; + +ACTION(SucceedIO) { + // Check that arg1 (count) can be converted + *arg2 = arg1; + return true; +} + +ACTION_P(SucceedPartialIO, bytes) { + // Check that arg1 (count) can be converted + *arg2 = bytes; + return true; +} + +class ExtentsFileTest : public testing::Test { + protected: + void SetUp() { + mock_file_ = new StrictMock<MockFile>(); + mock_file_ptr_.reset(mock_file_); + // The destructor of the ExtentsFile will call Close once. + EXPECT_CALL(*mock_file_, Close()).WillOnce(Return(true)); + } + + // Pointer to the underlying File owned by the ExtentsFile under test. This + // pointer is invalidated whenever the ExtentsFile is destroyed. + StrictMock<MockFile>* mock_file_; + std::unique_ptr<FileInterface> mock_file_ptr_; +}; + +TEST_F(ExtentsFileTest, DestructorCloses) { + ExtentsFile file(std::move(mock_file_ptr_), {}); +} + +TEST_F(ExtentsFileTest, CloseIsForwarded) { + ExtentsFile file(std::move(mock_file_ptr_), {}); + EXPECT_TRUE(file.Close()); + EXPECT_CALL(*mock_file_, Close()).WillOnce(Return(false)); +} + +TEST_F(ExtentsFileTest, GetSizeSumExtents) { + ExtentsFile file(std::move(mock_file_ptr_), + {ex_t{10, 5}, ex_t{20, 5}, {25, 2}}); + uint64_t size; + EXPECT_TRUE(file.GetSize(&size)); + EXPECT_EQ(12U, size); +} + +TEST_F(ExtentsFileTest, SeekToRightOffsets) { + ExtentsFile file(std::move(mock_file_ptr_), + {ex_t{10, 5}, ex_t{20, 5}, {25, 2}}); + vector<std::pair<off_t, off_t>> tests = { + // Seek to the beginning of the file. + {0, 10}, + // Seek to the middle of a extent. + {3, 13}, + {11, 26}, + // Seek to the extent boundary. + {5, 20}, // Seeks to the first byte in the second extent. + {10, 25}, + }; + for (const auto& offset_pair : tests) { + // We use a failing Read() call to trigger the actual seek call to the + // underlying file. + EXPECT_CALL(*mock_file_, Seek(offset_pair.second)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(_, _, _)).WillOnce(Return(false)); + + EXPECT_TRUE(file.Seek(offset_pair.first)); + size_t bytes_read; + EXPECT_FALSE(file.Read(nullptr, 1, &bytes_read)); + } + + // Seeking to the end of the file is ok, but not past it. + EXPECT_TRUE(file.Seek(12)); + EXPECT_FALSE(file.Seek(13)); + + EXPECT_FALSE(file.Seek(-1)); +} + +TEST_F(ExtentsFileTest, ReadAcrossAllExtents) { + ExtentsFile file(std::move(mock_file_ptr_), + {ex_t{10, 5}, ex_t{20, 7}, {27, 3}}); + InSequence s; + char* buf = reinterpret_cast<char*>(0x1234); + + EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf, 5, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Seek(20)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf + 5, 7, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Seek(27)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf + 12, 3, _)).WillOnce(SucceedIO()); + + // FileExtents::Read() should read everything in one shot, by reading all + // the little chunks. Note that it doesn't attempt to read past the end of the + // FileExtents. + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(buf, 100, &bytes_read)); + EXPECT_EQ(15U, bytes_read); +} + +TEST_F(ExtentsFileTest, ReadSmallChunks) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + InSequence s; + char* buf = reinterpret_cast<char*>(0x1234); + + EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(buf, 1, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Seek(20)).WillOnce(Return(true)); + // We expect to read only part of the second extent. + EXPECT_CALL(*mock_file_, Read(buf + 1, 1, _)).WillOnce(SucceedIO()); + + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(buf, 2, &bytes_read)); + EXPECT_EQ(2U, bytes_read); +} + +TEST_F(ExtentsFileTest, ReadFailureFails) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + EXPECT_CALL(*mock_file_, Seek(_)) + .Times(AnyNumber()) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*mock_file_, Read(_, 1, _)).WillOnce(SucceedIO()); + // A second read that fails will succeed if there was partial data read. + EXPECT_CALL(*mock_file_, Read(_, 10, _)).WillOnce(Return(false)); + + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(nullptr, 100, &bytes_read)); + EXPECT_EQ(1U, bytes_read); +} + +TEST_F(ExtentsFileTest, ReadFails) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + EXPECT_CALL(*mock_file_, Seek(10)).WillOnce(Return(true)); + EXPECT_CALL(*mock_file_, Read(_, 1, _)).WillOnce(Return(false)); + size_t bytes_read; + EXPECT_FALSE(file.Read(nullptr, 1, &bytes_read)); +} + +TEST_F(ExtentsFileTest, ReadPartialReadsAndEOF) { + ExtentsFile file(std::move(mock_file_ptr_), {ex_t{10, 1}, ex_t{20, 10}}); + EXPECT_CALL(*mock_file_, Seek(_)) + .Times(AnyNumber()) + .WillRepeatedly(Return(true)); + char* buf = reinterpret_cast<char*>(0x1234); + InSequence s; + EXPECT_CALL(*mock_file_, Read(buf, 1, _)).WillOnce(SucceedIO()); + EXPECT_CALL(*mock_file_, Read(buf + 1, _, _)).WillOnce(SucceedPartialIO(3)); + EXPECT_CALL(*mock_file_, Read(buf + 4, _, _)).WillOnce(SucceedPartialIO(0)); + + size_t bytes_read = 0; + EXPECT_TRUE(file.Read(buf, 100, &bytes_read)); + EXPECT_EQ(4U, bytes_read); +} + +} // namespace bsdiff diff --git a/extents_unittest.cc b/extents_unittest.cc new file mode 100644 index 0000000..84be2e1 --- /dev/null +++ b/extents_unittest.cc @@ -0,0 +1,56 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "extents.h" + +#include <gtest/gtest.h> +#include <vector> + +namespace bsdiff { + +// ex_t comparator used for testing. +bool operator==(const struct ex_t& lhs, const struct ex_t& rhs) { + return lhs.off == rhs.off && lhs.len == rhs.len; +} + +// PrintTo is used by gtest framework whenever it needs to print our type. +void PrintTo(const struct ex_t& extent, ::std::ostream* os) { + *os << extent.off << ":" << extent.len; +} + +class ExtentsTest : public testing::Test { + protected: + std::vector<ex_t> extents_; +}; + +TEST_F(ExtentsTest, CornerCasesHandledTest) { + EXPECT_TRUE(ParseExtentStr("", &extents_)); + EXPECT_TRUE(extents_.empty()); +} + +TEST_F(ExtentsTest, SimpleCasesTest) { + EXPECT_TRUE(ParseExtentStr("10:20,30:40", &extents_)); + std::vector<ex_t> expected_values = {{10, 20}, {30, 40}}; + EXPECT_EQ(expected_values, extents_); +} + +TEST_F(ExtentsTest, MalformedExtentsTest) { + std::vector<const char*> test_cases = { + ":", ",", "1,2", "1:", "1,", ":2", ",2", "1,2:3", "10:-1", "-2:10"}; + for (const char* test_case : test_cases) { + std::vector<ex_t> extents; + EXPECT_FALSE(ParseExtentStr(test_case, &extents)) << "while testing case \"" + << test_case << "\""; + EXPECT_EQ(std::vector<ex_t>(), extents); + } +} + +TEST_F(ExtentsTest, NegativeValuesTest) { + // |-1| is used as a special case to read zeros for that extent. + EXPECT_TRUE(ParseExtentStr("10:20,-1:40,50:60", &extents_)); + std::vector<ex_t> expected_values = {{10, 20}, {-1, 40}, {50, 60}}; + EXPECT_EQ(expected_values, extents_); +} + +} // namespace bsdiff @@ -0,0 +1,130 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "file.h" + +#include <errno.h> +#include <fcntl.h> +#ifdef __linux__ +#include <linux/fs.h> +#endif // __linux__ +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +// TEMP_FAILURE_RETRY is defined by some versions of <unistd.h>. +#ifndef TEMP_FAILURE_RETRY +#include <utils/Compat.h> +#endif + +#include <algorithm> + +namespace bsdiff { + +std::unique_ptr<File> File::FOpen(const char* pathname, int flags) { + int fd = TEMP_FAILURE_RETRY(open(pathname, flags)); + if (fd < 0) + return std::unique_ptr<File>(); + return std::unique_ptr<File>(new File(fd)); +} + +File::~File() { + Close(); +} + +bool File::Read(void* buf, size_t count, size_t* bytes_read) { + if (fd_ < 0) { + errno = EBADF; + return false; + } + ssize_t rc = TEMP_FAILURE_RETRY(read(fd_, buf, count)); + if (rc == -1) + return false; + *bytes_read = static_cast<size_t>(rc); + return true; +} + +bool File::Write(const void* buf, size_t count, size_t* bytes_written) { + if (fd_ < 0) { + errno = EBADF; + return false; + } + ssize_t rc = TEMP_FAILURE_RETRY(write(fd_, buf, count)); + if (rc == -1) + return false; + *bytes_written = static_cast<size_t>(rc); + return true; +} + +bool File::Seek(off_t pos) { + if (fd_ < 0) { + errno = EBADF; + return false; + } + // fseek() uses a long value for the offset which could be smaller than off_t. + if (pos > std::numeric_limits<long>::max()) { + errno = EOVERFLOW; + return false; + } + off_t newpos = lseek(fd_, pos, SEEK_SET) == pos; + if (newpos < 0) + return false; + if (newpos != pos) { + errno = EINVAL; + return false; + } + return true; +} + +bool File::Close() { + if (fd_ < 0) { + errno = EBADF; + return false; + } + bool success = close(fd_) == 0; + if (!success && errno == EINTR) + success = true; + fd_ = -1; + return success; +} + +bool File::GetSize(uint64_t* size) { + struct stat stbuf; + if (fstat(fd_, &stbuf) == -1) + return false; + if (S_ISREG(stbuf.st_mode)) { + return stbuf.st_size; + } + if (S_ISBLK(stbuf.st_mode)) { +#if defined(BLKGETSIZE64) + return ioctl(fd_, BLKGETSIZE64, size); +#elif defined(DKIOCGETBLOCKCOUNT) + uint64_t sectors = 0; + if (ioctl(fd_, DKIOCGETBLOCKCOUNT, §ors) == 0) { + *size = sectors << 9; + return true; + } + return false; +#else + // Fall back to doing seeks to know the EOF. + off_t pos = lseek(fd_, 0, SEEK_CUR); + if (pos == -1) + return false; + off_t end_pos = lseek(fd_, 0, SEEK_END); + if (end_pos == -1) + return false; + *size = end_pos; + lseek(fd_, 0, SEEK_END); + return true; +#endif + } + return false; +} + +File::File(int fd) + : fd_(fd) {} + +} // namespace bsdiff @@ -0,0 +1,39 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef _BSDIFF_FILE_H_ +#define _BSDIFF_FILE_H_ + +#include <memory> + +#include "file_interface.h" + +namespace bsdiff { + +class File : public FileInterface { + public: + // Opens a file |pathname| with flags |flags| as defined by open(2). In case + // of error, an empty unique_ptr is returned and errno is set accordingly. + static std::unique_ptr<File> FOpen(const char* pathname, int flags); + + ~File() override; + + // FileInterface overrides. + bool Read(void* buf, size_t count, size_t* bytes_read) override; + bool Write(const void* buf, size_t count, size_t* bytes_written) override; + bool Seek(off_t pos) override; + bool Close() override; + bool GetSize(uint64_t* size) override; + + private: + // Creates the File instance for the |fd|. Takes ownership of the file + // descriptor. + File(int fd); + + int fd_; +}; + +} // namespace bsdiff + +#endif // _BSDIFF_FILE_H_ diff --git a/file_interface.h b/file_interface.h new file mode 100644 index 0000000..a643f45 --- /dev/null +++ b/file_interface.h @@ -0,0 +1,48 @@ +// Copyright 2015 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef _BSDIFF_FILE_INTERFACE_H_ +#define _BSDIFF_FILE_INTERFACE_H_ + +#include <sys/types.h> + +namespace bsdiff { + +class FileInterface { + public: + virtual ~FileInterface() = default; + + // Reads synchronously from the current file position up to |count| bytes into + // the passed |buf| buffer. On success, stores in |bytes_read| how many bytes + // were actually read from the file. In case of error returns false. This + // method may read less than |count| bytes even if there's no error. If the + // end of file is reached, 0 bytes will be read and this methods returns true. + virtual bool Read(void* buf, size_t count, size_t* bytes_read) = 0; + + // Writes synchronously up to |count| bytes from to passed |buf| buffer to + // the file. On success, stores in |bytes_written| how many bytes + // were actually written to the file. This method may write less than |count| + // bytes and return successfully, or even write 0 bytes if there's no more + // space left on the device. Returns whether the write succeeded. + virtual bool Write(const void* buf, size_t count, size_t* bytes_written) = 0; + + // Change the current file position to |pos| bytes from the beginning of the + // file. Return whether the seek succeeded. + virtual bool Seek(off_t pos) = 0; + + // Closes the file and flushes any cached data. Returns whether the close + // succeeded. + virtual bool Close() = 0; + + // Compute the size of the file and store it in |size|. Returns whether it + // computed the size successfully. + virtual bool GetSize(uint64_t* size) = 0; + + protected: + FileInterface() = default; +}; + +} // namespace bsdiff + +#endif // _BSDIFF_FILE_INTERFACE_H_ diff --git a/test_utils.cc b/test_utils.cc index b7540e6..2ad8fc8 100644 --- a/test_utils.cc +++ b/test_utils.cc @@ -25,13 +25,12 @@ const string PrependTmpdir(const string& path) { if (path[0] == '/') return path; - const char *tmpdir = getenv("TMPDIR"); + const char* tmpdir = getenv("TMPDIR"); const string prefix = (tmpdir && *tmpdir ? tmpdir : "/tmp"); return prefix + "/" + path; } -bool MakeTempFile(const string& base_filename_template, - string* filename) { +bool MakeTempFile(const string& base_filename_template, string* filename) { const string filename_template = PrependTmpdir(base_filename_template); vector<char> result(filename_template.size() + 1, '\0'); memcpy(result.data(), filename_template.data(), filename_template.size()); diff --git a/test_utils.h b/test_utils.h index 52cebab..186d9e6 100644 --- a/test_utils.h +++ b/test_utils.h @@ -8,12 +8,12 @@ #include <string> #include <vector> -#define TEST_AND_RETURN_FALSE(_x) \ - do { \ - if (!static_cast<bool>(_x)) { \ - fprintf(stderr, "%s failed.", #_x); \ - return false; \ - } \ +#define TEST_AND_RETURN_FALSE(_x) \ + do { \ + if (!static_cast<bool>(_x)) { \ + fprintf(stderr, "%s failed.", #_x); \ + return false; \ + } \ } while (0) namespace test_utils { diff --git a/testrunner.cc b/testrunner.cc index 40710fd..44f0b5e 100644 --- a/testrunner.cc +++ b/testrunner.cc @@ -4,7 +4,7 @@ #include <gtest/gtest.h> -int main(int argc, char **argv) { +int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } |