diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2008-12-17 18:05:30 -0800 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2008-12-17 18:05:30 -0800 |
commit | 990c1ebf2f02d2ce5086983002ca466012088f6d (patch) | |
tree | 38e5f4b90a3319870e0150193b4ab1ae2779ef0f | |
download | xdelta3-990c1ebf2f02d2ce5086983002ca466012088f6d.tar.gz |
Code drop from //branches/cupcake/...@124589
49 files changed, 29892 insertions, 0 deletions
diff --git a/Android.mk b/Android.mk new file mode 100644 index 0000000..a6fa250 --- /dev/null +++ b/Android.mk @@ -0,0 +1,35 @@ +LOCAL_PATH:= $(call my-dir) + +xdelta3_cflags := \ + -O3 \ + -fno-function-sections -fno-data-sections -fno-inline \ + -DSUPPORT_ANDROID_PRELINK_TAGS \ + -DGENERIC_ENCODE_TABLES=0 \ + -DREGRESSION_TEST=0 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -DXD3_DEBUG=0 \ + -DXD3_MAIN=0 \ + -DXD3_POSIX=1 \ + -DXD3_USE_LARGEFILE64=1 + +include $(CLEAR_VARS) + +LOCAL_LDLIBS += -lm +LOCAL_CFLAGS += $(xdelta3_cflags) +LOCAL_SRC_FILES := xdelta3.c +LOCAL_C_INCLUDES:= $(LOCAL_PATH)/ +LOCAL_MODULE := libxdelta3 +include $(BUILD_STATIC_LIBRARY) + +include $(CLEAR_VARS) + +LOCAL_LDLIBS += -lm +LOCAL_CFLAGS += $(xdelta3_cflags) -DXD3_MAIN=1 +LOCAL_SRC_FILES := xdelta3.c +LOCAL_C_INCLUDES:= $(LOCAL_PATH)/ +LOCAL_MODULE := xdelta3 + +include $(BUILD_HOST_EXECUTABLE) + + @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bbc6d2a --- /dev/null +++ b/Makefile @@ -0,0 +1,310 @@ +# xdelta 3 - delta compression tools and library +# Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007. Joshua P. MacDonald + +UNAME = $(shell uname) +CYGWIN = $(findstring CYGWIN, $(UNAME)) +DARWIN = $(findstring Darwin, $(UNAME)) +PYVER = 2.5 + +ifeq ("$(CYGWIN)", "") +SWIGTGT = xdelta3module.so +PYTGT = build/lib.linux-i686-$(PYVER)/xdelta3main.so +else +SWIGTGT = xdelta3module.dll +PYTGT = build/lib.cygwin-1.5.24-i686-$(PYVER)/xdelta3main.dll +endif + +SOURCES = xdelta3-cfgs.h \ + xdelta3-decode.h \ + xdelta3-djw.h \ + xdelta3-fgk.h \ + xdelta3-hash.h \ + xdelta3-list.h \ + xdelta3-main.h \ + xdelta3-merge.h \ + xdelta3-python.h \ + xdelta3-second.h \ + xdelta3-test.h \ + xdelta3.c \ + xdelta3.h + +TARGETS = xdelta3-debug \ + xdelta3 \ + xdelta3-debug2 \ + xdelta3-debug3 \ + xdelta3.o \ + xdelta3_wrap.o \ + xdelta3-32 \ + xdelta3-64 \ + xdelta3-everything \ + xdelta3-Opg \ + xdelta3-64-O \ + xdelta3-Op \ + xdelta3-decoder xdelta3-decoder-nomain.o \ + xdelta3-nosec.o xdelta3-all.o xdelta3-fgk.o \ + xdelta3-noext xdelta3-tools \ + xdelta3-notools \ + xdelta3_wrap.c xdelta3.py \ + $(PYTGT) $(SWIGTGT) + +PYTHON = python + +WIXDIR = "/cygdrive/c/Program Files/wix2.0.4820" + +# -arch x86_64 +CFLAGS= -Wall -Wshadow -fno-builtin + +# $Format: "REL=$Xdelta3Version$" $ +REL=3.0u + +RELDIR = xdelta$(REL) + +EXTRA = Makefile COPYING linkxd3lib.c badcopy.c xdelta3.swig \ + draft-korn-vcdiff.txt xdelta3.vcproj badcopy.vcproj \ + xdelta3-regtest.py 
xdelta3-test.py setup.py \ + examples/Makefile examples/small_page_test.c \ + examples/README examples/encode_decode_test.c \ + examples/compare_test.c examples/speed_test.c \ + examples/test.h examples/checksum_test.cc \ + xdelta3.py xdelta3_wrap.c xdelta3.wxs xdelta3.wxi \ + testing/cmp.h testing/delta.h testing/file.h \ + testing/modify.h testing/random.h testing/segment.h \ + testing/sizes.h testing/test.h testing/Makefile \ + README readme.txt + +SWIG_FLAGS = -DXD3_DEBUG=1 \ + -DEXTERNAL_COMPRESSION=0 \ + -DXD3_USE_LARGEFILE64=1 \ + -DGENERIC_ENCODE_TABLES=1 \ + -DSECONDARY_DJW=1 \ + -DVCDIFF_TOOLS=1 \ + -DSWIG_MODULE=1 + +all: xdelta3-debug xdelta3 + +all-py: all $(PYTGT) $(SWIGTGT) + +all-targets: $(TARGETS) + +all-targets-test: all-targets test + +pytgt: $(PYTGT) +swigtgt: $(SWIGTGT) + +test: + ./xdelta3-debug test + +tar: + tar --exclude ".svn" -czf /tmp/$(RELDIR)-tmp.tar.gz $(SOURCES) $(EXTRA) + rm -rf /tmp/$(RELDIR) + mkdir /tmp/$(RELDIR) + (cd /tmp/$(RELDIR) && tar -xzf ../$(RELDIR)-tmp.tar.gz) + tar -czf ./$(RELDIR).tar.gz -C /tmp $(RELDIR) + +tar -tzf ./$(RELDIR).tar.gz + rm -rf /tmp/$(RELDIR) + +zip: + tar --exclude ".svn" -czf /tmp/$(RELDIR)-tmp.tar.gz $(SOURCES) $(EXTRA) + rm -rf /tmp/$(RELDIR) + mkdir /tmp/$(RELDIR) + (cd /tmp/$(RELDIR) && tar -xzf ../$(RELDIR)-tmp.tar.gz) + tar -czf ./$(RELDIR).tar.gz -C /tmp $(RELDIR) + +zip -r $(RELDIR).zip /tmp/$(RELDIR) + rm -rf /tmp/$(RELDIR) + +clean: + rm -f $(TARGETS) + rm -rf build Debug Release core cifs* *.stackdump *.exe \ + xdelta3.ncb xdelta3.suo xdelta3.sln xdelta3.wixobj xdelta3.msi + +wix: xdelta3.wxs xdelta3.wxi readme.txt Release\xdelta3.exe + $(WIXDIR)/candle.exe xdelta3.wxs -out xdelta3.wixobj + $(WIXDIR)/light.exe xdelta3.wixobj -out xdelta3.msi + +xdelta3: $(SOURCES) + $(CC) $(CFLAGS) -O3 xdelta3.c -lm -o xdelta3 \ + -DGENERIC_ENCODE_TABLES=0 \ + -DREGRESSION_TEST=1 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -DXD3_DEBUG=0 \ + -DXD3_MAIN=1 \ + -DXD3_POSIX=1 \ + -DXD3_USE_LARGEFILE64=1 + 
+xdelta3-debug: $(SOURCES) + $(CC) -g $(CFLAGS) xdelta3.c -lm -o xdelta3-debug \ + -DGENERIC_ENCODE_TABLES=1 \ + -DREGRESSION_TEST=1 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -DXD3_DEBUG=1 \ + -DXD3_MAIN=1 \ + -DXD3_STDIO=1 \ + -DXD3_USE_LARGEFILE64=1 + +xdelta3-32: $(SOURCES) + $(CC) -g $(CFLAGS) xdelta3.c -lm -o xdelta3-32 \ + -DXD3_DEBUG=1 \ + -DXD3_USE_LARGEFILE64=0 \ + -DREGRESSION_TEST=1 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -DXD3_MAIN=1 \ + -DXD3_POSIX=1 + +xdelta3-debug2: $(SOURCES) + $(CC) -g $(CFLAGS) \ + xdelta3.c -o xdelta3-debug2 \ + -DXD3_DEBUG=2 \ + -DXD3_MAIN=1 \ + -DXD3_STDIO=1 \ + -DXD3_USE_LARGEFILE64=1 \ + -DGENERIC_ENCODE_TABLES=1 \ + -DREGRESSION_TEST=1 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -lm + +xdelta3-debug3: $(SOURCES) + $(CC) -g $(CFLAGS) xdelta3.c -o xdelta3-debug3 \ + -DXD3_MAIN=1 \ + -DGENERIC_ENCODE_TABLES=1 \ + -DXD3_USE_LARGEFILE64=1 \ + -DXD3_STDIO=1 \ + -DREGRESSION_TEST=1 \ + -DXD3_DEBUG=3 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -lm + +$(PYTGT): $(SOURCES) setup.py + $(PYTHON) setup.py install --verbose --compile --force + +xdelta3_wrap.c xdelta3.py: xdelta3.swig + swig -python xdelta3.swig + +xdelta3.o: $(SOURCES) + $(CC) -O3 $(CFLAGS) -c xdelta3.c $(SWIG_FLAGS) -o xdelta3.o + +xdelta3_wrap.o: xdelta3_wrap.c + $(CC) -O3 $(CFLAGS) $(SWIG_FLAGS) \ + -DHAVE_CONFIG_H \ + -I/usr/include/python$(PYVER) \ + -I/usr/lib/python$(PYVER)/config \ + -fpic \ + -c xdelta3_wrap.c + +xdelta3module.dll: xdelta3_wrap.o xdelta3.o + gcc -shared -Wl,--enable-auto-image-base \ + xdelta3.o \ + xdelta3_wrap.o \ + -L/usr/lib/python$(PYVER)/config \ + -lpython$(PYVER) \ + -o xdelta3module.dll + cp $(SWIGTGT) /usr/lib/python$(PYVER)/site-packages + +ifeq ("$(DARWIN)", "") +xdelta3module.so: xdelta3_wrap.o xdelta3.o + ld -shared xdelta3.o xdelta3_wrap.o \ + -o xdelta3module.so \ + /usr/lib/libpython$(PYVER).so \ + -lc +else +xdelta3module.so: xdelta3_wrap.o xdelta3.o + gcc -Wl,-F. 
-bundle -undefined dynamic_lookup $(CFLAGS) \ + xdelta3.o xdelta3_wrap.o -o xdelta3module.so +endif + +xdelta3-decoder: $(SOURCES) + $(CC) -O3 -Wall -Wshadow xdelta3.c \ + -DXD3_ENCODER=0 -DXD3_MAIN=1 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \ + -DXD3_STDIO=1 -DEXTERNAL_COMPRESSION=0 -DVCDIFF_TOOLS=0 \ + -o xdelta3-decoder + +xdelta3-decoder-nomain.o: $(SOURCES) linkxd3lib.c + $(CC) -O3 -Wall -Wshadow xdelta3.c linkxd3lib.c \ + -DXD3_ENCODER=0 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \ + -o xdelta3-decoder-nomain.o + strip xdelta3-decoder-nomain.o + +xdelta3-O++: $(SOURCES) + $(CXX) -g -O3 $(CFLAGS) xdelta3.c \ + -o xdelta3-O++ \ + -DXD3_MAIN=1 \ + -DSECONDARY_DJW=1 \ + -DREGRESSION_TEST=1 \ + -lm + +xdelta3-Op: $(SOURCES) + $(CC) -g -O3 $(CFLAGS) xdelta3.c \ + -o xdelta3-Op \ + -DXD3_POSIX=1 \ + -DXD3_MAIN=1 \ + -DREGRESSION_TEST=1 \ + -lm + +xdelta3-64: $(SOURCES) + $(CC) -g $(CFLAGS) \ + xdelta3.c \ + -o xdelta3-64 \ + -DXD3_POSIX=1 \ + -DXD3_MAIN=1 \ + -DREGRESSION_TEST=1 \ + -DXD3_DEBUG=0 \ + -DXD3_USE_LARGEFILE64=1 \ + -lm + +xdelta3-64-O: $(SOURCES) + $(CC) -O3 $(CFLAGS) \ + xdelta3.c \ + -o xdelta3-64-O \ + -DXD3_POSIX=1 \ + -DXD3_MAIN=1 \ + -DXD3_USE_LARGEFILE64=1 \ + -lm + +xdelta3-everything: $(SOURCES) + $(CC) -g $(CFLAGS) \ + xdelta3.c \ + -o xdelta3-everything \ + -DXD3_MAIN=1 \ + -DVCDIFF_TOOLS=1 \ + -DREGRESSION_TEST=1 \ + -DSECONDARY_FGK=1 \ + -DSECONDARY_DJW=1 \ + -DGENERIC_ENCODE_TABLES=1 \ + -DGENERIC_ENCODE_TABLES_COMPUTE=1 \ + -DXD3_POSIX=1 \ + -DEXTERNAL_COMPRESSION=1 \ + -DXD3_DEBUG=1 \ + -lm + +xdelta3-Opg: $(SOURCES) + $(CC) -pg -g -O3 $(CFLAGS) \ + xdelta3.c \ + -o xdelta3-Opg \ + -DXD3_MAIN=1 \ + -DSECONDARY_DJW=1 \ + -DSECONDARY_FGK=1 \ + -DXD3_POSIX=1 \ + -DXD3_USE_LARGEFILE64=1 \ + -DREGRESSION_TEST=1 + +xdelta3-nosec.o: $(SOURCES) + $(CC) -O3 $(CFLAGS) -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 -o xdelta3-nosec.o + +xdelta3-all.o: $(SOURCES) + $(CC) -O3 $(CFLAGS) -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -o xdelta3-all.o + 
+xdelta3-fgk.o: $(SOURCES) + $(CC) -O3 $(CFLAGS) -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=0 -o xdelta3-fgk.o + +xdelta3-noext: $(SOURCES) + $(CC) -O3 $(CFLAGS) xdelta3.c -DXD3_MAIN=1 -DEXTERNAL_COMPRESSION=0 -o xdelta3-noext + +xdelta3-tools: $(SOURCES) + $(CC) -O3 $(CFLAGS) xdelta3.c -DXD3_MAIN=1 -o xdelta3-tools + +xdelta3-notools: $(SOURCES) + $(CC) -O3 $(CFLAGS) xdelta3.c -DXD3_MAIN=1 -DVCDIFF_TOOLS=0 -o xdelta3-notools @@ -0,0 +1,34 @@ +Xdelta 3.x readme.txt +Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 +<josh.macdonald@gmail.com> + + +Thanks for downloading Xdelta! + +This directory contains the Xdelta3 command-line interface (CLI) and source +distribution for VCDIFF differential compression, a.k.a. delta +compression. The latest information and downloads are available here: + + http://xdelta.org/ + http://code.google.com/p/xdelta/ + +The command-line syntax: + + http://code.google.com/p/xdelta/wiki/CommandLineSyntax + +Run 'xdelta3 -h' for brief help. Run 'xdelta3 test' for built-in tests. + +Sample commands (like gzip, -e means encode, -d means decode) + + xdelta3 -9 -S djw -e -vfs OLD_FILE NEW_FILE DELTA_FILE + xdelta3 -d -vfs OLD_FILE DELTA_FILE DECODED_FILE + +File bug reports and browse open support issues here: + + http://code.google.com/p/xdelta/issues/list + +The source distribution contains the C/C++/Python APIs, Unix, Microsoft VC++ +and Cygwin builds. Xdelta3 is covered under the terms of the GPL, see +COPYING. + +Commercial inquiries welcome, please contact <josh.macdonald@gmail.com> diff --git a/README.android b/README.android new file mode 100644 index 0000000..bbe0b31 --- /dev/null +++ b/README.android @@ -0,0 +1,7 @@ +The contents of this directory are the xdelta3.0u package, downloaded from + + http://xdelta.googlecode.com/files/xdelta3.0u.tar.gz + +on 12 Oct 2008. I added the Android.mk file and this README.android +file to the directory; nothing else has been touched. 
+ diff --git a/badcopy.c b/badcopy.c new file mode 100644 index 0000000..03abc63 --- /dev/null +++ b/badcopy.c @@ -0,0 +1,158 @@ +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#define BUFSZ (1 << 22) + +#ifdef WIN32 +// whatever +static +double drand48() { + double r = rand() / (double)RAND_MAX; + return r; +} +long lrand48() { + long l = 0; + int i; + for (i = 0; i < 32; i++) { + l = l ^ (l << 2) ^ (l << 1) ^ rand(); + } + return l; +} +#endif + +#ifdef _WIN32 +#define XD3_WIN32 1 +#else +#define XD3_POSIX 1 +#endif +#define XD3_MAIN 1 +#define main notmain +#define EXTERNAL_COMPRESSION 0 +#define XD3_USE_LARGEFILE64 1 +#include "xdelta3.c" +#undef main + + +double error_prob = 0.0001; +usize_t mean_change = 100; +xoff_t total_change = 0; +xoff_t total_size = 0; +usize_t max_change = 0; +usize_t num_change = 0; + + +static usize_t +edist (usize_t mean, usize_t max) +{ + double mean_d = mean; + double erand = log (1.0 / drand48 ()); + usize_t x = (usize_t) (mean_d * erand + 0.5); + + return (x < max) ? (x > 0 ? x : 1) : max; +} + +void modify (char *buf, usize_t size) +{ + usize_t bufpos = 0, j; + usize_t last_end = 0; + + for (;; /* bufpos and j are incremented in the inner loop */) + { + /* The size of the next modification. */ + usize_t next_size = edist (mean_change, 1 << 31); + /* The expected interval of such a change. */ + double expect_interval = ((double) next_size * (1.0 - error_prob)) / error_prob; + /* The number of bytes until the next modification. 
*/ + usize_t next_mod = edist ((usize_t)expect_interval, 1 << 31); + + if (next_size + next_mod + bufpos > size) { break; } + + if (max_change < next_size) { max_change = next_size; } + + bufpos += next_mod; + + fprintf (stderr, "COPY: %I64u-%I64u (%u)\n", + total_size + (xoff_t)last_end, + total_size + (xoff_t)bufpos, + bufpos - last_end); + fprintf (stderr, "ADD: %I64u-%I64u (%u) is change %u\n", + total_size + (xoff_t)bufpos, + total_size + (xoff_t)(bufpos + next_size), + next_size, num_change); + + total_change += next_size; + num_change += 1; + + for (j = 0; j < next_size; j += 1, bufpos += 1) + { + buf[bufpos] = (char)(lrand48 () >> 3); + } + + last_end = bufpos; + } + + fprintf (stderr, "COPY: %I64u-%I64u (%u)\n", + total_size + last_end, + total_size + size, size - last_end); + + total_size += size; +} + +int main(int argc, char **argv) +{ + main_file inp, out; + char *buf = malloc(BUFSZ); + int c, ret; + main_file_init(&inp); + main_file_init(&out); + option_force = 1; + if (argc > 5) + { + fprintf (stderr, "usage: badcopy [byte_error_prob [mean_error_size]]\n"); + return 1; + } + + if (argc > 4) { mean_change = atoi (argv[4]); } + if (argc > 3) { error_prob = atof (argv[3]); } + fprintf (stderr, "mean change = %u; error_prob = %0.10f\n", mean_change, error_prob); + + if ((ret = main_file_open (&inp, argv[1], XO_READ)) != 0) { + return 1; + } + if ((ret = main_file_open (&out, argv[2], XO_WRITE)) != 0) { + return 1; + } + + if (error_prob < 0.0 || error_prob > 1.0) + { + fprintf (stderr, "warning: error probability out of range\n"); + return 1; + } + + do + { + if ((ret = main_file_read (&inp, buf, BUFSZ, &c, "read failed")) != 0) { + return 1; + } + + if (c == 0) { break; } + + modify (buf, c); + + if ((ret = main_file_write (&out, buf, c, "write failed")) != 0) { + return 1; + } + } + while (c == BUFSZ); + + if ((ret = main_file_close (&out))) + { + return 1; + } + + fprintf (stderr, "add_prob %f; %u adds; total_change %u of %u bytes; add percentage %f; 
max add size %u\n", + error_prob, num_change, total_change, total_size, (double) total_change / (double) total_size, max_change); + + return 0; +} diff --git a/badcopy.vcproj b/badcopy.vcproj new file mode 100644 index 0000000..50683f6 --- /dev/null +++ b/badcopy.vcproj @@ -0,0 +1,218 @@ +<?xml version="1.0" encoding="Windows-1252"?> +<VisualStudioProject + ProjectType="Visual C++" + Version="8.00" + Name="badcopy" + ProjectGUID="{FED2964C-7114-41AC-81EE-68A4D2B67503}" + RootNamespace="badcopy" + Keyword="Win32Proj" + > + <Platforms> + <Platform + Name="Win32" + /> + </Platforms> + <ToolFiles> + </ToolFiles> + <Configurations> + <Configuration + Name="Debug|Win32" + OutputDirectory="Debug" + IntermediateDirectory="Debug" + ConfigurationType="1" + > + <Tool + Name="VCPreBuildEventTool" + /> + <Tool + Name="VCCustomBuildTool" + /> + <Tool + Name="VCXMLDataGeneratorTool" + /> + <Tool + Name="VCWebServiceProxyGeneratorTool" + /> + <Tool + Name="VCMIDLTool" + /> + <Tool + Name="VCCLCompilerTool" + Optimization="0" + PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;" + MinimalRebuild="true" + BasicRuntimeChecks="3" + RuntimeLibrary="3" + UsePrecompiledHeader="0" + WarningLevel="3" + Detect64BitPortabilityProblems="true" + DebugInformationFormat="4" + /> + <Tool + Name="VCManagedResourceCompilerTool" + /> + <Tool + Name="VCResourceCompilerTool" + /> + <Tool + Name="VCPreLinkEventTool" + /> + <Tool + Name="VCLinkerTool" + LinkIncremental="2" + GenerateDebugInformation="true" + SubSystem="1" + TargetMachine="1" + /> + <Tool + Name="VCALinkTool" + /> + <Tool + Name="VCManifestTool" + /> + <Tool + Name="VCXDCMakeTool" + /> + <Tool + Name="VCBscMakeTool" + /> + <Tool + Name="VCFxCopTool" + /> + <Tool + Name="VCAppVerifierTool" + /> + <Tool + Name="VCWebDeploymentTool" + /> + <Tool + Name="VCPostBuildEventTool" + /> + </Configuration> + <Configuration + Name="Release|Win32" + OutputDirectory="Release" + IntermediateDirectory="Release" + ConfigurationType="1" + > + <Tool + 
Name="VCPreBuildEventTool" + /> + <Tool + Name="VCCustomBuildTool" + /> + <Tool + Name="VCXMLDataGeneratorTool" + /> + <Tool + Name="VCWebServiceProxyGeneratorTool" + /> + <Tool + Name="VCMIDLTool" + /> + <Tool + Name="VCCLCompilerTool" + PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;" + RuntimeLibrary="2" + UsePrecompiledHeader="0" + WarningLevel="3" + Detect64BitPortabilityProblems="true" + DebugInformationFormat="3" + /> + <Tool + Name="VCManagedResourceCompilerTool" + /> + <Tool + Name="VCResourceCompilerTool" + /> + <Tool + Name="VCPreLinkEventTool" + /> + <Tool + Name="VCLinkerTool" + LinkIncremental="2" + GenerateDebugInformation="true" + SubSystem="1" + OptimizeReferences="2" + EnableCOMDATFolding="2" + TargetMachine="1" + /> + <Tool + Name="VCALinkTool" + /> + <Tool + Name="VCManifestTool" + /> + <Tool + Name="VCXDCMakeTool" + /> + <Tool + Name="VCBscMakeTool" + /> + <Tool + Name="VCFxCopTool" + /> + <Tool + Name="VCAppVerifierTool" + /> + <Tool + Name="VCWebDeploymentTool" + /> + <Tool + Name="VCPostBuildEventTool" + /> + </Configuration> + </Configurations> + <References> + </References> + <Files> + <Filter + Name="Header Files" + Filter="h;hpp;hxx;hm;inl;inc;xsd" + UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}" + > + </Filter> + <Filter + Name="Resource Files" + Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx" + UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}" + > + <File + RelativePath=".\releases\xdelta30h.ppc-osx.bin" + > + </File> + </Filter> + <Filter + Name="Source Files" + Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" + UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" + > + <File + RelativePath=".\badcopy.c" + > + </File> + </Filter> + <File + RelativePath=".\release\BuildLog.htm" + > + </File> + <File + RelativePath=".\debug\BuildLog.htm" + > + </File> + <File + RelativePath=".\www\xdelta3-api-guide.html" + > + </File> + <File + RelativePath=".\www\xdelta3-cmdline.html" + > + </File> + <File + 
RelativePath=".\www\xdelta3.html" + > + </File> + </Files> + <Globals> + </Globals> +</VisualStudioProject> diff --git a/draft-korn-vcdiff.txt b/draft-korn-vcdiff.txt new file mode 100644 index 0000000..1487deb --- /dev/null +++ b/draft-korn-vcdiff.txt @@ -0,0 +1,1322 @@ + David G. Korn, AT&T Labs + Joshua P. MacDonald, UC Berkeley + Jeffrey C. Mogul, Compaq WRL +Internet-Draft Kiem-Phong Vo, AT&T Labs +Expires: 09 November 2002 09 November 2001 + + + The VCDIFF Generic Differencing and Compression Data Format + + draft-korn-vcdiff-06.txt + + + +Status of this Memo + + This document is an Internet-Draft and is in full conformance + with all provisions of Section 10 of RFC2026. + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF), its areas, and its working groups. Note that + other groups may also distribute working documents as + Internet-Drafts. + + Internet-Drafts are draft documents valid for a maximum of six + months and may be updated, replaced, or obsoleted by other + documents at any time. It is inappropriate to use Internet- + Drafts as reference material or to cite them other than as + "work in progress." + + The list of current Internet-Drafts can be accessed at + http://www.ietf.org/ietf/1id-abstracts.txt + + The list of Internet-Draft Shadow Directories can be accessed at + http://www.ietf.org/shadow.html. + + +Abstract + + This memo describes a general, efficient and portable data format + suitable for encoding compressed and/or differencing data so that + they can be easily transported among computers. + + +Table of Contents: + + 1. EXECUTIVE SUMMARY ............................................ 2 + 2. CONVENTIONS .................................................. 3 + 3. DELTA INSTRUCTIONS ........................................... 4 + 4. DELTA FILE ORGANIZATION ...................................... 5 + 5. DELTA INSTRUCTION ENCODING ................................... 9 + 6. 
DECODING A TARGET WINDOW ..................................... 14 + 7. APPLICATION-DEFINED CODE TABLES .............................. 16 + 8. PERFORMANCE .................................................. 16 + 9. FURTHER ISSUES ............................................... 17 + 10. SUMMARY ...................................................... 18 + 11. ACKNOWLEDGEMENTS ............................................. 18 + 12. SECURITY CONSIDERATIONS ...................................... 18 + 13. SOURCE CODE AVAILABILITY ..................................... 18 + 14. INTELLECTUAL PROPERTY RIGHTS ................................. 18 + 15. IANA CONSIDERATIONS .......................................... 19 + 16. REFERENCES ................................................... 19 + 17. AUTHOR'S ADDRESS ............................................. 20 + + +1. EXECUTIVE SUMMARY + + Compression and differencing techniques can greatly improve storage + and transmission of files and file versions. Since files are often + transported across machines with distinct architectures and performance + characteristics, such data should be encoded in a form that is portable + and can be decoded with little or no knowledge of the encoders. + This document describes Vcdiff, a compact portable encoding format + designed for these purposes. + + Data differencing is the process of computing a compact and invertible + encoding of a "target file" given a "source file". Data compression + is similar but without the use of source data. The UNIX utilities diff, + compress, and gzip are well-known examples of data differencing and + compression tools. For data differencing, the computed encoding is + called a "delta file", and, for data compression, it is called + a "compressed file". Delta and compressed files are good for storage + and transmission as they are often smaller than the originals. 
+ + Data differencing and data compression are traditionally treated + as distinct types of data processing. However, as shown in the Vdelta + technique by Korn and Vo [1], compression can be thought of as a special + case of differencing in which the source data is empty. The basic idea + is to unify the string parsing scheme used in the Lempel-Ziv'77 style + compressors [2], and the block-move technique of Tichy [3]. Loosely + speaking, this works as follows: + + a. Concatenate source and target data. + b. Parse the data from left to right as in LZ'77 but + make sure that a parsed segment starts the target data. + c. Start to output when reaching target data. + + Parsing is based on string matching algorithms such as suffix trees [4] + or hashing with different time and space performance characteristics. + Vdelta uses a fast string matching algorithm that requires less memory + than other techniques [5,6]. However, even with this algorithm, the + memory requirement can still be prohibitive for large files. A common + way to deal with memory limitation is to partition an input file into + chunks called "windows" and process them separately. Here, except for + unpublished work by Vo, little has been done on designing effective + windowing schemes. Current techniques, including Vdelta, simply use + source and target windows with corresponding addresses across source + and target files. + + String matching and windowing algorithms have large influence on the + compression rate of delta and compressed files. However, it is desirable + to have a portable encoding format that is independent of such algorithms. + This enables construction of client-server applications in which a server + may serve clients with unknown computing characteristics. Unfortunately, + all current differencing and compressing tools, including Vdelta, fall + short in this respect. Their storage formats are closely intertwined + with the implemented string matching and/or windowing algorithms. 
+ + The encoding format Vcdiff proposed here addresses the above issues. + Vcdiff achieves the below characteristics: + + Output compactness: + The basic encoding format compactly represents compressed or delta + files. Applications can further extend the basic encoding format + with "secondary encoders" to achieve more compression. + + Data portability: + The basic encoding format is free from machine byte order and + word size issues. This allows data to be encoded on one machine + and decoded on a different machine with different architecture. + + Algorithm genericity: + The decoding algorithm is independent from string matching and + windowing algorithms. This allows competition among implementations + of the encoder while keeping the same decoder. + + Decoding efficiency: + Except for secondary encoder issues, the decoding algorithm runs + in time proportional to the size of the target file and uses space + proportional to the maximal window size. Vcdiff differs from more + conventional compressors in that it uses only byte-aligned + data, thus avoiding bit-level operations, which improves + decoding speed at the slight cost of compression efficiency. + + The Vcdiff data format and the algorithms for decoding data shall be + described next. Since Vcdiff treats compression as a special case of + differencing, we shall use the term "delta file" to indicate the + compressed output for both cases. + + +2. CONVENTIONS + + The basic data unit is a byte. For portability, Vcdiff shall limit + a byte to its lower eight bits even on machines with larger bytes. + The bits in a byte are ordered from right to left so that the least + significant bit (LSB) has value 1, and the most significant bit (MSB), + has value 128. + + For purposes of exposition in this document, we adopt the convention + that the LSB is numbered 0, and the MSB is numbered 7. Bit numbers + never appear in the encoded format itself. 
+ + Vcdiff encodes unsigned integer values using a portable variable-sized + format (originally introduced in the Sfio library [7]). This encoding + treats an integer as a number in base 128. Then, each digit in this + representation is encoded in the lower seven bits of a byte. Except for + the least significant byte, other bytes have their most significant bit + turned on to indicate that there are still more digits in the encoding. + The two key properties of this integer encoding that are beneficial + to a data compression format are: + + a. The encoding is portable among systems using 8-bit bytes, and + b. Small values are encoded compactly. + + For example, consider the value 123456789 which can be represented with + four 7-bit digits whose values are 58, 111, 26, 21 in order from most + to least significant. Below is the 8-bit byte encoding of these digits. + Note that the MSBs of 58, 111 and 26 are on. + + +-------------------------------------------+ + | 10111010 | 11101111 | 10011010 | 00010101 | + +-------------------------------------------+ + MSB+58 MSB+111 MSB+26 0+21 + + + Henceforth, the terms "byte" and "integer" will refer to a byte and an + unsigned integer as described. + + + From time to time, algorithms are exhibited to clarify the descriptions + of parts of the Vcdiff format. On such occasions, the C language will be + used to make the algorithms precise. The C code shown in this + document is meant for clarification only, and is not part of the + actual specification of the Vcdiff format. + + In this specification, the key words "MUST", "MUST NOT", + "SHOULD", "SHOULD NOT", and "MAY" in this document are to be interpreted as + described in RFC2119 [12]. + + +3. DELTA INSTRUCTIONS + + A large target file is partitioned into non-overlapping sections + called "target windows". These target windows are processed separately + and sequentially based on their order in the target file. 
+ + A target window T of length t may be compared against some source data + segment S of length s. By construction, this source data segment S + comes either from the source file, if one is used, or from a part of + the target file earlier than T. In this way, during decoding, S is + completely known when T is being decoded. + + The choices of T, t, S and s are made by some window selection algorithm + which can greatly affect the size of the encoding. However, as seen later, + these choices are encoded so that no knowledge of the window selection + algorithm is needed during decoding. + + Assume that S[j] represents the jth byte in S, and T[k] represents + the kth byte in T. Then, for the delta instructions, we treat the data + windows S and T as substrings of a superstring U formed by concatenating + them like this: + + S[0]S[1]...S[s-1]T[0]T[1]...T[t-1] + + The "address" of a byte in S or T is referred to by its location in U. + For example, the address of T[k] is s+k. + + The instructions to encode and direct the reconstruction of a target + window are called delta instructions. There are three types: + + ADD: This instruction has two arguments, a size x and a sequence of + x bytes to be copied. + COPY: This instruction has two arguments, a size x and an address p + in the string U. The arguments specify the substring of U that + must be copied. We shall assert that such a substring must be + entirely contained in either S or T. + RUN: This instruction has two arguments, a size x and a byte b that + will be repeated x times. + + Below are example source and target windows and the delta instructions + that encode the target window in terms of the source window. + + a b c d e f g h i j k l m n o p + a b c d w x y z e f g h e f g h e f g h e f g h z z z z + + COPY 4, 0 + ADD 4, w x y z + COPY 4, 4 + COPY 12, 24 + RUN 4, z + + + Thus, the first letter 'a' in the target window is at location 16 + in the superstring. 
Note that the fourth instruction, "COPY 12, 24", + copies data from T itself since address 24 is position 8 in T. + This instruction also shows that it is fine to overlap the data to be + copied with the data being copied from as long as the latter starts + earlier. This enables efficient encoding of periodic sequences, + i.e., sequences with regularly repeated subsequences. The RUN instruction + is a compact way to encode a sequence repeating the same byte even though + such a sequence can be thought of as a periodic sequence with period 1. + + To reconstruct the target window, one simply processes one delta + instruction at a time and copies the data either from the source window + or the target window being reconstructed, based on the type of the + instruction and the associated address, if any. + + +4. DELTA FILE ORGANIZATION + + A Vcdiff delta file starts with a Header section followed by a sequence + of Window sections. The Header section includes magic bytes to identify + the file type, and information concerning data processing beyond the + basic encoding format. The Window sections encode the target windows. + + Below is the overall organization of a delta file. The indented items + refine the ones immediately above them. An item in square brackets may + or may not be present in the file depending on the information encoded + in the Indicator byte above it. + + Header + Header1 - byte + Header2 - byte + Header3 - byte + Header4 - byte + Hdr_Indicator - byte + [Secondary compressor ID] - byte + +[@@@ Why is compressor ID not an integer? ] +[@@@ If we aren't defining any secondary compressors yet, then it seems +that defining the [Secondary compressor ID] and the corresponding +VCD_DECOMPRESS Hdr_Indicator bit in this draft has no real value. An +implementation of this specification won't be able to decode a VCDIFF +encoded with this option if it doesn't know about any secondary +compressors. 
It seems that you should specify the bits related to +secondary compressors once you have defined the first secondary +compressor. I can imagine a secondary-compressor might want to supply +extra information, such as a dictionary of some kind, in which case +this speculative treatment wouldn't go far enough.] + + [Length of code table data] - integer + [Code table data] + Size of near cache - byte + Size of same cache - byte + Compressed code table data + Window1 + Win_Indicator - byte + [Source segment size] - integer + [Source segment position] - integer + The delta encoding of the target window + Length of the delta encoding - integer + The delta encoding + Size of the target window - integer + Delta_Indicator - byte + Length of data for ADDs and RUNs - integer + Length of instructions and sizes - integer + Length of addresses for COPYs - integer + Data section for ADDs and RUNs - array of bytes + Instructions and sizes section - array of bytes + Addresses section for COPYs - array of bytes + Window2 + ... + + + +4.1 The Header Section + + Each delta file starts with a header section organized as below. + Note the convention that square-brackets enclose optional items. + + Header1 - byte = 0xD6 + Header2 - byte = 0xC3 + Header3 - byte = 0xC4 + +HMMM + +0xD6 +0xC3 +0xC4 + + Header4 - byte + Hdr_Indicator - byte + [Secondary compressor ID] - byte + [Length of code table data] - integer + [Code table data] + + The first three Header bytes are the ASCII characters 'V', 'C' and 'D' + with their most significant bits turned on (in hexadecimal, the values + are 0xD6, 0xC3, and 0xC4). The fourth Header byte is currently set to + zero. In the future, it might be used to indicate the version of Vcdiff. + + The Hdr_Indicator byte shows if there are any initialization data + required to aid in the reconstruction of data in the Window sections. 
+ This byte MAY have non-zero values for either, both, or neither of + the two bits VCD_DECOMPRESS and VCD_CODETABLE below: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + | | | | | | | | | + +-+-+-+-+-+-+-+-+ + ^ ^ + | | + | +-- VCD_DECOMPRESS + +---- VCD_CODETABLE + + If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a secondary + compressor may have been used to further compress certain parts of the + delta encoding data as described in Sections 4.3 and 6. In that case, + the ID of the secondary compressor is given next. If this bit is zero, + the compressor ID byte is not included. + +[@@@ If we aren't defining any secondary compressors yet, then it seems +this bit has no real value yet..] + + If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an + application-defined code table is to be used for decoding the delta + instructions. This table itself is compressed. The length of the data + comprising this compressed code table and the data follow next. Section 7 + discusses application-defined code tables. If this bit is zero, the code + table data length and the code table data are not included. + + If both bits are set, then the compressor ID byte is included + before the code table data length and the code table data. + + +4.2 The Format of a Window Section + + Each Window section is organized as follows: + + Win_Indicator - byte + [Source segment length] - integer + [Source segment position] - integer + The delta encoding of the target window + + + Below are the details of the various items: + +[@@@ Here, I want to replace the Win_Indicator with a source-count, +followed by source-count length/position pairs?] 
+ + Win_Indicator: + This byte is a set of bits, as shown: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + | | | | | | | | | + +-+-+-+-+-+-+-+-+ + ^ ^ + | | + | +-- VCD_SOURCE + +---- VCD_TARGET + + + If bit 0 (VCD_SOURCE) is non-zero, this indicates that a segment + of data from the "source" file was used as the corresponding + source window of data to encode the target window. The decoder + will use this same source data segment to decode the target window. + + If bit 1 (VCD_TARGET) is non-zero, this indicates that a segment + of data from the "target" file was used as the corresponding + source window of data to encode the target window. As above, this + same source data segment is used to decode the target window. + + The Win_Indicator byte MUST NOT have more than one of the bits + set (non-zero). It MAY have none of these bits set. + + If one of these bits is set, the byte is followed by two + integers to indicate respectively the length and position of + the source data segment in the relevant file. If the + indicator byte is zero, the target window was compressed + by itself without comparing against another data segment, + and these two integers are not included. + + The delta encoding of the target window: + This contains the delta encoding of the target window either + in terms of the source data segment (i.e., VCD_SOURCE + or VCD_TARGET was set) or by itself if no source window + is specified. This data format is discussed next. 
+ + +4.3 The Delta Encoding of a Target Window + + The delta encoding of a target window is organized as follows: + + Length of the delta encoding - integer + The delta encoding + Length of the target window - integer + Delta_Indicator - byte + Length of data for ADDs and RUNs - integer + Length of instructions section - integer + Length of addresses for COPYs - integer + Data section for ADDs and RUNs - array of bytes + Instructions and sizes section - array of bytes + Addresses section for COPYs - array of bytes + + + Length of the delta encoding: + This integer gives the total number of remaining bytes that + comprise data of the delta encoding for this target window. + + The delta encoding: + This contains the data representing the delta encoding which + is described next. + + Length of the target window: + This integer indicates the actual size of the target window + after decompression. A decoder can use this value to allocate + memory to store the uncompressed data. + + Delta_Indicator: + This byte is a set of bits, as shown: + + 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+ + | | | | | | | | | + +-+-+-+-+-+-+-+-+ + ^ ^ ^ + | | | + | | +-- VCD_DATACOMP + | +---- VCD_INSTCOMP + +------ VCD_ADDRCOMP + + VCD_DATACOMP: bit value 1. + VCD_INSTCOMP: bit value 2. + VCD_ADDRCOMP: bit value 4. + + As discussed, the delta encoding consists of COPY, ADD and RUN + instructions. The ADD and RUN instructions have accompanying + unmatched data (that is, data that does not specifically match + any data in the source window or in some earlier part of the + target window) and the COPY instructions have addresses of where + the matches occur. OPTIONALLY, these types of data MAY be further + compressed using a secondary compressor. Thus, Vcdiff separates + the encoding of the delta instructions into three parts: + + a. The unmatched data in the ADD and RUN instructions, + b. The delta instructions and accompanying sizes, and + c. The addresses of the COPY instructions. 
+ + If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these + sections may have been compressed using the specified secondary + compressor. The bit positions 0 (VCD_DATACOMP), 1 (VCD_INSTCOMP), + and 2 (VCD_ADDRCOMP) respectively indicate, if non-zero, that + the corresponding parts are compressed. Then, these parts MUST + be decompressed before decoding the delta instructions. + + Length of data for ADDs and RUNs: + This is the length (in bytes) of the section of data storing + the unmatched data accompanying the ADD and RUN instructions. + + Length of instructions section: + This is the length (in bytes) of the delta instructions and + accompanying sizes. + + Length of addresses for COPYs: + This is the length (in bytes) of the section storing + the addresses of the COPY instructions. + + Data section for ADDs and RUNs: + This sequence of bytes encodes the unmatched data for the ADD + and RUN instructions. + + Instructions and sizes section: + This sequence of bytes encodes the instructions and their sizes. + + Addresses section for COPYs: + This sequence of bytes encodes the addresses of the COPY + instructions. + + +5. DELTA INSTRUCTION ENCODING + + The delta instructions described in Section 3 represent the results of + string matching. For many data differencing applications in which the + changes between source and target data are small, any straightforward + representation of these instructions would be adequate. However, for + applications including data compression, it is important to encode + these instructions well to achieve good compression rates. From our + experience, the following observations can be made: + + a. The addresses in COPY instructions are locations of matches and + often occur close by or even exactly equal to one another. This is + because data in local regions are often replicated with minor changes. + In turn, this means that coding a newly matched address against some + set of recently matched addresses can be beneficial. 
+ + b. The matches are often short in length and separated by small amounts + of unmatched data. That is, the lengths of COPY and ADD instructions + are often small. This is particularly true of binary data such as + executable files or structured data such as HTML or XML. In such cases, + compression can be improved by combining the encoding of the sizes + and the instruction types as well as combining the encoding of adjacent + delta instructions with sufficiently small data sizes. + + The below subsections discuss how the Vcdiff data format provides + mechanisms enabling encoders to use the above observations to improve + compression rates. + + +5.1 Address Encoding Modes of COPY Instructions + + As mentioned earlier, addresses of COPY instructions often occur close + to one another or are exactly equal. To take advantage of this phenomenon + and encode addresses of COPY instructions more efficiently, the Vcdiff + data format supports the use of two different types of address caches. + Both the encoder and decoder maintain these caches, so that decoder's + caches remain synchronized with the encoder's caches. + + a. A "near" cache is an array with "s_near" slots, each containing an + address used for encoding addresses nearby to previously encoded + addresses (in the positive direction only). The near cache also + maintains a "next_slot" index to the near cache. New entries to the + near cache are always inserted in the next_slot index, which maintains + a circular buffer of the s_near most recent addresses. + + b. A "same" cache is an array with "s_same" multiple of 256 slots, each + containing an address. The same cache maintains a hash table of recent + addresses used for repeated encoding of the exact same address. + + + By default, the parameters s_near and s_same are respectively set to 4 + and 3. 
An encoder MAY modify these values, but then it MUST encode the + new values in the encoding itself, as discussed in Section 7, so that + the decoder can properly set up its own caches. + + At the start of processing a target window, an implementation + (encoder or decoder) initializes all of the slots in both caches + to zero. The next_slot pointer of the near cache is set + to point to slot zero. + + Each time a COPY instruction is processed by the encoder or + decoder, the implementation's caches are updated as follows, where + "addr" is the address in the COPY instruction. + + a. The slot in the near cache referenced by the next_slot + index is set to addr. The next_slot index is then incremented + modulo s_near. + + b. The slot in the same cache whose index is addr%(s_same*256) + is set to addr. [We use the C notations of % for modulo and + * for multiplication.] + + +5.2 Example code for maintaining caches + + To make clear the above description, below are example cache data + structures and algorithms to initialize and update them: + + typedef struct _cache_s + { + int* near; /* array of size s_near */ + int s_near; + int next_slot; /* the circular index for near */ + int* same; /* array of size s_same*256 */ + int s_same; + } Cache_t; + + cache_init(Cache_t* ka) + { + int i; + + ka->next_slot = 0; + for(i = 0; i < ka->s_near; ++i) + ka->near[i] = 0; + + for(i = 0; i < ka->s_same*256; ++i) + ka->same[i] = 0; + } + + cache_update(Cache_t* ka, int addr) + { + if(ka->s_near > 0) + { ka->near[ka->next_slot] = addr; + ka->next_slot = (ka->next_slot + 1) % ka->s_near; + } + + if(ka->s_same > 0) + ka->same[addr % (ka->s_same*256)] = addr; + } + + +5.3 Encoding of COPY instruction addresses + + The address of a COPY instruction is encoded using different modes + depending on the type of cached address used, if any. 
+ + Let "addr" be the address of a COPY instruction to be decoded and "here" + be the current location in the target data (i.e., the start of the data + about to be encoded or decoded). Let near[j] be the jth element in + the near cache, and same[k] be the kth element in the same cache. + Below are the possible address modes: + + VCD_SELF: This mode has value 0. The address was encoded by itself + as an integer. + + VCD_HERE: This mode has value 1. The address was encoded as + the integer value "here - addr". + + Near modes: The "near modes" are in the range [2,s_near+1]. Let m + be the mode of the address encoding. The address was encoded + as the integer value "addr - near[m-2]". + + Same modes: The "same modes" are in the range + [s_near+2,s_near+s_same+1]. Let m be the mode of the encoding. + The address was encoded as a single byte b such that + "addr == same[(m - (s_near+2))*256 + b]". + + +5.3 Example code for encoding and decoding of COPY instruction addresses + + We show example algorithms below to demonstrate use of address modes more + clearly. The encoder has freedom to choose address modes, the sample + addr_encode() algorithm merely shows one way of picking the address + mode. The decoding algorithm addr_decode() will uniquely decode addresses + regardless of the encoder's algorithm choice. + + Note that the address caches are updated immediately after an address is + encoded or decoded. In this way, the decoder is always synchronized with + the encoder. + + int addr_encode(Cache_t* ka, int addr, int here, int* mode) + { + int i, d, bestd, bestm; + + /* Attempt to find the address mode that yields the + * smallest integer value for "d", the encoded address + * value, thereby minimizing the encoded size of the + * address. 
*/ + + bestd = addr; bestm = VCD_SELF; /* VCD_SELF == 0 */ + + if((d = here-addr) < bestd) + { bestd = d; bestm = VCD_HERE; } /* VCD_HERE == 1 */ + + for(i = 0; i < ka->s_near; ++i) + if((d = addr - ka->near[i]) >= 0 && d < bestd) + { bestd = d; bestm = i+2; } + + if(ka->s_same > 0 && ka->same[d = addr%(ka->s_same*256)] == addr) + { bestd = d%256; bestm = ka->s_near + 2 + d/256; } + + cache_update(ka,addr); + + *mode = bestm; /* this returns the address encoding mode */ + return bestd; /* this returns the encoded address */ + } + + Note that the addr_encode() algorithm chooses the best address mode using a + local optimization, but that may not lead to the best encoding efficiency + because different modes lead to different instruction encodings, as described below. + + The functions addrint() and addrbyte() used in addr_decode() obtain from + the "Addresses section for COPYs" (Section 4.3) an integer or a byte, + respectively. These utilities will not be described here. We simply + recall that an integer is represented as a compact variable-sized string + of bytes as described in Section 2 (i.e., base 128). + + int addr_decode(Cache_t* ka, int here, int mode) + { int addr, m; + + if(mode == VCD_SELF) + addr = addrint(); + else if(mode == VCD_HERE) + addr = here - addrint(); + else if((m = mode - 2) >= 0 && m < ka->s_near) /* near cache */ + addr = ka->near[m] + addrint(); + else /* same cache */ + { m = mode - (2 + ka->s_near); + addr = ka->same[m*256 + addrbyte()]; + } + + cache_update(ka, addr); + + return addr; + } + + +5.4 Instruction Codes + + As noted, the data sizes associated with delta instructions are often + small. Thus, compression efficiency can be improved by combining the sizes + and instruction types in a single encoding, as well by combining certain + pairs of adjacent delta instructions. Effective choices of when to perform + such combinations depend on many factors including the data being processed + and the string matching algorithm in use. 
For example, if many COPY + instructions have the same data sizes, it may be worth to encode these + instructions more compactly than others. + + The Vcdiff data format is designed so that a decoder does not need to be + aware of the choices made in encoding algorithms. This is achieved with the + notion of an "instruction code table" containing 256 entries. Each entry + defines either a single delta instruction or a pair of instructions that + have been combined. Note that the code table itself only exists in main + memory, not in the delta file (unless using an application-defined code + table, described in Section 7). The encoded data simply includes the index + of each instruction and, since there are only 256 indices, each index + can be represented as a single byte. + + Each instruction code entry contains six fields, each of which + is a single byte with unsigned value: + + +-----------------------------------------------+ + | inst1 | size1 | mode1 | inst2 | size2 | mode2 | + +-----------------------------------------------+ + +@@@ could be more compact + + Each triple (inst,size,mode) defines a delta instruction. The meanings + of these fields are as follows: + + inst: An "inst" field can have one of the four values: NOOP (0), ADD (1), + RUN (2) or COPY (3) to indicate the instruction types. NOOP means + that no instruction is specified. In this case, both the corresponding + size and mode fields will be zero. + + size: A "size" field is zero or positive. A value zero means that the + size associated with the instruction is encoded separately as + an integer in the "Instructions and sizes section" (Section 6). + A positive value for "size" defines the actual data size. + Note that since the size is restricted to a byte, the maximum + value for any instruction with size implicitly defined in the code + table is 255. + + mode: A "mode" field is significant only when the associated delta + instruction is a COPY. 
It defines the mode used to encode the + associated addresses. For other instructions, this is always zero. + + +5.5 The Code Table + + Following the discussions on address modes and instruction code tables, + we define a "Code Table" to have the data below: + + s_near: the size of the near cache, + s_same: the size of the same cache, + i_code: the 256-entry instruction code table. + + Vcdiff itself defines a "default code table" in which s_near is 4 + and s_same is 3. Thus, there are 9 address modes for a COPY instruction. + The first two are VCD_SELF (0) and VCD_HERE (1). Modes 2, 3, 4 and 5 + are for addresses coded against the near cache. And, modes 6, 7 and 8 + are for addresses coded against the same cache. + + The default instruction code table is depicted below, in a compact + representation that we use only for descriptive purposes. See section 7 + for the specification of how an instruction code table is represented + in the Vcdiff encoding format. In the depiction, a zero value for + size indicates that the size is separately coded. The mode of non-COPY + instructions is represented as 0 even though they are not used. + + + TYPE SIZE MODE TYPE SIZE MODE INDEX + --------------------------------------------------------------- + 1. RUN 0 0 NOOP 0 0 0 + 2. ADD 0, [1,17] 0 NOOP 0 0 [1,18] + 3. COPY 0, [4,18] 0 NOOP 0 0 [19,34] + 4. COPY 0, [4,18] 1 NOOP 0 0 [35,50] + 5. COPY 0, [4,18] 2 NOOP 0 0 [51,66] + 6. COPY 0, [4,18] 3 NOOP 0 0 [67,82] + 7. COPY 0, [4,18] 4 NOOP 0 0 [83,98] + 8. COPY 0, [4,18] 5 NOOP 0 0 [99,114] + 9. COPY 0, [4,18] 6 NOOP 0 0 [115,130] + 10. COPY 0, [4,18] 7 NOOP 0 0 [131,146] + 11. COPY 0, [4,18] 8 NOOP 0 0 [147,162] + 12. ADD [1,4] 0 COPY [4,6] 0 [163,174] + 13. ADD [1,4] 0 COPY [4,6] 1 [175,186] + 14. ADD [1,4] 0 COPY [4,6] 2 [187,198] + 15. ADD [1,4] 0 COPY [4,6] 3 [199,210] + 16. ADD [1,4] 0 COPY [4,6] 4 [211,222] + 17. ADD [1,4] 0 COPY [4,6] 5 [223,234] + 18. ADD [1,4] 0 COPY 4 6 [235,238] + 19. 
ADD [1,4] 0 COPY 4 7 [239,242] + 20. ADD [1,4] 0 COPY 4 8 [243,246] + 21. COPY 4 [0,8] ADD 1 0 [247,255] + --------------------------------------------------------------- + + In the above depiction, each numbered line represents one or more + entries in the actual instruction code table (recall that an entry in + the instruction code table may represent up to two combined delta + instructions.) The last column ("INDEX") shows which index value or + range of index values of the entries covered by that line. The notation + [i,j] means values from i through j, inclusive. The first 6 columns of + a line in the depiction describe the pairs of instructions used for + the corresponding index value(s). + + If a line in the depiction includes a column entry using the [i,j] + notation, this means that the line is instantiated for each value + in the range from i to j, inclusive. The notation "0, [i,j]" means + that the line is instantiated for the value 0 and for each value + in the range from i to j, inclusive. + + If a line in the depiction includes more than one entry using the [i,j] + notation, implying a "nested loop" to convert the line to a range of + table entries, the first such [i,j] range specifies the outer loop, + and the second specifies the inner loop. + + The below examples should make clear the above description: + + Line 1 shows the single RUN instruction with index 0. As the size field + is 0, this RUN instruction always has its actual size encoded separately. + + Line 2 shows the 18 single ADD instructions. The ADD instruction with + size field 0 (i.e., the actual size is coded separately) has index 1. + ADD instructions with sizes from 1 to 17 use code indices 2 to 18 and + their sizes are as given (so they will not be separately encoded.) + + Following the single ADD instructions are the single COPY instructions + ordered by their address encoding modes. For example, line 11 shows the + COPY instructions with mode 8, i.e., the last of the same cache. 
+ In this case, the COPY instruction with size field 0 has index 147. + Again, the actual size of this instruction will be coded separately. + + Lines 12 to 21 show the pairs of instructions that are combined together. + For example, line 12 depicts the 12 entries in which an ADD instruction + is combined with an immediately following COPY instruction. The entries + with indices 163, 164, 165 represent the pairs in which the ADD + instructions all have size 1 while the COPY instructions have mode + 0 (VCD_SELF) and sizes 4, 5 and 6 respectively. + + The last line, line 21, shows the nine instruction pairs where the first + instruction is a COPY and the second is an ADD. In this case, all COPY + instructions have size 4 with mode ranging from 0 to 8 and all the ADD + instructions have size 1. Thus, the entry with largest index 255 + combines a COPY instruction of size 4 and mode 8 with an ADD instruction + of size 1. + + The choice of the minimum size 4 for COPY instructions in the default code + table was made from experiments that showed that excluding small matches + (less than 4 bytes long) improved the compression rates. + + +6. DECODING A TARGET WINDOW + + Section 4.3 discusses that the delta instructions and associated data + are encoded in three arrays of bytes: + + Data section for ADDs and RUNs, + Instructions and sizes section, and + Addresses section for COPYs. + + + Further, these data sections may have been further compressed by some + secondary compressor. Assume that any such compressed data has been + decompressed, so that we now have three arrays: + + inst: bytes coding the instructions and sizes. + data: unmatched data associated with ADDs and RUNs. + addr: bytes coding the addresses of COPYs. + + These arrays are organized as follows: + + inst: + a sequence of (index, [size1], [size2]) tuples, where "index" + is an index into the instruction code table, and size1 and size2 + are integers that MAY or MAY NOT be included in the tuple as + follows.
The entry with the given "index" in the instruction + code table potentially defines two delta instructions. If the + first delta instruction is not a VCD_NOOP and its size is zero, + then size1 MUST be present. Otherwise, size1 MUST be omitted and + the size of the instruction (if it is not VCD_NOOP) is as defined + in the table. The presence or absence of size2 is defined + similarly with respect to the second delta instruction. + + data: + a sequence of data values, encoded as bytes. + + addr: + a sequence of address values. Addresses are normally encoded as + integers as described in Section 2 (i.e., base 128). + Since the same cache emits addresses in the range [0,255], + however, same cache addresses are always encoded as a + single byte. + + To summarize, each tuple in the "inst" array includes an index to some + entry in the instruction code table that determines: + + a. Whether one or two instructions were encoded and their types. + + b. If the instructions have their sizes encoded separately, these + sizes will follow, in order, in the tuple. + + c. If the instructions have accompanying data, i.e., ADDs or RUNs, + their data will be in the array "data". + + d. Similarly, if the instructions are COPYs, the coded addresses are + found in the array "addr". + + The decoding procedure simply processes the arrays by reading one code + index at a time, looking up the corresponding instruction code entry, + then consuming the respective sizes, data and addresses following the + directions in this entry. In other words, the decoder maintains an implicit + next-element pointer for each array; "consuming" an instruction tuple, + data, or address value implies incrementing the associated pointer. + + For example, if during the processing of the target window, the next + unconsumed tuple in the inst array has index value 19, then the first + instruction is a COPY, whose size is found as the immediately following + integer in the inst array. 
Since the mode of this COPY instruction is + VCD_SELF, the corresponding address is found by consuming the next + integer in the addr array. The data array is left intact. As the second + instruction for code index 19 is a NOOP, this tuple is finished. + + +7. APPLICATION-DEFINED CODE TABLES + + Although the default code table used in Vcdiff is good for general + purpose encoders, there are times when other code tables may perform + better. For example, to code a file with many identical segments of data, + it may be advantageous to have a COPY instruction with the specific size + of these data segments so that the instruction can be encoded in a single + byte. Such a special code table MUST then be encoded in the delta file + so that the decoder can reconstruct it before decoding the data. + + Vcdiff allows an application-defined code table to be specified + in a delta file with the following data: + + Size of near cache - byte + Size of same cache - byte + Compressed code table data + + The "compressed code table data" encodes the delta between the default + code table (source) and the new code table (target) in the same manner as + described in Section 4.3 for encoding a target window in terms of a + source window. This delta is computed using the following steps: + + a. Convert the new instruction code table into a string, "code", of + 1536 bytes using the below steps in order: + + i. Add in order the 256 bytes representing the types of the first + instructions in the instruction pairs. + ii. Add in order the 256 bytes representing the types of the second + instructions in the instruction pairs. + iii. Add in order the 256 bytes representing the sizes of the first + instructions in the instruction pairs. + iv. Add in order the 256 bytes representing the sizes of the second + instructions in the instruction pairs. + v. Add in order the 256 bytes representing the modes of the first + instructions in the instruction pairs. + vi. 
Add in order the 256 bytes representing the modes of the second + instructions in the instruction pairs. + + b. Similarly, convert the default instruction code table into + a string "dflt". + + c. Treat the string "code" as a target window and "dflt" as the + corresponding source data and apply an encoding algorithm to + compute the delta encoding of "code" in terms of "dflt". + This computation MUST use the default code table for encoding + the delta instructions. + + The decoder can then reverse the above steps to decode the compressed + table data using the method of Section 6, employing the default code + table, to generate the new code table. Note that the decoder does not + need to know anything about the details of the encoding algorithm used + in step (c). The decoder is still able to decode the new code table + because the Vcdiff format is independent from the choice of encoding + algorithm, and because the encoder in step (c) uses the known, default + code table. + + +8. PERFORMANCE + + The encoding format is compact. For compression only, using the LZ-77 + string parsing strategy and without any secondary compressors, the typical + compression rate is better than Unix compress and close to gzip. For + differencing, the data format is better than all known methods in + terms of its stated goal, which is primarily decoding speed and + encoding efficiency. + + We compare the performance of compress, gzip and Vcdiff using the + archives of three versions of the Gnu C compiler, gcc-2.95.1.tar, + gcc-2.95.2.tar and gcc-2.95.3.tar. The experiments were done on an + SGI-MIPS3, 400MHZ. Gzip was used at its default compression level. + Vcdiff timings were done using the Vcodex/Vcdiff software (Section 13). + As string and window matching typically dominates the computation during + compression, the Vcdiff compression times were directly due to the + algorithms used in the Vcodex/Vcdiff software. 
However, the decompression + times should be generic and representative of any good implementation + of the Vcdiff data format. Timing was done by running each program + three times and taking the average of the total cpu+system times. + + Below are the different Vcdiff runs: + + Vcdiff: vcdiff is used as a compressor only. + + Vcdiff-d: vcdiff is used as a differencer only. That is, it only + compares target data against source data. Since the files + involved are large, they are broken into windows. In this + case, each target window starting at some file offset in + the target file is compared against a source window with + the same file offset (in the source file). The source + window is also slightly larger than the target window + to increase matching opportunities. The -d option also gives + a hint to the string matching algorithm of Vcdiff that + the two files are very similar with long stretches of matches. + The algorithm takes advantage of this to minimize its + processing of source data and save time. + + Vcdiff-dc: This is similar to Vcdiff-d but vcdiff can also compare + target data against target data as applicable. Thus, vcdiff + both computes differences and compresses data. The windowing + algorithm is the same as above. However, the above hint is + rescinded in this case. + + Vcdiff-dcs: This is similar to Vcdiff-dc but the windowing algorithm + uses a content-based heuristic to select source data segments + that are more likely to match with a given target window. + Thus, the source data segment selected for a target window + often will not be aligned with the file offsets of this + target window. + + + gcc-2.95.1 gcc-2.95.2 compression decompression + raw size 55746560 55797760 + compress - 19939390 13.85s 7.09s + gzip - 12973443 42.99s 5.35s + Vcdiff - 15358786 20.04s 4.65s + Vcdiff-d - 100971 10.93s 1.92s + Vcdiff-dc - 97246 20.03s 1.84s + Vcdiff-dcs - 256445 44.81s 1.84s + + TABLE 1.
Compressing gcc-2.95.2.tar given gcc-2.95.1 + + + TABLE 1 shows the raw sizes of gcc-2.95.1.tar and gcc-2.95.2.tar and the + sizes of the compressed results. As a pure compressor, the compression + rate for Vcdiff is worse than gzip and better than compress. The last + three rows show that when two file versions are very similar, differencing + can have dramatically good compression rates. Vcdiff-d and Vcdiff-dc use + the same simple window selection method but Vcdiff-dc also does compression + so its output is slightly smaller. Vcdiff-dcs uses a heuristic based on + data content to search for source data that likely will match a given target + window. Although it does a good job, the heuristic did not always find the + best matches which are given by the simple algorithm of Vcdiff-d. As a + result, the output size is slightly larger. Note also that there is a large + cost in computing matching windows this way. Finally, the compression time + of Vcdiff-d is nearly half of that of Vcdiff-dc. It is tempting to conclude + that the compression feature causes the additional time in Vcdiff-dc + relative to Vcdiff-d. However, this is not the case. The hint given to + the Vcdiff string matching algorithm that the two files are likely to + have very long stretches of matches helps the algorithm to minimize + processing of the "source data", thus saving half the time. However, as we + shall see below, when this hint is wrong, the result is an even longer time. + + + gcc-2.95.2 gcc-2.95.3 compression decompression + raw size 55797760 55787520 + compress - 19939453 13.54s 7.00s + gzip - 12998097 42.63s 5.62s + Vcdiff - 15371737 20.09s 4.74s + Vcdiff-d - 26383849 71.41s 6.41s + Vcdiff-dc - 14461203 42.48s 4.82s + Vcdiff-dcs - 1248543 61.18s 1.99s + + TABLE 2. Compressing gcc-2.95.3.tar given gcc-2.95.2 + + + TABLE 2 shows the raw sizes of gcc-2.95.2.tar and gcc-2.95.3.tar and + the sizes of the compressed results.
In this case, the tar file of + gcc-2.95.3 is rearranged in a way that makes the straightforward method + of matching file offsets for source and target windows fail. As a + result, Vcdiff-d performs rather dismally both in time and output size. + The large time for Vcdiff-d is directly due to the fact that the string + matching algorithm has to work much harder to find matches when the hint + that two files have long matching stretches fails to hold. On the other + hand, Vcdiff-dc does both differencing and compression resulting in good + output size. Finally, the window searching heuristic used in Vcdiff-dcs is + effective in finding the right matching source windows for target windows + resulting in a small output size. This shows why the data format needs to + have a way to specify matching windows to gain performance. Finally, + we note that the decoding times are always good regardless of how + the string matching or window searching algorithms perform. + + +9. FURTHER ISSUES + + This document does not address a few issues: + + Secondary compressors: + As discussed in Section 4.3, certain sections in the delta encoding + of a window may be further compressed by a secondary compressor. + In our experience, the basic Vcdiff format is adequate for most + purposes so that secondary compressors are seldom needed. In + particular, for normal use of data differencing where the files to + be compared have long stretches of matches, much of the gain in + compression rate is already achieved by normal string matching. + Thus, the use of secondary compressors is seldom needed in this case. + However, for applications beyond differencing of such nearly identical + files, secondary compressors may be needed to achieve maximal + compressed results. + + Therefore, we recommend leaving the Vcdiff data format defined + as in this document so that the use of secondary compressors + can be implemented when they become needed in the future.
+ The formats of the compressed data via such compressors or any + compressors that may be defined in the future are left open to + their implementations. These could include Huffman encoding, + arithmetic encoding, and splay tree encoding [8,9]. + + Large file system vs. small file system: + As discussed in Section 4, a target window in a large file may be + compared against some source window in another file or in the same + file (from some earlier part). In that case, the file offset of the + source window is specified as a variable-sized integer in the delta + encoding. There is a possibility that the encoding was computed on + a system supporting much larger files than on a system where + the data may be decoded (e.g., 64-bit file systems vs. 32-bit file + systems). In that case, some target data may not be recoverable. + This problem could afflict any compression format, and ought + to be resolved with a generic negotiation mechanism in the + appropriate protocol(s). + + +10. SUMMARY + + We have described Vcdiff, a general and portable encoding format for + compression and differencing. The format is good in that it allows + implementing a decoder without knowledge of the encoders. Further, + ignoring the use of secondary compressors not defined within the format, + the decoding algorithm runs in linear time and requires working space + proportional to window sizes. + + + +11. ACKNOWLEDGEMENTS + + Thanks are due to Balachander Krishnamurthy, Jeff Mogul and Arthur Van Hoff + who provided much encouragement to publicize Vcdiff. In particular, Jeff + helped clarify the description of the data format presented here. + + + +12. SECURITY CONSIDERATIONS + + Vcdiff only provides a format to encode compressed and differenced data. + It does not address any issues concerning how such data are, in fact, + stored in a given file system or the run-time memory of a computer system. + Therefore, we do not anticipate any security issues with respect to Vcdiff. + + + +13.
SOURCE CODE AVAILABILITY + + Vcdiff is implemented as a data transforming method in Phong Vo's + Vcodex library. AT&T Corp. has made the source code for Vcodex available + for anyone to use to transmit data via HTTP/1.1 Delta Encoding [10,11]. + The source code and accompanying license are accessible at the URL below: + + http://www.research.att.com/sw/tools + + +14. INTELLECTUAL PROPERTY RIGHTS + + The IETF has been notified of intellectual property rights claimed in + regard to some or all of the specification contained in this + document. For more information consult the online list of claimed + rights, at <http://www.ietf.org/ipr.html>. + + The IETF takes no position regarding the validity or scope of any + intellectual property or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; neither does it represent that it + has made any effort to identify any such rights. Information on the + IETF's procedures with respect to rights in standards-track and + standards-related documentation can be found in BCP-11. Copies of + claims of rights made available for publication and any assurances of + licenses to be made available, or the result of an attempt made to + obtain a general license or permission for the use of such + proprietary rights by implementors or users of this specification can + be obtained from the IETF Secretariat. + + + +15. IANA CONSIDERATIONS + + The Internet Assigned Numbers Authority (IANA) administers the number + space for Secondary Compressor ID values. Values and their meaning + must be documented in an RFC or other peer-reviewed, permanent, and + readily available reference, in sufficient detail so that + interoperability between independent implementations is possible. + Subject to these constraints, name assignments are First Come, First + Served - see RFC2434 [13].
Legal ID values are in the range 1..255. + + This document does not define any values in this number space. + + +16. REFERENCES + + [1] D.G. Korn and K.P. Vo, Vdelta: Differencing and Compression, + Practical Reusable Unix Software, Editor B. Krishnamurthy, + John Wiley & Sons, Inc., 1995. + + [2] J. Ziv and A. Lempel, A Universal Algorithm for Sequential Data + Compression, IEEE Trans. on Information Theory, 23(3):337-343, 1977. + + [3] W. Tichy, The String-to-String Correction Problem with Block Moves, + ACM Transactions on Computer Systems, 2(4):309-321, November 1984. + + [4] E.M. McCreight, A Space-Economical Suffix Tree Construction + Algorithm, Journal of the ACM, 23:262-272, 1976. + + [5] J.J. Hunt, K.P. Vo, W. Tichy, An Empirical Study of Delta Algorithms, + IEEE Software Configuration and Maintenance Workshop, 1996. + + [6] J.J. Hunt, K.P. Vo, W. Tichy, Delta Algorithms: An Empirical Analysis, + ACM Trans. on Software Engineering and Methodology, 7:192-214, 1998. + + [7] D.G. Korn, K.P. Vo, Sfio: A buffered I/O Library, + Proc. of the Summer '91 Usenix Conference, 1991. + + [8] D. W. Jones, Application of Splay Trees to Data Compression, + CACM, 31(8):996:1007. + + [9] M. Nelson, J. Gailly, The Data Compression Book, ISBN 1-55851-434-1, + M&T Books, New York, NY, 1995. + + [10] J.C. Mogul, F. Douglis, A. Feldmann, and B. Krishnamurthy, + Potential benefits of delta encoding and data compression for HTTP, + SIGCOMM '97, Cannes, France, 1997. + + [11] J.C. Mogul, B. Krishnamurthy, F. Douglis, A. Feldmann, + Y. Goland, and A. Van Hoff, Delta Encoding in HTTP, + IETF, draft-mogul-http-delta-10, 2001. + + [12] S. Bradner, Key words for use in RFCs to Indicate Requirement Levels, + RFC 2119, March 1997. + + [13] T. Narten, H. Alvestrand, Guidelines for Writing an IANA + Considerations Section in RFCs, RFC2434, October 1998. + + + +17. 
AUTHOR'S ADDRESS + + Kiem-Phong Vo (main contact) + AT&T Labs, Room D223 + 180 Park Avenue + Florham Park, NJ 07932 + Email: kpv@research.att.com + Phone: 1 973 360 8630 + + David G. Korn + AT&T Labs, Room D237 + 180 Park Avenue + Florham Park, NJ 07932 + Email: dgk@research.att.com + Phone: 1 973 360 8602 + + Jeffrey C. Mogul + Western Research Laboratory + Compaq Computer Corporation + 250 University Avenue + Palo Alto, California, 94305, U.S.A. + Email: JeffMogul@acm.org + Phone: 1 650 617 3304 (email preferred) + + Joshua P. MacDonald + Computer Science Division + University of California, Berkeley + 345 Soda Hall + Berkeley, CA 94720 + Email: jmacd@cs.berkeley.edu diff --git a/examples/Makefile b/examples/Makefile new file mode 100755 index 0000000..b21ebda --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,32 @@ +CFLAGS = -g -Wall -I.. -DXD3_DEBUG=1 -DNDEBUG=0 +#CFLAGS = -O3 -Wall -I.. -DXD3_DEBUG=0 -fno-builtin -DNDEBUG=1 +# -pg + +SOURCES = small_page_test.c encode_decode_test.c speed_test.c + +DEPS = ../*.h ../*.c *.h + +TARGETS = small_page_test encode_decode_test speed_test32 speed_test64 compare_test checksum_test + +all: $(TARGETS) + +small_page_test: small_page_test.c $(DEPS) + $(CC) $(CFLAGS) small_page_test.c -o small_page_test -DXD3_USE_LARGEFILE64=0 -DSECONDARY_DJW=1 + +encode_decode_test: encode_decode_test.c $(DEPS) + $(CC) $(CFLAGS) encode_decode_test.c -o encode_decode_test + +speed_test32: speed_test.c $(DEPS) + $(CC) $(CFLAGS) -DXD3_USE_LARGEFILE64=0 speed_test.c -o speed_test32 + +speed_test64: speed_test.c $(DEPS) + $(CC) $(CFLAGS) -DXD3_USE_LARGEFILE64=1 speed_test.c -o speed_test64 + +compare_test: compare_test.c + $(CC) $(CFLAGS) compare_test.c -o compare_test + +checksum_test: checksum_test.cc + $(CXX) $(CFLAGS) checksum_test.cc -o checksum_test + +clean: + rm -f *.exe *.stackdump $(TARGETS) diff --git a/examples/README b/examples/README new file mode 100644 index 0000000..60840bf --- /dev/null +++ b/examples/README @@ -0,0 +1,8 @@ 
+Files in this directory demonstrate how to use the Xdelta3 API. Copyrights +are held by the respective authors and these files are not covered by the GPL. + +small_page_test.c -- how to use xdelta3 in an environment such as the kernel +for small pages with little memory + +encode_decode_test.c -- how to use xdelta3 to process (encode/decode) data in +multiple windows with the non-blocking API diff --git a/examples/checksum_test.cc b/examples/checksum_test.cc new file mode 100644 index 0000000..ab3ef6c --- /dev/null +++ b/examples/checksum_test.cc @@ -0,0 +1,731 @@ +/* Copyright (C) 2007 Josh MacDonald */ + +extern "C" { +#include "test.h" +} + +#include <list> +#include <vector> +#include <map> +#include <algorithm> + +using std::list; +using std::map; +using std::vector; + +// MLCG parameters +// a, a* +uint32_t good_32bit_values[] = { + 1597334677U, // ... + 741103597U, 887987685U, +}; + +// a, a* +uint64_t good_64bit_values[] = { + 1181783497276652981ULL, 4292484099903637661ULL, + 7664345821815920749ULL, // ... +}; + +struct true_type { }; +struct false_type { }; + +template <typename Word> +int bitsof(); + +template<> +int bitsof<uint32_t>() { + return 32; +} + +template<> +int bitsof<uint64_t>() { + return 64; +} + +struct plain { + int operator()(const uint8_t &c) { + return c; + } +}; + +template <typename Word> +struct hhash { // take "h" of the high-bits as a hash value for this + // checksum, which are the most "distant" in terms of the + // spectral test for the rabin_karp MLCG. For short windows, + // the high bits aren't enough, XOR "mask" worth of these in. 
+ Word operator()(const Word& t, const int &h, const int &mask) { + return (t >> h) ^ (t & mask); + } +}; + +template <typename Word> +Word good_word(); + +template<> +uint32_t good_word<uint32_t>() { + return good_32bit_values[0]; +} + +template<> +uint64_t good_word<uint64_t>() { + return good_64bit_values[0]; +} + +// CLASSES + +#define SELF Word, CksumSize, CksumSkip, Permute, Hash, Compaction +#define MEMBER template <typename Word, \ + int CksumSize, \ + int CksumSkip, \ + typename Permute, \ + typename Hash, \ + int Compaction> + +MEMBER +struct cksum_params { + typedef Word word_type; + typedef Permute permute_type; + typedef Hash hash_type; + + enum { cksum_size = CksumSize, + cksum_skip = CksumSkip, + compaction = Compaction, + }; +}; + + +MEMBER +struct rabin_karp { + typedef Word word_type; + typedef Permute permute_type; + typedef Hash hash_type; + + enum { cksum_size = CksumSize, + cksum_skip = CksumSkip, + compaction = Compaction, + }; + + // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ... 
+ rabin_karp() { + multiplier = good_word<Word>(); + powers = new Word[cksum_size]; + powers[cksum_size - 1] = 1; + for (int i = cksum_size - 2; i >= 0; i--) { + powers[i] = powers[i + 1] * multiplier; + } + product = powers[0] * multiplier; + } + + ~rabin_karp() { + delete [] powers; + } + + Word step(const uint8_t *ptr) { + Word h = 0; + for (int i = 0; i < cksum_size; i++) { + h += permute_type()(ptr[i]) * powers[i]; + } + return h; + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = multiplier * incr_state - + product * permute_type()(ptr[-1]) + + permute_type()(ptr[cksum_size - 1]); + return incr_state; + } + + Word *powers; + Word product; + Word multiplier; + Word incr_state; +}; + +MEMBER +struct adler32_cksum { + typedef Word word_type; + typedef Permute permute_type; + typedef Hash hash_type; + + enum { cksum_size = CksumSize, + cksum_skip = CksumSkip, + compaction = Compaction, + }; + + Word step(const uint8_t *ptr) { + return xd3_lcksum (ptr, cksum_size); + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = xd3_large_cksum_update (incr_state, ptr - 1, cksum_size); + return incr_state; + } + + Word incr_state; +}; + +// TESTS + +template <typename Word> +struct file_stats { + typedef list<const uint8_t*> ptr_list; + typedef Word word_type; + typedef map<word_type, ptr_list> table_type; + typedef typename table_type::iterator table_iterator; + typedef typename ptr_list::iterator ptr_iterator; + + int cksum_size; + int cksum_skip; + int unique; + int unique_values; + int count; + table_type table; + + file_stats(int size, int skip) + : cksum_size(size), + cksum_skip(skip), + unique(0), + unique_values(0), + count(0) { + } + + void reset() { + unique = 0; + unique_values = 0; + count = 0; + table.clear(); + } + + void update(const word_type &word, const uint8_t *ptr) { + 
table_iterator t_i = table.find(word); + + count++; + + if (t_i == table.end()) { + table.insert(make_pair(word, ptr_list())); + } + + ptr_list &pl = table[word]; + + for (ptr_iterator p_i = pl.begin(); + p_i != pl.end(); + ++p_i) { + if (memcmp(*p_i, ptr, cksum_size) == 0) { + return; + } + } + + unique++; + pl.push_back(ptr); + } + + void freeze() { + unique_values = table.size(); + table.clear(); + } +}; + +struct test_result_base; + +static vector<test_result_base*> all_tests; + +struct test_result_base { + virtual ~test_result_base() { + } + virtual void reset() = 0; + virtual void print() = 0; + virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0; + virtual void stat() = 0; + virtual int count() = 0; + virtual int dups() = 0; + virtual double uniqueness() = 0; + virtual double fullness() = 0; + virtual double collisions() = 0; + virtual double coverage() = 0; + virtual double compression() = 0; + virtual double time() = 0; + virtual double score() = 0; + virtual void set_score(double min_dups_frac, double min_time) = 0; + virtual double total_time() = 0; + virtual int total_count() = 0; + virtual int total_dups() = 0; +}; + +struct compare_h { + bool operator()(test_result_base *a, + test_result_base *b) { + return a->score() < b->score(); + } +}; + +MEMBER +struct test_result : public test_result_base { + typedef Word word_type; + typedef Permute permute_type; + typedef Hash hash_type; + + enum { cksum_size = CksumSize, + cksum_skip = CksumSkip, + compaction = Compaction, + }; + + const char *test_name; + file_stats<Word> fstats; + int test_size; + int n_steps; + int n_incrs; + int s_bits; + int s_mask; + int t_entries; + int h_bits; + int h_buckets_full; + double h_score; + char *hash_table; + long accum_millis; + int accum_iters; + + // These are not reset + double accum_time; + int accum_count; + int accum_dups; + int accum_colls; + int accum_size; + + test_result(const char *name) + : test_name(name), + fstats(cksum_size, cksum_skip), 
+ hash_table(NULL), + accum_millis(0), + accum_iters(0), + accum_time(0.0), + accum_count(0), + accum_dups(0), + accum_colls(0), + accum_size(0) { + all_tests.push_back(this); + } + + ~test_result() { + reset(); + } + + void reset() { + // size of file + test_size = -1; + + // count + n_steps = -1; + n_incrs = -1; + + // four values used by new_table()/summarize_table() + s_bits = -1; + s_mask = -1; + t_entries = -1; + h_bits = -1; + h_buckets_full = -1; + + accum_millis = 0; + accum_iters = 0; + + fstats.reset(); + + // temporary + if (hash_table) { + delete(hash_table); + hash_table = NULL; + } + } + + int count() { + if (cksum_skip == 1) { + return n_incrs; + } else { + return n_steps; + } + } + + int dups() { + return fstats.count - fstats.unique; + } + + int colls() { + return fstats.unique - fstats.unique_values; + } + + double uniqueness() { + return 1.0 - (double) dups() / count(); + } + + double fullness() { + return (double) h_buckets_full / (1 << h_bits); + } + + double collisions() { + return (double) colls() / fstats.unique; + } + + double coverage() { + return (double) h_buckets_full / uniqueness() / count(); + } + + double compression() { + return 1.0 - coverage(); + } + + double time() { + return (double) accum_millis / accum_iters; + } + + double score() { + return h_score; + } + + void set_score(double min_compression, double min_time) { + h_score = (compression() - 0.99 * min_compression) + * (time() - 0.99 * min_time); + } + + double total_time() { + return accum_time; + } + + int total_count() { + return accum_count; + } + + int total_dups() { + return accum_dups; + } + + int total_colls() { + return accum_dups; + } + + void stat() { + accum_time += time(); + accum_count += count(); + accum_dups += dups(); + accum_colls += colls(); + accum_size += test_size; + } + + void print() { + if (fstats.count != count()) { + fprintf(stderr, "internal error: %d != %d\n", fstats.count, count()); + abort(); + } + printf("%s: (%u#%u) count %u uniq %0.2f%% 
full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n", + test_name, + cksum_size, + cksum_skip, + count(), + 100.0 * uniqueness(), + h_buckets_full, + 100.0 * fullness(), + 100.0 * collisions(), + 100.0 * coverage(), + h_bits, + 0.001 * accum_iters * test_size / accum_millis, + accum_iters); + } + + int size_log2 (int slots) + { + int bits = bitsof<word_type>() - 1; + int i; + + for (i = 3; i <= bits; i += 1) { + if (slots <= (1 << i)) { + return i - compaction; + } + } + + return bits; + } + + void new_table(int entries) { + t_entries = entries; + h_bits = size_log2(entries); + + int n = 1 << h_bits; + + s_bits = bitsof<word_type>() - h_bits; + s_mask = n - 1; + + hash_table = new char[n / 8]; + memset(hash_table, 0, n / 8); + } + + int get_table_bit(int i) { + return hash_table[i/8] & (1 << i%8); + } + + int set_table_bit(int i) { + return hash_table[i/8] |= (1 << i%8); + } + + void summarize_table() { + int n = 1 << h_bits; + int f = 0; + for (int i = 0; i < n; i++) { + if (get_table_bit(i)) { + f++; + } + } + h_buckets_full = f; + } + + void get(const uint8_t* buf, const int buf_size, int test_iters) { + rabin_karp<SELF> test; + //adler32_cksum<SELF> test; + hash_type hash; + const uint8_t *ptr; + const uint8_t *end; + int last_offset; + int periods; + int stop; + + test_size = buf_size; + last_offset = buf_size - cksum_size; + + if (last_offset < 0) { + periods = 0; + n_steps = 0; + n_incrs = 0; + stop = -cksum_size; + } else { + periods = last_offset / cksum_skip; + n_steps = periods + 1; + n_incrs = last_offset + 1; + stop = last_offset - (periods + 1) * cksum_skip; + } + + // Compute file stats once. 
+ if (fstats.unique_values == 0) { + if (cksum_skip == 1) { + for (int i = 0; i <= buf_size - cksum_size; i++) { + fstats.update(hash(test.step(buf + i), s_bits, s_mask), buf + i); + } + } else { + ptr = buf + last_offset; + end = buf + stop; + + for (; ptr != end; ptr -= cksum_skip) { + fstats.update(hash(test.step(ptr), s_bits, s_mask), ptr); + } + } + fstats.freeze(); + } + + long start_test = get_millisecs_now(); + + if (cksum_skip != 1) { + new_table(n_steps); + + for (int i = 0; i < test_iters; i++) { + ptr = buf + last_offset; + end = buf + stop; + + for (; ptr != end; ptr -= cksum_skip) { + set_table_bit(hash(test.step(ptr), s_bits, s_mask)); + } + } + + summarize_table(); + } + + stop = buf_size - cksum_size + 1; + if (stop < 0) { + stop = 0; + } + + if (cksum_skip == 1) { + + new_table(n_incrs); + + for (int i = 0; i < test_iters; i++) { + ptr = buf; + end = buf + stop; + + if (ptr != end) { + set_table_bit(hash(test.state0(ptr++), s_bits, s_mask)); + } + + for (; ptr != end; ptr++) { + Word w = test.incr(ptr); + assert(w == test.step(ptr)); + set_table_bit(hash(w, s_bits, s_mask)); + } + } + + summarize_table(); + } + + accum_iters += test_iters; + accum_millis += get_millisecs_now() - start_test; + } +}; + +template <typename Word> +void print_array(const char *tname) { + printf("static const %s hash_multiplier[64] = {\n", tname); + Word p = 1; + for (int i = 0; i < 64; i++) { + printf(" %uU,\n", p); + p *= good_word<Word>(); + } + printf("};\n", tname); +} + +int main(int argc, char** argv) { + int i; + uint8_t *buf = NULL; + size_t buf_len = 0; + int ret; + + if (argc <= 1) { + fprintf(stderr, "usage: %s file ...\n", argv[0]); + return 1; + } + + //print_array<uint32_t>("uint32_t"); + +#define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \ + _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \ + (#T "_" #Z "_" #S "_" #P "_" #H "_" #C) + +#if 0 + + TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ + TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* 
x */ \ + TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \ + TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \ + +#endif + +#define TESTS(SKIP) \ + TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ + TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ + TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \ + TEST(uint32_t, 9, SKIP, plain, hhash, 3) + +#define TESTS_ALL(SKIP) \ + TEST(uint32_t, 3, SKIP, plain, hhash, 0); \ + TEST(uint32_t, 3, SKIP, plain, hhash, 1); \ + TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \ + TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \ + TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \ + TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \ + TEST(uint32_t, 5, SKIP, plain, hhash, 0); \ + TEST(uint32_t, 5, SKIP, plain, hhash, 1); \ + TEST(uint32_t, 8, SKIP, plain, hhash, 0); \ + TEST(uint32_t, 8, SKIP, plain, hhash, 1); \ + TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \ + TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \ + TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \ + TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \ + TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \ + TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \ + TEST(uint32_t, 13, SKIP, plain, hhash, 0); \ + TEST(uint32_t, 13, SKIP, plain, hhash, 1); \ + TEST(uint32_t, 15, SKIP, plain, hhash, 0); /* x */ \ + TEST(uint32_t, 15, SKIP, plain, hhash, 1); /* x */ \ + TEST(uint32_t, 16, SKIP, plain, hhash, 0); /* x */ \ + TEST(uint32_t, 16, SKIP, plain, hhash, 1); /* x */ \ + TEST(uint32_t, 21, SKIP, plain, hhash, 0); \ + TEST(uint32_t, 21, SKIP, plain, hhash, 1); \ + TEST(uint32_t, 34, SKIP, plain, hhash, 0); \ + TEST(uint32_t, 34, SKIP, plain, hhash, 1); \ + TEST(uint32_t, 55, SKIP, plain, hhash, 0); \ + TEST(uint32_t, 55, SKIP, plain, hhash, 1) + + TESTS(1); // * +// TESTS(2); // * +// TESTS(3); // * +// TESTS(5); // * +// TESTS(8); // * +// TESTS(9); +// TESTS(11); +// TESTS(13); // * + TESTS(15); +// TESTS(16); +// TESTS(21); // * +// 
TESTS(34); // * +// TESTS(55); // * +// TESTS(89); // * + + for (i = 1; i < argc; i++) { + if ((ret = read_whole_file(argv[i], + & buf, + & buf_len))) { + return 1; + } + + fprintf(stderr, "file %s is %zu bytes\n", + argv[i], buf_len); + + double min_time = -1.0; + double min_compression = 0.0; + + for (vector<test_result_base*>::iterator i = all_tests.begin(); + i != all_tests.end(); ++i) { + test_result_base *test = *i; + test->reset(); + + int iters = 100; + long start_test = get_millisecs_now(); + + do { + test->get(buf, buf_len, iters); + iters *= 3; + iters /= 2; + } while (get_millisecs_now() - start_test < 2000); + + test->stat(); + + if (min_time < 0.0) { + min_compression = test->compression(); + min_time = test->time(); + } + + if (min_time > test->time()) { + min_time = test->time(); + } + + if (min_compression > test->compression()) { + min_compression = test->compression(); + } + + test->print(); + } + +// for (vector<test_result_base*>::iterator i = all_tests.begin(); +// i != all_tests.end(); ++i) { +// test_result_base *test = *i; +// test->set_score(min_compression, min_time); +// } + +// sort(all_tests.begin(), all_tests.end(), compare_h()); + +// for (vector<test_result_base*>::iterator i = all_tests.begin(); +// i != all_tests.end(); ++i) { +// test_result_base *test = *i; +// test->print(); +// } + + free(buf); + buf = NULL; + } + + return 0; +} diff --git a/examples/compare_test.c b/examples/compare_test.c new file mode 100644 index 0000000..f3b3ea2 --- /dev/null +++ b/examples/compare_test.c @@ -0,0 +1,123 @@ +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <string.h> +#include <assert.h> + +#include "xdelta3.h" + +#define NUM (1<<20) +#define ITERS 100 + +/* From wikipedia on RDTSC */ +inline uint64_t rdtsc() { + uint32_t lo, hi; + asm volatile ("rdtsc" : "=a" (lo), "=d" (hi)); + return (uint64_t)hi << 32 | lo; +} + +typedef int (*test_func)(const char *s1, const char *s2, int n); + +void run_test(const char *buf1, 
const char *buf2, + const char *name, test_func func) { + uint64_t start, end; + uint64_t accum = 0; + int i, x; + + for (i = 0; i < ITERS; i++) { + start = rdtsc(); + x = func(buf1, buf2, NUM); + end = rdtsc(); + accum += end - start; + assert(x == NUM - 1); + } + + accum /= ITERS; + + printf("%s : %qu cycles\n", name, accum); +} + +/* Build w/ -fno-builtin for this to be fast, this assumes that there + * is a difference at s1[n-1] */ +int memcmp_fake(const char *s1, const char *s2, int n) { + int x = memcmp(s1, s2, n); + return x < 0 ? n - 1 : n + 1; +} + +#define UNALIGNED_OK 1 +static inline int +test2(const char *s1c, const char *s2c, int n) +{ + int i = 0; +#if UNALIGNED_OK + int nint = n / sizeof(int); + + if (nint >> 3) + { + int j = 0; + const int *s1 = (const int*)s1c; + const int *s2 = (const int*)s2c; + int nint_8 = nint - 8; + + while (i <= nint_8 && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++]) { } + + i = (i - 1) * sizeof(int); + } +#endif + + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} + +static inline int +test1(const char *s1c, const char *s2c, int n) { + int i = 0; + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} + +int main(/*int argc, char **argv*/) { + char *buf1 = malloc(NUM+1); + char *buf2 = malloc(NUM+1); + int i; + + for (i = 0; i < NUM; i++) { + buf1[i] = buf2[i] = rand(); + } + + buf2[NUM-1]++; + + printf ("ALIGNED\n"); + + run_test(buf1, buf2, "memcmp", &memcmp_fake); + run_test(buf1, buf2, "test1", &test1); + run_test(buf1, buf2, "test2", &test2); + + for (i = 0; i < NUM; i++) { + buf1[i] = buf2[i+1] = rand(); + } + + buf2[NUM]++; + + printf ("UNALIGNED\n"); + + run_test(buf1, buf2+1, "memcmp", &memcmp_fake); + run_test(buf1, buf2+1, "test1", &test1); + run_test(buf1, buf2+1, "test2", &test2); + + return 0; +} diff --git a/examples/encode_decode_test.c 
b/examples/encode_decode_test.c new file mode 100644 index 0000000..7bcf109 --- /dev/null +++ b/examples/encode_decode_test.c @@ -0,0 +1,204 @@ +// +// Permission to distribute this example by +// Copyright (C) 2007 Ralf Junker +// Ralf Junker <delphi@yunqa.de> +// http://www.yunqa.de/delphi/ + +//--------------------------------------------------------------------------- + +#include <stdio.h> +#include <sys/stat.h> +#include "xdelta3.h" +#include "xdelta3.c" + +//--------------------------------------------------------------------------- + +int code ( + int encode, + FILE* InFile, + FILE* SrcFile , + FILE* OutFile, + int BufSize ) +{ + int r, ret; + struct stat statbuf; + xd3_stream stream; + xd3_config config; + xd3_source source; + void* Input_Buf; + int Input_Buf_Read; + + if (BufSize < XD3_ALLOCSIZE) + BufSize = XD3_ALLOCSIZE; + + memset (&stream, 0, sizeof (stream)); + memset (&source, 0, sizeof (source)); + + xd3_init_config(&config, XD3_ADLER32); + config.winsize = BufSize; + xd3_config_stream(&stream, &config); + + if (SrcFile) + { + r = fstat(fileno(SrcFile), &statbuf); + if (r) + return r; + source.size = statbuf.st_size; + source.blksize = BufSize; + source.curblk = malloc(source.blksize); + + /* Load 1st block of stream. */ + r = fseek(SrcFile, 0, SEEK_SET); + if (r) + return r; + source.onblk = fread((void*)source.curblk, 1, source.blksize, SrcFile); + source.curblkno = 0; + /* Set the stream. 
*/ + xd3_set_source(&stream, &source); + } + + Input_Buf = malloc(BufSize); + + fseek(InFile, 0, SEEK_SET); + do + { + Input_Buf_Read = fread(Input_Buf, 1, BufSize, InFile); + if (Input_Buf_Read < BufSize) + { + xd3_set_flags(&stream, XD3_FLUSH | stream.flags); + } + xd3_avail_input(&stream, Input_Buf, Input_Buf_Read); + +process: + if (encode) + ret = xd3_encode_input(&stream); + else + ret = xd3_decode_input(&stream); + + switch (ret) + { + case XD3_INPUT: + { + fprintf (stderr,"XD3_INPUT\n"); + continue; + } + + case XD3_OUTPUT: + { + fprintf (stderr,"XD3_OUTPUT\n"); + r = fwrite(stream.next_out, 1, stream.avail_out, OutFile); + if (r != (int)stream.avail_out) + return r; + xd3_consume_output(&stream); + goto process; + } + + case XD3_GETSRCBLK: + { + fprintf (stderr,"XD3_GETSRCBLK %qd\n", source.getblkno); + if (SrcFile) + { + r = fseek(SrcFile, source.blksize * source.getblkno, SEEK_SET); + if (r) + return r; + source.onblk = fread((void*)source.curblk, 1, + source.blksize, SrcFile); + source.curblkno = source.getblkno; + } + goto process; + } + + case XD3_GOTHEADER: + { + fprintf (stderr,"XD3_GOTHEADER\n"); + goto process; + } + + case XD3_WINSTART: + { + fprintf (stderr,"XD3_WINSTART\n"); + goto process; + } + + case XD3_WINFINISH: + { + fprintf (stderr,"XD3_WINFINISH\n"); + goto process; + } + + default: + { + fprintf (stderr,"!!! 
INVALID %s %d !!!\n", + stream.msg, ret); + return ret; + } + + } + + } + while (Input_Buf_Read == BufSize); + + free(Input_Buf); + + free((void*)source.curblk); + xd3_close_stream(&stream); + xd3_free_stream(&stream); + + return 0; + +}; + + +int main(int argc, char* argv[]) +{ + FILE* InFile; + FILE* SrcFile; + FILE* OutFile; + int r; + + if (argc != 3) { + fprintf (stderr, "usage: %s source input output\n", argv[0]); + return 1; + } + + char *input = argv[2]; + char *source = argv[1]; + const char *output = "encoded.testdata"; + const char *decoded = "decoded.testdata"; + + /* Encode */ + + InFile = fopen(input, "rb"); + SrcFile = fopen(source, "rb"); + OutFile = fopen(output, "wb"); + + r = code (1, InFile, SrcFile, OutFile, 0x1000); + + fclose(OutFile); + fclose(SrcFile); + fclose(InFile); + + if (r) { + fprintf (stderr, "Encode error: %d\n", r); + return r; + } + + /* Decode */ + + InFile = fopen(output, "rb"); + SrcFile = fopen(source, "rb"); + OutFile = fopen(decoded, "wb"); + + r = code (0, InFile, SrcFile, OutFile, 0x1000); + + fclose(OutFile); + fclose(SrcFile); + fclose(InFile); + + if (r) { + fprintf (stderr, "Decode error: %d\n", r); + return r; + } + + return 0; +} diff --git a/examples/small_page_test.c b/examples/small_page_test.c new file mode 100755 index 0000000..2d9ae93 --- /dev/null +++ b/examples/small_page_test.c @@ -0,0 +1,202 @@ +/* Copyright (C) 2007 Josh MacDonald */ + +#include <stdio.h> + +#define PAGE_SIZE 4096 + +#define SPACE_MAX 131072 // how much memory per process +#define OUTPUT_MAX 1024 // max size for output +#define XD3_ALLOCSIZE 256 // internal size for various buffers +#define IOPT_SIZE 128 // instruction buffer + +// SPACE_MAX of 32K is sufficient for most inputs with XD3_COMPLEVEL_1 +// XD3_COMPLEVEL_9 requires about 4x more space than XD3_COMPLEVEL_1 + +#include "xdelta3.h" +#include "xdelta3.c" + +typedef struct _context { + uint8_t *buffer; + int allocated; +} context_t; + +static int max_allocated = 0; + +void* 
+process_alloc (void* opaque, usize_t items, usize_t size)
+{
+  /* Bump allocator over the SPACE_MAX-byte region that process_page()
+   * sets up: returns the next unused bytes, or NULL when the request does
+   * not fit.
+   * NOTE(review): no alignment padding is added between allocations --
+   * confirm that is acceptable on strict-alignment targets. */
+  context_t *ctx = (context_t*) opaque;
+  usize_t avail = SPACE_MAX - (usize_t) ctx->allocated;
+  usize_t t;
+  void *ret;
+
+  /* Divide rather than multiply so that a huge request cannot wrap around
+   * and slip past the limit. */
+  if (size != 0 && items > avail / size)
+    {
+      return NULL;
+    }
+
+  t = items * size;
+  ret = ctx->buffer + ctx->allocated;
+  ctx->allocated += t;
+  return ret;
+}
+
+/* Nothing to release: the whole region is freed at once by process_page(). */
+void
+process_free (void* opaque, void *ptr)
+{
+}
+
+/* Runs one encode or decode pass of "input" against the single source
+ * block "source", with all xdelta3 allocations served by process_alloc().
+ * Returns 0 on success, -1 when the region cannot be allocated, or the
+ * xdelta3 error code. */
+int
+process_page (int is_encode,
+              int (*func) (xd3_stream *),
+              const uint8_t *input,
+              usize_t input_size,
+              const uint8_t *source,
+              uint8_t *output,
+              usize_t *output_size,
+              usize_t output_size_max,
+              int flags) {
+
+  /* On my x86 this is 1072 of objects on the stack */
+  xd3_stream stream;
+  xd3_config config;
+  xd3_source src;
+  context_t *ctx = calloc(SPACE_MAX, 1);
+  int ret;
+
+  /* Zero everything handed to xdelta3, as encode_decode_test.c does; only
+   * a few fields of src are assigned below. */
+  memset (&stream, 0, sizeof(stream));
+  memset (&config, 0, sizeof(config));
+  memset (&src, 0, sizeof(src));
+
+  if (ctx == NULL)
+    {
+      printf("calloc failed\n");
+      return -1;
+    }
+
+  /* The context header lives at the start of the region it manages. */
+  ctx->buffer = (uint8_t*)ctx;
+  ctx->allocated = sizeof(*ctx);
+
+  config.flags = flags;
+  config.winsize = PAGE_SIZE;
+  config.sprevsz = PAGE_SIZE;
+  config.srcwin_maxsz = PAGE_SIZE;
+  config.iopt_size = IOPT_SIZE;
+  config.alloc = &process_alloc;
+  config.freef = &process_free;
+  config.opaque = (void*) ctx;
+
+  src.size = PAGE_SIZE;
+  src.blksize = PAGE_SIZE;
+  src.onblk = PAGE_SIZE;
+  src.curblk = source;
+  src.curblkno = 0;
+
+  if ((ret = xd3_config_stream (&stream, &config)) != 0 ||
+      (ret = xd3_set_source (&stream, &src)) != 0 ||
+      (ret = xd3_process_stream (is_encode,
+                                 &stream,
+                                 func, 1,
+                                 input, input_size,
+                                 output, output_size,
+                                 output_size_max)) != 0)
+    {
+      if (stream.msg != NULL)
+        {
+          fprintf(stderr, "stream message: %s\n", stream.msg);
+        }
+    }
+
+  xd3_free_stream (&stream);
+  if (max_allocated < ctx->allocated)
+    {
+      max_allocated = ctx->allocated;
+      fprintf(stderr, "max allocated %d\n", max_allocated);
+    }
+
+  free(ctx);
+  return ret;
+}
+
+int test(int stride, int encode_flags)
+{
+  uint8_t frompg[PAGE_SIZE];
+  uint8_t topg[PAGE_SIZE];
+  uint8_t output[OUTPUT_MAX];
+  uint8_t reout[PAGE_SIZE];
+  usize_t output_size; 
+ usize_t re_size; + int i, j, ret; + + for (i = 0; i < PAGE_SIZE; i++) + { + topg[i] = frompg[i] = (rand() >> 3 ^ rand() >> 6 ^ rand() >> 9); + } + + // change 1 byte every stride + if (stride > 0) + { + for (j = stride; j <= PAGE_SIZE; j += stride) + { + topg[j - 1] ^= 0xff; + } + } + + if ((ret = process_page (1, xd3_encode_input, + topg, PAGE_SIZE, + frompg, output, + &output_size, OUTPUT_MAX, + encode_flags)) != 0) + { + fprintf (stderr, "encode failed: stride %u flags 0x%x\n", + stride, encode_flags); + return ret; + } + + if ((ret = process_page (0, xd3_decode_input, + output, output_size, + frompg, reout, + &re_size, PAGE_SIZE, + 0)) != 0) + { + fprintf (stderr, "decode failed: stride %u output_size %u flags 0x%x\n", + stride, output_size, encode_flags); + return ret; + } + + if (output_size > OUTPUT_MAX || re_size != PAGE_SIZE) + { + fprintf (stderr, "internal error: %u != %u\n", output_size, re_size); + return -1; + } + + for (i = 0; i < PAGE_SIZE; i++) + { + if (reout[i] != topg[i]) + { + fprintf (stderr, "encode-decode error: position %d\n", i); + return -1; + } + } + + fprintf(stderr, "stride %d flags 0x%x size %u ", + stride, encode_flags, output_size); + fprintf(stderr, "%s\n", (ret == 0) ? "OK" : "FAIL"); + + return 0; +} + +int main() +{ + int stride; + int level; + + for (level = 1; level < 10; level = (level == 1 ? 
3 : level + 3)) + { + int lflag = level << XD3_COMPLEVEL_SHIFT; + + for (stride = 2; stride <= PAGE_SIZE; stride += 2) + { + test(stride, lflag); + test(stride, lflag | XD3_SEC_DJW); + } + } + + return 0; +} diff --git a/examples/speed_test.c b/examples/speed_test.c new file mode 100644 index 0000000..d9ce5aa --- /dev/null +++ b/examples/speed_test.c @@ -0,0 +1,73 @@ +/* Copyright (C) 2007 Josh MacDonald */ + +#include "test.h" + +usize_t bench_speed(const uint8_t *from_buf, const size_t from_len, + const uint8_t *to_buf, const size_t to_len, + uint8_t *delta_buf, const size_t delta_alloc, + int flags) { + usize_t delta_size; + int ret = xd3_encode_memory(to_buf, to_len, from_buf, from_len, + delta_buf, &delta_size, delta_alloc, flags); + if (ret != 0) { + fprintf(stderr, "encode failure: %d: %s\n", ret, xd3_strerror(ret)); + abort(); + } + return delta_size; +} + +int main(int argc, char **argv) { + int repeat, level; + char *from, *to; + uint8_t *from_buf = NULL, *to_buf = NULL, *delta_buf = NULL; + size_t from_len = 0, to_len, delta_alloc, delta_size = 0; + long start, finish; + int i, ret; + int flags; + + if (argc != 5) { + fprintf(stderr, "usage: speed_test LEVEL COUNT FROM TO\n"); + return 1; + } + + level = atoi(argv[1]); + repeat = atoi(argv[2]); + from = argv[3]; + to = argv[4]; + flags = (level << XD3_COMPLEVEL_SHIFT) & XD3_COMPLEVEL_MASK; + + if ((strcmp(from, "null") != 0 && + (ret = read_whole_file(from, &from_buf, &from_len))) || + (ret = read_whole_file(to, &to_buf, &to_len))) { + fprintf(stderr, "read_whole_file error\n"); + goto exit; + } + + delta_alloc = to_len * 11 / 10; + delta_buf = main_malloc(delta_alloc); + + start = get_millisecs_now(); + + for (i = 0; i < repeat; ++i) { + delta_size = bench_speed(from_buf, from_len, + to_buf, to_len, delta_buf, delta_alloc, flags); + } + + finish = get_millisecs_now(); + + fprintf(stderr, + "STAT: encode %3.2f ms from %s to %s repeat %d %zdbit delta %zd\n", + (double)(finish - start) / repeat, from, to, 
repeat, sizeof (xoff_t) * 8, delta_size); + + ret = 0; + + if (0) { + exit: + ret = 1; + } + + main_free(to_buf); + main_free(from_buf); + main_free(delta_buf); + return ret; +} diff --git a/examples/test.h b/examples/test.h new file mode 100644 index 0000000..e8016bb --- /dev/null +++ b/examples/test.h @@ -0,0 +1,42 @@ +/* Copyright (C) 2007 Josh MacDonald */ + +#define NOT_MAIN 1 + +#include "xdelta3.h" +#include "xdelta3.c" + +static int read_whole_file(const char *name, + uint8_t **buf_ptr, + size_t *buf_len) { + main_file file; + int ret; + xoff_t len; + usize_t nread; + main_file_init(&file); + file.filename = name; + ret = main_file_open(&file, name, XO_READ); + if (ret != 0) { + fprintf(stderr, "open failed\n"); + goto exit; + } + ret = main_file_stat(&file, &len, 0); + if (ret != 0) { + fprintf(stderr, "stat failed\n"); + goto exit; + } + + (*buf_len) = (size_t)len; + (*buf_ptr) = (uint8_t*) main_malloc(*buf_len); + ret = main_file_read(&file, *buf_ptr, *buf_len, &nread, + "read failed"); + if (ret == 0 && *buf_len == nread) { + ret = 0; + } else { + fprintf(stderr, "invalid read\n"); + ret = XD3_INTERNAL; + } + exit: + main_file_cleanup(&file); + return ret; +} + diff --git a/linkxd3lib.c b/linkxd3lib.c new file mode 100644 index 0000000..284cb0d --- /dev/null +++ b/linkxd3lib.c @@ -0,0 +1,46 @@ +#include "xdelta3.h" + +extern int VVV; + +int VVV; + +void use(int r) +{ + VVV = r; +} + +int main() { + xd3_config config; + xd3_stream stream; + xd3_source source; + + xd3_init_config (& config, 0); + use (xd3_config_stream (&stream, &config)); + use (xd3_close_stream (&stream)); + xd3_abort_stream (&stream); + xd3_free_stream (&stream); + + xd3_avail_input (& stream, NULL, 0); + xd3_consume_output (& stream); + + use (xd3_bytes_on_srcblk (& source, 0)); + use (xd3_set_source (& stream, & source)); + xd3_set_flags (& stream, 0); + + use (xd3_decode_stream (& stream, NULL, 0, NULL, NULL, 0)); + use (xd3_decode_input (&stream)); + use (xd3_get_appheader (& 
stream, NULL, NULL)); + + use ((int) xd3_errstring (& stream)); + use ((int) xd3_strerror (0)); + +#if XD3_ENCODER + use (xd3_encode_input (&stream)); + use (xd3_encode_stream (& stream, NULL, 0, NULL, NULL, 0)); + use (xd3_set_appheader (& stream)); + use (xd3_encoder_used_source (& stream)); + use (xd3_encoder_srcbase (& stream)); + use (xd3_encoder_srclen (& stream)); +#endif + return 0; +} diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..be7c6ce --- /dev/null +++ b/readme.txt @@ -0,0 +1,34 @@ +Xdelta 3.x readme.txt +Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 +<josh.macdonald@gmail.com> + + +Thanks for downloading Xdelta! + +This directory contains the Xdelta3 command-line interface (CLI) and source +distribution for VCDIFF differential compression, a.k.a. delta +compression. The latest information and downloads are available here: + + http://xdelta.org/ + http://code.google.com/p/xdelta/ + +The command-line syntax: + + http://code.google.com/p/xdelta/wiki/CommandLineSyntax + +Run 'xdelta3 -h' for brief help. Run 'xdelta3 test' for built-in tests. + +Sample commands (like gzip, -e means encode, -d means decode) + + xdelta3 -9 -S djw -e -vfs OLD_FILE NEW_FILE DELTA_FILE + xdelta3 -d -vfs OLD_FILE DELTA_FILE DECODED_FILE + +File bug reports and browse open support issues here: + + http://code.google.com/p/xdelta/issues/list + +The source distribution contains the C/C++/Python APIs, Unix, Microsoft VC++ +and Cygwin builds. Xdelta3 is covered under the terms of the GPL, see +COPYING. + +Commercial inquiries welcome, please contact <josh.macdonald@gmail.com> diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0bb39e1 --- /dev/null +++ b/setup.py @@ -0,0 +1,59 @@ +# xdelta 3 - delta compression tools and library +# Copyright (C) 2004, 2007. Joshua P. 
MacDonald +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# +from distutils.core import setup, Extension +from distutils.util import get_platform + +# External compression support works on Windows/Cygwin, but not from +# within the Python module. It's something to do with fork() and +# exec() support. +#platform = get_platform() +#is_cygwin = platform.startswith('cygwin') + +xdelta3_ext = Extension('xdelta3main', + ['xdelta3.c'], + define_macros = [ + ('PYTHON_MODULE',1), + ('SECONDARY_DJW',1), + ('VCDIFF_TOOLS',1), + ('GENERIC_ENCODE_TABLES',0), + ('XD3_POSIX',1), + ('XD3_USE_LARGEFILE64',0), + + # the fork/exec stuff doesn't + # work inside python. + ('EXTERNAL_COMPRESSION',0), + + ('REGRESSION_TEST',0), + ('SECONDARY_FGK',0), + ('XD3_DEBUG',0), + ], + extra_compile_args = [ '-O3', + '-g', + '-fno-builtin', + # '-arch', 'x86_64', + ]) + +# $Format: "REL='$Xdelta3Version$'" $ +REL='3.0u' + +# This provides xdelta3.main(), which calls the xdelta3 command-line main() +# from python. +setup(name='xdelta3main', + version=REL, + ext_modules=[xdelta3_ext]) diff --git a/testing/Makefile b/testing/Makefile new file mode 100755 index 0000000..281ef11 --- /dev/null +++ b/testing/Makefile @@ -0,0 +1,15 @@ +CFLAGS = -g -Wall -I.. -DXD3_DEBUG=1 +#CFLAGS = -g -Wall -I.. -DXD3_DEBUG=2 +#CFLAGS = -O2 -Wall -I.. 
-DXD3_DEBUG=0 -DNDEBUG=1 + +DEPS = ../*.h ../*.c *.cc *.h + +TARGETS = xdelta3-regtest + +all: $(TARGETS) + +xdelta3-regtest: $(DEPS) + $(CXX) $(CFLAGS) regtest.cc -o xdelta3-regtest + +clean: + rm -f *.exe *.stackdump $(TARGETS) diff --git a/testing/cmp.h b/testing/cmp.h new file mode 100644 index 0000000..d96c386 --- /dev/null +++ b/testing/cmp.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++ -*- */ +namespace regtest { + +inline size_t CmpDifferentBlockBytes(const Block &a, const Block &b) { + size_t total = 0; + size_t i = 0; + size_t m = min(a.Size(), b.Size()); + + for (; i < m; i++) { + if (a[i] != b[i]) { + total++; + } + } + + total += a.Size() - i; + total += b.Size() - i; + + return total; +} + +inline xoff_t CmpDifferentBytes(const FileSpec &a, const FileSpec &b) { + Block block_a, block_b; + xoff_t total = 0; + FileSpec::iterator a_i(a), b_i(b); + + for (; !a_i.Done() && !b_i.Done(); a_i.Next(), b_i.Next()) { + + a_i.Get(&block_a); + b_i.Get(&block_b); + + total += CmpDifferentBlockBytes(block_a, block_b); + } + + for (; !a_i.Done(); a_i.Next()) { + total += a_i.BytesOnBlock(); + } + for (; !b_i.Done(); b_i.Next()) { + total += b_i.BytesOnBlock(); + } + + return total; +} + +inline bool ExtFile::EqualsSpec(const FileSpec &spec) const { + main_file t; + main_file_init(&t); + CHECK_EQ(0, main_file_open(&t, Name(), XO_READ)); + + Block tblock; + Block sblock; + for (BlockIterator iter(spec); !iter.Done(); iter.Next()) { + iter.Get(&sblock); + tblock.SetSize(sblock.Size()); + usize_t tread; + CHECK_EQ(0, main_file_read(&t, tblock.Data(), tblock.Size(), &tread, "read failed")); + CHECK_EQ(0, CmpDifferentBlockBytes(tblock, sblock)); + } + + CHECK_EQ(0, main_file_close(&t)); + main_file_cleanup(&t); + return true; +} + +} // namespace regtest diff --git a/testing/delta.h b/testing/delta.h new file mode 100644 index 0000000..58fbaac --- /dev/null +++ b/testing/delta.h @@ -0,0 +1,79 @@ +// Mode: -*- C++ -*- + +namespace regtest { + +class Delta { +public: + Delta(const 
Block &block); + + ~Delta() { + xd3_free_stream(&stream_); + } + + xoff_t AddedBytes() const { + return stream_.whole_target.addslen; + } + + xoff_t Windows() const { + return stream_.whole_target.wininfolen; + } + + void Print() const; + +private: + xd3_stream stream_; +}; + +Delta::Delta(const Block &block) { + int ret; + xd3_config config; + memset(&stream_, 0, sizeof (stream_)); + memset(&config, 0, sizeof (config)); + + xd3_init_config(&config, XD3_SKIP_EMIT | XD3_ADLER32_NOVER); + + CHECK_EQ(0, xd3_config_stream (&stream_, &config)); + + xd3_avail_input (&stream_, block.Data(), block.Size()); + + bool done = false; + while (!done) { + ret = xd3_decode_input(&stream_); + + switch (ret) { + case XD3_INPUT: + done = true; + break; + case XD3_OUTPUT: + CHECK_EQ(0, xd3_whole_append_window (&stream_)); + break; + case XD3_GOTHEADER: + case XD3_WINSTART: + case XD3_WINFINISH: + break; + default: + DP(RINT "error code %s\n", xd3_strerror (ret)); + abort(); + } + } +} + +void Delta::Print() const { + for (size_t i = 0; i < stream_.whole_target.instlen; i++) { + xd3_winst &winst = stream_.whole_target.inst[i]; + switch (winst.type) { + case XD3_RUN: + DP(RINT "%"Q"u run %u\n", winst.position, winst.size); + break; + case XD3_ADD: + DP(RINT "%"Q"u add %u\n", winst.position, winst.size); + break; + default: + DP(RINT "%"Q"u copy %u @ %"Q"u (mode %u)\n", + winst.position, winst.size, winst.addr, winst.mode); + break; + } + } +} + +} // namespace diff --git a/testing/file.h b/testing/file.h new file mode 100644 index 0000000..30a8428 --- /dev/null +++ b/testing/file.h @@ -0,0 +1,367 @@ +/* -*- Mode: C++ -*- */ +namespace regtest { + +class Block; +class BlockIterator; +class TmpFile; + +class FileSpec { + public: + FileSpec(MTRandom *rand) + : rand_(rand) { + } + + // Generates a file with a known size + void GenerateFixedSize(xoff_t size) { + Reset(); + + for (xoff_t p = 0; p < size; ) { + xoff_t t = min(Constants::BLOCK_SIZE, size - p); + table_.insert(make_pair(p, 
Segment(t, rand_))); + p += t; + } + } + + // Generates a file with exponential-random distributed size + void GenerateRandomSize(xoff_t mean) { + GenerateFixedSize(rand_->ExpRand(mean)); + } + + // Returns the size of the file + xoff_t Size() const { + if (table_.empty()) { + return 0; + } + SegmentMap::const_iterator i = --table_.end(); + return i->first + i->second.Size(); + } + + // Returns the number of blocks + xoff_t Blocks(size_t blksize = Constants::BLOCK_SIZE) const { + if (table_.empty()) { + return 0; + } + return ((Size() - 1) / blksize) + 1; + } + + // Returns the number of segments + xoff_t Segments() const { + return table_.size(); + } + + // Create a mutation according to "what". + void ModifyTo(const Mutator &mutator, + FileSpec *modify) const { + modify->Reset(); + mutator.Mutate(&modify->table_, &table_, rand_); + modify->CheckSegments(); + } + + void CheckSegments() const { + for (SegmentMap::const_iterator iter(table_.begin()); + iter != table_.end(); ) { + SegmentMap::const_iterator iter0(iter++); + if (iter == table_.end()) { + break; + } + CHECK_EQ(iter0->first + iter0->second.Size(), iter->first); + } + } + + void Reset() { + table_.clear(); + } + + void Print() const { + for (SegmentMap::const_iterator iter(table_.begin()); + iter != table_.end(); + ++iter) { + const Segment &seg = iter->second; + cerr << "Segment at " << iter->first << " (" << seg << ")" << endl; + } + } + + void PrintData() const; + + void WriteTmpFile(TmpFile *f) const; + + typedef BlockIterator iterator; + + private: + friend class BlockIterator; + + MTRandom *rand_; + SegmentMap table_; +}; + +class Block { +public: + Block() + : data_(NULL), + data_size_(0), + size_(0) { } + + ~Block() { + if (data_) { + delete [] data_; + } + } + + size_t Size() const { + return size_; + } + + uint8_t operator[](size_t i) const { + CHECK_LT(i, size_); + return data_[i]; + } + + uint8_t* Data() const { + if (data_ == NULL) { + CHECK_EQ(0, size_); + data_size_ = 1; + data_ = new 
uint8_t[1]; + } + return data_; + } + + // For writing to blocks + void Append(const uint8_t *data, size_t size); + + // For clearing a block + void Reset() { + size_ = 0; + } + + void Print() const; + + void WriteTmpFile(TmpFile *f) const; + + void SetSize(size_t size) { + size_ = size; + + if (data_size_ < size) { + if (data_) { + delete [] data_; + } + data_ = new uint8_t[size]; + data_size_ = size; + } + } +private: + friend class BlockIterator; + + mutable uint8_t *data_; + mutable size_t data_size_; + size_t size_; +}; + +class BlockIterator { +public: + explicit BlockIterator(const FileSpec& spec) + : spec_(spec), + blkno_(0), + blksize_(Constants::BLOCK_SIZE) { } + + BlockIterator(const FileSpec& spec, + size_t blksize) + : spec_(spec), + blkno_(0), + blksize_(blksize) { } + + bool Done() const { + return blkno_ >= spec_.Blocks(blksize_); + } + + void Next() { + blkno_++; + } + + xoff_t Blkno() const { + return blkno_; + } + + xoff_t Offset() const { + return blkno_ * blksize_; + } + + void SetBlock(xoff_t blkno) { + blkno_ = blkno; + } + + void Get(Block *block) const; + + size_t BytesOnBlock() const { + xoff_t blocks = spec_.Blocks(blksize_); + xoff_t size = spec_.Size(); + + CHECK((blkno_ < blocks) || + (blkno_ == blocks && size % blksize_ == 0)); + + if (blkno_ == blocks) { + return 0; + } + if (blkno_ + 1 == blocks) { + return ((size - 1) % blksize_) + 1; + } + return blksize_; + } + + size_t BlockSize() const { + return blksize_; + } + +private: + const FileSpec& spec_; + xoff_t blkno_; + size_t blksize_; +}; + +class ExtFile { +public: + ExtFile() { + static int static_counter = 0; + char buf[32]; + snprintf(buf, 32, "/tmp/regtest.%d", static_counter++); + filename_.append(buf); + unlink(filename_.c_str()); + } + + ~ExtFile() { + unlink(filename_.c_str()); + } + + const char* Name() const { + return filename_.c_str(); + } + + // Check whether a real file matches a file spec. 
+ bool EqualsSpec(const FileSpec &spec) const; + +protected: + string filename_; +}; + +class TmpFile : public ExtFile { +public: + // TODO this is a little unportable! + TmpFile() { + main_file_init(&file_); + CHECK_EQ(0, main_file_open(&file_, filename_.c_str(), XO_WRITE)); + } + + ~TmpFile() { + main_file_cleanup(&file_); + } + + void Append(const Block *block) { + CHECK_EQ(0, main_file_write(&file_, + block->Data(), block->Size(), + "tmpfile write failed")); + } + + + const char* Name() const { + if (main_file_isopen(&file_)) { + CHECK_EQ(0, main_file_close(&file_)); + } + return ExtFile::Name(); + } + +private: + mutable main_file file_; +}; + +inline void BlockIterator::Get(Block *block) const { + xoff_t offset = blkno_ * blksize_; + const SegmentMap &table = spec_.table_; + size_t got = 0; + block->SetSize(BytesOnBlock()); + + SegmentMap::const_iterator pos = table.upper_bound(offset); + if (pos == table.begin()) { + CHECK_EQ(0, spec_.Size()); + return; + } + --pos; + + while (got < block->size_) { + CHECK(pos != table.end()); + CHECK_GE(offset, pos->first); + + const Segment &seg = pos->second; + + // The position of this segment may start before this block starts, + // and then the position of the data may be offset from the seeding + // position. 
+ size_t seg_offset = offset - pos->first; + size_t advance = min(seg.Size() - seg_offset, + blksize_ - got); + + seg.Fill(seg_offset, advance, block->data_ + got); + + got += advance; + offset += advance; + ++pos; + } +} + +inline void Block::Append(const uint8_t *data, size_t size) { + if (data_ == NULL) { + CHECK_EQ(0, size_); + CHECK_EQ(0, data_size_); + data_ = new uint8_t[Constants::BLOCK_SIZE]; + data_size_ = Constants::BLOCK_SIZE; + } + + if (size_ + size > data_size_) { + uint8_t *tmp = data_; + while (size_ + size > data_size_) { + data_size_ *= 2; + } + data_ = new uint8_t[data_size_]; + memcpy(data_, tmp, size_); + delete [] tmp; // old buffer was allocated with new[], so it must be released with delete[] + } + + memcpy(data_ + size_, data, size); + size_ += size; +} + +inline void FileSpec::PrintData() const { + Block block; + for (BlockIterator iter(*this); !iter.Done(); iter.Next()) { + iter.Get(&block); + block.Print(); + } +} + +inline void Block::Print() const { + xoff_t pos = 0; + for (size_t i = 0; i < Size(); i++) { + if (pos % 16 == 0) { + DP(RINT "%5"Q"x: ", pos); + } + DP(RINT "%02x ", (*this)[i]); + if (pos % 16 == 15) { + DP(RINT "\n"); + } + pos++; + } + DP(RINT "\n"); +} + +inline void FileSpec::WriteTmpFile(TmpFile *f) const { + Block block; + for (BlockIterator iter(*this); !iter.Done(); iter.Next()) { + iter.Get(&block); + f->Append(&block); + } +} + +inline void Block::WriteTmpFile(TmpFile *f) const { + f->Append(this); +} + +} // namespace regtest + diff --git a/testing/modify.h b/testing/modify.h new file mode 100644 index 0000000..67cccd9 --- /dev/null +++ b/testing/modify.h @@ -0,0 +1,421 @@ +// -*- Mode: C++ -*- +namespace regtest { + +class Mutator { +public: + virtual ~Mutator() { } + virtual void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const = 0; +}; + +class Change { +public: + enum Kind { + MODIFY = 1, + ADD = 2, + DELETE = 3, + MOVE = 4, + COPY = 5, + OVERWRITE = 6, + }; + + // Constructor for modify, add, delete. 
+ Change(Kind kind, xoff_t size, xoff_t addr1) + : kind(kind), + size(size), + addr1(addr1), + insert(NULL) { + CHECK(kind != MOVE && kind != COPY && kind != OVERWRITE); + } + + // Constructor for modify, add w/ provided data. + Change(Kind kind, xoff_t size, xoff_t addr1, Segment *insert) + : kind(kind), + size(size), + addr1(addr1), + insert(insert) { + CHECK(kind != MOVE && kind != COPY && kind != OVERWRITE); + } + + // Constructor for move + Change(Kind kind, xoff_t size, xoff_t addr1, xoff_t addr2) + : kind(kind), + size(size), + addr1(addr1), + addr2(addr2), + insert(NULL) { + CHECK(kind == MOVE || kind == COPY || kind == OVERWRITE); + } + + Kind kind; + xoff_t size; + xoff_t addr1; + xoff_t addr2; + Segment *insert; // For modify and/or add +}; + +typedef list<Change> ChangeList; + +class ChangeListMutator : public Mutator { +public: + ChangeListMutator(const ChangeList &cl) + : cl_(cl) { } + + ChangeListMutator() { } + + void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const; + + static void Mutate(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand); + + static void AddChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand); + + static void ModifyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand); + + static void DeleteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand); + + static void MoveChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand); + + static void OverwriteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand); + + static void CopyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand); + + static void AppendCopy(SegmentMap *table, + const SegmentMap *source_table, + xoff_t copy_offset, + xoff_t append_offset, + 
xoff_t length); + + ChangeList* Changes() { + return &cl_; + } + + const ChangeList* Changes() const { + return &cl_; + } + +private: + ChangeList cl_; +}; + +void ChangeListMutator::Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const { + // The speed of processing gigabytes of data is so slow compared with + // these table-copy operations, no attempt to make this fast. + SegmentMap tmp; + + for (ChangeList::const_iterator iter(cl_.begin()); iter != cl_.end(); ++iter) { + const Change &ch = *iter; + tmp.clear(); + Mutate(ch, &tmp, source_table, rand); + tmp.swap(*table); + source_table = table; + } +} + +void ChangeListMutator::Mutate(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + switch (ch.kind) { + case Change::ADD: + AddChange(ch, table, source_table, rand); + break; + case Change::MODIFY: + ModifyChange(ch, table, source_table, rand); + break; + case Change::DELETE: + DeleteChange(ch, table, source_table, rand); + break; + case Change::COPY: + CopyChange(ch, table, source_table, rand); + break; + case Change::MOVE: + MoveChange(ch, table, source_table, rand); + break; + case Change::OVERWRITE: + OverwriteChange(ch, table, source_table, rand); + break; + } +} + +void ChangeListMutator::ModifyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t m_end = m_start + ch.size; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (SegmentMap::const_iterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start || i_start >= m_end) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + // Insert the entire segment, even though it may 
extend into later + // segments. This condition avoids inserting it during later + // segments. + if (m_start >= i_start) { + if (ch.insert != NULL) { + table->insert(table->end(), make_pair(m_start, *ch.insert)); + } else { + Segment part(m_end - m_start, rand); + table->insert(table->end(), make_pair(m_start, part)); + } + } + + if (i_end > m_end) { + table->insert(table->end(), + make_pair(m_end, + seg.Subseg(m_end - i_start, i_end - m_end))); + } + } + + CHECK_LE(m_end, i_end); +} + +void ChangeListMutator::AddChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (SegmentMap::const_iterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start > m_start) { + table->insert(table->end(), make_pair(i_start + ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + if (ch.insert != NULL) { + table->insert(table->end(), make_pair(m_start, *ch.insert)); + } else { + Segment addseg(ch.size, rand); + table->insert(table->end(), make_pair(m_start, addseg)); + } + + if (m_start < i_end) { + table->insert(table->end(), + make_pair(m_start + ch.size, + seg.Subseg(m_start - i_start, i_end - m_start))); + } + } + + CHECK_LE(m_start, i_end); + + // Special case for add at end-of-input. 
+ if (m_start == i_end) { + Segment addseg = (ch.insert != NULL) ? *ch.insert : Segment(ch.size, rand); // honor caller-supplied data, as the in-loop path does + table->insert(table->end(), make_pair(m_start, addseg)); + } +} + +void ChangeListMutator::DeleteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t m_end = m_start + ch.size; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (SegmentMap::const_iterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start >= m_end) { + table->insert(table->end(), make_pair(i_start - ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + if (i_end > m_end) { + table->insert(table->end(), + make_pair(m_end - ch.size, + seg.Subseg(m_end - i_start, i_end - m_end))); + } + } + + CHECK_LT(m_start, i_end); + CHECK_LE(m_end, i_end); +} + +void ChangeListMutator::MoveChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + SegmentMap tmp; + CHECK_NE(ch.addr1, ch.addr2); + CopyChange(ch, &tmp, source_table, rand); + Change d(Change::DELETE, ch.size, + ch.addr1 < ch.addr2 ? 
ch.addr1 : ch.addr1 + ch.size); + DeleteChange(d, table, &tmp, rand); +} + +void ChangeListMutator::OverwriteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + SegmentMap tmp; + CHECK_NE(ch.addr1, ch.addr2); + CopyChange(ch, &tmp, source_table, rand); + Change d(Change::DELETE, ch.size, ch.addr2 + ch.size); + DeleteChange(d, table, &tmp, rand); +} + +void ChangeListMutator::CopyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *ignore) { + xoff_t m_start = ch.addr2; + xoff_t c_start = ch.addr1; + xoff_t i_start = 0; + xoff_t i_end = 0; + + // Like AddChange() with AppendCopy instead of a random segment. + for (SegmentMap::const_iterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start > m_start) { + table->insert(table->end(), make_pair(i_start + ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + AppendCopy(table, source_table, c_start, m_start, ch.size); + + if (m_start < i_end) { + table->insert(table->end(), + make_pair(m_start + ch.size, + seg.Subseg(m_start - i_start, i_end - m_start))); + } + } + + CHECK_LE(m_start, i_end); + + // Special case for copy to end-of-input. 
+ if (m_start == i_end) { + AppendCopy(table, source_table, c_start, m_start, ch.size); + } +} + +void ChangeListMutator::AppendCopy(SegmentMap *table, + const SegmentMap *source_table, + xoff_t copy_offset, + xoff_t append_offset, + xoff_t length) { + SegmentMap::const_iterator pos(source_table->upper_bound(copy_offset)); + --pos; + xoff_t got = 0; + + while (got < length) { + size_t seg_offset = copy_offset - pos->first; + size_t advance = min(pos->second.Size() - seg_offset, + (size_t)(length - got)); + + table->insert(table->end(), + make_pair(append_offset, + pos->second.Subseg(seg_offset, + advance))); + + got += advance; + copy_offset += advance; + append_offset += advance; + ++pos; + } +} + +class Modify1stByte : public Mutator { +public: + void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const { + ChangeListMutator::Mutate(Change(Change::MODIFY, 1, 0), + table, source_table, rand); + } +}; + +} // namespace regtest diff --git a/testing/random.h b/testing/random.h new file mode 100644 index 0000000..f2cb167 --- /dev/null +++ b/testing/random.h @@ -0,0 +1,140 @@ +/* -*- Mode: C++ -*- */ +/* This is public-domain Mersenne Twister code, + * attributed to Michael Brundage. Thanks! 
+ * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html + */ +#include <math.h> + +namespace regtest { + +class MTRandom { + public: + static const uint32_t TEST_SEED1 = 5489UL; + + static const int MT_LEN = 624; + static const int MT_IA = 397; + static const uint32_t UPPER_MASK = 0x80000000; + static const uint32_t LOWER_MASK = 0x7FFFFFFF; + static const uint32_t MATRIX_A = 0x9908B0DF; + + MTRandom() { + Init(TEST_SEED1); + } + + MTRandom(uint32_t seed) { + Init(seed); + } + + uint32_t Rand32 () { + uint32_t y; + static unsigned long mag01[2] = { + 0 , MATRIX_A + }; + + if (mt_index_ >= MT_LEN) { + int kk; + + for (kk = 0; kk < MT_LEN - MT_IA; kk++) { + y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK); + mt_buffer_[kk] = mt_buffer_[kk + MT_IA] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + for (;kk < MT_LEN - 1; kk++) { + y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK); + mt_buffer_[kk] = mt_buffer_[kk + (MT_IA - MT_LEN)] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt_buffer_[MT_LEN - 1] & UPPER_MASK) | (mt_buffer_[0] & LOWER_MASK); + mt_buffer_[MT_LEN - 1] = mt_buffer_[MT_IA - 1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + + mt_index_ = 0; + } + + y = mt_buffer_[mt_index_++]; + + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; + } + + uint32_t ExpRand32(uint32_t mean) { + double mean_d = mean; + double erand = log (1.0 / (Rand32() / (double)UINT32_MAX)); + uint32_t x = (uint32_t) (mean_d * erand + 0.5); + return x; + } + + uint64_t Rand64() { + return ((uint64_t)Rand32() << 32) | Rand32(); + } + + uint64_t ExpRand64(uint64_t mean) { + double mean_d = mean; + double erand = log (1.0 / (Rand64() / (double)UINT64_MAX)); // Rand64() spans 64 bits, so normalize by UINT64_MAX to keep the ratio in [0, 1] + uint64_t x = (uint64_t) (mean_d * erand + 0.5); + return x; + } + + template <typename T> + T Rand() { + switch (sizeof(T)) { + case sizeof(uint32_t): + return Rand32(); + case sizeof(uint64_t): + return Rand64(); + default: + cerr << 
"Invalid sizeof T" << endl; + abort(); + } + } + + template <typename T> + T ExpRand(T mean) { + switch (sizeof(T)) { + case sizeof(uint32_t): + return ExpRand32(mean); + case sizeof(uint64_t): + return ExpRand64(mean); + default: + cerr << "Invalid sizeof T" << endl; + abort(); + } + } + + private: + void Init(uint32_t seed) { + mt_buffer_[0] = seed; + mt_index_ = MT_LEN; + for (int i = 1; i < MT_LEN; i++) { + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt_buffer_[i] = + (1812433253UL * (mt_buffer_[i-1] ^ (mt_buffer_[i-1] >> 30)) + i); + } + } + + int mt_index_; + uint32_t mt_buffer_[MT_LEN]; +}; + +class MTRandom8 { +public: + MTRandom8(MTRandom *rand) + : rand_(rand) { + } + + uint8_t Rand8() { + uint32_t r = rand_->Rand32(); + + // TODO: make this use a single byte at a time? + return (r & 0xff) ^ (r >> 7) ^ (r >> 15) ^ (r >> 21); + } + +private: + MTRandom *rand_; +}; + +} // namespace regtest diff --git a/testing/segment.h b/testing/segment.h new file mode 100644 index 0000000..1dabf5c --- /dev/null +++ b/testing/segment.h @@ -0,0 +1,100 @@ +// -*- Mode: C++ -*- + +namespace regtest { + +class Segment { + public: + Segment(size_t size, MTRandom *rand) + : size_(size), + seed_(rand->Rand32()), + seed_offset_(0), + data_(NULL) { + CHECK_GT(size_, 0); + } + + Segment(size_t size, uint32_t seed) + : size_(size), + seed_(seed), + seed_offset_(0), + data_(NULL) { + CHECK_GT(size_, 0); + } + + Segment(size_t size, uint8_t *data) + : size_(size), + seed_(0), + seed_offset_(0), + data_(data) { + CHECK_GT(size_, 0); + } + + size_t Size() const { + return size_; + } + + Segment Subseg(size_t start, size_t size) const { + CHECK_LE(start + size, size_); + if (data_) { + return Segment(size, data_ + start); + } else { + return Segment(size, seed_, seed_offset_ + start); + } + } + + void Fill(size_t seg_offset, size_t 
size, uint8_t *data) const { + CHECK_LE(seg_offset + size, size_); + if (data_) { + memcpy(data, data_ + seg_offset, size); + } else { + size_t skip = seg_offset + seed_offset_; + MTRandom gen(seed_); + MTRandom8 gen8(&gen); + while (skip--) { + gen8.Rand8(); + } + for (size_t i = 0; i < size; i++) { + data[i] = gen8.Rand8(); + } + } + } + +private: + // Used by Subseg() + Segment(size_t size, uint32_t seed, size_t seed_offset) + : size_(size), + seed_(seed), + seed_offset_(seed_offset), + data_(NULL) { + CHECK_GT(size_, 0); + } + + friend ostream& operator<<(ostream& os, const Segment &seg); + + size_t size_; // Size of this segment + + // For random segments + uint32_t seed_; // Seed used for generating byte sequence + size_t seed_offset_; // Seed positions the sequence this many bytes + // before its beginning. + + // For literal segments (data is not owned) + uint8_t *data_; +}; + +ostream& operator<<(ostream& os, const Segment &seg) { + if (seg.data_) { + for (size_t i = 0; i < seg.size_; i++) { + char buf[10]; + sprintf(buf, "%02x ", seg.data_[i]); + os << buf; + } + return os; + } else { + return os << "size=" << seg.size_ << ",seed=" << seg.seed_ + << ",skip=" << seg.seed_offset_; + } +} + +typedef map<xoff_t, Segment> SegmentMap; + +} // namespace regtest diff --git a/testing/sizes.h b/testing/sizes.h new file mode 100644 index 0000000..6b70892 --- /dev/null +++ b/testing/sizes.h @@ -0,0 +1,69 @@ +// -*- Mode: C++ -*- +namespace regtest { + +template <typename T, typename U> +class SizeIterator { + public: + SizeIterator(MTRandom *rand, size_t howmany) + : rand_(rand), + count_(0), + fixed_(U::sizes), + fixed_size_(SIZEOF_ARRAY(U::sizes)), + howmany_(howmany) { } + + T Get() { + if (count_ < fixed_size_) { + return fixed_[count_]; + } + return rand_->Rand<T>() % U::max_value; + } + + bool Done() { + return count_ >= fixed_size_ && count_ >= howmany_; + } + + void Next() { + count_++; + } + + private: + MTRandom *rand_; + size_t count_; + T* fixed_; + 
size_t fixed_size_; + size_t howmany_; +}; + +class SmallSizes { +public: + static size_t sizes[]; + static size_t max_value; +}; + +size_t SmallSizes::sizes[] = { + 0, 1, Constants::BLOCK_SIZE / 4, 3333, + Constants::BLOCK_SIZE - (Constants::BLOCK_SIZE / 3), + Constants::BLOCK_SIZE, + Constants::BLOCK_SIZE + (Constants::BLOCK_SIZE / 3), + 2 * Constants::BLOCK_SIZE - (Constants::BLOCK_SIZE / 3), + 2 * Constants::BLOCK_SIZE, + 2 * Constants::BLOCK_SIZE + (Constants::BLOCK_SIZE / 3), +}; + +size_t SmallSizes::max_value = Constants::BLOCK_SIZE * 3; + +class LargeSizes { +public: + static size_t sizes[]; + static size_t max_value; +}; + +size_t LargeSizes::sizes[] = { + 1 << 20, + 1 << 18, + 1 << 16, +}; + +size_t LargeSizes::max_value = 1<<20; + +} // namespace regtest diff --git a/testing/test.h b/testing/test.h new file mode 100644 index 0000000..f2b46f3 --- /dev/null +++ b/testing/test.h @@ -0,0 +1,110 @@ +// -*- Mode: C++ -*- + +extern "C" { +#define NOT_MAIN 1 +#define REGRESSION_TEST 0 +#define VCDIFF_TOOLS 1 +#include "../xdelta3.c" +} + +#define CHECK_EQ(x,y) CHECK_OP(x,y,==) +#define CHECK_NE(x,y) CHECK_OP(x,y,!=) +#define CHECK_LT(x,y) CHECK_OP(x,y,<) +#define CHECK_GT(x,y) CHECK_OP(x,y,>) +#define CHECK_LE(x,y) CHECK_OP(x,y,<=) +#define CHECK_GE(x,y) CHECK_OP(x,y,>=) + +#define CHECK_OP(x,y,OP) \ + do { \ + typeof(x) _x(x); \ + typeof(x) _y(y); \ + if (!(_x OP _y)) { \ + cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \ + cerr << __FILE__ << ":" << __LINE__ << " Expected: " << _x << endl; \ + cerr << __FILE__ << ":" << __LINE__ << " Actual: " << _y << endl; \ + abort(); \ + } } while (false) + +#define CHECK(x) \ + do {if (!(x)) { \ + cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x << endl; \ + abort(); \ + } } while (false) + +#include <string> +using std::string; + +#include <vector> +using std::vector; + +inline string CommandToString(const vector<const char*> &v) { + string s(v[0]); + for (size_t i = 
1; i < v.size() && v[i] != NULL; i++) { + s.append(" "); + s.append(v[i]); + } + return s; +} + +#include <iostream> +using std::cerr; +using std::endl; +using std::ostream; + +#include <map> +using std::map; +using std::pair; + +#include <ext/hash_map> +using __gnu_cxx::hash_map; + +#include <list> +using std::list; + +template <typename T, typename U> +pair<T, U> make_pair(const T& t, const U& u) { + return pair<T, U>(t, u); +} + +class Constants { +public: + // TODO: need to repeat the tests with different block sizes + // 1 << 7 triggers some bugs, 1 << 20 triggers others. + // + //static const xoff_t BLOCK_SIZE = 1 << 20; + static const xoff_t BLOCK_SIZE = 1 << 7; +}; + +using std::min; + +#include "random.h" +using regtest::MTRandom; +using regtest::MTRandom8; + +#include "segment.h" +using regtest::Segment; + +#include "modify.h" +using regtest::Mutator; +using regtest::ChangeList; +using regtest::Change; +using regtest::ChangeListMutator; +using regtest::Modify1stByte; + +#include "file.h" +using regtest::Block; +using regtest::BlockIterator; +using regtest::ExtFile; +using regtest::FileSpec; +using regtest::TmpFile; + +#include "cmp.h" +using regtest::CmpDifferentBytes; + +#include "sizes.h" +using regtest::SizeIterator; +using regtest::SmallSizes; +using regtest::LargeSizes; + +#include "delta.h" +using regtest::Delta; diff --git a/xdelta3-cfgs.h b/xdelta3-cfgs.h new file mode 100644 index 0000000..b13f7b0 --- /dev/null +++ b/xdelta3-cfgs.h @@ -0,0 +1,173 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/****************************************************************** + SOFT string matcher + ******************************************************************/ + +#if XD3_BUILD_SOFT + +#define TEMPLATE soft +#define LLOOK stream->smatcher.large_look +#define LSTEP stream->smatcher.large_step +#define SLOOK stream->smatcher.small_look +#define SCHAIN stream->smatcher.small_chain +#define SLCHAIN stream->smatcher.small_lchain +#define MAXLAZY stream->smatcher.max_lazy +#define LONGENOUGH stream->smatcher.long_enough + +#define SOFTCFG 1 +#include "xdelta3.c" +#undef SOFTCFG + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +#define SOFTCFG 0 + +/************************************************************ + FASTEST string matcher + **********************************************************/ +#if XD3_BUILD_FASTEST +#define TEMPLATE fastest +#define LLOOK 9 +#define LSTEP 26 +#define SLOOK 4U +#define SCHAIN 1 +#define SLCHAIN 1 +#define MAXLAZY 6 +#define LONGENOUGH 6 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/************************************************************ + FASTER string matcher + **********************************************************/ +#if XD3_BUILD_FASTER +#define TEMPLATE faster +#define LLOOK 9 +#define LSTEP 15 +#define SLOOK 4U +#define SCHAIN 1 +#define SLCHAIN 1 +#define 
MAXLAZY 18 +#define LONGENOUGH 18 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/****************************************************** + FAST string matcher + ********************************************************/ +#if XD3_BUILD_FAST +#define TEMPLATE fast +#define LLOOK 9 +#define LSTEP 8 +#define SLOOK 4U +#define SCHAIN 4 +#define SLCHAIN 1 +#define MAXLAZY 18 +#define LONGENOUGH 35 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/************************************************** + SLOW string matcher + **************************************************************/ +#if XD3_BUILD_SLOW +#define TEMPLATE slow +#define LLOOK 9 +#define LSTEP 2 +#define SLOOK 4U +#define SCHAIN 44 +#define SLCHAIN 13 +#define MAXLAZY 90 +#define LONGENOUGH 70 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif + +/******************************************************** + DEFAULT string matcher + ************************************************************/ +#if XD3_BUILD_DEFAULT +#define TEMPLATE default +#define LLOOK 9 +#define LSTEP 3 +#define SLOOK 4U +#define SCHAIN 8 +#define SLCHAIN 2 +#define MAXLAZY 36 +#define LONGENOUGH 70 + +#include "xdelta3.c" + +#undef TEMPLATE +#undef LLOOK +#undef SLOOK +#undef LSTEP +#undef SCHAIN +#undef SLCHAIN +#undef MAXLAZY +#undef LONGENOUGH +#endif diff --git a/xdelta3-decode.h b/xdelta3-decode.h new file mode 100644 index 0000000..bf2b0b1 --- /dev/null +++ b/xdelta3-decode.h @@ -0,0 +1,1115 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007. Joshua P. 
MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XDELTA3_DECODE_H_ +#define _XDELTA3_DECODE_H_ + +#define SRCORTGT(x) ((((x) & VCD_SRCORTGT) == VCD_SOURCE) ? \ + VCD_SOURCE : ((((x) & VCD_SRCORTGT) == \ + VCD_TARGET) ? VCD_TARGET : 0)) + +/* Initialize the decoder for a new window. The dec_tgtlen value is + * preserved across successive window decodings, and the update to + * dec_winstart is delayed until a new window actually starts. This + * is to avoid throwing an error due to overflow until the last + * possible moment. This makes it possible to encode exactly 4GB + * through a 32-bit encoder. */ +static int +xd3_decode_init_window (xd3_stream *stream) +{ + stream->dec_cpylen = 0; + stream->dec_cpyoff = 0; + stream->dec_cksumbytes = 0; + + xd3_init_cache (& stream->acache); + + return 0; +} + +/* Allocates buffer space for the target window and possibly the + * VCD_TARGET copy-window. Also sets the base of the two copy + * segments. */ +static int +xd3_decode_setup_buffers (xd3_stream *stream) +{ + /* If VCD_TARGET is set then the previous buffer may be reused. */ + if (stream->dec_win_ind & VCD_TARGET) + { + /* But this implementation only supports copying from the last + * target window. If the offset is outside that range, it can't + * be done. 
*/ + if (stream->dec_cpyoff < stream->dec_laststart) + { + stream->msg = "unsupported VCD_TARGET offset"; + return XD3_INVALID_INPUT; + } + + /* See if the two windows are the same. This indicates the + * first time VCD_TARGET is used. This causes a second buffer + * to be allocated, after that the two are swapped in the + * DEC_FINISH case. */ + if (stream->dec_lastwin == stream->next_out) + { + stream->next_out = NULL; + stream->space_out = 0; + } + + // TODO: VCD_TARGET mode, this is broken + stream->dec_cpyaddrbase = stream->dec_lastwin + + (usize_t) (stream->dec_cpyoff - stream->dec_laststart); + } + + /* See if the current output window is large enough. */ + if (stream->space_out < stream->dec_tgtlen) + { + xd3_free (stream, stream->dec_buffer); + + stream->space_out = + xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE); + + if ((stream->dec_buffer = + (uint8_t*) xd3_alloc (stream, stream->space_out, 1)) == NULL) + { + return ENOMEM; + } + + stream->next_out = stream->dec_buffer; + } + + /* dec_tgtaddrbase refers to an invalid base address, but it is + * always used with a sufficiently large instruction offset (i.e., + * beyond the copy window). This condition is enforced by + * xd3_decode_output_halfinst. 
*/ + stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen; + + return 0; +} + +static int +xd3_decode_allocate (xd3_stream *stream, + usize_t size, + uint8_t **buf_ptr, + usize_t *buf_alloc) +{ + if (*buf_ptr != NULL && *buf_alloc < size) + { + xd3_free (stream, *buf_ptr); + *buf_ptr = NULL; + } + + if (*buf_ptr == NULL) + { + *buf_alloc = xd3_round_blksize (size, XD3_ALLOCSIZE); + + if ((*buf_ptr = (uint8_t*) xd3_alloc (stream, *buf_alloc, 1)) == NULL) + { + return ENOMEM; + } + } + + return 0; +} + +static int +xd3_decode_section (xd3_stream *stream, + xd3_desect *section, + xd3_decode_state nstate, + int copy) +{ + XD3_ASSERT (section->pos <= section->size); + XD3_ASSERT (stream->dec_state != nstate); + + if (section->pos < section->size) + { + usize_t sect_take; + + if (stream->avail_in == 0) + { + return XD3_INPUT; + } + + if ((copy == 0) && (section->pos == 0)) + { + /* No allocation/copy needed */ + section->buf = stream->next_in; + sect_take = section->size; + } + else + { + usize_t sect_need = section->size - section->pos; + + /* Allocate and copy */ + sect_take = min (sect_need, stream->avail_in); + + if (section->pos == 0) + { + int ret; + + if ((ret = xd3_decode_allocate (stream, + section->size, + & section->copied1, + & section->alloc1))) + { + return ret; + } + + section->buf = section->copied1; + } + + memcpy (section->copied1 + section->pos, + stream->next_in, + sect_take); + } + + section->pos += sect_take; + + stream->dec_winbytes += sect_take; + + DECODE_INPUT (sect_take); + } + + if (section->pos < section->size) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + XD3_ASSERT (section->pos == section->size); + + stream->dec_state = nstate; + section->buf_max = section->buf + section->size; + section->pos = 0; + return 0; +} + +/* Decode the size and address for half of an instruction (i.e., a + * single opcode). 
This updates the stream->dec_position, which are + * bytes already output prior to processing this instruction. Perform + * bounds checking for sizes and copy addresses, which uses the + * dec_position (which is why these checks are done here). */ +static int +xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst) +{ + int ret; + + /* If the size from the instruction table is zero then read a size value. */ + if ((inst->size == 0) && + (ret = xd3_read_size (stream, + & stream->inst_sect.buf, + stream->inst_sect.buf_max, + & inst->size))) + { + return XD3_INVALID_INPUT; + } + + /* For copy instructions, read address. */ + if (inst->type >= XD3_CPY) + { + IF_DEBUG2 ({ + static int cnt = 0; + DP(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n", + cnt++, + stream->total_out + (stream->dec_position - + stream->dec_cpylen), + (stream->dec_position - stream->dec_cpylen), + inst->size, + inst->addr); + }); + + if ((ret = xd3_decode_address (stream, + stream->dec_position, + inst->type - XD3_CPY, + & stream->addr_sect.buf, + stream->addr_sect.buf_max, + & inst->addr))) + { + return ret; + } + + /* Cannot copy an address before it is filled-in. */ + if (inst->addr >= stream->dec_position) + { + stream->msg = "address too large"; + return XD3_INVALID_INPUT; + } + + /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining + * buffer space in its own segment. 
*/ + if (inst->addr < stream->dec_cpylen && + inst->addr + inst->size > stream->dec_cpylen) + { + stream->msg = "size too large"; + return XD3_INVALID_INPUT; + } + } + else + { + IF_DEBUG2 ({ + if (inst->type == XD3_ADD) + { + static int cnt; + DP(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n", + cnt++, + (stream->total_out + stream->dec_position - stream->dec_cpylen), + stream->dec_position - stream->dec_cpylen, + inst->size); + } + else + { + static int cnt; + XD3_ASSERT (inst->type == XD3_RUN); + DP(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n", + cnt++, + stream->total_out + stream->dec_position - stream->dec_cpylen, + stream->dec_position - stream->dec_cpylen, + inst->size); + } + }); + } + + /* Check: The instruction will not overflow the output buffer. */ + if (stream->dec_position + inst->size > stream->dec_maxpos) + { + stream->msg = "size too large"; + return XD3_INVALID_INPUT; + } + + stream->dec_position += inst->size; + return 0; +} + +/* Decode a single opcode and then decode the two half-instructions. */ +static int +xd3_decode_instruction (xd3_stream *stream) +{ + int ret; + const xd3_dinst *inst; + + if (stream->inst_sect.buf == stream->inst_sect.buf_max) + { + stream->msg = "instruction underflow"; + return XD3_INVALID_INPUT; + } + + inst = &stream->code_table[*stream->inst_sect.buf++]; + + stream->dec_current1.type = inst->type1; + stream->dec_current2.type = inst->type2; + stream->dec_current1.size = inst->size1; + stream->dec_current2.size = inst->size2; + + /* For each instruction with a real operation, decode the + * corresponding size and addresses if necessary. Assume a + * code-table may have NOOP in either position, although this is + * unlikely. 
*/ + if (inst->type1 != XD3_NOOP && + (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1))) + { + return ret; + } + if (inst->type2 != XD3_NOOP && + (ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2))) + { + return ret; + } + return 0; +} + +/* Output the result of a single half-instruction. OPT: This the + decoder hotspot. */ +static int +xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst) +{ + /* To make this reentrant, set take = min (inst->size, available + space)... */ + usize_t take = inst->size; + + XD3_ASSERT (inst->type != XD3_NOOP); + + switch (inst->type) + { + case XD3_RUN: + { + /* Only require a single data byte. */ + if (stream->data_sect.buf == stream->data_sect.buf_max) + { + stream->msg = "data underflow"; + return XD3_INVALID_INPUT; + } + + memset (stream->next_out + stream->avail_out, + stream->data_sect.buf[0], + take); + + stream->data_sect.buf += 1; + stream->avail_out += take; + inst->type = XD3_NOOP; + break; + } + case XD3_ADD: + { + /* Require at least TAKE data bytes. */ + if (stream->data_sect.buf + take > stream->data_sect.buf_max) + { + stream->msg = "data underflow"; + return XD3_INVALID_INPUT; + } + + memcpy (stream->next_out + stream->avail_out, + stream->data_sect.buf, + take); + + stream->data_sect.buf += take; + stream->avail_out += take; + inst->type = XD3_NOOP; + break; + } + default: + { + usize_t i; + const uint8_t *src; + uint8_t *dst; + + /* See if it copies from the VCD_TARGET/VCD_SOURCE window or + * the target window. Out-of-bounds checks for the addresses + * and sizes are performed in xd3_decode_parse_halfinst. */ + if (inst->addr < stream->dec_cpylen) + { + if (stream->dec_win_ind & VCD_TARGET) + { + /* For VCD_TARGET we know the entire range is + * in-memory, as established by + * decode_setup_buffers. + * + * TODO: this is totally bogus, VCD_TARGET won't work. 
+ */ + src = stream->dec_cpyaddrbase + inst->addr; + inst->type = XD3_NOOP; + inst->size = 0; + } + else + { + /* In this case we have to read a source block, which + * could return control to the caller. We need to + * know the first block number needed for this + * copy. */ + xd3_source *source; + xoff_t block; + usize_t blkoff; + usize_t blksize; + int ret; + + more: + + source = stream->src; + block = source->cpyoff_blocks; + blkoff = source->cpyoff_blkoff + inst->addr; + blksize = source->blksize; + + while (blkoff >= blksize) + { + block += 1; + blkoff -= blksize; + } + + if ((ret = xd3_getblk (stream, block))) + { + /* could be a XD3_GETSRCBLK failure. */ + if (ret == XD3_TOOFARBACK) + { + ret = XD3_INTERNAL; + } + return ret; + } + + src = source->curblk + blkoff; + + /* This block either contains enough data or the source file + * is short. */ + if ((source->onblk != blksize) && + (blkoff + take > source->onblk)) + { + stream->msg = "source file too short"; + return XD3_INVALID_INPUT; + + } + + XD3_ASSERT (blkoff != blksize); + + if (blkoff + take <= blksize) + { + inst->type = XD3_NOOP; + inst->size = 0; + } + else + { + /* This block doesn't contain all the data, modify + * the instruction, do not set to XD3_NOOP. */ + take = blksize - blkoff; + inst->size -= take; + inst->addr += take; + } + } + } + else + { + /* For a target-window copy, we know the entire range is + * in-memory. The dec_tgtaddrbase is negatively offset by + * dec_cpylen because the addresses start beyond that + * point. */ + src = stream->dec_tgtaddrbase + inst->addr; + inst->type = XD3_NOOP; + inst->size = 0; + } + + dst = stream->next_out + stream->avail_out; + + stream->avail_out += take; + + /* Can't just memcpy here due to possible overlap. */ + for (i = take; i != 0; i -= 1) + { + *dst++ = *src++; + } + + take = inst->size; + + /* If there is more to copy, call getblk again. 
*/ + if (inst->type != XD3_NOOP) + { + XD3_ASSERT (take > 0); + goto more; + } + else + { + XD3_ASSERT (take == 0); + } + } + } + + return 0; +} + +static int +xd3_decode_finish_window (xd3_stream *stream) +{ + stream->dec_winbytes = 0; + stream->dec_state = DEC_FINISH; + + stream->data_sect.pos = 0; + stream->inst_sect.pos = 0; + stream->addr_sect.pos = 0; + + return XD3_OUTPUT; +} + +static int +xd3_decode_secondary_sections (xd3_stream *secondary_stream) +{ +#if SECONDARY_ANY + int ret; +#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \ + ((secondary_stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \ + (ret = xd3_decode_secondary (secondary_stream, \ + & secondary_stream-> LOWER ## _sect, \ + & xd3_sec_ ## LOWER (secondary_stream)))) + + if (DECODE_SECONDARY_SECTION (DATA, data) || + DECODE_SECONDARY_SECTION (INST, inst) || + DECODE_SECONDARY_SECTION (ADDR, addr)) + { + return ret; + } +#undef DECODE_SECONDARY_SECTION +#endif + return 0; +} + +static int +xd3_decode_sections (xd3_stream *stream) +{ + usize_t need, more, take; + int copy, ret; + + if ((stream->flags & XD3_JUST_HDR) != 0) + { + /* Nothing left to do. */ + return xd3_decode_finish_window (stream); + } + + /* To avoid copying, need this much data available */ + need = (stream->inst_sect.size + + stream->addr_sect.size + + stream->data_sect.size); + + /* The window may be entirely processed. */ + XD3_ASSERT (stream->dec_winbytes <= need); + + /* Compute how much more input is needed. */ + more = (need - stream->dec_winbytes); + + /* How much to consume. */ + take = min (more, stream->avail_in); + + /* See if the input is completely available, to avoid copy. */ + copy = (take != more); + + /* If the window is skipped... */ + if ((stream->flags & XD3_SKIP_WINDOW) != 0) + { + /* Skip the available input. 
*/ + DECODE_INPUT (take); + + stream->dec_winbytes += take; + + if (copy) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + return xd3_decode_finish_window (stream); + } + + /* Process all but the DATA section. */ + switch (stream->dec_state) + { + default: + stream->msg = "internal error"; + return XD3_INVALID_INPUT; + + case DEC_DATA: + if ((ret = xd3_decode_section (stream, & stream->data_sect, + DEC_INST, copy))) { return ret; } + case DEC_INST: + if ((ret = xd3_decode_section (stream, & stream->inst_sect, + DEC_ADDR, copy))) { return ret; } + case DEC_ADDR: + if ((ret = xd3_decode_section (stream, & stream->addr_sect, + DEC_EMIT, copy))) { return ret; } + } + + XD3_ASSERT (stream->dec_winbytes == need); + + if ((ret = xd3_decode_secondary_sections (stream))) { return ret; } + + if (stream->flags & XD3_SKIP_EMIT) + { + return xd3_decode_finish_window (stream); + } + + /* OPT: A possible optimization is to avoid allocating memory in + * decode_setup_buffers and to avoid a large memcpy when the window + * consists of a single VCD_SOURCE copy instruction. The only + * potential problem is if the following window is a VCD_TARGET, + * then you need to remember... */ + if ((ret = xd3_decode_setup_buffers (stream))) { return ret; } + + return 0; +} + +static int +xd3_decode_emit (xd3_stream *stream) +{ + int ret; + + /* Produce output: originally structured to allow reentrant code + * that fills as much of the output buffer as possible, but VCDIFF + * semantics allows to copy from anywhere from the target window, so + * instead allocate a sufficiently sized buffer after the target + * window length is decoded. + * + * This code still needs to be reentrant to allow XD3_GETSRCBLK to + * return control. This is handled by setting the + * stream->dec_currentN instruction types to XD3_NOOP after they + * have been processed. */ + XD3_ASSERT (! 
(stream->flags & XD3_SKIP_EMIT)); + XD3_ASSERT (stream->dec_tgtlen <= stream->space_out); + + while (stream->inst_sect.buf != stream->inst_sect.buf_max || + stream->dec_current1.type != XD3_NOOP || + stream->dec_current2.type != XD3_NOOP) + { + /* Decode next instruction pair. */ + if ((stream->dec_current1.type == XD3_NOOP) && + (stream->dec_current2.type == XD3_NOOP) && + (ret = xd3_decode_instruction (stream))) { return ret; } + + /* Output for each instruction. */ + if ((stream->dec_current1.type != XD3_NOOP) && + (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) + { + return ret; + } + + if ((stream->dec_current2.type != XD3_NOOP) && + (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) + { + return ret; + } + } + + if (stream->avail_out != stream->dec_tgtlen) + { + IF_DEBUG1 (DP(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n", + stream->avail_out, stream->dec_tgtlen)); + stream->msg = "wrong window length"; + return XD3_INVALID_INPUT; + } + + if (stream->data_sect.buf != stream->data_sect.buf_max) + { + stream->msg = "extra data section"; + return XD3_INVALID_INPUT; + } + + if (stream->addr_sect.buf != stream->addr_sect.buf_max) + { + stream->msg = "extra address section"; + return XD3_INVALID_INPUT; + } + + /* OPT: Should cksum computation be combined with the above loop? */ + if ((stream->dec_win_ind & VCD_ADLER32) != 0 && + (stream->flags & XD3_ADLER32_NOVER) == 0) + { + uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out); + + if (a32 != stream->dec_adler32) + { + stream->msg = "target window checksum mismatch"; + return XD3_INVALID_INPUT; + } + } + + /* Finished with a window. 
*/ + return xd3_decode_finish_window (stream); +} + +int +xd3_decode_input (xd3_stream *stream) +{ + int ret; + + if (stream->enc_state != 0) + { + stream->msg = "encoder/decoder transition"; + return XD3_INVALID_INPUT; + } + +#define BYTE_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + +#define OFFSET_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + +#define SIZE_CASE(expr,x,nstate) \ + do { \ + if ( (expr) && \ + ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \ + stream->dec_state = (nstate); \ + } while (0) + + switch (stream->dec_state) + { + case DEC_VCHEAD: + { + if ((ret = xd3_decode_bytes (stream, stream->dec_magic, + & stream->dec_magicbytes, 4))) + { + return ret; + } + + if (stream->dec_magic[0] != VCDIFF_MAGIC1 || + stream->dec_magic[1] != VCDIFF_MAGIC2 || + stream->dec_magic[2] != VCDIFF_MAGIC3) + { + stream->msg = "not a VCDIFF input"; + return XD3_INVALID_INPUT; + } + + if (stream->dec_magic[3] != 0) + { + stream->msg = "VCDIFF input version > 0 is not supported"; + return XD3_INVALID_INPUT; + } + + stream->dec_state = DEC_HDRIND; + } + case DEC_HDRIND: + { + if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) + { + return ret; + } + + if ((stream->dec_hdr_ind & VCD_INVHDR) != 0) + { + stream->msg = "unrecognized header indicator bits set"; + return XD3_INVALID_INPUT; + } + + stream->dec_state = DEC_SECONDID; + } + + case DEC_SECONDID: + /* Secondary compressor ID: only if VCD_SECONDARY is set */ + if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) + { + BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN); + + switch (stream->dec_secondid) + { + case VCD_FGK_ID: + FGK_CASE (stream); + case VCD_DJW_ID: + DJW_CASE (stream); + default: + stream->msg = "unknown secondary compressor ID"; + return 
XD3_INVALID_INPUT; + } + } + + case DEC_TABLEN: + /* Length of code table data: only if VCD_CODETABLE is set */ + SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->dec_codetblsz, DEC_NEAR); + + /* The codetblsz counts the two NEAR/SAME bytes */ + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) { + if (stream->dec_codetblsz <= 2) { + stream->msg = "invalid code table size"; + return ENOMEM; + } + stream->dec_codetblsz -= 2; + } + case DEC_NEAR: + /* Near modes: only if VCD_CODETABLE is set */ + BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->acache.s_near, DEC_SAME); + case DEC_SAME: + /* Same modes: only if VCD_CODETABLE is set */ + BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, + stream->acache.s_same, DEC_TABDAT); + case DEC_TABDAT: + /* Compressed code table data */ + + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) + { + /* Get the code table data. */ + if ((stream->dec_codetbl == NULL) && + (stream->dec_codetbl = + (uint8_t*) xd3_alloc (stream, + stream->dec_codetblsz, 1)) == NULL) + { + return ENOMEM; + } + + if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl, + & stream->dec_codetblbytes, + stream->dec_codetblsz))) + { + return ret; + } + + if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl, + stream->dec_codetblbytes))) + { + return ret; + } + } + else + { + /* Use the default table. */ + stream->acache.s_near = __rfc3284_code_table_desc.near_modes; + stream->acache.s_same = __rfc3284_code_table_desc.same_modes; + stream->code_table = xd3_rfc3284_code_table (); + } + + if ((ret = xd3_alloc_cache (stream))) { return ret; } + + stream->dec_state = DEC_APPLEN; + + case DEC_APPLEN: + /* Length of application data */ + SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, + stream->dec_appheadsz, DEC_APPDAT); + + case DEC_APPDAT: + /* Application data */ + if (stream->dec_hdr_ind & VCD_APPHEADER) + { + /* Note: we add an additional byte for padding, to allow + 0-termination. 
*/ + if ((stream->dec_appheader == NULL) && + (stream->dec_appheader = + (uint8_t*) xd3_alloc (stream, + stream->dec_appheadsz+1, 1)) == NULL) + { + return ENOMEM; + } + + stream->dec_appheader[stream->dec_appheadsz] = 0; + + if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, + & stream->dec_appheadbytes, + stream->dec_appheadsz))) + { + return ret; + } + } + + stream->dec_hdrsize = stream->total_in; + stream->dec_state = DEC_WININD; + + case DEC_WININD: + { + /* Start of a window: the window indicator */ + if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) + { + return ret; + } + + stream->current_window = stream->dec_window_count; + + if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen)) + { + stream->msg = "decoder file offset overflow"; + return XD3_INVALID_INPUT; + } + + stream->dec_winstart += stream->dec_tgtlen; + + if ((stream->dec_win_ind & VCD_INVWIN) != 0) + { + stream->msg = "unrecognized window indicator bits set"; + return XD3_INVALID_INPUT; + } + + if ((ret = xd3_decode_init_window (stream))) { return ret; } + + stream->dec_state = DEC_CPYLEN; + + IF_DEBUG1 (DP(RINT "--------- TARGET WINDOW %"Q"u -----------\n", + stream->current_window)); + } + + case DEC_CPYLEN: + /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */ + SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, + DEC_CPYOFF); + + /* Set the initial, logical decoder position (HERE address) in + * dec_position. This is set to just after the source/copy + * window, as we are just about to output the first byte of + * target window. */ + stream->dec_position = stream->dec_cpylen; + + case DEC_CPYOFF: + /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */ + OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, + DEC_ENCLEN); + + /* Copy offset and copy length may not overflow. 
*/ + if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen)) + { + stream->msg = "decoder copy window overflows a file offset"; + return XD3_INVALID_INPUT; + } + + /* Check copy window bounds: VCD_TARGET window may not exceed + current position. */ + if ((stream->dec_win_ind & VCD_TARGET) && + (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > + stream->dec_winstart)) + { + stream->msg = "VCD_TARGET window out of bounds"; + return XD3_INVALID_INPUT; + } + + case DEC_ENCLEN: + /* Length of the delta encoding */ + SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN); + case DEC_TGTLEN: + /* Length of target window */ + SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND); + + /* Set the maximum decoder position, beyond which we should not + * decode any data. This is the maximum value for dec_position. + * This may not exceed the size of a usize_t. */ + if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen)) + { + stream->msg = "decoder target window overflows a usize_t"; + return XD3_INVALID_INPUT; + } + + /* Check for malicious files. */ + if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE) + { + stream->msg = "hard window size exceeded"; + return XD3_INVALID_INPUT; + } + + stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen; + + case DEC_DELIND: + /* Delta indicator */ + BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN); + + if ((stream->dec_del_ind & VCD_INVDEL) != 0) + { + stream->msg = "unrecognized delta indicator bits set"; + return XD3_INVALID_INPUT; + } + + /* Delta indicator is only used with secondary compression. */ + if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL)) + { + stream->msg = "invalid delta indicator bits set"; + return XD3_INVALID_INPUT; + } + + /* Section lengths */ + case DEC_DATALEN: + SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN); + case DEC_INSTLEN: + SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN); + case DEC_ADDRLEN: + SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM); + + case DEC_CKSUM: + /* Window checksum. 
*/ + if ((stream->dec_win_ind & VCD_ADLER32) != 0) + { + int i; + + if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, + & stream->dec_cksumbytes, 4))) + { + return ret; + } + + for (i = 0; i < 4; i += 1) + { + stream->dec_adler32 = + (stream->dec_adler32 << 8) | stream->dec_cksum[i]; + } + } + + stream->dec_state = DEC_DATA; + + /* Check dec_enclen for redundency, otherwise it is not really used. */ + { + usize_t enclen_check = + (1 + (xd3_sizeof_size (stream->dec_tgtlen) + + xd3_sizeof_size (stream->data_sect.size) + + xd3_sizeof_size (stream->inst_sect.size) + + xd3_sizeof_size (stream->addr_sect.size)) + + stream->data_sect.size + + stream->inst_sect.size + + stream->addr_sect.size + + ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0)); + + if (stream->dec_enclen != enclen_check) + { + stream->msg = "incorrect encoding length (redundent)"; + return XD3_INVALID_INPUT; + } + } + + /* Returning here gives the application a chance to inspect the + * header, skip the window, etc. */ + if (stream->current_window == 0) { return XD3_GOTHEADER; } + else { return XD3_WINSTART; } + + case DEC_DATA: + case DEC_INST: + case DEC_ADDR: + /* Next read the three sections. */ + if ((ret = xd3_decode_sections (stream))) { return ret; } + + case DEC_EMIT: + + /* To speed VCD_SOURCE block-address calculations, the source + * cpyoff_blocks and cpyoff_blkoff are pre-computed. */ + if (stream->dec_win_ind & VCD_SOURCE) + { + xd3_source *src = stream->src; + + if (src == NULL) + { + stream->msg = "source input required"; + return XD3_INVALID_INPUT; + } + + xd3_blksize_div(stream->dec_cpyoff, src, + &src->cpyoff_blocks, + &src->cpyoff_blkoff); + } + + /* xd3_decode_emit returns XD3_OUTPUT on every success. 
*/ + if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT) + { + stream->total_out += (xoff_t) stream->avail_out; + } + + return ret; + + case DEC_FINISH: + { + if (stream->dec_win_ind & VCD_TARGET) + { + if (stream->dec_lastwin == NULL) + { + stream->dec_lastwin = stream->next_out; + stream->dec_lastspace = stream->space_out; + } + else + { + xd3_swap_uint8p (& stream->dec_lastwin, + & stream->next_out); + xd3_swap_usize_t (& stream->dec_lastspace, + & stream->space_out); + } + } + + stream->dec_lastlen = stream->dec_tgtlen; + stream->dec_laststart = stream->dec_winstart; + stream->dec_window_count += 1; + + /* Note: the updates to dec_winstart & current_window are + * deferred until after the next DEC_WININD byte is read. */ + stream->dec_state = DEC_WININD; + return XD3_WINFINISH; + } + + default: + stream->msg = "invalid state"; + return XD3_INVALID_INPUT; + } +} + +#endif // _XDELTA3_DECODE_H_ diff --git a/xdelta3-djw.h b/xdelta3-djw.h new file mode 100644 index 0000000..24f5b81 --- /dev/null +++ b/xdelta3-djw.h @@ -0,0 +1,1828 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* TODO: This code needs a thorough round of commenting. 
There is + * some slop in the declaration of arrays, which are maybe one element + * larger than they need to be and comments would help clear it up. */ + +#ifndef _XDELTA3_DJW_H_ +#define _XDELTA3_DJW_H_ + +/* The following people deserve much credit for the algorithms and + * techniques contained in this file: + + Julian Seward + Bzip2 sources, implementation of the multi-table Huffman technique. + + Jean-loup Gailly and Mark Adler and L. Peter Deutsch + Zlib source code, RFC 1951 + + Daniel S. Hirschberg and Debra A. LeLewer + "Efficient Decoding of Prefix Codes" + Communications of the ACM, April 1990 33(4). + + David J. Wheeler + Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps. + This contains the idea behind the multi-table Huffman and 1-2 coding + techniques. + ftp://ftp.cl.cam.ac.uk/users/djw3/ + +*/ + +/* OPT: during the multi-table iteration, pick the worst-overall + * performing table and replace it with exactly the frequencies of the + * worst-overall performing sector or N-worst performing sectors. */ + +/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with + * the Bzip prefix coding strategy. xdfs-0.256 contains the last of + * the other-format tests, including RFC1950 and the RFC1950+MTF + * tests. */ + +#define DJW_MAX_CODELEN 20 /* Maximum length of an alphabet code. */ + +/* Code lengths are themselves code-length encoded, so the total number of + * codes is: [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */ +#define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) + +#define RUN_0 0 /* Symbols used in MTF+1/2 coding. 
*/ +#define RUN_1 1 + +/* Number of code lengths always encoded (djw_encode_basic array) */ +#define DJW_BASIC_CODES 5 +#define DJW_RUN_CODES 2 /* Number of run codes */ + +/* Offset of extra codes */ +#define DJW_EXTRA_12OFFSET (DJW_BASIC_CODES + DJW_RUN_CODES) + +/* Number of optionally encoded code lengths (djw_encode_extra array) */ +#define DJW_EXTRA_CODES 15 + +/* Number of bits to code [0-DJW_EXTRA_CODES] */ +#define DJW_EXTRA_CODE_BITS 4 + +#define DJW_MAX_GROUPS 8 /* Max number of group coding tables */ +#define DJW_GROUP_BITS 3 /* Number of bits to code [1-DJW_MAX_GROUPS] */ + +#define DJW_SECTORSZ_MULT 5 /* Multiplier for encoded sectorsz */ +#define DJW_SECTORSZ_BITS 5 /* Number of bits to code group size */ +#define DJW_SECTORSZ_MAX ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT) + +/* Maximum number of iterations to find group tables. */ +#define DJW_MAX_ITER 6 +/* Minimum number of bits an iteration must reduce coding by. */ +#define DJW_MIN_IMPROVEMENT 20 + +/* Maximum code length of a prefix code length */ +#define DJW_MAX_CLCLEN 15 + +/* Number of bits to code [0-DJW_MAX_CLCLEN] */ +#define DJW_CLCLEN_BITS 4 + +#define DJW_MAX_GBCLEN 7 /* Maximum code length of a group selector */ + +/* Number of bits to code [0-DJW_MAX_GBCLEN] + * TODO: Actually, should never have zero code lengths here, or else a group + * went unused. Write a test for this: if a group goes unused, eliminate + * it? */ +#define DJW_GBCLEN_BITS 3 + +/* It has to save at least this many bits... 
*/ +#define EFFICIENCY_BITS 16 + +typedef struct _djw_stream djw_stream; +typedef struct _djw_heapen djw_heapen; +typedef struct _djw_prefix djw_prefix; +typedef uint32_t djw_weight; + +struct _djw_heapen +{ + uint32_t depth; + uint32_t freq; + uint32_t parent; +}; + +struct _djw_prefix +{ + usize_t scount; + uint8_t *symbol; + usize_t mcount; + uint8_t *mtfsym; + uint8_t *repcnt; +}; + +struct _djw_stream +{ + int unused; +}; + +/* Each Huffman table consists of 256 "code length" (CLEN) codes, + * which are themselves Huffman coded after eliminating repeats and + * move-to-front coding. The prefix consists of all the CLEN codes in + * djw_encode_basic plus a 4-bit value stating how many of the + * djw_encode_extra codes are actually coded (the rest are presumed + * zero, or unused CLEN codes). + * + * These values of these two arrays were arrived at by studying the + * distribution of min and max clen over a collection of DATA, INST, + * and ADDR inputs. The goal is to specify the order of + * djw_extra_codes that is most likely to minimize the number of extra + * codes that must be encoded. + * + * Results: 158896 sections were counted by compressing files (window + * size 512K) listed with: `find / -type f ( -user jmacd -o -perm +444 + * )` + * + * The distribution of CLEN codes for each efficient invocation of the + * secondary compressor (taking the best number of groups/sector size) + * was recorded. Then we look at the distribution of min and max clen + * values, counting the number of times the value C_low is less than + * the min and C_high is greater than the max. Values >= C_high and + * <= C_low will not have their lengths coded. The results are sorted + * and the least likely 15 are placed into the djw_encode_extra[] + * array in order. These values are used as the initial MTF ordering. 
+ + clow[1] = 155119 + clow[2] = 140325 + clow[3] = 84072 + --- + clow[4] = 7225 + clow[5] = 1093 + clow[6] = 215 + --- + chigh[4] = 1 + chigh[5] = 30 + chigh[6] = 218 + chigh[7] = 2060 + chigh[8] = 13271 + --- + chigh[9] = 39463 + chigh[10] = 77360 + chigh[11] = 118298 + chigh[12] = 141360 + chigh[13] = 154086 + chigh[14] = 157967 + chigh[15] = 158603 + chigh[16] = 158864 + chigh[17] = 158893 + chigh[18] = 158895 + chigh[19] = 158896 + chigh[20] = 158896 + +*/ + +static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] = + { + 9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20, + }; + +static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] = + { + 4, 5, 6, 7, 8, + }; + +/*********************************************************************/ +/* DECLS */ +/*********************************************************************/ + +static djw_stream* djw_alloc (xd3_stream *stream); +static void djw_init (djw_stream *h); +static void djw_destroy (xd3_stream *stream, + djw_stream *h); + +#if XD3_ENCODER +static int xd3_encode_huff (xd3_stream *stream, + djw_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); +#endif + +static int xd3_decode_huff (xd3_stream *stream, + djw_stream *sec_stream, + const uint8_t **input, + const uint8_t *const input_end, + uint8_t **output, + const uint8_t *const output_end); + +/*********************************************************************/ +/* HUFFMAN */ +/*********************************************************************/ + +static djw_stream* +djw_alloc (xd3_stream *stream) +{ + return xd3_alloc (stream, sizeof (djw_stream), 1); +} + +static void +djw_init (djw_stream *h) +{ + /* Fields are initialized prior to use. 
*/ +} + +static void +djw_destroy (xd3_stream *stream, + djw_stream *h) +{ + xd3_free (stream, h); +} + + +/*********************************************************************/ +/* HEAP */ +/*********************************************************************/ + +static inline int +heap_less (const djw_heapen *a, const djw_heapen *b) +{ + return a->freq < b->freq || + (a->freq == b->freq && + a->depth < b->depth); +} + +static inline void +heap_insert (usize_t *heap, const djw_heapen *ents, usize_t p, const usize_t e) +{ + /* Insert ents[e] into next slot heap[p] */ + usize_t pp = p/2; /* P's parent */ + + while (heap_less (& ents[e], & ents[heap[pp]])) + { + heap[p] = heap[pp]; + p = pp; + pp = p/2; + } + + heap[p] = e; +} + +static inline djw_heapen* +heap_extract (usize_t *heap, const djw_heapen *ents, usize_t heap_last) +{ + usize_t smallest = heap[1]; + usize_t p, pc, t; + + /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */ + heap[1] = heap[heap_last+1]; + + /* Re-heapify */ + for (p = 1; ; p = pc) + { + pc = p*2; + + /* Reached bottom of heap */ + if (pc > heap_last) { break; } + + /* See if second child is smaller. */ + if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) + { + pc += 1; + } + + /* If pc is not smaller than p, heap property re-established. */ + if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; } + + t = heap[pc]; + heap[pc] = heap[p]; + heap[p] = t; + } + + return (djw_heapen*) & ents[smallest]; +} + +#if XD3_DEBUG +static void +heap_check (usize_t *heap, djw_heapen *ents, usize_t heap_last) +{ + usize_t i; + for (i = 1; i <= heap_last; i += 1) + { + /* Heap property: child not less than parent */ + XD3_ASSERT (! 
heap_less (& ents[heap[i]], & ents[heap[i/2]])); + + IF_DEBUG1 (DP(RINT "heap[%d] = %u\n", i, ents[heap[i]].freq)); + } +} +#endif + +/*********************************************************************/ +/* MTF, 1/2 */ +/*********************************************************************/ + +static inline usize_t +djw_update_mtf (uint8_t *mtf, usize_t mtf_i) +{ + int k; + usize_t sym = mtf[mtf_i]; + + for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; } + + mtf[0] = sym; + return sym; +} + +static inline void +djw_update_1_2 (int *mtf_run, usize_t *mtf_i, + uint8_t *mtfsym, djw_weight *freq) +{ + int code; + + do + { + /* Offset by 1, since any number of RUN_ symbols implies run>0... */ + *mtf_run -= 1; + + code = (*mtf_run & 1) ? RUN_1 : RUN_0; + + mtfsym[(*mtf_i)++] = code; + freq[code] += 1; + *mtf_run >>= 1; + } + while (*mtf_run >= 1); + + *mtf_run = 0; +} + +static void +djw_init_clen_mtf_1_2 (uint8_t *clmtf) +{ + int i, cl_i = 0; + + clmtf[cl_i++] = 0; + for (i = 0; i < DJW_BASIC_CODES; i += 1) + { + clmtf[cl_i++] = djw_encode_12basic[i]; + } + for (i = 0; i < DJW_EXTRA_CODES; i += 1) + { + clmtf[cl_i++] = djw_encode_12extra[i]; + } +} + +/*********************************************************************/ +/* PREFIX CODES */ +/*********************************************************************/ +#if XD3_ENCODER +static usize_t +djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen) +{ + /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 + * internal nodes, never more than ALPHABET_SIZE entries actually in the + * heap (minimum weight subtrees during prefix construction). First + * ALPHABET_SIZE entries are the actual symbols, next ALPHABET_SIZE-1 are + * internal nodes. */ + djw_heapen ents[ALPHABET_SIZE * 2]; + usize_t heap[ALPHABET_SIZE + 1]; + + usize_t heap_last; /* Index of the last _valid_ heap entry. 
*/ + usize_t ents_size; /* Number of entries, including 0th fake entry */ + int overflow; /* Number of code lengths that overflow */ + uint32_t total_bits; + int i; + + IF_DEBUG (uint32_t first_bits = 0); + + /* Insert real symbol frequences. */ + for (i = 0; i < asize; i += 1) + { + ents[i+1].freq = freq[i]; + IF_DEBUG1 (DP(RINT "ents[%d] = freq[%d] = %d\n", + i+1, i, freq[i])); + } + + again: + + /* The loop is re-entered each time an overflow occurs. Re-initialize... */ + heap_last = 0; + ents_size = 1; + overflow = 0; + total_bits = 0; + + /* 0th entry terminates the while loop in heap_insert (it's the parent of + * the smallest element, always less-than) */ + heap[0] = 0; + ents[0].depth = 0; + ents[0].freq = 0; + + /* Initial heap. */ + for (i = 0; i < asize; i += 1, ents_size += 1) + { + ents[ents_size].depth = 0; + ents[ents_size].parent = 0; + + if (ents[ents_size].freq != 0) + { + heap_insert (heap, ents, ++heap_last, ents_size); + } + } + + IF_DEBUG (heap_check (heap, ents, heap_last)); + + /* Must be at least one symbol, or else we can't get here. */ + XD3_ASSERT (heap_last != 0); + + /* If there is only one symbol, fake a second to prevent zero-length + * codes. */ + if (heap_last == 1) + { + /* Pick either the first or last symbol. */ + int s = freq[0] ? asize-1 : 0; + ents[s+1].freq = 1; + goto again; + } + + /* Build prefix tree. */ + while (heap_last > 1) + { + djw_heapen *h1 = heap_extract (heap, ents, --heap_last); + djw_heapen *h2 = heap_extract (heap, ents, --heap_last); + + ents[ents_size].freq = h1->freq + h2->freq; + ents[ents_size].depth = 1 + max (h1->depth, h2->depth); + ents[ents_size].parent = 0; + + h1->parent = h2->parent = ents_size; + + heap_insert (heap, ents, ++heap_last, ents_size++); + } + + IF_DEBUG (heap_check (heap, ents, heap_last)); + + /* Now compute prefix code lengths, counting parents. 
*/ + for (i = 1; i < asize+1; i += 1) + { + int b = 0; + + if (ents[i].freq != 0) + { + int p = i; + + while ((p = ents[p].parent) != 0) { b += 1; } + + if (b > maxlen) { overflow = 1; } + + total_bits += b * freq[i-1]; + } + + /* clen is 0-origin, unlike ents. */ + IF_DEBUG1 (DP(RINT "clen[%d] = %d\n", i-1, b)); + clen[i-1] = b; + } + + IF_DEBUG (if (first_bits == 0) first_bits = total_bits); + + if (! overflow) + { + IF_DEBUG1 (if (first_bits != total_bits) + { + DP(RINT "code length overflow changed %u bits\n", + (usize_t)(total_bits - first_bits)); + }); + return total_bits; + } + + /* OPT: There is a non-looping way to fix overflow shown in zlib, but this + * is easier (for now), as done in bzip2. */ + for (i = 1; i < asize+1; i += 1) + { + ents[i].freq = ents[i].freq / 2 + 1; + } + + goto again; +} + +static void +djw_build_codes (usize_t *codes, const uint8_t *clen, int asize, int abs_max) +{ + int i, l; + int min_clen = DJW_MAX_CODELEN; + int max_clen = 0; + usize_t code = 0; + + /* Find the min and max code length */ + for (i = 0; i < asize; i += 1) + { + if (clen[i] > 0 && clen[i] < min_clen) + { + min_clen = clen[i]; + } + + max_clen = max (max_clen, (int) clen[i]); + } + + XD3_ASSERT (max_clen <= abs_max); + + /* Generate a code for each symbol with the appropriate length. 
*/ + for (l = min_clen; l <= max_clen; l += 1) + { + for (i = 0; i < asize; i += 1) + { + if (clen[i] == l) + { + codes[i] = code++; + } + } + + code <<= 1; + } + + IF_DEBUG1 ({ + for (i = 0; i < asize; i += 1) + { + DP(RINT "code[%d] = %u\n", i, codes[i]); + } + }); +} + +/*********************************************************************/ +/* MOVE-TO-FRONT */ +/*********************************************************************/ +static void +djw_compute_mtf_1_2 (djw_prefix *prefix, + uint8_t *mtf, + djw_weight *freq_out, + usize_t nsym) +{ + int i, j, k; + usize_t sym; + usize_t size = prefix->scount; + usize_t mtf_i = 0; + int mtf_run = 0; + + /* This +2 is for the RUN_0, RUN_1 codes */ + memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+2)); + + for (i = 0; i < size; ) + { + /* OPT: Bzip optimizes this algorithm a little by effectively checking + * j==0 before the MTF update. */ + sym = prefix->symbol[i++]; + + for (j = 0; mtf[j] != sym; j += 1) { } + + XD3_ASSERT (j <= nsym); + + for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; } + + mtf[0] = sym; + + if (j == 0) + { + mtf_run += 1; + continue; + } + + if (mtf_run > 0) + { + djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); + } + + /* Non-zero symbols are offset by RUN_1 */ + prefix->mtfsym[mtf_i++] = j+RUN_1; + freq_out[j+RUN_1] += 1; + } + + if (mtf_run > 0) + { + djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); + } + + prefix->mcount = mtf_i; +} + +/* Counts character frequencies of the input buffer, returns the size. 
*/ +static usize_t +djw_count_freqs (djw_weight *freq, xd3_output *input) +{ + xd3_output *in; + usize_t size = 0; + + memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE); + + for (in = input; in; in = in->next_page) + { + const uint8_t *p = in->base; + const uint8_t *p_max = p + in->next; + + size += in->next; + + do + { + ++freq[*p]; + } + while (++p < p_max); + } + + IF_DEBUG1 ({int i; + DP(RINT "freqs: "); + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + DP(RINT "%u ", freq[i]); + } + DP(RINT "\n");}); + + return size; +} + +static void +djw_compute_multi_prefix (int groups, + uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE], + djw_prefix *prefix) +{ + int gp, i; + + prefix->scount = ALPHABET_SIZE; + memcpy (prefix->symbol, clen[0], ALPHABET_SIZE); + + for (gp = 1; gp < groups; gp += 1) + { + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (clen[gp][i] == 0) + { + continue; + } + + prefix->symbol[prefix->scount++] = clen[gp][i]; + } + } +} + +static void +djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq) +{ + /* This +1 is for the 0 code-length. */ + uint8_t clmtf[DJW_MAX_CODELEN+1]; + + djw_init_clen_mtf_1_2 (clmtf); + + djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN); +} + +static int +djw_encode_prefix (xd3_stream *stream, + xd3_output **output, + bit_state *bstate, + djw_prefix *prefix) +{ + int ret, i; + usize_t num_to_encode; + djw_weight clfreq[DJW_TOTAL_CODES]; + uint8_t clclen[DJW_TOTAL_CODES]; + usize_t clcode[DJW_TOTAL_CODES]; + + /* Move-to-front encode prefix symbols, count frequencies */ + djw_compute_prefix_1_2 (prefix, clfreq); + + /* Compute codes */ + djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); + djw_build_codes (clcode, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); + + /* Compute number of extra codes beyond basic ones for this template. 
*/ + num_to_encode = DJW_TOTAL_CODES; + while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) + { + num_to_encode -= 1; + } + XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS)); + + /* Encode: # of extra codes */ + if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS, + num_to_encode - DJW_EXTRA_12OFFSET))) + { + return ret; + } + + /* Encode: MTF code lengths */ + for (i = 0; i < num_to_encode; i += 1) + { + if ((ret = xd3_encode_bits (stream, output, bstate, + DJW_CLCLEN_BITS, clclen[i]))) + { + return ret; + } + } + + /* Encode: CLEN code lengths */ + for (i = 0; i < prefix->mcount; i += 1) + { + usize_t mtf_sym = prefix->mtfsym[i]; + usize_t bits = clclen[mtf_sym]; + usize_t code = clcode[mtf_sym]; + + if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) + { + return ret; + } + } + + return 0; +} + +static void +djw_compute_selector_1_2 (djw_prefix *prefix, + usize_t groups, + djw_weight *gbest_freq) +{ + uint8_t grmtf[DJW_MAX_GROUPS]; + usize_t i; + + for (i = 0; i < groups; i += 1) { grmtf[i] = i; } + + djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups); +} + +static int +xd3_encode_howmany_groups (xd3_stream *stream, + xd3_sec_cfg *cfg, + usize_t input_size, + usize_t *ret_groups, + usize_t *ret_sector_size) +{ + usize_t cfg_groups = 0; + usize_t cfg_sector_size = 0; + usize_t sugg_groups = 0; + usize_t sugg_sector_size = 0; + + if (cfg->ngroups != 0) + { + if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS) + { + stream->msg = "invalid secondary encoder group number"; + return XD3_INTERNAL; + } + + cfg_groups = cfg->ngroups; + } + + if (cfg->sector_size != 0) + { + if (cfg->sector_size < DJW_SECTORSZ_MULT || + cfg->sector_size > DJW_SECTORSZ_MAX || + (cfg->sector_size % DJW_SECTORSZ_MULT) != 0) + { + stream->msg = "invalid secondary encoder sector size"; + return XD3_INTERNAL; + } + + cfg_sector_size = cfg->sector_size; + } + + if (cfg_groups == 0 || cfg_sector_size == 0) 
+ { + /* These values were found empirically using xdelta3-tune around version + * xdfs-0.256. */ + switch (cfg->data_type) + { + case DATA_SECTION: + if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; } + else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; } + else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; } + else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; } + else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; } + else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; } + else { sugg_groups = 8; sugg_sector_size = 70; } + break; + case INST_SECTION: + if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; } + else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; } + else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; } + else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; } + else { sugg_groups = 8; sugg_sector_size = 40; } + break; + case ADDR_SECTION: + if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; } + else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; } + else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; } + else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; } + else { sugg_groups = 7; sugg_sector_size = 130; } + break; + } + + if (cfg_groups == 0) + { + cfg_groups = sugg_groups; + } + + if (cfg_sector_size == 0) + { + cfg_sector_size = sugg_sector_size; + } + } + + if (cfg_groups != 1 && cfg_sector_size == 0) + { + switch (cfg->data_type) + { + case DATA_SECTION: + cfg_sector_size = 20; + break; + case INST_SECTION: + cfg_sector_size = 50; + break; + case ADDR_SECTION: + cfg_sector_size = 130; + break; + } + } + + (*ret_groups) = cfg_groups; + 
(*ret_sector_size) = cfg_sector_size; + + XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS); + XD3_ASSERT (cfg_groups == 1 || + (cfg_sector_size >= DJW_SECTORSZ_MULT && + cfg_sector_size <= DJW_SECTORSZ_MAX)); + + return 0; +} + +static int +xd3_encode_huff (xd3_stream *stream, + djw_stream *h, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg) +{ + int ret; + usize_t groups, sector_size; + bit_state bstate = BIT_STATE_ENCODE_INIT; + xd3_output *in; + int output_bits; + usize_t input_bits; + usize_t input_bytes; + usize_t initial_offset = output->next; + djw_weight real_freq[ALPHABET_SIZE]; + uint8_t *gbest = NULL; + uint8_t *gbest_mtf = NULL; + + input_bytes = djw_count_freqs (real_freq, input); + input_bits = input_bytes * 8; + + XD3_ASSERT (input_bytes > 0); + + if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, + & groups, & sector_size))) + { + return ret; + } + + if (0) + { + regroup: + /* Sometimes we dynamically decide there are too many groups. Arrive + * here. */ + output->next = initial_offset; + xd3_bit_state_encode_init (& bstate); + } + + /* Encode: # of groups (3 bits) */ + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_GROUP_BITS, groups-1))) { goto failure; } + + if (groups == 1) + { + /* Single Huffman group. */ + usize_t code[ALPHABET_SIZE]; /* Codes */ + uint8_t clen[ALPHABET_SIZE]; + uint8_t prefix_mtfsym[ALPHABET_SIZE]; + djw_prefix prefix; + + output_bits = + djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); + djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); + + if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + /* Encode: prefix */ + prefix.mtfsym = prefix_mtfsym; + prefix.symbol = clen; + prefix.scount = ALPHABET_SIZE; + + if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) + { + goto failure; + } + + if (output_bits + (8 * output->next) + EFFICIENCY_BITS >= + input_bits && ! 
cfg->inefficient) + { + goto nosecond; + } + + /* Encode: data */ + for (in = input; in; in = in->next_page) + { + const uint8_t *p = in->base; + const uint8_t *p_max = p + in->next; + + do + { + usize_t sym = *p++; + usize_t bits = clen[sym]; + + IF_DEBUG (output_bits -= bits); + + if ((ret = xd3_encode_bits (stream, & output, + & bstate, bits, code[sym]))) + { + goto failure; + } + } + while (p < p_max); + } + + XD3_ASSERT (output_bits == 0); + } + else + { + /* DJW Huffman */ + djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE]; + uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE]; + djw_weight left = input_bytes; + int gp; + int niter = 0; + usize_t select_bits; + usize_t sym1 = 0, sym2 = 0, s; + usize_t gcost[DJW_MAX_GROUPS]; + usize_t gbest_code[DJW_MAX_GROUPS+2]; + uint8_t gbest_clen[DJW_MAX_GROUPS+2]; + usize_t gbest_max = 1 + (input_bytes - 1) / sector_size; + int best_bits = 0; + usize_t gbest_no; + usize_t gpcnt; + const uint8_t *p; + IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]); + + /* Encode: sector size (5 bits) */ + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_SECTORSZ_BITS, + (sector_size/DJW_SECTORSZ_MULT)-1))) + { + goto failure; + } + + /* Dynamic allocation. */ + if (gbest == NULL) + { + if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL) + { + ret = ENOMEM; + goto failure; + } + } + + if (gbest_mtf == NULL) + { + if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL) + { + ret = ENOMEM; + goto failure; + } + } + + /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */ + + /* Generate initial code length tables. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_weight sum = 0; + djw_weight goal = left / (groups - gp); + + IF_DEBUG1 (usize_t nz = 0); + + /* Due to the single-code granularity of this distribution, it may + * be that we can't generate a distribution for each group. In that + * case subtract one group and try again. 
If (inefficient), we're + * testing group behavior, so don't mess things up. */ + if (goal == 0 && !cfg->inefficient) + { + IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n", + groups)); + groups -= 1; + goto regroup; + } + + /* Sum == goal is possible when (cfg->inefficient)... */ + while (sum < goal) + { + XD3_ASSERT (sym2 < ALPHABET_SIZE); + IF_DEBUG1 (nz += real_freq[sym2] != 0); + sum += real_freq[sym2++]; + } + + IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) " + "(%u/%u = %.3f)\n", + gp, sym1, sym2, nz, sum, + input_bytes, sum / (double)input_bytes);); + + for (s = 0; s < ALPHABET_SIZE; s += 1) + { + evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16; + } + + left -= sum; + sym1 = sym2+1; + } + + repeat: + + niter += 1; + gbest_no = 0; + memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups); + IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups)); + + /* For each input page (loop is irregular to allow non-pow2-size group + * size. */ + in = input; + p = in->base; + + /* For each group-size sector. */ + do + { + const uint8_t *p0 = p; + xd3_output *in0 = in; + usize_t best = 0; + usize_t winner = 0; + + /* Select best group for each sector, update evolve_freq. */ + memset (gcost, 0, sizeof (gcost[0]) * groups); + + /* For each byte in sector. */ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + /* For each group. */ + for (gp = 0; gp < groups; gp += 1) + { + gcost[gp] += evolve_clen[gp][*p]; + } + + /* Check end-of-input-page. */ +# define GP_PAGE() \ + if (++p - in->base == in->next) \ + { \ + in = in->next_page; \ + if (in == NULL) { break; } \ + p = in->base; \ + } + + GP_PAGE (); + } + + /* Find min cost group for this sector */ + best = -1U; + for (gp = 0; gp < groups; gp += 1) + { + if (gcost[gp] < best) { best = gcost[gp]; winner = gp; } + } + + XD3_ASSERT(gbest_no < gbest_max); + gbest[gbest_no++] = winner; + IF_DEBUG1 (gcount[winner] += 1); + + p = p0; + in = in0; + + /* Update group frequencies. 
*/ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + evolve_freq[winner][*p] += 1; + + GP_PAGE (); + } + } + while (in != NULL); + + XD3_ASSERT (gbest_no == gbest_max); + + /* Recompute code lengths. */ + output_bits = 0; + for (gp = 0; gp < groups; gp += 1) + { + int i; + uint8_t evolve_zero[ALPHABET_SIZE]; + int any_zeros = 0; + + memset (evolve_zero, 0, sizeof (evolve_zero)); + + /* Cannot allow a zero clen when the real frequency is non-zero. + * Note: this means we are going to encode a fairly long code for + * these unused entries. An improvement would be to implement a + * NOTUSED code for when these are actually zero, but this requires + * another data structure (evolve_zero) since we don't know when + * evolve_freq[i] == 0... Briefly tested, looked worse. */ + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (evolve_freq[gp][i] == 0 && real_freq[i] != 0) + { + evolve_freq[gp][i] = 1; + evolve_zero[i] = 1; + any_zeros = 1; + } + } + + output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], + ALPHABET_SIZE, DJW_MAX_CODELEN); + + /* The above faking of frequencies does not matter for the last + * iteration, but we don't know when that is yet. However, it also + * breaks the output_bits computation. Necessary for accuracy, and + * for the (output_bits==0) assert after all bits are output. */ + if (any_zeros) + { + IF_DEBUG1 (usize_t save_total = output_bits); + + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; } + } + + IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n", + save_total - output_bits, gp)); + } + } + + IF_DEBUG1( + DP(RINT "pass %u total bits: %u group uses: ", niter, output_bits); + for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); } + DP(RINT "\n"); + ); + + /* End iteration. 
*/ + + IF_DEBUG1 (if (niter > 1 && best_bits < output_bits) { + DP(RINT "iteration lost %u bits\n", output_bits - best_bits); }); + + if (niter == 1 || (niter < DJW_MAX_ITER && + (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT)) + { + best_bits = output_bits; + goto repeat; + } + + /* Efficiency check. */ + if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n", + input_bytes, output_bits / 8.0)); + + /* Encode: prefix */ + { + uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE]; + uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE]; + uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE]; + djw_prefix prefix; + + prefix.symbol = prefix_symbol; + prefix.mtfsym = prefix_mtfsym; + prefix.repcnt = prefix_repcnt; + + djw_compute_multi_prefix (groups, evolve_clen, & prefix); + if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) + { + goto failure; + } + } + + /* Encode: selector frequencies */ + { + djw_weight gbest_freq[DJW_MAX_GROUPS+1]; + djw_prefix gbest_prefix; + usize_t i; + + gbest_prefix.scount = gbest_no; + gbest_prefix.symbol = gbest; + gbest_prefix.mtfsym = gbest_mtf; + + djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq); + + select_bits = + djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN); + djw_build_codes (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN); + + for (i = 0; i < groups+1; i += 1) + { + if ((ret = xd3_encode_bits (stream, & output, & bstate, + DJW_GBCLEN_BITS, gbest_clen[i]))) + { + goto failure; + } + } + + for (i = 0; i < gbest_prefix.mcount; i += 1) + { + usize_t gp_mtf = gbest_mtf[i]; + usize_t gp_sel_bits = gbest_clen[gp_mtf]; + usize_t gp_sel_code = gbest_code[gp_mtf]; + + XD3_ASSERT (gp_mtf < groups+1); + + if ((ret = xd3_encode_bits (stream, & output, & bstate, + gp_sel_bits, gp_sel_code))) + { + goto failure; + } + + IF_DEBUG (select_bits -= gp_sel_bits); + } + + XD3_ASSERT (select_bits 
== 0); + } + + /* Efficiency check. */ + if (output_bits + select_bits + (8 * output->next) + + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) + { + goto nosecond; + } + + /* Encode: data */ + { + usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE]; + usize_t sector = 0; + + /* Build code tables for each group. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_build_codes (evolve_code[gp], evolve_clen[gp], + ALPHABET_SIZE, DJW_MAX_CODELEN); + } + + /* Now loop over the input. */ + in = input; + p = in->base; + + do + { + /* For each sector. */ + usize_t gp_best = gbest[sector]; + usize_t *gp_codes = evolve_code[gp_best]; + uint8_t *gp_clens = evolve_clen[gp_best]; + + XD3_ASSERT (sector < gbest_no); + + sector += 1; + + /* Encode the sector data. */ + for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) + { + usize_t sym = *p; + usize_t bits = gp_clens[sym]; + usize_t code = gp_codes[sym]; + + IF_DEBUG (output_bits -= bits); + + if ((ret = xd3_encode_bits (stream, & output, & bstate, + bits, code))) + { + goto failure; + } + + GP_PAGE (); + } + } + while (in != NULL); + + XD3_ASSERT (select_bits == 0); + XD3_ASSERT (output_bits == 0); + } + } + + ret = xd3_flush_bits (stream, & output, & bstate); + + if (0) + { + nosecond: + stream->msg = "secondary compression was inefficient"; + ret = XD3_NOSECOND; + } + + failure: + + xd3_free (stream, gbest); + xd3_free (stream, gbest_mtf); + return ret; +} +#endif /* XD3_ENCODER */ + +/*********************************************************************/ +/* DECODE */ +/*********************************************************************/ + +static void +djw_build_decoder (xd3_stream *stream, + usize_t asize, + usize_t abs_max, + const uint8_t *clen, + uint8_t *inorder, + usize_t *base, + usize_t *limit, + usize_t *min_clenp, + usize_t *max_clenp) +{ + int i, l; + const uint8_t *ci; + usize_t nr_clen [DJW_TOTAL_CODES]; + usize_t tmp_base[DJW_TOTAL_CODES]; + int min_clen; + int max_clen; + + /* Assumption: the two temporary 
arrays are large enough to hold abs_max. */ + XD3_ASSERT (abs_max <= DJW_MAX_CODELEN); + + /* This looks something like the start of zlib's inftrees.c */ + memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1)); + + /* Count number of each code length */ + i = asize; + ci = clen; + do + { + /* Caller _must_ check that values are in-range. Most of the time the + * caller decodes a specific number of bits, which imply the max value, + * and the other time the caller decodes a huffman value, which must be + * in-range. Therefore, its an assertion and this function cannot + * otherwise fail. */ + XD3_ASSERT (*ci <= abs_max); + + nr_clen[*ci++]++; + } + while (--i != 0); + + /* Compute min, max. */ + for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } } + min_clen = i; + for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } } + max_clen = i; + + /* Fill the BASE, LIMIT table. */ + tmp_base[min_clen] = 0; + base[min_clen] = 0; + limit[min_clen] = nr_clen[min_clen] - 1; + for (i = min_clen + 1; i <= max_clen; i += 1) + { + usize_t last_limit = ((limit[i-1] + 1) << 1); + tmp_base[i] = tmp_base[i-1] + nr_clen[i-1]; + limit[i] = last_limit + nr_clen[i] - 1; + base[i] = last_limit - tmp_base[i]; + } + + /* Fill the inorder array, canonically ordered codes. */ + ci = clen; + for (i = 0; i < asize; i += 1) + { + if ((l = *ci++) != 0) + { + inorder[tmp_base[l]++] = i; + } + } + + *min_clenp = min_clen; + *max_clenp = max_clen; +} + +static inline int +djw_decode_symbol (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *inorder, + const usize_t *base, + const usize_t *limit, + usize_t min_clen, + usize_t max_clen, + usize_t *sym, + usize_t max_sym) +{ + usize_t code = 0; + usize_t bits = 0; + + /* OPT: Supposedly a small lookup table improves speed here... */ + + /* Code outline is similar to xd3_decode_bits... 
*/ + if (bstate->cur_mask == 0x100) { goto next_byte; } + + for (;;) + { + do + { + if (bits == max_clen) { goto corrupt; } + + bits += 1; + code = (code << 1); + + if (bstate->cur_byte & bstate->cur_mask) { code |= 1; } + + bstate->cur_mask <<= 1; + + if (bits >= min_clen && code <= limit[bits]) { goto done; } + } + while (bstate->cur_mask != 0x100); + + next_byte: + + if (*input == input_end) + { + stream->msg = "secondary decoder end of input"; + return XD3_INTERNAL; + } + + bstate->cur_byte = *(*input)++; + bstate->cur_mask = 1; + } + + done: + + if (base[bits] <= code) + { + usize_t offset = code - base[bits]; + + if (offset <= max_sym) + { + IF_DEBUG2 (DP(RINT "(j) %u ", code)); + *sym = inorder[offset]; + return 0; + } + } + + corrupt: + stream->msg = "secondary decoder invalid code"; + return XD3_INTERNAL; +} + +static int +djw_decode_clclen (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + uint8_t *cl_inorder, + usize_t *cl_base, + usize_t *cl_limit, + usize_t *cl_minlen, + usize_t *cl_maxlen, + uint8_t *cl_mtf) +{ + int ret; + uint8_t cl_clen[DJW_TOTAL_CODES]; + usize_t num_codes, value; + int i; + + /* How many extra code lengths to encode. */ + if ((ret = xd3_decode_bits (stream, bstate, input, + input_end, DJW_EXTRA_CODE_BITS, & num_codes))) + { + return ret; + } + + num_codes += DJW_EXTRA_12OFFSET; + + /* Read num_codes. */ + for (i = 0; i < num_codes; i += 1) + { + if ((ret = xd3_decode_bits (stream, bstate, input, + input_end, DJW_CLCLEN_BITS, & value))) + { + return ret; + } + + cl_clen[i] = value; + } + + /* Set the rest to zero. */ + for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; } + + /* No need to check for in-range clen values, because: */ + XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1); + + /* Build the code-length decoder. 
*/ + djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN, + cl_clen, cl_inorder, cl_base, + cl_limit, cl_minlen, cl_maxlen); + + /* Initialize the MTF state. */ + djw_init_clen_mtf_1_2 (cl_mtf); + + return 0; +} + +static inline int +djw_decode_1_2 (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *inorder, + const usize_t *base, + const usize_t *limit, + const usize_t *minlen, + const usize_t *maxlen, + uint8_t *mtfvals, + usize_t elts, + usize_t skip_offset, + uint8_t *values) +{ + usize_t n = 0, rep = 0, mtf = 0, s = 0; + int ret; + + while (n < elts) + { + /* Special case inside generic code: CLEN only: If not the first group, + * we already know the zero frequencies. */ + if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0) + { + values[n++] = 0; + continue; + } + + /* Repeat last symbol. */ + if (rep != 0) + { + values[n++] = mtfvals[0]; + rep -= 1; + continue; + } + + /* Symbol following last repeat code. */ + if (mtf != 0) + { + usize_t sym = djw_update_mtf (mtfvals, mtf); + values[n++] = sym; + mtf = 0; + continue; + } + + /* Decode next symbol/repeat code. */ + if ((ret = djw_decode_symbol (stream, bstate, input, input_end, + inorder, base, limit, *minlen, *maxlen, + & mtf, DJW_TOTAL_CODES))) { return ret; } + + if (mtf <= RUN_1) + { + /* Repetition. */ + rep = ((mtf + 1) << s); + mtf = 0; + s += 1; + } + else + { + /* Remove the RUN_1 MTF offset. */ + mtf -= 1; + s = 0; + } + } + + /* If (rep != 0) there were too many codes received. 
*/ + if (rep != 0) + { + stream->msg = "secondary decoder invalid repeat code"; + return XD3_INTERNAL; + } + + return 0; +} + +static inline int +djw_decode_prefix (xd3_stream *stream, + bit_state *bstate, + const uint8_t **input, + const uint8_t *input_end, + const uint8_t *cl_inorder, + const usize_t *cl_base, + const usize_t *cl_limit, + const usize_t *cl_minlen, + const usize_t *cl_maxlen, + uint8_t *cl_mtf, + usize_t groups, + uint8_t *clen) +{ + return djw_decode_1_2 (stream, bstate, input, input_end, + cl_inorder, cl_base, cl_limit, + cl_minlen, cl_maxlen, cl_mtf, + ALPHABET_SIZE * groups, ALPHABET_SIZE, clen); +} + +static int +xd3_decode_huff (xd3_stream *stream, + djw_stream *h, + const uint8_t **input_pos, + const uint8_t *const input_end, + uint8_t **output_pos, + const uint8_t *const output_end) +{ + const uint8_t *input = *input_pos; + uint8_t *output = *output_pos; + bit_state bstate = BIT_STATE_DECODE_INIT; + uint8_t *sel_group = NULL; + usize_t groups, gp; + usize_t output_bytes = (output_end - output); + usize_t sector_size; + usize_t sectors; + int ret; + + /* Invalid input. */ + if (output_bytes == 0) + { + stream->msg = "secondary decoder invalid input"; + return XD3_INTERNAL; + } + + /* Decode: number of groups */ + if ((ret = xd3_decode_bits (stream, & bstate, & input, + input_end, DJW_GROUP_BITS, & groups))) + { + goto fail; + } + + groups += 1; + + if (groups > 1) + { + /* Decode: group size */ + if ((ret = xd3_decode_bits (stream, & bstate, & input, + input_end, DJW_SECTORSZ_BITS, + & sector_size))) { goto fail; } + + sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT; + } + else + { + /* Default for groups == 1 */ + sector_size = output_bytes; + } + + sectors = 1 + (output_bytes - 1) / sector_size; + + /* TODO: In the case of groups==1, lots of extra stack space gets used here. + * Could dynamically allocate this memory, which would help with excess + * parameter passing, too. Passing too many parameters in this file, + * simplify it! 
*/ + + /* Outer scope: per-group symbol decoder tables. */ + { + uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE]; + usize_t base [DJW_MAX_GROUPS][DJW_TOTAL_CODES]; + usize_t limit [DJW_MAX_GROUPS][DJW_TOTAL_CODES]; + usize_t minlen [DJW_MAX_GROUPS]; + usize_t maxlen [DJW_MAX_GROUPS]; + + /* Nested scope: code length decoder tables. */ + { + uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE]; + uint8_t cl_inorder[DJW_TOTAL_CODES]; + usize_t cl_base [DJW_MAX_CLCLEN+2]; + usize_t cl_limit [DJW_MAX_CLCLEN+2]; + uint8_t cl_mtf [DJW_TOTAL_CODES]; + usize_t cl_minlen; + usize_t cl_maxlen; + + /* Compute the code length decoder. */ + if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end, + cl_inorder, cl_base, cl_limit, & cl_minlen, + & cl_maxlen, cl_mtf))) { goto fail; } + + /* Now decode each group decoder. */ + if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end, + cl_inorder, cl_base, cl_limit, + & cl_minlen, & cl_maxlen, cl_mtf, + groups, clen[0]))) { goto fail; } + + /* Prepare the actual decoding tables. */ + for (gp = 0; gp < groups; gp += 1) + { + djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN, + clen[gp], inorder[gp], base[gp], limit[gp], + & minlen[gp], & maxlen[gp]); + } + } + + /* Decode: selector clens. */ + { + uint8_t sel_inorder[DJW_MAX_GROUPS+2]; + usize_t sel_base [DJW_MAX_GBCLEN+2]; + usize_t sel_limit [DJW_MAX_GBCLEN+2]; + uint8_t sel_mtf [DJW_MAX_GROUPS+2]; + usize_t sel_minlen; + usize_t sel_maxlen; + + /* Setup group selection. 
*/ + if (groups > 1) + { + uint8_t sel_clen[DJW_MAX_GROUPS+1]; + + for (gp = 0; gp < groups+1; gp += 1) + { + usize_t value; + + if ((ret = xd3_decode_bits (stream, & bstate, & input, + input_end, DJW_GBCLEN_BITS, + & value))) { goto fail; } + + sel_clen[gp] = value; + sel_mtf[gp] = gp; + } + + if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL) + { + ret = ENOMEM; + goto fail; + } + + djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen, + sel_inorder, sel_base, sel_limit, + & sel_minlen, & sel_maxlen); + + if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end, + sel_inorder, sel_base, + sel_limit, & sel_minlen, + & sel_maxlen, sel_mtf, + sectors, 0, sel_group))) { goto fail; } + } + + /* Now decode each sector. */ + { + /* Initialize for (groups==1) case. */ + uint8_t *gp_inorder = inorder[0]; + usize_t *gp_base = base[0]; + usize_t *gp_limit = limit[0]; + usize_t gp_minlen = minlen[0]; + usize_t gp_maxlen = maxlen[0]; + usize_t c; + + for (c = 0; c < sectors; c += 1) + { + usize_t n; + + if (groups >= 2) + { + gp = sel_group[c]; + + XD3_ASSERT (gp < groups); + + gp_inorder = inorder[gp]; + gp_base = base[gp]; + gp_limit = limit[gp]; + gp_minlen = minlen[gp]; + gp_maxlen = maxlen[gp]; + } + + XD3_ASSERT (output_end - output > 0); + + /* Decode next sector. 
*/ + n = min (sector_size, (usize_t) (output_end - output)); + + do + { + usize_t sym; + + if ((ret = djw_decode_symbol (stream, & bstate, + & input, input_end, + gp_inorder, gp_base, + gp_limit, gp_minlen, gp_maxlen, + & sym, ALPHABET_SIZE))) + { + goto fail; + } + + *output++ = sym; + } + while (--n); + } + } + } + } + + IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) + { goto fail; }); + XD3_ASSERT (ret == 0); + + fail: + xd3_free (stream, sel_group); + + (*input_pos) = input; + (*output_pos) = output; + return ret; +} + +#endif diff --git a/xdelta3-fgk.h b/xdelta3-fgk.h new file mode 100644 index 0000000..d1f9b24 --- /dev/null +++ b/xdelta3-fgk.h @@ -0,0 +1,852 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* For demonstration purposes only. + */ + +#ifndef _XDELTA3_FGK_h_ +#define _XDELTA3_FGK_h_ + +/* An implementation of the FGK algorithm described by D.E. Knuth in + * "Dynamic Huffman Coding" in Journal of Algorithms 6. */ + +/* A 32bit counter (fgk_weight) is used as the frequency counter for + * nodes in the huffman tree. TODO: Need oto test for overflow and/or + * reset stats. 
*/ + +typedef struct _fgk_stream fgk_stream; +typedef struct _fgk_node fgk_node; +typedef struct _fgk_block fgk_block; +typedef unsigned int fgk_bit; +typedef uint32_t fgk_weight; + +struct _fgk_block { + union { + fgk_node *un_leader; + fgk_block *un_freeptr; + } un; +}; + +#define block_leader un.un_leader +#define block_freeptr un.un_freeptr + +/* The code can also support fixed huffman encoding/decoding. */ +#define IS_ADAPTIVE 1 + +/* weight is a count of the number of times this element has been seen + * in the current encoding/decoding. parent, right_child, and + * left_child are pointers defining the tree structure. right and + * left point to neighbors in an ordered sequence of weights. The + * left child of a node is always guaranteed to have weight not + * greater than its sibling. fgk_blockLeader points to the element + * with the same weight as itself which is closest to the next + * increasing weight block. */ +struct _fgk_node +{ + fgk_weight weight; + fgk_node *parent; + fgk_node *left_child; + fgk_node *right_child; + fgk_node *left; + fgk_node *right; + fgk_block *my_block; +}; + +/* alphabet_size is the a count of the number of possible leaves in + * the huffman tree. The number of total nodes counting internal + * nodes is ((2 * alphabet_size) - 1). zero_freq_count is the number + * of elements remaining which have zero frequency. zero_freq_exp and + * zero_freq_rem satisfy the equation zero_freq_count = + * 2^zero_freq_exp + zero_freq_rem. root_node is the root of the + * tree, which is initialized to a node with zero frequency and + * contains the 0th such element. free_node contains a pointer to the + * next available fgk_node space. alphabet contains all the elements + * and is indexed by N. remaining_zeros points to the head of the + * list of zeros. 
*/ +struct _fgk_stream +{ + int alphabet_size; + int zero_freq_count; + int zero_freq_exp; + int zero_freq_rem; + int coded_depth; + + int total_nodes; + int total_blocks; + + fgk_bit *coded_bits; + + fgk_block *block_array; + fgk_block *free_block; + + fgk_node *decode_ptr; + fgk_node *remaining_zeros; + fgk_node *alphabet; + fgk_node *root_node; + fgk_node *free_node; +}; + +/*********************************************************************/ +/* Encoder */ +/*********************************************************************/ + +static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size */); +static void fgk_init (fgk_stream *h); +static int fgk_encode_data (fgk_stream *h, + int n); +static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h); + +static int xd3_encode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); + +/*********************************************************************/ +/* Decoder */ +/*********************************************************************/ + +static inline int fgk_decode_bit (fgk_stream *h, + fgk_bit b); +static int fgk_decode_data (fgk_stream *h); +static void fgk_destroy (xd3_stream *stream, + fgk_stream *h); + +static int xd3_decode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + const uint8_t **input, + const uint8_t *const input_end, + uint8_t **output, + const uint8_t *const output_end); + +/*********************************************************************/ +/* Private */ +/*********************************************************************/ + +static unsigned int fgk_find_nth_zero (fgk_stream *h, int n); +static int fgk_nth_zero (fgk_stream *h, int n); +static void fgk_update_tree (fgk_stream *h, int n); +static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n); +static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node); +static void fgk_move_right (fgk_stream *h, fgk_node *node); +static void fgk_promote 
(fgk_stream *h, fgk_node *node); +static void fgk_init_node (fgk_node *node, int i, int size); +static fgk_block* fgk_make_block (fgk_stream *h, fgk_node *l); +static void fgk_free_block (fgk_stream *h, fgk_block *b); +static void fgk_factor_remaining (fgk_stream *h); +static inline void fgk_swap_ptrs (fgk_node **one, fgk_node **two); + +/*********************************************************************/ +/* Basic Routines */ +/*********************************************************************/ + +/* returns an initialized huffman encoder for an alphabet with the + * given size. returns NULL if enough memory cannot be allocated */ +static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */) +{ + int alphabet_size0 = ALPHABET_SIZE; + fgk_stream *h; + + if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL) + { + return NULL; + } + + h->total_nodes = (2 * alphabet_size0) - 1; + h->total_blocks = (2 * h->total_nodes); + h->alphabet = (fgk_node*) xd3_alloc (stream, h->total_nodes, sizeof (fgk_node)); + h->block_array = (fgk_block*) xd3_alloc (stream, h->total_blocks, sizeof (fgk_block)); + h->coded_bits = (fgk_bit*) xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit)); + + if (h->coded_bits == NULL || + h->alphabet == NULL || + h->block_array == NULL) + { + fgk_destroy (stream, h); + return NULL; + } + + h->alphabet_size = alphabet_size0; + + return h; +} + +static void fgk_init (fgk_stream *h) +{ + int i; + + h->root_node = h->alphabet; + h->decode_ptr = h->root_node; + h->free_node = h->alphabet + h->alphabet_size; + h->remaining_zeros = h->alphabet; + h->coded_depth = 0; + h->zero_freq_count = h->alphabet_size + 2; + + /* after two calls to factor_remaining, zero_freq_count == alphabet_size */ + fgk_factor_remaining(h); /* set ZFE and ZFR */ + fgk_factor_remaining(h); /* set ZFDB according to prev state */ + + IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes)); + + for (i = 0; i < 
h->total_blocks-1; i += 1) + { + h->block_array[i].block_freeptr = &h->block_array[i + 1]; + } + + h->block_array[h->total_blocks - 1].block_freeptr = NULL; + h->free_block = h->block_array; + + /* Zero frequency nodes are inserted in the first alphabet_size + * positions, with Value, weight, and a pointer to the next zero + * frequency node. */ + for (i = h->alphabet_size - 1; i >= 0; i -= 1) + { + fgk_init_node (h->alphabet + i, i, h->alphabet_size); + } +} + +static void fgk_swap_ptrs(fgk_node **one, fgk_node **two) +{ + fgk_node *tmp = *one; + *one = *two; + *two = tmp; +} + +/* Takes huffman transmitter h and n, the nth elt in the alphabet, and + * returns the number of required to encode n. */ +static int fgk_encode_data (fgk_stream* h, int n) +{ + fgk_node *target_ptr = h->alphabet + n; + + XD3_ASSERT (n < h->alphabet_size); + + h->coded_depth = 0; + + /* First encode the binary representation of the nth remaining + * zero frequency element in reverse such that bit, which will be + * encoded from h->coded_depth down to 0 will arrive in increasing + * order following the tree path. If there is only one left, it + * is not neccesary to encode these bits. 
*/ + if (IS_ADAPTIVE && target_ptr->weight == 0) + { + unsigned int where, shift; + int bits; + + where = fgk_find_nth_zero(h, n); + shift = 1; + + if (h->zero_freq_rem == 0) + { + bits = h->zero_freq_exp; + } + else + { + bits = h->zero_freq_exp + 1; + } + + while (bits > 0) + { + h->coded_bits[h->coded_depth++] = (shift & where) && 1; + + bits -= 1; + shift <<= 1; + }; + + target_ptr = h->remaining_zeros; + } + + /* The path from root to node is filled into coded_bits in reverse so + * that it is encoded in the right order */ + while (target_ptr != h->root_node) + { + h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr); + + target_ptr = target_ptr->parent; + } + + if (IS_ADAPTIVE) + { + fgk_update_tree(h, n); + } + + return h->coded_depth; +} + +/* Should be called as many times as fgk_encode_data returns. + */ +static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h) +{ + XD3_ASSERT (h->coded_depth > 0); + + return h->coded_bits[--h->coded_depth]; +} + +/* This procedure updates the tree after alphabet[n] has been encoded + * or decoded. 
+ */ +static void fgk_update_tree (fgk_stream *h, int n) +{ + fgk_node *incr_node; + + if (h->alphabet[n].weight == 0) + { + incr_node = fgk_increase_zero_weight (h, n); + } + else + { + incr_node = h->alphabet + n; + } + + while (incr_node != h->root_node) + { + fgk_move_right (h, incr_node); + fgk_promote (h, incr_node); + incr_node->weight += 1; /* incr the parent */ + incr_node = incr_node->parent; /* repeat */ + } + + h->root_node->weight += 1; +} + +static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd) +{ + fgk_node **fwd_par_ptr, **back_par_ptr; + fgk_node *move_back, *tmp; + + move_back = move_fwd->my_block->block_leader; + + if (move_fwd == move_back || + move_fwd->parent == move_back || + move_fwd->weight == 0) + { + return; + } + + move_back->right->left = move_fwd; + + if (move_fwd->left) + { + move_fwd->left->right = move_back; + } + + tmp = move_fwd->right; + move_fwd->right = move_back->right; + + if (tmp == move_back) + { + move_back->right = move_fwd; + } + else + { + tmp->left = move_back; + move_back->right = tmp; + } + + tmp = move_back->left; + move_back->left = move_fwd->left; + + if (tmp == move_fwd) + { + move_fwd->left = move_back; + } + else + { + tmp->right = move_fwd; + move_fwd->left = tmp; + } + + if (move_fwd->parent->right_child == move_fwd) + { + fwd_par_ptr = &move_fwd->parent->right_child; + } + else + { + fwd_par_ptr = &move_fwd->parent->left_child; + } + + if (move_back->parent->right_child == move_back) + { + back_par_ptr = &move_back->parent->right_child; + } + else + { + back_par_ptr = &move_back->parent->left_child; + } + + fgk_swap_ptrs (&move_fwd->parent, &move_back->parent); + fgk_swap_ptrs (fwd_par_ptr, back_par_ptr); + + move_fwd->my_block->block_leader = move_fwd; +} + +/* Shifts node, the leader of its block, into the next block. 
*/
static void fgk_promote (fgk_stream *h, fgk_node *node)
{
  fgk_node  *left_nbr  = node->left;
  fgk_node  *right_nbr = node->right;
  fgk_block *old_block = node->my_block;

  /* Zero-weight nodes are left where they are. */
  if (node->weight == 0)
    {
      return;
    }

  /* node's left neighbour is its own right child and its left child
   * has zero weight: node is the parent of the remaining zeros (?),
   * and has the same weight its right child had before that child was
   * incremented. */
  if (left_nbr == node->right_child &&
      node->left_child != NULL &&
      node->left_child->weight == 0)
    {
      XD3_ASSERT (node->left_child == h->remaining_zeros);
      XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */

      if (node->weight == (right_nbr->weight - 1) && right_nbr != h->root_node)
	{
	  /* Parent and right child both join the right neighbour's block. */
	  fgk_free_block (h, old_block);
	  node->my_block     = right_nbr->my_block;
	  left_nbr->my_block = right_nbr->my_block;
	}

      return;
    }

  if (left_nbr == h->remaining_zeros)
    {
      return;
    }

  /* Leave the old block: free it when the left neighbour belongs to a
   * different block, otherwise the left neighbour becomes its leader. */
  if (left_nbr->my_block != old_block)
    {
      fgk_free_block (h, old_block);
    }
  else
    {
      left_nbr->my_block->block_leader = left_nbr;
    }

  /* Enter the right neighbour's block when its weight is exactly one
   * more than node's and it is not the root (node->parent !=
   * right_nbr); otherwise open a new block led by node. */
  if (node->weight != (right_nbr->weight - 1) || right_nbr == h->root_node)
    {
      node->my_block = fgk_make_block (h, node);
    }
  else
    {
      node->my_block = right_nbr->my_block;
    }
}

/* When an element is seen the first time this is called to remove it from the list of
 * zero weight elements and introduce a new internal node to the tree.
*/
static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n)
{
  fgk_node *leaf = h->alphabet + n;
  fgk_node *zero_head;
  fgk_node *inner;

  if (h->zero_freq_count == 1)
    {
      /* This is the last zero: no new internal node is introduced.
       * The leaf joins its right neighbour's block when that neighbour
       * has weight 1, and otherwise gets a block of its own. */
      leaf->right_child = NULL;

      leaf->my_block = (leaf->right->weight == 1)
	? leaf->right->my_block
	: fgk_make_block (h, leaf);

      h->remaining_zeros = NULL;

      return leaf;
    }

  zero_head = h->remaining_zeros;

  /* Take the next unused node as the new internal node; it occupies
   * the zero node's place in the tree and in the ordered sequence. */
  inner = h->free_node++;

  inner->parent      = zero_head->parent;
  inner->right       = zero_head->right;
  inner->weight      = 0;
  inner->right_child = leaf;
  inner->left        = leaf;

  if (h->remaining_zeros == h->root_node)
    {
      /* This is the first element to be coded: the new node becomes
       * the root, and leaf and root each get a new block. */
      h->root_node    = inner;
      leaf->my_block  = fgk_make_block (h, leaf);
      inner->my_block = fgk_make_block (h, inner);
    }
  else
    {
      inner->right->left = inner;

      /* Re-aim the parent's child link from the zero node to inner. */
      if (zero_head->parent->right_child == zero_head)
	{
	  zero_head->parent->right_child = inner;
	}
      else
	{
	  zero_head->parent->left_child = inner;
	}

      inner->my_block = (inner->right->weight == 1)
	? inner->right->my_block
	: fgk_make_block (h, inner);

      leaf->my_block = inner->my_block;
    }

  /* Unlink the leaf from the zero list; this may move the list head. */
  fgk_eliminate_zero (h, leaf);

  inner->left_child = h->remaining_zeros;

  leaf->right       = inner;
  leaf->left        = h->remaining_zeros;
  leaf->parent      = inner;
  leaf->left_child  = NULL;
  leaf->right_child = NULL;

  h->remaining_zeros->parent = inner;
  h->remaining_zeros->right  = leaf;

  return leaf;
}

/* When a zero frequency element is encoded, it is followed by the
 * binary representation of the index into the remaining elements.
+ * Sets a cache to the element before it so that it can be removed + * without calling this procedure again. */ +static unsigned int fgk_find_nth_zero (fgk_stream* h, int n) +{ + fgk_node *target_ptr = h->alphabet + n; + fgk_node *head_ptr = h->remaining_zeros; + unsigned int idx = 0; + + while (target_ptr != head_ptr) + { + head_ptr = head_ptr->right_child; + idx += 1; + } + + return idx; +} + +/* Splices node out of the list of zeros. */ +static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node) +{ + if (h->zero_freq_count == 1) + { + return; + } + + fgk_factor_remaining(h); + + if (node->left_child == NULL) + { + h->remaining_zeros = h->remaining_zeros->right_child; + h->remaining_zeros->left_child = NULL; + } + else if (node->right_child == NULL) + { + node->left_child->right_child = NULL; + } + else + { + node->right_child->left_child = node->left_child; + node->left_child->right_child = node->right_child; + } +} + +static void fgk_init_node (fgk_node *node, int i, int size) +{ + if (i < size - 1) + { + node->right_child = node + 1; + } + else + { + node->right_child = NULL; + } + + if (i >= 1) + { + node->left_child = node - 1; + } + else + { + node->left_child = NULL; + } + + node->weight = 0; + node->parent = NULL; + node->right = NULL; + node->left = NULL; + node->my_block = NULL; +} + +/* The data structure used is an array of blocks, which are unions of + * free pointers and huffnode pointers. free blocks are a linked list + * of free blocks, the front of which is h->free_block. The used + * blocks are pointers to the head of each block. */ +static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead) +{ + fgk_block *ret = h->free_block; + + XD3_ASSERT (h->free_block != NULL); + + h->free_block = h->free_block->block_freeptr; + + ret->block_leader = lead; + + return ret; +} + +/* Restores the block to the front of the free list. 
*/ +static void fgk_free_block (fgk_stream *h, fgk_block *b) +{ + b->block_freeptr = h->free_block; + h->free_block = b; +} + +/* sets zero_freq_count, zero_freq_rem, and zero_freq_exp to satsity + * the equation given above. */ +static void fgk_factor_remaining (fgk_stream *h) +{ + unsigned int i; + + i = (--h->zero_freq_count); + h->zero_freq_exp = 0; + + while (i > 1) + { + h->zero_freq_exp += 1; + i >>= 1; + } + + i = 1 << h->zero_freq_exp; + + h->zero_freq_rem = h->zero_freq_count - i; +} + +/* receives a bit at a time and returns true when a complete code has + * been received. + */ +static int inline fgk_decode_bit (fgk_stream* h, fgk_bit b) +{ + XD3_ASSERT (b == 1 || b == 0); + + if (IS_ADAPTIVE && h->decode_ptr->weight == 0) + { + int bitsreq; + + if (h->zero_freq_rem == 0) + { + bitsreq = h->zero_freq_exp; + } + else + { + bitsreq = h->zero_freq_exp + 1; + } + + h->coded_bits[h->coded_depth] = b; + h->coded_depth += 1; + + return h->coded_depth >= bitsreq; + } + else + { + if (b) + { + h->decode_ptr = h->decode_ptr->right_child; + } + else + { + h->decode_ptr = h->decode_ptr->left_child; + } + + if (h->decode_ptr->left_child == NULL) + { + /* If the weight is non-zero, finished. */ + if (h->decode_ptr->weight != 0) + { + return 1; + } + + /* zero_freq_count is dropping to 0, finished. */ + return h->zero_freq_count == 1; + } + else + { + return 0; + } + } +} + +static int fgk_nth_zero (fgk_stream* h, int n) +{ + fgk_node *ret = h->remaining_zeros; + + /* ERROR: if during this loop (ret->right_child == NULL) then the + * encoder's zero count is too high. Could return an error code + * now, but is probably unnecessary overhead, since the caller + * should check integrity anyway. */ + for (; n != 0 && ret->right_child != NULL; n -= 1) + { + ret = ret->right_child; + } + + return ret - h->alphabet; +} + +/* once fgk_decode_bit returns 1, this retrieves an index into the + * alphabet otherwise this returns 0, indicating more bits are + * required. 
+ */ +static int fgk_decode_data (fgk_stream* h) +{ + unsigned int elt = h->decode_ptr - h->alphabet; + + if (IS_ADAPTIVE && h->decode_ptr->weight == 0) { + int i; + unsigned int n = 0; + + for (i = 0; i < h->coded_depth - 1; i += 1) + { + n |= h->coded_bits[i]; + n <<= 1; + } + + n |= h->coded_bits[i]; + elt = fgk_nth_zero(h, n); + } + + h->coded_depth = 0; + + if (IS_ADAPTIVE) + { + fgk_update_tree(h, elt); + } + + h->decode_ptr = h->root_node; + + return elt; +} + +static void fgk_destroy (xd3_stream *stream, + fgk_stream *h) +{ + if (h != NULL) + { + xd3_free (stream, h->alphabet); + xd3_free (stream, h->coded_bits); + xd3_free (stream, h->block_array); + xd3_free (stream, h); + } +} + +/*********************************************************************/ +/* Xdelta */ +/*********************************************************************/ + +static int +xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg) +{ + bit_state bstate = BIT_STATE_ENCODE_INIT; + xd3_output *cur_page; + int ret; + + /* OPT: quit compression early if it looks bad */ + for (cur_page = input; cur_page; cur_page = cur_page->next_page) + { + const uint8_t *inp = cur_page->base; + const uint8_t *inp_max = inp + cur_page->next; + + while (inp < inp_max) + { + usize_t bits = fgk_encode_data (sec_stream, *inp++); + + while (bits--) + { + if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; } + } + } + } + + return xd3_flush_bits (stream, & output, & bstate); +} + +static int +xd3_decode_fgk (xd3_stream *stream, + fgk_stream *sec_stream, + const uint8_t **input_pos, + const uint8_t *const input_max, + uint8_t **output_pos, + const uint8_t *const output_max) +{ + bit_state bstate; + uint8_t *output = *output_pos; + const uint8_t *input = *input_pos; + + for (;;) + { + if (input == input_max) + { + stream->msg = "secondary decoder end of input"; + return XD3_INTERNAL; + } + + 
bstate.cur_byte = *input++; + + for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1) + { + int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) && 1); + + if (! done) { continue; } + + *output++ = fgk_decode_data (sec_stream); + + if (output == output_max) + { + /* During regression testing: */ + IF_REGRESSION ({ + int ret; + bstate.cur_mask <<= 1; + if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; } + }); + + (*output_pos) = output; + (*input_pos) = input; + return 0; + } + } + } +} + +#endif /* _XDELTA3_FGK_ */ diff --git a/xdelta3-hash.h b/xdelta3-hash.h new file mode 100644 index 0000000..b098d24 --- /dev/null +++ b/xdelta3-hash.h @@ -0,0 +1,223 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XDELTA3_HASH_H_ +#define _XDELTA3_HASH_H_ + +#if XD3_DEBUG +#define SMALL_HASH_DEBUG1(s,inp) \ + usize_t debug_state; \ + usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash, \ + xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look)) +#define SMALL_HASH_DEBUG2(s,inp) \ + XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \ + xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look))) +#else +#define SMALL_HASH_DEBUG1(s,inp) +#define SMALL_HASH_DEBUG2(s,inp) +#endif /* XD3_DEBUG */ + +/* This is a good hash multiplier for 32-bit LCGs: see "linear + * congruential generators of different sizes and good lattice + * structure" */ +static const uint32_t hash_multiplier = 1597334677U; + +/*********************************************************************** + Permute stuff + ***********************************************************************/ + +#if HASH_PERMUTE == 0 +#define PERMUTE(x) (x) +#else +#define PERMUTE(x) (__single_hash[(uint32_t)x]) + +static const uint16_t __single_hash[256] = +{ + /* Random numbers generated using SLIB's pseudo-random number generator. + * This hashes the input alphabet. 
*/ + 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46, + 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602, + 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6, + 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a, + 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58, + 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b, + 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94, + 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176, + 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743, + 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6, + 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227, + 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d, + 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a, + 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31, + 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9, + 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6, + 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f, + 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f, + 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49, + 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037, + 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad, + 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f, + 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d, + 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b, + 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624, + 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272, + 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806, + 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050, + 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113, + 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2, + 0x6d71, 
0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25, + 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5 +}; +#endif + +/* Update the checksum state. */ +#if ADLER_LARGE_CKSUM +inline uint32_t +xd3_large_cksum_update (uint32_t cksum, + const uint8_t *base, + int look) { + uint32_t old_c = PERMUTE(base[0]); + uint32_t new_c = PERMUTE(base[look]); + uint32_t low = ((cksum & 0xffff) - old_c + new_c) & 0xffff; + uint32_t high = ((cksum >> 16) - (old_c * look) + low) & 0xffff; + return (high << 16) | low; +} +#else +// TODO: revisit this topic +#endif + +/* Note: small cksum is hard-coded for 4 bytes */ +#if UNALIGNED_OK +static inline uint32_t +xd3_scksum (uint32_t *state, + const uint8_t *base, + const int look) +{ + (*state) = *(uint32_t*)base; + return (*state) * hash_multiplier; +} +static inline uint32_t +xd3_small_cksum_update (uint32_t *state, + const uint8_t *base, + int look) +{ + (*state) = *(uint32_t*)(base+1); + return (*state) * hash_multiplier; +} +#else +static inline uint32_t +xd3_scksum (uint32_t *state, + const uint8_t *base, + const int look) +{ + (*state) = (base[0] << 24 | + base[1] << 16 | + base[2] << 8 | + base[3]); + return (*state) * hash_multiplier; +} +static inline uint32_t +xd3_small_cksum_update (uint32_t *state, + const uint8_t *base, + const int look) +{ + (*state) <<= 8; + (*state) |= base[4]; + return (*state) * hash_multiplier; +} +#endif + +/*********************************************************************** + Ctable stuff + ***********************************************************************/ + +static inline usize_t +xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum) +{ + return (cksum >> cfg->shift) ^ (cksum & cfg->mask); +} + +/*********************************************************************** + Cksum function + ***********************************************************************/ + +#if ADLER_LARGE_CKSUM +static inline uint32_t +xd3_lcksum (const uint8_t *seg, const int ln) +{ + int 
i = 0; + uint32_t low = 0; + uint32_t high = 0; + + for (; i < ln; i += 1) + { + low += PERMUTE(*seg++); + high += low; + } + + return ((high & 0xffff) << 16) | (low & 0xffff); +} +#else +static inline uint32_t +xd3_lcksum (const uint8_t *seg, const int ln) +{ + int i, j; + uint32_t h = 0; + for (i = 0, j = ln - 1; i < ln; ++i, --j) { + h += PERMUTE(seg[i]) * hash_multiplier_powers[j]; + } + return h; +} +#endif + +#if XD3_ENCODER +static usize_t +xd3_size_log2 (usize_t slots) +{ + int bits = 28; /* This should not be an unreasonable limit. */ + int i; + + for (i = 3; i <= bits; i += 1) + { + if (slots < (1U << i)) + { + /* TODO: this is compaction=1 in checksum_test.cc and maybe should + * not be fixed at -1. */ + bits = i - 1; + break; + } + } + + return bits; +} + +static void +xd3_size_hashtable (xd3_stream *stream, + usize_t slots, + xd3_hash_cfg *cfg) +{ + int bits = xd3_size_log2 (slots); + + /* TODO: there's a 32-bit assumption here */ + cfg->size = (1 << bits); + cfg->mask = (cfg->size - 1); + cfg->shift = 32 - bits; +} +#endif + +#endif diff --git a/xdelta3-list.h b/xdelta3-list.h new file mode 100644 index 0000000..3c0df5e --- /dev/null +++ b/xdelta3-list.h @@ -0,0 +1,130 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/* XD3_MAKELIST(LTYPE, ETYPE, LNAME) generates a family of static inline
 * helpers for a circular, doubly-linked, intrusive list.
 *
 *   LTYPE  the link type; it must have `next' and `prev' members of type
 *          LTYPE*.  A stand-alone LTYPE object serves as the list head.
 *   ETYPE  the element type that embeds a link.
 *   LNAME  the name of the LTYPE member inside ETYPE.
 *
 * Generated functions (each prefixed with LTYPE_):
 *   entry      map a link pointer back to its enclosing ETYPE
 *   init       make a head an empty list (points at itself)
 *   add        splice `ins' between two adjacent links
 *   push_back  append an element before the head
 *   del        unlink whatever lies between `prev' and `next'
 *   remove     unlink an element, return the entry that followed it
 *   pop_back / pop_front   unlink and return the last / first element
 *   empty      non-zero when the head points at itself
 *   front / back           first / last element, not unlinked
 *   end        non-zero when the element's link is the head itself
 *   next       the entry following an element
 *   length     number of links reachable before returning to the head
 *
 * None of the accessors check for an empty list: on an empty list, or when
 * stepping past the last element, the "entry" returned is computed from the
 * head link and is not a real ETYPE.  Use LTYPE_empty()/LTYPE_end() first.
 *
 * The macro ends in an incomplete typedef so that the invocation can be
 * terminated with a semicolon. */
#define XD3_MAKELIST(LTYPE,ETYPE,LNAME) \
 \
static inline ETYPE* \
LTYPE ## _entry (LTYPE* l) \
{ \
  return (ETYPE*) ((char*) l - (unsigned long) &((ETYPE*) 0)->LNAME); \
} \
 \
static inline void \
LTYPE ## _init (LTYPE *l) \
{ \
  l->next = l; \
  l->prev = l; \
} \
 \
static inline void \
LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins) \
{ \
  next->prev = ins; \
  prev->next = ins; \
  ins->next = next; \
  ins->prev = prev; \
} \
 \
static inline void \
LTYPE ## _push_back (LTYPE *l, ETYPE *i) \
{ \
  LTYPE ## _add (l->prev, l, & i->LNAME); \
} \
 \
static inline void \
LTYPE ## _del (LTYPE *next, \
               LTYPE *prev) \
{ \
  next->prev = prev; \
  prev->next = next; \
} \
 \
static inline ETYPE* \
LTYPE ## _remove (ETYPE *f) \
{ \
  LTYPE *i = f->LNAME.next; \
  LTYPE ## _del (f->LNAME.next, f->LNAME.prev); \
  return LTYPE ## _entry (i); \
} \
 \
static inline ETYPE* \
LTYPE ## _pop_back (LTYPE *l) \
{ \
  LTYPE *i = l->prev; \
  LTYPE ## _del (i->next, i->prev); \
  return LTYPE ## _entry (i); \
} \
 \
static inline ETYPE* \
LTYPE ## _pop_front (LTYPE *l) \
{ \
  LTYPE *i = l->next; \
  LTYPE ## _del (i->next, i->prev); \
  return LTYPE ## _entry (i); \
} \
 \
static inline int \
LTYPE ## _empty (LTYPE *l) \
{ \
  return l == l->next; \
} \
 \
static inline ETYPE* \
LTYPE ## _front (LTYPE *f) \
{ \
  return LTYPE ## _entry (f->next); \
} \
 \
static inline ETYPE* \
LTYPE ## _back (LTYPE *f) \
{ \
  return LTYPE ## _entry (f->prev); \
} \
 \
static inline int \
LTYPE ## _end (LTYPE *f, ETYPE *i) \
{ \
  return f == & i->LNAME; \
} \
 \
static inline ETYPE* \
LTYPE ## _next (ETYPE *f) \
{ \
  return LTYPE ## _entry (f->LNAME.next); \
} \
 \
static inline usize_t \
LTYPE ## _length (LTYPE *l) \
{ \
  LTYPE *p; \
  /* Count in the return type rather than in a signed int. */ \
  usize_t c = 0; \
 \
  for (p = l->next; p != l; p = p->next) \
    { \
      c += 1; \
    } \
 \
  return c; \
} \
 \
typedef int unused_ ## LTYPE
Allow the user to add a comment string to the app header without + * disturbing the default behavior. 3. "Source file must be seekable" + * is not actually true for encoding, given current behavior. Allow + * non-seekable sources? It would in theory let you use a fifo for + * the source. + */ + +/* On error handling and printing: + * + * The xdelta library sets stream->msg to indicate what condition + * caused an internal failure, but many failures originate here and + * are printed here. The return convention is 0 for success, as + * throughout Xdelta code, but special attention is required here for + * the operating system calls with different error handling. See the + * main_file_* routines. All errors in this file have a message + * printed at the time of occurrence. Since some of these calls occur + * within calls to the library, the error may end up being printed + * again with a more general error message. + */ + +/*********************************************************************/ + +#ifndef XD3_POSIX +#define XD3_POSIX 0 +#endif +#ifndef XD3_STDIO +#define XD3_STDIO 0 +#endif +#ifndef XD3_WIN32 +#define XD3_WIN32 0 +#endif +#ifndef NOT_MAIN +#define NOT_MAIN 0 +#endif + +/* Combines xd3_strerror() and strerror() */ +const char* xd3_mainerror(int err_num); + +/* XPRINTX (used by main) prefixes an "xdelta3: " to the output. */ +#define XPR fprintf +#define NT stderr, "xdelta3: " + +/* If none are set, default to posix. */ +#if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0 +#undef XD3_POSIX +#define XD3_POSIX 1 +#endif + +/* Handle externally-compressed inputs. */ +#ifndef EXTERNAL_COMPRESSION +#define EXTERNAL_COMPRESSION 1 +#endif + +#define PRINTHDR_SPECIAL -4378291 + +/* The number of soft-config variables. */ +#define XD3_SOFTCFG_VARCNT 7 + +/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an + * error message from the library.
*/ +#define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", \ + xd3_errstring (stream), xd3_mainerror (ret) + +#include <stdio.h> /* fprintf */ + +#if XD3_POSIX +#include <unistd.h> /* close, read, write... */ +#include <sys/types.h> +#include <fcntl.h> +#endif + +#ifndef _WIN32 +#include <unistd.h> /* lots */ +#include <sys/time.h> /* gettimeofday() */ +#include <sys/stat.h> /* stat() and fstat() */ +#else +#if defined(_MSC_VER) +#define strtoll _strtoi64 +#endif +#include <sys/types.h> +#include <sys/stat.h> +#ifndef WIFEXITED +# define WIFEXITED(stat) (((*((int *) &(stat))) & 0xff) == 0) +#endif +#ifndef WEXITSTATUS +# define WEXITSTATUS(stat) (((*((int *) &(stat))) >> 8) & 0xff) +#endif +#ifndef S_ISREG +//# ifdef S_IFREG +//# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +//# else +# define S_ISREG(m) 1 +//# endif +#endif /* !S_ISREG */ + +// For standard input/output handles +static STARTUPINFO winStartupInfo; +#endif + +/********************************************************************** + ENUMS and TYPES + *********************************************************************/ + +/* These flags (mainly pertaining to main_read() operations) are set + * in the main_file->flags variable. All are related to external + * decompression support. + * + * RD_FIRST causes the external decompression check when the input is + * first read. + * + * RD_NONEXTERNAL disables external decompression for reading a + * compressed input, in the case of Xdelta inputs. Note: Xdelta is + * supported as an external compression type, which is the + * reason for this flag. An example to justify this is: to create a + * delta between two files that are VCDIFF-compressed. Two external + * Xdelta decoders are run to supply decompressed source and target + * inputs to the Xdelta encoder.
*/ +typedef enum +{ + RD_FIRST = (1 << 0), + RD_NONEXTERNAL = (1 << 1), + RD_EXTERNAL_V1 = (1 << 2), +} xd3_read_flags; + +/* main_file->mode values */ +typedef enum +{ + XO_READ = 0, + XO_WRITE = 1, +} main_file_modes; + +/* Main commands. For example, CMD_PRINTHDR is the "xdelta printhdr" + * command. */ +typedef enum +{ + CMD_NONE = 0, + CMD_PRINTHDR, + CMD_PRINTHDRS, + CMD_PRINTDELTA, + CMD_RECODE, + CMD_MERGE_ARG, + CMD_MERGE, +#if XD3_ENCODER + CMD_ENCODE, +#endif + CMD_DECODE, + CMD_TEST, + CMD_CONFIG, +} xd3_cmd; + +#if XD3_ENCODER +#define CMD_DEFAULT CMD_ENCODE +#define IS_ENCODE(cmd) (cmd == CMD_ENCODE) +#else +#define CMD_DEFAULT CMD_DECODE +#define IS_ENCODE(cmd) (0) +#endif + +typedef struct _main_file main_file; +typedef struct _main_extcomp main_extcomp; +typedef struct _main_blklru main_blklru; +typedef struct _main_blklru_list main_blklru_list; +typedef struct _main_merge main_merge; +typedef struct _main_merge_list main_merge_list; + +/* The main_file object supports abstract system calls like open, + * close, read, write, seek, stat. The program uses these to + * represent both seekable files and non-seekable files. Source files + * must be seekable, but the target input and any output file do not + * require seekability. + */ +struct _main_file +{ +#if XD3_STDIO + FILE *file; +#elif XD3_POSIX + int file; +#elif XD3_WIN32 + HANDLE file; +#endif + + int mode; /* XO_READ and XO_WRITE */ + const char *filename; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + char *filename_copy; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + const char *realname; /* File name or /dev/stdin, + * /dev/stdout, /dev/stderr. */ + const main_extcomp *compressor; /* External compression struct. */ + int flags; /* RD_FIRST, RD_NONEXTERNAL, ... 
*/ + xoff_t nread; /* for input position */ + xoff_t nwrite; /* for output position */ + uint8_t *snprintf_buf; /* internal snprintf() use */ +}; + +/* Various strings and magic values used to detect and call external + * compression. See below for examples. */ +struct _main_extcomp +{ + const char *recomp_cmdname; + const char *recomp_options; + + const char *decomp_cmdname; + const char *decomp_options; + + const char *ident; + const char *magic; + usize_t magic_size; + int flags; +}; + +/* This file implements a small LRU of source blocks. For encoding purposes, + * we prevent paging in blocks we've already scanned in the source (return + * XD3_NOTAVAIL). */ +struct _main_blklru_list +{ + main_blklru_list *next; + main_blklru_list *prev; +}; + +struct _main_blklru +{ + uint8_t *blk; + xoff_t blkno; + main_blklru_list link; +}; + +#define LRU_SIZE 32U +#define XD3_MINSRCWINSZ XD3_ALLOCSIZE + +/* ... represented as a list (no cache index). */ +XD3_MAKELIST(main_blklru_list,main_blklru,link); + +/* Merge state: */ + +struct _main_merge_list +{ + main_merge_list *next; + main_merge_list *prev; +}; + +struct _main_merge +{ + const char *filename; + + main_merge_list link; +}; + +XD3_MAKELIST(main_merge_list,main_merge,link); + +// TODO: really need to put options in a struct so that internal +// callers can easily reset state. + +/* Program options: various command line flags and options. 
*/ +static int option_stdout = 0; +static int option_force = 0; +static int option_verbose = 0; +static int option_quiet = 0; +static int option_use_appheader = 1; +static uint8_t* option_appheader = NULL; +static int option_use_secondary = 0; +static char* option_secondary = NULL; +static int option_use_checksum = 1; +static int option_use_altcodetable = 0; +static char* option_smatch_config = NULL; +static int option_no_compress = 0; +static int option_no_output = 0; /* do not write output */ +static const char *option_source_filename = NULL; + +static int option_level = XD3_DEFAULT_LEVEL; +static usize_t option_iopt_size = XD3_DEFAULT_IOPT_SIZE; +static usize_t option_winsize = XD3_DEFAULT_WINSIZE; +static usize_t option_srcwinsz = XD3_DEFAULT_SRCWINSZ; +static usize_t option_sprevsz = XD3_DEFAULT_SPREVSZ; + +/* These variables are suppressed to avoid their use w/o support. main() warns + * appropriately. */ +#if EXTERNAL_COMPRESSION +static int option_decompress_inputs = 1; +static int option_recompress_outputs = 1; +#endif + +/* This is for comparing "printdelta" output without attention to + * copy-instruction modes. */ +#if VCDIFF_TOOLS +static int option_print_cpymode = 1; /* Note: see reset_defaults(). */ +#endif + +/* Static variables */ +IF_DEBUG(static int main_mallocs = 0;) + +static char* program_name = NULL; +static uint8_t* appheader_used = NULL; +static uint8_t* main_bdata = NULL; +static usize_t main_bsize = 0; + +/* The LRU: obviously this is shared by all callers.
*/ +static usize_t lru_size = 0; +static main_blklru *lru = NULL; /* array of lru_size elts */ +static main_blklru_list lru_list; +static main_blklru_list lru_free; +static int do_not_lru = 0; /* set to avoid lru */ + +static int lru_hits = 0; +static int lru_misses = 0; +static int lru_filled = 0; + +/* Hacks for VCDIFF tools */ +static int allow_fake_source = 0; + +/* recode_stream is used by both recode/merge for reading vcdiff inputs */ +static xd3_stream *recode_stream = NULL; + +/* merge_stream is used by merge commands for storing the source encoding */ +static xd3_stream *merge_stream = NULL; + +/* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is + * false just so the program knows the mapping of IDENT->NAME. */ +static main_extcomp extcomp_types[] = +{ + /* The entry for xdelta3 must be 0 because the program_name is set there. */ + { "xdelta3", "-cfq", "xdelta3", "-dcfq", "X", "\xd6\xc3\xc4", 3, + RD_NONEXTERNAL }, + { "bzip2", "-cf", "bzip2", "-dcf", "B", "BZh", 3, 0 }, + { "gzip", "-cf", "gzip", "-dcf", "G", "\037\213", 2, 0 }, + { "compress", "-cf", "uncompress", "-cf", "Z", "\037\235", 2, 0 }, + + /* TODO: add commandline support for magic-less formats */ + /*{ "lzma", "-cf", "lzma", "-dcf", "M", "]\000", 2, 0 },*/ +}; + +// }; + +static int main_input (xd3_cmd cmd, main_file *ifile, + main_file *ofile, main_file *sfile); +static void main_get_appheader (xd3_stream *stream, main_file *ifile, + main_file *output, main_file *sfile); + +static int main_help (void); + +static int +main_version (void) +{ + /* $Format: " DP(RINT \"Xdelta version $Xdelta3Version$, Copyright (C) 2007, 2008, Joshua MacDonald\n\");" $ */ + DP(RINT "Xdelta version 3.0u, Copyright (C) 2007, 2008, Joshua MacDonald\n"); + DP(RINT "Xdelta comes with ABSOLUTELY NO WARRANTY.\n"); + DP(RINT "This is free software, and you are welcome to redistribute it\n"); + DP(RINT "under certain conditions; see \"COPYING\" for details.\n"); + return EXIT_SUCCESS; +} + 
+static int +main_config (void) +{ + main_version (); + + DP(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION); + DP(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES); + DP(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE); + DP(RINT "REGRESSION_TEST=%d\n", REGRESSION_TEST); + DP(RINT "SECONDARY_DJW=%d\n", SECONDARY_DJW); + DP(RINT "SECONDARY_FGK=%d\n", SECONDARY_FGK); + DP(RINT "UNALIGNED_OK=%d\n", UNALIGNED_OK); + DP(RINT "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS); + DP(RINT "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE); + DP(RINT "XD3_DEBUG=%d\n", XD3_DEBUG); + DP(RINT "XD3_ENCODER=%d\n", XD3_ENCODER); + DP(RINT "XD3_POSIX=%d\n", XD3_POSIX); + DP(RINT "XD3_STDIO=%d\n", XD3_STDIO); + DP(RINT "XD3_WIN32=%d\n", XD3_WIN32); + DP(RINT "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64); + DP(RINT "XD3_DEFAULT_LEVEL=%d\n", XD3_DEFAULT_LEVEL); + DP(RINT "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE); + DP(RINT "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ); + DP(RINT "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ); + DP(RINT "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE); + DP(RINT "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE); + DP(RINT "sizeof(void*)=%ld\n", sizeof(void*)); + DP(RINT "sizeof(int)=%ld\n", sizeof(int)); + DP(RINT "sizeof(uint32_t)=%ld\n", sizeof(uint32_t)); + DP(RINT "sizeof(uint64_t)=%ld\n", sizeof(uint64_t)); + DP(RINT "sizeof(usize_t)=%ld\n", sizeof(usize_t)); + DP(RINT "sizeof(xoff_t)=%ld\n", sizeof(xoff_t)); + + return EXIT_SUCCESS; +} + +static void +reset_defaults(void) +{ + option_stdout = 0; + option_force = 0; + option_verbose = 0; + option_quiet = 0; + option_appheader = NULL; + option_use_secondary = 0; + option_secondary = NULL; + option_use_altcodetable = 0; + option_smatch_config = NULL; + option_no_compress = 0; + option_no_output = 0; + option_source_filename = NULL; + program_name = NULL; + appheader_used = NULL; + main_bdata = NULL; + main_bsize = 0; + lru_size = 0; + lru = NULL; + do_not_lru = 0; + 
lru_hits = 0; + lru_misses = 0; + lru_filled = 0; + allow_fake_source = 0; + option_smatch_config = NULL; + + option_use_appheader = 1; + option_use_checksum = 1; +#if EXTERNAL_COMPRESSION + option_decompress_inputs = 1; + option_recompress_outputs = 1; +#endif +#if VCDIFF_TOOLS + option_print_cpymode = 1; +#endif + option_level = XD3_DEFAULT_LEVEL; + option_iopt_size = XD3_DEFAULT_IOPT_SIZE; + option_winsize = XD3_DEFAULT_WINSIZE; + option_srcwinsz = XD3_DEFAULT_SRCWINSZ; + option_sprevsz = XD3_DEFAULT_SPREVSZ; +} + +static void* +main_malloc1 (usize_t size) +{ + void* r = malloc (size); + if (r == NULL) { XPR(NT "malloc: %s\n", xd3_mainerror (ENOMEM)); } + else if (option_verbose > 3) { XPR(NT "malloc: %u: %p\n", size, r); } + return r; +} + +static void* +main_malloc (usize_t size) +{ + void *r = main_malloc1 (size); + if (r) { IF_DEBUG (main_mallocs += 1); } + return r; +} + +static void* +main_alloc (void *opaque, + usize_t items, + usize_t size) +{ + return main_malloc1 (items * size); +} + +static void +main_free1 (void *opaque, void *ptr) +{ + if (option_verbose > 3) { XPR(NT "free: %p\n", ptr); } + free (ptr); +} + +static void +main_free (void *ptr) +{ + if (ptr) + { + IF_DEBUG (main_mallocs -= 1); + main_free1 (NULL, ptr); + IF_DEBUG (XD3_ASSERT(main_mallocs >= 0)); + } +} + +/* This ensures that (ret = errno) always indicates failure, in case errno was + * accidentally not set. If this prints there's a bug somewhere. 
/* Returns a wall-clock reading in milliseconds.
 *
 * The value is only meaningful as a difference between two readings (see
 * get_millisecs_since()).  Where `long' is 32 bits it cannot hold the full
 * count, so the result is the count reduced modulo 2^32. */
static long
get_millisecs_now (void)
{
#ifndef _WIN32
  struct timeval tv = { 0, 0 };

  /* If the call fails tv keeps its zero initializer rather than holding
   * indeterminate values. */
  (void) gettimeofday (& tv, NULL);

  /* Do the arithmetic unsigned: seconds * 1000 does not fit in a 32-bit
   * signed long, and signed overflow is undefined.  Unsigned arithmetic
   * wraps instead, and gives the same value as before wherever the
   * product fits in a long. */
  return (long) ((unsigned long) tv.tv_sec * 1000UL +
                 (unsigned long) tv.tv_usec / 1000UL);
#else
  SYSTEMTIME st;
  FILETIME ft;
  ULARGE_INTEGER ticks;

  GetLocalTime(&st);
  SystemTimeToFileTime(&st, &ft);

  /* Copy the two halves instead of casting FILETIME* to __int64*:
   * FILETIME is only guaranteed 32-bit alignment. */
  ticks.LowPart = ft.dwLowDateTime;
  ticks.HighPart = ft.dwHighDateTime;

  /* FILETIME counts 100ns units; 10000 of them make one millisecond. */
  return (long)(ticks.QuadPart / 10000);
#endif
}
/* Formats a duration given in milliseconds into BUF and returns BUF.
 *
 *   below 1000      "<n> ms"
 *   below 10000     "<n.n> sec"  (one decimal place)
 *   otherwise       "<n> sec"    (whole seconds, truncated)
 *
 * BUF must be large enough for the text; no length is passed in, so the
 * write is unbounded. */
static char*
main_format_millis (long millis, char *buf)
{
  /* MILLIS is a signed long, so it is printed with %ld.  The former %lu
   * mismatched the argument type and rendered a negative duration as a
   * huge positive number. */
  if (millis < 1000) { sprintf (buf, "%ld ms", millis); }
  else if (millis < 10000) { sprintf (buf, "%.1f sec", millis / 1000.0); }
  else { sprintf (buf, "%ld sec", millis / 1000L); }
  return buf;
}
*/ +#if SIZEOF_XOFF_T == 4 + long xx = strtol (s, &e, 0); +#else + long long xx = strtoll (s, &e, 0); +#endif + + if (xx < 0) + { + XPR(NT "-%c: negative integer: %s\n", which, s); + return EXIT_FAILURE; + } + + x = xx; + } + + if (*e != 0) + { + XPR(NT "-%c: invalid integer: %s\n", which, s); + return EXIT_FAILURE; + } + + (*xo) = x; + return 0; +} + +static int +main_atou (const char* arg, usize_t *xo, usize_t low, + usize_t high, char which) +{ + xoff_t x; + int ret; + + if ((ret = main_strtoxoff (arg, & x, which))) { return ret; } + + if (x < low) + { + XPR(NT "-%c: minimum value: %u\n", which, low); + return EXIT_FAILURE; + } + if (high == 0) + { + high = USIZE_T_MAX; + } + if (x > high) + { + XPR(NT "-%c: maximum value: %u\n", which, high); + return EXIT_FAILURE; + } + (*xo) = (usize_t)x; + return 0; +} + +/****************************************************************** + FILE BASICS + ******************************************************************/ + +/* With all the variation in file system-call semantics, arguments, + * return values and error-handling for the POSIX and STDIO file APIs, + * the insides of these functions make me sick, which is why these + * wrappers exist. */ + +#define XOPEN_OPNAME (xfile->mode == XO_READ ? "read" : "write") +#define XOPEN_STDIO (xfile->mode == XO_READ ? "rb" : "wb") +#define XOPEN_POSIX (xfile->mode == XO_READ ? \ + O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC) +#define XOPEN_MODE (xfile->mode == XO_READ ? 
0 : 0666) + +#define XF_ERROR(op, name, ret) \ + do { if (!option_quiet) { XPR(NT "file %s failed: %s: %s: %s\n", (op), \ + XOPEN_OPNAME, (name), xd3_mainerror (ret)); } } while (0) + +#if XD3_STDIO +#define XFNO(f) fileno(f->file) +#define XSTDOUT_XF(f) { (f)->file = stdout; (f)->filename = "/dev/stdout"; } +#define XSTDIN_XF(f) { (f)->file = stdin; (f)->filename = "/dev/stdin"; } + +#elif XD3_POSIX +#define XFNO(f) f->file +#define XSTDOUT_XF(f) \ + { (f)->file = STDOUT_FILENO; (f)->filename = "/dev/stdout"; } +#define XSTDIN_XF(f) \ + { (f)->file = STDIN_FILENO; (f)->filename = "/dev/stdin"; } + +#elif XD3_WIN32 +#define XFNO(f) -1 +#define XSTDOUT_XF(f) { \ + (f)->file = GetStdHandle(STD_OUTPUT_HANDLE); \ + (f)->filename = "(stdout)"; \ + } +#define XSTDIN_XF(f) { \ + (f)->file = GetStdHandle(STD_INPUT_HANDLE); \ + (f)->filename = "(stdin)"; \ + } +#endif + +static void +main_file_init (main_file *xfile) +{ + memset (xfile, 0, sizeof (*xfile)); + +#if XD3_POSIX + xfile->file = -1; +#endif +#if XD3_WIN32 + xfile->file = INVALID_HANDLE_VALUE; +#endif +} + +static int +main_file_isopen (main_file *xfile) +{ +#if XD3_STDIO + return xfile->file != NULL; + +#elif XD3_POSIX + return xfile->file != -1; + +#elif XD3_WIN32 + return xfile->file != INVALID_HANDLE_VALUE; +#endif +} + +static int +main_file_close (main_file *xfile) +{ + int ret = 0; + + if (! 
main_file_isopen (xfile)) + { + return 0; + } + +#if XD3_STDIO + ret = fclose (xfile->file); + xfile->file = NULL; + +#elif XD3_POSIX + ret = close (xfile->file); + xfile->file = -1; + +#elif XD3_WIN32 + if (!CloseHandle(xfile->file)) { + ret = get_errno (); + } + xfile->file = INVALID_HANDLE_VALUE; +#endif + + if (ret != 0) { XF_ERROR ("close", xfile->filename, ret = get_errno ()); } + return ret; +} + +static void +main_file_cleanup (main_file *xfile) +{ + XD3_ASSERT (xfile != NULL); + + if (main_file_isopen (xfile)) + { + main_file_close (xfile); + } + + if (xfile->snprintf_buf != NULL) + { + main_free(xfile->snprintf_buf); + xfile->snprintf_buf = NULL; + } + + if (xfile->filename_copy != NULL) + { + main_free(xfile->filename_copy); + xfile->filename_copy = NULL; + } +} + +static int +main_file_open (main_file *xfile, const char* name, int mode) +{ + int ret = 0; + + xfile->mode = mode; + + XD3_ASSERT (name != NULL); + XD3_ASSERT (! main_file_isopen (xfile)); + if (name[0] == 0) + { + XPR(NT "invalid file name: empty string\n"); + return XD3_INVALID; + } + +#if XD3_STDIO + xfile->file = fopen (name, XOPEN_STDIO); + + ret = (xfile->file == NULL) ? get_errno () : 0; + +#elif XD3_POSIX + if ((ret = open (name, XOPEN_POSIX, XOPEN_MODE)) < 0) + { + ret = get_errno (); + } + else + { + xfile->file = ret; + ret = 0; + } + +#elif XD3_WIN32 + xfile->file = CreateFile(name, + (mode == XO_READ) ? GENERIC_READ : GENERIC_WRITE, + FILE_SHARE_READ, + NULL, + (mode == XO_READ) ? OPEN_EXISTING : + (option_force ? 
CREATE_ALWAYS : CREATE_NEW), + FILE_ATTRIBUTE_NORMAL, + NULL); + if (xfile->file == INVALID_HANDLE_VALUE) { + ret = get_errno (); + } +#endif + if (ret) { XF_ERROR ("open", name, ret); } + else { xfile->realname = name; xfile->nread = 0; } + return ret; +} + +static int +main_file_stat (main_file *xfile, xoff_t *size, int err_ifnoseek) +{ + int ret = 0; +#if XD3_WIN32 +# if (_WIN32_WINNT >= 0x0500) + LARGE_INTEGER li; + if (GetFileSizeEx(xfile->file, &li) == 0) + { + ret = get_errno (); + } + else + { + *size = li.QuadPart; + } +# else + DWORD filesize = GetFileSize(xfile->file, NULL); + if (filesize == INVALID_FILE_SIZE) + { + ret = GetLastError(); + if (ret != NO_ERROR) + return ret; + } + *size = filesize; +# endif +#else + struct stat sbuf; + if (fstat (XFNO (xfile), & sbuf) < 0) + { + ret = get_errno (); + if (err_ifnoseek) + { + XF_ERROR ("stat", xfile->filename, ret); + } + return ret; + } + + if (! S_ISREG (sbuf.st_mode)) + { + if (err_ifnoseek) + { + XPR(NT "source file must be seekable: %s\n", xfile->filename); + } + return ESPIPE; + } + (*size) = sbuf.st_size; +#endif + return ret; +} + +static int +main_file_exists (main_file *xfile) +{ + struct stat sbuf; + return stat (xfile->filename, & sbuf) == 0 && S_ISREG (sbuf.st_mode); +} + +#if (XD3_POSIX || EXTERNAL_COMPRESSION) +/* POSIX-generic code takes a function pointer to read() or write(). + * This calls the function repeatedly until the buffer is full or EOF. + * The NREAD parameter is not set for write, NULL is passed. Return + * is signed, < 0 indicate errors, otherwise byte count. 
*/ +typedef int (xd3_posix_func) (int fd, uint8_t *buf, usize_t size); + +static int +xd3_posix_io (int fd, uint8_t *buf, usize_t size, + xd3_posix_func *func, usize_t *nread) +{ + int ret; + usize_t nproc = 0; + + while (nproc < size) + { + int result = (*func) (fd, buf + nproc, size - nproc); + + if (result < 0) + { + ret = get_errno (); + if (ret != EAGAIN && ret != EINTR) + { + return ret; + } + result = 0; + } + + if (nread != NULL && result == 0) { break; } + + nproc += result; + } + if (nread != NULL) { (*nread) = nproc; } + return 0; +} +#endif + +/* POSIX is unbuffered, while STDIO is buffered. main_file_read() + * should always be called on blocks. */ +static int +main_file_read (main_file *ifile, + uint8_t *buf, + usize_t size, + usize_t *nread, + const char *msg) +{ + int ret = 0; + +#if XD3_STDIO + usize_t result; + + result = fread (buf, 1, size, ifile->file); + + if (result < size && ferror (ifile->file)) + { + ret = get_errno (); + } + else + { + *nread = result; + } + +#elif XD3_POSIX + ret = xd3_posix_io (ifile->file, buf, size, (xd3_posix_func*) &read, nread); + +#elif XD3_WIN32 + DWORD nread2; + if (ReadFile (ifile->file, buf, size, &nread2, NULL) == 0) { + ret = get_errno(); + } else { + *nread = (usize_t)nread2; + } +#endif + + if (ret) + { + XPR(NT "%s: %s: %s\n", msg, ifile->filename, xd3_mainerror (ret)); + } + else + { + if (option_verbose > 3) { XPR(NT "main read: %s: %u\n", + ifile->filename, (*nread)); } + ifile->nread += (*nread); + } + + return ret; +} + +static int +main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg) +{ + int ret = 0; + +#if XD3_STDIO + usize_t result; + + result = fwrite (buf, 1, size, ofile->file); + + if (result != size) { ret = get_errno (); } + +#elif XD3_POSIX + ret = xd3_posix_io (ofile->file, buf, size, (xd3_posix_func*) &write, NULL); + +#elif XD3_WIN32 + DWORD nwrite; + if (WriteFile(ofile->file, buf, size, &nwrite, NULL) == 0) { + ret = get_errno (); + } else { + if (size != 
nwrite) { + XPR(NT "Incorrect write count"); + ret = XD3_INTERNAL; + } + } +#endif + + if (ret) + { + XPR(NT "%s: %s: %s\n", msg, ofile->filename, xd3_mainerror (ret)); + } + else + { + if (option_verbose > 3) { XPR(NT "main write: %s: %u\n", + ofile->filename, size); } + ofile->nwrite += size; + } + + return ret; +} + +static int +main_file_seek (main_file *xfile, xoff_t pos) +{ + int ret = 0; + +#if XD3_STDIO + if (fseek (xfile->file, pos, SEEK_SET) != 0) { ret = get_errno (); } + +#elif XD3_POSIX + if ((xoff_t) lseek (xfile->file, pos, SEEK_SET) != pos) + { ret = get_errno (); } + +#elif XD3_WIN32 +# if (_WIN32_WINNT >= 0x0500) + LARGE_INTEGER move, out; + move.QuadPart = pos; + if (SetFilePointerEx(xfile->file, move, &out, FILE_BEGIN) == 0) { + ret = get_errno (); + } +# else + if (SetFilePointer(xfile->file, (LONG)pos, NULL, FILE_BEGIN) == + INVALID_SET_FILE_POINTER) + { + ret = get_errno (); + } +# endif +#endif + + if (ret) + { + XPR(NT "seek failed: %s: %s\n", xfile->filename, xd3_mainerror (ret)); + } + + return ret; +} + +/* This function simply writes the stream output buffer, if there is + * any, for encode, decode and recode commands. (The VCDIFF tools use + * main_print_func()). */ +static int +main_write_output (xd3_stream* stream, main_file *ofile) +{ + int ret; + + if (option_no_output) + { + return 0; + } + + if (stream->avail_out > 0 && + (ret = main_file_write (ofile, stream->next_out, + stream->avail_out, "write failed"))) + { + return ret; + } + + return 0; +} + +static int +main_set_secondary_flags (xd3_config *config) +{ + int ret; + if (option_use_secondary) + { + /* The default secondary compressor is DJW, if it's compiled. 
*/ + if (option_secondary == NULL) + { + if (SECONDARY_DJW) + { + config->flags |= XD3_SEC_DJW; + } + } + else + { + if (strcmp (option_secondary, "fgk") == 0 && SECONDARY_FGK) + { + config->flags |= XD3_SEC_FGK; + } + else if (strncmp (option_secondary, "djw", 3) == 0 && SECONDARY_DJW) + { + usize_t level = XD3_DEFAULT_SECONDARY_LEVEL; + + config->flags |= XD3_SEC_DJW; + + if (strlen (option_secondary) > 3 && + (ret = main_atou (option_secondary + 3, + &level, + 0, 9, 'S')) != 0 && + !option_quiet) + { + return XD3_INVALID; + } + + /* XD3_SEC_NOXXXX flags disable secondary compression on + * a per-section basis. For djw, ngroups=1 indicates + * minimum work, ngroups=0 uses default settings, which + * is > 1 groups by default. */ + if (level < 1) { config->flags |= XD3_SEC_NODATA; } + if (level < 7) { config->sec_data.ngroups = 1; } + else { config->sec_data.ngroups = 0; } + + if (level < 3) { config->flags |= XD3_SEC_NOINST; } + if (level < 8) { config->sec_inst.ngroups = 1; } + else { config->sec_inst.ngroups = 0; } + + if (level < 5) { config->flags |= XD3_SEC_NOADDR; } + if (level < 9) { config->sec_addr.ngroups = 1; } + else { config->sec_addr.ngroups = 0; } + } + else if (strcmp (option_secondary, "none") == 0 && SECONDARY_DJW) + { + /* No secondary */ + } + else + { + if (!option_quiet) + { + XPR(NT "unrecognized secondary compressor type: %s\n", + option_secondary); + return XD3_INVALID; + } + } + } + } + + return 0; +} + +/****************************************************************** + VCDIFF TOOLS + *****************************************************************/ + +#if VCDIFF_TOOLS +#include "xdelta3-merge.h" + +#if defined(_WIN32) || defined(__DJGPP__) +/* According to the internet, Windows vsnprintf() differs from most + * Unix implementations regarding the terminating 0 when the boundary + * condition is met. It doesn't matter here, we don't rely on the + * trailing 0. 
Besides, both Windows and DJGPP vsnprintf return -1 + * upon truncation, which isn't C99 compliant. To overcome this, + * recent MinGW runtimes provided their own vsnprintf (notice the + * absence of the '_' prefix) but they were initially buggy. So, + * always use the native '_'-prefixed version with Win32. */ +#include <stdarg.h> +#ifdef _WIN32 +#define vsnprintf_func _vsnprintf +#else +#define vsnprintf_func vsnprintf +#endif + +int +snprintf_func (char *str, int n, char *fmt, ...) +{ + va_list a; + int ret; + va_start (a, fmt); + ret = vsnprintf_func (str, n, fmt, a); + va_end (a); + if (ret < 0) + ret = n; + return ret; +} +#else +#define snprintf_func snprintf +#endif + +/* The following macros let VCDIFF printing something printf-like with + * main_file_write(), e.g.,: + * + * VC(UT "trying to be portable: %d\n", x)VE; + */ +#define SNPRINTF_BUFSIZE 1024 +#define VC do { if (((ret = snprintf_func +#define UT (char*)xfile->snprintf_buf, SNPRINTF_BUFSIZE, +#define VE ) >= SNPRINTF_BUFSIZE \ + && (ret = main_print_overflow(ret)) != 0) \ + || (ret = main_file_write(xfile, xfile->snprintf_buf, \ + ret, "print")) != 0) \ + { return ret; } } while (0) + +static int +main_print_overflow (int x) +{ + XPR(NT "internal print buffer overflow: %d bytes\n", x); + return XD3_INTERNAL; +} + +/* This function prints a single VCDIFF window. 
*/ +static int +main_print_window (xd3_stream* stream, main_file *xfile) +{ + int ret; + usize_t size = 0; + + VC(UT " Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2\n")VE; + + while (stream->inst_sect.buf < stream->inst_sect.buf_max) + { + usize_t code = stream->inst_sect.buf[0]; + const uint8_t *addr_before = stream->addr_sect.buf; + const uint8_t *inst_before = stream->inst_sect.buf; + usize_t addr_bytes; + usize_t inst_bytes; + usize_t size_before = size; + + if ((ret = xd3_decode_instruction (stream))) + { + XPR(NT "instruction decode error at %"Q"u: %s\n", + stream->dec_winstart + size, stream->msg); + return ret; + } + + addr_bytes = stream->addr_sect.buf - addr_before; + inst_bytes = stream->inst_sect.buf - inst_before; + + VC(UT " %06"Q"u %03u %s %6u", stream->dec_winstart + size, + option_print_cpymode ? code : 0, + xd3_rtype_to_string ((xd3_rtype) stream->dec_current1.type, option_print_cpymode), + (usize_t) stream->dec_current1.size)VE; + + if (stream->dec_current1.type != XD3_NOOP) + { + if (stream->dec_current1.type >= XD3_CPY) + { + if (stream->dec_current1.addr >= stream->dec_cpylen) + { + VC(UT " T@%-6u", + stream->dec_current1.addr - stream->dec_cpylen)VE; + } + else + { + VC(UT " S@%-6"Q"u", + stream->dec_cpyoff + stream->dec_current1.addr)VE; + } + } + else + { + VC(UT " ")VE; + } + + size += stream->dec_current1.size; + } + + if (stream->dec_current2.type != XD3_NOOP) + { + VC(UT " %s %6u", + xd3_rtype_to_string ((xd3_rtype) stream->dec_current2.type, + option_print_cpymode), + (usize_t)stream->dec_current2.size)VE; + + if (stream->dec_current2.type >= XD3_CPY) + { + if (stream->dec_current2.addr >= stream->dec_cpylen) + { + VC(UT " T@%-6u", + stream->dec_current2.addr - stream->dec_cpylen)VE; + } + else + { + VC(UT " S@%-6"Q"u", + stream->dec_cpyoff + stream->dec_current2.addr)VE; + } + } + + size += stream->dec_current2.size; + } + + VC(UT "\n")VE; + + if (option_verbose && + addr_bytes + inst_bytes >= (size - size_before) && + 
(stream->dec_current1.type >= XD3_CPY || + stream->dec_current2.type >= XD3_CPY)) + { + VC(UT " %06"Q"u (inefficiency) %u encoded as %u bytes\n", + stream->dec_winstart + size_before, + size - size_before, + addr_bytes + inst_bytes)VE; + } + } + + if (stream->dec_tgtlen != size && (stream->flags & XD3_SKIP_WINDOW) == 0) + { + XPR(NT "target window size inconsistency"); + return XD3_INTERNAL; + } + + if (stream->dec_position != stream->dec_maxpos) + { + XPR(NT "target window position inconsistency"); + return XD3_INTERNAL; + } + + if (stream->addr_sect.buf != stream->addr_sect.buf_max) + { + XPR(NT "address section inconsistency"); + return XD3_INTERNAL; + } + + return 0; +} + +static int +main_print_vcdiff_file (main_file *xfile, main_file *file, const char *type) +{ + int ret; /* Used by above macros */ + if (file->filename) + { + VC(UT "XDELTA filename (%s): %s\n", type, + file->filename)VE; + } + if (file->compressor) + { + VC(UT "XDELTA ext comp (%s): %s\n", type, + file->compressor->recomp_cmdname)VE; + } + return 0; +} + +/* This function prints a VCDIFF input, mainly for debugging purposes. */ +static int +main_print_func (xd3_stream* stream, main_file *xfile) +{ + int ret; + + if (option_no_output) + { + return 0; + } + + if (xfile->snprintf_buf == NULL) + { + if ((xfile->snprintf_buf = (uint8_t*)main_malloc(SNPRINTF_BUFSIZE)) == NULL) + { + return ENOMEM; + } + } + + if (stream->dec_winstart == 0) + { + VC(UT "VCDIFF version: 0\n")VE; + VC(UT "VCDIFF header size: %d\n", + stream->dec_hdrsize)VE; + VC(UT "VCDIFF header indicator: ")VE; + if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) + VC(UT "VCD_SECONDARY ")VE; + if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) + VC(UT "VCD_CODETABLE ")VE; + if ((stream->dec_hdr_ind & VCD_APPHEADER) != 0) + VC(UT "VCD_APPHEADER ")VE; + if (stream->dec_hdr_ind == 0) + VC(UT "none")VE; + VC(UT "\n")VE; + + IF_SEC(VC(UT "VCDIFF secondary compressor: %s\n", + stream->sec_type ? 
stream->sec_type->name : "none")VE); + IF_NSEC(VC(UT "VCDIFF secondary compressor: unsupported\n")VE); + + if (stream->dec_hdr_ind & VCD_APPHEADER) + { + uint8_t *apphead; + usize_t appheadsz; + ret = xd3_get_appheader (stream, & apphead, & appheadsz); + + if (ret == 0 && appheadsz > 0) + { + int sq = option_quiet; + main_file i, o, s; + XD3_ASSERT (apphead != NULL); + VC(UT "VCDIFF application header: ")VE; + if ((ret = main_file_write (xfile, apphead, + appheadsz, "print")) != 0) + { return ret; } + VC(UT "\n")VE; + + main_file_init (& i); + main_file_init (& o); + main_file_init (& s); + option_quiet = 1; + main_get_appheader (stream, &i, & o, & s); + option_quiet = sq; + if ((ret = main_print_vcdiff_file (xfile, & o, "output"))) + { return ret; } + if ((ret = main_print_vcdiff_file (xfile, & s, "source"))) + { return ret; } + main_file_cleanup (& i); + main_file_cleanup (& o); + main_file_cleanup (& s); + } + } + } + else + { + VC(UT "\n")VE; + } + + VC(UT "VCDIFF window number: %"Q"u\n", stream->current_window)VE; + VC(UT "VCDIFF window indicator: ")VE; + if ((stream->dec_win_ind & VCD_SOURCE) != 0) VC(UT "VCD_SOURCE ")VE; + if ((stream->dec_win_ind & VCD_TARGET) != 0) VC(UT "VCD_TARGET ")VE; + if ((stream->dec_win_ind & VCD_ADLER32) != 0) VC(UT "VCD_ADLER32 ")VE; + if (stream->dec_win_ind == 0) VC(UT "none")VE; + VC(UT "\n")VE; + + if ((stream->dec_win_ind & VCD_ADLER32) != 0) + { + VC(UT "VCDIFF adler32 checksum: %08X\n", + (usize_t)stream->dec_adler32)VE; + } + + if (stream->dec_del_ind != 0) + { + VC(UT "VCDIFF delta indicator: ")VE; + if ((stream->dec_del_ind & VCD_DATACOMP) != 0) VC(UT "VCD_DATACOMP ")VE; + if ((stream->dec_del_ind & VCD_INSTCOMP) != 0) VC(UT "VCD_INSTCOMP ")VE; + if ((stream->dec_del_ind & VCD_ADDRCOMP) != 0) VC(UT "VCD_ADDRCOMP ")VE; + if (stream->dec_del_ind == 0) VC(UT "none")VE; + VC(UT "\n")VE; + } + + if (stream->dec_winstart != 0) + { + VC(UT "VCDIFF window at offset: %"Q"u\n", stream->dec_winstart)VE; + } + + if (SRCORTGT 
(stream->dec_win_ind)) + { + VC(UT "VCDIFF copy window length: %u\n", + (usize_t)stream->dec_cpylen)VE; + VC(UT "VCDIFF copy window offset: %"Q"u\n", + stream->dec_cpyoff)VE; + } + + VC(UT "VCDIFF delta encoding length: %u\n", + (usize_t)stream->dec_enclen)VE; + VC(UT "VCDIFF target window length: %u\n", + (usize_t)stream->dec_tgtlen)VE; + + VC(UT "VCDIFF data section length: %u\n", + (usize_t)stream->data_sect.size)VE; + VC(UT "VCDIFF inst section length: %u\n", + (usize_t)stream->inst_sect.size)VE; + VC(UT "VCDIFF addr section length: %u\n", + (usize_t)stream->addr_sect.size)VE; + + ret = 0; + if ((stream->flags & XD3_JUST_HDR) != 0) + { + /* Print a header -- finished! */ + ret = PRINTHDR_SPECIAL; + } + else if ((stream->flags & XD3_SKIP_WINDOW) == 0) + { + ret = main_print_window (stream, xfile); + } + + return ret; +} + +static int +main_recode_copy (xd3_stream* stream, + xd3_output* output, + xd3_desect* input) +{ + int ret; + + XD3_ASSERT(output != NULL); + XD3_ASSERT(output->next_page == NULL); + + if ((ret = xd3_decode_allocate (recode_stream, + input->size, + &output->base, + &output->avail))) + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + return ret; + } + + memcpy (output->base, + /* Note: decoder advances buf, so get base of buffer with + * buf_max - size */ + input->buf_max - input->size, + input->size); + output->next = input->size; + return 0; +} + +// Re-encode one window +static int +main_recode_func (xd3_stream* stream, main_file *ofile) +{ + int ret; + xd3_source decode_source; + + XD3_ASSERT(stream->dec_state == DEC_FINISH); + XD3_ASSERT(recode_stream->enc_state == ENC_INIT || + recode_stream->enc_state == ENC_INPUT); + + // Copy partial decoder output to partial encoder inputs + if ((ret = main_recode_copy (recode_stream, + DATA_HEAD(recode_stream), + &stream->data_sect)) || + (ret = main_recode_copy (recode_stream, + INST_HEAD(recode_stream), + &stream->inst_sect)) || + (ret = main_recode_copy (recode_stream, + ADDR_HEAD(recode_stream), + 
&stream->addr_sect))) + { + return ret; + } + + // This jumps to xd3_emit_hdr() + recode_stream->enc_state = ENC_FLUSH; + recode_stream->avail_in = stream->dec_tgtlen; + + if (SRCORTGT (stream->dec_win_ind)) + { + recode_stream->src = & decode_source; + decode_source.srclen = stream->dec_cpylen; + decode_source.srcbase = stream->dec_cpyoff; + } + + if (option_use_checksum && + (stream->dec_win_ind & VCD_ADLER32) != 0) + { + recode_stream->flags |= XD3_ADLER32_RECODE; + recode_stream->recode_adler32 = stream->dec_adler32; + } + + if (option_use_appheader != 0 && + option_appheader != NULL) + { + xd3_set_appheader (recode_stream, option_appheader, + strlen ((char*) option_appheader)); + } + else if (option_use_appheader != 0 && + option_appheader == NULL) + { + if (stream->dec_appheader != NULL) + { + xd3_set_appheader (recode_stream, + stream->dec_appheader, stream->dec_appheadsz); + } + } + + // Output loop + for (;;) + { + switch((ret = xd3_encode_input (recode_stream))) + { + case XD3_INPUT: { + /* finished recoding one window */ + stream->total_out = recode_stream->total_out; + return 0; + } + case XD3_OUTPUT: { + /* main_file_write below */ + break; + } + case XD3_GOTHEADER: + case XD3_WINSTART: + case XD3_WINFINISH: { + /* ignore */ + continue; + } + case XD3_GETSRCBLK: + case 0: { + return XD3_INTERNAL; + } + default: + return ret; + } + + if ((ret = main_write_output (recode_stream, ofile))) + { + return ret; + } + + xd3_consume_output (recode_stream); + } +} +#endif /* VCDIFF_TOOLS */ + +/******************************************************************* + VCDIFF merging + ******************************************************************/ + +#if VCDIFF_TOOLS +/* Modifies static state. 
*/ +static int +main_init_recode_stream (void) +{ + int ret; + int stream_flags = XD3_ADLER32_NOVER | XD3_SKIP_EMIT; + int recode_flags; + xd3_config recode_config; + + XD3_ASSERT (recode_stream == NULL); + + if ((recode_stream = (xd3_stream*) main_malloc(sizeof(xd3_stream))) == NULL) + { + return ENOMEM; + } + + recode_flags = (stream_flags & XD3_SEC_TYPE); + + recode_config.alloc = main_alloc; + recode_config.freef = main_free1; + + xd3_init_config(&recode_config, recode_flags); + + if ((ret = main_set_secondary_flags (&recode_config)) || + (ret = xd3_config_stream (recode_stream, &recode_config)) || + (ret = xd3_encode_init_partial (recode_stream)) || + (ret = xd3_whole_state_init (recode_stream))) + { + XPR(NT XD3_LIB_ERRMSG (recode_stream, ret)); + xd3_free_stream (recode_stream); + recode_stream = NULL; + return ret; + } + + return 0; +} + +/* This processes the sequence of -m arguments. The final input + * is processed as part of the ordinary main_input() loop. */ +static int +main_merge_arguments (main_merge_list* merges) +{ + int ret = 0; + int count = 0; + main_merge *merge = NULL; + xd3_stream merge_input; + + if (main_merge_list_empty (merges)) + { + return 0; + } + + if ((ret = xd3_config_stream (& merge_input, NULL)) || + (ret = xd3_whole_state_init (& merge_input))) + { + XPR(NT XD3_LIB_ERRMSG (& merge_input, ret)); + return ret; + } + + merge = main_merge_list_front (merges); + while (!main_merge_list_end (merges, merge)) + { + main_file mfile; + main_file_init (& mfile); + mfile.filename = merge->filename; + mfile.flags = RD_NONEXTERNAL; + + if ((ret = main_file_open (& mfile, merge->filename, XO_READ))) + { + goto error; + } + + ret = main_input (CMD_MERGE_ARG, & mfile, NULL, NULL); + + if (ret == 0) + { + if (count++ == 0) + { + /* The first merge source is the next merge input. */ + xd3_swap_whole_state (& recode_stream->whole_target, + & merge_input.whole_target); + } + else + { + /* Merge the recode_stream with merge_input. 
*/ + ret = xd3_merge_input_output (recode_stream, + & merge_input.whole_target); + + /* Save the next merge source in merge_input. */ + xd3_swap_whole_state (& recode_stream->whole_target, + & merge_input.whole_target); + } + } + + main_file_cleanup (& mfile); + + if (recode_stream != NULL) + { + xd3_free_stream (recode_stream); + main_free (recode_stream); + recode_stream = NULL; + } + + if (main_bdata != NULL) + { + main_free (main_bdata); + main_bdata = NULL; + main_bsize = 0; + } + + if (ret != 0) + { + goto error; + } + + merge = main_merge_list_next (merge); + } + + XD3_ASSERT (merge_stream == NULL); + + if ((merge_stream = (xd3_stream*) main_malloc (sizeof(xd3_stream))) == NULL) + { + ret = ENOMEM; + goto error; + } + + if ((ret = xd3_config_stream (merge_stream, NULL)) || + (ret = xd3_whole_state_init (merge_stream))) + { + XPR(NT XD3_LIB_ERRMSG (& merge_input, ret)); + goto error; + } + + xd3_swap_whole_state (& merge_stream->whole_target, + & merge_input.whole_target); + ret = 0; + error: + xd3_free_stream (& merge_input); + return ret; +} + +/* This processes each window of the final merge input. This routine + * does not output, it buffers the entire delta into memory. */ +static int +main_merge_func (xd3_stream* stream, main_file *no_write) +{ + int ret; + + if ((ret = xd3_whole_append_window (stream))) + { + return ret; + } + + return 0; +} + + +/* This is called after all windows have been read, as a final step in + * main_input(). This is only called for the final merge step. */ +static int +main_merge_output (xd3_stream *stream, main_file *ofile) +{ + int ret; + usize_t inst_pos = 0; + xoff_t output_pos = 0; + xd3_source recode_source; + usize_t window_num = 0; + int at_least_once = 0; + + /* merge_stream is set if there were arguments. this stream's input + * needs to be applied to the merge_stream source. 
*/ + if ((merge_stream != NULL) && + (ret = xd3_merge_input_output (stream, + & merge_stream->whole_target))) + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + return ret; + } + + if (option_use_appheader != 0 && + option_appheader != NULL) + { + xd3_set_appheader (recode_stream, option_appheader, + strlen ((char*) option_appheader)); + } + + /* Enter the ENC_INPUT state and bypass the next_in == NULL test + * and (leftover) input buffering logic. */ + XD3_ASSERT(recode_stream->enc_state == ENC_INIT); + recode_stream->enc_state = ENC_INPUT; + recode_stream->next_in = main_bdata; + recode_stream->flags |= XD3_FLUSH; + + /* This encodes the entire target. */ + while (inst_pos < stream->whole_target.instlen || !at_least_once) + { + xoff_t window_start = output_pos; + int window_srcset = 0; + xoff_t window_srcmin = 0; + xoff_t window_srcmax = 0; + usize_t window_pos = 0; + usize_t window_size; + + /* at_least_once ensures that we encode at least one window, + * which handles the 0-byte case. */ + at_least_once = 1; + + XD3_ASSERT (recode_stream->enc_state == ENC_INPUT); + + if ((ret = xd3_encode_input (recode_stream)) != XD3_WINSTART) + { + XPR(NT "invalid merge state: %s\n", xd3_mainerror (ret)); + return XD3_INVALID; + } + + /* Window sizes must match from the input to the output, so that + * target copies are in-range (and so that checksums carry + * over). */ + XD3_ASSERT (window_num < stream->whole_target.wininfolen); + window_size = stream->whole_target.wininfo[window_num].length; + + /* Output position should also match. 
*/ + if (output_pos != stream->whole_target.wininfo[window_num].offset) + { + XPR(NT "internal merge error: offset mismatch\n"); + return XD3_INVALID; + } + + if (option_use_checksum && + (stream->dec_win_ind & VCD_ADLER32) != 0) + { + recode_stream->flags |= XD3_ADLER32_RECODE; + recode_stream->recode_adler32 = stream->whole_target.wininfo[window_num].adler32; + } + + window_num++; + + if (main_bsize < window_size) + { + main_free (main_bdata); + main_bdata = NULL; + main_bsize = 0; + if ((main_bdata = (uint8_t*) + main_malloc (window_size)) == NULL) + { + return ENOMEM; + } + main_bsize = window_size; + } + + /* This encodes a single target window. */ + while (window_pos < window_size && + inst_pos < stream->whole_target.instlen) + { + xd3_winst *inst = &stream->whole_target.inst[inst_pos]; + usize_t take = min(inst->size, window_size - window_pos); + xoff_t addr; + + switch (inst->type) + { + case XD3_RUN: + if ((ret = xd3_emit_run (recode_stream, window_pos, take, + stream->whole_target.adds[inst->addr]))) + { + return ret; + } + break; + + case XD3_ADD: + /* Adds are implicit, put them into the input buffer. 
*/ + memcpy (main_bdata + window_pos, + stream->whole_target.adds + inst->addr, take); + break; + + default: /* XD3_COPY + copy mode */ + if (inst->mode != 0) + { + if (window_srcset) { + window_srcmin = min(window_srcmin, inst->addr); + window_srcmax = max(window_srcmax, inst->addr + take); + } else { + window_srcset = 1; + window_srcmin = inst->addr; + window_srcmax = inst->addr + take; + } + addr = inst->addr; + } + else + { + XD3_ASSERT (inst->addr >= window_start); + addr = inst->addr - window_start; + } + IF_DEBUG1 (DP(RINT "[merge copy] winpos %u take %u addr %"Q"u mode %u\n", + window_pos, take, addr, inst->mode)); + if ((ret = xd3_found_match (recode_stream, window_pos, take, + addr, inst->mode != 0))) + { + return ret; + } + break; + } + + window_pos += take; + output_pos += take; + + if (take == inst->size) + { + inst_pos += 1; + } + else + { + /* Modify the instruction for the next pass. */ + if (inst->type != XD3_RUN) + { + inst->addr += take; + } + inst->size -= take; + } + } + + xd3_avail_input (recode_stream, main_bdata, window_pos); + + recode_stream->enc_state = ENC_INSTR; + + if (window_srcset) { + recode_stream->srcwin_decided = 1; + recode_stream->src = &recode_source; + recode_source.srclen = window_srcmax - window_srcmin; + recode_source.srcbase = window_srcmin; + recode_stream->taroff = recode_source.srclen; + } else { + recode_stream->srcwin_decided = 0; + recode_stream->src = NULL; + recode_stream->taroff = 0; + } + + for (;;) + { + switch ((ret = xd3_encode_input (recode_stream))) + { + case XD3_INPUT: { + goto done_window; + } + case XD3_OUTPUT: { + /* main_file_write below */ + break; + } + case XD3_GOTHEADER: + case XD3_WINSTART: + case XD3_WINFINISH: { + /* ignore */ + continue; + } + case XD3_GETSRCBLK: + case 0: { + return XD3_INTERNAL; + } + default: + return ret; + } + + if ((ret = main_write_output(recode_stream, ofile))) + { + return ret; + } + + xd3_consume_output (recode_stream); + } + done_window: + (void) 0; + } + + return 
0; +} +#endif + +/******************************************************************* + Input decompression, output recompression + ******************************************************************/ + +#if EXTERNAL_COMPRESSION +/* This is tricky POSIX-specific code with lots of fork(), pipe(), + * dup(), waitpid(), and exec() business. Most of this code + * originated in PRCS1, which did automatic package-file + * decompression. It works with both XD3_POSIX and XD3_STDIO file + * disciplines. + * + * To automatically detect compressed inputs requires a child process + * to reconstruct the input stream, which was advanced in order to + * detect compression, because it may not be seekable. In other + * words, the main program reads part of the input stream, and if it + * detects a compressed input it then forks a pipe copier process, + * which copies the first-read block out of the main-program's memory, + * then streams the remaining compressed input into the + * input-decompression pipe. + */ + +#include <unistd.h> +#include <sys/stat.h> +#include <sys/wait.h> + +/* Remember which pipe FD is which. */ +#define PIPE_READ_FD 0 +#define PIPE_WRITE_FD 1 + +static pid_t ext_subprocs[2]; +static char* ext_tmpfile = NULL; + +/* Like write(), but makes repeated calls to empty the buffer. */ +static int +main_pipe_write (int outfd, uint8_t *exist_buf, usize_t remain) +{ + int ret; + + if ((ret = xd3_posix_io (outfd, exist_buf, remain, + (xd3_posix_func*) &write, NULL))) + { + XPR(NT "pipe write failed: %s", xd3_mainerror (ret)); + return ret; + } + + return 0; +} + +/* A simple error-reporting waitpid interface. */ +static int +main_waitpid_check(pid_t pid) +{ + int status; + int ret = 0; + + if (waitpid (pid, & status, 0) < 0) + { + ret = get_errno (); + XPR(NT "compression subprocess: wait: %s\n", xd3_mainerror (ret)); + } + else if (! WIFEXITED (status)) + { + ret = ECHILD; + XPR(NT "compression subprocess: signal %d\n", + WIFSIGNALED (status) ? 
WTERMSIG (status) : WSTOPSIG (status)); + } + else if (WEXITSTATUS (status) != 0) + { + ret = ECHILD; + XPR(NT "compression subprocess: exit %d\n", WEXITSTATUS (status)); + } + + return ret; +} + +/* Wait for any existing child processes to check for abnormal exit. */ +static int +main_external_compression_finish (void) +{ + int i; + int ret; + + for (i = 0; i < 2; i += 1) + { + if (! ext_subprocs[i]) { continue; } + + if ((ret = main_waitpid_check (ext_subprocs[i]))) + { + return ret; + } + } + + return 0; +} + +/* This runs as a forked process of main_input_decompress_setup() to + * copy input to the decompression process. First, the available + * input is copied out of the existing buffer, then the buffer is + * reused to continue reading from the compressed input file. */ +static int +main_pipe_copier (uint8_t *pipe_buf, + usize_t pipe_bufsize, + usize_t nread, + main_file *ifile, + int outfd) +{ + int ret; + + for (;;) + { + if (nread > 0 && (ret = main_pipe_write (outfd, pipe_buf, nread))) + { + return ret; + } + + if (nread < pipe_bufsize) + { + break; + } + + if ((ret = main_file_read (ifile, pipe_buf, pipe_bufsize, + & nread, "pipe read failed")) < 0) + { + return ret; + } + } + + return 0; +} + +/* This function is called after we have read some amount of data from + * the input file and detected a compressed input. Here we start a + * decompression subprocess by forking twice. The first process runs + * the decompression command, the second process copies data to the + * input of the first. */ +static int +main_input_decompress_setup (const main_extcomp *decomp, + main_file *ifile, + uint8_t *input_buf, + usize_t input_bufsize, + uint8_t *pipe_buf, + usize_t pipe_bufsize, + usize_t pipe_avail, + usize_t *nread) +{ + /* The two pipes: input and output file descriptors. */ + int outpipefd[2], inpipefd[2]; + int input_fd = -1; /* The resulting input_fd (output of decompression). */ + pid_t decomp_id, copier_id; /* The two subprocs. 
*/ + int ret; + + outpipefd[0] = outpipefd[1] = -1; + inpipefd[0] = inpipefd[1] = -1; + + if (pipe (outpipefd) || pipe (inpipefd)) + { + XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + if ((decomp_id = fork ()) < 0) + { + XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + /* The first child runs the decompression process: */ + if (decomp_id == 0) + { + /* Setup pipes: write to the outpipe, read from the inpipe. */ + if (dup2 (outpipefd[PIPE_WRITE_FD], STDOUT_FILENO) < 0 || + dup2 (inpipefd[PIPE_READ_FD], STDIN_FILENO) < 0 || + close (outpipefd[PIPE_READ_FD]) || + close (outpipefd[PIPE_WRITE_FD]) || + close (inpipefd[PIPE_READ_FD]) || + close (inpipefd[PIPE_WRITE_FD]) || + execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, + decomp->decomp_options, NULL)) + { + XPR(NT "child process %s failed to execute: %s\n", + decomp->decomp_cmdname, xd3_mainerror (get_errno ())); + } + + _exit (127); + } + + ext_subprocs[0] = decomp_id; + + if ((copier_id = fork ()) < 0) + { + XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + /* The second child runs the copier process: */ + if (copier_id == 0) + { + int exitval = 0; + + if (close (inpipefd[PIPE_READ_FD]) || + main_pipe_copier (pipe_buf, pipe_bufsize, pipe_avail, + ifile, inpipefd[PIPE_WRITE_FD]) || + close (inpipefd[PIPE_WRITE_FD])) + { + XPR(NT "child copier process failed: %s\n", + xd3_mainerror (get_errno ())); + exitval = 1; + } + + _exit (exitval); + } + + ext_subprocs[1] = copier_id; + + /* The parent closes both pipes after duplicating the output of + * compression. 
*/ + input_fd = dup (outpipefd[PIPE_READ_FD]); + + if (input_fd < 0 || + main_file_close (ifile) || + close (outpipefd[PIPE_READ_FD]) || + close (outpipefd[PIPE_WRITE_FD]) || + close (inpipefd[PIPE_READ_FD]) || + close (inpipefd[PIPE_WRITE_FD])) + { + XPR(NT "dup/close failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#if XD3_STDIO + /* Note: fdopen() acquires the fd, closes it when finished. */ + if ((ifile->file = fdopen (input_fd, "r")) == NULL) + { + XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#elif XD3_POSIX + ifile->file = input_fd; +#endif + + ifile->compressor = decomp; + + /* Now the input file is decompressed. */ + return main_file_read (ifile, input_buf, input_bufsize, + nread, "input decompression failed"); + + pipe_cleanup: + close (input_fd); + close (outpipefd[PIPE_READ_FD]); + close (outpipefd[PIPE_WRITE_FD]); + close (inpipefd[PIPE_READ_FD]); + close (inpipefd[PIPE_WRITE_FD]); + return ret; +} + + +/* This routine is called when the first buffer of input data is read + * by the main program (unless input decompression is disabled by + * command-line option). If it recognizes the magic number of a known + * input type it invokes decompression. + * + * Skips decompression if the decompression type or the file type is + * RD_NONEXTERNAL. + * + * Behaves exactly like main_file_read, otherwise. + * + * This function uses a separate buffer to read the first small block + * of input. If a compressed input is detected, the separate buffer + * is passed to the pipe copier. This avoids using the same size + * buffer in both cases. 
*/ +static int +main_decompress_input_check (main_file *ifile, + uint8_t *input_buf, + usize_t input_size, + usize_t *nread) +{ + int ret; + usize_t i; + usize_t check_nread; + uint8_t check_buf[XD3_ALLOCSIZE]; + + if ((ret = main_file_read (ifile, check_buf, + min (input_size, XD3_ALLOCSIZE), + & check_nread, "input read failed"))) + { + return ret; + } + + for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) + { + const main_extcomp *decomp = & extcomp_types[i]; + + if ((check_nread > decomp->magic_size) && + /* The following expr skips decompression if we are trying + * to read a VCDIFF input and that is the magic number. */ + !((decomp->flags & RD_NONEXTERNAL) && + (ifile->flags & RD_NONEXTERNAL)) && + memcmp (check_buf, decomp->magic, decomp->magic_size) == 0) + { + if (! option_quiet) + { + XPR(NT "%s | %s %s\n", + ifile->filename, + decomp->decomp_cmdname, + decomp->decomp_options); + } + + return main_input_decompress_setup (decomp, ifile, + input_buf, input_size, + check_buf, XD3_ALLOCSIZE, + check_nread, nread); + } + } + + /* Now read the rest of the input block. */ + (*nread) = 0; + + if (check_nread == XD3_ALLOCSIZE) + { + ret = main_file_read (ifile, input_buf + XD3_ALLOCSIZE, + input_size - XD3_ALLOCSIZE, nread, + "input read failed"); + } + + memcpy (input_buf, check_buf, check_nread); + + (*nread) += check_nread; + + return 0; +} + +/* This is called when the source file needs to be decompressed. We + * fork/exec a decompression command with the proper input and output + * to a temporary file. */ +static int +main_decompress_source (main_file *sfile, xd3_source *source) +{ + const main_extcomp *decomp = sfile->compressor; + pid_t decomp_id; /* One subproc. 
*/ + int input_fd = -1; + int output_fd = -1; + int ret; + char *tmpname = NULL; + char *tmpdir = getenv ("TMPDIR"); + static const char tmpl[] = "/xd3src.XXXXXX"; + + /* Make a template for mkstmp() */ + if (tmpdir == NULL) { tmpdir = "/tmp"; } + if ((tmpname = + (char*) main_malloc (strlen (tmpdir) + sizeof (tmpl) + 1)) == NULL) + { + return ENOMEM; + } + sprintf (tmpname, "%s%s", tmpdir, tmpl); + + XD3_ASSERT (ext_tmpfile == NULL); + ext_tmpfile = tmpname; + + /* Open the output FD. */ + if ((output_fd = mkstemp (tmpname)) < 0) + { + XPR(NT "mkstemp failed: %s: %s", + tmpname, xd3_mainerror (ret = get_errno ())); + goto cleanup; + } + + /* Copy the input FD, reset file position. */ + XD3_ASSERT (main_file_isopen (sfile)); +#if XD3_STDIO + if ((input_fd = dup (fileno (sfile->file))) < 0) + { + XPR(NT "dup failed: %s", xd3_mainerror (ret = get_errno ())); + goto cleanup; + } + main_file_close (sfile); + sfile->file = NULL; +#elif XD3_POSIX + input_fd = sfile->file; + sfile->file = -1; +#endif + + if ((ret = lseek (input_fd, SEEK_SET, 0)) != 0) + { + XPR(NT "lseek failed: : %s", xd3_mainerror (ret = get_errno ())); + goto cleanup; + } + + if ((decomp_id = fork ()) < 0) + { + XPR(NT "fork failed: %s", xd3_mainerror (ret = get_errno ())); + goto cleanup; + } + + /* The child runs the decompression process: */ + if (decomp_id == 0) + { + /* Setup pipes: write to the output file, read from the pipe. */ + if (dup2 (input_fd, STDIN_FILENO) < 0 || + dup2 (output_fd, STDOUT_FILENO) < 0 || + execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, + decomp->decomp_options, NULL)) + { + XPR(NT "child process %s failed to execute: %s\n", + decomp->decomp_cmdname, xd3_mainerror (get_errno ())); + } + + _exit (127); + } + + close (input_fd); + close (output_fd); + input_fd = -1; + output_fd = -1; + + /* Then wait for completion. */ + if ((ret = main_waitpid_check (decomp_id))) + { + goto cleanup; + } + + /* Open/stat the decompressed source file. 
*/ + if ((ret = main_file_open (sfile, tmpname, XO_READ))) { goto cleanup; } + if ((ret = main_file_stat (sfile, & source->size, 1))) { goto cleanup; } + return 0; + + cleanup: + close (input_fd); + close (output_fd); + if (tmpname) { free (tmpname); } + ext_tmpfile = NULL; + return ret; +} + +/* Initiate re-compression of the output stream. This is easier than + * input decompression because we know beforehand that the stream will + * be compressed, whereas the input has already been read when we + * decide it should be decompressed. Thus, it only requires one + * subprocess and one pipe. */ +static int +main_recompress_output (main_file *ofile) +{ + pid_t recomp_id; /* One subproc. */ + int pipefd[2]; /* One pipe. */ + int output_fd = -1; + int ret; + const main_extcomp *recomp = ofile->compressor; + + pipefd[0] = pipefd[1] = -1; + + if (pipe (pipefd)) + { + XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + if ((recomp_id = fork ()) < 0) + { + XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + + /* The child runs the recompression process: */ + if (recomp_id == 0) + { + /* Setup pipes: write to the output file, read from the pipe. */ + if (dup2 (XFNO (ofile), STDOUT_FILENO) < 0 || + dup2 (pipefd[PIPE_READ_FD], STDIN_FILENO) < 0 || + close (pipefd[PIPE_READ_FD]) || + close (pipefd[PIPE_WRITE_FD]) || + execlp (recomp->recomp_cmdname, recomp->recomp_cmdname, + recomp->recomp_options, NULL)) + { + XPR(NT "child process %s failed to execute: %s\n", + recomp->recomp_cmdname, xd3_mainerror (get_errno ())); + } + + _exit (127); + } + + ext_subprocs[0] = recomp_id; + + /* The parent closes both pipes after duplicating the output-fd for + * writing to the compression pipe. 
*/ + output_fd = dup (pipefd[PIPE_WRITE_FD]); + + if (output_fd < 0 || + main_file_close (ofile) || + close (pipefd[PIPE_READ_FD]) || + close (pipefd[PIPE_WRITE_FD])) + { + XPR(NT "close failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#if XD3_STDIO + /* Note: fdopen() acquires the fd, closes it when finished. */ + if ((ofile->file = fdopen (output_fd, "w")) == NULL) + { + XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ())); + goto pipe_cleanup; + } + +#elif XD3_POSIX + ofile->file = output_fd; +#endif + + /* Now the output file will be compressed. */ + return 0; + + pipe_cleanup: + close (output_fd); + close (pipefd[PIPE_READ_FD]); + close (pipefd[PIPE_WRITE_FD]); + return ret; +} +#endif /* EXTERNAL_COMPRESSION */ + +/* Identify the compressor that was used based on its ident string, + * which is passed in the application header. */ +static const main_extcomp* +main_ident_compressor (const char *ident) +{ + usize_t i; + + for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) + { + if (strcmp (extcomp_types[i].ident, ident) == 0) + { + return & extcomp_types[i]; + } + } + + return NULL; +} + +/* Return the main_extcomp record to use for this identifier, if possible. */ +static const main_extcomp* +main_get_compressor (const char *ident) +{ + const main_extcomp *ext = main_ident_compressor (ident); + + if (ext == NULL) + { + if (! option_quiet) + { + XPR(NT "warning: cannot recompress output: " + "unrecognized external compression ID: %s\n", ident); + } + return NULL; + } + else if (! EXTERNAL_COMPRESSION) + { + if (! 
option_quiet) + { + XPR(NT "warning: external support not compiled: " + "original input was compressed: %s\n", ext->recomp_cmdname); + } + return NULL; + } + else + { + return ext; + } +} + +/********************************************************************* + APPLICATION HEADER + *******************************************************************/ + +#if XD3_ENCODER +static const char* +main_apphead_string (const char* x) +{ + const char *y; + + if (x == NULL) { return ""; } + + if (strcmp (x, "/dev/stdin") == 0 || + strcmp (x, "/dev/stdout") == 0 || + strcmp (x, "/dev/stderr") == 0) { return "-"; } + + // TODO: this is not portable + return (y = strrchr (x, '/')) == NULL ? x : y + 1; +} + +static int +main_set_appheader (xd3_stream *stream, main_file *input, main_file *sfile) +{ + /* The user may disable the application header. Once the appheader + * is set, this disables setting it again. */ + if (appheader_used || ! option_use_appheader) { return 0; } + + /* The user may specify the application header, otherwise format the + default header. */ + if (option_appheader) + { + appheader_used = option_appheader; + } + else + { + const char *iname; + const char *icomp; + const char *sname; + const char *scomp; + int len; + + iname = main_apphead_string (input->filename); + icomp = (input->compressor == NULL) ? "" : input->compressor->ident; + len = strlen (iname) + strlen (icomp) + 2; + + if (sfile->filename != NULL) + { + sname = main_apphead_string (sfile->filename); + scomp = (sfile->compressor == NULL) ? 
"" : sfile->compressor->ident; + len += strlen (sname) + strlen (scomp) + 2; + } + else + { + sname = scomp = ""; + } + + if ((appheader_used = (uint8_t*) main_malloc (len)) == NULL) + { + return ENOMEM; + } + + if (sfile->filename == NULL) + { + sprintf ((char*)appheader_used, "%s/%s", iname, icomp); + } + else + { + sprintf ((char*)appheader_used, "%s/%s/%s/%s", + iname, icomp, sname, scomp); + } + } + + xd3_set_appheader (stream, appheader_used, strlen ((char*)appheader_used)); + + return 0; +} +#endif + +static void +main_get_appheader_params (main_file *file, char **parsed, + int output, const char *type, + main_file *other) +{ + /* Set the filename if it was not specified. If output, option_stdout (-c) + * overrides. */ + if (file->filename == NULL && + ! (output && option_stdout) && + strcmp (parsed[0], "-") != 0) + { + file->filename = parsed[0]; + + if (other->filename != NULL) { + /* Take directory from the other file, if it has one. */ + /* TODO: This results in nonsense names like /dev/foo.tar.gz + * and probably the filename-default logic interferes with + * multi-file operation and the standard file extension? + * Possibly the name header is bad, should be off by default. + * Possibly we just want to remember external/compression + * settings. */ + char *last_slash = strrchr(other->filename, '/'); + + if (last_slash != NULL) { + int dlen = last_slash - other->filename; + + XD3_ASSERT(file->filename_copy == NULL); + file->filename_copy = + (char*) main_malloc(dlen + 2 + strlen(file->filename)); + + strncpy(file->filename_copy, other->filename, dlen); + file->filename_copy[dlen] = '/'; + strcpy(file->filename_copy + dlen + 1, parsed[0]); + + file->filename = file->filename_copy; + } + } + + if (! option_quiet) + { + XPR(NT "using default %s filename: %s\n", type, file->filename); + } + } + + /* Set the compressor, initiate de/recompression later. 
*/ + if (file->compressor == NULL && *parsed[1] != 0) + { + file->compressor = main_get_compressor (parsed[1]); + } +} + +static void +main_get_appheader (xd3_stream *stream, main_file *ifile, + main_file *output, main_file *sfile) +{ + uint8_t *apphead; + usize_t appheadsz; + int ret; + + /* The user may disable the application header. Once the appheader + * is set, this disables setting it again. */ + if (! option_use_appheader) { return; } + + ret = xd3_get_appheader (stream, & apphead, & appheadsz); + + /* Ignore failure, it only means we haven't received a header yet. */ + if (ret != 0) { return; } + + if (appheadsz > 0) + { + char *start = (char*)apphead; + char *slash; + int place = 0; + char *parsed[4]; + + memset (parsed, 0, sizeof (parsed)); + + while ((slash = strchr (start, '/')) != NULL) + { + *slash = 0; + parsed[place++] = start; + start = slash + 1; + } + + parsed[place++] = start; + + /* First take the output parameters. */ + if (place == 2 || place == 4) + { + main_get_appheader_params (output, parsed, 1, "output", ifile); + } + + /* Then take the source parameters. */ + if (place == 4) + { + main_get_appheader_params (sfile, parsed+2, 0, "source", ifile); + } + } + + option_use_appheader = 0; + return; +} + +/********************************************************************* + Main I/O routines + **********************************************************************/ + +/* This function acts like the above except it may also try to + * recognize a compressed input when the first buffer of data is read. + * The EXTERNAL_COMPRESSION code is called to search for magic + * numbers. 
*/ +static int +main_read_primary_input (main_file *ifile, + uint8_t *buf, + usize_t size, + usize_t *nread) +{ +#if EXTERNAL_COMPRESSION + if (option_decompress_inputs && ifile->flags & RD_FIRST) + { + ifile->flags &= ~RD_FIRST; + + return main_decompress_input_check (ifile, buf, size, nread); + } +#endif + + return main_file_read (ifile, buf, size, nread, "input read failed"); +} + +/* Open the main output file, sets a default file name, initiate + * recompression. This function is expected to fprint any error + * messages. */ +static int +main_open_output (xd3_stream *stream, main_file *ofile) +{ + int ret; + + if (option_no_output) + { + return 0; + } + + if (ofile->filename == NULL) + { + XSTDOUT_XF (ofile); + + if (option_verbose > 1) + { + XPR(NT "using standard output: %s\n", ofile->filename); + } + } + else + { + /* Stat the file to check for overwrite. */ + if (option_force == 0 && main_file_exists (ofile)) + { + if (!option_quiet) + { + XPR(NT "to overwrite output file specify -f: %s\n", + ofile->filename); + } + return EEXIST; + } + + if ((ret = main_file_open (ofile, ofile->filename, XO_WRITE))) + { + return ret; + } + + if (option_verbose > 1) { XPR(NT "output file: %s\n", ofile->filename); } + } + +#if EXTERNAL_COMPRESSION + /* Do output recompression. */ + if (ofile->compressor != NULL && option_recompress_outputs == 1) + { + if (! option_quiet) + { + XPR(NT "%s %s | %s\n", + ofile->compressor->recomp_cmdname, + ofile->compressor->recomp_options, + ofile->filename); + } + + if ((ret = main_recompress_output (ofile))) + { + return ret; + } + } +#endif + + return 0; +} + +/* This is called at different times for encoding and decoding. The + * encoder calls it immediately, the decoder delays until the + * application header is received. Stream may be NULL, in which case + * xd3_set_source is not called. 
*/ +static int +main_set_source (xd3_stream *stream, int cmd, + main_file *sfile, xd3_source *source) +{ + int ret = 0; + usize_t i; + uint8_t *tmp_buf = NULL; + + /* Open it, check for seekability, set required xd3_source fields. */ + if (allow_fake_source) + { + sfile->mode = XO_READ; + sfile->realname = sfile->filename; + sfile->nread = 0; + source->size = XOFF_T_MAX; + } + else + { + if ((ret = main_file_open (sfile, sfile->filename, XO_READ)) || + (ret = main_file_stat (sfile, & source->size, 1))) + { + goto error; + } + } + + source->name = sfile->filename; + source->ioh = sfile; + source->curblkno = (xoff_t) -1; + source->curblk = NULL; + +#if EXTERNAL_COMPRESSION + if (option_decompress_inputs) + { + /* If encoding, read the header to check for decompression. */ + if (IS_ENCODE (cmd)) + { + usize_t nread; + tmp_buf = (uint8_t*) main_malloc (XD3_ALLOCSIZE); + + if ((ret = main_file_read (sfile, tmp_buf, XD3_ALLOCSIZE, + & nread, "source read failed"))) + { + goto error; + } + + /* Check known magic numbers. */ + for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) + { + const main_extcomp *decomp = & extcomp_types[i]; + + if ((nread > decomp->magic_size) && + memcmp (tmp_buf, decomp->magic, decomp->magic_size) == 0) + { + sfile->compressor = decomp; + break; + } + } + + if (sfile->compressor == NULL) + { + if (option_verbose > 2) + { + XPR(NT "source block 0 read (not compressed)\n"); + } + } + } + + /* In either the encoder or decoder, start decompression. */ + if (sfile->compressor) + { + xoff_t osize = source->size; + + if ((ret = main_decompress_source (sfile, source))) + { + goto error; + } + + if (! 
option_quiet) + { + char s1[32], s2[32]; + XPR(NT "%s | %s %s => %s %.1f%% [ %s , %s ]\n", + sfile->filename, + sfile->compressor->decomp_cmdname, + sfile->compressor->decomp_options, + sfile->realname, + 100.0 * source->size / osize, + main_format_bcnt (osize, s1), + main_format_bcnt (source->size, s2)); + } + } + } +#endif + + /* At this point we know source->size. + * Source buffer, blksize, LRU init. */ + if (source->size < option_srcwinsz) + { + /* Reduce sizes to actual source size, read whole file */ + option_srcwinsz = source->size; + source->blksize = source->size; + lru_size = 1; + } + else + { + option_srcwinsz = max(option_srcwinsz, XD3_MINSRCWINSZ); + + source->blksize = (option_srcwinsz / LRU_SIZE); + lru_size = LRU_SIZE; + } + + main_blklru_list_init (& lru_list); + main_blklru_list_init (& lru_free); + + if (option_verbose) + { + static char buf[32]; + + XPR(NT "source %s winsize %s size %"Q"u\n", + sfile->filename, main_format_bcnt(option_srcwinsz, buf), + source->size); + } + + if (option_verbose > 1) + { + XPR(NT "source block size: %u\n", source->blksize); + } + + if ((lru = (main_blklru*) + main_malloc (sizeof (main_blklru) * lru_size)) == NULL) + { + ret = ENOMEM; + goto error; + } + + for (i = 0; i < lru_size; i += 1) + { + lru[i].blkno = (xoff_t) -1; + + if ((lru[i].blk = (uint8_t*) main_malloc (source->blksize)) == NULL) + { + ret = ENOMEM; + goto error; + } + + main_blklru_list_push_back (& lru_free, & lru[i]); + } + + if (stream && (ret = xd3_set_source (stream, source))) + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + goto error; + } + + error: + if (tmp_buf != NULL) + { + main_free (tmp_buf); + } + + return ret; +} + +static usize_t +main_get_winsize (main_file *ifile) { + xoff_t file_size; + usize_t size = option_winsize; + + if (main_file_stat (ifile, &file_size, 0) == 0) + { + size = (usize_t) min(file_size, (xoff_t) size); + } + + size = max(size, XD3_ALLOCSIZE); + + if (option_verbose > 1) + { + XPR(NT "input window size: %u\n", 
size); + } + + return size; +} + +/******************************************************************* + Source routines + *******************************************************************/ + +/* This is the callback for reading a block of source. This function + * is blocking and it implements a small LRU. + * + * Note that it is possible for main_input() to handle getblk requests + * in a non-blocking manner. If the callback is NULL then the caller + * of xd3_*_input() must handle the XD3_GETSRCBLK return value and + * fill the source in the same way. See xd3_getblk for details. To + * see an example of non-blocking getblk, see xdelta-test.h. */ +static int +main_getblk_func (xd3_stream *stream, + xd3_source *source, + xoff_t blkno) +{ + int ret; + xoff_t pos = blkno * source->blksize; + main_file *sfile = (main_file*) source->ioh; + main_blklru *blru = NULL; + usize_t onblk = xd3_bytes_on_srcblk_fast (source, blkno); + usize_t nread; + usize_t i; + + if (allow_fake_source) + { + source->curblkno = blkno; + source->onblk = onblk; + source->curblk = lru[0].blk; + return 0; + } + + if (do_not_lru) + { + /* Direct lookup assumes sequential scan w/o skipping blocks. */ + int idx = blkno % lru_size; + if (lru[idx].blkno == blkno) + { + source->curblkno = blkno; + source->onblk = onblk; + source->curblk = lru[idx].blk; + lru_hits += 1; + return 0; + } + + if (lru[idx].blkno != (xoff_t)-1 && + lru[idx].blkno != (xoff_t)(blkno - lru_size)) + { + return XD3_TOOFARBACK; + } + } + else + { + /* Sequential search through LRU. */ + for (i = 0; i < lru_size; i += 1) + { + if (lru[i].blkno == blkno) + { + main_blklru_list_remove (& lru[i]); + main_blklru_list_push_back (& lru_list, & lru[i]); + + source->curblkno = blkno; + source->onblk = onblk; + source->curblk = lru[i].blk; + lru_hits += 1; + return 0; + } + } + } + + if (! main_blklru_list_empty (& lru_free)) + { + blru = main_blklru_list_pop_front (& lru_free); + } + else if (! 
main_blklru_list_empty (& lru_list)) + { + if (do_not_lru) { + blru = & lru[blkno % lru_size]; + main_blklru_list_remove(blru); + } else { + blru = main_blklru_list_pop_front (& lru_list); + } + lru_misses += 1; + } + + lru_filled += 1; + + if ((ret = main_file_seek (sfile, pos))) + { + return ret; + } + + if ((ret = main_file_read (sfile, (uint8_t*) blru->blk, source->blksize, + & nread, "source read failed"))) + { + return ret; + } + + if (nread != onblk) + { + XPR(NT "source file size change: %s\n", sfile->filename); + return XD3_INTERNAL; + } + + main_blklru_list_push_back (& lru_list, blru); + + if (option_verbose > 3) + { + if (blru->blkno != (xoff_t)-1) + { + XPR(NT "source block %"Q"u ejects %"Q"u (lru_hits=%u, " + "lru_misses=%u, lru_filled=%u)\n", + blkno, blru->blkno, lru_hits, lru_misses, lru_filled); + } + else + { + XPR(NT "source block %"Q"u read (lru_hits=%u, lru_misses=%u, " + "lru_filled=%u)\n", blkno, lru_hits, lru_misses, lru_filled); + } + } + + blru->blkno = blkno; + source->curblk = blru->blk; + source->curblkno = blkno; + source->onblk = onblk; + + return 0; +} + +/********************************************************************* + Main routines + ********************************************************************/ + +/* This is a generic input function. It calls the xd3_encode_input or + * xd3_decode_input functions and makes calls to the various input + * handling routines above, which coordinate external decompression. 
+ */ +static int +main_input (xd3_cmd cmd, + main_file *ifile, + main_file *ofile, + main_file *sfile) +{ + int ret; + xd3_stream stream; + usize_t nread; + usize_t winsize; + int stream_flags = 0; + xd3_config config; + xd3_source source; + xoff_t last_total_in = 0; + xoff_t last_total_out = 0; + long start_time; + int stdout_only = 0; + int (*input_func) (xd3_stream*); + int (*output_func) (xd3_stream*, main_file *); + + memset (& stream, 0, sizeof (stream)); + memset (& source, 0, sizeof (source)); + memset (& config, 0, sizeof (config)); + + config.alloc = main_alloc; + config.freef = main_free1; + + config.iopt_size = option_iopt_size; + config.sprevsz = option_sprevsz; + + do_not_lru = 0; + + start_time = get_millisecs_now (); + + if (option_use_checksum) { stream_flags |= XD3_ADLER32; } + + /* main_input setup. */ + switch ((int) cmd) + { +#if VCDIFF_TOOLS + if (1) { case CMD_PRINTHDR: stream_flags |= XD3_JUST_HDR; } + else if (1) { case CMD_PRINTHDRS: stream_flags |= XD3_SKIP_WINDOW; } + else { case CMD_PRINTDELTA: stream_flags |= XD3_SKIP_EMIT; } + ifile->flags |= RD_NONEXTERNAL; + input_func = xd3_decode_input; + output_func = main_print_func; + stream_flags |= XD3_ADLER32_NOVER; + stdout_only = 1; + break; + + case CMD_RECODE: + case CMD_MERGE: + case CMD_MERGE_ARG: + /* No source will be read */ + stream_flags |= XD3_ADLER32_NOVER | XD3_SKIP_EMIT; + ifile->flags |= RD_NONEXTERNAL; + input_func = xd3_decode_input; + + if ((ret = main_init_recode_stream ())) + { + return EXIT_FAILURE; + } + + if (cmd == CMD_RECODE) { output_func = main_recode_func; } + else { output_func = main_merge_func; } + break; +#endif /* VCDIFF_TOOLS */ + +#if XD3_ENCODER + case CMD_ENCODE: + do_not_lru = 1; + input_func = xd3_encode_input; + output_func = main_write_output; + + if (option_no_compress) { stream_flags |= XD3_NOCOMPRESS; } + if (option_use_altcodetable) { stream_flags |= XD3_ALT_CODE_TABLE; } + if (option_smatch_config) + { + char *s = option_smatch_config, *e; + int 
values[XD3_SOFTCFG_VARCNT]; + int got; + + config.smatch_cfg = XD3_SMATCH_SOFT; + + for (got = 0; got < XD3_SOFTCFG_VARCNT; got += 1, s = e + 1) + { + values[got] = strtol (s, &e, 10); + + if ((values[got] < 0) || + (e == s) || + (got < XD3_SOFTCFG_VARCNT-1 && *e == 0) || + (got == XD3_SOFTCFG_VARCNT-1 && *e != 0)) + { + XPR(NT "invalid string match specifier (-C) %d: %s\n", + got, s); + return EXIT_FAILURE; + } + } + + config.smatcher_soft.large_look = values[0]; + config.smatcher_soft.large_step = values[1]; + config.smatcher_soft.small_look = values[2]; + config.smatcher_soft.small_chain = values[3]; + config.smatcher_soft.small_lchain = values[4]; + config.smatcher_soft.max_lazy = values[5]; + config.smatcher_soft.long_enough = values[6]; + } + else + { + if (option_verbose > 1) + { + XPR(NT "compression level: %d\n", option_level); + } + if (option_level == 0) + { + stream_flags |= XD3_NOCOMPRESS; + config.smatch_cfg = XD3_SMATCH_FASTEST; + } + else if (option_level == 1) + { config.smatch_cfg = XD3_SMATCH_FASTEST; } + else if (option_level == 2) + { config.smatch_cfg = XD3_SMATCH_FASTER; } + else if (option_level <= 5) + { config.smatch_cfg = XD3_SMATCH_FAST; } + else if (option_level == 6) + { config.smatch_cfg = XD3_SMATCH_DEFAULT; } + else + { config.smatch_cfg = XD3_SMATCH_SLOW; } + } + break; +#endif + case CMD_DECODE: + if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; } + ifile->flags |= RD_NONEXTERNAL; + input_func = xd3_decode_input; + output_func = main_write_output; + break; + default: + XPR(NT "internal error\n"); + return EXIT_FAILURE; + } + + main_bsize = winsize = main_get_winsize (ifile); + + if ((main_bdata = (uint8_t*) main_malloc (winsize)) == NULL) + { + return EXIT_FAILURE; + } + + if (IS_ENCODE (cmd)) + { + /* When encoding, open the source file, possibly decompress it. + * The decoder delays this step until XD3_GOTHEADER. 
*/ + if (sfile->filename != NULL && + (ret = main_set_source (NULL, cmd, sfile, & source))) + { + return EXIT_FAILURE; + } + } + + config.winsize = winsize; + config.srcwin_maxsz = option_srcwinsz; + config.getblk = main_getblk_func; + config.flags = stream_flags; + + if ((ret = main_set_secondary_flags (&config)) || + (ret = xd3_config_stream (& stream, & config))) + { + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } + +#if VCDIFF_TOOLS + if ((cmd == CMD_MERGE || cmd == CMD_MERGE_ARG) && + (ret = xd3_whole_state_init (& stream))) + { + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } +#endif + + if (IS_ENCODE (cmd) && sfile->filename != NULL && + (ret = xd3_set_source (& stream, & source))) + { + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } + + /* This times each window. */ + get_millisecs_since (); + + /* Main input loop. */ + do + { + xoff_t input_offset; + xoff_t input_remain; + usize_t try_read; + + input_offset = ifile->nread; + + input_remain = XOFF_T_MAX - input_offset; + + try_read = (usize_t) min ((xoff_t) config.winsize, input_remain); + + if ((ret = main_read_primary_input (ifile, main_bdata, + try_read, & nread))) + { + return EXIT_FAILURE; + } + + /* If we've reached EOF tell the stream to flush. */ + if (nread < try_read) + { + stream.flags |= XD3_FLUSH; + } + +#if XD3_ENCODER + /* After the first main_read_primary_input completes, we know + * all the information needed to encode the application + * header. */ + if (cmd == CMD_ENCODE && + (ret = main_set_appheader (& stream, ifile, sfile))) + { + return EXIT_FAILURE; + } +#endif + xd3_avail_input (& stream, main_bdata, nread); + + /* If we read zero bytes after encoding at least one window... 
*/ + if (nread == 0 && stream.current_window > 0) { + break; + } + + again: + ret = input_func (& stream); + + switch (ret) + { + case XD3_INPUT: + continue; + + case XD3_GOTHEADER: + { + XD3_ASSERT (stream.current_window == 0); + + /* Need to process the appheader as soon as possible. It may + * contain a suggested default filename/decompression routine for + * the ofile, and it may contain default/decompression routine for + * the sources. */ + if (cmd == CMD_DECODE) + { + /* May need to set the sfile->filename if none was given. */ + main_get_appheader (& stream, ifile, ofile, sfile); + + /* Now open the source file. */ + if ((sfile->filename != NULL) && + (ret = main_set_source (& stream, cmd, sfile, & source))) + { + return EXIT_FAILURE; + } + } + else if (cmd == CMD_PRINTHDR || + cmd == CMD_PRINTHDRS || + cmd == CMD_PRINTDELTA || + cmd == CMD_RECODE) + { + if (sfile->filename == NULL) + { + allow_fake_source = 1; + sfile->filename = "<placeholder>"; + main_set_source (& stream, cmd, sfile, & source); + } + } + } + /* FALLTHROUGH */ + case XD3_WINSTART: + { + /* e.g., set or unset XD3_SKIP_WINDOW. */ + goto again; + } + + case XD3_OUTPUT: + { + /* Defer opening the output file until the stream produces its + * first output for both encoder and decoder, this way we + * delay long enough for the decoder to receive the + * application header. (Or longer if there are skipped + * windows, but I can't think of any reason not to delay + * open.) */ + if (ofile != NULL && + ! main_file_isopen (ofile) && + (ret = main_open_output (& stream, ofile)) != 0) + { + return EXIT_FAILURE; + } + + if ((ret = output_func (& stream, ofile)) && + (ret != PRINTHDR_SPECIAL)) + { + return EXIT_FAILURE; + } + + if (ret == PRINTHDR_SPECIAL) + { + xd3_abort_stream (& stream); + ret = EXIT_SUCCESS; + goto done; + } + + ret = 0; + + xd3_consume_output (& stream); + goto again; + } + + case XD3_WINFINISH: + { + if (IS_ENCODE (cmd) || cmd == CMD_DECODE || cmd == CMD_RECODE) + { + if (! 
option_quiet && IS_ENCODE (cmd) && + main_file_isopen (sfile)) + { + /* Warn when no source copies are found */ + if (option_verbose && ! xd3_encoder_used_source (& stream)) + { + XPR(NT "warning: input window %"Q"u..%"Q"u has " + "no source copies\n", + stream.current_window * winsize, + (stream.current_window+1) * winsize); + } + + /* Limited i-buffer size affects source copies */ + if (option_verbose > 1 && + stream.i_slots_used > stream.iopt_size) + { + XPR(NT "warning: input position %"Q"u overflowed " + "instruction buffer, needed %u (vs. %u), " + "consider raising -I\n", + stream.current_window * winsize, + stream.i_slots_used, stream.iopt_size); + } + } + + if (option_verbose) + { + char rrateavg[32], wrateavg[32], tm[32]; + char rdb[32], wdb[32]; + char trdb[32], twdb[32]; + long millis = get_millisecs_since (); + usize_t this_read = (usize_t)(stream.total_in - + last_total_in); + usize_t this_write = (usize_t)(stream.total_out - + last_total_out); + last_total_in = stream.total_in; + last_total_out = stream.total_out; + + if (option_verbose > 1) + { + XPR(NT "%"Q"u: in %s (%s): out %s (%s): " + "total in %s: out %s: %s\n", + stream.current_window, + main_format_bcnt (this_read, rdb), + main_format_rate (this_read, millis, rrateavg), + main_format_bcnt (this_write, wdb), + main_format_rate (this_write, millis, wrateavg), + main_format_bcnt (stream.total_in, trdb), + main_format_bcnt (stream.total_out, twdb), + main_format_millis (millis, tm)); + } + else + { + XPR(NT "%"Q"u: in %s: out %s: total in %s: " + "out %s: %s\n", + stream.current_window, + main_format_bcnt (this_read, rdb), + main_format_bcnt (this_write, wdb), + main_format_bcnt (stream.total_in, trdb), + main_format_bcnt (stream.total_out, twdb), + main_format_millis (millis, tm)); + } + } + } + goto again; + } + + default: + /* input_func() error */ + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } + } + while (nread == config.winsize); +done: + /* Close the inputs. 
(ifile must be open, sfile may be open) */ + main_file_close (ifile); + if (sfile != NULL) + { + main_file_close (sfile); + } + +#if VCDIFF_TOOLS + if (cmd == CMD_MERGE && + (ret = main_merge_output (& stream, ofile))) + { + return EXIT_FAILURE; + } + + if (cmd == CMD_MERGE_ARG) + { + xd3_swap_whole_state (& stream.whole_target, + & recode_stream->whole_target); + } +#endif /* VCDIFF_TOOLS */ + + /* If output file is not open yet because of delayed-open, it means + * we never encountered a window in the delta, but it could have had + * a VCDIFF header? TODO: solve this elsewhere. For now, it prints + * "nothing to output" below, but the check doesn't happen in case + * of option_no_output. */ + if (! option_no_output && ofile != NULL) + { + if (!stdout_only && ! main_file_isopen (ofile)) + { + XPR(NT "nothing to output: %s\n", ifile->filename); + return EXIT_FAILURE; + } + + /* Have to close the output before calling + * main_external_compression_finish, or else it hangs. */ + if (main_file_close (ofile) != 0) + { + return EXIT_FAILURE; + } + } + +#if EXTERNAL_COMPRESSION + if ((ret = main_external_compression_finish ())) + { + XPR(NT "external compression commands failed\n"); + return EXIT_FAILURE; + } +#endif + + if ((ret = xd3_close_stream (& stream))) + { + XPR(NT XD3_LIB_ERRMSG (& stream, ret)); + return EXIT_FAILURE; + } + +#if XD3_ENCODER + if (option_verbose > 1 && cmd == CMD_ENCODE) + { + XPR(NT "scanner configuration: %s\n", stream.smatcher.name); + XPR(NT "target hash table size: %u\n", stream.small_hash.size); + if (sfile != NULL && sfile->filename != NULL) + { + XPR(NT "source hash table size: %u\n", stream.large_hash.size); + } + } + + if (option_verbose > 2 && cmd == CMD_ENCODE) + { + XPR(NT "source copies: %"Q"u (%"Q"u bytes)\n", + stream.n_scpy, stream.l_scpy); + XPR(NT "target copies: %"Q"u (%"Q"u bytes)\n", + stream.n_tcpy, stream.l_tcpy); + XPR(NT "adds: %"Q"u (%"Q"u bytes)\n", stream.n_add, stream.l_add); + XPR(NT "runs: %"Q"u (%"Q"u bytes)\n", 
stream.n_run, stream.l_run); + } +#endif + + xd3_free_stream (& stream); + + if (option_verbose) + { + char tm[32]; + long end_time = get_millisecs_now (); + xoff_t nwrite = ofile != NULL ? ofile->nwrite : 0; + + XPR(NT "finished in %s; input %"Q"u output %"Q"u bytes (%0.2f%%)\n", + main_format_millis (end_time - start_time, tm), + ifile->nread, nwrite, 100.0 * nwrite / ifile->nread); + } + + return EXIT_SUCCESS; +} + +/* free memory before exit, reset single-use variables. */ +static void +main_cleanup (void) +{ + usize_t i; + + if (appheader_used != NULL && + appheader_used != option_appheader) + { + main_free (appheader_used); + appheader_used = NULL; + } + + main_free (main_bdata); + main_bdata = NULL; + main_bsize = 0; + +#if EXTERNAL_COMPRESSION + main_free (ext_tmpfile); + ext_tmpfile = NULL; +#endif + + for (i = 0; lru && i < lru_size; i += 1) + { + main_free (lru[i].blk); + } + + main_free (lru); + lru = NULL; + + lru_hits = 0; + lru_misses = 0; + lru_filled = 0; + + if (recode_stream != NULL) + { + xd3_free_stream (recode_stream); + main_free (recode_stream); + recode_stream = NULL; + } + + if (merge_stream != NULL) + { + xd3_free_stream (merge_stream); + main_free (merge_stream); + merge_stream = NULL; + } + + XD3_ASSERT (main_mallocs == 0); +} + +static void +setup_environment (int argc, + char **argv, + int *argc_out, + char ***argv_out, + char ***argv_free, + char **env_free) +{ + int n, i, i0; + char *p, *v = getenv("XDELTA"); + if (v == NULL) { + (*argc_out) = argc; + (*argv_out) = argv; + (*argv_free) = NULL; + (*env_free) = NULL; + return; + } + + (*env_free) = (char*) main_malloc(strlen(v) + 1); + strcpy(*env_free, v); + + /* Space needed for extra args, at least # of spaces */ + n = argc + 1; + for (p = *env_free; *p != 0; ) { + if (*p++ == ' ') { + n++; + } + } + + (*argv_free) = (char**) main_malloc(sizeof(char*) * (n + 1)); + (*argv_out) = (*argv_free); + (*argv_out)[0] = argv[0]; + (*argv_out)[n] = NULL; + + i = 1; + for (p = *env_free; *p 
!= 0; ) { + (*argv_out)[i++] = p; + while (*p != ' ' && *p != 0) { + p++; + } + while (*p == ' ') { + *p++ = 0; + } + } + + for (i0 = 1; i0 < argc; i0++) { + (*argv_out)[i++] = argv[i0]; + } + + /* Counting spaces is an upper bound, argv stays NULL terminated. */ + (*argc_out) = i; + while (i <= n) { + (*argv_out)[i++] = NULL; + } +} + +int +#if PYTHON_MODULE || SWIG_MODULE || NOT_MAIN +xd3_main_cmdline (int argc, char **argv) +#else +main (int argc, char **argv) +#endif +{ + static const char *flags = + "0123456789cdefhnqvDJNORTVs:m:B:C:E:F:I:L:O:M:P:W:A::S::"; + xd3_cmd cmd; + main_file ifile; + main_file ofile; + main_file sfile; + main_merge_list merge_order; + main_merge *merge; + int my_optind; + char *my_optarg; + char *my_optstr; + char *sfilename; + int env_argc; + char **env_argv; + char **free_argv; /* malloc() in setup_environment() */ + char *free_value; /* malloc() in setup_environment() */ + int ret; + +#ifdef _WIN32 + GetStartupInfo(&winStartupInfo); + setvbuf(stderr, NULL, _IONBF, 0); /* Do not buffer stderr */ +#endif + + main_file_init (& ifile); + main_file_init (& ofile); + main_file_init (& sfile); + main_merge_list_init (& merge_order); + + reset_defaults(); + + free_argv = NULL; + free_value = NULL; + setup_environment(argc, argv, &env_argc, &env_argv, + &free_argv, &free_value); + cmd = CMD_NONE; + sfilename = NULL; + my_optind = 1; + argv = env_argv; + argc = env_argc; + program_name = env_argv[0]; + extcomp_types[0].recomp_cmdname = program_name; + extcomp_types[0].decomp_cmdname = program_name; + + takearg: + my_optarg = NULL; + my_optstr = argv[my_optind]; + + /* This doesn't use getopt() because it makes trouble for -P & python which + * reenter main() and thus care about freeing all memory. I never had much + * trust for getopt anyway, it's too opaque. This implements a fairly + * standard non-long-option getopt with support for named operations (e.g., + * "xdelta3 [encode|decode|printhdr...] < in > out"). 
*/ + if (my_optstr) + { + if (*my_optstr == '-') { my_optstr += 1; } + else if (cmd == CMD_NONE) { goto nonflag; } + else { my_optstr = NULL; } + } + while (my_optstr) + { + char *s; + my_optarg = NULL; + if ((ret = *my_optstr++) == 0) { my_optind += 1; goto takearg; } + + /* Option handling: first check for one ':' following the option in + * flags, then check for two. The syntax allows: + * + * 1. -Afoo defines optarg="foo" + * 2. -A foo defines optarg="foo" + * 3. -A "" defines optarg="" (allows empty-string) + * 4. -A [EOA or -moreargs] error (mandatory case) + * 5. -A [EOA -moreargs] defines optarg=NULL (optional case) + * 6. -A=foo defines optarg="foo" + * 7. -A= defines optarg="" (mandatory case) + * 8. -A= defines optarg=NULL (optional case) + * + * See tests in test_command_line_arguments(). + */ + s = strchr (flags, ret); + if (s && s[1] && s[1] == ':') + { + int eqcase = 0; + int option = s[2] && s[2] == ':'; + + /* Case 1, set optarg to the remaining characters. */ + my_optarg = my_optstr; + my_optstr = ""; + + /* Case 2-5 */ + if (*my_optarg == 0) + { + /* Condition 4-5 */ + int have_arg = (my_optind < (argc - 1) && + *argv[my_optind+1] != '-'); + + if (! have_arg) + { + if (! option) + { + /* Case 4 */ + XPR(NT "-%c: requires an argument\n", ret); + ret = EXIT_FAILURE; + goto cleanup; + } + /* Case 5. */ + my_optarg = NULL; + } + else + { + /* Case 2-3. */ + my_optarg = argv[++my_optind]; + } + } + /* Case 6-8. */ + else if (*my_optarg == '=') + { + /* Remove the = in all cases. */ + my_optarg += 1; + eqcase = 1; + + if (option && *my_optarg == 0) + { + /* Case 8. */ + my_optarg = NULL; + } + } + } + + switch (ret) + { + /* case: if no '-' was found, maybe check for a command name. 
*/ + nonflag: + if (strcmp (my_optstr, "decode") == 0) { cmd = CMD_DECODE; } + else if (strcmp (my_optstr, "encode") == 0) + { +#if XD3_ENCODER + cmd = CMD_ENCODE; +#else + XPR(NT "encoder support not compiled\n"); + return EXIT_FAILURE; +#endif + } + else if (strcmp (my_optstr, "config") == 0) { cmd = CMD_CONFIG; } +#if REGRESSION_TEST + else if (strcmp (my_optstr, "test") == 0) { cmd = CMD_TEST; } +#endif +#if VCDIFF_TOOLS + else if (strcmp (my_optstr, "printhdr") == 0) { cmd = CMD_PRINTHDR; } + else if (strcmp (my_optstr, "printhdrs") == 0) + { cmd = CMD_PRINTHDRS; } + else if (strcmp (my_optstr, "printdelta") == 0) + { cmd = CMD_PRINTDELTA; } + else if (strcmp (my_optstr, "recode") == 0) { cmd = CMD_RECODE; } + else if (strcmp (my_optstr, "merge") == 0) { cmd = CMD_MERGE; } +#endif + + /* If no option was found and still no command, let the default + * command be encode. The remaining args are treated as + * filenames. */ + if (cmd == CMD_NONE) + { + cmd = CMD_DEFAULT; + my_optstr = NULL; + break; + } + else + { + /* But if we find a command name, continue the getopt loop. 
*/ + my_optind += 1; + goto takearg; + } + + /* gzip-like options */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + option_level = ret - '0'; + break; + case 'f': option_force = 1; break; + case 'v': option_verbose += 1; option_quiet = 0; break; + case 'q': option_quiet = 1; option_verbose = 0; break; + case 'c': option_stdout = 1; break; + case 'd': + if (cmd == CMD_NONE) { cmd = CMD_DECODE; } + else { ret = main_help (); goto exit; } + break; + case 'e': +#if XD3_ENCODER + if (cmd == CMD_NONE) { cmd = CMD_ENCODE; } + else { ret = main_help (); goto exit; } + break; +#else + XPR(NT "encoder support not compiled\n"); + return EXIT_FAILURE; +#endif + + case 'n': option_use_checksum = 0; break; + case 'N': option_no_compress = 1; break; + case 'T': option_use_altcodetable = 1; break; + case 'C': option_smatch_config = my_optarg; break; + case 'J': option_no_output = 1; break; + case 'S': if (my_optarg == NULL) + { + option_use_secondary = 1; + option_secondary = "none"; + } + else + { + option_use_secondary = 1; + option_secondary = my_optarg; + } + break; + case 'A': if (my_optarg == NULL) { option_use_appheader = 0; } + else { option_appheader = (uint8_t*) my_optarg; } break; + case 'B': + if ((ret = main_atou (my_optarg, & option_srcwinsz, XD3_MINSRCWINSZ, + 0, 'B'))) + { + goto exit; + } + break; + case 'I': + if ((ret = main_atou (my_optarg, & option_iopt_size, 0, + 0, 'I'))) + { + goto exit; + } + break; + case 'P': + if ((ret = main_atou (my_optarg, & option_sprevsz, 0, + 0, 'P'))) + { + goto exit; + } + break; + case 'W': + if ((ret = main_atou (my_optarg, & option_winsize, XD3_ALLOCSIZE, + XD3_HARDMAXWINSIZE, 'W'))) + { + goto exit; + } + break; + case 'D': +#if EXTERNAL_COMPRESSION == 0 + if (option_verbose > 0) + { + XPR(NT "warning: -D option ignored, " + "external compression support was not compiled\n"); + } +#else + option_decompress_inputs = 0; +#endif + break; + case 'R': +#if 
EXTERNAL_COMPRESSION == 0 + if (option_verbose > 0) + { + XPR(NT "warning: -R option ignored, " + "external compression support was not compiled\n"); + } +#else + option_recompress_outputs = 0; +#endif + break; + case 's': + if (sfilename != NULL) + { + XPR(NT "specify only one source file\n"); + goto cleanup; + } + + sfilename = my_optarg; + break; + case 'm': + if ((merge = (main_merge*) + main_malloc (sizeof (main_merge))) == NULL) + { + goto cleanup; + } + main_merge_list_push_back (& merge_order, merge); + merge->filename = my_optarg; + break; + case 'V': + ret = main_version (); goto exit; + default: + ret = main_help (); goto exit; + } + } + + option_source_filename = sfilename; + + /* In case there were no arguments, set the default command. */ + if (cmd == CMD_NONE) { cmd = CMD_DEFAULT; } + + argc -= my_optind; + argv += my_optind; + + /* There may be up to two more arguments. */ + if (argc > 2) + { + XPR(NT "too many filenames: %s ...\n", argv[2]); + goto cleanup; + } + + ifile.flags = RD_FIRST; + sfile.flags = RD_FIRST; + sfile.filename = option_source_filename; + + /* The infile takes the next argument, if there is one. But if not, infile + * is set to stdin. */ + if (argc > 0) + { + ifile.filename = argv[0]; + + if ((ret = main_file_open (& ifile, ifile.filename, XO_READ))) + { + goto cleanup; + } + } + else + { + XSTDIN_XF (& ifile); + } + + /* The ofile takes the following argument, if there is one. But if not, it + * is left NULL until the application header is processed. It will be set + * in main_open_output. */ + if (argc > 1) + { + /* Check for conflicting arguments. */ + if (option_stdout && ! option_quiet) + { + XPR(NT "warning: -c option overrides output filename: %s\n", + argv[1]); + } + + if (! 
option_stdout) { ofile.filename = argv[1]; } + } + +#if VCDIFF_TOOLS + if (cmd == CMD_MERGE && + (ret = main_merge_arguments (&merge_order))) + { + goto cleanup; + } +#endif /* VCDIFF_TOOLS */ + + switch (cmd) + { + case CMD_PRINTHDR: + case CMD_PRINTHDRS: + case CMD_PRINTDELTA: +#if XD3_ENCODER + case CMD_ENCODE: + case CMD_RECODE: + case CMD_MERGE: +#endif + case CMD_DECODE: + ret = main_input (cmd, & ifile, & ofile, & sfile); + break; + +#if REGRESSION_TEST + case CMD_TEST: + main_config (); + ret = xd3_selftest (); + break; +#endif + + case CMD_CONFIG: + ret = main_config (); + break; + + default: + ret = main_help (); + break; + } + + if (0) + { + cleanup: + ret = EXIT_FAILURE; + exit: + (void)0; + } + +#if EXTERNAL_COMPRESSION + if (ext_tmpfile != NULL) + { + unlink (ext_tmpfile); + } +#endif + + main_file_cleanup (& ifile); + main_file_cleanup (& ofile); + main_file_cleanup (& sfile); + + while (! main_merge_list_empty (& merge_order)) + { + merge = main_merge_list_pop_front (& merge_order); + main_free (merge); + } + + main_free (free_argv); + main_free (free_value); + + main_cleanup (); + + fflush (stdout); + fflush (stderr); + return ret; +} + +static int +main_help (void) +{ + main_version(); + + /* Note: update wiki when command-line features change */ + DP(RINT "usage: xdelta3 [command/options] [input [output]]\n"); + DP(RINT "special command names:\n"); + DP(RINT " config prints xdelta3 configuration\n"); + DP(RINT " decode decompress the input\n"); + DP(RINT " encode compress the input%s\n", + XD3_ENCODER ? 
"" : " [Not compiled]"); +#if REGRESSION_TEST + DP(RINT " test run the builtin tests\n"); +#endif +#if VCDIFF_TOOLS + DP(RINT "special commands for VCDIFF inputs:\n"); + DP(RINT " printdelta print information about the entire delta\n"); + DP(RINT " printhdr print information about the first window\n"); + DP(RINT " printhdrs print information about all windows\n"); + DP(RINT " recode encode with new application/secondary settings\n"); + DP(RINT " merge merge VCDIFF inputs (see below)\n"); +#endif + DP(RINT "standard options:\n"); + DP(RINT " -0 .. -9 compression level\n"); + DP(RINT " -c use stdout\n"); + DP(RINT " -d decompress\n"); + DP(RINT " -e compress%s\n", + XD3_ENCODER ? "" : " [Not compiled]"); + DP(RINT " -f force overwrite\n"); + DP(RINT " -h show help\n"); + DP(RINT " -q be quiet\n"); + DP(RINT " -v be verbose (max 2)\n"); + DP(RINT " -V show version\n"); + + DP(RINT "memory options:\n"); + DP(RINT " -B bytes source window size\n"); + DP(RINT " -W bytes input window size\n"); + DP(RINT " -P size compression duplicates window\n"); + DP(RINT " -I size instruction buffer size (0 = unlimited)\n"); + + DP(RINT "compression options:\n"); + DP(RINT " -s source source file to copy from (if any)\n"); + DP(RINT " -S [djw|fgk] enable/disable secondary compression\n"); + DP(RINT " -N disable small string-matching compression\n"); + DP(RINT " -D disable external decompression (encode/decode)\n"); + DP(RINT " -R disable external recompression (decode)\n"); + DP(RINT " -n disable checksum (encode/decode)\n"); + DP(RINT " -C soft config (encode, undocumented)\n"); + DP(RINT " -A [apphead] disable/provide application header (encode)\n"); + DP(RINT " -J disable output (check/compute only)\n"); + DP(RINT " -T use alternate code table (test)\n"); + DP(RINT " -m arguments for \"merge\"\n"); + + DP(RINT "the XDELTA environment variable may contain extra args:\n"); + DP(RINT " XDELTA=\"-s source-x.y.tar.gz\" \\\n"); + DP(RINT " tar --use-compress-program=xdelta3 \\\n"); + 
DP(RINT " -cf target-x.z.tar.gz.vcdiff target-x.y\n"); + DP(RINT "the \"merge\" command combines VCDIFF inputs as follows:\n"); + DP(RINT " xdelta3 merge -m 1.vcdiff -m 2.vcdiff 3.vcdiff merged.vcdiff\n"); + return EXIT_FAILURE; +} diff --git a/xdelta3-merge.h b/xdelta3-merge.h new file mode 100644 index 0000000..2253a2c --- /dev/null +++ b/xdelta3-merge.h @@ -0,0 +1,579 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XDELTA3_MERGE_H_ +#define _XDELTA3_MERGE_H_ + +int xd3_merge_inputs (xd3_stream *stream, + xd3_whole_state *source, + xd3_whole_state *input); + +static int +xd3_whole_state_init (xd3_stream *stream) +{ + XD3_ASSERT (stream->whole_target.adds == NULL); + XD3_ASSERT (stream->whole_target.inst == NULL); + XD3_ASSERT (stream->whole_target.wininfo == NULL); + XD3_ASSERT (stream->whole_target.length == 0); + + stream->whole_target.adds_alloc = XD3_ALLOCSIZE; + stream->whole_target.inst_alloc = XD3_ALLOCSIZE; + stream->whole_target.wininfo_alloc = XD3_ALLOCSIZE; + + if ((stream->whole_target.adds = (uint8_t*) + xd3_alloc (stream, stream->whole_target.adds_alloc, 1)) == NULL || + (stream->whole_target.inst = (xd3_winst*) + xd3_alloc (stream, stream->whole_target.inst_alloc, 1)) 
== NULL || + (stream->whole_target.wininfo = (xd3_wininfo*) + xd3_alloc (stream, stream->whole_target.wininfo_alloc, 1)) == NULL) + { + return ENOMEM; + } + return 0; +} + +static void +xd3_swap_whole_state (xd3_whole_state *a, + xd3_whole_state *b) +{ + xd3_whole_state tmp; + XD3_ASSERT (a->inst != NULL && a->adds != NULL); + XD3_ASSERT (b->inst != NULL && b->adds != NULL); + XD3_ASSERT (b->wininfo != NULL && b->wininfo != NULL); + memcpy (&tmp, a, sizeof (xd3_whole_state)); + memcpy (a, b, sizeof (xd3_whole_state)); + memcpy (b, &tmp, sizeof (xd3_whole_state)); +} + +static int +xd3_realloc_buffer (xd3_stream *stream, + usize_t current_units, + usize_t unit_size, + usize_t new_units, + usize_t *alloc_size, + void **alloc_ptr) +{ + usize_t needed; + usize_t new_alloc; + usize_t cur_size; + uint8_t *new_buf; + + needed = (current_units + new_units) * unit_size; + + if (needed <= *alloc_size) + { + return 0; + } + + cur_size = current_units * unit_size; + new_alloc = xd3_round_blksize (needed * 2, XD3_ALLOCSIZE); + + if ((new_buf = (uint8_t*) xd3_alloc (stream, new_alloc, 1)) == NULL) + { + return ENOMEM; + } + + if (cur_size != 0) + { + memcpy (new_buf, *alloc_ptr, cur_size); + } + + if (*alloc_ptr != NULL) + { + xd3_free (stream, *alloc_ptr); + } + + *alloc_size = new_alloc; + *alloc_ptr = new_buf; + + return 0; +} + +/* allocate one new output instruction */ +static int +xd3_whole_alloc_winst (xd3_stream *stream, + xd3_winst **winstp) +{ + int ret; + + if ((ret = xd3_realloc_buffer (stream, + stream->whole_target.instlen, + sizeof (xd3_winst), + 1, + & stream->whole_target.inst_alloc, + (void**) & stream->whole_target.inst))) + { + return ret; + } + + *winstp = &stream->whole_target.inst[stream->whole_target.instlen++]; + + return 0; +} + +static int +xd3_whole_alloc_adds (xd3_stream *stream, + usize_t count) +{ + return xd3_realloc_buffer (stream, + stream->whole_target.addslen, + 1, + count, + & stream->whole_target.adds_alloc, + (void**) & 
stream->whole_target.adds); +} + +static int +xd3_whole_alloc_wininfo (xd3_stream *stream, + xd3_wininfo **wininfop) +{ + int ret; + + if ((ret = xd3_realloc_buffer (stream, + stream->whole_target.wininfolen, + sizeof (xd3_wininfo), + 1, + & stream->whole_target.wininfo_alloc, + (void**) & stream->whole_target.wininfo))) + { + return ret; + } + + *wininfop = &stream->whole_target.wininfo[stream->whole_target.wininfolen++]; + + return 0; +} + +static int +xd3_whole_append_inst (xd3_stream *stream, + xd3_hinst *inst) +{ + int ret; + xd3_winst *winst; + + if ((ret = xd3_whole_alloc_winst (stream, &winst))) + { + return ret; + } + + winst->type = inst->type; + winst->mode = 0; + winst->size = inst->size; + winst->position = stream->whole_target.length; + stream->whole_target.length += inst->size; + + if (((inst->type == XD3_ADD) || (inst->type == XD3_RUN)) && + (ret = xd3_whole_alloc_adds (stream, + (inst->type == XD3_RUN ? 1 : inst->size)))) + { + return ret; + } + + switch (inst->type) + { + case XD3_RUN: + winst->addr = stream->whole_target.addslen; + stream->whole_target.adds[stream->whole_target.addslen++] = + *stream->data_sect.buf++; + break; + + case XD3_ADD: + winst->addr = stream->whole_target.addslen; + memcpy (stream->whole_target.adds + stream->whole_target.addslen, + stream->data_sect.buf, + inst->size); + stream->data_sect.buf += inst->size; + stream->whole_target.addslen += inst->size; + break; + + default: + if (inst->addr < stream->dec_cpylen) + { + winst->mode = SRCORTGT (stream->dec_win_ind); + winst->addr = stream->dec_cpyoff + inst->addr; + } + else + { + winst->addr = (stream->dec_winstart + + inst->addr - + stream->dec_cpylen); + } + break; + } + + return 0; +} + +int +xd3_whole_append_window (xd3_stream *stream) +{ + int ret; + xd3_wininfo *wininfo; + + if ((ret = xd3_whole_alloc_wininfo (stream, &wininfo))) { return ret; } + + wininfo->length = stream->dec_tgtlen; + wininfo->offset = stream->dec_winstart; + wininfo->adler32 = 
stream->dec_adler32; + + while (stream->inst_sect.buf < stream->inst_sect.buf_max) + { + if ((ret = xd3_decode_instruction (stream))) + { + return ret; + } + + if ((stream->dec_current1.type != XD3_NOOP) && + (ret = xd3_whole_append_inst (stream, + & stream->dec_current1))) + { + return ret; + } + + if ((stream->dec_current2.type != XD3_NOOP) && + (ret = xd3_whole_append_inst (stream, + & stream->dec_current2))) + { + return ret; + } + } + + return 0; +} + +/* xd3_merge_input_output applies *source to *stream, returns the + * result in stream. */ +int xd3_merge_input_output (xd3_stream *stream, + xd3_whole_state *source) +{ + int ret; + xd3_stream tmp_stream; + memset (& tmp_stream, 0, sizeof (tmp_stream)); + if ((ret = xd3_config_stream (& tmp_stream, NULL)) || + (ret = xd3_whole_state_init (& tmp_stream)) || + (ret = xd3_merge_inputs (& tmp_stream, + source, + & stream->whole_target))) + { + XPR(NT XD3_LIB_ERRMSG (&tmp_stream, ret)); + return ret; + } + + /* the output is in tmp_stream.whole_state, swap into input */ + xd3_swap_whole_state (& stream->whole_target, + & tmp_stream.whole_target); + /* total allocation counts are preserved */ + xd3_free_stream (& tmp_stream); + return 0; +} + +static int +xd3_merge_run (xd3_stream *stream, + xd3_whole_state *target, + xd3_winst *iinst) +{ + int ret; + xd3_winst *oinst; + + if ((ret = xd3_whole_alloc_winst (stream, &oinst)) || + (ret = xd3_whole_alloc_adds (stream, 1))) + { + return ret; + } + + oinst->type = iinst->type; + oinst->mode = iinst->mode; + oinst->size = iinst->size; + oinst->addr = stream->whole_target.addslen; + + XD3_ASSERT (stream->whole_target.length == iinst->position); + oinst->position = stream->whole_target.length; + stream->whole_target.length += iinst->size; + + stream->whole_target.adds[stream->whole_target.addslen++] = + target->adds[iinst->addr]; + + return 0; +} + +static int +xd3_merge_add (xd3_stream *stream, + xd3_whole_state *target, + xd3_winst *iinst) +{ + int ret; + xd3_winst *oinst; 
+ + if ((ret = xd3_whole_alloc_winst (stream, &oinst)) || + (ret = xd3_whole_alloc_adds (stream, iinst->size))) + { + return ret; + } + + oinst->type = iinst->type; + oinst->mode = iinst->mode; + oinst->size = iinst->size; + oinst->addr = stream->whole_target.addslen; + + XD3_ASSERT (stream->whole_target.length == iinst->position); + oinst->position = stream->whole_target.length; + stream->whole_target.length += iinst->size; + + memcpy(stream->whole_target.adds + stream->whole_target.addslen, + target->adds + iinst->addr, + iinst->size); + + stream->whole_target.addslen += iinst->size; + + return 0; +} + +static int +xd3_merge_target_copy (xd3_stream *stream, + xd3_winst *iinst) +{ + int ret; + xd3_winst *oinst; + + if ((ret = xd3_whole_alloc_winst (stream, &oinst))) + { + return ret; + } + + XD3_ASSERT (stream->whole_target.length == iinst->position); + + memcpy (oinst, iinst, sizeof (*oinst)); + return 0; +} + +static int +xd3_merge_find_position (xd3_stream *stream, + xd3_whole_state *source, + xoff_t address, + usize_t *inst_num) +{ + usize_t low; + usize_t high; + + if (address >= source->length) + { + stream->msg = "Invalid copy offset in merge"; + return XD3_INVALID_INPUT; + } + + low = 0; + high = source->instlen; + + while (low != high) + { + xoff_t mid_lpos; + xoff_t mid_hpos; + usize_t mid = low + (high - low) / 2; + mid_lpos = source->inst[mid].position; + + if (address < mid_lpos) + { + high = mid; + continue; + } + + mid_hpos = mid_lpos + source->inst[mid].size; + + if (address >= mid_hpos) + { + low = mid + 1; + continue; + } + + *inst_num = mid; + return 0; + } + + stream->msg = "Internal error in merge"; + return XD3_INTERNAL; +} + +static int +xd3_merge_source_copy (xd3_stream *stream, + xd3_whole_state *source, + const xd3_winst *iinst_orig) +{ + int ret; + xd3_winst iinst; + usize_t sinst_num; + + memcpy (& iinst, iinst_orig, sizeof (iinst)); + + XD3_ASSERT (iinst.mode == VCD_SOURCE); + + if ((ret = xd3_merge_find_position (stream, source, + 
iinst.addr, &sinst_num))) + { + return ret; + } + + while (iinst.size > 0) + { + xd3_winst *sinst; + xd3_winst *minst; + usize_t sinst_offset; + usize_t sinst_left; + usize_t this_take; + + XD3_ASSERT (sinst_num < source->instlen); + + sinst = &source->inst[sinst_num]; + + XD3_ASSERT (iinst.addr >= sinst->position); + + sinst_offset = iinst.addr - sinst->position; + + XD3_ASSERT (sinst->size > sinst_offset); + + sinst_left = sinst->size - sinst_offset; + this_take = min (iinst.size, sinst_left); + + XD3_ASSERT (this_take > 0); + + if ((ret = xd3_whole_alloc_winst (stream, &minst))) + { + return ret; + } + + minst->size = this_take; + minst->type = sinst->type; + minst->position = iinst.position; + minst->mode = 0; + + switch (sinst->type) + { + case XD3_RUN: + if ((ret = xd3_whole_alloc_adds (stream, 1))) + { + return ret; + } + + minst->addr = stream->whole_target.addslen; + stream->whole_target.adds[stream->whole_target.addslen++] = + source->adds[sinst->addr]; + break; + case XD3_ADD: + if ((ret = xd3_whole_alloc_adds (stream, this_take))) + { + return ret; + } + + minst->addr = stream->whole_target.addslen; + memcpy(stream->whole_target.adds + stream->whole_target.addslen, + source->adds + sinst->addr + sinst_offset, + this_take); + stream->whole_target.addslen += this_take; + break; + default: + if (sinst->mode != 0) + { + minst->mode = sinst->mode; + minst->addr = sinst->addr + sinst_offset; + } + else + { + // TODO: this is slow because of the recursion, which + // could reach a depth equal to the number of target + // copies, and this is compression-inefficient because + // it can produce duplicate adds. + xd3_winst tinst; + tinst.type = XD3_CPY; + tinst.mode = iinst.mode; + tinst.addr = sinst->addr + sinst_offset; + tinst.size = this_take; + tinst.position = iinst.position; + + // The instruction allocated in this frame will not be used. 
+ stream->whole_target.instlen -= 1; + + if ((ret = xd3_merge_source_copy (stream, source, &tinst))) + { + return ret; + } + } + break; + } + + iinst.position += this_take; + iinst.addr += this_take; + iinst.size -= this_take; + sinst_num += 1; + } + + return 0; +} + +/* xd3_merge_inputs() applies *input to *source, returns its result in + * stream. */ +int xd3_merge_inputs (xd3_stream *stream, + xd3_whole_state *source, + xd3_whole_state *input) +{ + int ret = 0; + usize_t i; + size_t input_i; + + for (i = 0; i < input->wininfolen; ++i) { + xd3_wininfo *copyinfo; + + if ((ret = xd3_whole_alloc_wininfo (stream, ©info))) { return ret; } + + *copyinfo = input->wininfo[i]; + } + + /* iterate over each instruction. */ + for (input_i = 0; ret == 0 && input_i < input->instlen; ++input_i) + { + xd3_winst *iinst = &input->inst[input_i]; + + switch (iinst->type) + { + case XD3_RUN: + ret = xd3_merge_run (stream, input, iinst); + break; + case XD3_ADD: + ret = xd3_merge_add (stream, input, iinst); + break; + default: + /* TODO: VCD_TARGET support is completely untested all + * throughout. */ + if (iinst->mode == 0 || iinst->mode == VCD_TARGET) + { + ret = xd3_merge_target_copy (stream, iinst); + } + else + { + ret = xd3_merge_source_copy (stream, source, iinst); + } + + /* The whole_target.length is not updated in the xd3_merge*copy + * routine because of recursion in xd3_merge_source_copy. */ + stream->whole_target.length += iinst->size; + break; + } + } + + return ret; +} + +#endif diff --git a/xdelta3-python.h b/xdelta3-python.h new file mode 100644 index 0000000..4805b17 --- /dev/null +++ b/xdelta3-python.h @@ -0,0 +1,88 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2003, 2004, 2005, 2006, 2007. Joshua P. 
MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "Python.h" + +static PyObject *pyxd3_error; + +/* spam: xdelta3.main([string,list,...]) */ +PyObject *xdelta3_main_cmdline (PyObject *self, PyObject *args) +{ + int ret, i, nargs; + char **argv = NULL; + int argc = 0; + PyObject *result = NULL; + PyObject *o; + + if (! PyArg_ParseTuple (args, "O", &o) + || ! PyList_Check (o)) + { + goto cleanup; + } + + argc = PyList_Size (o); + nargs = argc + 2; + + if (! 
(argv = malloc (sizeof(argv[0]) * nargs))) + { + PyErr_NoMemory (); + goto cleanup; + } + memset (argv, 0, sizeof(argv[0]) * nargs); + + for (i = 1; i < nargs-1; i += 1) + { + char *ps; + PyObject *s; + if ((s = PyList_GetItem (o, i-1)) == NULL) { goto cleanup; } + ps = PyString_AsString (s); + /* TODO: ps is NULL if s is not a string, crashes the interpreter */ + argv[i] = ps; + } + + ret = xd3_main_cmdline (argc+1, argv); + + if (ret == 0) + { + result = Py_BuildValue ("i", ret); + } + else + { + PyErr_SetString (pyxd3_error, "failed :("); + } + cleanup: + if (argv) + { + free (argv); + } + return result; +} + +static PyMethodDef xdelta3_methods[] = { + { "main", xdelta3_main_cmdline, METH_VARARGS, "xdelta3 main()" }, + { NULL, NULL } +}; + +DL_EXPORT(void) initxdelta3main (void) +{ + PyObject *m, *d; + m = Py_InitModule ("xdelta3main", xdelta3_methods); + d = PyModule_GetDict (m); + pyxd3_error = PyErr_NewException ("xdelta3main.error", NULL, NULL); + PyDict_SetItemString (d, "error", pyxd3_error); +} diff --git a/xdelta3-regtest.py b/xdelta3-regtest.py new file mode 100755 index 0000000..f9a11bd --- /dev/null +++ b/xdelta3-regtest.py @@ -0,0 +1,1222 @@ +#!/usr/bin/python2.5 +# xdelta 3 - delta compression tools and library +# Copyright (C) 2003, 2006, 2007, 2008. Joshua P. MacDonald +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +# TODO: test 1.5 vs. greedy + +import os, sys, math, re, time, types, array, random +import xdelta3 + +#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' +#RCSDIR = '/tmp/PRCS_read_copy' +#SAMPLEDIR = "/tmp/WESNOTH_tmp/diff" + +#RCSDIR = 'G:/jmacd/PRCS_copy' +#SAMPLEDIR = "C:/sample_data/Wesnoth/tar" + +#RCSDIR = '/Users/jmacd/src/ftp.kernel.org/pub/scm/linux/kernel/bkcvs/linux-2.4/net/x25' +RCSDIR = '/Users/jmacd/src/ftp.kernel.org' + +# +MIN_SIZE = 0 + +TIME_TOO_SHORT = 0.050 + +SKIP_TRIALS = 2 +MIN_TRIALS = 3 +MAX_TRIALS = 15 + +# 10 = fast 1.5 = slow +MIN_STDDEV_PCT = 1.5 + +# How many results per round +MAX_RESULTS = 500 +TEST_ROUNDS = 500 +KEEP_P = (0.5) + +# For RCS testing, what percent to select +FILE_P = (0.50) + +# For run-speed tests +MIN_RUN = 1000 * 1000 * 1 +MAX_RUN = 1000 * 1000 * 10 + +# Testwide defaults +ALL_ARGS = [ + '-vv' + ] + +# The first 7 args go to -C +SOFT_CONFIG_CNT = 7 + +CONFIG_ORDER = [ 'large_look', + 'large_step', + 'small_look', + 'small_chain', + 'small_lchain', + 'max_lazy', + 'long_enough', + + # > SOFT_CONFIG_CNT + 'nocompress', + 'winsize', + 'srcwinsize', + 'sprevsz', + 'iopt', + 'djw', + 'altcode', + ] + +CONFIG_ARGMAP = { + 'winsize' : '-W', + 'srcwinsize' : '-B', + 'sprevsz' : '-P', + 'iopt' : '-I', + 'nocompress' : '-N', + 'djw' : '-Sdjw', + 'altcode' : '-T', + } + +def INPUT_SPEC(rand): + return { + + # Time/space costs: + + # -C 1,2,3,4,5,6,7 + 'large_look' : lambda d: rand.choice([9, 10, 11, 12]), + 'large_step' : lambda d: rand.choice([25, 26, 27, 28, 29, 30]), + 'small_look' : lambda d: rand.choice([4]), + 'small_chain' : lambda d: rand.choice([1]), + 'small_lchain' : lambda d: rand.choice([1]), + 'max_lazy' : lambda d: rand.choice([4, 5, 6, 7, 8, 9, 10 ]), + + # Note: long_enough only refers to 
small matching and has no effect if + # small_chain == 1. + 'long_enough' : lambda d: rand.choice([4]), + + # -N + 'nocompress' : lambda d: rand.choice(['false']), + + # -T + 'altcode' : lambda d: rand.choice(['false']), + + # -S djw + 'djw' : lambda d: rand.choice(['false']), + + # Memory costs: + + # -W + 'winsize' : lambda d: 8 * (1<<20), + + # -B + 'srcwinsize' : lambda d: 64 * (1<<20), + + # -I 0 is unlimited + 'iopt' : lambda d: 0, + + # -P only powers of two + 'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]), + } +#end + +# +TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() + +RUNFILE = os.path.join(TMPDIR, 'run') +DFILE = os.path.join(TMPDIR, 'output') +RFILE = os.path.join(TMPDIR, 'recon') + +HEAD_STATE = 0 +BAR_STATE = 1 +REV_STATE = 2 +DATE_STATE = 3 + +# +IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*') + +# rcs output +RE_TOTREV = re.compile('total revisions: (\\d+)') +RE_BAR = re.compile('----------------------------') +RE_REV = re.compile('revision (.+)') +RE_DATE = re.compile('date: ([^;]+);.*') +# xdelta output +RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') +RE_EXTCOMP = re.compile('XDELTA ext comp.*') + +def c2str(c): + return ' '.join(['%s' % x for x in c]) +#end + +def SumList(l): + return reduce(lambda x,y: x+y, l) +#end + +# returns (total, mean, stddev, q2 (median), +# (q3-q1)/2 ("semi-interquartile range"), max-min (spread)) +class StatList: + def __init__(self,l,desc): + cnt = len(l) + assert(cnt > 1) + l.sort() + self.cnt = cnt + self.l = l + self.total = SumList(l) + self.mean = self.total / float(self.cnt) + self.s = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1)) + self.q0 = l[0] + self.q1 = l[int(self.cnt/4.0+0.5)] + self.q2 = l[int(self.cnt/2.0+0.5)] + self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))] + self.q4 = l[self.cnt-1]+1 + self.siqr = (self.q3-self.q1)/2.0; + self.spread = (self.q4-self.q0) + self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \ + 
(desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread) + #end +#end + +def RunCommand(args, ok = [0]): + #print 'run command %s' % (' '.join(args)) + p = os.spawnvp(os.P_WAIT, args[0], args) + if p not in ok: + raise CommandError(args, 'exited %d' % p) + #end +#end + +def RunCommandIO(args,infn,outfn): + p = os.fork() + if p == 0: + os.dup2(os.open(infn,os.O_RDONLY),0) + os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1) + os.execvp(args[0], args) + else: + s = os.waitpid(p,0) + o = os.WEXITSTATUS(s[1]) + if not os.WIFEXITED(s[1]) or o != 0: + raise CommandError(args, 'exited %d' % o) + #end + #end +#end + +class TimedTest: + def __init__(self, target, source, runnable, + skip_trials = SKIP_TRIALS, + min_trials = MIN_TRIALS, + max_trials = MAX_TRIALS, + min_stddev_pct = MIN_STDDEV_PCT): + self.target = target + self.source = source + self.runnable = runnable + + self.skip_trials = skip_trials + self.min_trials = min(min_trials, max_trials) + self.max_trials = max_trials + self.min_stddev_pct = min_stddev_pct + + self.encode_time = self.DoTest(DFILE, + lambda x: x.Encode(self.target, self.source, DFILE)) + self.encode_size = runnable.EncodeSize(DFILE) + + self.decode_time = self.DoTest(RFILE, + lambda x: x.Decode(DFILE, self.source, RFILE), + ) + + # verify + runnable.Verify(self.target, RFILE) + #end + + def DoTest(self, fname, func): + trials = 0 + measured = [] + + while 1: + try: + os.remove(fname) + except OSError: + pass + + start_time = time.time() + start_clock = time.clock() + + func(self.runnable) + + total_clock = (time.clock() - start_clock) + total_time = (time.time() - start_time) + + elap_time = max(total_time, 0.0000001) + elap_clock = max(total_clock, 0.0000001) + + trials = trials + 1 + + # skip some of the first trials + if trials > self.skip_trials: + measured.append((elap_clock, elap_time)) + #print 'measurement total: %.1f ms' % (total_time * 1000.0) + + # at least so many + if trials < (self.skip_trials + 
self.min_trials): + #print 'continue: need more trials: %d' % trials + continue + + # compute %variance + done = 0 + if self.skip_trials + self.min_trials <= 2: + measured = measured + measured; + done = 1 + #end + + time_stat = StatList([x[1] for x in measured], 'elap time') + sp = float(time_stat.s) / float(time_stat.mean) + + # what if MAX_TRIALS is exceeded? + too_many = (trials - self.skip_trials) >= self.max_trials + good = (100.0 * sp) < self.min_stddev_pct + if done or too_many or good: + trials = trials - self.skip_trials + if not done and not good: + #print 'too many trials: %d' % trials + pass + #clock = StatList([x[0] for x in measured], 'elap clock') + return time_stat + #end + #end + #end +#end + +def Decimals(start, end): + l = [] + step = start + while 1: + r = range(step, step * 10, step) + l = l + r + if step * 10 >= end: + l.append(step * 10) + break + step = step * 10 + return l +#end + +# This tests the raw speed of 0-byte inputs +def RunSpeedTest(): + for L in Decimals(MIN_RUN, MAX_RUN): + SetFileSize(RUNFILE, L) + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)])) + ReportSpeed(L, trx, '1MB ') + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)])) + ReportSpeed(L, trx, '512k') + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)])) + ReportSpeed(L, trx, '256k') + + trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE)) + ReportSpeed(L, trm, 'swig') + + trg = TimedTest(RUNFILE, None, GzipRun1()) + ReportSpeed(L,trg,'gzip') + #end +#end + +def SetFileSize(F,L): + fd = os.open(F, os.O_CREAT | os.O_WRONLY) + os.ftruncate(fd,L) + assert os.fstat(fd).st_size == L + os.close(fd) +#end + +def ReportSpeed(L,tr,desc): + print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \ + (desc, L, + tr.encode_size, + tr.encode_time.mean * 1000.0, + tr.decode_time.mean * 1000.0) +#end + +class Xdelta3RunClass: + def __init__(self, extra): + self.extra = extra + #end + + def __str__(self): + return ' 
'.join(self.extra) + #end + + def New(self): + return Xdelta3Runner(self.extra) + #end +#end + +class Xdelta3Runner: + def __init__(self, extra): + self.extra = extra + #end + + def Encode(self, target, source, output): + args = (ALL_ARGS + + self.extra + + ['-e']) + if source: + args.append('-s') + args.append(source) + #end + args = args + [target, output] + self.Main(args) + #end + + def Decode(self, input, source, output): + args = (ALL_ARGS + + ['-d']) + if source: + args.append('-s') + args.append(source) + #end + args = args + [input, output] + self.Main(args) + #end + + def Verify(self, target, recon): + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end + + def Main(self, args): + try: + #print 'Run %s' % (' '.join(args)) + xdelta3.xd3_main_cmdline(args) + except Exception, e: + raise CommandError(args, "xdelta3.main exception: %s" % e) + #end + #end +#end + +class Xdelta3Mod1: + def __init__(self, file): + self.target_data = open(file, 'r').read() + #end + + def Encode(self, ignore1, ignore2, ignore3): + r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10) + if r1 != 0: + raise CommandError('memory', 'encode failed: %s' % r1) + #end + self.encoded = encoded + #end + + def Decode(self, ignore1, ignore2, ignore3): + r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data)) + if r2 != 0: + raise CommandError('memory', 'decode failed: %s' % r1) + #end + self.decoded = data1 + #end + + def Verify(self, ignore1, ignore2): + if self.target_data != self.decoded: + raise CommandError('memory', 'bad decode') + #end + #end + + def EncodeSize(self, ignore1): + return len(self.encoded) + #end +#end + +class GzipRun1: + def Encode(self, target, source, output): + assert source == None + RunCommandIO(['gzip', '-cf'], target, output) + #end + + def Decode(self, input, source, output): + assert source == None + RunCommandIO(['gzip', '-dcf'], input, output) + 
#end + + def Verify(self, target, recon): + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end +#end + +class Xdelta1RunClass: + def __str__(self): + return 'xdelta1' + #end + + def New(self): + return Xdelta1Runner() + #end +#end + +class Xdelta1Runner: + def Encode(self, target, source, output): + assert source != None + args = ['xdelta1', 'delta', '-q', source, target, output] + RunCommand(args, [0, 1]) + #end + + def Decode(self, input, source, output): + assert source != None + args = ['xdelta1', 'patch', '-q', input, source, output] + # Note: for dumb historical reasons, xdelta1 returns 1 or 0 + RunCommand(args) + #end + + def Verify(self, target, recon): + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end +#end + +# exceptions +class SkipRcsException: + def __init__(self,reason): + self.reason = reason + #end +#end + +class NotEnoughVersions: + def __init__(self): + pass + #end +#end + +class CommandError: + def __init__(self,cmd,str): + if type(cmd) is types.TupleType or \ + type(cmd) is types.ListType: + cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd) + #end + print 'command was: ',cmd + print 'command failed: ',str + print 'have fun debugging' + #end +#end + +class RcsVersion: + def __init__(self,vstr): + self.vstr = vstr + #end + def __cmp__(self,other): + return cmp(self.date, other.date) + #end + def __str__(self): + return str(self.vstr) + #end +#end + +class RcsFile: + + def __init__(self, fname): + self.fname = fname + self.versions = [] + self.state = HEAD_STATE + #end + + def SetTotRev(self,s): + self.totrev = int(s) + #end + + def Rev(self,s): + self.rev = RcsVersion(s) + if len(self.versions) >= self.totrev: + raise SkipRcsException('too many versions (in log messages)') + #end + self.versions.append(self.rev) + #end + + def Date(self,s): + self.rev.date = s + #end + + def Match(self, line, state, rx, gp, newstate, f): 
+ if state == self.state: + m = rx.match(line) + if m: + if f: + f(m.group(gp)) + #end + self.state = newstate + return 1 + #end + #end + return None + #end + + def Sum1Rlog(self): + f = os.popen('rlog '+self.fname, "r") + l = f.readline() + while l: + if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev): + pass + elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None): + pass + elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev): + pass + elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date): + pass + #end + l = f.readline() + #end + c = f.close() + if c != None: + raise c + #end + #end + + def Sum1(self): + st = os.stat(self.fname) + self.rcssize = st.st_size + self.Sum1Rlog() + if self.totrev != len(self.versions): + raise SkipRcsException('wrong version count') + #end + self.versions.sort() + #end + + def Checkout(self,n): + v = self.versions[n] + out = open(self.Verf(n), "w") + cmd = 'co -ko -p%s %s' % (v.vstr, self.fname) + total = 0 + (inf, + stream, + err) = os.popen3(cmd, "r") + inf.close() + buf = stream.read() + while buf: + total = total + len(buf) + out.write(buf) + buf = stream.read() + #end + v.vsize = total + estr = '' + buf = err.read() + while buf: + estr = estr + buf + buf = err.read() + #end + if stream.close(): + raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr)) + #end + out.close() + err.close() + #end + + def Vdate(self,n): + return self.versions[n].date + #end + + def Vstr(self,n): + return self.versions[n].vstr + #end + + def Verf(self,n): + return os.path.join(TMPDIR, 'input.%d' % n) + #end + + def FilePairsByDate(self, runclass): + if self.totrev < 2: + raise NotEnoughVersions() + #end + self.Checkout(0) + ntrials = [] + if self.totrev < 2: + return vtrials + #end + for v in range(0,self.totrev-1): + if v > 1: + os.remove(self.Verf(v-1)) + #end + self.Checkout(v+1) + if os.stat(self.Verf(v)).st_size < MIN_SIZE or \ + os.stat(self.Verf(v+1)).st_size < MIN_SIZE: + 
continue + #end + + result = TimedTest(self.Verf(v+1), + self.Verf(v), + runclass.New()) + + target_size = os.stat(self.Verf(v+1)).st_size + + ntrials.append(result) + #end + + os.remove(self.Verf(self.totrev-1)) + os.remove(self.Verf(self.totrev-2)) + return ntrials + #end + + def AppendVersion(self, f, n): + self.Checkout(n) + rf = open(self.Verf(n), "r") + data = rf.read() + f.write(data) + rf.close() + return len(data) + #end + +class RcsFinder: + def __init__(self): + self.subdirs = [] + self.rcsfiles = [] + self.others = [] + self.skipped = [] + self.biground = 0 + #end + + def Scan1(self,dir): + dents = os.listdir(dir) + subdirs = [] + rcsfiles = [] + others = [] + for dent in dents: + full = os.path.join(dir, dent) + if os.path.isdir(full): + subdirs.append(full) + elif dent[len(dent)-2:] == ",v": + rcsfiles.append(RcsFile(full)) + else: + others.append(full) + #end + #end + self.subdirs = self.subdirs + subdirs + self.rcsfiles = self.rcsfiles + rcsfiles + self.others = self.others + others + return subdirs + #end + + def Crawl(self, dir): + subdirs = [dir] + while subdirs: + s1 = self.Scan1(subdirs[0]) + subdirs = subdirs[1:] + s1 + #end + #end + + def Summarize(self): + good = [] + for rf in self.rcsfiles: + try: + rf.Sum1() + if rf.totrev < 2: + raise SkipRcsException('too few versions (< 2)') + #end + except SkipRcsException, e: + #print 'skipping file %s: %s' % (rf.fname, e.reason) + self.skipped.append(rf) + else: + good.append(rf) + #end + self.rcsfiles = good + #end + + def AllPairsByDate(self, runclass): + results = [] + good = [] + for rf in self.rcsfiles: + try: + results = results + rf.FilePairsByDate(runclass) + except SkipRcsException: + print 'file %s has compressed versions: skipping' % (rf.fname) + except NotEnoughVersions: + print 'testing %s on %s: not enough versions' % (runclass, rf.fname) + else: + good.append(rf) + #end + self.rcsfiles = good + self.ReportPairs(runclass, results) + return results + #end + + def ReportPairs(self, name, 
results): + encode_time = 0 + decode_time = 0 + encode_size = 0 + for r in results: + encode_time += r.encode_time.mean + decode_time += r.decode_time.mean + encode_size += r.encode_size + #end + print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \ + (name, encode_time, decode_time, encode_size) + #end + + def MakeBigFiles(self, rand): + f1 = open(TMPDIR + "/big.1", "w") + f2 = open(TMPDIR + "/big.2", "w") + population = [] + for file in self.rcsfiles: + if len(file.versions) < 2: + continue + population.append(file) + #end + f1sz = 0 + f2sz = 0 + fcount = int(len(population) * FILE_P) + assert fcount > 0 + for file in rand.sample(population, fcount): + m = IGNORE_FILENAME.match(file.fname) + if m != None: + continue + #end + r1, r2 = rand.sample(xrange(0, len(file.versions)), 2) + f1sz += file.AppendVersion(f1, r1) + f2sz += file.AppendVersion(f2, r2) + #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2))) + #end + testkey = 'rcs%d' % self.biground + self.biground = self.biground + 1 + + print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz) + f1.close() + f2.close() + return (TMPDIR + "/big.1", + TMPDIR + "/big.2", + testkey) + #end + + def Generator(self): + return lambda rand: self.MakeBigFiles(rand) + #end +#end + +# find a set of RCS files for testing +def GetTestRcsFiles(): + rcsf = RcsFinder() + rcsf.Crawl(RCSDIR) + if len(rcsf.rcsfiles) == 0: + raise CommandError('', 'no RCS files') + #end + rcsf.Summarize() + print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles), + len(rcsf.subdirs), + len(rcsf.others), + len(rcsf.skipped)) + print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str + print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str + return rcsf +#end + +class SampleDataTest: + def __init__(self, dirs): + self.pairs = [] + while dirs: + d = dirs[0] + dirs = dirs[1:] + l = os.listdir(d) + files = [] + for e in l: + p = os.path.join(d, e) + if 
os.path.isdir(p): + dirs.append(p) + else: + files.append(p) + #end + #end + if len(files) > 1: + files.sort() + for x in xrange(len(files) - 1): + self.pairs.append((files[x], files[x+1], + '%s-%s' % (files[x], files[x+1]))) + #end + #end + #end + #end + + def Generator(self): + return lambda rand: rand.choice(self.pairs) + #end +#end + +# configs are represented as a list of values, +# program takes a list of strings: +def ConfigToArgs(config): + args = [ '-C', + ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])] + for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)): + key = CONFIG_ARGMAP[CONFIG_ORDER[i]] + val = config[i] + if val == 'true' or val == 'false': + if val == 'true': + args.append('%s' % key) + #end + else: + args.append('%s=%s' % (key, val)) + #end + #end + return args +#end + +# +class RandomTest: + def __init__(self, tnum, tinput, config, syntuple = None): + self.mytinput = tinput[2] + self.myconfig = config + self.tnum = tnum + + if syntuple != None: + self.runtime = syntuple[0] + self.compsize = syntuple[1] + self.decodetime = None + else: + args = ConfigToArgs(config) + result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args)) + + self.runtime = result.encode_time.mean + self.compsize = result.encode_size + self.decodetime = result.decode_time.mean + #end + + self.score = None + self.time_pos = None + self.size_pos = None + self.score_pos = None + #end + + def __str__(self): + decodestr = ' %.6f' % self.decodetime + return 'time %.6f%s size %d%s << %s >>%s' % ( + self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""), + self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""), + c2str(self.config()), + decodestr) + #end + + def time(self): + return self.runtime + #end + + def size(self): + return self.compsize + #end + + def config(self): + return self.myconfig + #end + + def score(self): + return self.score + #end + + def tinput(self): + return self.mytinput + #end +#end + +def PosInAlist(l, e): + 
for i in range(0, len(l)): + if l[i][1] == e: + return i; + #end + #end + return -1 +#end + +# Generates a set of num_results test configurations, given the list of +# retest-configs. +def RandomTestConfigs(rand, input_configs, num_results): + + outputs = input_configs[:] + have_set = dict([(c,c) for c in input_configs]) + + # Compute a random configuration + def RandomConfig(): + config = [] + cmap = {} + for key in CONFIG_ORDER: + val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap) + config.append(val) + #end + return tuple(config) + #end + + while len(outputs) < num_results: + newc = None + for i in xrange(100): + c = RandomConfig() + if have_set.has_key(c): + continue + #end + have_set[c] = c + newc = c + break + if newc is None: + print 'stopped looking for configs at %d' % len(outputs) + break + #end + outputs.append(c) + #end + outputs.sort() + return outputs +#end + +def RunTestLoop(rand, generator, rounds): + configs = [] + for rnum in xrange(rounds): + configs = RandomTestConfigs(rand, configs, MAX_RESULTS) + tinput = generator(rand) + tests = [] + for x in xrange(len(configs)): + t = RandomTest(x, tinput, configs[x]) + print 'Round %d test %d: %s' % (rnum, x, t) + tests.append(t) + #end + results = ScoreTests(tests) + + for r in results: + c = r.config() + if not test_all_config_results.has_key(c): + test_all_config_results[c] = [r] + else: + test_all_config_results[c].append(r) + #end + #end + + GraphResults('expt%d' % rnum, results) + GraphSummary('sum%d' % rnum, results) + + # re-test some fraction + configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]] + #end +#end + +# TODO: cleanup +test_all_config_results = {} + +def ScoreTests(results): + scored = [] + timed = [] + sized = [] + + t_min = float(min([test.time() for test in results])) + #t_max = float(max([test.time() for test in results])) + s_min = float(min([test.size() for test in results])) + #s_max = float(max([test.size() for test in results])) + + for test in results: + + # 
Hyperbolic function. Smaller scores still better + red = 0.999 # minimum factors for each dimension are 1/1000 + test.score = ((test.size() - s_min * red) * + (test.time() - t_min * red)) + + scored.append((test.score, test)) + timed.append((test.time(), test)) + sized.append((test.size(), test)) + #end + + scored.sort() + timed.sort() + sized.sort() + + best_by_size = [] + best_by_time = [] + + pos = 0 + for (score, test) in scored: + pos += 1 + test.score_pos = pos + #end + + scored = [x[1] for x in scored] + + for test in scored: + test.size_pos = PosInAlist(sized, test) + test.time_pos = PosInAlist(timed, test) + #end + + for test in scored: + c = test.config() + s = 0.0 + print 'H-Score: %0.9f %s' % (test.score, test) + #end + + return scored +#end + +def GraphResults(desc, results): + f = open("data-%s.csv" % desc, "w") + for r in results: + f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r)) + #end + f.close() + os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc)) +#end + +def GraphSummary(desc, results_ignore): + test_population = 0 + config_ordered = [] + + # drops duplicate test/config pairs (TODO: don't retest them) + for config, cresults in test_all_config_results.items(): + input_config_map = {} + uniq = [] + for test in cresults: + assert test.config() == config + test_population += 1 + key = test.tinput() + if not input_config_map.has_key(key): + input_config_map[key] = {} + #end + if input_config_map[key].has_key(config): + print 'skipping repeat test %s vs. 
%s' % (input_config_map[key][config], test) + continue + #end + input_config_map[key][config] = test + uniq.append(test) + #end + config_ordered.append(uniq) + #end + + # sort configs descending by number of tests + config_ordered.sort(lambda x, y: len(y) - len(x)) + + print 'population %d: %d configs %d results' % \ + (test_population, + len(config_ordered), + len(config_ordered[0])) + + if config_ordered[0] == 1: + return + #end + + # a map from test-key to test-list w/ various configs + input_set = {} + osize = len(config_ordered) + + for i in xrange(len(config_ordered)): + config = config_ordered[i][0].config() + config_tests = config_ordered[i] + + #print '%s has %d tested inputs' % (config, len(config_tests)) + + if len(input_set) == 0: + input_set = dict([(t.tinput(), [t]) for t in config_tests]) + continue + #end + + # a map from test-key to test-list w/ various configs + update_set = {} + for r in config_tests: + t = r.tinput() + if input_set.has_key(t): + update_set[t] = input_set[t] + [r] + else: + #print 'config %s does not have test %s' % (config, t) + pass + #end + #end + + if len(update_set) <= 1: + break + #end + + input_set = update_set + + # continue if there are more w/ the same number of inputs + if i < (len(config_ordered) - 1) and \ + len(config_ordered[i + 1]) == len(config_tests): + continue + #end + + # synthesize results for multi-test inputs + config_num = None + + # map of config to sum(various test-keys) + smap = {} + for (key, tests) in input_set.items(): + if config_num == None: + # config_num should be the same in all elements + config_num = len(tests) + smap = dict([(r.config(), + (r.time(), + r.size())) + for r in tests]) + else: + # compuate the per-config sum of time/size + assert config_num == len(tests) + smap = dict([(r.config(), + (smap[r.config()][0] + r.time(), + smap[r.config()][1] + r.size())) + for r in tests]) + #end + #end + + if config_num == 1: + continue + #end + + if len(input_set) == osize: + break + #end + + 
summary = '%s-%d' % (desc, len(input_set)) + osize = len(input_set) + + print 'generate %s w/ %d configs' % (summary, config_num) + syn = [RandomTest(0, (None, None, summary), config, + syntuple = (smap[config][0], smap[config][1])) + for config in smap.keys()] + syn = ScoreTests(syn) + #print 'smap is %s' % (smap,) + #print 'syn is %s' % (' and '.join([str(x) for x in syn])) + GraphResults(summary, syn) + #end +#end + +if __name__ == "__main__": + try: + RunCommand(['rm', '-rf', TMPDIR]) + os.mkdir(TMPDIR) + + rcsf = GetTestRcsFiles() + #generator = rcsf.Generator() + + #sample = SampleDataTest([SAMPLEDIR]) + #generator = sample.Generator() + + #rand = random.Random(135135135135135) + #RunTestLoop(rand, generator, TEST_ROUNDS) + + #RunSpeedTest() + + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9'])) + x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw'])) + x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T'])) + + #x1r = rcsf.AllPairsByDate(Xdelta1RunClass()) + + except CommandError: + pass + else: + RunCommand(['rm', '-rf', TMPDIR]) + pass + #end +#end diff --git a/xdelta3-second.h b/xdelta3-second.h new file mode 100644 index 0000000..9096d0f --- /dev/null +++ b/xdelta3-second.h @@ -0,0 +1,315 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2002, 2003, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XDELTA3_SECOND_H_ +#define _XDELTA3_SECOND_H_ + +static inline void xd3_bit_state_encode_init (bit_state *bits) +{ + bits->cur_byte = 0; + bits->cur_mask = 1; +} + +static inline int xd3_decode_bits (xd3_stream *stream, + bit_state *bits, + const uint8_t **input, + const uint8_t *input_max, + usize_t nbits, + usize_t *valuep) +{ + usize_t value = 0; + usize_t vmask = 1 << nbits; + + if (bits->cur_mask == 0x100) { goto next_byte; } + + for (;;) + { + do + { + vmask >>= 1; + + if (bits->cur_byte & bits->cur_mask) + { + value |= vmask; + } + + bits->cur_mask <<= 1; + + if (vmask == 1) { goto done; } + } + while (bits->cur_mask != 0x100); + + next_byte: + + if (*input == input_max) + { + stream->msg = "secondary decoder end of input"; + return XD3_INTERNAL; + } + + bits->cur_byte = *(*input)++; + bits->cur_mask = 1; + } + + done: + + IF_DEBUG2 (DP(RINT "(d) %u ", value)); + + (*valuep) = value; + return 0; +} + +#if REGRESSION_TEST +/* There may be extra bits at the end of secondary decompression, this macro + * checks for non-zero bits. This is overly strict, but helps pass the + * single-bit-error regression test. */ +static int +xd3_test_clean_bits (xd3_stream *stream, bit_state *bits) +{ + for (; bits->cur_mask != 0x100; bits->cur_mask <<= 1) + { + if (bits->cur_byte & bits->cur_mask) + { + stream->msg = "secondary decoder garbage"; + return XD3_INTERNAL; + } + } + + return 0; +} +#endif + +static xd3_sec_stream* +xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp) +{ + xd3_sec_stream *sec_stream; + + if ((sec_stream = *sec_streamp) == NULL) + { + if ((*sec_streamp = stream->sec_type->alloc (stream)) == NULL) + { + return NULL; + } + + sec_stream = *sec_streamp; + + /* If cuumulative stats, init once. 
*/ + stream->sec_type->init (sec_stream); + } + + return sec_stream; +} + +static int +xd3_decode_secondary (xd3_stream *stream, + xd3_desect *sect, + xd3_sec_stream **sec_streamp) +{ + xd3_sec_stream *sec_stream; + uint32_t dec_size; + uint8_t *out_used; + int ret; + + if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) + { + return ENOMEM; + } + + /* Decode the size, allocate the buffer. */ + if ((ret = xd3_read_size (stream, & sect->buf, + sect->buf_max, & dec_size)) || + (ret = xd3_decode_allocate (stream, dec_size, + & sect->copied2, & sect->alloc2))) + { + return ret; + } + + out_used = sect->copied2; + + if ((ret = stream->sec_type->decode (stream, sec_stream, + & sect->buf, sect->buf_max, + & out_used, out_used + dec_size))) + { + return ret; + } + + if (sect->buf != sect->buf_max) + { + stream->msg = "secondary decoder finished with unused input"; + return XD3_INTERNAL; + } + + if (out_used != sect->copied2 + dec_size) + { + stream->msg = "secondary decoder short output"; + return XD3_INTERNAL; + } + + sect->buf = sect->copied2; + sect->buf_max = sect->copied2 + dec_size; + sect->size = dec_size; + + return 0; +} + +#if XD3_ENCODER +static inline int xd3_encode_bit (xd3_stream *stream, + xd3_output **output, + bit_state *bits, + int bit) +{ + int ret; + + if (bit) + { + bits->cur_byte |= bits->cur_mask; + } + + /* OPT: Might help to buffer more than 8 bits at once. */ + if (bits->cur_mask == 0x80) + { + if ((ret = xd3_emit_byte (stream, output, bits->cur_byte)) != 0) + { + return ret; + } + + bits->cur_mask = 1; + bits->cur_byte = 0; + } + else + { + bits->cur_mask <<= 1; + } + + return 0; +} + +static inline int xd3_flush_bits (xd3_stream *stream, + xd3_output **output, + bit_state *bits) +{ + return (bits->cur_mask == 1) ? 
0 : + xd3_emit_byte (stream, output, bits->cur_byte); +} + +static inline int xd3_encode_bits (xd3_stream *stream, + xd3_output **output, + bit_state *bits, + usize_t nbits, + usize_t value) +{ + int ret; + usize_t mask = 1 << nbits; + + XD3_ASSERT (nbits > 0); + XD3_ASSERT (nbits < sizeof (usize_t) * 8); + XD3_ASSERT (value < mask); + + do + { + mask >>= 1; + + if ((ret = xd3_encode_bit (stream, output, bits, value & mask))) + { + return ret; + } + } + while (mask != 1); + + IF_DEBUG2 (DP(RINT "(e) %u ", value)); + + return 0; +} + +static int +xd3_encode_secondary (xd3_stream *stream, + xd3_output **head, + xd3_output **tail, + xd3_sec_stream **sec_streamp, + xd3_sec_cfg *cfg, + int *did_it) +{ + xd3_sec_stream *sec_stream; + xd3_output *tmp_head; + xd3_output *tmp_tail; + + usize_t comp_size; + usize_t orig_size; + + int ret; + + orig_size = xd3_sizeof_output (*head); + + if (orig_size < SECONDARY_MIN_INPUT) { return 0; } + + if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) + { + return ENOMEM; + } + + tmp_head = xd3_alloc_output (stream, NULL); + + /* Encode the size, encode the data. Encoding the size makes it + * simpler, but is a little gross. Should not need the entire + * section in contiguous memory, but it is much easier this way. */ + if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) || + (ret = stream->sec_type->encode (stream, sec_stream, *head, + tmp_head, cfg))) + { + goto getout; + } + + /* If the secondary compressor determines it's no good, it returns + * XD3_NOSECOND. 
*/ + + /* Setup tmp_tail, comp_size */ + tmp_tail = tmp_head; + comp_size = tmp_head->next; + + while (tmp_tail->next_page != NULL) + { + tmp_tail = tmp_tail->next_page; + comp_size += tmp_tail->next; + } + + XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head)); + XD3_ASSERT (tmp_tail != NULL); + + if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS)) + { + IF_DEBUG1(DP(RINT "secondary saved %u bytes: %u -> %u (%0.2f%%)\n", + orig_size - comp_size, orig_size, comp_size, + 100.0 * (double) comp_size / (double) orig_size)); + + xd3_free_output (stream, *head); + + *head = tmp_head; + *tail = tmp_tail; + *did_it = 1; + } + else + { + getout: + if (ret == XD3_NOSECOND) { ret = 0; } + xd3_free_output (stream, tmp_head); + } + + return ret; +} +#endif /* XD3_ENCODER */ +#endif /* _XDELTA3_SECOND_H_ */ diff --git a/xdelta3-test.h b/xdelta3-test.h new file mode 100644 index 0000000..979683f --- /dev/null +++ b/xdelta3-test.h @@ -0,0 +1,2827 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This is public-domain Mersenne Twister code, + * attributed to Michael Brundage. Thanks! 
+ * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html + */ +static const uint32_t TEST_SEED1 = 5489UL; +#define MT_LEN 624 +#define MT_IA 397 +static const uint32_t UPPER_MASK = 0x80000000; +static const uint32_t LOWER_MASK = 0x7FFFFFFF; +static const uint32_t MATRIX_A = 0x9908B0DF; + +typedef struct mtrand mtrand; + +struct mtrand { + int mt_index_; + uint32_t mt_buffer_[MT_LEN]; +}; + +void mt_init(mtrand *mt, uint32_t seed) { + int i; + mt->mt_buffer_[0] = seed; + mt->mt_index_ = MT_LEN; + for (i = 1; i < MT_LEN; i++) { + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt->mt_buffer_[i] = + (1812433253UL * (mt->mt_buffer_[i-1] ^ (mt->mt_buffer_[i-1] >> 30)) + i); + } +} + + +uint32_t mt_random (mtrand *mt) { + uint32_t y; + unsigned long mag01[2]; + mag01[0] = 0; + mag01[1] = MATRIX_A; + + if (mt->mt_index_ >= MT_LEN) { + int kk; + + for (kk = 0; kk < MT_LEN - MT_IA; kk++) { + y = (mt->mt_buffer_[kk] & UPPER_MASK) | (mt->mt_buffer_[kk + 1] & LOWER_MASK); + mt->mt_buffer_[kk] = mt->mt_buffer_[kk + MT_IA] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + for (;kk < MT_LEN - 1; kk++) { + y = (mt->mt_buffer_[kk] & UPPER_MASK) | (mt->mt_buffer_[kk + 1] & LOWER_MASK); + mt->mt_buffer_[kk] = mt->mt_buffer_[kk + (MT_IA - MT_LEN)] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt->mt_buffer_[MT_LEN - 1] & UPPER_MASK) | (mt->mt_buffer_[0] & LOWER_MASK); + mt->mt_buffer_[MT_LEN - 1] = mt->mt_buffer_[MT_IA - 1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + mt->mt_index_ = 0; + } + + y = mt->mt_buffer_[mt->mt_index_++]; + + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +static mtrand static_mtrand; + +#include <math.h> + +static uint32_t +mt_exp_rand (uint32_t mean, uint32_t max_value) +{ + double mean_d = mean; + double erand = log (1.0 / (mt_random 
(&static_mtrand) / + (double)UINT32_MAX)); + uint32_t x = (uint32_t) (mean_d * erand + 0.5); + + return min (x, max_value); +} + +#ifndef WIN32 +#include <sys/wait.h> +#endif + +#define MSG_IS(x) (stream->msg != NULL && strcmp ((x), stream->msg) == 0) + +static const usize_t TWO_MEGS_AND_DELTA = (2 << 20) + (1 << 10); +static const usize_t ADDR_CACHE_ROUNDS = 10000; + +static const usize_t TEST_FILE_MEAN = 16384; +static const double TEST_ADD_MEAN = 128; +static const double TEST_ADD_MAX = 512; +static const double TEST_ADD_RATIO = 0.1; +static const double TEST_EPSILON = 0.25; + +#define TESTBUFSIZE (1024 * 16) + +#define TESTFILESIZE (1024) + +static char TEST_TARGET_FILE[TESTFILESIZE]; +static char TEST_SOURCE_FILE[TESTFILESIZE]; +static char TEST_DELTA_FILE[TESTFILESIZE]; +static char TEST_RECON_FILE[TESTFILESIZE]; +static char TEST_RECON2_FILE[TESTFILESIZE]; +static char TEST_COPY_FILE[TESTFILESIZE]; +static char TEST_NOPERM_FILE[TESTFILESIZE]; + +#define CHECK(cond) if (!(cond)) { DP(RINT "check failure: " #cond); abort(); } + +/* Use a fixed soft config so that test values are fixed. See also + * test_compress_text(). */ +static const char* test_softcfg_str = "-C9,3,4,8,2,36,70"; + +static int test_setup (void); + +/*********************************************************************** + TEST HELPERS + ***********************************************************************/ + +static void DOT (void) { DP(RINT "."); } +static int do_cmd (xd3_stream *stream, const char *buf) +{ + int ret; + if ((ret = system (buf)) != 0) + { + if (WIFEXITED (ret)) + { + stream->msg = "command exited non-zero"; + } + else + { + stream->msg = "abnormal command termination"; + } + return XD3_INTERNAL; + } + return 0; +} + +static int do_fail (xd3_stream *stream, const char *buf) +{ + int ret; + ret = system (buf); + if (! 
WIFEXITED (ret) || WEXITSTATUS (ret) != 1) + { + stream->msg = "command should have not succeeded"; + DP(RINT "command was %s", buf); + return XD3_INTERNAL; + } + return 0; +} + +/* Test that the exponential distribution actually produces its mean. */ +static int +test_random_numbers (xd3_stream *stream, int ignore) +{ + usize_t i; + usize_t sum = 0; + usize_t mean = 50; + usize_t n_rounds = 1000000; + double average, error; + double allowed_error = 0.1; + + mt_init (& static_mtrand, 0x9f73f7fe); + + for (i = 0; i < n_rounds; i += 1) + { + sum += mt_exp_rand (mean, USIZE_T_MAX); + } + + average = (double) sum / (double) n_rounds; + error = average - (double) mean; + + if (error < allowed_error && error > -allowed_error) + { + return 0; + } + + /*DP(RINT "error is %f\n", error);*/ + stream->msg = "random distribution looks broken"; + return XD3_INTERNAL; +} + +static void +test_unlink (char* file) +{ + char buf[TESTBUFSIZE]; + while (unlink (file) != 0) + { + if (errno == ENOENT) + { + break; + } + sprintf (buf, "rm -f %s", file); + system (buf); + } +} + +static void +test_cleanup (void) +{ + test_unlink (TEST_TARGET_FILE); + test_unlink (TEST_SOURCE_FILE); + test_unlink (TEST_DELTA_FILE); + test_unlink (TEST_RECON_FILE); + test_unlink (TEST_RECON2_FILE); + test_unlink (TEST_COPY_FILE); + test_unlink (TEST_NOPERM_FILE); +} + +static int +test_setup (void) +{ + static int x = 0; + x++; + sprintf (TEST_TARGET_FILE, "/tmp/xdtest.target.%d", x); + sprintf (TEST_SOURCE_FILE, "/tmp/xdtest.source.%d", x); + sprintf (TEST_DELTA_FILE, "/tmp/xdtest.delta.%d", x); + sprintf (TEST_RECON_FILE, "/tmp/xdtest.recon.%d", x); + sprintf (TEST_RECON2_FILE, "/tmp/xdtest.recon2.%d", x); + sprintf (TEST_COPY_FILE, "/tmp/xdtest.copy.%d", x); + sprintf (TEST_NOPERM_FILE, "/tmp/xdtest.noperm.%d", x); + test_cleanup(); + return 0; +} + +static int +test_make_inputs (xd3_stream *stream, xoff_t *ss_out, xoff_t *ts_out) +{ + usize_t ts = (mt_random (&static_mtrand) % TEST_FILE_MEAN) + 
TEST_FILE_MEAN / 2; + usize_t ss = (mt_random (&static_mtrand) % TEST_FILE_MEAN) + TEST_FILE_MEAN / 2; + uint8_t *buf = (uint8_t*) malloc (ts + ss), *sbuf = buf, *tbuf = buf + ss; + usize_t sadd = 0, sadd_max = ss * TEST_ADD_RATIO; + FILE *tf = NULL, *sf = NULL; + usize_t i, j; + int ret; + + if (buf == NULL) { return ENOMEM; } + + if ((tf = fopen (TEST_TARGET_FILE, "w")) == NULL || + (ss_out != NULL && (sf = fopen (TEST_SOURCE_FILE, "w")) == NULL)) + { + stream->msg = "write failed"; + ret = get_errno (); + goto failure; + } + + if (ss_out != NULL) + { + for (i = 0; i < ss; ) + { + sbuf[i++] = mt_random (&static_mtrand); + } + } + + /* Then modify the data to produce copies, everything not copied is + * an add. The following logic produces the TEST_ADD_RATIO. The + * variable SADD contains the number of adds so far, which should + * not exceed SADD_MAX. */ + + /* DP(RINT "ss = %u ts = %u\n", ss, ts); */ + for (i = 0; i < ts; ) + { + size_t left = ts - i; + size_t next = mt_exp_rand (TEST_ADD_MEAN, TEST_ADD_MAX); + size_t add_left = sadd_max - sadd; + double add_prob = (left == 0) ? 0 : (add_left / (double) left); + int do_copy; + + next = min (left, next); + do_copy = (next > add_left || (mt_random (&static_mtrand) / (double)USIZE_T_MAX) >= add_prob); + + if (ss_out == NULL) + { + do_copy &= (i > 0); + } + else + { + do_copy &= (ss - next) > 0; + } + + if (do_copy) + { + /* Copy */ + size_t offset = mt_random (&static_mtrand) % ((ss_out == NULL) ? i : (ss - next)); + /* DP(RINT "[%u] copy %u at %u ", i, next, offset); */ + + for (j = 0; j < next; j += 1) + { + char c = ((ss_out == NULL) ? 
tbuf : sbuf)[offset + j]; + /* DP(RINT "%x%x", (c >> 4) & 0xf, c & 0xf); */ + tbuf[i++] = c; + } + /* DP(RINT "\n"); */ + } + else + { + /* Add */ + /* DP(RINT "[%u] add %u ", i, next); */ + for (j = 0; j < next; j += 1) + { + char c = mt_random (&static_mtrand); + /* DP(RINT "%x%x", (c >> 4) & 0xf, c & 0xf); */ + tbuf[i++] = c; + } + /* DP(RINT "\n"); */ + sadd += next; + } + } + + /* DP(RINT "sadd = %u max = %u\n", sadd, sadd_max); */ + + if ((fwrite (tbuf, 1, ts, tf) != ts) || + (ss_out != NULL && (fwrite (sbuf, 1, ss, sf) != ss))) + { + stream->msg = "write failed"; + ret = get_errno (); + goto failure; + } + + if ((ret = fclose (tf)) || (ss_out != NULL && (ret = fclose (sf)))) + { + stream->msg = "close failed"; + ret = get_errno (); + goto failure; + } + + if (ts_out) { (*ts_out) = ts; } + if (ss_out) { (*ss_out) = ss; } + + failure: + free (buf); + return ret; +} + +static int +compare_files (xd3_stream *stream, const char* tgt, const char *rec) +{ + FILE *orig, *recons; + static uint8_t obuf[TESTBUFSIZE], rbuf[TESTBUFSIZE]; + int offset = 0; + int i; + int oc, rc; + + if ((orig = fopen (tgt, "r")) == NULL) + { + DP(RINT "open %s failed", tgt); + stream->msg = "open failed"; + return get_errno (); + } + + if ((recons = fopen (rec, "r")) == NULL) + { + DP(RINT "open %s failed", rec); + stream->msg = "open failed"; + return get_errno (); + } + + for (;;) + { + oc = fread (obuf, 1, TESTBUFSIZE, orig); + rc = fread (rbuf, 1, TESTBUFSIZE, recons); + + if (oc < 0 || rc < 0) + { + stream->msg = "read failed"; + return get_errno (); + } + + if (oc != rc) + { + stream->msg = "compare files: different length"; + return XD3_INTERNAL; + } + + if (oc == 0) + { + break; + } + + for (i = 0; i < oc; i += 1) + { + if (obuf[i] != rbuf[i]) + { + stream->msg = "compare files: different values"; + return XD3_INTERNAL; + } + } + + offset += oc; + } + + fclose (orig); + fclose (recons); + return 0; +} + +static int +test_save_copy (const char *origname) +{ + char buf[TESTBUFSIZE]; 
+ int ret; + + sprintf (buf, "cp -f %s %s", origname, TEST_COPY_FILE); + + if ((ret = system (buf)) != 0) + { + return XD3_INTERNAL; + } + + return 0; +} + +static int +test_file_size (const char* file, xoff_t *size) +{ + struct stat sbuf; + int ret; + (*size) = 0; + + if (stat (file, & sbuf) < 0) + { + ret = get_errno (); + DP(RINT "xdelta3: stat failed: %s: %s\n", file, strerror (ret)); + return ret; + } + + if (! S_ISREG (sbuf.st_mode)) + { + ret = XD3_INTERNAL; + DP(RINT "xdelta3: not a regular file: %s: %s\n", file, strerror (ret)); + return ret; + } + + (*size) = sbuf.st_size; + return 0; +} + +/*********************************************************************** + READ OFFSET + ***********************************************************************/ + +/* Common test for read_integer errors: encodes a 64-bit value and + * then attempts to read as a 32-bit value. If TRUNC is non-zero, + * attempts to get errors by shortening the input, otherwise it should + * overflow. Expects XD3_INTERNAL and MSG. */ +static int +test_read_integer_error (xd3_stream *stream, usize_t trunto, const char *msg) +{ + uint64_t eval = 1ULL << 34; + uint32_t rval; + xd3_output *buf = NULL; + const uint8_t *max; + const uint8_t *inp; + int ret; + + buf = xd3_alloc_output (stream, buf); + + if ((ret = xd3_emit_uint64_t (stream, & buf, eval))) + { + goto fail; + } + + again: + + inp = buf->base; + max = buf->base + buf->next - trunto; + + if ((ret = xd3_read_uint32_t (stream, & inp, max, & rval)) != XD3_INVALID_INPUT || + !MSG_IS (msg)) + { + ret = XD3_INTERNAL; + } + else if (trunto && trunto < buf->next) + { + trunto += 1; + goto again; + } + else + { + ret = 0; + } + + fail: + xd3_free_output (stream, buf); + return ret; +} + +/* Test integer overflow using the above routine. */ +static int +test_decode_integer_overflow (xd3_stream *stream, int unused) +{ + return test_read_integer_error (stream, 0, "overflow in read_intger"); +} + +/* Test integer EOI using the above routine. 
*/ +static int +test_decode_integer_end_of_input (xd3_stream *stream, int unused) +{ + return test_read_integer_error (stream, 1, "end-of-input in read_integer"); +} + +/* Test that emit_integer/decode_integer/sizeof_integer/read_integer + * work on correct inputs. Tests powers of (2^7), plus or minus, up + * to the maximum value. */ +#define TEST_ENCODE_DECODE_INTEGER(TYPE,ONE,MAX) \ + xd3_output *rbuf = NULL; \ + xd3_output *dbuf = NULL; \ + TYPE values[64]; \ + usize_t nvalues = 0; \ + usize_t i; \ + int ret = 0; \ + \ + for (i = 0; i < (sizeof (TYPE) * 8); i += 7) \ + { \ + values[nvalues++] = (ONE << i) - ONE; \ + values[nvalues++] = (ONE << i); \ + values[nvalues++] = (ONE << i) + ONE; \ + } \ + \ + values[nvalues++] = MAX-ONE; \ + values[nvalues++] = MAX; \ + \ + rbuf = xd3_alloc_output (stream, rbuf); \ + dbuf = xd3_alloc_output (stream, dbuf); \ + \ + for (i = 0; i < nvalues; i += 1) \ + { \ + const uint8_t *max; \ + const uint8_t *inp; \ + TYPE val; \ + \ + DOT (); \ + rbuf->next = 0; \ + \ + if ((ret = xd3_emit_ ## TYPE (stream, & rbuf, values[i])) || \ + (ret = xd3_emit_ ## TYPE (stream, & dbuf, values[i]))) \ + { \ + goto fail; \ + } \ + \ + inp = rbuf->base; \ + max = rbuf->base + rbuf->next; \ + \ + if (rbuf->next != xd3_sizeof_ ## TYPE (values[i])) \ + { \ + ret = XD3_INTERNAL; \ + goto fail; \ + } \ + \ + if ((ret = xd3_read_ ## TYPE (stream, & inp, max, & val))) \ + { \ + goto fail; \ + } \ + \ + if (val != values[i]) \ + { \ + ret = XD3_INTERNAL; \ + goto fail; \ + } \ + \ + DOT (); \ + } \ + \ + stream->next_in = dbuf->base; \ + stream->avail_in = dbuf->next; \ + \ + for (i = 0; i < nvalues; i += 1) \ + { \ + TYPE val; \ + \ + if ((ret = xd3_decode_ ## TYPE (stream, & val))) \ + { \ + goto fail; \ + } \ + \ + if (val != values[i]) \ + { \ + ret = XD3_INTERNAL; \ + goto fail; \ + } \ + } \ + \ + if (stream->avail_in != 0) \ + { \ + ret = XD3_INTERNAL; \ + goto fail; \ + } \ + \ + fail: \ + xd3_free_output (stream, rbuf); \ + xd3_free_output 
(stream, dbuf); \ + \ + return ret + +static int +test_encode_decode_uint32_t (xd3_stream *stream, int unused) +{ + TEST_ENCODE_DECODE_INTEGER(uint32_t,1U,UINT32_MAX); +} + +static int +test_encode_decode_uint64_t (xd3_stream *stream, int unused) +{ + TEST_ENCODE_DECODE_INTEGER(uint64_t,1ULL,UINT64_MAX); +} + +static int +test_usize_t_overflow (xd3_stream *stream, int unused) +{ + if (USIZE_T_OVERFLOW (0, 0)) { goto fail; } + if (USIZE_T_OVERFLOW (USIZE_T_MAX, 0)) { goto fail; } + if (USIZE_T_OVERFLOW (0, USIZE_T_MAX)) { goto fail; } + if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, 0)) { goto fail; } + if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2)) { goto fail; } + if (USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2 + 1)) { goto fail; } + + if (! USIZE_T_OVERFLOW (USIZE_T_MAX, 1)) { goto fail; } + if (! USIZE_T_OVERFLOW (1, USIZE_T_MAX)) { goto fail; } + if (! USIZE_T_OVERFLOW (USIZE_T_MAX / 2 + 1, USIZE_T_MAX / 2 + 1)) { goto fail; } + + return 0; + + fail: + stream->msg = "incorrect overflow computation"; + return XD3_INTERNAL; +} + +static int +test_forward_match (xd3_stream *stream, int unused) +{ + usize_t i; + uint8_t buf1[256], buf2[256]; + + memset(buf1, 0, 256); + memset(buf2, 0, 256); + + for (i = 0; i < 256; i++) + { + CHECK(xd3_forward_match(buf1, buf2, i) == i); + } + + for (i = 0; i < 255; i++) + { + buf2[i] = 1; + CHECK(xd3_forward_match(buf1, buf2, 256) == i); + buf2[i] = 0; + } + + return 0; +} + +/*********************************************************************** + Address cache + ***********************************************************************/ + +static int +test_address_cache (xd3_stream *stream, int unused) +{ + int ret; + usize_t i; + usize_t offset; + usize_t *addrs; + uint8_t *big_buf, *buf_max; + const uint8_t *buf; + xd3_output *outp; + uint8_t *modes; + int mode_counts[16]; + + stream->acache.s_near = stream->code_table_desc->near_modes; + stream->acache.s_same = stream->code_table_desc->same_modes; + + if ((ret = 
xd3_encode_init_partial (stream))) { return ret; } + + addrs = (usize_t*) xd3_alloc (stream, sizeof (usize_t), ADDR_CACHE_ROUNDS); + modes = (uint8_t*) xd3_alloc (stream, sizeof (uint8_t), ADDR_CACHE_ROUNDS); + + memset (mode_counts, 0, sizeof (mode_counts)); + memset (modes, 0, ADDR_CACHE_ROUNDS); + + addrs[0] = 0; + + mt_init (& static_mtrand, 0x9f73f7fc); + + /* First pass: encode addresses */ + xd3_init_cache (& stream->acache); + + for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1) + { + double p; + usize_t addr; + usize_t prev_i; + usize_t nearby; + + p = (mt_random (&static_mtrand) / (double)USIZE_T_MAX); + prev_i = mt_random (&static_mtrand) % offset; + nearby = (mt_random (&static_mtrand) % 256) % offset; + nearby = max (1U, nearby); + + if (p < 0.1) { addr = addrs[offset-nearby]; } + else if (p < 0.4) { addr = min (addrs[prev_i] + nearby, offset-1); } + else { addr = prev_i; } + + if ((ret = xd3_encode_address (stream, addr, offset, & modes[offset]))) { return ret; } + + addrs[offset] = addr; + mode_counts[modes[offset]] += 1; + } + + /* Copy addresses into a contiguous buffer. */ + big_buf = (uint8_t*) xd3_alloc (stream, xd3_sizeof_output (ADDR_HEAD (stream)), 1); + + for (offset = 0, outp = ADDR_HEAD (stream); outp != NULL; offset += outp->next, outp = outp->next_page) + { + memcpy (big_buf + offset, outp->base, outp->next); + } + + buf_max = big_buf + offset; + buf = big_buf; + + /* Second pass: decode addresses */ + xd3_init_cache (& stream->acache); + + for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1) + { + uint32_t addr; + + if ((ret = xd3_decode_address (stream, offset, modes[offset], & buf, buf_max, & addr))) { return ret; } + + if (addr != addrs[offset]) + { + stream->msg = "incorrect decoded address"; + return XD3_INTERNAL; + } + } + + /* Check that every byte, mode was used. 
*/ + if (buf != buf_max) + { + stream->msg = "address bytes not used"; + return XD3_INTERNAL; + } + + for (i = 0; i < (2 + stream->acache.s_same + stream->acache.s_near); i += 1) + { + if (mode_counts[i] == 0) + { + stream->msg = "address mode not used"; + return XD3_INTERNAL; + } + } + + xd3_free (stream, modes); + xd3_free (stream, addrs); + xd3_free (stream, big_buf); + + return 0; +} + +/*********************************************************************** + Encode and decode with single bit error + ***********************************************************************/ + +/* It compresses from 256 to around 185 bytes. + * Avoids matching addresses that are a single-bit difference. + * Avoids matching address 0. */ +static const uint8_t test_text[] = +"this is a story\n" +"abouttttttttttt\n" +"- his is a stor\n" +"- about nothing " +" all. boutique -" +"his story is a -" +"about " +"what happens all" +" the time what -" +"am I ttttttt the" +" person said, so" +" what, per son -" +" gory story is -" +" about nothing -" +"tttttt to test -" +"his sto nothing"; + +static const uint8_t test_apphead[] = "header test"; + +static int +test_compress_text (xd3_stream *stream, + uint8_t *encoded, + usize_t *encoded_size) +{ + int ret; + xd3_config cfg; + int oflags = stream->flags; + int flags = stream->flags | XD3_FLUSH; + + xd3_free_stream (stream); + xd3_init_config (& cfg, flags); + + /* This configuration is fixed so that the "expected non-error" the counts in + * decompress_single_bit_errors are too. See test_coftcfg_str. 
*/ + cfg.smatch_cfg = XD3_SMATCH_SOFT; + cfg.smatcher_soft.name = "test"; + cfg.smatcher_soft.large_look = 64; /* no source, not used */ + cfg.smatcher_soft.large_step = 64; /* no source, not used */ + cfg.smatcher_soft.small_look = 4; + cfg.smatcher_soft.small_chain = 128; + cfg.smatcher_soft.small_lchain = 16; + cfg.smatcher_soft.max_lazy = 8; + cfg.smatcher_soft.long_enough = 128; + + xd3_config_stream (stream, & cfg); + + (*encoded_size) = 0; + + xd3_set_appheader (stream, test_apphead, strlen ((char*) test_apphead)); + + if ((ret = xd3_encode_stream (stream, test_text, sizeof (test_text), + encoded, encoded_size, 4*sizeof (test_text)))) { goto fail; } + + if ((ret = xd3_close_stream (stream))) { goto fail; } + + fail: + xd3_free_stream (stream); + xd3_init_config (& cfg, oflags); + xd3_config_stream (stream, & cfg); + return ret; +} + +static int +test_decompress_text (xd3_stream *stream, uint8_t *enc, usize_t enc_size, usize_t test_desize) +{ + xd3_config cfg; + char decoded[sizeof (test_text)]; + uint8_t *apphead; + usize_t apphead_size; + usize_t decoded_size; + const char *msg; + int ret; + usize_t pos = 0; + int flags = stream->flags; + usize_t take; + + input: + /* Test decoding test_desize input bytes at a time */ + take = min (enc_size - pos, test_desize); + CHECK(take > 0); + + xd3_avail_input (stream, enc + pos, take); + again: + ret = xd3_decode_input (stream); + + pos += take; + take = 0; + + switch (ret) + { + case XD3_OUTPUT: + break; + case XD3_WINSTART: + case XD3_GOTHEADER: + goto again; + case XD3_INPUT: + if (pos < enc_size) { goto input; } + /* else fallthrough */ + case XD3_WINFINISH: + default: + goto fail; + } + + CHECK(ret == XD3_OUTPUT); + CHECK(pos == enc_size); + + if (stream->avail_out != sizeof (test_text)) + { + stream->msg = "incorrect output size"; + ret = XD3_INTERNAL; + goto fail; + } + + decoded_size = stream->avail_out; + memcpy (decoded, stream->next_out, stream->avail_out); + + xd3_consume_output (stream); + + if ((ret = 
xd3_get_appheader (stream, & apphead, & apphead_size))) { goto fail; } + + if (apphead_size != strlen ((char*) test_apphead) || + memcmp (apphead, test_apphead, strlen ((char*) test_apphead)) != 0) + { + stream->msg = "incorrect appheader"; + ret = XD3_INTERNAL; + goto fail; + } + + if ((ret = xd3_decode_input (stream)) != XD3_WINFINISH || + (ret = xd3_close_stream (stream)) != 0) + { + goto fail; + } + + if (decoded_size != sizeof (test_text) || + memcmp (decoded, test_text, sizeof (test_text)) != 0) + { + stream->msg = "incorrect output text"; + ret = EIO; + } + + fail: + msg = stream->msg; + xd3_free_stream (stream); + xd3_init_config (& cfg, flags); + xd3_config_stream (stream, & cfg); + stream->msg = msg; + + return ret; +} + +static int +test_decompress_single_bit_error (xd3_stream *stream, int expected_non_failures) +{ + int ret; + usize_t i; + uint8_t encoded[4*sizeof (test_text)]; /* make room for alt code table */ + usize_t encoded_size; + int non_failures = 0; + int cksum = (stream->flags & XD3_ADLER32) != 0; + +//#define DEBUG_TEST_FAILURES +#ifndef DEBUG_TEST_FAILURES +#define TEST_FAILURES() +#else + /* For checking non-failure cases by hand, enable this macro and run + * xdelta printdelta with print_cpymode disabled. Every non-failure + * should change a copy address mode, which doesn't cause a failure + * because the address cache starts out with all zeros. 
+ + ./xdelta3 test + for i in test_text.xz.*; do ./xdelta3 printdelta $i > $i.out; + diff $i.out test_text.xz.0.out; done + + */ + system ("rm -rf test_text.*"); + { + char buf[TESTBUFSIZE]; + FILE *f; + sprintf (buf, "test_text"); + f = fopen (buf, "w"); + fwrite (test_text,1,sizeof (test_text),f); + fclose (f); + } +#define TEST_FAILURES() \ + do { \ + char buf[TESTBUFSIZE]; \ + FILE *f; \ + sprintf (buf, "test_text.xz.%d", non_failures); \ + f = fopen (buf, "w"); \ + fwrite (encoded,1,encoded_size,f); \ + fclose (f); \ + } while (0) +#endif + + stream->sec_data.inefficient = 1; + stream->sec_inst.inefficient = 1; + stream->sec_addr.inefficient = 1; + + /* Encode text, test correct input */ + if ((ret = test_compress_text (stream, encoded, & encoded_size))) + { + /*stream->msg = "without error: encode failure";*/ + return ret; + } + + if ((ret = test_decompress_text (stream, encoded, encoded_size, + sizeof (test_text) / 4))) + { + /*stream->msg = "without error: decode failure";*/ + return ret; + } + + TEST_FAILURES(); + + for (i = 0; i < encoded_size*8; i += 1) + { + /* Single bit error. */ + encoded[i/8] ^= 1 << (i%8); + + if ((ret = test_decompress_text (stream, encoded, + encoded_size, sizeof (test_text))) == 0) + { + non_failures += 1; +#ifdef DEBUG_TEST_FAILURES + DP(RINT "%u[%u] non-failure %u\n", i/8, i%8, non_failures); +#endif + TEST_FAILURES(); + } + else + { + /*DP(RINT "%u[%u] failure: %s\n", i/8, i%8, stream->msg);*/ + } + + /* decompress_text returns EIO when the final memcmp() fails, but that + * should never happen with checksumming on. */ + if (cksum && ret == EIO) + { + /*DP(RINT "%u[%u] cksum mismatch\n", i/8, i%8);*/ + stream->msg = "checksum mismatch"; + return XD3_INTERNAL; + } + + /* Undo single bit error. 
*/ + encoded[i/8] ^= 1 << (i%8); + } + + /* Test correct input again */ + if ((ret = test_decompress_text (stream, encoded, encoded_size, 1))) + { + /*stream->msg = "without error: decode failure";*/ + return ret; + } + + /* Check expected non-failures */ + if (non_failures != expected_non_failures) + { + DP(RINT "non-failures %u; expected %u", + non_failures, expected_non_failures); + stream->msg = "incorrect"; + return XD3_INTERNAL; + } + + DOT (); + + return 0; +} + +/*********************************************************************** + Secondary compression tests + ***********************************************************************/ + +#if SECONDARY_ANY +typedef int (*sec_dist_func) (xd3_stream *stream, xd3_output *data); + +static int sec_dist_func1 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func2 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func3 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func4 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func5 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func6 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func7 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func8 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func9 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func10 (xd3_stream *stream, xd3_output *data); +static int sec_dist_func11 (xd3_stream *stream, xd3_output *data); + +static sec_dist_func sec_dists[] = +{ + sec_dist_func1, + sec_dist_func2, + sec_dist_func3, + sec_dist_func4, + sec_dist_func5, + sec_dist_func6, + sec_dist_func7, + sec_dist_func8, + sec_dist_func9, + sec_dist_func10, + sec_dist_func11, +}; + +/* Test ditsribution: 100 bytes of the same character (13). 
*/ +static int +sec_dist_func1 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < 100; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, 13))) { return ret; } + } + return 0; +} + +/* Test ditsribution: uniform covering half the alphabet. */ +static int +sec_dist_func2 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, i%(ALPHABET_SIZE/2)))) { return ret; } + } + return 0; +} + +/* Test ditsribution: uniform covering the entire alphabet. */ +static int +sec_dist_func3 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, i%ALPHABET_SIZE))) { return ret; } + } + return 0; +} + +/* Test distribution: An exponential distribution covering half the alphabet */ +static int +sec_dist_func4 (xd3_stream *stream, xd3_output *data) +{ + int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*20; i += 1) + { + x = mt_exp_rand (10, ALPHABET_SIZE/2); + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: An exponential distribution covering the entire alphabet */ +static int +sec_dist_func5 (xd3_stream *stream, xd3_output *data) +{ + int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*20; i += 1) + { + x = mt_exp_rand (10, ALPHABET_SIZE-1); + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: An uniform random distribution covering half the alphabet */ +static int +sec_dist_func6 (xd3_stream *stream, xd3_output *data) +{ + int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*20; i += 1) + { + x = mt_random (&static_mtrand) % (ALPHABET_SIZE/2); + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: An uniform random distribution covering the entire alphabet */ +static int +sec_dist_func7 (xd3_stream *stream, xd3_output *data) +{ 
+ int i, ret, x; + for (i = 0; i < ALPHABET_SIZE*20; i += 1) + { + x = mt_random (&static_mtrand) % ALPHABET_SIZE; + if ((ret = xd3_emit_byte (stream, & data, x))) { return ret; } + } + return 0; +} + +/* Test distribution: A small number of frequent characters, difficult + * to divide into many groups */ +static int +sec_dist_func8 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + for (i = 0; i < ALPHABET_SIZE*5; i += 1) + { + if ((ret = xd3_emit_byte (stream, & data, 0))) { return ret; } + if ((ret = xd3_emit_byte (stream, & data, 64))) { return ret; } + if ((ret = xd3_emit_byte (stream, & data, 128))) { return ret; } + if ((ret = xd3_emit_byte (stream, & data, 255))) { return ret; } + } + return 0; +} + +/* Test distribution: One that causes many FGK block promotions (found a bug) */ +static int +sec_dist_func9 (xd3_stream *stream, xd3_output *data) +{ + int i, ret; + + int ramp = 0; + int rcount = 0; + int prom = 0; + int pcount = 0; + + /* 200 was long enough to trigger it--only when stricter checking + * that counted all blocks was turned on, but it seems I deleted + * this code. (missing fgk_free_block on line 398). */ + for (i = 0; i < ALPHABET_SIZE*200; i += 1) + { + repeat: + if (ramp < ALPHABET_SIZE) + { + /* Initially Nth symbol has (N+1) frequency */ + if (rcount <= ramp) + { + rcount += 1; + if ((ret = xd3_emit_byte (stream, & data, ramp))) { return ret; } + continue; + } + + ramp += 1; + rcount = 0; + goto repeat; + } + + /* Thereafter, promote least freq to max freq */ + if (pcount == ALPHABET_SIZE) + { + pcount = 0; + prom = (prom + 1) % ALPHABET_SIZE; + } + + pcount += 1; + if ((ret = xd3_emit_byte (stream, & data, prom))) { return ret; } + } + + return 0; +} + +/* Test distribution: freq[i] == i*i, creates a 21-bit code length, fixed in 3.0r. 
*/ +static int +sec_dist_func10 (xd3_stream *stream, xd3_output *data) +{ + int i, j, ret; + for (i = 0; i < ALPHABET_SIZE; i += 1) + { + for (j = 0; j <= (i*i); j += 1) + { + if ((ret = xd3_emit_byte (stream, & data, i))) { return ret; } + } + } + return 0; +} + +/* Test distribution: fibonacci */ +static int +sec_dist_func11 (xd3_stream *stream, xd3_output *data) +{ + int sum0 = 0; + int sum1 = 1; + int i, j, ret; + for (i = 0; i < 33; ++i) + { + for (j = 0; j < (sum0 + sum1); ++j) + { + if ((ret = xd3_emit_byte (stream, & data, i))) { return ret; } + } + sum0 = sum1; + sum1 = j; + } + return 0; +} + +static int +test_secondary_decode (xd3_stream *stream, + const xd3_sec_type *sec, + usize_t input_size, + usize_t compress_size, + const uint8_t *dec_input, + const uint8_t *dec_correct, + uint8_t *dec_output) +{ + int ret; + xd3_sec_stream *dec_stream; + const uint8_t *dec_input_used, *dec_input_end; + uint8_t *dec_output_used, *dec_output_end; + + if ((dec_stream = sec->alloc (stream)) == NULL) { return ENOMEM; } + + sec->init (dec_stream); + + dec_input_used = dec_input; + dec_input_end = dec_input + compress_size; + + dec_output_used = dec_output; + dec_output_end = dec_output + input_size; + + if ((ret = sec->decode (stream, dec_stream, + & dec_input_used, dec_input_end, + & dec_output_used, dec_output_end))) + { + goto fail; + } + + if (dec_input_used != dec_input_end) + { + stream->msg = "unused input"; + ret = XD3_INTERNAL; + goto fail; + } + + if (dec_output_used != dec_output_end) + { + stream->msg = "unfinished output"; + ret = XD3_INTERNAL; + goto fail; + } + + if (memcmp (dec_output, dec_correct, input_size) != 0) + { + stream->msg = "incorrect output"; + ret = XD3_INTERNAL; + goto fail; + } + + fail: + sec->destroy (stream, dec_stream); + return ret; +} + +static int +test_secondary (xd3_stream *stream, const xd3_sec_type *sec, int groups) +{ + int test_i, ret; + xd3_output *in_head, *out_head, *p; + usize_t p_off, input_size, compress_size; + uint8_t 
*dec_input = NULL, *dec_output = NULL, *dec_correct = NULL; + xd3_sec_stream *enc_stream; + xd3_sec_cfg cfg; + + memset (& cfg, 0, sizeof (cfg)); + + cfg.inefficient = 1; + + for (cfg.ngroups = 1; cfg.ngroups <= groups; cfg.ngroups += 1) + { + DP(RINT "\n..."); + for (test_i = 0; test_i < SIZEOF_ARRAY (sec_dists); test_i += 1) + { + mt_init (& static_mtrand, 0x9f73f7fc); + + in_head = xd3_alloc_output (stream, NULL); + out_head = xd3_alloc_output (stream, NULL); + enc_stream = sec->alloc (stream); + dec_input = NULL; + dec_output = NULL; + dec_correct = NULL; + + if (in_head == NULL || out_head == NULL || enc_stream == NULL) + { + goto nomem; + } + + if ((ret = sec_dists[test_i] (stream, in_head))) { goto fail; } + + sec->init (enc_stream); + + /* Encode data */ + if ((ret = sec->encode (stream, enc_stream, + in_head, out_head, & cfg))) + { + DP(RINT "test %u: encode: %s", test_i, stream->msg); + goto fail; + } + + /* Calculate sizes, allocate contiguous arrays for decoding */ + input_size = xd3_sizeof_output (in_head); + compress_size = xd3_sizeof_output (out_head); + + DP(RINT "%.3f", 8.0 * (double) compress_size / (double) input_size); + + if ((dec_input = xd3_alloc (stream, compress_size, 1)) == NULL || + (dec_output = xd3_alloc (stream, input_size, 1)) == NULL || + (dec_correct = xd3_alloc (stream, input_size, 1)) == NULL) + { + goto nomem; + } + + /* Fill the compressed data array */ + for (p_off = 0, p = out_head; p != NULL; + p_off += p->next, p = p->next_page) + { + memcpy (dec_input + p_off, p->base, p->next); + } + + CHECK(p_off == compress_size); + + /* Fill the input data array */ + for (p_off = 0, p = in_head; p != NULL; + p_off += p->next, p = p->next_page) + { + memcpy (dec_correct + p_off, p->base, p->next); + } + + CHECK(p_off == input_size); + + if ((ret = test_secondary_decode (stream, sec, input_size, + compress_size, dec_input, + dec_correct, dec_output))) + { + DP(RINT "test %u: decode: %s", test_i, stream->msg); + goto fail; + } + + /* 
Single-bit error test, only cover the first 10 bytes. + * Some non-failures are expected in the Huffman case: + * Changing the clclen array, for example, may not harm the + * decoding. Really looking for faults here. */ + { + int i; + int bytes = min (compress_size, 10U); + for (i = 0; i < bytes * 8; i += 1) + { + dec_input[i/8] ^= 1 << (i%8); + + if ((ret = test_secondary_decode (stream, sec, input_size, + compress_size, dec_input, + dec_correct, dec_output)) + == 0) + { + /*DP(RINT "test %u: decode single-bit [%u/%u] + error non-failure", test_i, i/8, i%8);*/ + } + + dec_input[i/8] ^= 1 << (i%8); + + if ((i % (2*bytes)) == (2*bytes)-1) + { + DOT (); + } + } + ret = 0; + } + + if (0) { nomem: ret = ENOMEM; } + + fail: + sec->destroy (stream, enc_stream); + xd3_free_output (stream, in_head); + xd3_free_output (stream, out_head); + xd3_free (stream, dec_input); + xd3_free (stream, dec_output); + xd3_free (stream, dec_correct); + + if (ret != 0) { return ret; } + } + } + + return 0; +} + +IF_FGK (static int test_secondary_fgk (xd3_stream *stream, int gp) + { return test_secondary (stream, & fgk_sec_type, gp); }) +IF_DJW (static int test_secondary_huff (xd3_stream *stream, int gp) + { return test_secondary (stream, & djw_sec_type, gp); }) +#endif + +/*********************************************************************** + TEST INSTRUCTION TABLE + ***********************************************************************/ + +/* Test that xd3_choose_instruction() does the right thing for its code + * table. 
*/ +static int +test_choose_instruction (xd3_stream *stream, int ignore) +{ + int i; + + stream->code_table = (*stream->code_table_func) (); + + for (i = 0; i < 256; i += 1) + { + const xd3_dinst *d = stream->code_table + i; + xd3_rinst prev, inst; + + CHECK(d->type1 > 0); + + memset (& prev, 0, sizeof (prev)); + memset (& inst, 0, sizeof (inst)); + + if (d->type2 == 0) + { + inst.type = d->type1; + + if ((inst.size = d->size1) == 0) + { + inst.size = TESTBUFSIZE; + } + + XD3_CHOOSE_INSTRUCTION (stream, NULL, & inst); + + if (inst.code2 != 0 || inst.code1 != i) + { + stream->msg = "wrong single instruction"; + return XD3_INTERNAL; + } + } + else + { + prev.type = d->type1; + prev.size = d->size1; + inst.type = d->type2; + inst.size = d->size2; + + XD3_CHOOSE_INSTRUCTION (stream, & prev, & inst); + + if (prev.code2 != i) + { + stream->msg = "wrong double instruction"; + return XD3_INTERNAL; + } + } + } + + return 0; +} + +/*********************************************************************** + TEST INSTRUCTION TABLE CODING + ***********************************************************************/ + +#if GENERIC_ENCODE_TABLES +/* Test that encoding and decoding a code table works */ +static int +test_encode_code_table (xd3_stream *stream, int ignore) +{ + int ret; + const uint8_t *comp_data; + usize_t comp_size; + + if ((ret = xd3_compute_alternate_table_encoding (stream, & comp_data, & comp_size))) + { + return ret; + } + + stream->acache.s_near = __alternate_code_table_desc.near_modes; + stream->acache.s_same = __alternate_code_table_desc.same_modes; + + if ((ret = xd3_apply_table_encoding (stream, comp_data, comp_size))) + { + return ret; + } + + if (memcmp (stream->code_table, xd3_alternate_code_table (), sizeof (xd3_dinst) * 256) != 0) + { + stream->msg = "wrong code table reconstruction"; + return XD3_INTERNAL; + } + + return 0; +} +#endif + +/*********************************************************************** + 64BIT STREAMING + 
***********************************************************************/ + +/* This test encodes and decodes a series of 1 megabyte windows, each + * containing a long run of zeros along with a single xoff_t size + * record to indicate the sequence. */ +static int +test_streaming (xd3_stream *in_stream, uint8_t *encbuf, uint8_t *decbuf, uint8_t *delbuf, usize_t megs) +{ + xd3_stream estream, dstream; + int ret; + usize_t i, delsize, decsize; + + if ((ret = xd3_config_stream (& estream, NULL)) || + (ret = xd3_config_stream (& dstream, NULL))) + { + goto fail; + } + + for (i = 0; i < megs; i += 1) + { + ((usize_t*) encbuf)[0] = i; + + if ((i % 200) == 199) { DOT (); } + + if ((ret = xd3_process_stream (1, & estream, xd3_encode_input, 0, + encbuf, 1 << 20, + delbuf, & delsize, 1 << 10))) + { + in_stream->msg = estream.msg; + goto fail; + } + + if ((ret = xd3_process_stream (0, & dstream, xd3_decode_input, 0, + delbuf, delsize, + decbuf, & decsize, 1 << 20))) + { + in_stream->msg = dstream.msg; + goto fail; + } + + if (decsize != 1 << 20 || + memcmp (encbuf, decbuf, 1 << 20) != 0) + { + in_stream->msg = "wrong result"; + ret = XD3_INTERNAL; + goto fail; + } + } + + if ((ret = xd3_close_stream (& estream)) || + (ret = xd3_close_stream (& dstream))) + { + goto fail; + } + + fail: + xd3_free_stream (& estream); + xd3_free_stream (& dstream); + return ret; +} + +/* Run tests of data streaming of over and around 4GB of data. */ +static int +test_compressed_stream_overflow (xd3_stream *stream, int ignore) +{ + int ret; + uint8_t *buf; + + if ((buf = (uint8_t*) malloc (TWO_MEGS_AND_DELTA)) == NULL) { return ENOMEM; } + + memset (buf, 0, TWO_MEGS_AND_DELTA); + + /* Test overflow of a 32-bit file offset. 
*/ + if (SIZEOF_XOFF_T == 4) + { + ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), (1 << 12) + 1); + + if (ret == XD3_INVALID_INPUT && MSG_IS ("decoder file offset overflow")) + { + ret = 0; + } + else + { + XPR(NT XD3_LIB_ERRMSG (stream, ret)); + stream->msg = "expected overflow condition"; + ret = XD3_INTERNAL; + goto fail; + } + } + + /* Test transfer of exactly 32bits worth of data. */ + if ((ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), 1 << 12))) { goto fail; } + + fail: + free (buf); + return ret; +} + +/*********************************************************************** + COMMAND LINE + ***********************************************************************/ + +/* For each pair of command templates in the array below, test that + * encoding and decoding commands work. Also check for the expected + * size delta, which should be approximately TEST_ADD_RATIO times the + * file size created by test_make_inputs. Due to differences in the + * application header, it is suppressed (-A) so that all delta files + * are the same. 
*/ +static int +test_command_line_arguments (xd3_stream *stream, int ignore) +{ + int i, ret; + + static const char* cmdpairs[] = + { + /* standard input, output */ + "%s %s -A < %s > %s", "%s -d < %s > %s", + "%s %s -A -e < %s > %s", "%s -d < %s > %s", + "%s %s -A= encode < %s > %s", "%s decode < %s > %s", + "%s %s -A -q encode < %s > %s", "%s -qdq < %s > %s", + + /* file input, standard output */ + "%s %s -A= %s > %s", "%s -d %s > %s", + "%s %s -A -e %s > %s", "%s -d %s > %s", + "%s %s encode -A= %s > %s", "%s decode %s > %s", + + /* file input, output */ + "%s %s -A= %s %s", "%s -d %s %s", + "%s %s -A -e %s %s", "%s -d %s %s", + "%s %s -A= encode %s %s", "%s decode %s %s", + + /* option placement */ + "%s %s -A -f %s %s", "%s -f -d %s %s", + "%s %s -e -A= %s %s", "%s -d -f %s %s", + "%s %s -f encode -A= %s %s", "%s -f decode -f %s %s", + }; + + char ecmd[TESTBUFSIZE], dcmd[TESTBUFSIZE]; + int pairs = SIZEOF_ARRAY (cmdpairs) / 2; + xoff_t tsize; + xoff_t dsize; + double ratio; + + mt_init (& static_mtrand, 0x9f73f7fc); + + for (i = 0; i < pairs; i += 1) + { + test_setup (); + if ((ret = test_make_inputs (stream, NULL, & tsize))) { return ret; } + + sprintf (ecmd, cmdpairs[2*i], program_name, + test_softcfg_str, TEST_TARGET_FILE, TEST_DELTA_FILE); + sprintf (dcmd, cmdpairs[2*i+1], program_name, + TEST_DELTA_FILE, TEST_RECON_FILE); + + /* Encode and decode. */ + if ((ret = system (ecmd)) != 0) + { + DP(RINT "xdelta3: encode command: %s\n", ecmd); + stream->msg = "encode cmd failed"; + return XD3_INTERNAL; + } + + if ((ret = system (dcmd)) != 0) + { + DP(RINT "xdelta3: decode command: %s\n", dcmd); + stream->msg = "decode cmd failed"; + return XD3_INTERNAL; + } + + /* Compare the target file. */ + if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) + { + return ret; + } + + if ((ret = test_file_size (TEST_DELTA_FILE, & dsize))) + { + return ret; + } + + ratio = (double) dsize / (double) tsize; + + /* Check that it is not too small, not too large. 
*/ + if (ratio >= TEST_ADD_RATIO + TEST_EPSILON) + { + DP(RINT "xdelta3: test encode with size ratio %.4f, " + "expected < %.4f (%"Q"u, %"Q"u)\n", + ratio, TEST_ADD_RATIO + TEST_EPSILON, dsize, tsize); + stream->msg = "strange encoding"; + return XD3_INTERNAL; + } + + if (ratio <= TEST_ADD_RATIO * (1.0 - 2 * TEST_EPSILON)) + { + DP(RINT "xdelta3: test encode with size ratio %.4f, " + "expected > %.4f\n", + ratio, TEST_ADD_RATIO - TEST_EPSILON); + stream->msg = "strange encoding"; + return XD3_INTERNAL; + } + + /* Also check that compare_files works. The delta and original should + * not be identical. */ + if ((ret = compare_files (stream, TEST_DELTA_FILE, + TEST_TARGET_FILE)) == 0) + { + stream->msg = "broken compare_files"; + return XD3_INTERNAL; + } + + test_cleanup (); + DOT (); + } + + return 0; +} + +static int +check_vcdiff_header (xd3_stream *stream, + const char *input, + const char *line_start, + const char *matches, + int yes_or_no) +{ + int ret; + char vcmd[TESTBUFSIZE], gcmd[TESTBUFSIZE]; + + sprintf (vcmd, "%s printhdr -f %s %s", + program_name, input, TEST_RECON2_FILE); + + if ((ret = system (vcmd)) != 0) + { + DP(RINT "xdelta3: printhdr command: %s\n", vcmd); + stream->msg = "printhdr cmd failed"; + return XD3_INTERNAL; + } + + sprintf (gcmd, "grep \"%s.*%s.*\" %s > /dev/null", + line_start, matches, TEST_RECON2_FILE); + + if (yes_or_no) + { + if ((ret = do_cmd (stream, gcmd))) + { + DP(RINT "xdelta3: %s\n", gcmd); + return ret; + } + } + else + { + if ((ret = do_fail (stream, gcmd))) + { + DP(RINT "xdelta3: %s\n", gcmd); + return ret; + } + } + + return 0; +} + +static int +test_recode_command2 (xd3_stream *stream, int has_source, + int variant, int change) +{ + int has_adler32 = (variant & 0x1) != 0; + int has_apphead = (variant & 0x2) != 0; + int has_secondary = (variant & 0x4) != 0; + + int change_adler32 = (change & 0x1) != 0; + int change_apphead = (change & 0x2) != 0; + int change_secondary = (change & 0x4) != 0; + + int recoded_adler32 = 
change_adler32 ? !has_adler32 : has_adler32; + int recoded_apphead = change_apphead ? !has_apphead : has_apphead; + int recoded_secondary = change_secondary ? !has_secondary : has_secondary; + + char ecmd[TESTBUFSIZE], recmd[TESTBUFSIZE], dcmd[TESTBUFSIZE]; + xoff_t tsize, ssize; + int ret; + + test_setup (); + + if ((ret = test_make_inputs (stream, has_source ? & ssize : NULL, & tsize))) + { + return ret; + } + + /* First encode */ + sprintf (ecmd, "%s %s -f ", program_name, test_softcfg_str); + strcat (ecmd, has_adler32 ? "" : "-n "); + strcat (ecmd, has_apphead ? "-A=encode_apphead " : "-A= "); + strcat (ecmd, has_secondary ? "-S djw " : "-S none "); + + if (has_source) + { + strcat (ecmd, "-s "); + strcat (ecmd, TEST_SOURCE_FILE); + strcat (ecmd, " "); + } + + strcat (ecmd, TEST_TARGET_FILE); + strcat (ecmd, " "); + strcat (ecmd, TEST_DELTA_FILE); + + if ((ret = system (ecmd)) != 0) + { + DP(RINT "xdelta3: encode command: %s\n", ecmd); + stream->msg = "encode cmd failed"; + return XD3_INTERNAL; + } + + /* Now recode */ + sprintf (recmd, "%s recode %s -f ", program_name, test_softcfg_str); + strcat (recmd, recoded_adler32 ? "" : "-n "); + strcat (recmd, !change_apphead ? "" : + (recoded_apphead ? "-A=recode_apphead " : "-A= ")); + strcat (recmd, recoded_secondary ? "-S djw " : "-S none "); + strcat (recmd, TEST_DELTA_FILE); + strcat (recmd, " "); + strcat (recmd, TEST_COPY_FILE); + + if ((ret = system (recmd)) != 0) + { + DP(RINT "xdelta3: recode command: %s\n", recmd); + stream->msg = "recode cmd failed"; + return XD3_INTERNAL; + } + + /* Check recode changes. 
*/ + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF window indicator", + "VCD_SOURCE", + has_source))) { return ret; } + + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF header indicator", + "VCD_SECONDARY", + recoded_secondary))) { return ret; } + + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF window indicator", + "VCD_ADLER32", + /* Recode can't generate an adler32 + * checksum, it can only preserve it or + * remove it. */ + has_adler32 && recoded_adler32))) + { + return ret; + } + + if (!change_apphead) + { + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF header indicator", + "VCD_APPHEADER", + has_apphead))) + { + return ret; + } + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF application header", + "encode_apphead", + has_apphead))) + { + return ret; + } + } + else + { + if ((ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF header indicator", + "VCD_APPHEADER", + recoded_apphead))) + { + return ret; + } + if (recoded_apphead && + (ret = check_vcdiff_header (stream, + TEST_COPY_FILE, + "VCDIFF application header", + "recode_apphead", + 1))) + { + return ret; + } + } + + /* Now decode */ + sprintf (dcmd, "%s -fd ", program_name); + + if (has_source) + { + strcat (dcmd, "-s "); + strcat (dcmd, TEST_SOURCE_FILE); + strcat (dcmd, " "); + } + + strcat (dcmd, TEST_COPY_FILE); + strcat (dcmd, " "); + strcat (dcmd, TEST_RECON_FILE); + + if ((ret = system (dcmd)) != 0) + { + DP(RINT "xdelta3: decode command: %s\n", dcmd); + stream->msg = "decode cmd failed"; + return XD3_INTERNAL; + } + + /* Now compare. 
*/ + if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) + { + return ret; + } + + return 0; +} + +static int +test_recode_command (xd3_stream *stream, int ignore) +{ + /* Things to test: + * - with and without a source file (recode does not change) + * + * (recode may or may not change -- 8 variations) + * - with and without adler32 + * - with and without app header + * - with and without secondary + */ + int has_source; + int variant; + int change; + int ret; + + for (has_source = 0; has_source < 2; has_source++) + { + for (variant = 0; variant < 8; variant++) + { + for (change = 0; change < 8; change++) + { + if ((ret = test_recode_command2 (stream, has_source, + variant, change))) + { + return ret; + } + } + DOT (); + } + } + + return 0; +} + +/*********************************************************************** + EXTERNAL I/O DECOMPRESSION/RECOMPRESSION + ***********************************************************************/ + +#if EXTERNAL_COMPRESSION +/* This performs one step of the test_externally_compressed_io + * function described below. It builds a pipe containing both Xdelta + * and external compression/decompression that should not modify the + * data passing through. 
*/ +static int +test_compressed_pipe (xd3_stream *stream, main_extcomp *ext, char* buf, + const char* comp_options, const char* decomp_options, + int do_ext_recomp, const char* msg) +{ + int ret; + char decomp_buf[TESTBUFSIZE]; + + if (do_ext_recomp) + { + sprintf (decomp_buf, " | %s %s", ext->decomp_cmdname, ext->decomp_options); + } + else + { + decomp_buf[0] = 0; + } + + sprintf (buf, "%s %s < %s | %s %s | %s %s%s > %s", + ext->recomp_cmdname, ext->recomp_options, + TEST_TARGET_FILE, + program_name, comp_options, + program_name, decomp_options, + decomp_buf, + TEST_RECON_FILE); + + if ((ret = system (buf)) != 0) + { + stream->msg = msg; + return XD3_INTERNAL; + } + + if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) + { + return XD3_INTERNAL; + } + + DOT (); + return 0; +} + +/* We want to test that a pipe such as: + * + * --> | gzip -cf | xdelta3 -cf | xdelta3 -dcf | gzip -dcf | --> + * + * is transparent, i.e., does not modify the stream of data. However, + * we also want to verify that at the center the data is properly + * compressed, i.e., that we do not just have a re-compressed gzip + * format, that we have an VCDIFF format. We do this in two steps. + * First test the above pipe, then test with suppressed output + * recompression (-D). The result should be the original input: + * + * --> | gzip -cf | xdelta3 -cf | xdelta3 -Ddcf | --> + * + * Finally we want to test that -D also disables input decompression: + * + * --> | gzip -cf | xdelta3 -Dcf | xdelta3 -Ddcf | gzip -dcf | --> + */ +static int +test_externally_compressed_io (xd3_stream *stream, int ignore) +{ + usize_t i; + int ret; + char buf[TESTBUFSIZE]; + + mt_init (& static_mtrand, 0x9f73f7fc); + + if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } + + for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1) + { + main_extcomp *ext = & extcomp_types[i]; + + /* Test for the existence of the external command first, if not skip. 
*/ + sprintf (buf, "%s %s < /dev/null > /dev/null", ext->recomp_cmdname, ext->recomp_options); + + if ((ret = system (buf)) != 0) + { + DP(RINT "%s=0", ext->recomp_cmdname); + continue; + } + + if ((ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-dcfq", 1, + "compression failed: identity pipe")) || + (ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-Rdcfq", 0, + "compression failed: without recompression")) || + (ret = test_compressed_pipe (stream, ext, buf, "-Dcfq", "-Rdcfq", 1, + "compression failed: without decompression"))) + { + return ret; + } + } + + return 0; +} + +/* This tests the proper functioning of external decompression for + * source files. The source and target files are identical and + * compressed by gzip. Decoding such a delta with recompression + * disbaled (-R) should produce the original, uncompressed + * source/target file. Then it checks with output recompression + * enabled--in this case the output should be a compressed copy of the + * original source/target file. Then it checks that encoding with + * decompression disabled works--the compressed files are identical + * and decoding them should always produce a compressed output, + * regardless of -R since the encoded delta file had decompression + * disabled.. + */ +static int +test_source_decompression (xd3_stream *stream, int ignore) +{ + int ret; + char buf[TESTBUFSIZE]; + const main_extcomp *ext; + + mt_init (& static_mtrand, 0x9f73f7fc); + + test_setup (); + if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; } + + /* Use gzip. */ + if ((ext = main_get_compressor ("G")) == NULL) { DP(RINT "skipped"); return 0; } + + /* Save an uncompressed copy. */ + if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; } + + /* Compress the target. */ + sprintf (buf, "%s %s < %s > %s", ext->recomp_cmdname, + ext->recomp_options, TEST_TARGET_FILE, TEST_SOURCE_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Copy back to the source. 
*/ + sprintf (buf, "cp -f %s %s", TEST_SOURCE_FILE, TEST_TARGET_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Now the two identical files are compressed. Delta-encode the target, + * with decompression. */ + sprintf (buf, "%s -eq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Decode the delta file with recompression disabled, should get an + * uncompressed file out. */ + sprintf (buf, "%s -dq -R -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON_FILE))) { return ret; } + + /* Decode the delta file with recompression, should get a compressed file + * out. But we can't compare compressed files directly. */ + sprintf (buf, "%s -dqf -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + sprintf (buf, "%s %s < %s > %s", ext->decomp_cmdname, ext->decomp_options, TEST_RECON_FILE, TEST_RECON2_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON2_FILE))) { return ret; } + + /* Encode with decompression disabled */ + sprintf (buf, "%s -feqD -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Decode the delta file with recompression enabled, it doesn't matter, + * should get the compressed file out. */ + sprintf (buf, "%s -fdq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; } + + /* Try again with recompression disabled, it doesn't make a difference. 
*/ + sprintf (buf, "%s -fqRd -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; } + test_cleanup(); + return 0; +} +#endif + +/*********************************************************************** + FORCE, STDOUT + ***********************************************************************/ + +/* This tests that output will not overwrite an existing file unless + * -f was specified. The test is for encoding (the same code handles + * it for decoding). */ +static int +test_force_behavior (xd3_stream *stream, int ignore) +{ + int ret; + char buf[TESTBUFSIZE]; + + /* Create empty target file */ + test_setup (); + sprintf (buf, "cp /dev/null %s", TEST_TARGET_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Encode to delta file */ + sprintf (buf, "%s -e %s %s", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + + /* Encode again, should fail. */ + sprintf (buf, "%s -q -e %s %s ", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_fail (stream, buf))) { return ret; } + + /* Force it, should succeed. */ + sprintf (buf, "%s -f -e %s %s", program_name, + TEST_TARGET_FILE, TEST_DELTA_FILE); + if ((ret = do_cmd (stream, buf))) { return ret; } + test_cleanup(); + return 0; +} + +/* This checks the proper operation of the -c flag. When specified + * the default output becomes stdout, otherwise the input must be + * provided (encode) or it may be defaulted (decode w/ app header). 
*/
static int
test_stdout_behavior (xd3_stream *stream, int ignore)
{
  char cmd[TESTBUFSIZE];
  int rc;

  test_setup();

  /* Start from an empty target file. */
  sprintf (cmd, "cp /dev/null %s", TEST_TARGET_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* Encoding without -c puts the delta in the named delta file. */
  sprintf (cmd, "%s -e %s %s", program_name,
           TEST_TARGET_FILE, TEST_DELTA_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* Encoding with -c puts the delta on stdout, redirected here. */
  sprintf (cmd, "%s -e -c %s > %s", program_name,
           TEST_TARGET_FILE, TEST_DELTA_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* Decoding without -c targets the original file name, which already
   * exists, so the command is expected to fail. */
  sprintf (cmd, "%s -q -d %s ", program_name, TEST_DELTA_FILE);
  rc = do_fail (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* Decoding with -c goes to stdout and must succeed. */
  sprintf (cmd, "%s -d -c %s > /dev/null", program_name, TEST_DELTA_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  test_cleanup();
  return 0;
}

/* This tests that the no-output flag (-J) works.
*/
static int
test_no_output (xd3_stream *stream, int ignore)
{
  char cmd[TESTBUFSIZE];
  int rc;

  test_setup ();

  /* Create an output file that nobody may write. */
  sprintf (cmd, "touch %s && chmod 0000 %s",
           TEST_NOPERM_FILE, TEST_NOPERM_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  rc = test_make_inputs (stream, NULL, NULL);
  if (rc != 0)
    {
      return rc;
    }

  /* Encoding onto the unwritable file must fail ... */
  sprintf (cmd, "%s -q -f -e %s %s", program_name,
           TEST_TARGET_FILE, TEST_NOPERM_FILE);
  rc = do_fail (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* ... but with -J it must succeed. */
  sprintf (cmd, "%s -J -e %s %s", program_name,
           TEST_TARGET_FILE, TEST_NOPERM_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* Produce a real delta so the decode side can be tested too. */
  sprintf (cmd, "%s -e %s %s", program_name,
           TEST_TARGET_FILE, TEST_DELTA_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* Decoding onto the unwritable file must fail ... */
  sprintf (cmd, "%s -q -f -d %s %s", program_name,
           TEST_DELTA_FILE, TEST_NOPERM_FILE);
  rc = do_fail (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  /* ... and again succeed once -J is given. */
  sprintf (cmd, "%s -J -d %s %s", program_name,
           TEST_DELTA_FILE, TEST_NOPERM_FILE);
  rc = do_cmd (stream, cmd);
  if (rc != 0)
    {
      return rc;
    }

  test_cleanup ();
  return 0;
}

/***********************************************************************
 Source identical optimization
 ***********************************************************************/

/* Computing a delta should be fastest when the two inputs are
 * identical, this checks it.  The library is called to compute a
 * delta between a 10000 byte file, 1000 byte winsize, 500 byte source
 * blocksize.  The same buffer is used for both source and target.
*/ +static int +test_identical_behavior (xd3_stream *stream, int ignore) +{ +#define IDB_TGTSZ 10000 +#define IDB_BLKSZ 500 +#define IDB_WINSZ 1000 +#define IDB_DELSZ 1000 +#define IDB_WINCNT (IDB_TGTSZ / IDB_WINSZ) + + int ret, i; + uint8_t buf[IDB_TGTSZ]; + uint8_t del[IDB_DELSZ]; + uint8_t rec[IDB_TGTSZ]; + xd3_source source; + int nextencwin = 0; + int winstarts = 0, winfinishes = 0; + xoff_t srcwin = -1; + usize_t delpos = 0, recsize; + xd3_config config; + + for (i = 0; i < IDB_TGTSZ; i += 1) { buf[i] = mt_random (&static_mtrand); } + + stream->winsize = IDB_WINSZ; + + source.size = IDB_TGTSZ; + source.blksize = IDB_BLKSZ; + source.name = ""; + source.curblk = NULL; + source.curblkno = -1; + + if ((ret = xd3_set_source (stream, & source))) { goto fail; } + + /* Compute an delta between identical source and targets. */ + for (;;) + { + ret = xd3_encode_input (stream); + + if (ret == XD3_INPUT) + { + xd3_avail_input (stream, buf + (IDB_WINSZ * nextencwin), IDB_WINSZ); + nextencwin += 1; + continue; + } + + if (ret == XD3_GETSRCBLK) + { + source.curblkno = source.getblkno; + source.onblk = IDB_BLKSZ; + source.curblk = buf + source.getblkno * IDB_BLKSZ; + srcwin = source.getblkno; + continue; + } + + if (ret == XD3_WINSTART) + { + winstarts++; + continue; + } + if (ret == XD3_WINFINISH) + { + winfinishes++; + if (winfinishes == IDB_WINCNT) + { + break; + } + continue; + } + + if (ret != XD3_OUTPUT) { goto fail; } + + CHECK(delpos + stream->avail_out <= IDB_DELSZ); + + memcpy (del + delpos, stream->next_out, stream->avail_out); + + delpos += stream->avail_out; + + xd3_consume_output (stream); + } + + CHECK(srcwin == source.blocks - 1); + CHECK(winfinishes == IDB_WINCNT); + CHECK(winstarts == IDB_WINCNT); + CHECK(nextencwin == IDB_WINCNT); + + /* Reset. 
*/ + source.blksize = IDB_TGTSZ; + source.onblk = IDB_TGTSZ; + source.curblk = buf; + source.curblkno = 0; + + if ((ret = xd3_close_stream (stream))) { goto fail; } + xd3_free_stream (stream); + xd3_init_config (& config, 0); + if ((ret = xd3_config_stream (stream, & config))) { goto fail; } + if ((ret = xd3_set_source (stream, & source))) { goto fail; } + + /* Decode. */ + if ((ret = xd3_decode_stream (stream, del, delpos, rec, & recsize, IDB_TGTSZ))) { goto fail; } + + /* Check result size and data. */ + if (recsize != IDB_TGTSZ) { stream->msg = "wrong size reconstruction"; goto fail; } + if (memcmp (rec, buf, IDB_TGTSZ) != 0) { stream->msg = "wrong data reconstruction"; goto fail; } + + /* Check that there was one copy per window. */ + IF_DEBUG (if (stream->n_scpy != IDB_WINCNT || + stream->n_add != 0 || + stream->n_run != 0) { stream->msg = "wrong copy count"; goto fail; }); + + /* Check that no checksums were computed because the initial match + was presumed. */ + IF_DEBUG (if (stream->large_ckcnt != 0) { stream->msg = "wrong checksum behavior"; goto fail; }); + + ret = 0; + fail: + return ret; +} + +/*********************************************************************** + String matching test + ***********************************************************************/ + +/* Check particular matching behaviors by calling + * xd3_string_match_soft directly with specific arguments. 
*/ +typedef struct _string_match_test string_match_test; + +typedef enum +{ + SM_NONE = 0, + SM_LAZY = (1 << 1), +} string_match_flags; + +struct _string_match_test +{ + const char *input; + int flags; + const char *result; +}; + +static const string_match_test match_tests[] = +{ + /* nothing */ + { "1234567890", SM_NONE, "" }, + + /* basic run, copy */ + { "11111111112323232323", SM_NONE, "R0/10 C12/8@10" }, + + /* no run smaller than MIN_RUN=8 */ + { "1111111", SM_NONE, "C1/6@0" }, + { "11111111", SM_NONE, "R0/8" }, + + /* simple promotion: the third copy address depends on promotion */ + { "ABCDEF_ABCDEF^ABCDEF", SM_NONE, "C7/6@0 C14/6@7" }, + /* { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" }, forgotten */ + + /* simple lazy: there is a better copy starting with "23 X" than "123 " */ + { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" }, + { "123 23 XYZ 123 XYZ", SM_LAZY, "C11/4@0 C12/6@4" }, + + /* trylazy: no lazy matches unless there are at least two characters beyond + * the first match */ + { "2123_121212", SM_LAZY, "C7/4@5" }, + { "2123_1212123", SM_LAZY, "C7/4@5" }, + { "2123_1212123_", SM_LAZY, "C7/4@5 C8/5@0" }, + + /* trylazy: no lazy matches if the copy is >= MAXLAZY=10 */ + { "2123_121212123_", SM_LAZY, "C7/6@5 C10/5@0" }, + { "2123_12121212123_", SM_LAZY, "C7/8@5 C12/5@0" }, + { "2123_1212121212123_", SM_LAZY, "C7/10@5" }, + + /* lazy run: check a run overlapped by a longer copy */ + { "11111112 111111112 1", SM_LAZY, "C1/6@0 R9/8 C10/10@0" }, + + /* lazy match: match_length,run_l >= min_match tests, shouldn't get any + * copies within the run, no run within the copy */ + { "^________^________ ", SM_LAZY, "R1/8 C9/9@0" }, + + /* chain depth: it only goes back 10. this checks that the 10th match hits + * and the 11th misses. 
*/ + { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234 ", SM_NONE, + "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/5@0" }, + { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234>1234 ", SM_NONE, + "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/4@45 C55/4@50" }, + + /* ssmatch test */ + { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" }, + /*{ "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" }, forgotten */ +}; + +static int +test_string_matching (xd3_stream *stream, int ignore) +{ + usize_t i; + int ret; + xd3_config config; + char rbuf[TESTBUFSIZE]; + + for (i = 0; i < SIZEOF_ARRAY (match_tests); i += 1) + { + const string_match_test *test = & match_tests[i]; + char *rptr = rbuf; + usize_t len = strlen (test->input); + + xd3_free_stream (stream); + xd3_init_config (& config, 0); + + config.smatch_cfg = XD3_SMATCH_SOFT; + config.smatcher_soft.large_look = 4; + config.smatcher_soft.large_step = 4; + config.smatcher_soft.small_look = 4; + config.smatcher_soft.small_chain = 10; + config.smatcher_soft.small_lchain = 10; + config.smatcher_soft.max_lazy = (test->flags & SM_LAZY) ? 10 : 0; + config.smatcher_soft.long_enough = 10; + + if ((ret = xd3_config_stream (stream, & config))) { return ret; } + if ((ret = xd3_encode_init_full (stream))) { return ret; } + + xd3_avail_input (stream, (uint8_t*)test->input, len); + + if ((ret = stream->smatcher.string_match (stream))) { return ret; } + + *rptr = 0; + while (! 
xd3_rlist_empty (& stream->iopt_used)) + { + xd3_rinst *inst = xd3_rlist_pop_front (& stream->iopt_used); + + switch (inst->type) + { + case XD3_RUN: *rptr++ = 'R'; break; + case XD3_CPY: *rptr++ = 'C'; break; + default: CHECK(0); + } + + sprintf (rptr, "%d/%d", inst->pos, inst->size); + rptr += strlen (rptr); + + if (inst->type == XD3_CPY) + { + *rptr++ = '@'; + sprintf (rptr, "%"Q"d", inst->addr); + rptr += strlen (rptr); + } + + *rptr++ = ' '; + + xd3_rlist_push_back (& stream->iopt_free, inst); + } + + if (rptr != rbuf) + { + rptr -= 1; *rptr = 0; + } + + if (strcmp (rbuf, test->result) != 0) + { + DP(RINT "test %u: expected %s: got %s", i, test->result, rbuf); + stream->msg = "wrong result"; + return XD3_INTERNAL; + } + } + + return 0; +} + +/* + * This is a test for many overlapping instructions. It must be a lazy + * matcher. + */ +static int +test_iopt_flush_instructions (xd3_stream *stream, int ignore) +{ + int ret, i; + usize_t tpos = 0; + usize_t delta_size, recon_size; + xd3_config config; + uint8_t target[TESTBUFSIZE]; + uint8_t delta[TESTBUFSIZE]; + uint8_t recon[TESTBUFSIZE]; + + xd3_free_stream (stream); + xd3_init_config (& config, 0); + + config.smatch_cfg = XD3_SMATCH_SOFT; + config.smatcher_soft.large_look = 16; + config.smatcher_soft.large_step = 16; + config.smatcher_soft.small_look = 4; + config.smatcher_soft.small_chain = 128; + config.smatcher_soft.small_lchain = 16; + config.smatcher_soft.max_lazy = 8; + config.smatcher_soft.long_enough = 128; + + if ((ret = xd3_config_stream (stream, & config))) { return ret; } + + for (i = 1; i < 250; i++) + { + target[tpos++] = i; + target[tpos++] = i+1; + target[tpos++] = i+2; + target[tpos++] = i+3; + target[tpos++] = 0; + } + for (i = 1; i < 253; i++) + { + target[tpos++] = i; + } + + if ((ret = xd3_encode_stream (stream, target, tpos, + delta, & delta_size, sizeof (delta)))) + { + return ret; + } + + xd3_free_stream(stream); + if ((ret = xd3_config_stream (stream, & config))) { return ret; } + + if 
((ret = xd3_decode_stream (stream, delta, delta_size, + recon, & recon_size, sizeof (recon)))) + { + return ret; + } + + CHECK(tpos == recon_size); + CHECK(memcmp(target, recon, recon_size) == 0); + + return 0; +} + +/* + * This tests the 32/64bit ambiguity for source-window matching. + */ +static int +test_source_cksum_offset (xd3_stream *stream, int ignore) +{ + xd3_source source; + + // Inputs are: + struct { + xoff_t cpos; // stream->srcwin_cksum_pos; + xoff_t ipos; // stream->total_in; + xoff_t size; // stream->src->size; + + usize_t input; // input 32-bit offset + xoff_t output; // output 64-bit offset + + } cksum_test[] = { + // If cpos is <= 2^32 + { 1, 1, 1, 1, 1 }, + +#if XD3_USE_LARGEFILE64 +// cpos ipos size input output +// 0x____xxxxxULL, 0x____xxxxxULL, 0x____xxxxxULL, 0x___xxxxxUL, 0x____xxxxxULL + { 0x100100000ULL, 0x100000000ULL, 0x100200000ULL, 0x00000000UL, 0x100000000ULL }, + { 0x100100000ULL, 0x100000000ULL, 0x100200000ULL, 0xF0000000UL, 0x0F0000000ULL }, + + { 0x100200000ULL, 0x100100000ULL, 0x100200000ULL, 0x00300000UL, 0x000300000ULL }, + + { 25771983104ULL, 25770000000ULL, 26414808769ULL, 2139216707UL, 23614053187ULL }, + +#endif + + { 0, 0, 0, 0, 0 }, + }, *test_ptr; + + stream->src = &source; + + for (test_ptr = cksum_test; test_ptr->cpos; test_ptr++) { + xoff_t r; + stream->srcwin_cksum_pos = test_ptr->cpos; + stream->total_in = test_ptr->ipos; + stream->src->size = test_ptr->size; + + r = xd3_source_cksum_offset(stream, test_ptr->input); + CHECK(r == test_ptr->output); + } + return 0; +} + +static int +test_in_memory (xd3_stream *stream, int ignore) +{ + // test_text is 256 bytes + uint8_t ibuf[sizeof(test_text)]; + uint8_t dbuf[sizeof(test_text)]; + uint8_t obuf[sizeof(test_text)]; + usize_t size = sizeof(test_text); + usize_t dsize, osize; + int r1, r2; + int eflags = SECONDARY_DJW ? 
XD3_SEC_DJW : 0; + + memcpy(ibuf, test_text, size); + memset(ibuf + 128, 0, 16); + + r1 = xd3_encode_memory(ibuf, size, + test_text, size, + dbuf, &dsize, size, eflags); + + r2 = xd3_decode_memory(dbuf, dsize, + test_text, size, + obuf, &osize, size, 0); + + if (r1 != 0 || r2 != 0 || dsize >= (size/2) || dsize < 1 || + osize != size) { + stream->msg = "encode/decode size error"; + return XD3_INTERNAL; + } + + if (memcmp(obuf, ibuf, size) != 0) { + stream->msg = "encode/decode data error"; + return XD3_INTERNAL; + } + + return 0; +} + +/*********************************************************************** + TEST MAIN + ***********************************************************************/ + +static int +xd3_selftest (void) +{ +#define DO_TEST(fn,flags,arg) \ + do { \ + xd3_stream stream; \ + xd3_config config; \ + xd3_init_config (& config, flags); \ + DP(RINT "xdelta3: testing " #fn "%s...", \ + flags ? (" (" #flags ")") : ""); \ + if ((ret = xd3_config_stream (& stream, & config) == 0) && \ + (ret = test_ ## fn (& stream, arg)) == 0) { \ + DP(RINT " success\n"); \ + } else { \ + DP(RINT " failed: %s: %s\n", xd3_errstring (& stream), \ + xd3_mainerror (ret)); } \ + xd3_free_stream (& stream); \ + if (ret != 0) { goto failure; } \ + } while (0) + + int ret; + +#ifndef WIN32 + if (getuid() == 0) + { + DP(RINT "xdelta3: This test should not be run as root.\n"); + ret = XD3_INVALID; + goto failure; + } +#endif + + DO_TEST (random_numbers, 0, 0); + + DO_TEST (decode_integer_end_of_input, 0, 0); + DO_TEST (decode_integer_overflow, 0, 0); + DO_TEST (encode_decode_uint32_t, 0, 0); + DO_TEST (encode_decode_uint64_t, 0, 0); + DO_TEST (usize_t_overflow, 0, 0); + DO_TEST (forward_match, 0, 0); + + DO_TEST (address_cache, 0, 0); + IF_GENCODETBL (DO_TEST (address_cache, XD3_ALT_CODE_TABLE, 0)); + + DO_TEST (string_matching, 0, 0); + DO_TEST (choose_instruction, 0, 0); + DO_TEST (identical_behavior, 0, 0); + DO_TEST (in_memory, 0, 0); + + IF_GENCODETBL (DO_TEST 
(choose_instruction, XD3_ALT_CODE_TABLE, 0)); + IF_GENCODETBL (DO_TEST (encode_code_table, 0, 0)); + + DO_TEST (iopt_flush_instructions, 0, 0); + DO_TEST (source_cksum_offset, 0, 0); + + DO_TEST (decompress_single_bit_error, 0, 3); + DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3); + + IF_FGK (DO_TEST (decompress_single_bit_error, XD3_SEC_FGK, 3)); + IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 8)); + + /* There are many expected non-failures for ALT_CODE_TABLE because + * not all of the instruction codes are used. */ + IF_GENCODETBL ( + DO_TEST (decompress_single_bit_error, XD3_ALT_CODE_TABLE, 224)); + +#ifndef WIN32 + DO_TEST (force_behavior, 0, 0); + DO_TEST (stdout_behavior, 0, 0); + DO_TEST (no_output, 0, 0); + DO_TEST (command_line_arguments, 0, 0); + DO_TEST (recode_command, 0, 0); + +#if EXTERNAL_COMPRESSION + DO_TEST (source_decompression, 0, 0); + DO_TEST (externally_compressed_io, 0, 0); +#endif + +#endif /* WIN32 */ + + IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS)); + IF_FGK (DO_TEST (secondary_fgk, 0, 1)); + + DO_TEST (compressed_stream_overflow, 0, 0); + +failure: + test_cleanup (); + return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +#undef DO_TEST +} diff --git a/xdelta3-test.py b/xdelta3-test.py new file mode 100755 index 0000000..7fcc3d5 --- /dev/null +++ b/xdelta3-test.py @@ -0,0 +1,155 @@ +#!/usr/bin/python2.5 +# xdelta 3 - delta compression tools and library +# Copyright (C) 2003, 2006, 2007. Joshua P. MacDonald +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import xdelta3 + +# the test data section is expected to be len('target') +source = 'source source input0 source source' +target = 'source source target source source' + +# +# + +print 'encode: basic ...' +result, patch = xdelta3.xd3_encode_memory(target, source, 50) + +assert result == 0 +assert len(patch) < len(source) + +print 'encode: adler32 ...' +result, patch_adler32 = xdelta3.xd3_encode_memory(target, source, 50, + xdelta3.XD3_ADLER32) + +assert result == 0 +assert len(patch_adler32) < len(source) +assert len(patch_adler32) > len(patch) + +print 'encode: secondary ...' +result, patch_djw = xdelta3.xd3_encode_memory(target, source, 50, + xdelta3.XD3_SEC_DJW) + +assert result == 0 +# secondary compression doesn't help +assert len(patch_djw) > len(patch) + +print 'encode: exact ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch)) + +assert result == 0 +assert len(ignore) < len(source) + +print 'encode: out of space ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch) - 1) + +assert result == 28 +assert ignore == None + +print 'encode: zero space ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, 0) + +assert result == 28 +assert ignore == None + +print 'encode: no source ...' +result, zdata = xdelta3.xd3_encode_memory(target, None, 50) + +assert result == 0 +assert len(zdata) > len(patch) + +print 'encode: no input ...' +result, ignore = xdelta3.xd3_encode_memory(None, None, 50) + +assert result != 0 + +print 'decode: basic ...' +result, target1 = xdelta3.xd3_decode_memory(patch, source, len(target)) + +assert result == 0 +assert len(target1) == len(target) +assert target1 == target + +print 'decode: out of space ...' 
+result, ignore = xdelta3.xd3_decode_memory(patch, source, len(target) - 1) + +assert result == 28 +assert ignore == None + +print 'decode: zero space ...' +result, ignore = xdelta3.xd3_decode_memory(patch, source, 0) + +assert result == 28 +assert ignore == None + +print 'decode: single byte error ...' +# a few expected single-byte errors, e.g., unused address cache bits, see +# xdelta3-test.h's single-bit error tests +extra_count = 4 +noverify_count = 0 +for corrupt_pos in range(len(patch_adler32)): + input = ''.join([j == corrupt_pos and '\xff' or patch_adler32[j] + for j in range(len(patch_adler32))]) + + result, ignore = xdelta3.xd3_decode_memory(input, source, len(target), 0) + assert result == -17712 + assert ignore == None + + # without adler32 verification, the error may be in the data section which + # in this case is 6 bytes 'target' + result, corrupt = xdelta3.xd3_decode_memory(input, source, len(target), + xdelta3.XD3_ADLER32_NOVER) + if result == 0: + noverify_count = noverify_count + 1 + #print "got %s" % corrupt + #end +#end +assert noverify_count == len('target') + extra_count + +print 'decode: no source ...' +result, target2 = xdelta3.xd3_decode_memory(zdata, None, len(target)) + +assert result == 0 +assert target == target2 + +# Test compression level setting via flags. assumes a 9 byte checksum +# and that level 9 steps 2, level 1 steps 15: +# 01234567890123456789012345678901 +# level 1 only indexes 2 checksums "abcdefghi" and "ABCDEFGHI" +# outputs 43 vs. 23 bytes +print 'encode: compression level ...' + +source = '_la_la_abcdefghi_la_la_ABCDEFGHI' +target = 'la_la_ABCDEFGH__la_la_abcdefgh__' + +result1, level1 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_1) +result9, level9 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_9) + +assert result1 == 0 and result9 == 0 +assert len(level1) > len(level9) + +# +# Issue 65 +print 'encode: 65 ...' 
+source = 'Hello World' +target = 'Hello everyone' +result, patch = xdelta3.xd3_encode_memory(target, source, len(target)) +assert result != 0 + +result, patch = xdelta3.xd3_encode_memory(target, source, 2 * len(target)) +assert result == 0 + +print 'PASS' diff --git a/xdelta3.c b/xdelta3.c new file mode 100644 index 0000000..9028d0c --- /dev/null +++ b/xdelta3.c @@ -0,0 +1,5337 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007. Joshua P. MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + ------------------------------------------------------------------- + + Xdelta 3 + + The goal of this library is to to implement both the (stand-alone) + data-compression and delta-compression aspects of VCDIFF encoding, and + to support a programming interface that works like Zlib + (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic + Differencing and Compression Data Format. + + VCDIFF is a unified encoding that combines data-compression and + delta-encoding ("differencing"). + + VCDIFF has a detailed byte-code instruction set with many features. + The instruction format supports an immediate size operand for small + COPYs and ADDs (e.g., under 18 bytes). 
There are also instruction + "modes", which are used to compress COPY addresses by using two + address caches. An instruction mode refers to slots in the NEAR + and SAME caches for recent addresses. NEAR remembers the + previous 4 (by default) COPY addresses, and SAME catches + frequent re-uses of the same address using a 3-way (by default) + 256-entry associative cache of [ADDR mod 256], the encoded byte. + A hit in the NEAR/SAME cache requires 0/1 ADDR bytes. + + VCDIFF has a default instruction table, but alternate + instruction tables may themselves be delta-compressed and + included in the encoding header. This allows even more freedom. + There are 9 instruction modes in the default code table, 4 near, 3 + same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the + current position). + + ---------------------------------------------------------------------- + + Algorithms + + Aside from the details of encoding and decoding, there are a bunch + of algorithms needed. + + 1. STRING-MATCH. A two-level fingerprinting approach is used. A + single loop computes the two checksums -- small and large -- at + successive offsets in the TARGET file. The large checksum is more + accurate and is used to discover SOURCE matches, which are + potentially very long. The small checksum is used to discover + copies within the TARGET. Small matching, which is more expensive, + usually dominates the large STRING-MATCH costs in this code - the + more exhaustive the search, the better the results. Either of the + two string-matching mechanisms may be disabled. + + 2. INSTRUCTION SELECTION. The IOPT buffer here represents a queue + used to store overlapping copy instructions. There are two possible + optimizations that go beyond a greedy search. Both of these fall + into the category of "non-greedy matching" optimizations. + + The first optimization stems from backward SOURCE-COPY matching. 
+ When a new SOURCE-COPY instruction covers a previous instruction in + the target completely, it is erased from the queue. Randal Burns + originally analyzed these algorithms and did a lot of related work + (\cite the 1.5-pass algorithm). + + The second optimization comes by the encoding of common very-small + COPY and ADD instructions, for which there are special DOUBLE-code + instructions, which code two instructions in a single byte. + + The cost of bad instruction-selection overhead is relatively high + for data-compression, relative to delta-compression, so this second + optimization is fairly important. With "lazy" matching (the name + used in Zlib for a similar optimization), the string-match + algorithm searches after a match for potential overlapping copy + instructions. In Xdelta and by default, VCDIFF, the minimum match + size is 4 bytes, whereas Zlib searches with a 3-byte minimum. This + feature, combined with double instructions, provides a nice + challenge. Search in this file for "black magic", a heuristic. + + 3. STREAM ALIGNMENT. Stream alignment is needed to compress large + inputs in constant space. See xd3_srcwin_move_point(). + + 4. WINDOW SELECTION. When the IOPT buffer flushes, in the first call + to xd3_iopt_finish_encoding containing any kind of copy instruction, + the parameters of the source window must be decided: the offset into + the source and the length of the window. Since the IOPT buffer is + finite, the program may be forced to fix these values before knowing + the best offset/length. + + 5. SECONDARY COMPRESSION. VCDIFF supports a secondary encoding to + be applied to the individual sections of the data format, which are + ADDRess, INSTruction, and DATA. Several secondary compressor + variations are implemented here, although none is standardized yet. + + One is an adaptive huffman algorithm -- the FGK algorithm (Faller, + Gallager, and Knuth, 1985). This compressor is extremely slow. 
+ + The other is a simple static Huffman routine, which is the base + case of a semi-adaptive scheme published by D.J. Wheeler and first + widely used in bzip2 (by Julian Seward). This is a very + interesting algorithm, originally published in nearly cryptic form + by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, + secondary compression remains off by default. + ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps} + -------------------------------------------------------------------- + + Other Features + + 1. USER CONVENIENCE + + For user convenience, it is essential to recognize Gzip-compressed + files and automatically Gzip-decompress them prior to + delta-compression (or else no delta-compression will be achieved + unless the user manually decompresses the inputs). The compressed + representation competes with Xdelta, and this must be hidden from the + command-line user interface. The Xdelta-1.x encoding was simple, not + compressed itself, so Xdelta-1.x uses Zlib internally to compress the + representation. + + This implementation supports external compression, which implements + the necessary fork() and pipe() mechanics. There is a tricky step + involved to support automatic detection of a compressed input in a + non-seekable input. First you read a bit of the input to detect + magic headers. When a compressed format is recognized, exec() the + external compression program and create a second child process to + copy the original input stream. [Footnote: There is a difficulty + related to using Gzip externally. It is not possible to decompress + and recompress a Gzip file transparently. If FILE.GZ had a + cryptographic signature, then, after: (1) Gzip-decompression, (2) + Xdelta-encoding, (3) Gzip-compression the signature could be + broken. The only way to solve this problem is to guess at Gzip's + compression level or control it by other means. 
I recommend that + specific implementations of any compression scheme store + information needed to exactly re-compress the input, that way + external compression is transparent - however, this won't happen + here until it has stabilized.] + + 2. APPLICATION-HEADER + + This feature was introduced in RFC3284. It allows any application + to include a header within the VCDIFF file format. This allows + general inter-application data exchange with support for + application-specific extensions to communicate metadata. + + 3. VCDIFF CHECKSUM + + An optional checksum value is included with each window, which can + be used to validate the final result. This verifies the correct source + file was used for decompression as well as the obvious advantage: + checking the implementation (and underlying) correctness. + + 4. LIGHT WEIGHT + + The code makes efforts to avoid copying data more than necessary. + The code delays many initialization tasks until the first use, it + optimizes for identical (perfectly matching) inputs. It does not + compute any checksums until the first lookup misses. Memory usage + is reduced. String-matching is templatized (by slightly gross use + of CPP) to hard-code alternative compile-time defaults. The code + has few outside dependencies. + ---------------------------------------------------------------------- + + The default rfc3284 instruction table: + (see RFC for the explanation) + + TYPE SIZE MODE TYPE SIZE MODE INDEX + -------------------------------------------------------------------- + 1. Run 0 0 Noop 0 0 0 + 2. Add 0, [1,17] 0 Noop 0 0 [1,18] + 3. Copy 0, [4,18] 0 Noop 0 0 [19,34] + 4. Copy 0, [4,18] 1 Noop 0 0 [35,50] + 5. Copy 0, [4,18] 2 Noop 0 0 [51,66] + 6. Copy 0, [4,18] 3 Noop 0 0 [67,82] + 7. Copy 0, [4,18] 4 Noop 0 0 [83,98] + 8. Copy 0, [4,18] 5 Noop 0 0 [99,114] + 9. Copy 0, [4,18] 6 Noop 0 0 [115,130] + 10. Copy 0, [4,18] 7 Noop 0 0 [131,146] + 11. Copy 0, [4,18] 8 Noop 0 0 [147,162] + 12. 
Add [1,4] 0 Copy [4,6] 0 [163,174] + 13. Add [1,4] 0 Copy [4,6] 1 [175,186] + 14. Add [1,4] 0 Copy [4,6] 2 [187,198] + 15. Add [1,4] 0 Copy [4,6] 3 [199,210] + 16. Add [1,4] 0 Copy [4,6] 4 [211,222] + 17. Add [1,4] 0 Copy [4,6] 5 [223,234] + 18. Add [1,4] 0 Copy 4 6 [235,238] + 19. Add [1,4] 0 Copy 4 7 [239,242] + 20. Add [1,4] 0 Copy 4 8 [243,246] + 21. Copy 4 [0,8] Add 1 0 [247,255] + -------------------------------------------------------------------- + + Reading the source: Overview + + This file includes itself in several passes to macro-expand certain + sections with variable forms. Just read ahead, there's only a + little confusion. I know this sounds ugly, but hard-coding some of + the string-matching parameters results in a 10-15% increase in + string-match performance. The only time this hurts is when you have + unbalanced #if/endifs. + + A single compilation unit tames the Makefile. In short, this is to + allow the above-described hack without an exploding Makefile. The + single compilation unit includes the core library features, + configurable string-match templates, optional main() command-line + tool, misc optional features, and a regression test. Features are + controlled with CPP #defines, see Makefile.am. + + The initial __XDELTA3_C_HEADER_PASS__ starts first, the _INLINE_ and + _TEMPLATE_ sections follow. Easy stuff first, hard stuff last. + + Optional features include: + + xdelta3-main.h The command-line interface, external compression + support, POSIX-specific, info & VCDIFF-debug tools. + xdelta3-second.h The common secondary compression routines. + xdelta3-decoder.h All decoding routines. + xdelta3-djw.h The semi-adaptive huffman secondary encoder. + xdelta3-fgk.h The adaptive huffman secondary encoder. + xdelta3-test.h The unit test covers major algorithms, + encoding and decoding. There are single-bit + error decoding tests. There are 32/64-bit file size + boundary tests. There are command-line tests. + There are compression tests. 
There are external + compression tests. There are string-matching tests. + There should be more tests... + + Additional headers include: + + xdelta3.h The public header file. + xdelta3-cfgs.h The default settings for default, built-in + encoders. These are hard-coded at + compile-time. There is also a single + soft-coded string matcher for experimenting + with arbitrary values. + xdelta3-list.h A cyclic list template + + Misc little debug utilities: + + badcopy.c Randomly modifies an input file based on two + parameters: (1) the probability that a byte in + the file is replaced with a pseudo-random value, + and (2) the mean change size. Changes are + generated using an expoential distribution + which approximates the expected error_prob + distribution. + -------------------------------------------------------------------- + + This file itself is unusually large. I hope to defend this layout + with lots of comments. Everything in this file is related to + encoding and decoding. I like it all together - the template stuff + is just a hack. */ + +#ifndef __XDELTA3_C_HEADER_PASS__ +#define __XDELTA3_C_HEADER_PASS__ + +#include <errno.h> +#include <string.h> + +#include "xdelta3.h" + +/*********************************************************************** + STATIC CONFIGURATION + ***********************************************************************/ + +#ifndef XD3_MAIN /* the main application */ +#define XD3_MAIN 0 +#endif + +#ifndef VCDIFF_TOOLS +#define VCDIFF_TOOLS XD3_MAIN +#endif + +#ifndef SECONDARY_FGK /* one from the algorithm preservation department: */ +#define SECONDARY_FGK 0 /* adaptive Huffman routines */ +#endif + +#ifndef SECONDARY_DJW /* semi-adaptive/static Huffman for the eventual */ +#define SECONDARY_DJW 0 /* standardization, off by default until such time. */ +#endif + +#ifndef GENERIC_ENCODE_TABLES /* These three are the RFC-spec'd app-specific */ +#define GENERIC_ENCODE_TABLES 0 /* code features. 
This is tested but not recommended */ +#endif /* unless there's a real application. */ +#ifndef GENERIC_ENCODE_TABLES_COMPUTE +#define GENERIC_ENCODE_TABLES_COMPUTE 0 +#endif +#ifndef GENERIC_ENCODE_TABLES_COMPUTE_PRINT +#define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0 +#endif + +#if XD3_ENCODER +#define IF_ENCODER(x) x +#else +#define IF_ENCODER(x) +#endif + +/***********************************************************************/ + +typedef enum { + + /* header indicator bits */ + VCD_SECONDARY = (1 << 0), /* uses secondary compressor */ + VCD_CODETABLE = (1 << 1), /* supplies code table data */ + VCD_APPHEADER = (1 << 2), /* supplies application data */ + VCD_INVHDR = ~7U, + + /* window indicator bits */ + VCD_SOURCE = (1 << 0), /* copy window in source file */ + VCD_TARGET = (1 << 1), /* copy window in target file */ + VCD_ADLER32 = (1 << 2), /* has adler32 checksum */ + VCD_INVWIN = ~7U, + + VCD_SRCORTGT = VCD_SOURCE | VCD_TARGET, + + /* delta indicator bits */ + VCD_DATACOMP = (1 << 0), + VCD_INSTCOMP = (1 << 1), + VCD_ADDRCOMP = (1 << 2), + VCD_INVDEL = ~0x7U, + +} xd3_indicator; + +typedef enum { + VCD_DJW_ID = 1, + VCD_FGK_ID = 16, /* Note: these are not standard IANA-allocated IDs! */ +} xd3_secondary_ids; + +typedef enum { + SEC_NOFLAGS = 0, + + /* Note: SEC_COUNT_FREQS Not implemented (to eliminate 1st Huffman pass) */ + SEC_COUNT_FREQS = (1 << 0), +} xd3_secondary_flags; + +typedef enum { + DATA_SECTION, /* These indicate which section to the secondary + * compressor. */ + INST_SECTION, /* The header section is not compressed, therefore not + * listed here. 
*/ + ADDR_SECTION, +} xd3_section_type; + +typedef enum +{ + XD3_NOOP = 0, + XD3_ADD = 1, + XD3_RUN = 2, + XD3_CPY = 3, /* XD3_CPY rtypes are represented as (XD3_CPY + + * copy-mode value) */ +} xd3_rtype; + +/***********************************************************************/ + +#include "xdelta3-list.h" + +XD3_MAKELIST(xd3_rlist, xd3_rinst, link); + +/***********************************************************************/ + +#define SECONDARY_MIN_SAVINGS 2 /* Secondary compression has to save + at least this many bytes. */ +#define SECONDARY_MIN_INPUT 10 /* Secondary compression needs at + least this many bytes. */ + +#define VCDIFF_MAGIC1 0xd6 /* 1st file byte */ +#define VCDIFF_MAGIC2 0xc3 /* 2nd file byte */ +#define VCDIFF_MAGIC3 0xc4 /* 3rd file byte */ +#define VCDIFF_VERSION 0x00 /* 4th file byte */ + +#define VCD_SELF 0 /* 1st address mode */ +#define VCD_HERE 1 /* 2nd address mode */ + +#define CODE_TABLE_STRING_SIZE (6 * 256) /* Should fit a code table string. */ +#define CODE_TABLE_VCDIFF_SIZE (6 * 256) /* Should fit a compressed code + * table string */ + +#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK) + +#define ALPHABET_SIZE 256 /* Used in test code--size of the secondary + * compressor alphabet. */ + +#define HASH_PERMUTE 1 /* The input is permuted by random nums */ +#define ADLER_LARGE_CKSUM 1 /* Adler checksum vs. RK checksum */ + +#define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from + * offset 0 using this offset. */ + +#define MIN_SMALL_LOOK 2U /* Match-optimization stuff. */ +#define MIN_LARGE_LOOK 2U +#define MIN_MATCH_OFFSET 1U +#define MAX_MATCH_SPLIT 18U /* VCDIFF code table: 18 is the default limit + * for direct-coded ADD sizes */ + +#define LEAST_MATCH_INCR 0 /* The least number of bytes an overlapping + * match must beat the preceding match by. This + * is a bias for the lazy match optimization. A + * non-zero value means that an adjacent match + * has to be better by more than the step + * between them. 
0. */ + +#define MIN_MATCH 4U /* VCDIFF code table: MIN_MATCH=4 */ +#define MIN_ADD 1U /* 1 */ +#define MIN_RUN 8U /* The shortest run, if it is shorter than this + * an immediate add/copy will be just as good. + * ADD1/COPY6 = 1I+1D+1A bytes, RUN18 = + * 1I+1D+1A. */ + +#define MAX_MODES 9 /* Maximum number of nodes used for + * compression--does not limit decompression. */ + +#define ENC_SECTS 4 /* Number of separate output sections. */ + +#define HDR_TAIL(s) ((s)->enc_tails[0]) +#define DATA_TAIL(s) ((s)->enc_tails[1]) +#define INST_TAIL(s) ((s)->enc_tails[2]) +#define ADDR_TAIL(s) ((s)->enc_tails[3]) + +#define HDR_HEAD(s) ((s)->enc_heads[0]) +#define DATA_HEAD(s) ((s)->enc_heads[1]) +#define INST_HEAD(s) ((s)->enc_heads[2]) +#define ADDR_HEAD(s) ((s)->enc_heads[3]) + +#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0])) + +#define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near) + +/* Template instances. */ +#if XD3_BUILD_SLOW +#define IF_BUILD_SLOW(x) x +#else +#define IF_BUILD_SLOW(x) +#endif +#if XD3_BUILD_FAST +#define IF_BUILD_FAST(x) x +#else +#define IF_BUILD_FAST(x) +#endif +#if XD3_BUILD_FASTER +#define IF_BUILD_FASTER(x) x +#else +#define IF_BUILD_FASTER(x) +#endif +#if XD3_BUILD_FASTEST +#define IF_BUILD_FASTEST(x) x +#else +#define IF_BUILD_FASTEST(x) +#endif +#if XD3_BUILD_SOFT +#define IF_BUILD_SOFT(x) x +#else +#define IF_BUILD_SOFT(x) +#endif +#if XD3_BUILD_DEFAULT +#define IF_BUILD_DEFAULT(x) x +#else +#define IF_BUILD_DEFAULT(x) +#endif + +/* Consume N bytes of input, only used by the decoder. */ +#define DECODE_INPUT(n) \ + do { \ + stream->total_in += (xoff_t) (n); \ + stream->avail_in -= (n); \ + stream->next_in += (n); \ + } while (0) + +/* Update the run-length state */ +#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } \ + else { run_c = (c); run_l = 1; } } while (0) + +/* This CPP-conditional stuff can be cleaned up... 
*/ +#if XD3_DEBUG +#define IF_DEBUG(x) x +#else +#define IF_DEBUG(x) +#endif +#if XD3_DEBUG > 1 +#define IF_DEBUG1(x) x +#else +#define IF_DEBUG1(x) +#endif +#if XD3_DEBUG > 2 +#define IF_DEBUG2(x) x +#else +#define IF_DEBUG2(x) +#endif +#if REGRESSION_TEST +#define IF_REGRESSION(x) x +#else +#define IF_REGRESSION(x) +#endif + +/***********************************************************************/ + +#if XD3_ENCODER +static void* xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size); + + +static xd3_output* xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output); + +static int xd3_alloc_iopt (xd3_stream *stream, int elts); + +static void xd3_free_output (xd3_stream *stream, + xd3_output *output); + +static int xd3_emit_byte (xd3_stream *stream, + xd3_output **outputp, + uint8_t code); + +static int xd3_emit_bytes (xd3_stream *stream, + xd3_output **outputp, + const uint8_t *base, + usize_t size); + +static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, usize_t code); +static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, + usize_t code); + +static usize_t xd3_sizeof_output (xd3_output *output); +static void xd3_encode_reset (xd3_stream *stream); + +static int xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos); +static int xd3_source_extend_match (xd3_stream *stream); +static int xd3_srcwin_setup (xd3_stream *stream); +static usize_t xd3_iopt_last_matched (xd3_stream *stream); +static int xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, + uint32_t num); + +static usize_t xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset); +static int xd3_string_match_init (xd3_stream *stream); +static uint32_t xd3_scksum (uint32_t *state, const uint8_t *seg, const int ln); +static int xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp); +static int xd3_srcwin_move_point (xd3_stream *stream, + usize_t *next_move_point); + +static int xd3_emit_run 
(xd3_stream *stream, usize_t pos, + usize_t size, uint8_t run_c); +static usize_t xd3_checksum_hash (const xd3_hash_cfg *cfg, + const usize_t cksum); +static xoff_t xd3_source_cksum_offset(xd3_stream *stream, usize_t low); +static void xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos); + + +#if XD3_DEBUG +static void xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + int x_run_l, + uint8_t x_run_c); +static void xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum); +static void xd3_verify_small_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum); + +#endif /* XD3_DEBUG */ +#endif /* XD3_ENCODER */ + +static int xd3_decode_allocate (xd3_stream *stream, usize_t size, + uint8_t **copied1, usize_t *alloc1); + +static void xd3_compute_code_table_string (const xd3_dinst *code_table, + uint8_t *str); +static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); +static void xd3_free (xd3_stream *stream, void *ptr); + +static int xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *max, uint32_t *valp); + +#if REGRESSION_TEST +static int xd3_selftest (void); +#endif + +/***********************************************************************/ + +#define UINT32_OFLOW_MASK 0xfe000000U +#define UINT64_OFLOW_MASK 0xfe00000000000000ULL + +#ifndef UINT32_MAX +#define UINT32_MAX 4294967295U +#endif + +#ifndef UINT64_MAX +#define UINT64_MAX 18446744073709551615ULL +#endif + +#if SIZEOF_USIZE_T == 4 +#define USIZE_T_MAX UINT32_MAX +#define xd3_decode_size xd3_decode_uint32_t +#define xd3_emit_size xd3_emit_uint32_t +#define xd3_sizeof_size xd3_sizeof_uint32_t +#define xd3_read_size xd3_read_uint32_t +#elif SIZEOF_USIZE_T == 8 +#define USIZE_T_MAX UINT64_MAX +#define xd3_decode_size xd3_decode_uint64_t +#define xd3_emit_size xd3_emit_uint64_t +#define xd3_sizeof_size xd3_sizeof_uint64_t +#define xd3_read_size xd3_read_uint64_t +#endif + +#if 
SIZEOF_XOFF_T == 4 +#define XOFF_T_MAX UINT32_MAX +#define xd3_decode_offset xd3_decode_uint32_t +#define xd3_emit_offset xd3_emit_uint32_t +#elif SIZEOF_XOFF_T == 8 +#define XOFF_T_MAX UINT64_MAX +#define xd3_decode_offset xd3_decode_uint64_t +#define xd3_emit_offset xd3_emit_uint64_t +#endif + +#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b)) +#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b)) + +const char* xd3_strerror (int ret) +{ + switch (ret) + { + case XD3_INPUT: return "XD3_INPUT"; + case XD3_OUTPUT: return "XD3_OUTPUT"; + case XD3_GETSRCBLK: return "XD3_GETSRCBLK"; + case XD3_GOTHEADER: return "XD3_GOTHEADER"; + case XD3_WINSTART: return "XD3_WINSTART"; + case XD3_WINFINISH: return "XD3_WINFINISH"; + case XD3_TOOFARBACK: return "XD3_TOOFARBACK"; + case XD3_INTERNAL: return "XD3_INTERNAL"; + case XD3_INVALID_INPUT: return "XD3_INVALID_INPUT"; + } + return NULL; +} + +/***********************************************************************/ + +#define xd3_sec_data(s) ((s)->sec_stream_d) +#define xd3_sec_inst(s) ((s)->sec_stream_i) +#define xd3_sec_addr(s) ((s)->sec_stream_a) + +struct _xd3_sec_type +{ + int id; + const char *name; + xd3_secondary_flags flags; + + /* xd3_sec_stream is opaque to the generic code */ + xd3_sec_stream* (*alloc) (xd3_stream *stream); + void (*destroy) (xd3_stream *stream, + xd3_sec_stream *sec); + void (*init) (xd3_sec_stream *sec); + int (*decode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + const uint8_t **input, + const uint8_t *input_end, + uint8_t **output, + const uint8_t *output_end); +#if XD3_ENCODER + int (*encode) (xd3_stream *stream, + xd3_sec_stream *sec_stream, + xd3_output *input, + xd3_output *output, + xd3_sec_cfg *cfg); +#endif +}; + +#define BIT_STATE_ENCODE_INIT { 0, 1 } +#define BIT_STATE_DECODE_INIT { 0, 0x100 } + +typedef struct _bit_state bit_state; +struct _bit_state +{ + usize_t cur_byte; + usize_t cur_mask; +}; + +#if SECONDARY_ANY == 0 
+#define IF_SEC(x) +#define IF_NSEC(x) x +#else /* yuck */ +#define IF_SEC(x) x +#define IF_NSEC(x) +static int +xd3_decode_secondary (xd3_stream *stream, + xd3_desect *sect, + xd3_sec_stream **sec_streamp); +#if XD3_ENCODER +static int +xd3_encode_secondary (xd3_stream *stream, + xd3_output **head, + xd3_output **tail, + xd3_sec_stream **sec_streamp, + xd3_sec_cfg *cfg, + int *did_it); +#endif +#endif /* SECONDARY_ANY */ + +#if SECONDARY_FGK +static const xd3_sec_type fgk_sec_type; +#define IF_FGK(x) x +#define FGK_CASE(s) \ + s->sec_type = & fgk_sec_type; \ + break; +#else +#define IF_FGK(x) +#define FGK_CASE(s) \ + s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \ + return XD3_INTERNAL; +#endif + +#if SECONDARY_DJW +static const xd3_sec_type djw_sec_type; +#define IF_DJW(x) x +#define DJW_CASE(s) \ + s->sec_type = & djw_sec_type; \ + break; +#else +#define IF_DJW(x) +#define DJW_CASE(s) \ + s->msg = "unavailable secondary compressor: DJW Static Huffman"; \ + return XD3_INTERNAL; +#endif + +/***********************************************************************/ + +#include "xdelta3-hash.h" + +/* Process template passes - this includes xdelta3.c several times. */ +#define __XDELTA3_C_TEMPLATE_PASS__ +#include "xdelta3-cfgs.h" +#undef __XDELTA3_C_TEMPLATE_PASS__ + +/* Process the inline pass. 
*/ +#define __XDELTA3_C_INLINE_PASS__ +#include "xdelta3.c" +#undef __XDELTA3_C_INLINE_PASS__ + +/* Secondary compression */ +#if SECONDARY_ANY +#include "xdelta3-second.h" +#endif + +#if SECONDARY_FGK +#include "xdelta3-fgk.h" +static const xd3_sec_type fgk_sec_type = +{ + VCD_FGK_ID, + "FGK Adaptive Huffman", + SEC_NOFLAGS, + (xd3_sec_stream* (*)()) fgk_alloc, + (void (*)()) fgk_destroy, + (void (*)()) fgk_init, + (int (*)()) xd3_decode_fgk, + IF_ENCODER((int (*)()) xd3_encode_fgk) +}; +#endif + +#if SECONDARY_DJW +#include "xdelta3-djw.h" +static const xd3_sec_type djw_sec_type = +{ + VCD_DJW_ID, + "Static Huffman", + SEC_COUNT_FREQS, + (xd3_sec_stream* (*)()) djw_alloc, + (void (*)()) djw_destroy, + (void (*)()) djw_init, + (int (*)()) xd3_decode_huff, + IF_ENCODER((int (*)()) xd3_encode_huff) +}; +#endif + +#if XD3_MAIN || PYTHON_MODULE || SWIG_MODULE || NOT_MAIN +#include "xdelta3-main.h" +#endif + +#if REGRESSION_TEST +#include "xdelta3-test.h" +#endif + +#if PYTHON_MODULE +#include "xdelta3-python.h" +#endif + +#endif /* __XDELTA3_C_HEADER_PASS__ */ +#ifdef __XDELTA3_C_INLINE_PASS__ + +/**************************************************************** + Instruction tables + *****************************************************************/ + +/* The following code implements a parametrized description of the + * code table given above for a few reasons. It is not necessary for + * implementing the standard, to support compression with variable + * tables, so an implementation is only required to know the default + * code table to begin decompression. (If the encoder uses an + * alternate table, the table is included in compressed form inside + * the VCDIFF file.) + * + * Before adding variable-table support there were two functions which + * were hard-coded to the default table above. + * xd3_compute_default_table() would create the default table by + * filling a 256-elt array of xd3_dinst values. 
The corresponding + * function, xd3_choose_instruction(), would choose an instruction + * based on the hard-coded parameters of the default code table. + * + * Notes: The parametrized code table description here only generates + * tables of a certain regularity similar to the default table by + * allowing to vary the distribution of single- and + * double-instructions and change the number of near and same copy + * modes. More exotic tables are only possible by extending this + * code. + * + * For performance reasons, both the parametrized and non-parametrized + * versions of xd3_choose_instruction remain. The parametrized + * version is only needed for testing multi-table decoding support. + * If ever multi-table encoding is required, this can be optimized by + * compiling static functions for each table. + */ + +/* The XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction with the + * table description when GENERIC_ENCODE_TABLES are in use. The + * IF_GENCODETBL macro enables generic-code-table specific code. */ +#if GENERIC_ENCODE_TABLES +#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (stream->code_table_desc, prev, inst) +#define IF_GENCODETBL(x) x +#else +#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction (prev, inst) +#define IF_GENCODETBL(x) +#endif + +/* This structure maintains information needed by + * xd3_choose_instruction to compute the code for a double instruction + * by first indexing an array of code_table_sizes by copy mode, then + * using (offset + (multiplier * X)), where X is the zero-based size of + * the first instruction of the pair; the zero-based size of the second + * instruction is then added to that. */ +struct _xd3_code_table_sizes { + uint8_t cpy_max; /* Largest copy size accepted for a double instruction in this mode */ + uint8_t offset; /* Code assigned to the first double instruction of this mode */ + uint8_t mult; /* Multiplier applied to the first instruction's zero-based size */ +}; + +/* This contains a complete description of a code table. 
*/ +struct _xd3_code_table_desc +{ + /* Assumes a single RUN instruction */ + /* Assumes that MIN_MATCH is 4 */ + + uint8_t add_sizes; /* Number of immediate-size single adds (default 17) */ + uint8_t near_modes; /* Number of near copy modes (default 4) */ + uint8_t same_modes; /* Number of same copy modes (default 3) */ + uint8_t cpy_sizes; /* Number of immediate-size single copies (default 15) */ + + uint8_t addcopy_add_max; /* Maximum add size for an add-copy double instruction, + all modes (default 4) */ + uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, + up through VCD_NEAR modes (default 6) */ + uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, + VCD_SAME modes (default 4) */ + + uint8_t copyadd_add_max; /* Maximum add size for a copy-add double instruction, + all modes (default 1) */ + uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, + up through VCD_NEAR modes (default 4) */ + uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, + VCD_SAME modes (default 4) */ + + /* Per-copy-mode {cpy_max, offset, mult} triples, indexed by copy mode + * (type - XD3_CPY), read by the generic xd3_choose_instruction to form + * the code of an ADD+COPY double instruction. */ + xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; + /* The same, for COPY+ADD double instructions. */ + xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; +}; + +/* The rfc3284 code table is represented: */ +static const xd3_code_table_desc __rfc3284_code_table_desc = { + 17, /* add sizes */ + 4, /* near modes */ + 3, /* same modes */ + 15, /* copy sizes */ + + 4, /* add-copy max add */ + 6, /* add-copy max cpy, near */ + 4, /* add-copy max cpy, same */ + + 1, /* copy-add max add */ + 4, /* copy-add max cpy, near */ + 4, /* copy-add max cpy, same */ + + /* addcopy: one {cpy_max, offset, mult} triple per copy mode (2 + 4 near + 3 same = 9) */ + { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} }, + /* copyadd: one {cpy_max, offset, mult} triple per copy mode */ + { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} }, +}; + +#if GENERIC_ENCODE_TABLES +/* An alternate code table for testing (5 near, 0 same): + * + * TYPE SIZE MODE TYPE SIZE MODE INDEX + 
* --------------------------------------------------------------- + * 1. Run 0 0 Noop 0 0 0 + * 2. Add 0, [1,23] 0 Noop 0 0 [1,24] + * 3. Copy 0, [4,20] 0 Noop 0 0 [25,42] + * 4. Copy 0, [4,20] 1 Noop 0 0 [43,60] + * 5. Copy 0, [4,20] 2 Noop 0 0 [61,78] + * 6. Copy 0, [4,20] 3 Noop 0 0 [79,96] + * 7. Copy 0, [4,20] 4 Noop 0 0 [97,114] + * 8. Copy 0, [4,20] 5 Noop 0 0 [115,132] + * 9. Copy 0, [4,20] 6 Noop 0 0 [133,150] + * 10. Add [1,4] 0 Copy [4,6] 0 [151,162] + * 11. Add [1,4] 0 Copy [4,6] 1 [163,174] + * 12. Add [1,4] 0 Copy [4,6] 2 [175,186] + * 13. Add [1,4] 0 Copy [4,6] 3 [187,198] + * 14. Add [1,4] 0 Copy [4,6] 4 [199,210] + * 15. Add [1,4] 0 Copy [4,6] 5 [211,222] + * 16. Add [1,4] 0 Copy [4,6] 6 [223,234] + * 17. Copy 4 [0,6] Add [1,3] 0 [235,255] + * --------------------------------------------------------------- */ +static const xd3_code_table_desc __alternate_code_table_desc = { + 23, /* add sizes */ + 5, /* near modes */ + 0, /* same modes */ + 17, /* copy sizes */ + + 4, /* add-copy max add */ + 6, /* add-copy max cpy, near */ + 0, /* add-copy max cpy, same */ + + 3, /* copy-add max add */ + 4, /* copy-add max cpy, near */ + 0, /* copy-add max cpy, same */ + + /* addcopy: {cpy_max, offset, mult} per copy mode; seven populated entries + * (2 + 5 near + 0 same modes), the trailing entries are zero */ + { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} }, + /* copyadd: {cpy_max, offset, mult} per copy mode; offsets step by 3, + * the copy-add max add of this table */ + { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} }, +}; +#endif + +/* Computes code table entries of TBL using the specified description. 
*/ +static void +xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl) +{ + usize_t size1, size2, mode; + usize_t cpy_modes = 2 + desc->near_modes + desc->same_modes; + xd3_dinst *d = tbl; + + (d++)->type1 = XD3_RUN; + (d++)->type1 = XD3_ADD; + + for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + (d++)->type1 = XD3_CPY + mode; + + for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? + desc->addcopy_near_cpy_max : + desc->addcopy_same_cpy_max; + + for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1) + { + d->type1 = XD3_ADD; + d->size1 = size1; + d->type2 = XD3_CPY + mode; + d->size2 = size2; + } + } + } + + for (mode = 0; mode < cpy_modes; mode += 1) + { + usize_t max = (mode < 2U + desc->near_modes) ? + desc->copyadd_near_cpy_max : + desc->copyadd_same_cpy_max; + + for (size1 = MIN_MATCH; size1 <= max; size1 += 1) + { + for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1) + { + d->type1 = XD3_CPY + mode; + d->size1 = size1; + d->type2 = XD3_ADD; + d->size2 = size2; + } + } + } + + XD3_ASSERT (d - tbl == 256); +} + +/* This function generates the static default code table. */ +static const xd3_dinst* +xd3_rfc3284_code_table (void) +{ + static xd3_dinst __rfc3284_code_table[256]; + + if (__rfc3284_code_table[0].type1 != XD3_RUN) + { + xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table); + } + + return __rfc3284_code_table; +} + +#if XD3_ENCODER +#if GENERIC_ENCODE_TABLES +/* This function generates the alternate code table. 
*/ +static const xd3_dinst* +xd3_alternate_code_table (void) +{ + static xd3_dinst __alternate_code_table[256]; + + /* Entry 0 is set to RUN by xd3_build_code_table, so any other value + * means the static table has not been filled in yet. */ + if (__alternate_code_table[0].type1 != XD3_RUN) + { + xd3_build_code_table (& __alternate_code_table_desc, __alternate_code_table); + } + + return __alternate_code_table; +} + +/* This function computes the ideal second instruction INST based on + * preceding instruction PREV. If it is possible to issue a double + * instruction based on this pair it sets PREV->code2, otherwise it + * sets INST->code1. (As written, INST->code1 is assigned in every + * case and PREV->code2 is assigned in addition when a double + * instruction applies. PREV may be NULL.) */ +static void +xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst) +{ + switch (inst->type) + { + case XD3_RUN: + /* The 0th instruction is RUN */ + inst->code1 = 0; + break; + + case XD3_ADD: + + if (inst->size > desc->add_sizes) + { + /* The first instruction is non-immediate ADD */ + inst->code1 = 1; + } + else + { + /* The following ADD_SIZES instructions are immediate ADDs */ + inst->code1 = 1 + inst->size; + + /* Now check for a possible COPY-ADD double instruction */ + if (prev != NULL) + { + int prev_mode = prev->type - XD3_CPY; + + /* If previous is a copy. Note: as long as the previous + * is not a RUN instruction, it should be a copy because + * it cannot be an add. This check is more clear. */ + if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max) + { + const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode]; + + /* This check and the inst->size-<= above are == in + the default table. */ + if (prev->size <= sizes->cpy_max) + { + /* The second and third exprs are 0 in the + default table. 
*/ + prev->code2 = sizes->offset + + (sizes->mult * (prev->size - MIN_MATCH)) + + (inst->size - MIN_ADD); + } + } + } + } + break; + + default: + { + /* Every type other than RUN and ADD is handled as a copy; the + * mode is the distance of the type from XD3_CPY. */ + int mode = inst->type - XD3_CPY; + + /* The large copy instruction is offset by the run, large add, + * and immediate adds, then multiplied by the number of + * immediate copies plus one (the large copy) (i.e., if there + * are 15 immediate copy instructions then there are 16 copy + * instructions per mode). */ + inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode; + + /* Now if the copy is short enough for an immediate instruction. */ + if (inst->size < MIN_MATCH + desc->cpy_sizes && + /* TODO: there needs to be a more comprehensive test for this + * boundary condition, merge is now exercising code in which + * size < MIN_MATCH is possible and it's unclear if the above + * size < (MIN_MATCH + cpy_sizes) should be a <= from inspection + * of the default table version below. */ + inst->size >= MIN_MATCH) + { + inst->code1 += inst->size + 1 - MIN_MATCH; + + /* Now check for a possible ADD-COPY double instruction. */ + if ( (prev != NULL) && + (prev->type == XD3_ADD) && + (prev->size <= desc->addcopy_add_max) ) + { + const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode]; + + if (inst->size <= sizes->cpy_max) + { + prev->code2 = sizes->offset + + (sizes->mult * (prev->size - MIN_ADD)) + + (inst->size - MIN_MATCH); + } + } + } + } + } +} +#else /* GENERIC_ENCODE_TABLES */ + +/* This version of xd3_choose_instruction is hard-coded for the default + table. 
*/ +static void +xd3_choose_instruction (xd3_rinst *prev, xd3_rinst *inst) +{ + switch (inst->type) + { + case XD3_RUN: + inst->code1 = 0; + break; + + case XD3_ADD: + inst->code1 = 1; + + if (inst->size <= 17) + { + inst->code1 += inst->size; + + if ( (inst->size == 1) && + (prev != NULL) && + (prev->size == 4) && + (prev->type >= XD3_CPY) ) + { + prev->code2 = 247 + (prev->type - XD3_CPY); + } + } + + break; + + default: + { + int mode = inst->type - XD3_CPY; + + XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12); + + inst->code1 = 19 + 16 * mode; + + if (inst->size <= 18 && inst->size >= 4) + { + inst->code1 += inst->size - 3; + + if ( (prev != NULL) && + (prev->type == XD3_ADD) && + (prev->size <= 4) ) + { + if ( (inst->size <= 6) && + (mode <= 5) ) + { + prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4); + + XD3_ASSERT (prev->code2 <= 234); + } + else if ( (inst->size == 4) && + (mode >= 6) ) + { + prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1); + + XD3_ASSERT (prev->code2 <= 246); + } + } + } + + XD3_ASSERT (inst->code1 <= 162); + } + break; + } +} +#endif /* GENERIC_ENCODE_TABLES */ + +/*********************************************************************** + Instruction table encoder/decoder + ***********************************************************************/ + +#if GENERIC_ENCODE_TABLES +#if GENERIC_ENCODE_TABLES_COMPUTE == 0 + +/* In this case, we hard-code the result of + * compute_code_table_encoding for each alternate code table, + * presuming that saves time/space. This has been 131 bytes, but + * secondary compression was turned off. 
*/ +static const uint8_t __alternate_code_table_compressed[178] = +{0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03, +0x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e, +0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04, +0x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05, +0x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54, +0x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15, +0x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00, +0x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00, +0x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,}; + +/* Hands back the precomputed encoding above through DATA and SIZE. + * STREAM is not used here and the function always returns 0. */ +static int +xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) +{ + (*data) = __alternate_code_table_compressed; + (*size) = sizeof (__alternate_code_table_compressed); + return 0; +} + +#else + +/* The alternate code table will be computed and stored here. */ +static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE]; +static usize_t __alternate_code_table_compressed_size; + +/* This function generates a delta describing the code table for + * encoding within a VCDIFF file. This function is NOT thread safe + * because it is only intended that this function is used to generate + * statically-compiled strings. 
*/
int xd3_compute_code_table_encoding (xd3_stream *in_stream,
                                     const xd3_dinst *code_table,
                                     uint8_t *comp_string,
                                     usize_t *comp_string_size)
{
  /* TODO: use xd3_encode_memory() */
  uint8_t rfc_string[CODE_TABLE_STRING_SIZE];
  uint8_t alt_string[CODE_TABLE_STRING_SIZE];
  xd3_stream enc;
  xd3_source src;
  xd3_config cfg;
  int ret;

  memset (& src, 0, sizeof (src));

  /* Serialize both tables: the default one becomes the source, the
   * requested one becomes the target of the delta. */
  xd3_compute_code_table_string (xd3_rfc3284_code_table (), rfc_string);
  xd3_compute_code_table_string (code_table, alt_string);

  /* Use DJW secondary compression if it is on by default.  This saves
   * about 20 bytes. */
  xd3_init_config (& cfg, XD3_FLUSH | (SECONDARY_DJW ? XD3_SEC_DJW : 0));

  /* Be exhaustive. */
  cfg.sprevsz = 1<<11;
  cfg.srcwin_maxsz = CODE_TABLE_STRING_SIZE;

  cfg.smatch_cfg = XD3_SMATCH_SOFT;
  cfg.smatcher_soft.large_look = 4;
  cfg.smatcher_soft.large_step = 1;
  cfg.smatcher_soft.small_look = 4;
  cfg.smatcher_soft.small_chain = CODE_TABLE_STRING_SIZE;
  cfg.smatcher_soft.small_lchain = CODE_TABLE_STRING_SIZE;
  cfg.smatcher_soft.max_lazy = CODE_TABLE_STRING_SIZE;
  cfg.smatcher_soft.long_enough = CODE_TABLE_STRING_SIZE;

  ret = xd3_config_stream (& enc, & cfg);

  if (ret == 0)
    {
      /* The whole default-table string is presented as a single source
       * block. */
      src.size = CODE_TABLE_STRING_SIZE;
      src.blksize = CODE_TABLE_STRING_SIZE;
      src.onblk = CODE_TABLE_STRING_SIZE;
      src.name = "";
      src.curblk = rfc_string;
      src.curblkno = 0;

      ret = xd3_set_source (& enc, & src);
    }

  if (ret == 0)
    {
      ret = xd3_encode_stream (& enc, alt_string, CODE_TABLE_STRING_SIZE,
                               comp_string, comp_string_size, CODE_TABLE_VCDIFF_SIZE);
    }

  /* Success and failure share the same exit: hand the temporary
   * stream's message to the caller's stream, then release it. */
  in_stream->msg = enc.msg;
  xd3_free_stream (& enc);
  return ret;
}

/* Compute a delta between alternate and rfc3284 tables.  As soon as
 * another alternate table is added, this code should become generic.
 * For now there is only one alternate table for testing.
*/ +static int +xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size) +{ + int ret; + + if (__alternate_code_table_compressed[0] == 0) + { + if ((ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (), + __alternate_code_table_compressed, + & __alternate_code_table_compressed_size))) + { + return ret; + } + + /* During development of a new code table, enable this variable to print + * the new static contents and determine its size. At run time the + * table will be filled in appropriately, but at least it should have + * the proper size beforehand. */ +#if GENERIC_ENCODE_TABLES_COMPUTE_PRINT + { + int i; + + DP(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n", + __alternate_code_table_compressed_size); + + DP(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{", + __alternate_code_table_compressed_size); + + for (i = 0; i < __alternate_code_table_compressed_size; i += 1) + { + DP(RINT, "0x%02x,", __alternate_code_table_compressed[i]); + if ((i % 20) == 19) { DP(RINT, "\n"); } + } + + DP(RINT, "};\n"); + } +#endif + } + + (*data) = __alternate_code_table_compressed; + (*size) = __alternate_code_table_compressed_size; + + return 0; +} +#endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */ +#endif /* GENERIC_ENCODE_TABLES */ + +#endif /* XD3_ENCODER */ + +/* This function generates the 1536-byte string specified in sections 5.4 and + * 7 of rfc3284, which is used to represent a code table within a VCDIFF + * file. */ +void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str) +{ + int i, s; + + XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256); + + for (s = 0; s < 6; s += 1) + { + for (i = 0; i < 256; i += 1) + { + switch (s) + { + case 0: *str++ = (code_table[i].type1 >= XD3_CPY ? XD3_CPY : code_table[i].type1); break; + case 1: *str++ = (code_table[i].type2 >= XD3_CPY ? 
XD3_CPY : code_table[i].type2); break; + case 2: *str++ = (code_table[i].size1); break; + case 3: *str++ = (code_table[i].size2); break; + case 4: *str++ = (code_table[i].type1 >= XD3_CPY ? code_table[i].type1 - XD3_CPY : 0); break; + case 5: *str++ = (code_table[i].type2 >= XD3_CPY ? code_table[i].type2 - XD3_CPY : 0); break; + } + } + } +} + +/* This function translates the code table string into the internal representation. The + * stream's near and same-modes should already be set. */ +static int +xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string) +{ + int i, s; + int modes = TOTAL_MODES (stream); + xd3_dinst *code_table; + + if ((code_table = stream->code_table_alloc = + (xd3_dinst*) xd3_alloc (stream, sizeof (xd3_dinst), 256)) == NULL) + { + return ENOMEM; + } + + for (s = 0; s < 6; s += 1) + { + for (i = 0; i < 256; i += 1) + { + switch (s) + { + case 0: + if (*code_string > XD3_CPY) + { + stream->msg = "invalid code-table opcode"; + return XD3_INTERNAL; + } + code_table[i].type1 = *code_string++; + break; + case 1: + if (*code_string > XD3_CPY) + { + stream->msg = "invalid code-table opcode"; + return XD3_INTERNAL; + } + code_table[i].type2 = *code_string++; + break; + case 2: + if (*code_string != 0 && code_table[i].type1 == XD3_NOOP) + { + stream->msg = "invalid code-table size"; + return XD3_INTERNAL; + } + code_table[i].size1 = *code_string++; + break; + case 3: + if (*code_string != 0 && code_table[i].type2 == XD3_NOOP) + { + stream->msg = "invalid code-table size"; + return XD3_INTERNAL; + } + code_table[i].size2 = *code_string++; + break; + case 4: + if (*code_string >= modes) + { + stream->msg = "invalid code-table mode"; + return XD3_INTERNAL; + } + if (*code_string != 0 && code_table[i].type1 != XD3_CPY) + { + stream->msg = "invalid code-table mode"; + return XD3_INTERNAL; + } + code_table[i].type1 += *code_string++; + break; + case 5: + if (*code_string >= modes) + { + stream->msg = "invalid code-table mode"; + return 
XD3_INTERNAL; + } + if (*code_string != 0 && code_table[i].type2 != XD3_CPY) + { + stream->msg = "invalid code-table mode"; + return XD3_INTERNAL; + } + code_table[i].type2 += *code_string++; + break; + } + } + } + + stream->code_table = code_table; + return 0; +} + +/* This function applies a code table delta and returns an actual code table. */ +static int +xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size) +{ + uint8_t dflt_string[CODE_TABLE_STRING_SIZE]; + uint8_t code_string[CODE_TABLE_STRING_SIZE]; + usize_t code_size; + xd3_stream stream; + xd3_source source; + int ret; + + /* The default code table string can be cached if alternate code tables ever become + * popular. */ + xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string); + + source.size = CODE_TABLE_STRING_SIZE; + source.blksize = CODE_TABLE_STRING_SIZE; + source.onblk = CODE_TABLE_STRING_SIZE; + source.name = "rfc3284 code table"; + source.curblk = dflt_string; + source.curblkno = 0; + + if ((ret = xd3_config_stream (& stream, NULL)) || + (ret = xd3_set_source (& stream, & source)) || + (ret = xd3_decode_stream (& stream, data, size, code_string, & code_size, sizeof (code_string)))) + { + in_stream->msg = stream.msg; + goto fail; + } + + if (code_size != sizeof (code_string)) + { + stream.msg = "corrupt code-table encoding"; + ret = XD3_INTERNAL; + goto fail; + } + + if ((ret = xd3_apply_table_string (in_stream, code_string))) { goto fail; } + + fail: + + xd3_free_stream (& stream); + return ret; +} + +/***********************************************************************/ + +static inline void +xd3_swap_uint8p (uint8_t** p1, uint8_t** p2) +{ + uint8_t *t = (*p1); + (*p1) = (*p2); + (*p2) = t; +} + +static inline void +xd3_swap_usize_t (usize_t* p1, usize_t* p2) +{ + usize_t t = (*p1); + (*p1) = (*p2); + (*p2) = t; +} + +/* It's not constant time, but it computes the log. 
*/ +static int +xd3_check_pow2 (usize_t value, usize_t *logof) +{ + usize_t x = 1; + usize_t nolog; + if (logof == NULL) { + logof = &nolog; + } + + *logof = 0; + + for (; x != 0; x <<= 1, *logof += 1) + { + if (x == value) + { + return 0; + } + } + + return XD3_INTERNAL; +} + +static usize_t +xd3_pow2_roundup (usize_t x) +{ + usize_t i = 1; + while (x > i) { + i <<= 1; + } + return i; +} + +static usize_t +xd3_round_blksize (usize_t sz, usize_t blksz) +{ + usize_t mod = sz & (blksz-1); + + XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0); + + return mod ? (sz + (blksz - mod)) : sz; +} + +/*********************************************************************** + Adler32 stream function: code copied from Zlib, defined in RFC1950 + ***********************************************************************/ + +#define A32_BASE 65521L /* Largest prime smaller than 2^16 */ +#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define A32_DO2(buf,i) A32_DO1(buf,i); A32_DO1(buf,i+1); +#define A32_DO4(buf,i) A32_DO2(buf,i); A32_DO2(buf,i+2); +#define A32_DO8(buf,i) A32_DO4(buf,i); A32_DO4(buf,i+4); +#define A32_DO16(buf) A32_DO8(buf,0); A32_DO8(buf,8); + +static unsigned long adler32 (unsigned long adler, const uint8_t *buf, usize_t len) +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + while (len > 0) + { + k = (len < A32_NMAX) ? 
len : A32_NMAX; + len -= k; + + while (k >= 16) + { + A32_DO16(buf); + buf += 16; + k -= 16; + } + + if (k != 0) + { + do + { + s1 += *buf++; + s2 += s1; + } + while (--k); + } + + s1 %= A32_BASE; + s2 %= A32_BASE; + } + + return (s2 << 16) | s1; +} + +/*********************************************************************** + Run-length function + ***********************************************************************/ + +#if XD3_ENCODER +static int +xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp) +{ + int i; + int run_l = 0; + uint8_t run_c = 0; + + for (i = 0; i < slook; i += 1) + { + NEXTRUN(seg[i]); + } + + (*run_cp) = run_c; + + return run_l; +} +#endif + +/*********************************************************************** + Basic encoder/decoder functions + ***********************************************************************/ + +static inline int +xd3_decode_byte (xd3_stream *stream, usize_t *val) +{ + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + (*val) = stream->next_in[0]; + + DECODE_INPUT (1); + return 0; +} + +static inline int +xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size) +{ + usize_t want; + usize_t take; + + /* Note: The case where (*pos == size) happens when a zero-length appheader or code + * table is transmitted, but there is nothing in the standard against that. 
*/ + + while (*pos < size) + { + if (stream->avail_in == 0) + { + stream->msg = "further input required"; + return XD3_INPUT; + } + + want = size - *pos; + take = min (want, stream->avail_in); + + memcpy (buf + *pos, stream->next_in, take); + + DECODE_INPUT (take); + (*pos) += take; + } + + return 0; +} + +#if XD3_ENCODER +static inline int +xd3_emit_byte (xd3_stream *stream, + xd3_output **outputp, + uint8_t code) +{ + xd3_output *output = (*outputp); + + if (output->next == output->avail) + { + xd3_output *aoutput; + + if ((aoutput = xd3_alloc_output (stream, output)) == NULL) + { + return ENOMEM; + } + + output = (*outputp) = aoutput; + } + + output->base[output->next++] = code; + + return 0; +} + +static inline int +xd3_emit_bytes (xd3_stream *stream, + xd3_output **outputp, + const uint8_t *base, + usize_t size) +{ + xd3_output *output = (*outputp); + + do + { + usize_t take; + + if (output->next == output->avail) + { + xd3_output *aoutput; + + if ((aoutput = xd3_alloc_output (stream, output)) == NULL) + { + return ENOMEM; + } + + output = (*outputp) = aoutput; + } + + take = min (output->avail - output->next, size); + + memcpy (output->base + output->next, base, take); + + output->next += take; + size -= take; + base += take; + } + while (size > 0); + + return 0; +} +#endif /* XD3_ENCODER */ + +/********************************************************************* + Integer encoder/decoder functions + **********************************************************************/ + +#define DECODE_INTEGER_TYPE(PART,OFLOW) \ + while (stream->avail_in != 0) \ + { \ + usize_t next = stream->next_in[0]; \ + \ + DECODE_INPUT(1); \ + \ + if (PART & OFLOW) \ + { \ + stream->msg = "overflow in decode_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + PART = (PART << 7) | (next & 127); \ + \ + if ((next & 128) == 0) \ + { \ + (*val) = PART; \ + PART = 0; \ + return 0; \ + } \ + } \ + \ + stream->msg = "further input required"; \ + return XD3_INPUT + +#define 
READ_INTEGER_TYPE(TYPE, OFLOW) \ + TYPE val = 0; \ + const uint8_t *inp = (*inpp); \ + usize_t next; \ + \ + do \ + { \ + if (inp == max) \ + { \ + stream->msg = "end-of-input in read_integer"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + if (val & OFLOW) \ + { \ + stream->msg = "overflow in read_intger"; \ + return XD3_INVALID_INPUT; \ + } \ + \ + next = (*inp++); \ + val = (val << 7) | (next & 127); \ + } \ + while (next & 128); \ + \ + (*valp) = val; \ + (*inpp) = inp; \ + \ + return 0 + +#define EMIT_INTEGER_TYPE() \ + /* max 64-bit value in base-7 encoding is 9.1 bytes */ \ + uint8_t buf[10]; \ + usize_t bufi = 10; \ + \ + XD3_ASSERT (num >= 0); \ + \ + /* This loop performs division and turns on all MSBs. */ \ + do \ + { \ + buf[--bufi] = (num & 127) | 128; \ + num >>= 7; \ + } \ + while (num != 0); \ + \ + /* Turn off MSB of the last byte. */ \ + buf[9] &= 127; \ + \ + XD3_ASSERT (bufi >= 0); \ + \ + return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi) + +#define IF_SIZEOF32(x) if (num < (1U << (7 * (x)))) return (x); +#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x); + +#if USE_UINT32 +static inline uint32_t +xd3_sizeof_uint32_t (uint32_t num) +{ + IF_SIZEOF32(1); + IF_SIZEOF32(2); + IF_SIZEOF32(3); + IF_SIZEOF32(4); + return 5; +} + +static inline int +xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); } + +static inline int +xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *max, uint32_t *valp) +{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); } + +#if XD3_ENCODER +static inline int +xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num) +{ EMIT_INTEGER_TYPE (); } +#endif +#endif + +#if USE_UINT64 +static inline int +xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val) +{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); } + +#if XD3_ENCODER +static inline int +xd3_emit_uint64_t (xd3_stream *stream, 
xd3_output **output, uint64_t num) +{ EMIT_INTEGER_TYPE (); } +#endif + +/* These are tested but not used */ +#if REGRESSION_TEST +static int +xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, + const uint8_t *max, uint64_t *valp) +{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); } + +static uint32_t +xd3_sizeof_uint64_t (uint64_t num) +{ + IF_SIZEOF64(1); + IF_SIZEOF64(2); + IF_SIZEOF64(3); + IF_SIZEOF64(4); + IF_SIZEOF64(5); + IF_SIZEOF64(6); + IF_SIZEOF64(7); + IF_SIZEOF64(8); + IF_SIZEOF64(9); + + return 10; +} +#endif + +#endif + +/*********************************************************************** + Address cache stuff + ***********************************************************************/ + +static int +xd3_alloc_cache (xd3_stream *stream) +{ + if (stream->acache.near_array != NULL) + { + xd3_free (stream, stream->acache.near_array); + } + + if (stream->acache.same_array != NULL) + { + xd3_free (stream, stream->acache.same_array); + } + + if (((stream->acache.s_near > 0) && + (stream->acache.near_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_near, sizeof (usize_t))) + == NULL) || + ((stream->acache.s_same > 0) && + (stream->acache.same_array = (usize_t*) + xd3_alloc (stream, stream->acache.s_same * 256, sizeof (usize_t))) + == NULL)) + { + return ENOMEM; + } + + return 0; +} + +void +xd3_init_cache (xd3_addr_cache* acache) +{ + if (acache->s_near > 0) + { + memset (acache->near_array, 0, acache->s_near * sizeof (usize_t)); + acache->next_slot = 0; + } + + if (acache->s_same > 0) + { + memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t)); + } +} + +static void +xd3_update_cache (xd3_addr_cache* acache, usize_t addr) +{ + if (acache->s_near > 0) + { + acache->near_array[acache->next_slot] = addr; + acache->next_slot = (acache->next_slot + 1) % acache->s_near; + } + + if (acache->s_same > 0) + { + acache->same_array[addr % (acache->s_same*256)] = addr; + } +} + +#if XD3_ENCODER +/* OPT: this gets called a lot, 
can it be optimized? */ +static int +xd3_encode_address (xd3_stream *stream, usize_t addr, usize_t here, uint8_t* mode) +{ + usize_t d, bestd; + usize_t i, bestm, ret; + xd3_addr_cache* acache = & stream->acache; + +#define SMALLEST_INT(x) do { if (((x) & ~127) == 0) { goto good; } } while (0) + + /* Attempt to find the address mode that yields the smallest integer value + * for "d", the encoded address value, thereby minimizing the encoded size + * of the address. */ + bestd = addr; + bestm = VCD_SELF; + + XD3_ASSERT (addr < here); + + SMALLEST_INT (bestd); + + if ((d = here-addr) < bestd) + { + bestd = d; + bestm = VCD_HERE; + + SMALLEST_INT (bestd); + } + + for (i = 0; i < acache->s_near; i += 1) + { + d = addr - acache->near_array[i]; + + if (d >= 0 && d < bestd) + { + bestd = d; + bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */ + + SMALLEST_INT (bestd); + } + } + + if (acache->s_same > 0 && acache->same_array[d = addr%(acache->s_same*256)] == addr) + { + bestd = d%256; + bestm = acache->s_near + 2 + d/256; /* 2 + s_near offsets past the VCD_NEAR modes */ + + if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) { return ret; } + } + else + { + good: + + if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) { return ret; } + } + + xd3_update_cache (acache, addr); + + (*mode) += bestm; + + return 0; +} +#endif + +static int +xd3_decode_address (xd3_stream *stream, usize_t here, + usize_t mode, const uint8_t **inpp, + const uint8_t *max, uint32_t *valp) +{ + int ret; + usize_t same_start = 2 + stream->acache.s_near; + + if (mode < same_start) + { + if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; } + + switch (mode) + { + case VCD_SELF: + break; + case VCD_HERE: + (*valp) = here - (*valp); + break; + default: + (*valp) += stream->acache.near_array[mode - 2]; + break; + } + } + else + { + if (*inpp == max) + { + stream->msg = "address underflow"; + return XD3_INVALID_INPUT; + } + + mode -= same_start; + + (*valp) = 
stream->acache.same_array[mode*256 + (**inpp)]; + + (*inpp) += 1; + } + + xd3_update_cache (& stream->acache, *valp); + + return 0; +} + +/*********************************************************************** + Alloc/free +***********************************************************************/ + +static void* +__xd3_alloc_func (void* opaque, usize_t items, usize_t size) +{ + return malloc (items * size); +} + +static void +__xd3_free_func (void* opaque, void* address) +{ + free (address); +} + +static void* +xd3_alloc (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = stream->alloc (stream->opaque, elts, size); + + if (a != NULL) + { + IF_DEBUG (stream->alloc_cnt += 1); + IF_DEBUG2 (DP(RINT "[stream %p malloc] size %u ptr %p\n", + stream, elts * size, a)); + } + else + { + stream->msg = "out of memory"; + } + + return a; +} + +static void +xd3_free (xd3_stream *stream, + void *ptr) +{ + if (ptr != NULL) + { + IF_DEBUG (stream->free_cnt += 1); + XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt); + IF_DEBUG2 (DP(RINT "[stream %p free] %p\n", + stream, ptr)); + stream->free (stream->opaque, ptr); + } +} + +#if XD3_ENCODER +static void* +xd3_alloc0 (xd3_stream *stream, + usize_t elts, + usize_t size) +{ + void *a = xd3_alloc (stream, elts, size); + + if (a != NULL) + { + memset (a, 0, elts * size); + } + + return a; +} + +static xd3_output* +xd3_alloc_output (xd3_stream *stream, + xd3_output *old_output) +{ + xd3_output *output; + uint8_t *base; + + if (stream->enc_free != NULL) + { + output = stream->enc_free; + stream->enc_free = output->next_page; + } + else + { + if ((output = (xd3_output*) xd3_alloc (stream, 1, sizeof (xd3_output))) == NULL) + { + return NULL; + } + + if ((base = (uint8_t*) xd3_alloc (stream, XD3_ALLOCSIZE, sizeof (uint8_t))) == NULL) + { + xd3_free (stream, output); + return NULL; + } + + output->base = base; + output->avail = XD3_ALLOCSIZE; + } + + output->next = 0; + + if (old_output) + { + old_output->next_page = output; + 
} + + output->next_page = NULL; + + return output; +} + +static usize_t +xd3_sizeof_output (xd3_output *output) +{ + usize_t s = 0; + + for (; output; output = output->next_page) + { + s += output->next; + } + + return s; +} + +static void +xd3_freelist_output (xd3_stream *stream, + xd3_output *output) +{ + xd3_output *tmp; + + while (output) + { + tmp = output; + output = output->next_page; + + tmp->next = 0; + tmp->next_page = stream->enc_free; + stream->enc_free = tmp; + } +} + +static void +xd3_free_output (xd3_stream *stream, + xd3_output *output) +{ + xd3_output *next; + + again: + if (output == NULL) + { + return; + } + + next = output->next_page; + + xd3_free (stream, output->base); + xd3_free (stream, output); + + output = next; + goto again; +} +#endif /* XD3_ENCODER */ + +void +xd3_free_stream (xd3_stream *stream) +{ + xd3_iopt_buflist *blist = stream->iopt_alloc; + + while (blist != NULL) + { + xd3_iopt_buflist *tmp = blist; + blist = blist->next; + xd3_free (stream, tmp->buffer); + xd3_free (stream, tmp); + } + + xd3_free (stream, stream->large_table); + xd3_free (stream, stream->small_table); + xd3_free (stream, stream->small_prev); + +#if XD3_ENCODER + { + int i; + for (i = 0; i < ENC_SECTS; i += 1) + { + xd3_free_output (stream, stream->enc_heads[i]); + } + xd3_free_output (stream, stream->enc_free); + } +#endif + + xd3_free (stream, stream->acache.near_array); + xd3_free (stream, stream->acache.same_array); + + xd3_free (stream, stream->inst_sect.copied1); + xd3_free (stream, stream->addr_sect.copied1); + xd3_free (stream, stream->data_sect.copied1); + + xd3_free (stream, stream->dec_buffer); + xd3_free (stream, (uint8_t*) stream->dec_lastwin); + + xd3_free (stream, stream->buf_in); + xd3_free (stream, stream->dec_appheader); + xd3_free (stream, stream->dec_codetbl); + xd3_free (stream, stream->code_table_alloc); + +#if SECONDARY_ANY + xd3_free (stream, stream->inst_sect.copied2); + xd3_free (stream, stream->addr_sect.copied2); + xd3_free (stream, 
stream->data_sect.copied2); + + if (stream->sec_type != NULL) + { + stream->sec_type->destroy (stream, stream->sec_stream_d); + stream->sec_type->destroy (stream, stream->sec_stream_i); + stream->sec_type->destroy (stream, stream->sec_stream_a); + } +#endif + + xd3_free (stream, stream->whole_target.adds); + xd3_free (stream, stream->whole_target.inst); + xd3_free (stream, stream->whole_target.wininfo); + + XD3_ASSERT (stream->alloc_cnt == stream->free_cnt); + + memset (stream, 0, sizeof (xd3_stream)); +} + +#if (XD3_DEBUG > 1 || VCDIFF_TOOLS) +static const char* +xd3_rtype_to_string (xd3_rtype type, int print_mode) +{ + switch (type) + { + case XD3_NOOP: + return "NOOP "; + case XD3_RUN: + return "RUN "; + case XD3_ADD: + return "ADD "; + default: break; + } + if (! print_mode) + { + return "CPY "; + } + switch (type) + { + case XD3_CPY + 0: return "CPY_0"; + case XD3_CPY + 1: return "CPY_1"; + case XD3_CPY + 2: return "CPY_2"; + case XD3_CPY + 3: return "CPY_3"; + case XD3_CPY + 4: return "CPY_4"; + case XD3_CPY + 5: return "CPY_5"; + case XD3_CPY + 6: return "CPY_6"; + case XD3_CPY + 7: return "CPY_7"; + case XD3_CPY + 8: return "CPY_8"; + case XD3_CPY + 9: return "CPY_9"; + default: return "CPY>9"; + } +} +#endif + +/**************************************************************** + Stream configuration + ******************************************************************/ + +int +xd3_config_stream(xd3_stream *stream, + xd3_config *config) +{ + int ret; + xd3_config defcfg; + xd3_smatcher *smatcher = &stream->smatcher; + + if (config == NULL) + { + config = & defcfg; + memset (config, 0, sizeof (*config)); + } + + /* Initial setup: no error checks yet */ + memset (stream, 0, sizeof (*stream)); + + stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE; + stream->sprevsz = config->sprevsz ? config->sprevsz : XD3_DEFAULT_SPREVSZ; + stream->srcwin_maxsz = config->srcwin_maxsz ? 
+ config->srcwin_maxsz : XD3_DEFAULT_SRCWINSZ; + + if (config->iopt_size == 0) + { + stream->iopt_size = XD3_ALLOCSIZE / sizeof(xd3_rinst); + stream->iopt_unlimited = 1; + } + else + { + stream->iopt_size = config->iopt_size; + } + + stream->getblk = config->getblk; + stream->alloc = config->alloc ? config->alloc : __xd3_alloc_func; + stream->free = config->freef ? config->freef : __xd3_free_func; + stream->opaque = config->opaque; + stream->flags = config->flags; + + /* Secondary setup. */ + stream->sec_data = config->sec_data; + stream->sec_inst = config->sec_inst; + stream->sec_addr = config->sec_addr; + + stream->sec_data.data_type = DATA_SECTION; + stream->sec_inst.data_type = INST_SECTION; + stream->sec_addr.data_type = ADDR_SECTION; + + /* Check static sizes. */ + if (sizeof (usize_t) != SIZEOF_USIZE_T || + sizeof (xoff_t) != SIZEOF_XOFF_T || + (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL))) + { + stream->msg = "incorrect compilation: wrong integer sizes"; + return XD3_INTERNAL; + } + + /* Check/set secondary compressor. */ + switch (stream->flags & XD3_SEC_TYPE) + { + case 0: + if (stream->flags & XD3_SEC_NOALL) + { + stream->msg = "XD3_SEC flags require a secondary compressor type"; + return XD3_INTERNAL; + } + break; + case XD3_SEC_FGK: + FGK_CASE (stream); + case XD3_SEC_DJW: + DJW_CASE (stream); + default: + stream->msg = "too many secondary compressor types set"; + return XD3_INTERNAL; + } + + /* Check/set encoder code table. 
*/ + switch (stream->flags & XD3_ALT_CODE_TABLE) { + case 0: + stream->code_table_desc = & __rfc3284_code_table_desc; + stream->code_table_func = xd3_rfc3284_code_table; + break; +#if GENERIC_ENCODE_TABLES + case XD3_ALT_CODE_TABLE: + stream->code_table_desc = & __alternate_code_table_desc; + stream->code_table_func = xd3_alternate_code_table; + stream->comp_table_func = xd3_compute_alternate_table_encoding; + break; +#endif + default: + stream->msg = "alternate code table support was not compiled"; + return XD3_INTERNAL; + } + + /* Check sprevsz */ + if (smatcher->small_chain == 1 && + smatcher->small_lchain == 1) + { + stream->sprevsz = 0; + } + else + { + if ((ret = xd3_check_pow2 (stream->sprevsz, NULL))) + { + stream->msg = "sprevsz is required to be a power of two"; + return XD3_INTERNAL; + } + + stream->sprevmask = stream->sprevsz - 1; + } + + /* Default scanner settings. */ +#if XD3_ENCODER + switch (config->smatch_cfg) + { + IF_BUILD_SOFT(case XD3_SMATCH_SOFT: + { + *smatcher = config->smatcher_soft; + smatcher->string_match = __smatcher_soft.string_match; + smatcher->name = __smatcher_soft.name; + if (smatcher->large_look < MIN_MATCH || + smatcher->large_step < 1 || + smatcher->small_look < MIN_MATCH) + { + stream->msg = "invalid soft string-match config"; + return XD3_INVALID; + } + break; + }) + + IF_BUILD_DEFAULT(case XD3_SMATCH_DEFAULT: + *smatcher = __smatcher_default; + break;) + IF_BUILD_SLOW(case XD3_SMATCH_SLOW: + *smatcher = __smatcher_slow; + break;) + IF_BUILD_FASTEST(case XD3_SMATCH_FASTEST: + *smatcher = __smatcher_fastest; + break;) + IF_BUILD_FASTER(case XD3_SMATCH_FASTER: + *smatcher = __smatcher_faster; + break;) + IF_BUILD_FAST(case XD3_SMATCH_FAST: + *smatcher = __smatcher_fast; + break;) + default: + stream->msg = "invalid string match config type"; + return XD3_INTERNAL; + } + + if (config->smatch_cfg == XD3_SMATCH_DEFAULT && + (stream->flags & XD3_COMPLEVEL_MASK) != 0) + { + int level = (stream->flags & XD3_COMPLEVEL_MASK) >> 
XD3_COMPLEVEL_SHIFT; + + switch (level) + { + case 1: + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + case 2: + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + case 3: case 4: case 5: + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + case 6: + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + default: + IF_BUILD_SLOW(*smatcher = __smatcher_slow; + break;) + IF_BUILD_DEFAULT(*smatcher = __smatcher_default; + break;) + IF_BUILD_FAST(*smatcher = __smatcher_fast; + break;) + IF_BUILD_FASTER(*smatcher = __smatcher_faster; + break;) + IF_BUILD_FASTEST(*smatcher = __smatcher_fastest; + break;) + } + } +#endif + + return 0; +} + +/***************************************************************** + Getblk interface + ***********************************************************/ + +/* This function interfaces with the client getblk function, checks + * its results, etc. */ +static int +xd3_getblk (xd3_stream *stream, xoff_t blkno) +{ + int ret; + xd3_source *source = stream->src; + + if (source->curblk == NULL || + blkno != source->curblkno) + { + if (blkno >= source->blocks) + { + stream->msg = "source file too short"; + return XD3_INTERNAL; + } + + XD3_ASSERT (source->curblk != NULL || blkno != source->curblkno); + + source->getblkno = blkno; + + if (stream->getblk == NULL) + { + stream->msg = "getblk source input"; + return XD3_GETSRCBLK; + } + else if ((ret = stream->getblk (stream, source, blkno)) != 0) + { + stream->msg = "getblk failed"; + return ret; + } + + XD3_ASSERT (source->curblk != NULL); + } + + if (source->onblk != (blkno == source->blocks - 1 ? 
+ source->onlastblk : source->blksize)) + { + stream->msg = "getblk returned short block"; + return XD3_INTERNAL; + } + + return 0; +} + +/*********************************************************** + Stream open/close + ***************************************************************/ + +int +xd3_set_source (xd3_stream *stream, + xd3_source *src) +{ + xoff_t blk_num; + usize_t tail_size, shiftby; + + IF_DEBUG1 (DP(RINT "[set source] size %"Q"u\n", src->size)); + + if (src == NULL || src->size < stream->smatcher.large_look) { return 0; } + + stream->src = src; + + // If src->blksize is a power-of-two, xd3_blksize_div() will use + // shift and mask rather than divide. Check that here. + if (xd3_check_pow2 (src->blksize, &shiftby) == 0) + { + src->shiftby = shiftby; + src->maskby = (1 << shiftby) - 1; + } + else if (src->size <= src->blksize) + { + int x = xd3_pow2_roundup (src->blksize); + xd3_check_pow2 (x, &shiftby); + src->shiftby = shiftby; + src->maskby = (1 << shiftby) - 1; + } + else + { + src->shiftby = 0; + src->maskby = 0; + } + + xd3_blksize_div (src->size, src, &blk_num, &tail_size); + src->blocks = blk_num + (tail_size > 0); + src->onlastblk = xd3_bytes_on_srcblk (src, src->blocks - 1); + src->srclen = 0; + src->srcbase = 0; + + return 0; +} + +void +xd3_abort_stream (xd3_stream *stream) +{ + stream->dec_state = DEC_ABORTED; + stream->enc_state = ENC_ABORTED; +} + +int +xd3_close_stream (xd3_stream *stream) +{ + if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED) + { + if (stream->buf_leftover != NULL) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + + if (stream->enc_state == ENC_POSTWIN) + { +#if XD3_ENCODER + xd3_encode_reset (stream); +#endif + stream->current_window += 1; + stream->enc_state = ENC_INPUT; + } + + /* If encoding, should be ready for more input but not actually + have any. 
*/ + if (stream->enc_state != ENC_INPUT || stream->avail_in != 0) + { + stream->msg = "encoding is incomplete"; + return XD3_INTERNAL; + } + } + else + { + switch (stream->dec_state) + { + case DEC_VCHEAD: + case DEC_WININD: + /* TODO: Address the zero-byte ambiguity. Does the encoder + * emit a window or not? If so, then catch an error here. + * If not, need another routine to say + * decode_at_least_one_if_empty. */ + case DEC_ABORTED: + break; + default: + /* If decoding, should be ready for the next window. */ + stream->msg = "EOF in decode"; + return XD3_INTERNAL; + } + } + + return 0; +} + +/************************************************************** + Application header + ****************************************************************/ + +int +xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size) +{ + if (stream->dec_state < DEC_WININD) + { + stream->msg = "application header not available"; + return XD3_INTERNAL; + } + + (*data) = stream->dec_appheader; + (*size) = stream->dec_appheadsz; + return 0; +} + +/********************************************************** + Decoder stuff + *************************************************/ + +#include "xdelta3-decode.h" + +/**************************************************************** + Encoder stuff + *****************************************************************/ + +#if XD3_ENCODER +void +xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size) +{ + stream->enc_appheader = data; + stream->enc_appheadsz = size; +} + +#if XD3_DEBUG +static int +xd3_iopt_check (xd3_stream *stream) +{ + usize_t ul = xd3_rlist_length (& stream->iopt_used); + usize_t fl = xd3_rlist_length (& stream->iopt_free); + + return (ul + fl + (stream->iout ? 
1 : 0)) == stream->iopt_size; +} +#endif + +static xd3_rinst* +xd3_iopt_free (xd3_stream *stream, xd3_rinst *i) +{ + xd3_rinst *n = xd3_rlist_remove (i); + xd3_rlist_push_back (& stream->iopt_free, i); + return n; +} + +static void +xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i) +{ + if (i->type != XD3_ADD) + { + xd3_rlist_push_back (& stream->iopt_free, i); + } +} + +/* When an instruction is ready to flush from the iopt buffer, this + * function is called to produce an encoding. It writes the + * instruction plus size, address, and data to the various encoding + * sections. */ +static int +xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + + /* Check for input overflow. */ + XD3_ASSERT (inst->pos + inst->size <= stream->avail_in); + + switch (inst->type) + { + case XD3_CPY: + { + /* the address may have an offset if there is a source window. */ + usize_t addr; + xd3_source *src = stream->src; + + if (src != NULL) + { + /* If there is a source copy, the source must have its + * source window decided before we can encode. This can + * be bad -- we have to make this decision even if no + * source matches have been found. */ + if (stream->srcwin_decided == 0) + { + if ((ret = xd3_srcwin_setup (stream))) { return ret; } + } + + /* xtra field indicates the copy is from the source */ + if (inst->xtra) + { + XD3_ASSERT (inst->addr >= src->srcbase); + XD3_ASSERT (inst->addr + inst->size <= src->srcbase + src->srclen); + addr = (inst->addr - src->srcbase); + stream->n_scpy += 1; + stream->l_scpy += inst->size; + } + else + { + /* with source window: target copy address is offset by taroff. */ + addr = stream->taroff + (usize_t) inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += inst->size; + } + } + else + { + addr = (usize_t) inst->addr; + stream->n_tcpy += 1; + stream->l_tcpy += inst->size; + } + + /* Note: used to assert inst->size >= MIN_MATCH, but not true + * for merge operations & identical match heuristics. 
*/ + /* the "here" position is always offset by taroff */ + if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, + & inst->type))) + { + return ret; + } + + IF_DEBUG1 ({ + static int cnt; + DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n", + cnt++, + stream->total_in + inst->pos, + stream->total_in + inst->pos + inst->size, + inst->addr, inst->addr + inst->size, inst->size); + }); + break; + } + case XD3_RUN: + { + XD3_ASSERT (inst->size >= MIN_MATCH); + + if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; } + + stream->n_run += 1; + stream->l_run += inst->size; + + IF_DEBUG1 ({ + static int cnt; + DP(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + break; + } + case XD3_ADD: + { + if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream), + stream->next_in + inst->pos, inst->size))) { return ret; } + + stream->n_add += 1; + stream->l_add += inst->size; + + IF_DEBUG1 ({ + static int cnt; + DP(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); + }); + + break; + } + } + + /* This is the only place stream->unencoded_offset is incremented. 
*/ + XD3_ASSERT (stream->unencoded_offset == inst->pos); + stream->unencoded_offset += inst->size; + + inst->code2 = 0; + + XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst); + + if (stream->iout != NULL) + { + if (stream->iout->code2 != 0) + { + if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + xd3_iopt_free_nonadd (stream, inst); + stream->iout = NULL; + return 0; + } + else + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + } + } + + stream->iout = inst; + + return 0; +} + +/* This possibly encodes an add instruction, iadd, which must remain + * on the stack until the following call to + * xd3_iopt_finish_encoding. */ +static int +xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd) +{ + int ret; + usize_t off = stream->unencoded_offset; + + if (pos > off) + { + iadd->type = XD3_ADD; + iadd->pos = off; + iadd->size = pos - off; + + if ((ret = xd3_iopt_finish_encoding (stream, iadd))) { return ret; } + } + + return 0; +} + +/* This function calls xd3_iopt_finish_encoding to finish encoding an + * instruction, and it may also produce an add instruction for an + * unmatched region. */ +static int +xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, inst->pos, & iadd))) { return ret; } + + if ((ret = xd3_iopt_finish_encoding (stream, inst))) { return ret; } + + return 0; +} + +/* Generates a final add instruction to encode the remaining input. 
*/ +static int +xd3_iopt_add_finalize (xd3_stream *stream) +{ + int ret; + xd3_rinst iadd; + + if ((ret = xd3_iopt_add (stream, stream->avail_in, & iadd))) { return ret; } + + if (stream->iout) + { + if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; } + + xd3_iopt_free_nonadd (stream, stream->iout); + stream->iout = NULL; + } + + return 0; +} + +/* Compact the instruction buffer by choosing the best non-overlapping + * instructions when lazy string-matching. There are no ADDs in the + * iopt buffer because those are synthesized in xd3_iopt_add_encoding + * and during xd3_iopt_add_finalize. */ +static int +xd3_iopt_flush_instructions (xd3_stream *stream, int force) +{ + xd3_rinst *r1 = xd3_rlist_front (& stream->iopt_used); + xd3_rinst *r2; + xd3_rinst *r3; + usize_t r1end; + usize_t r2end; + usize_t r2off; + usize_t r2moff; + usize_t gap; + usize_t flushed; + int ret; + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* Note: once tried to skip this step if it's possible to assert + * there are no overlapping instructions. Doesn't work because + * xd3_opt_erase leaves overlapping instructions. */ + while (! xd3_rlist_end (& stream->iopt_used, r1) && + ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1))) + { + r1end = r1->pos + r1->size; + + /* If the instructions do not overlap, continue. */ + if (r1end <= r2->pos) + { + r1 = r2; + continue; + } + + r2end = r2->pos + r2->size; + + /* The min_match adjustments prevent this. */ + XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR)); + + /* If r3 is available... */ + if (! xd3_rlist_end (& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + /* If r3 starts before r1 finishes or just about, r2 is irrelevant */ + if (r3->pos <= r1end + 1) + { + xd3_iopt_free (stream, r2); + continue; + } + } + else if (! force) + { + /* Unless force, end the loop when r3 is not available. 
*/ + break; + } + + r2off = r2->pos - r1->pos; + r2moff = r2end - r1end; + gap = r2end - r1->pos; + + /* If the two matches overlap almost entirely, choose the better match + * and discard the other. The else branch can still create inefficient + * copies, e.g., a 4-byte copy that takes 4 bytes to encode, which + * xd3_smatch() wouldn't allow by its crude efficiency check. However, + * in this case there are adjacent copies which mean the add would cost + * one extra byte. Allow the inefficiency here. */ + if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2) + { + /* Only one match should be used, choose the longer one. */ + if (r1->size < r2->size) + { + xd3_iopt_free (stream, r1); + r1 = r2; + } + else + { + /* We are guaranteed that r1 does not overlap now, so advance past r2 */ + r1 = xd3_iopt_free (stream, r2); + } + continue; + } + else + { + /* Shorten one of the instructions -- could be optimized + * based on the address cache. */ + usize_t average; + usize_t newsize; + usize_t adjust1; + + XD3_ASSERT (r1end > r2->pos && r2end > r1->pos); + + /* Try to balance the length of both instructions, but avoid + * making both longer than MAX_MATCH_SPLIT . */ + average = gap / 2; + newsize = min (MAX_MATCH_SPLIT, gap - average); + + /* Should be possible to simplify this code. 
*/ + if (newsize > r1->size) + { + /* shorten r2 */ + adjust1 = r1end - r2->pos; + } + else if (newsize > r2->size) + { + /* shorten r1 */ + adjust1 = r1end - r2->pos; + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* don't shorten r2 */ + adjust1 = 0; + } + else + { + /* shorten r1 */ + adjust1 = r1->size - newsize; + + if (r2->pos > r1end - adjust1) + { + adjust1 -= r2->pos - (r1end - adjust1); + } + + XD3_ASSERT (r1->size > adjust1); + + r1->size -= adjust1; + + /* shorten r2 */ + XD3_ASSERT (r1->pos + r1->size >= r2->pos); + + adjust1 = r1->pos + r1->size - r2->pos; + } + + /* Fallthrough above if-else, shorten r2 */ + XD3_ASSERT (r2->size > adjust1); + + r2->size -= adjust1; + r2->pos += adjust1; + r2->addr += adjust1; + + XD3_ASSERT (r1->size >= MIN_MATCH); + XD3_ASSERT (r2->size >= MIN_MATCH); + + r1 = r2; + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + /* If forcing, pick instructions until the list is empty, otherwise + * this empties 50% of the queue. */ + for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); ) + { + xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used); + if ((ret = xd3_iopt_add_encoding (stream, renc))) + { + return ret; + } + + if (! force) + { + if (++flushed > stream->iopt_size / 2) + { + break; + } + + /* If there are only two instructions remaining, break, + * because they were not optimized. This means there were + * more than 50% eliminated by the loop above. 
*/ + r1 = xd3_rlist_front (& stream->iopt_used); + if (xd3_rlist_end(& stream->iopt_used, r1) || + xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) || + xd3_rlist_end(& stream->iopt_used, r3 = xd3_rlist_next (r2))) + { + break; + } + } + } + + XD3_ASSERT (xd3_iopt_check (stream)); + + XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt_used) == 0); + + return 0; +} + +static int +xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr) +{ + xd3_rinst *i; + int ret; + + if (xd3_rlist_empty (& stream->iopt_free)) + { + if (stream->iopt_unlimited) + { + int elts = XD3_ALLOCSIZE / sizeof(xd3_rinst); + + if ((ret = xd3_alloc_iopt (stream, elts))) + { + return ret; + } + + stream->iopt_size += elts; + } + else + { + if ((ret = xd3_iopt_flush_instructions (stream, 0))) { return ret; } + + XD3_ASSERT (! xd3_rlist_empty (& stream->iopt_free)); + } + } + + i = xd3_rlist_pop_back (& stream->iopt_free); + + xd3_rlist_push_back (& stream->iopt_used, i); + + (*iptr) = i; + + ++stream->i_slots_used; + + return 0; +} + +/* A copy is about to be emitted that extends backwards to POS, + * therefore it may completely cover some existing instructions in the + * buffer. If an instruction is completely covered by this new match, + * erase it. If the new instruction is covered by the previous one, + * return 1 to skip it. */ +static void +xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size) +{ + while (! xd3_rlist_empty (& stream->iopt_used)) + { + xd3_rinst *r = xd3_rlist_back (& stream->iopt_used); + + /* Verify that greedy is working. The previous instruction + * should end before the new one begins. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos)); + /* Verify that min_match is working. The previous instruction + * should end before the new one ends. */ + XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size)); + + /* See if the last instruction starts before the new + * instruction. 
If so, there is nothing to erase. */ + if (r->pos < pos) + { + return; + } + + /* Otherwise, the new instruction covers the old one, delete it + and repeat. */ + xd3_rlist_remove (r); + xd3_rlist_push_back (& stream->iopt_free, r); + --stream->i_slots_used; + } +} + +/* This function tells the last matched input position. */ +static usize_t +xd3_iopt_last_matched (xd3_stream *stream) +{ + xd3_rinst *r; + + if (xd3_rlist_empty (& stream->iopt_used)) + { + return 0; + } + + r = xd3_rlist_back (& stream->iopt_used); + + return r->pos + r->size; +} + +/********************************************************* + Emit routines + ***********************************************************/ + +static int +xd3_emit_single (xd3_stream *stream, xd3_rinst *single, usize_t code) +{ + int has_size = stream->code_table[code].size1 == 0; + int ret; + + IF_DEBUG1 (DP(RINT "[emit1] %u %s (%u) code %u\n", + single->pos, + xd3_rtype_to_string ((xd3_rtype) single->type, 0), + single->size, + code)); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + if (has_size) + { + if ((ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size))) + { + return ret; + } + } + + return 0; +} + +static int +xd3_emit_double (xd3_stream *stream, xd3_rinst *first, + xd3_rinst *second, usize_t code) +{ + int ret; + + /* All double instructions use fixed sizes, so all we need to do is + * output the instruction code, no sizes. */ + XD3_ASSERT (stream->code_table[code].size1 != 0 && + stream->code_table[code].size2 != 0); + + if ((ret = xd3_emit_byte (stream, & INST_TAIL (stream), code))) + { + return ret; + } + + IF_DEBUG1 (DP(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n", + first->pos, + xd3_rtype_to_string ((xd3_rtype) first->type, 0), + first->size, + xd3_rtype_to_string ((xd3_rtype) second->type, 0), + second->size, + code)); + + return 0; +} + +/* This enters a potential run instruction into the iopt buffer. 
The + * position argument is relative to the target window. */ +static int +xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_RUN; + ri->xtra = run_c; + ri->pos = pos; + ri->size = size; + + return 0; +} + +/* This enters a potential copy instruction into the iopt buffer. The + * position argument is relative to the target window.. */ +int +xd3_found_match (xd3_stream *stream, usize_t pos, + usize_t size, xoff_t addr, int is_source) +{ + xd3_rinst* ri; + int ret; + + if ((ret = xd3_iopt_get_slot (stream, & ri))) { return ret; } + + ri->type = XD3_CPY; + ri->xtra = is_source; + ri->pos = pos; + ri->size = size; + ri->addr = addr; + + return 0; +} + +static int +xd3_emit_hdr (xd3_stream *stream) +{ + int ret; + int use_secondary = stream->sec_type != NULL; + int use_adler32 = stream->flags & (XD3_ADLER32 | XD3_ADLER32_RECODE); + int vcd_source = xd3_encoder_used_source (stream); + usize_t win_ind = 0; + usize_t del_ind = 0; + usize_t enc_len; + usize_t tgt_len; + usize_t data_len; + usize_t inst_len; + usize_t addr_len; + + if (stream->current_window == 0) + { + usize_t hdr_ind = 0; + int use_appheader = stream->enc_appheader != NULL; + int use_gencodetbl = GENERIC_ENCODE_TABLES && + (stream->code_table_desc != & __rfc3284_code_table_desc); + + if (use_secondary) { hdr_ind |= VCD_SECONDARY; } + if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; } + if (use_appheader) { hdr_ind |= VCD_APPHEADER; } + + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC1)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC2)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_MAGIC3)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + VCDIFF_VERSION)) != 0 || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0) + { + return ret; + } + + /* Secondary compressor 
ID */ +#if SECONDARY_ANY + if (use_secondary && + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->sec_type->id))) + { + return ret; + } +#endif + + /* Compressed code table */ + if (use_gencodetbl) + { + usize_t code_table_size; + const uint8_t *code_table_data; + + if ((ret = stream->comp_table_func (stream, & code_table_data, + & code_table_size))) + { + return ret; + } + + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + code_table_size + 2)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->code_table_desc->near_modes)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), + stream->code_table_desc->same_modes)) || + (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), + code_table_data, code_table_size))) + { + return ret; + } + } + + /* Application header */ + if (use_appheader) + { + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->enc_appheadsz)) || + (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), + stream->enc_appheader, + stream->enc_appheadsz))) + { + return ret; + } + } + } + + /* try to compress this window */ +#if SECONDARY_ANY + if (use_secondary) + { + int data_sec = 0; + int inst_sec = 0; + int addr_sec = 0; + +# define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \ + ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \ + (ret = xd3_encode_secondary (stream, \ + & UPPER ## _HEAD (stream), \ + & UPPER ## _TAIL (stream), \ + & xd3_sec_ ## LOWER (stream), \ + & stream->sec_ ## LOWER, \ + & LOWER ## _sec))) + + if (ENCODE_SECONDARY_SECTION (DATA, data) || + ENCODE_SECONDARY_SECTION (INST, inst) || + ENCODE_SECONDARY_SECTION (ADDR, addr)) + { + return ret; + } + + del_ind |= (data_sec ? VCD_DATACOMP : 0); + del_ind |= (inst_sec ? VCD_INSTCOMP : 0); + del_ind |= (addr_sec ? 
VCD_ADDRCOMP : 0); + } +#endif + + /* if (vcd_target) { win_ind |= VCD_TARGET; } */ + if (vcd_source) { win_ind |= VCD_SOURCE; } + if (use_adler32) { win_ind |= VCD_ADLER32; } + + /* window indicator */ + if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) + { + return ret; + } + + /* source window */ + if (vcd_source) + { + /* or (vcd_target) { ... } */ + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), + stream->src->srclen)) || + (ret = xd3_emit_offset (stream, & HDR_TAIL (stream), + stream->src->srcbase))) { return ret; } + } + + tgt_len = stream->avail_in; + data_len = xd3_sizeof_output (DATA_HEAD (stream)); + inst_len = xd3_sizeof_output (INST_HEAD (stream)); + addr_len = xd3_sizeof_output (ADDR_HEAD (stream)); + + /* The enc_len field is a redundency for future extensions.*/ + enc_len = (1 + (xd3_sizeof_size (tgt_len) + + xd3_sizeof_size (data_len) + + xd3_sizeof_size (inst_len) + + xd3_sizeof_size (addr_len)) + + data_len + + inst_len + + addr_len + + (use_adler32 ? 
4 : 0)); + + if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) || + (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) || + (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len))) + { + return ret; + } + + if (use_adler32) + { + uint8_t send[4]; + uint32_t a32; + + if (stream->flags & XD3_ADLER32) + { + a32 = adler32 (1L, stream->next_in, stream->avail_in); + } + else + { + a32 = stream->recode_adler32; + } + + send[0] = (a32 >> 24); + send[1] = (a32 >> 16); + send[2] = (a32 >> 8); + send[3] = (a32 & 0xff); + + if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) + { + return ret; + } + } + + return 0; +} + +/**************************************************************** + Encode routines + ****************************************************************/ + +static int +xd3_encode_buffer_leftover (xd3_stream *stream) +{ + usize_t take; + usize_t room; + + /* Allocate the buffer. */ + if (stream->buf_in == NULL && + (stream->buf_in = (uint8_t*) xd3_alloc (stream, stream->winsize, 1)) == NULL) + { + return ENOMEM; + } + + /* Take leftover input first. */ + if (stream->buf_leftover != NULL) + { + XD3_ASSERT (stream->buf_avail == 0); + XD3_ASSERT (stream->buf_leftavail < stream->winsize); + + IF_DEBUG1 (DP(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in)); + + memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail); + + stream->buf_leftover = NULL; + stream->buf_avail = stream->buf_leftavail; + } + + /* Copy into the buffer. 
*/ + room = stream->winsize - stream->buf_avail; + take = min (room, stream->avail_in); + + memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take); + + stream->buf_avail += take; + + if (take < stream->avail_in) + { + /* Buffer is full */ + stream->buf_leftover = stream->next_in + take; + stream->buf_leftavail = stream->avail_in - take; + + IF_DEBUG1 (DP(RINT "[leftover] take %u remaining %u\n", take, stream->buf_leftavail)); + } + else if ((stream->buf_avail < stream->winsize) && !(stream->flags & XD3_FLUSH)) + { + /* Buffer has space */ + IF_DEBUG1 (DP(RINT "[leftover] %u emptied\n", take)); + return XD3_INPUT; + } + + /* Use the buffer: */ + stream->next_in = stream->buf_in; + stream->avail_in = stream->buf_avail; + stream->buf_avail = 0; + + return 0; +} + +/* Allocates one block of xd3_rlist elements */ +static int +xd3_alloc_iopt (xd3_stream *stream, int elts) +{ + int i; + xd3_iopt_buflist* last = + (xd3_iopt_buflist*) xd3_alloc (stream, sizeof (xd3_iopt_buflist), 1); + + if (last == NULL || + (last->buffer = (xd3_rinst*) xd3_alloc (stream, sizeof (xd3_rinst), elts)) == NULL) + { + return ENOMEM; + } + + last->next = stream->iopt_alloc; + stream->iopt_alloc = last; + + for (i = 0; i < elts; i += 1) + { + xd3_rlist_push_back (& stream->iopt_free, & last->buffer[i]); + } + + return 0; +} + +/* This function allocates all memory initially used by the encoder. */ +static int +xd3_encode_init (xd3_stream *stream, int full_init) +{ + int i; + + if (full_init) + { + int large_comp = (stream->src != NULL); + int small_comp = ! (stream->flags & XD3_NOCOMPRESS); + + /* Memory allocations for checksum tables are delayed until + * xd3_string_match_init in the first call to string_match--that way + * identical or short inputs require no table allocation. 
*/ + if (large_comp) + { + usize_t hash_values = (stream->srcwin_maxsz / stream->smatcher.large_step); + + xd3_size_hashtable (stream, + hash_values, + & stream->large_hash); + } + + if (small_comp) + { + /* TODO: This is under devel: used to have min(sprevsz) here, which sort + * of makes sense, but observed fast performance w/ larger tables, which + * also sort of makes sense. @@@ */ + usize_t hash_values = stream->winsize; + + xd3_size_hashtable (stream, + hash_values, + & stream->small_hash); + } + } + + /* data buffers */ + for (i = 0; i < ENC_SECTS; i += 1) + { + if ((stream->enc_heads[i] = + stream->enc_tails[i] = + xd3_alloc_output (stream, NULL)) == NULL) + { + return ENOMEM; + } + } + + /* iopt buffer */ + xd3_rlist_init (& stream->iopt_used); + xd3_rlist_init (& stream->iopt_free); + + if (xd3_alloc_iopt (stream, stream->iopt_size) != 0) { goto fail; } + + XD3_ASSERT (xd3_rlist_length (& stream->iopt_free) == stream->iopt_size); + XD3_ASSERT (xd3_rlist_length (& stream->iopt_used) == 0); + + /* address cache, code table */ + stream->acache.s_near = stream->code_table_desc->near_modes; + stream->acache.s_same = stream->code_table_desc->same_modes; + stream->code_table = stream->code_table_func (); + + return xd3_alloc_cache (stream); + + fail: + + return ENOMEM; +} + +int +xd3_encode_init_full (xd3_stream *stream) +{ + return xd3_encode_init (stream, 1); +} + +int +xd3_encode_init_partial (xd3_stream *stream) +{ + return xd3_encode_init (stream, 0); +} + +/* Called after the ENC_POSTOUT state, this puts the output buffers + * back into separate lists and re-initializes some variables. (The + * output lists were spliced together during the ENC_FLUSH state.) 
*/ +static void +xd3_encode_reset (xd3_stream *stream) +{ + int i; + xd3_output *olist; + + stream->avail_in = 0; + stream->small_reset = 1; + stream->i_slots_used = 0; + + if (stream->src != NULL) + { + stream->src->srcbase = 0; + stream->src->srclen = 0; + stream->srcwin_decided = 0; + stream->match_minaddr = 0; + stream->match_maxaddr = 0; + stream->taroff = 0; + } + + /* Reset output chains. */ + olist = stream->enc_heads[0]; + + for (i = 0; i < ENC_SECTS; i += 1) + { + XD3_ASSERT (olist != NULL); + + stream->enc_heads[i] = olist; + stream->enc_tails[i] = olist; + olist = olist->next_page; + + stream->enc_heads[i]->next = 0; + stream->enc_heads[i]->next_page = NULL; + + stream->enc_tails[i]->next_page = NULL; + stream->enc_tails[i] = stream->enc_heads[i]; + } + + xd3_freelist_output (stream, olist); +} + +/* The main encoding routine. */ +int +xd3_encode_input (xd3_stream *stream) +{ + int ret, i; + + if (stream->dec_state != 0) + { + stream->msg = "encoder/decoder transition"; + return XD3_INTERNAL; + } + + switch (stream->enc_state) + { + case ENC_INIT: + /* Only reached on first time through: memory setup. */ + if ((ret = xd3_encode_init_full (stream))) { return ret; } + + stream->enc_state = ENC_INPUT; + + case ENC_INPUT: + + /* If there is no input yet, just return. This checks for + * next_in == NULL, not avail_in == 0 since zero bytes is a + * valid input. There is an assertion in xd3_avail_input() that + * next_in != NULL for this reason. By returning right away we + * avoid creating an input buffer before the caller has supplied + * its first data. It is possible for xd3_avail_input to be + * called both before and after the first call to + * xd3_encode_input(). */ + if (stream->next_in == NULL) + { + return XD3_INPUT; + } + + enc_flush: + /* See if we should buffer the input: either if there is already + * a leftover buffer, or if the input is short of winsize + * without flush. 
The label at this point is reached by a goto + * below, when there is leftover input after postout. */ + if ((stream->buf_leftover != NULL) || + (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH))) + { + if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; } + } + + /* Initalize the address cache before each window. */ + xd3_init_cache (& stream->acache); + + stream->input_position = 0; + stream->min_match = MIN_MATCH; + stream->unencoded_offset = 0; + + stream->enc_state = ENC_SEARCH; + + IF_DEBUG1 (DP(RINT "[WINSTART:%"Q"u] input bytes %u offset %"Q"u\n", + stream->current_window, stream->avail_in, + stream->total_in)); + return XD3_WINSTART; + + case ENC_SEARCH: + + /* Reentrant matching. */ + if (stream->src != NULL) + { + switch (stream->match_state) + { + case MATCH_TARGET: + /* Try matching forward at the start of the target. + * This is entered the first time through, to check for + * a perfect match, and whenever there is a source match + * that extends to the end of the previous window. The + * match_srcpos field is initially zero and later set + * during xd3_source_extend_match. */ + + if (stream->avail_in > 0) + { + /* This call can't fail because the source window is + unrestricted. */ + ret = xd3_source_match_setup (stream, stream->match_srcpos); + XD3_ASSERT (ret == 0); + stream->match_state = MATCH_FORWARD; + } + else + { + stream->match_state = MATCH_SEARCHING; + stream->match_fwd = 0; + } + XD3_ASSERT (stream->match_fwd == 0); + + case MATCH_FORWARD: + case MATCH_BACKWARD: + if (stream->avail_in != 0) + { + if ((ret = xd3_source_extend_match (stream)) != 0) + { + return ret; + } + + /* The search has to make forward progress here + * or else it can get stuck in a match-backward + * (getsrcblk) then match-forward (getsrcblk), + * find insufficient match length, then repeat + * exactly the same search. + */ + stream->input_position += stream->match_fwd; + } + + case MATCH_SEARCHING: + /* Continue string matching. 
(It's possible that the + * initial match continued through the entire input, in + * which case we're still in MATCH_FORWARD and should + * remain so for the next input window.) */ + break; + } + } + + /* String matching... */ + if (stream->avail_in != 0 && + (ret = stream->smatcher.string_match (stream))) + { + return ret; + } + + stream->enc_state = ENC_INSTR; + + case ENC_INSTR: + /* Note: Jump here to encode VCDIFF deltas w/o using this + * string-matching code. */ + + /* Flush the instrution buffer, then possibly add one more + * instruction, then emit the header. */ + if ((ret = xd3_iopt_flush_instructions (stream, 1)) || + (ret = xd3_iopt_add_finalize (stream))) + { + return ret; + } + + stream->enc_state = ENC_FLUSH; + + case ENC_FLUSH: + /* Note: main_recode_func() bypasses string-matching by setting + * ENC_FLUSH. */ + if ((ret = xd3_emit_hdr (stream))) + { + return ret; + } + + /* Begin output. */ + stream->enc_current = HDR_HEAD (stream); + + /* Chain all the outputs together. After doing this, it looks + * as if there is only one section. The other enc_heads are set + * to NULL to avoid freeing them more than once. */ + for (i = 1; i < ENC_SECTS; i += 1) + { + stream->enc_tails[i-1]->next_page = stream->enc_heads[i]; + stream->enc_heads[i] = NULL; + } + + enc_output: + + stream->enc_state = ENC_POSTOUT; + stream->next_out = stream->enc_current->base; + stream->avail_out = stream->enc_current->next; + stream->total_out += (xoff_t) stream->avail_out; + + /* If there is any output in this buffer, return it, otherwise + * fall through to handle the next buffer or finish the window + * after all buffers have been output. */ + if (stream->avail_out > 0) + { + /* This is the only place xd3_encode returns XD3_OUTPUT */ + return XD3_OUTPUT; + } + + case ENC_POSTOUT: + + if (stream->avail_out != 0) + { + stream->msg = "missed call to consume output"; + return XD3_INTERNAL; + } + + /* Continue outputting one buffer at a time, until the next is NULL. 
*/ + if ((stream->enc_current = stream->enc_current->next_page) != NULL) + { + goto enc_output; + } + + stream->total_in += (xoff_t) stream->avail_in; + stream->enc_state = ENC_POSTWIN; + + IF_DEBUG1 (DP(RINT "[WINFINISH:%"Q"u] in=%"Q"u\n", + stream->current_window, + stream->total_in)); + return XD3_WINFINISH; + + case ENC_POSTWIN: + + xd3_encode_reset (stream); + + stream->current_window += 1; + stream->enc_state = ENC_INPUT; + + /* If there is leftover input to flush, repeat. */ + if ((stream->buf_leftover != NULL) && (stream->flags & XD3_FLUSH)) + { + goto enc_flush; + } + + /* Ready for more input. */ + return XD3_INPUT; + + default: + stream->msg = "invalid state"; + return XD3_INTERNAL; + } +} +#endif /* XD3_ENCODER */ + +/***************************************************************** + Client convenience functions + ******************************************************************/ + +static int +xd3_process_stream (int is_encode, + xd3_stream *stream, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + usize_t ipos = 0; + usize_t n = min(stream->winsize, input_size); + + (*output_size) = 0; + + stream->flags |= XD3_FLUSH; + + xd3_avail_input (stream, input + ipos, n); + ipos += n; + + for (;;) + { + int ret; + switch((ret = func (stream))) + { + case XD3_OUTPUT: { /* memcpy below */ break; } + case XD3_INPUT: { + n = min(stream->winsize, input_size - ipos); + if (n == 0) { + goto done; + } + xd3_avail_input (stream, input + ipos, n); + ipos += n; + continue; + } + case XD3_GOTHEADER: { /* ignore */ continue; } + case XD3_WINSTART: { /* ignore */ continue; } + case XD3_WINFINISH: { /* ignore */ continue; } + case XD3_GETSRCBLK: + { + stream->msg = "stream requires source input"; + return XD3_INTERNAL; + } + case 0: + { + /* xd3_encode_input/xd3_decode_input never return 0 */ + stream->msg = "invalid return: 0"; + return XD3_INTERNAL; + 
} + default: + return ret; + } + + if (*output_size + stream->avail_out > output_size_max) + { + stream->msg = "insufficient output space"; + return ENOSPC; + } + + memcpy (output + *output_size, stream->next_out, stream->avail_out); + + *output_size += stream->avail_out; + + xd3_consume_output (stream); + } + done: + return (close_stream == 0) ? 0 : xd3_close_stream (stream); +} + +static int +xd3_process_memory (int is_encode, + int (*func) (xd3_stream *), + int close_stream, + const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + xd3_stream stream; + xd3_config config; + xd3_source src; + int ret; + + memset (& stream, 0, sizeof (stream)); + memset (& config, 0, sizeof (config)); + + if (input == NULL || output == NULL) { + stream.msg = "invalid input/output buffer"; + ret = XD3_INTERNAL; + goto exit; + } + + config.flags = flags; + + if (is_encode) + { + config.srcwin_maxsz = source_size; + config.winsize = min(input_size, (usize_t) XD3_DEFAULT_WINSIZE); + config.iopt_size = min(input_size / 32, XD3_DEFAULT_IOPT_SIZE); + config.iopt_size = max(config.iopt_size, 128U); + config.sprevsz = xd3_pow2_roundup (config.winsize); + } + + if ((ret = xd3_config_stream (&stream, &config)) != 0) + { + goto exit; + } + + if (source != NULL) + { + memset (& src, 0, sizeof (src)); + src.size = source_size; + + src.blksize = source_size; + src.onblk = source_size; + src.curblk = source; + src.curblkno = 0; + + if ((ret = xd3_set_source (&stream, &src)) != 0) + { + goto exit; + } + } + + if ((ret = xd3_process_stream (is_encode, + & stream, + func, 1, + input, input_size, + output, + output_size, + output_size_max)) != 0) + { + goto exit; + } + + exit: + if (ret != 0) { + IF_DEBUG1 (DP(RINT "process_memory: %d: %s", ret, stream.msg)); + } + xd3_free_stream(&stream); + return ret; +} + +int +xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + 
usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (0, stream, & xd3_decode_input, 1, + input, input_size, + output, output_size, output_size_max); +} + +int +xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (0, & xd3_decode_input, 1, + input, input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} + + +#if XD3_ENCODER +int +xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max) +{ + return xd3_process_stream (1, stream, & xd3_encode_input, 1, + input, input_size, + output, output_size, output_size_max); +} + +int +xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + int flags) { + return xd3_process_memory (1, & xd3_encode_input, 1, + input, input_size, + source, source_size, + output, output_size, output_size_max, + flags); +} +#endif + + +/************************************************************* + String matching helpers + *************************************************************/ + +#if XD3_ENCODER +/* Do the initial xd3_string_match() checksum table setup. + * Allocations are delayed until first use to avoid allocation + * sometimes (e.g., perfect matches, zero-length inputs). */ +static int +xd3_string_match_init (xd3_stream *stream) +{ + const int DO_SMALL = ! 
(stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + + if (DO_LARGE && stream->large_table == NULL) + { + if ((stream->large_table = + (usize_t*) xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + } + + if (DO_SMALL) + { + /* Subsequent calls can return immediately after checking reset. */ + if (stream->small_table != NULL) + { + /* The target hash table is reinitialized once per window. */ + /* TODO: This would not have to be reinitialized if absolute + * offsets were being stored. */ + if (stream->small_reset) + { + stream->small_reset = 0; + memset (stream->small_table, 0, + sizeof (usize_t) * stream->small_hash.size); + } + + return 0; + } + + if ((stream->small_table = + (usize_t*) xd3_alloc0 (stream, + stream->small_hash.size, + sizeof (usize_t))) == NULL) + { + return ENOMEM; + } + + /* If there is a previous table needed. */ + if (stream->smatcher.small_lchain > 1 || + stream->smatcher.small_chain > 1) + { + if ((stream->small_prev = + (xd3_slist*) xd3_alloc (stream, + stream->sprevsz, + sizeof (xd3_slist))) == NULL) + { + return ENOMEM; + } + } + } + + return 0; +} + +#if XD3_USE_LARGEFILE64 +/* This function handles the 32/64bit ambiguity -- file positions are 64bit + * but the hash table for source-offsets is 32bit. */ +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ + xoff_t scp = stream->srcwin_cksum_pos; + xoff_t s0 = scp >> 32; + + usize_t sr = (usize_t) scp; + + if (s0 == 0) { + return low; + } + + /* This should not be >= because srcwin_cksum_pos is the next + * position to index. */ + if (low > sr) { + return (--s0 << 32) | low; + } + + return (s0 << 32) | low; +} +#else +static xoff_t +xd3_source_cksum_offset(xd3_stream *stream, usize_t low) +{ + return (xoff_t) low; +} +#endif + +/* This function sets up the stream->src fields srcbase, srclen. The + * call is delayed until these values are needed to encode a copy + * address. 
At this point the decision has to be made. */ +static int +xd3_srcwin_setup (xd3_stream *stream) +{ + xd3_source *src = stream->src; + xoff_t length, x; + + /* Check the undecided state. */ + XD3_ASSERT (src->srclen == 0 && src->srcbase == 0); + + /* Avoid repeating this call. */ + stream->srcwin_decided = 1; + + /* If the stream is flushing, then the iopt buffer was able to + * contain the complete encoding. If no copies were issued no + * source window is actually needed. This prevents the VCDIFF + * header from including source base/len. xd3_emit_hdr checks for + * srclen == 0. */ + if (stream->enc_state == ENC_INSTR && stream->match_maxaddr == 0) + { + goto done; + } + + /* Check for overflow, srclen is usize_t - this can't happen unless + * XD3_DEFAULT_SRCBACK and related parameters are extreme - should + * use smaller windows. */ + length = stream->match_maxaddr - stream->match_minaddr; + + x = (xoff_t) USIZE_T_MAX; + if (length > x) + { + stream->msg = "source window length overflow (not 64bit)"; + return XD3_INTERNAL; + } + + /* If ENC_INSTR, then we know the exact source window to use because + * no more copies can be issued. */ + if (stream->enc_state == ENC_INSTR) + { + src->srcbase = stream->match_minaddr; + src->srclen = (usize_t) length; + XD3_ASSERT (src->srclen); + goto done; + } + + /* Otherwise, we have to make a guess. More copies may still be + * issued, but we have to decide the source window base and length + * now. */ + src->srcbase = stream->match_minaddr; + src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2)); + if (src->size < src->srcbase + (xoff_t) src->srclen) + { + /* Could reduce srcbase, as well. */ + src->srclen = src->size - src->srcbase; + } + + XD3_ASSERT (src->srclen); + done: + /* Set the taroff. This convenience variable is used even when + stream->src == NULL. 
*/ + stream->taroff = src->srclen; + return 0; +} + +/* Sets the bounding region for a newly discovered source match, prior + * to calling xd3_source_extend_match(). This sets the match_maxfwd, + * match_maxback variables. Note: srcpos is an absolute position + * (xoff_t) but the match_maxfwd, match_maxback variables are usize_t. + * Returns 0 if the setup succeeds, or 1 if the source position lies + * outside an already-decided srcbase/srclen window. */ +static int +xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos) +{ + xd3_source *src = stream->src; + usize_t greedy_or_not; + + stream->match_maxback = 0; + stream->match_maxfwd = 0; + stream->match_back = 0; + stream->match_fwd = 0; + + /* This avoids a non-blocking endless loop caused by scanning + * backwards across a block boundary, only to find not enough + * matching bytes to beat the current min_match due to a better lazy + * target match: the re-entry to xd3_string_match() repeats the same + * long match because the input position hasn't changed. TODO: if + * ever duplicates are added to the source hash table, this logic + * won't suffice to avoid loops. See testing/regtest.cc's + * TestNonBlockingProgress test! */ + if (srcpos != 0 && srcpos == stream->match_last_srcpos) + { + goto bad; + } + + /* Going backwards, the 1.5-pass algorithm allows some + * already-matched input may be covered by a longer source match. + * The greedy algorithm does not allow this. */ + if (stream->flags & XD3_BEGREEDY) + { + /* The greedy algorithm allows backward matching to the last + matched position. */ + greedy_or_not = xd3_iopt_last_matched (stream); + } + else + { + /* The 1.5-pass algorithm allows backward matching to go back as + * far as the unencoded offset, which is updated as instructions + * pass out of the iopt buffer. If this (default) is chosen, it + * means xd3_iopt_erase may be called to eliminate instructions + * when a covering source match is found. 
*/ + greedy_or_not = stream->unencoded_offset; + } + + /* Backward target match limit. */ + XD3_ASSERT (stream->input_position >= greedy_or_not); + stream->match_maxback = stream->input_position - greedy_or_not; + + /* Forward target match limit. */ + XD3_ASSERT (stream->avail_in > stream->input_position); + stream->match_maxfwd = stream->avail_in - stream->input_position; + + /* Now we take the source position into account. It depends whether + * the srclen/srcbase have been decided yet. */ + if (stream->srcwin_decided == 0) + { + /* Unrestricted case: the match can cover the entire source, + * 0--src->size. We compare the usize_t + * match_maxfwd/match_maxback against the xoff_t + * src->size/srcpos values and take the min. */ + xoff_t srcavail; + + if (srcpos < (xoff_t) stream->match_maxback) + { + stream->match_maxback = srcpos; + } + + srcavail = src->size - srcpos; + if (srcavail < (xoff_t) stream->match_maxfwd) + { + stream->match_maxfwd = srcavail; + } + + goto good; + } + + /* Decided some source window. */ + XD3_ASSERT (src->srclen > 0); + + /* Restricted case: fail if the srcpos lies outside the source window */ + if ((srcpos < src->srcbase) || (srcpos > (src->srcbase + (xoff_t) src->srclen))) + { + goto bad; + } + else + { + usize_t srcavail; + + srcavail = (usize_t) (srcpos - src->srcbase); + if (srcavail < stream->match_maxback) + { + stream->match_maxback = srcavail; + } + + srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos); + if (srcavail < stream->match_maxfwd) { + stream->match_maxfwd = srcavail; + } + + goto good; + } + + good: + stream->match_state = MATCH_BACKWARD; + stream->match_srcpos = srcpos; + stream->match_last_srcpos = srcpos; + return 0; + + bad: + stream->match_state = MATCH_SEARCHING; + return 1; +} + +/* This code is experimental, and I'm having trouble benchmarking + * it reliably. 
*/ +#if 0 +static inline int +xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, size_t n) +{ + size_t i = 0; +#if UNALIGNED_OK + size_t nint = n / sizeof(int); + + if (nint >> 3) + { + size_t j = 0; + const int *s1 = (const int*)s1c; + const int *s2 = (const int*)s2c; + size_t nint_8 = nint - 8; + + while (i <= nint_8 && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++] && + s1[i++] == s2[j++]) { } + + i = (i - 1) * sizeof(int); + } +#endif + + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} +#else +static inline usize_t +xd3_forward_match(const uint8_t *s1c, + const uint8_t *s2c, + usize_t n) { + usize_t i = 0; + while (i < n && s1c[i] == s2c[i]) + { + i++; + } + return i; +} +#endif + + +/* This function expands the source match backward and forward. It is + * reentrant, since xd3_getblk may return XD3_GETSRCBLK, so most + * variables are kept in xd3_stream. There are two callers of this + * function, the string_matching routine when a checksum match is + * discovered, and xd3_encode_input whenever a continuing (or initial) + * match is suspected. The two callers do different things with the + * input_position, thus this function leaves that variable untouched. + * If a match is taken the resulting stream->match_fwd is left + * non-zero. */ +static int +xd3_source_extend_match (xd3_stream *stream) +{ + int ret; + xd3_source *src = stream->src; + xoff_t matchoff; /* matchoff is the current right/left-boundary of + the source match being tested. */ + usize_t streamoff; /* streamoff is the current right/left-boundary + of the input match being tested. 
*/ + xoff_t tryblk; /* tryblk, tryoff are the block, offset position + of matchoff */ + usize_t tryoff; + usize_t tryrem; /* tryrem is the number of matchable bytes */ + usize_t matched; + + XD3_ASSERT (src != NULL); + + /* Does it make sense to compute backward match AFTER forward match? */ + if (stream->match_state == MATCH_BACKWARD) + { + /* Note: this code is practically duplicated below, substituting + * match_fwd/match_back and direction. Consolidate? */ + matchoff = stream->match_srcpos - stream->match_back; + streamoff = stream->input_position - stream->match_back; + xd3_blksize_div (matchoff, src, &tryblk, &tryoff); + + /* this loops backward over source blocks */ + while (stream->match_back < stream->match_maxback) + { + /* see if we're backing across a source block boundary */ + if (tryoff == 0) + { + tryoff = src->blksize; + tryblk -= 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + /* if search went too far back, continue forward. */ + if (ret == XD3_TOOFARBACK) + { + break; + } + + /* could be a XD3_GETSRCBLK failure. 
*/ + return ret; + } + + /* TODO: This code can be optimized similar to xd3_match_forward() */ + for (tryrem = min (tryoff, stream->match_maxback - + stream->match_back); + tryrem != 0; + tryrem -= 1, stream->match_back += 1) + { + if (src->curblk[tryoff-1] != stream->next_in[streamoff-1]) + { + goto doneback; + } + + tryoff -= 1; + streamoff -= 1; + } + } + + doneback: + stream->match_state = MATCH_FORWARD; + } + + XD3_ASSERT (stream->match_state == MATCH_FORWARD); + + matchoff = stream->match_srcpos + stream->match_fwd; + streamoff = stream->input_position + stream->match_fwd; + xd3_blksize_div (matchoff, src, & tryblk, & tryoff); + + /* Note: practically the same code as backwards case above: same comments */ + while (stream->match_fwd < stream->match_maxfwd) + { + if (tryoff == src->blksize) + { + tryoff = 0; + tryblk += 1; + } + + if ((ret = xd3_getblk (stream, tryblk))) + { + /* if search went too far back, continue forward. */ + if (ret == XD3_TOOFARBACK) + { + break; + } + + /* could be a XD3_GETSRCBLK failure. */ + return ret; + } + + tryrem = min(stream->match_maxfwd - stream->match_fwd, + src->blksize - tryoff); + + matched = xd3_forward_match(src->curblk + tryoff, + stream->next_in + streamoff, + tryrem); + tryoff += matched; + streamoff += matched; + stream->match_fwd += matched; + + if (tryrem != matched) + { + break; + } + } + + stream->match_state = MATCH_SEARCHING; + + /* If the match ends short of the last instruction end, we probably + * don't want it. There is the possibility that a copy ends short + * of the last copy but also goes further back, in which case we + * might want it. This code does not implement such: if so we would + * need more complicated xd3_iopt_erase logic. */ + if (stream->match_fwd < stream->min_match) + { + stream->match_fwd = 0; + } + else + { + usize_t total = stream->match_fwd + stream->match_back; + + /* Correct the variables to remove match_back from the equation. 
*/ + usize_t target_position = stream->input_position - stream->match_back; + usize_t match_length = stream->match_back + stream->match_fwd; + xoff_t match_position = stream->match_srcpos - stream->match_back; + xoff_t match_end = stream->match_srcpos + stream->match_fwd; + + /* At this point we may have to erase any iopt-buffer + * instructions that are fully covered by a backward-extending + * copy. */ + if (stream->match_back > 0) + { + xd3_iopt_erase (stream, target_position, total); + } + + stream->match_back = 0; + + /* Update ranges. The first source match occurs with both + values set to 0. */ + if (stream->match_maxaddr == 0 || + match_position < stream->match_minaddr) + { + stream->match_minaddr = match_position; + } + + if (match_end > stream->match_maxaddr) + { + /* Note: per-window */ + stream->match_maxaddr = match_end; + } + + if (match_end > stream->maxsrcaddr) + { + /* Note: across windows */ + stream->maxsrcaddr = match_end; + } + + IF_DEBUG1 ({ + static int x = 0; + DP(RINT "[source match:%d] <inp %"Q"u %"Q"u> <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n", + x++, + stream->total_in + target_position, + stream->total_in + target_position + match_length, + match_position, + match_position + match_length, + (stream->total_in + target_position == match_position) ? "same" : "diff", + match_length); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ target_position, + /* length */ match_length, + /* address */ match_position, + /* is_source */ 1))) + { + return ret; + } + + /* If the match ends with the available input: */ + if (target_position + match_length == stream->avail_in) + { + /* Setup continuing match for the next window. */ + stream->match_state = MATCH_TARGET; + stream->match_srcpos = match_end; + } + } + + return 0; +} + +/* Update the small hash. Values in the small_table are offset by + * HASH_CKOFFSET (1) to distinguish empty buckets from real offsets. 
*/ +static void +xd3_scksum_insert (xd3_stream *stream, + usize_t inx, + usize_t scksum, + usize_t pos) +{ + /* If we are maintaining previous duplicates. */ + if (stream->small_prev) + { + usize_t last_pos = stream->small_table[inx]; + xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask]; + + /* Note last_pos is offset by HASH_CKOFFSET. */ + pos_list->last_pos = last_pos; + } + + /* Enter the new position into the hash bucket. */ + stream->small_table[inx] = pos + HASH_CKOFFSET; +} + +#if XD3_DEBUG +static int +xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0, + const uint8_t *inp_max, usize_t cmp_len) +{ + usize_t i; + + for (i = 0; i < cmp_len; i += 1) + { + XD3_ASSERT (ref0[i] == inp0[i]); + } + + if (inp0 + cmp_len < inp_max) + { + XD3_ASSERT (inp0[i] != ref0[i]); + } + + return 1; +} +#endif /* XD3_DEBUG */ + +/* When the hash table indicates a possible small string match, it + * calls this routine to find the best match. The first matching + * position is taken from the small_table, HASH_CKOFFSET is subtracted + * to get the actual position. After checking that match, if previous + * linked lists are in use (because stream->smatcher.small_chain > 1), + * previous matches are tested searching for the longest match. If + * (stream->min_match > MIN_MATCH) then a lazy match is in effect. + */ +static usize_t +xd3_smatch (xd3_stream *stream, + usize_t base, + usize_t scksum, + usize_t *match_offset) +{ + usize_t cmp_len; + usize_t match_length = 0; + usize_t chain = (stream->min_match == MIN_MATCH ? 
+ stream->smatcher.small_chain : + stream->smatcher.small_lchain); + const uint8_t *inp_max = stream->next_in + stream->avail_in; + const uint8_t *inp; + const uint8_t *ref; + + SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position); + + XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in); + + base -= HASH_CKOFFSET; + + again: + + IF_DEBUG2 (DP(RINT "smatch at base=%u inp=%u cksum=%u\n", base, + stream->input_position, scksum)); + + /* For small matches, we can always go to the end-of-input because + * the matching position must be less than the input position. */ + XD3_ASSERT (base < stream->input_position); + + ref = stream->next_in + base; + inp = stream->next_in + stream->input_position; + + SMALL_HASH_DEBUG2 (stream, ref); + + /* Expand potential match forward. */ + while (inp < inp_max && *inp == *ref) + { + ++inp; + ++ref; + } + + cmp_len = inp - (stream->next_in + stream->input_position); + + /* Verify correctness */ + XD3_ASSERT (xd3_check_smatch (stream->next_in + base, + stream->next_in + stream->input_position, + inp_max, cmp_len)); + + /* Update longest match */ + if (cmp_len > match_length) + { + ( match_length) = cmp_len; + (*match_offset) = base; + + /* Stop if we match the entire input or have a long_enough match. */ + if (inp == inp_max || cmp_len >= stream->smatcher.long_enough) + { + goto done; + } + } + + /* If we have not reached the chain limit, see if there is another + previous position. */ + while (--chain != 0) + { + /* Calculate the previous offset. */ + usize_t prev_pos = stream->small_prev[base & stream->sprevmask].last_pos; + usize_t diff_pos; + + if (prev_pos == 0) + { + break; + } + + prev_pos -= HASH_CKOFFSET; + + if (prev_pos > base) + { + break; + } + + base = prev_pos; + + XD3_ASSERT (stream->input_position > base); + diff_pos = stream->input_position - base; + + /* Stop searching if we go beyond sprevsz, since those entries + * are for unrelated checksum entries. 
*/ + if (diff_pos & ~stream->sprevmask) + { + break; + } + + goto again; + } + + done: + /* Crude efficiency test: if the match is very short and very far back, it's + * unlikely to help, but the exact calculation requires knowing the state of + * the address cache and adjacent instructions, which we can't do here. + * Rather than encode a probably inefficient copy here and check it later + * (which complicates the code a lot), do this: + */ + if (match_length == 4 && stream->input_position - (*match_offset) >= 1<<14) + { + /* It probably takes >2 bytes to encode an address >= 2^14 from here */ + return 0; + } + if (match_length == 5 && stream->input_position - (*match_offset) >= 1<<21) + { + /* It probably takes >3 bytes to encode an address >= 2^21 from here */ + return 0; + } + + /* It's unlikely that a window is large enough for the (match_length == 6 && + * address >= 2^28) check */ + return match_length; +} + +#if XD3_DEBUG +static void +xd3_verify_small_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum) +{ + uint32_t state; + uint32_t cksum = xd3_scksum (&state, inp, stream->smatcher.small_look); + + XD3_ASSERT (cksum == x_cksum); +} + +static void +xd3_verify_large_state (xd3_stream *stream, + const uint8_t *inp, + uint32_t x_cksum) +{ + uint32_t cksum = xd3_lcksum (inp, stream->smatcher.large_look); + XD3_ASSERT (cksum == x_cksum); +} +static void +xd3_verify_run_state (xd3_stream *stream, + const uint8_t *inp, + int x_run_l, + uint8_t x_run_c) +{ + int slook = stream->smatcher.small_look; + uint8_t run_c; + int run_l = xd3_comprun (inp, slook, &run_c); + + XD3_ASSERT (run_l == 0 || run_c == x_run_c); + XD3_ASSERT (x_run_l > slook || run_l == x_run_l); +} +#endif /* XD3_DEBUG */ + +/* This function computes more source checksums to advance the window. + * Called at every entrance to the string-match loop and each time + * stream->input_position reaches the value returned as + * *next_move_point. 
NB: this is one of the most expensive functions + * in this code and also the most critical for good compression. + * + * TODO: really would like a good test for this logic. how? + * Update: testing/regtest.cc has some basic tests, more would be nice. + * TODO: optimize the inner loop + */ +static int +xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point) +{ + xoff_t logical_input_cksum_pos; + + XD3_ASSERT(stream->srcwin_cksum_pos <= stream->src->size); + if (stream->srcwin_cksum_pos == stream->src->size) + { + *next_move_point = USIZE_T_MAX; + return 0; + } + + /* Begin by advancing at twice the input rate, up to half the + * maximum window size. */ + logical_input_cksum_pos = min((stream->total_in + stream->input_position) * 2, + (stream->total_in + stream->input_position) + + (stream->srcwin_maxsz / 2)); + + /* If srcwin_cksum_pos is already greater, wait until the difference + * is met. */ + if (stream->srcwin_cksum_pos > logical_input_cksum_pos) + { + *next_move_point = stream->input_position + + (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); + return 0; + } + + /* A long match may have extended past srcwin_cksum_pos. Don't + * start checksumming already-matched source data. */ + if (stream->maxsrcaddr > stream->srcwin_cksum_pos) + { + stream->srcwin_cksum_pos = stream->maxsrcaddr; + } + + if (logical_input_cksum_pos < stream->srcwin_cksum_pos) + { + logical_input_cksum_pos = stream->srcwin_cksum_pos; + } + + /* Advance at least one source block. With the command-line + * defaults this means: + * + * if (src->size <= srcwin_maxsz), index the entire source at once + * using the position of the first non-match. This is good for + * small inputs, especially when the content may have moved anywhere + * in the file (e.g., tar files). + * + * if (src->size > srcwin_maxsz), index at least one block (which + * the command-line sets to 1/32 of srcwin_maxsz) ahead of the + * logical position. 
This is good for different reasons: when a + * long match spanning several source blocks is encountered, this + * avoids computing checksums for those blocks. If the data can + * move anywhere, this is bad. + */ + logical_input_cksum_pos += stream->src->blksize; + + IF_DEBUG1 (DP(RINT "[srcwin_move_point] T=%"Q"u S=%"Q"u/%"Q"u\n", + stream->total_in + stream->input_position, + stream->srcwin_cksum_pos, + logical_input_cksum_pos)); + + while (stream->srcwin_cksum_pos < logical_input_cksum_pos && + stream->srcwin_cksum_pos < stream->src->size) + { + xoff_t blkno; + xoff_t blkbaseoffset; + usize_t blkrem; + ssize_t oldpos; + ssize_t blkpos; + int ret; + xd3_blksize_div (stream->srcwin_cksum_pos, + stream->src, &blkno, &blkrem); + oldpos = blkrem; + blkpos = xd3_bytes_on_srcblk_fast (stream->src, blkno); + + if (oldpos + stream->smatcher.large_look > (usize_t) blkpos) + { + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + continue; + } + + if ((ret = xd3_getblk (stream, blkno))) + { + /* TOOFARBACK should never occur here, since we read forward. */ + if (ret == XD3_TOOFARBACK) + { + ret = XD3_INTERNAL; + } + return ret; + } + + /* This inserts checksums for the entire block, in reverse, + * starting from the end of the block. This logic does not test + * stream->srcwin_cksum_pos because it always advances it to the + * start of the next block. + * + * oldpos is the srcwin_cksum_pos within this block. blkpos is + * the number of bytes available. Each iteration inspects + * large_look bytes then steps back large_step bytes. The + * if-stmt above ensures at least one large_look of data. 
*/ + blkpos -= stream->smatcher.large_look; + blkbaseoffset = stream->src->blksize * blkno; + + do + { + uint32_t cksum = xd3_lcksum (stream->src->curblk + blkpos, + stream->smatcher.large_look); + usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); + + stream->large_table[hval] = + (usize_t) (blkbaseoffset + + (xoff_t)(blkpos + HASH_CKOFFSET)); + + IF_DEBUG (stream->large_ckcnt += 1); + + blkpos -= stream->smatcher.large_step; + } + while (blkpos >= oldpos); + + stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; + } + + if (stream->srcwin_cksum_pos >= stream->src->size) + { + /* This invariant is needed for xd3_source_cksum_offset() */ + stream->srcwin_cksum_pos = stream->src->size; + *next_move_point = USIZE_T_MAX; + return 0; + } + + /* How long until this function should be called again. */ + XD3_ASSERT(stream->srcwin_cksum_pos >= logical_input_cksum_pos); + *next_move_point = stream->input_position + 1 + + (usize_t)(stream->srcwin_cksum_pos - logical_input_cksum_pos); + return 0; +} + +#endif /* XD3_ENCODER */ + +/******************************************************************** + TEMPLATE pass + *********************************************************************/ + +#endif /* __XDELTA3_C_INLINE_PASS__ */ +#ifdef __XDELTA3_C_TEMPLATE_PASS__ + +#if XD3_ENCODER + +/******************************************************************** + Templates + *******************************************************************/ + +/* Template macros */ +#define XD3_TEMPLATE(x) XD3_TEMPLATE2(x,TEMPLATE) +#define XD3_TEMPLATE2(x,n) XD3_TEMPLATE3(x,n) +#define XD3_TEMPLATE3(x,n) x ## n +#define XD3_STRINGIFY(x) XD3_STRINGIFY2(x) +#define XD3_STRINGIFY2(x) #x + +static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream); + +static const xd3_smatcher XD3_TEMPLATE(__smatcher_) = +{ + XD3_STRINGIFY(TEMPLATE), + XD3_TEMPLATE(xd3_string_match_), +#if SOFTCFG == 1 + 0, 0, 0, 0, 0, 0, 0 +#else + LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH 
+#endif +}; + +/* Template string-match pass. Starting at stream->input_position it + * walks the input one byte at a time and tries, in order, a RUN, a + * large (source) match and a small (target) match, emitting what it + * finds through xd3_emit_run, xd3_source_extend_match and + * xd3_found_match. Returns 0 once fewer than SLOOK bytes of input + * remain, or the non-zero code returned by xd3_string_match_init, + * xd3_srcwin_move_point, xd3_emit_run, xd3_source_extend_match or + * xd3_found_match. */ +static int +XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream) +{ + const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); + const int DO_LARGE = (stream->src != NULL); + const int DO_RUN = (1); + + const uint8_t *inp; + uint32_t scksum = 0; + uint32_t scksum_state; + uint32_t lcksum = 0; + usize_t sinx; + usize_t linx; + uint8_t run_c; + size_t run_l; + int ret; + usize_t match_length; + usize_t match_offset = 0; + /* next_move_point is assigned through xd3_srcwin_move_point and is + * only read under the same DO_LARGE / LLOOK test that guards that + * call. */ + usize_t next_move_point; + + /* If there will be no compression due to settings or short input, + * skip it entirely. */ + if (! (DO_SMALL || DO_LARGE || DO_RUN) || + stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + if ((ret = xd3_string_match_init (stream))) { return ret; } + + /* The restartloop label is reached when the incremental loop state + * needs to be reset. */ + restartloop: + + /* If there is not enough input remaining for any kind of match, + skip it. */ + if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; } + + /* Now reset the incremental loop state: */ + + /* The min_match variable is updated to avoid matching the same lazy + * match over and over again. For example, if you find a (small) + * match of length 9 at one position, you will likely find a match + * of length 8 at the next position. */ + if (xd3_iopt_last_matched (stream) > stream->input_position) + { + stream->min_match = max(MIN_MATCH, + 1 + xd3_iopt_last_matched(stream) - + stream->input_position); + } + else + { + stream->min_match = MIN_MATCH; + } + + /* The current input byte. */ + inp = stream->next_in + stream->input_position; + + /* Small match state. */ + if (DO_SMALL) + { + scksum = xd3_scksum (&scksum_state, inp, SLOOK); + } + + /* Run state. */ + if (DO_RUN) + { + run_l = xd3_comprun (inp, SLOOK, & run_c); + } + + /* Large match state. We continue the loop even after not enough + * bytes for LLOOK remain, so always check stream->input_position in + * DO_LARGE code. 
*/ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + /* Source window: next_move_point is the point that + * stream->input_position must reach before computing more + * source checksum. */ + if ((ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + lcksum = xd3_lcksum (inp, LLOOK); + } + + /* TRYLAZYLEN: True if a certain length match should be followed by + * lazy search. This checks that LEN is shorter than MAXLAZY and + * that there is enough leftover data to consider lazy matching. + * "Enough" is set to 2 since the next match will start at the next + * offset, it must match two extra characters. */ +#define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) \ + && (POS) + (LEN) <= (MAX) - 2) + + /* HANDLELAZY: This statement is called each time an instruction is + * emitted (three cases). If the instruction is large enough, the + * loop is restarted, otherwise lazy matching may ensue. */ +#define HANDLELAZY(mlen) \ + if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \ + { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \ + else \ + { stream->input_position += (mlen); goto restartloop; } + + /* Now loop over one input byte at a time until a match is found... */ + for (;; inp += 1, stream->input_position += 1) + { + /* Now we try three kinds of string match in order of expense: + * run, large match, small match. */ + + /* Expand the start of a RUN. The test for (run_l == SLOOK) + * avoids repeating this check when we pass through a run area + * performing lazy matching. The run is only expanded once when + * the min_match is first reached. If lazy matching is + * performed, the run_l variable will remain inconsistent until + * the first non-running input character is reached, at which + * time the run_l may then again grow to SLOOK. 
*/ + if (DO_RUN && run_l == SLOOK) + { + usize_t max_len = stream->avail_in - stream->input_position; + + IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, run_c)); + + while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; } + + /* Output a RUN instruction. */ + if (run_l >= stream->min_match && run_l >= MIN_RUN) + { + if ((ret = xd3_emit_run (stream, stream->input_position, + run_l, run_c))) { return ret; } + + HANDLELAZY (run_l); + } + } + + /* If there is enough input remaining. */ + if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in)) + { + if ((stream->input_position >= next_move_point) && + (ret = xd3_srcwin_move_point (stream, & next_move_point))) + { + return ret; + } + + linx = xd3_checksum_hash (& stream->large_hash, lcksum); + + IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum)); + + if (stream->large_table[linx] != 0) + { + /* the match_setup will fail if the source window has + * been decided and the match lies outside it. You + * could consider forcing a window at this point to + * permit a new source window. */ + xoff_t adj_offset = + xd3_source_cksum_offset(stream, + stream->large_table[linx] - + HASH_CKOFFSET); + if (xd3_source_match_setup (stream, adj_offset) == 0) + { + if ((ret = xd3_source_extend_match (stream))) + { + return ret; + } + + /* Update stream position. match_fwd is zero if no + * match. */ + if (stream->match_fwd > 0) + { + HANDLELAZY (stream->match_fwd); + } + } + } + } + + /* Small matches. */ + if (DO_SMALL) + { + sinx = xd3_checksum_hash (& stream->small_hash, scksum); + + /* Verify incremental state in debugging mode. */ + IF_DEBUG (xd3_verify_small_state (stream, inp, scksum)); + + /* Search for the longest match */ + if (stream->small_table[sinx] != 0) + { + match_length = xd3_smatch (stream, + stream->small_table[sinx], + scksum, + & match_offset); + } + else + { + match_length = 0; + } + + /* Insert a hash for this string. 
*/ + xd3_scksum_insert (stream, sinx, scksum, stream->input_position); + + /* Maybe output a COPY instruction */ + if (match_length >= stream->min_match) + { + IF_DEBUG1 ({ + static int x = 0; + DP(RINT "[target match:%d] <inp %u %u> <cpy %u %u> " + "(-%d) [ %u bytes ]\n", + x++, + stream->input_position, + stream->input_position + match_length, + match_offset, + match_offset + match_length, + stream->input_position - match_offset, + match_length); + }); + + if ((ret = xd3_found_match (stream, + /* decoder position */ + stream->input_position, + /* length */ match_length, + /* address */ match_offset, + /* is_source */ 0))) + { + return ret; + } + + /* Copy instruction. */ + HANDLELAZY (match_length); + } + } + + /* The logic above prevents excess work during lazy matching by + * increasing min_match to avoid smaller matches. Each time we + * advance stream->input_position by one, the minimum match + * shortens as well. */ + if (stream->min_match > MIN_MATCH) + { + stream->min_match -= 1; + } + + updateone: + + /* See if there are no more incremental cksums to compute. */ + if (stream->input_position + SLOOK == stream->avail_in) + { + goto loopnomore; + } + + /* Compute next RUN, CKSUM */ + if (DO_RUN) { NEXTRUN (inp[SLOOK]); } + if (DO_SMALL) + { + scksum = xd3_small_cksum_update (&scksum_state, inp, SLOOK); + } + if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) + { + lcksum = xd3_large_cksum_update (lcksum, inp, LLOOK); + } + } + + loopnomore: + return 0; +} +#endif /* XD3_ENCODER */ +#endif /* __XDELTA3_C_TEMPLATE_PASS__ */ diff --git a/xdelta3.h b/xdelta3.h new file mode 100644 index 0000000..3aab8b6 --- /dev/null +++ b/xdelta3.h @@ -0,0 +1,1310 @@ +/* xdelta 3 - delta compression tools and library + * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007. Joshua P. 
MacDonald + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* To know more about Xdelta, start by reading xdelta3.c. If you are + * ready to use the API, continue reading here. There are two + * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen + * or so related calls. This interface is styled after Zlib. */ + +#ifndef _XDELTA3_H_ +#define _XDELTA3_H_ + +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +/****************************************************************/ + +/* Default configured value of stream->winsize. If the program + * supplies xd3_encode_input() with data smaller than winsize the + * stream will automatically buffer the input, otherwise the input + * buffer is used directly. + */ +#ifndef XD3_DEFAULT_WINSIZE +#define XD3_DEFAULT_WINSIZE (1U << 23) +#endif + +/* Default total size of the source window used in xdelta3-main.h */ +#ifndef XD3_DEFAULT_SRCWINSZ +#define XD3_DEFAULT_SRCWINSZ (1U << 26) +#endif + +/* When Xdelta requests a memory allocation for certain buffers, it + * rounds up to units of at least this size. The code assumes (and + * asserts) that this is a power-of-two. 
*/ +#ifndef XD3_ALLOCSIZE +#define XD3_ALLOCSIZE (1U<<14) +#endif + +/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect + * decoders against malicious files. The decoder will never decode a + * window larger than this. If the file specifies VCD_TARGET the + * decoder may require two buffers of this size. + * + * 8-16MB is reasonable, probably don't need to go larger. */ +#ifndef XD3_HARDMAXWINSIZE +#define XD3_HARDMAXWINSIZE (1U<<24) +#endif +/* The IOPT_SIZE value sets the size of a buffer used to batch + * overlapping copy instructions before they are optimized by picking + * the best non-overlapping ranges. The larger this buffer, the + * longer a forced xd3_srcwin_setup() decision is held off. Setting + * this value to 0 causes an unlimited buffer to be used. */ +#ifndef XD3_DEFAULT_IOPT_SIZE +#define XD3_DEFAULT_IOPT_SIZE (1U<<15) +#endif + +/* The maximum distance backward to search for small matches */ +#ifndef XD3_DEFAULT_SPREVSZ +#define XD3_DEFAULT_SPREVSZ (1U<<18) +#endif + +/* The default compression level + */ +#ifndef XD3_DEFAULT_LEVEL +#define XD3_DEFAULT_LEVEL 3 +#endif + +#ifndef XD3_DEFAULT_SECONDARY_LEVEL +#define XD3_DEFAULT_SECONDARY_LEVEL 6 +#endif + +#ifndef XD3_USE_LARGEFILE64 +#define XD3_USE_LARGEFILE64 1 +#endif + +/* Sizes and addresses within VCDIFF windows are represented as usize_t + * + * For source-file offsets and total file sizes, total input and + * output counts, the xoff_t type is used. The decoder and encoder + * generally check for overflow of the xoff_t size (this is tested at + * the 32bit boundary [xdelta3-test.h]). + */ +#ifndef _WIN32 +#include <stdint.h> +typedef unsigned int usize_t; +#else +#define WIN32_LEAN_AND_MEAN +#if XD3_USE_LARGEFILE64 +/* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx. + * requires Win2000 or newer version of WinNT */ +#define WINVER 0x0500 +#define _WIN32_WINNT 0x0500 +#else +/* 32 bit (DWORD) file offsets: uses GetFileSize and + * SetFilePointer. 
compatible with win9x-me and WinNT4 */ +#define WINVER 0x0400 +#define _WIN32_WINNT 0x0400 +#endif +#include <windows.h> +typedef unsigned int usize_t; +#ifdef _MSC_VER +#define inline +typedef signed int ssize_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long uint32_t; +typedef ULONGLONG uint64_t; +#else +/* mingw32, lcc and watcom provide a proper header */ +#include <stdint.h> +#endif +#endif + +/* TODO: note that SIZEOF_USIZE_T is never set to 8, although it should be for + * a 64bit platform. OTOH, may be that using 32bits is appropriate even on a + * 64bit platform because we allocate large arrays of these values. */ +#if XD3_USE_LARGEFILE64 +#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */ +typedef uint64_t xoff_t; +#define SIZEOF_XOFF_T 8 +#define SIZEOF_USIZE_T 4 +/* Q is the printf length modifier used with xoff_t values (as in + * "%"Q"u"). + * NOTE(review): the test below uses WIN32, while the usize_t typedef + * block earlier in this header uses _WIN32 -- confirm both are defined + * on every Windows toolchain in use. */ +#ifndef WIN32 +#define Q "ll" +#else +#define Q "I64" +#endif +#else +typedef uint32_t xoff_t; +#define SIZEOF_XOFF_T 4 +#define SIZEOF_USIZE_T 4 +#define Q +#endif + +#define USE_UINT32 (SIZEOF_USIZE_T == 4 || \ + SIZEOF_XOFF_T == 4 || REGRESSION_TEST) +#define USE_UINT64 (SIZEOF_USIZE_T == 8 || \ + SIZEOF_XOFF_T == 8 || REGRESSION_TEST) + +/* TODO: probably should do something better here. */ +#ifndef UNALIGNED_OK +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \ + defined(__i686__) || defined(_X86_) || defined(__x86_64__) +#define UNALIGNED_OK 1 +#else +#define UNALIGNED_OK 0 +#endif +#endif + +/**********************************************************************/ + +/* Whether to build the encoder, otherwise only build the decoder. */ +#ifndef XD3_ENCODER +#define XD3_ENCODER 1 +#endif + +/* The code returned when main() fails, also defined in system + includes. */ +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif + +/* REGRESSION TEST enables the "xdelta3 test" command, which runs a + series of self-tests. 
*/ +#ifndef REGRESSION_TEST +#define REGRESSION_TEST 0 +#endif + +/* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1 + * enable some additional output only useful during development and + * debugging. */ +#ifndef XD3_DEBUG +#define XD3_DEBUG 0 +#endif + +#ifndef PYTHON_MODULE +#define PYTHON_MODULE 0 +#endif + +#ifndef SWIG_MODULE +#define SWIG_MODULE 0 +#endif + +/* Several string matching functions can be built; the switches below + * cover slow, fast, faster, fastest, soft(-configurable) and + * default. To disable any of these, use the following + * definitions. */ +#ifndef XD3_BUILD_SLOW +#define XD3_BUILD_SLOW 1 +#endif +#ifndef XD3_BUILD_FAST +#define XD3_BUILD_FAST 1 +#endif +#ifndef XD3_BUILD_FASTER +#define XD3_BUILD_FASTER 1 +#endif +#ifndef XD3_BUILD_FASTEST +#define XD3_BUILD_FASTEST 1 +#endif +#ifndef XD3_BUILD_SOFT +#define XD3_BUILD_SOFT 1 +#endif +#ifndef XD3_BUILD_DEFAULT +#define XD3_BUILD_DEFAULT 1 +#endif + +#if XD3_DEBUG +#include <stdio.h> +#endif + +/* XPRINT. Debug output and VCDIFF_TOOLS functions report to stderr. + * I have used an irregular style to abbreviate [fprintf(stderr, "] as + * [DP(RINT "]. 
*/ +#define DP fprintf +#define RINT stderr, + +typedef struct _xd3_stream xd3_stream; +typedef struct _xd3_source xd3_source; +typedef struct _xd3_hash_cfg xd3_hash_cfg; +typedef struct _xd3_smatcher xd3_smatcher; +typedef struct _xd3_rinst xd3_rinst; +typedef struct _xd3_dinst xd3_dinst; +typedef struct _xd3_hinst xd3_hinst; +typedef struct _xd3_winst xd3_winst; +typedef struct _xd3_rpage xd3_rpage; +typedef struct _xd3_addr_cache xd3_addr_cache; +typedef struct _xd3_output xd3_output; +typedef struct _xd3_desect xd3_desect; +typedef struct _xd3_iopt_buflist xd3_iopt_buflist; +typedef struct _xd3_rlist xd3_rlist; +typedef struct _xd3_sec_type xd3_sec_type; +typedef struct _xd3_sec_cfg xd3_sec_cfg; +typedef struct _xd3_sec_stream xd3_sec_stream; +typedef struct _xd3_config xd3_config; +typedef struct _xd3_code_table_desc xd3_code_table_desc; +typedef struct _xd3_code_table_sizes xd3_code_table_sizes; +typedef struct _xd3_slist xd3_slist; +typedef struct _xd3_whole_state xd3_whole_state; +typedef struct _xd3_wininfo xd3_wininfo; + +/* The stream configuration has three callbacks functions, all of + * which may be supplied with NULL values. If config->getblk is + * provided as NULL, the stream returns XD3_GETSRCBLK. */ + +typedef void* (xd3_alloc_func) (void *opaque, + usize_t items, + usize_t size); +typedef void (xd3_free_func) (void *opaque, + void *address); + +typedef int (xd3_getblk_func) (xd3_stream *stream, + xd3_source *source, + xoff_t blkno); + +/* These are internal functions to delay construction of encoding + * tables and support alternate code tables. See the comments & code + * enabled by GENERIC_ENCODE_TABLES. */ + +typedef const xd3_dinst* (xd3_code_table_func) (void); +typedef int (xd3_comp_table_func) (xd3_stream *stream, + const uint8_t **data, + usize_t *size); + + + +#if XD3_DEBUG +#define XD3_ASSERT(x) \ + do { if (! 
(x)) { DP(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \ + abort (); } } while (0) +#else +#define XD3_ASSERT(x) (void)0 +#endif + +#ifdef __GNUC__ +/* As seen on linux-kernel. */ +#ifndef max +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) +#endif + +#ifndef min +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) +#endif +#else +#ifndef max +#define max(x,y) ((x) < (y) ? (y) : (x)) +#endif +#ifndef min +#define min(x,y) ((x) < (y) ? (x) : (y)) +#endif +#endif + +/**************************************************************** + PUBLIC ENUMS + ******************************************************************/ + +/* Status codes returned by the xd3_encode_input() and + * xd3_decode_input() state machines. + * NOTE(review): the comments here speak of "five" codes, but the + * enum lists eleven values -- confirm which ones an application + * must handle. */ +typedef enum { + + /* An application must be prepared to handle these five return + * values from either xd3_encode_input or xd3_decode_input, except + * in the case of no-source compression, in which case XD3_GETSRCBLK + * is never returned. More detailed comments for these are given in + * xd3_encode_input and xd3_decode_input comments, below. */ + XD3_INPUT = -17703, /* need input */ + XD3_OUTPUT = -17704, /* have output */ + XD3_GETSRCBLK = -17705, /* need a block of source input (with no + * xd3_getblk function), a chance to do + * non-blocking read. */ + XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & + first window header */ + XD3_WINSTART = -17707, /* notification: returned before a window is + * processed, giving a chance to + * XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that + * window. 
*/ + XD3_WINFINISH = -17708, /* notification: returned after + encode/decode & output for a window */ + XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by + getblk() if the block is too old */ + XD3_INTERNAL = -17710, /* internal error */ + XD3_INVALID = -17711, /* invalid config */ + XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */ + XD3_NOSECOND = -17713, /* when secondary compression finds no + improvement. */ + +} xd3_rvalues; + +/* special values in config->flags */ +typedef enum +{ + XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see + xdelta3-main.h. */ + XD3_FLUSH = (1 << 4), /* flush the stream buffer to + prepare for + xd3_stream_close(). */ + + XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */ + XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */ + XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK), + + XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of + the data section. */ + XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of + the inst section. */ + XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of + the addr section. */ + + XD3_SEC_NOALL = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR), + + XD3_ADLER32 = (1 << 10), /* enable checksum computation in + the encoder. */ + XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in + the decoder. */ + + XD3_ALT_CODE_TABLE = (1 << 12), /* for testing the + alternate code table encoding. */ + + XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data + * compression feature, only search + * the source, not the target. */ + XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass + * algorithm", instead use greedy + * matching. Greedy is off by + * default. */ + XD3_ADLER32_RECODE = (1 << 15), /* used by "recode". 
*/ + + /* 4 bits to set the compression level the same as the command-line + * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag, + * and is independent of compression level). This is for + * convenience, especially with xd3_encode_memory(). */ + + XD3_COMPLEVEL_SHIFT = 20, /* bits 20 - 23 */ + XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_2 = (2 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT), + XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT), + +} xd3_flags; + +/* The values of this enumeration are set in xd3_config using the + * smatch_cfg variable. It can be set to default, slow, fast, etc., + * and soft. */ +typedef enum +{ + XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits, + else default. */ + XD3_SMATCH_SLOW = 1, + XD3_SMATCH_FAST = 2, + XD3_SMATCH_FASTER = 3, + XD3_SMATCH_FASTEST = 4, + XD3_SMATCH_SOFT = 5, +} xd3_smatch_cfg; + +/********************************************************************* + PRIVATE ENUMS +**********************************************************************/ + +/* stream->match_state is part of the xd3_encode_input state machine + * for source matching: + * + * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching + * 2. this state spans encoder windows: a match and end-of-window + * will continue in the next window + * 3. the initial target byte and source byte are a presumed match, + * to avoid some computation in case the inputs are identical. + */ +typedef enum { + + MATCH_TARGET = 0, /* in this state, attempt to match the start of + * the target with the previously set source + * address (initially 0). */ + MATCH_BACKWARD = 1, /* currently expanding a match backward in the + source/target. */ + MATCH_FORWARD = 2, /* currently expanding a match forward in the + source/target. */ + MATCH_SEARCHING = 3, /* currently searching for a match. 
*/ + +} xd3_match_state; + +/* The xd3_encode_input state machine steps through these states in + * the following order. The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_stream_close(). + */ +typedef enum { + + ENC_INIT = 0, /* xd3_encode_input has never been called. */ + ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */ + ENC_SEARCH = 2, /* currently searching for matches. */ + ENC_INSTR = 3, /* currently formatting output. */ + ENC_FLUSH = 4, /* currently emitting output. */ + ENC_POSTOUT = 5, /* after an output section. */ + ENC_POSTWIN = 6, /* after all output sections. */ + ENC_ABORTED = 7, /* abort. */ +} xd3_encode_state; + +/* The xd3_decode_input state machine steps through these states in + * the following order. The matcher is reentrant and returns + * XD3_INPUT whenever it requires more data. After receiving + * XD3_INPUT, if the application reads EOF it should call + * xd3_stream_close(). 
+ * + * 0-8: the VCDIFF header + * 9-18: the VCDIFF window header + * 19-21: the three primary sections: data, inst, addr + * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK, + * 23: return XD3_WINFINISH, set state=9 to decode more input + */ +typedef enum { + + DEC_VCHEAD = 0, /* VCDIFF header */ + DEC_HDRIND = 1, /* header indicator */ + + DEC_SECONDID = 2, /* secondary compressor ID */ + + DEC_TABLEN = 3, /* code table length */ + DEC_NEAR = 4, /* code table near */ + DEC_SAME = 5, /* code table same */ + DEC_TABDAT = 6, /* code table data */ + + DEC_APPLEN = 7, /* application data length */ + DEC_APPDAT = 8, /* application data */ + + DEC_WININD = 9, /* window indicator */ + + DEC_CPYLEN = 10, /* copy window length */ + DEC_CPYOFF = 11, /* copy window offset */ + + DEC_ENCLEN = 12, /* length of delta encoding */ + DEC_TGTLEN = 13, /* length of target window */ + DEC_DELIND = 14, /* delta indicator */ + + DEC_DATALEN = 15, /* length of ADD+RUN data */ + DEC_INSTLEN = 16, /* length of instruction data */ + DEC_ADDRLEN = 17, /* length of address data */ + + DEC_CKSUM = 18, /* window checksum */ + + DEC_DATA = 19, /* data section */ + DEC_INST = 20, /* instruction section */ + DEC_ADDR = 21, /* address section */ + + DEC_EMIT = 22, /* producing data */ + + DEC_FINISH = 23, /* window finished */ + + DEC_ABORTED = 24, /* xd3_abort_stream */ +} xd3_decode_state; + +/************************************************************ + internal types + ************************************************************/ + +/* instruction lists used in the IOPT buffer */ +struct _xd3_rlist +{ + xd3_rlist *next; + xd3_rlist *prev; +}; + +/* the raw encoding of an instruction used in the IOPT buffer */ +struct _xd3_rinst +{ + uint8_t type; + uint8_t xtra; + uint8_t code1; + uint8_t code2; + usize_t pos; + usize_t size; + xoff_t addr; + xd3_rlist link; +}; + +/* the code-table form of an single- or double-instruction */ +struct _xd3_dinst +{ + uint8_t type1; + uint8_t 
size1; + uint8_t type2; + uint8_t size2; +}; + +/* the decoded form of a single (half) instruction. */ +struct _xd3_hinst +{ + uint8_t type; + uint32_t size; /* TODO: why decode breaks if this is usize_t? */ + uint32_t addr; /* TODO: why decode breaks if this is usize_t? */ +}; + +/* the form of a whole-file instruction */ +struct _xd3_winst +{ + uint8_t type; /* RUN, ADD, COPY */ + uint8_t mode; /* 0, VCD_SOURCE, VCD_TARGET */ + usize_t size; + xoff_t addr; + xoff_t position; /* absolute position of this inst */ +}; + +/* used by the encoder to buffer output in sections. list of blocks. */ +struct _xd3_output +{ + uint8_t *base; + usize_t next; + usize_t avail; + xd3_output *next_page; +}; + +/* used by the decoder to buffer input in sections. */ +struct _xd3_desect +{ + const uint8_t *buf; + const uint8_t *buf_max; + uint32_t size; /* TODO: why decode breaks if this is usize_t? */ + usize_t pos; + + /* used in xdelta3-decode.h */ + uint8_t *copied1; + usize_t alloc1; + + /* used in xdelta3-second.h */ + uint8_t *copied2; + usize_t alloc2; +}; + +/* the VCDIFF address cache, see the RFC */ +struct _xd3_addr_cache +{ + usize_t s_near; + usize_t s_same; + usize_t next_slot; /* the circular index for near */ + usize_t *near_array; /* array of size s_near */ + usize_t *same_array; /* array of size s_same*256 */ +}; + +/* the IOPT buffer list is just a list of buffers, which may be allocated + * during encode when using an unlimited buffer. */ +struct _xd3_iopt_buflist +{ + xd3_rinst *buffer; + xd3_iopt_buflist *next; +}; + +/* This is the record of a pre-compiled configuration, a subset of + xd3_config. */ +struct _xd3_smatcher +{ + const char *name; + int (*string_match) (xd3_stream *stream); + usize_t large_look; + usize_t large_step; + usize_t small_look; + usize_t small_chain; + usize_t small_lchain; + usize_t max_lazy; + usize_t long_enough; +}; + +/* hash table size & power-of-two hash function. 
*/ +struct _xd3_hash_cfg +{ + usize_t size; + usize_t shift; + usize_t mask; +}; + +/* the sprev list */ +struct _xd3_slist +{ + usize_t last_pos; +}; + +/* window info (for whole state) */ +struct _xd3_wininfo { + xoff_t offset; + usize_t length; + uint32_t adler32; +}; + +/* whole state for, e.g., merge */ +struct _xd3_whole_state { + usize_t addslen; + uint8_t *adds; + usize_t adds_alloc; + + usize_t instlen; + xd3_winst *inst; + usize_t inst_alloc; + + usize_t wininfolen; + xd3_wininfo *wininfo; + usize_t wininfo_alloc; + + xoff_t length; +}; + +/******************************************************************** + public types + *******************************************************************/ + +/* Settings for the secondary compressor. */ +struct _xd3_sec_cfg +{ + int data_type; /* Which section. (set automatically) */ + int ngroups; /* Number of DJW Huffman groups. */ + int sector_size; /* Sector size. */ + int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */ +}; + +/* This is the user-visible stream configuration. */ +struct _xd3_config +{ + usize_t winsize; /* The encoder window size. */ + usize_t sprevsz; /* How far back small string + matching goes */ + usize_t iopt_size; /* entries in the + instruction-optimizing + buffer */ + usize_t srcwin_maxsz; /* srcwin_size grows by a factor + of 2 when no matches are + found */ + + xd3_getblk_func *getblk; /* The three callbacks. */ + xd3_alloc_func *alloc; + xd3_free_func *freef; + void *opaque; /* Not used. */ + int flags; /* stream->flags are initialized + * from xd3_config & never + * modified by the library. Use + * xd3_set_flags to modify flags + * settings mid-stream. 
*/ + + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for + soft config */ + xd3_smatcher smatcher_soft; +}; + +/* The primary source file object. You create one of these objects and + * initialize the first four fields. This library maintains the next + * 5 fields. The configured getblk implementation is responsible for + * setting the final 3 fields when called (and/or when XD3_GETSRCBLK + * is returned). + */ +struct _xd3_source +{ + /* you set */ + xoff_t size; /* size of this source */ + usize_t blksize; /* block size */ + const char *name; /* its name, for debug/print + purposes */ + void *ioh; /* opaque handle */ + + /* getblk sets */ + xoff_t curblkno; /* current block number: client + sets after getblk request */ + usize_t onblk; /* number of bytes on current + block: client sets, xd3 + verifies */ + const uint8_t *curblk; /* current block array: client + sets after getblk request */ + + /* xd3 sets */ + usize_t srclen; /* length of this source window */ + xoff_t srcbase; /* offset of this source window + in the source itself */ + xoff_t blocks; /* the total number of blocks in + this source */ + usize_t onlastblk; /* cached size info, avoid __udivdi3 */ + int shiftby; /* for power-of-two blocksizes */ + int maskby; /* for power-of-two blocksizes */ + xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */ + usize_t cpyoff_blkoff; /* offset of copy window in + blocks, remainder */ + xoff_t getblkno; /* request block number: xd3 sets + current getblk request */ +}; + +/* The primary xd3_stream object, used for encoding and decoding. You + * may access only two fields: avail_out, next_out. Use the methods + * above to operate on xd3_stream. 
*/ +struct _xd3_stream +{ + /* input state */ + const uint8_t *next_in; /* next input byte */ + usize_t avail_in; /* number of bytes available at + next_in */ + xoff_t total_in; /* how many bytes in */ + + /* output state */ + uint8_t *next_out; /* next output byte */ + usize_t avail_out; /* number of bytes available at + next_out */ + usize_t space_out; /* total out space */ + xoff_t current_window; /* number of windows encoded/decoded */ + xoff_t total_out; /* how many bytes out */ + + /* to indicate an error, xd3 sets */ + const char *msg; /* last error message, NULL if + no error */ + + /* source configuration */ + xd3_source *src; /* source array */ + + /* encoder memory configuration */ + usize_t winsize; /* suggested window size */ + usize_t sprevsz; /* small string, previous window + size (power of 2) */ + usize_t sprevmask; /* small string, previous window + size mask */ + usize_t iopt_size; + usize_t iopt_unlimited; + usize_t srcwin_maxsz; + + /* general configuration */ + xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */ + xd3_alloc_func *alloc; /* malloc function */ + xd3_free_func *free; /* free function */ + void* opaque; /* private data object passed to + alloc, free, and getblk */ + int flags; /* various options */ + + /* secondary compressor configuration */ + xd3_sec_cfg sec_data; /* Secondary compressor config: data */ + xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */ + xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */ + + xd3_smatcher smatcher; + + usize_t *large_table; /* table of large checksums */ + xd3_hash_cfg large_hash; /* large hash config */ + + usize_t *small_table; /* table of small checksums */ + xd3_slist *small_prev; /* table of previous offsets, + circular linked list */ + int small_reset; /* true if small table should + be reset */ + + xd3_hash_cfg small_hash; /* small hash config */ + xd3_addr_cache acache; /* the vcdiff address cache */ + xd3_encode_state enc_state; /* state of the 
encoder */ + + usize_t taroff; /* base offset of the target input */ + usize_t input_position; /* current input position */ + usize_t min_match; /* current minimum match + length, avoids redundent + matches */ + usize_t unencoded_offset; /* current input, first + * unencoded offset. this value + * is <= the first instruction's + * position in the iopt buffer, + * if there is at least one + * match in the buffer. */ + + // SRCWIN + // these variables plus srcwin_maxsz above (set by config) + int srcwin_decided; /* boolean: true if the + srclen,srcbase have been + decided. */ + xoff_t srcwin_cksum_pos; /* Source checksum position */ + + // MATCH + xd3_match_state match_state; /* encoder match state */ + xoff_t match_srcpos; /* current match source + position relative to + srcbase */ + xoff_t match_last_srcpos; /* previously attempted + * srcpos, to avoid loops. */ + xoff_t match_minaddr; /* smallest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + xoff_t match_maxaddr; /* largest matching address to + * set window params (reset each + * window xd3_encode_reset) */ + usize_t match_back; /* match extends back so far */ + usize_t match_maxback; /* match extends back maximum */ + usize_t match_fwd; /* match extends forward so far */ + usize_t match_maxfwd; /* match extends forward maximum */ + + xoff_t maxsrcaddr; /* address of the last source + match (across windows) */ + + uint8_t *buf_in; /* for saving buffered input */ + usize_t buf_avail; /* amount of saved input */ + const uint8_t *buf_leftover; /* leftover content of next_in + (i.e., user's buffer) */ + usize_t buf_leftavail; /* amount of leftover content */ + + xd3_output *enc_current; /* current output buffer */ + xd3_output *enc_free; /* free output buffers */ + xd3_output *enc_heads[4]; /* array of encoded outputs: + head of chain */ + xd3_output *enc_tails[4]; /* array of encoded outputs: + tail of chain */ + uint32_t recode_adler32; /* set the adler32 checksum + * 
during "recode". */ + + xd3_rlist iopt_used; /* instruction optimizing buffer */ + xd3_rlist iopt_free; + xd3_rinst *iout; /* next single instruction */ + xd3_iopt_buflist *iopt_alloc; + + const uint8_t *enc_appheader; /* application header to encode */ + usize_t enc_appheadsz; /* application header size */ + + /* decoder stuff */ + xd3_decode_state dec_state; /* current DEC_XXX value */ + usize_t dec_hdr_ind; /* VCDIFF header indicator */ + usize_t dec_win_ind; /* VCDIFF window indicator */ + usize_t dec_del_ind; /* VCDIFF delta indicator */ + + uint8_t dec_magic[4]; /* First four bytes */ + usize_t dec_magicbytes; /* Magic position. */ + + usize_t dec_secondid; /* Optional secondary compressor ID. */ + + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_codetblsz; /* Optional code table: length. */ + uint8_t *dec_codetbl; /* Optional code table: storage. */ + usize_t dec_codetblbytes; /* Optional code table: position. */ + + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_appheadsz; /* Optional application header: + size. */ + uint8_t *dec_appheader; /* Optional application header: + storage */ + usize_t dec_appheadbytes; /* Optional application header: + position. */ + + usize_t dec_cksumbytes; /* Optional checksum: position. */ + uint8_t dec_cksum[4]; /* Optional checksum: storage. */ + uint32_t dec_adler32; /* Optional checksum: value. */ + + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_cpylen; /* length of copy window + (VCD_SOURCE or VCD_TARGET) */ + xoff_t dec_cpyoff; /* offset of copy window + (VCD_SOURCE or VCD_TARGET) */ + /* TODO: why decode breaks if this is usize_t? */ + uint32_t dec_enclen; /* length of delta encoding */ + /* TODO: why decode breaks if this is usize_t? 
*/ + uint32_t dec_tgtlen; /* length of target window */ + +#if USE_UINT64 + uint64_t dec_64part; /* part of a decoded uint64_t */ +#endif +#if USE_UINT32 + uint32_t dec_32part; /* part of a decoded uint32_t */ +#endif + + xoff_t dec_winstart; /* offset of the start of + current target window */ + xoff_t dec_window_count; /* == current_window + 1 in + DEC_FINISH */ + usize_t dec_winbytes; /* bytes of the three sections + so far consumed */ + usize_t dec_hdrsize; /* VCDIFF + app header size */ + + const uint8_t *dec_tgtaddrbase; /* Base of decoded target + addresses (addr >= + dec_cpylen). */ + const uint8_t *dec_cpyaddrbase; /* Base of decoded copy + addresses (addr < + dec_cpylen). */ + + usize_t dec_position; /* current decoder position + counting the cpylen + offset */ + usize_t dec_maxpos; /* maximum decoder position + counting the cpylen + offset */ + xd3_hinst dec_current1; /* current instruction */ + xd3_hinst dec_current2; /* current instruction */ + + uint8_t *dec_buffer; /* Decode buffer */ + uint8_t *dec_lastwin; /* In case of VCD_TARGET, the + last target window. */ + usize_t dec_lastlen; /* length of the last target + window */ + xoff_t dec_laststart; /* offset of the start of last + target window */ + usize_t dec_lastspace; /* allocated space of last + target window, for reuse */ + + xd3_desect inst_sect; /* staging area for decoding + window sections */ + xd3_desect addr_sect; + xd3_desect data_sect; + + xd3_code_table_func *code_table_func; + xd3_comp_table_func *comp_table_func; + const xd3_dinst *code_table; + const xd3_code_table_desc *code_table_desc; + xd3_dinst *code_table_alloc; + + /* secondary compression */ + const xd3_sec_type *sec_type; + xd3_sec_stream *sec_stream_d; + xd3_sec_stream *sec_stream_i; + xd3_sec_stream *sec_stream_a; + + /* state for reconstructing whole files (e.g., for merge), this only + * supports loading USIZE_T_MAX instructions, adds, etc. 
*/ + xd3_whole_state whole_target; + + /* statistics */ + xoff_t n_scpy; + xoff_t n_tcpy; + xoff_t n_add; + xoff_t n_run; + + xoff_t l_scpy; + xoff_t l_tcpy; + xoff_t l_add; + xoff_t l_run; + + usize_t i_slots_used; + +#if XD3_DEBUG + usize_t large_ckcnt; + + /* memory usage */ + usize_t alloc_cnt; + usize_t free_cnt; +#endif +}; + +/************************************************************************** + PUBLIC FUNCTIONS + **************************************************************************/ + +/* This function configures an xd3_stream using the provided in-memory + * input buffer, source buffer, output buffer, and flags. The output + * array must be large enough or else ENOSPC will be returned. This + * is the simplest in-memory encoding interface. */ +int xd3_encode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buffer, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* The reverse of xd3_encode_memory. */ +int xd3_decode_memory (const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output_buf, + usize_t *output_size, + usize_t avail_output, + int flags); + +/* This function encodes an in-memory input. Everything else about + * the xd3_stream is configurable. The output array must be large + * enough to hold the output or else ENOSPC is returned. The source + * (if any) should be set using xd3_set_source() with a single-block + * xd3_source. This calls the underlying non-blocking interface, + * xd3_encode_input(), handling the necessary input/output states. + * This method may be considered a reference for any application using + * xd3_encode_input() directly. 
+ * + * xd3_stream stream; + * xd3_config config; + * xd3_source src; + * + * memset (& src, 0, sizeof (src)); + * memset (& stream, 0, sizeof (stream)); + * memset (& config, 0, sizeof (config)); + * + * if (source != NULL) + * { + * src.size = source_size; + * src.blksize = source_size; + * src.curblkno = 0; + * src.onblk = source_size; + * src.curblk = source; + * xd3_set_source(&stream, &src); + * } + * + * config.flags = flags; + * config.srcwin_maxsz = source_size; + * config.winsize = input_size; + * + * ... set smatcher, appheader, encoding-table, compression-level, etc. + * + * xd3_config_stream(&stream, &config); + * xd3_encode_stream(&stream, ...); + * xd3_free_stream(&stream); + * + * DO NOT USE except for testing. These methods allocate bad buffer sizes. + */ +int xd3_encode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_output); + +/* The reverse of xd3_encode_stream. */ +int xd3_decode_stream (xd3_stream *stream, + const uint8_t *input, + usize_t input_size, + uint8_t *output, + usize_t *output_size, + usize_t avail_size); + +/* This is the non-blocking interface. + * + * Handling input and output states is the same for encoding or + * decoding using the xd3_avail_input() and xd3_consume_output() + * routines, inlined below. + * + * Return values: + * + * XD3_INPUT: the process requires more input: call + * xd3_avail_input() then repeat + * + * XD3_OUTPUT: the process has more output: read stream->next_out, + * stream->avail_out, then call xd3_consume_output(), + * then repeat + * + * XD3_GOTHEADER: (decoder-only) notification returned following the + * VCDIFF header and first window header. the decoder + * may use the header to configure itself. + * + * XD3_WINSTART: a general notification returned once for each + * window except the 0-th window, which is implied by + * XD3_GOTHEADER. It is recommended to use a + * switch-stmt such as: + * + * ... 
+ * again: + * switch ((ret = xd3_decode_input (stream))) { + * case XD3_GOTHEADER: { + * assert(stream->current_window == 0); + * stuff; + * } + * // fallthrough + * case XD3_WINSTART: { + * something(stream->current_window); + * goto again; + * } + * ... + * + * XD3_WINFINISH: a general notification, following the complete + * input & output of a window. at this point, + * stream->total_in and stream->total_out are consistent + * for either encoding or decoding. + * + * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value + * is returned to initiate a non-blocking source read. + */ +int xd3_decode_input (xd3_stream *stream); +int xd3_encode_input (xd3_stream *stream); + +/* The xd3_config structure is used to initialize a stream - all data + * is copied into stream so config may be a temporary variable. See + * the [documentation] or comments on the xd3_config structure. */ +int xd3_config_stream (xd3_stream *stream, + xd3_config *config); + +/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an + * error check that the stream is in a proper state to be closed: this + * means the encoder is flushed and the decoder is at a window + * boundary. The application is responsible for freeing any of the + * resources it supplied. */ +int xd3_close_stream (xd3_stream *stream); + +/* This arranges for a subsequent xd3_close_stream() to succeed. Does + * not free the stream. */ +void xd3_abort_stream (xd3_stream *stream); + +/* xd3_free_stream frees all memory allocated for the stream. The + * application is responsible for freeing any of the resources it + * supplied. */ +void xd3_free_stream (xd3_stream *stream); + +/* This function informs the encoder or decoder that source matching + * (i.e., delta-compression) is possible. For encoding, this should + * be called before the first xd3_encode_input. A NULL source is + * ignored. For decoding, this should be called before the first + * window is decoded, but the appheader may be read first + * (XD3_GOTHEADER). 
After decoding the header, call xd3_set_source() + * if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE) + * is true, it means the first window expects there to be a source file. + */ +int xd3_set_source (xd3_stream *stream, + xd3_source *source); + +/* This should be called before the first call to xd3_encode_input() + * to include application-specific data in the VCDIFF header. */ +void xd3_set_appheader (xd3_stream *stream, + const uint8_t *data, + usize_t size); + +/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. + * For convenience, the decoder always adds a single byte padding to + * the end of the application header, which is set to zero in case the + * application header is a string. */ +int xd3_get_appheader (xd3_stream *stream, + uint8_t **data, + usize_t *size); + +/* To generate a VCDIFF encoded delta with xd3_encode_init() from + * another format, use: + * + * xd3_encode_init_partial() -- initialize encoder state (w/o hash tables) + * xd3_init_cache() -- reset VCDIFF address cache + * xd3_found_match() -- to report a copy instruction + * + * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual. + */ +int xd3_encode_init_partial (xd3_stream *stream); +void xd3_init_cache (xd3_addr_cache* acache); +int xd3_found_match (xd3_stream *stream, + usize_t pos, usize_t size, + xoff_t addr, int is_source); + +/* Gives an error string for xdelta3-specific errors, returns NULL for + system errors */ +const char* xd3_strerror (int ret); + +/* For convenience, zero & initialize the xd3_config structure with + specified flags. */ +static inline +void xd3_init_config (xd3_config *config, + int flags) +{ + memset (config, 0, sizeof (*config)); + config->flags = flags; +} + +/* This supplies some input to the stream. */ +static inline +void xd3_avail_input (xd3_stream *stream, + const uint8_t *idata, + usize_t isize) +{ + /* Even if isize is zero, the code expects a non-NULL idata. Why? 
+ * It uses this value to determine whether xd3_avail_input has ever + * been called. If xd3_encode_input is called before + * xd3_avail_input it will return XD3_INPUT right away without + * allocating a stream->winsize buffer. This is to avoid an + * unwanted allocation. */ + XD3_ASSERT (idata != NULL || isize == 0); + + stream->next_in = idata; + stream->avail_in = isize; +} + +/* This acknowledges receipt of output data, must be called after any + * XD3_OUTPUT return. */ +static inline +void xd3_consume_output (xd3_stream *stream) +{ + stream->avail_out = 0; +} + +/* These are set for each XD3_WINFINISH return. Note that + * xd3_encoder_srcbase() and xd3_encoder_srclen() dereference + * stream->src without a NULL check; only xd3_encoder_used_source() + * tests stream->src against NULL first. */ +static inline +int xd3_encoder_used_source (xd3_stream *stream) { + return stream->src != NULL && stream->src->srclen > 0; +} +static inline +xoff_t xd3_encoder_srcbase (xd3_stream *stream) { + return stream->src->srcbase; +} +static inline +usize_t xd3_encoder_srclen (xd3_stream *stream) { + return stream->src->srclen; +} + +/* Checks for legal flag changes (by assertion only), then replaces + * stream->flags with the new value. */ +static inline +void xd3_set_flags (xd3_stream *stream, int flags) +{ + /* The bitwise difference should contain only XD3_FLUSH or + XD3_SKIP_WINDOW */ + XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0); + stream->flags = flags; +} + +/* Gives some extra information about the latest library error, if any + is known. Returns stream->msg, or "" (never NULL) when it is unset. */ +static inline +const char* xd3_errstring (xd3_stream *stream) +{ + return stream->msg ? stream->msg : ""; +} + + +/* 64-bit divisions are expensive. on a 32bit platform, these show in + * a profile as __udivdi3(). these are all the xoff_t divisions: */ +static inline +void xd3_blksize_div (const xoff_t offset, + const xd3_source *source, + xoff_t *blkno, + usize_t *blkoff) { + *blkno = source->maskby ? + (offset >> source->shiftby) : + (offset / source->blksize); + *blkoff = source->maskby ? 
+ (offset & source->maskby) : + (offset - *blkno * source->blksize); +} + +/* This function tells the number of bytes expected to be set in + * source->onblk after a getblk request. This is for convenience of + * handling a partial last block. Note that this is a relatively + * expensive function for 64-bit binaries on platforms w/o native + * 64-bit integers, so source->onlastblk is set to this value. + * TODO: force source->blksize to a power of two? */ +static inline +usize_t xd3_bytes_on_srcblk (xd3_source *source, xoff_t blkno) +{ + xoff_t s_1_div; + usize_t s_1_rem; + XD3_ASSERT (blkno < source->blocks); + + if (blkno != source->blocks - 1) + { + return source->blksize; + } + xd3_blksize_div(source->size - 1, source, &s_1_div, &s_1_rem); + return s_1_rem + 1; +} + +static inline +usize_t xd3_bytes_on_srcblk_fast (xd3_source *source, xoff_t blkno) +{ + return (blkno == source->blocks - 1 ? + source->onlastblk : + source->blksize); +} + +#endif /* _XDELTA3_H_ */ diff --git a/xdelta3.py b/xdelta3.py new file mode 100644 index 0000000..d077192 --- /dev/null +++ b/xdelta3.py @@ -0,0 +1,71 @@ +# This file was automatically generated by SWIG (http://www.swig.org). +# Version 1.3.31 +# +# Don't modify this file, modify the SWIG interface instead. +# This file is compatible with both classic and new-style classes. + +import _xdelta3 +import new +new_instancemethod = new.instancemethod +try: + _swig_property = property +except NameError: + pass # Python < 2.2 doesn't have 'property'. 
+def _swig_setattr_nondynamic(self,class_type,name,value,static=1): + if (name == "thisown"): return self.this.own(value) + if (name == "this"): + if type(value).__name__ == 'PySwigObject': + self.__dict__[name] = value + return + method = class_type.__swig_setmethods__.get(name,None) + if method: return method(self,value) + if (not static) or hasattr(self,name): + self.__dict__[name] = value + else: + raise AttributeError("You cannot add attributes to %s" % self) + +def _swig_setattr(self,class_type,name,value): + return _swig_setattr_nondynamic(self,class_type,name,value,0) + +def _swig_getattr(self,class_type,name): + if (name == "thisown"): return self.this.own() + method = class_type.__swig_getmethods__.get(name,None) + if method: return method(self) + raise AttributeError,name + +def _swig_repr(self): + try: strthis = "proxy of " + self.this.__repr__() + except: strthis = "" + return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) + +import types +try: + _object = types.ObjectType + _newclass = 1 +except AttributeError: + class _object : pass + _newclass = 0 +del types + + +xd3_encode_memory = _xdelta3.xd3_encode_memory +xd3_decode_memory = _xdelta3.xd3_decode_memory +xd3_main_cmdline = _xdelta3.xd3_main_cmdline +XD3_SEC_DJW = _xdelta3.XD3_SEC_DJW +XD3_SEC_FGK = _xdelta3.XD3_SEC_FGK +XD3_SEC_NODATA = _xdelta3.XD3_SEC_NODATA +XD3_SEC_NOINST = _xdelta3.XD3_SEC_NOINST +XD3_SEC_NOADDR = _xdelta3.XD3_SEC_NOADDR +XD3_ADLER32 = _xdelta3.XD3_ADLER32 +XD3_ADLER32_NOVER = _xdelta3.XD3_ADLER32_NOVER +XD3_ALT_CODE_TABLE = _xdelta3.XD3_ALT_CODE_TABLE +XD3_NOCOMPRESS = _xdelta3.XD3_NOCOMPRESS +XD3_BEGREEDY = _xdelta3.XD3_BEGREEDY +XD3_COMPLEVEL_SHIFT = _xdelta3.XD3_COMPLEVEL_SHIFT +XD3_COMPLEVEL_MASK = _xdelta3.XD3_COMPLEVEL_MASK +XD3_COMPLEVEL_1 = _xdelta3.XD3_COMPLEVEL_1 +XD3_COMPLEVEL_3 = _xdelta3.XD3_COMPLEVEL_3 +XD3_COMPLEVEL_6 = _xdelta3.XD3_COMPLEVEL_6 +XD3_COMPLEVEL_9 = _xdelta3.XD3_COMPLEVEL_9 + + diff --git a/xdelta3.swig 
b/xdelta3.swig new file mode 100644 index 0000000..2ef4306 --- /dev/null +++ b/xdelta3.swig @@ -0,0 +1,93 @@ +%module xdelta3 +%import cstring.i +%import argcargv.i +%{ +#include "xdelta3.h" + +int xd3_main_cmdline (int ARGC, char **ARGV); + +#undef SWIG_init +#undef SWIG_name + +#define SWIG_init initxdelta3 +#define SWIG_name "xdelta3" + +%} + +%cstring_input_binary(const char *input, unsigned int input_size); +%cstring_input_binary(const char *source, unsigned int source_size); + +%define %max_output_withsize(TYPEMAP, SIZE, MAXSIZE) +%typemap(in) MAXSIZE (unsigned int alloc_size) { + $1 = alloc_size = PyInt_AsLong(obj2); +} +%typemap(in,numinputs=0) (TYPEMAP, SIZE) { +} +%typemap(check) (TYPEMAP, SIZE) { + // alloc_size input is #7th position in xd3_xxcode_memory() + $1 = malloc(alloc_size7); + $2 = &alloc_size7; +} +%typemap(argout,fragment="t_output_helper") (TYPEMAP, SIZE) { + if (result == 0) { + PyObject *o; + // alloc_size7 now carries actual size + o = PyString_FromStringAndSize($1,alloc_size7); + $result = t_output_helper($result,o); + } else { + $result = t_output_helper($result,Py_None); + } + free($1); +} +%typemap(default) int flags { + $1 = 0; +} +%enddef + +%max_output_withsize(char *output_buf, unsigned int *output_size, unsigned int max_output); + +int xd3_encode_memory (const char *input, + unsigned int input_size, + const char *source, + unsigned int source_size, + char *output_buf, + unsigned int *output_size, + unsigned int max_output, + int flags); + +int xd3_decode_memory (const char *input, + unsigned int input_size, + const char *source, + unsigned int source_size, + char *output_buf, + unsigned int *output_size, + unsigned int max_output, + int flags); + +int xd3_main_cmdline (int ARGC, char **ARGV); + +/* Is this the right way? 
*/ +enum { + /*XD3_JUST_HDR,*/ + /*XD3_SKIP_WINDOW,*/ + /*XD3_SKIP_EMIT,*/ + /*XD3_FLUSH,*/ + XD3_SEC_DJW, + XD3_SEC_FGK, + /*XD3_SEC_TYPE,*/ + XD3_SEC_NODATA, + XD3_SEC_NOINST, + XD3_SEC_NOADDR, + /*XD3_SEC_OTHER,*/ + XD3_ADLER32, + XD3_ADLER32_NOVER, + XD3_ALT_CODE_TABLE, + XD3_NOCOMPRESS, + XD3_BEGREEDY, + XD3_COMPLEVEL_SHIFT, + XD3_COMPLEVEL_MASK, + XD3_COMPLEVEL_1, + XD3_COMPLEVEL_3, + XD3_COMPLEVEL_6, + XD3_COMPLEVEL_9, +}; diff --git a/xdelta3.vcproj b/xdelta3.vcproj new file mode 100644 index 0000000..5dfb477 --- /dev/null +++ b/xdelta3.vcproj @@ -0,0 +1,236 @@ +<?xml version="1.0" encoding="Windows-1252"?> +<VisualStudioProject + ProjectType="Visual C++" + Version="8.00" + Name="xdelta3" + ProjectGUID="{7F30EDF1-4493-4E47-8664-0661516BC9E4}" + Keyword="Win32Proj" + > + <Platforms> + <Platform + Name="Win32" + /> + </Platforms> + <ToolFiles> + </ToolFiles> + <Configurations> + <Configuration + Name="Debug|Win32" + OutputDirectory="Debug" + IntermediateDirectory="Debug" + ConfigurationType="1" + > + <Tool + Name="VCPreBuildEventTool" + /> + <Tool + Name="VCCustomBuildTool" + /> + <Tool + Name="VCXMLDataGeneratorTool" + /> + <Tool + Name="VCWebServiceProxyGeneratorTool" + /> + <Tool + Name="VCMIDLTool" + /> + <Tool + Name="VCCLCompilerTool" + AdditionalOptions="/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0 /D_CRT_SECURE_NO_DEPRECATE" + Optimization="0" + PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;" + MinimalRebuild="true" + BasicRuntimeChecks="3" + RuntimeLibrary="3" + UsePrecompiledHeader="0" + WarningLevel="3" + Detect64BitPortabilityProblems="true" + DebugInformationFormat="4" + /> + <Tool + Name="VCManagedResourceCompilerTool" + /> + <Tool + Name="VCResourceCompilerTool" + /> + <Tool + Name="VCPreLinkEventTool" + /> + <Tool + Name="VCLinkerTool" + LinkIncremental="2" + GenerateDebugInformation="true" + SubSystem="1" + 
TargetMachine="1" + /> + <Tool + Name="VCALinkTool" + /> + <Tool + Name="VCManifestTool" + /> + <Tool + Name="VCXDCMakeTool" + /> + <Tool + Name="VCBscMakeTool" + /> + <Tool + Name="VCFxCopTool" + /> + <Tool + Name="VCAppVerifierTool" + /> + <Tool + Name="VCWebDeploymentTool" + /> + <Tool + Name="VCPostBuildEventTool" + /> + </Configuration> + <Configuration + Name="Release|Win32" + OutputDirectory="Release" + IntermediateDirectory="Release" + ConfigurationType="1" + WholeProgramOptimization="1" + > + <Tool + Name="VCPreBuildEventTool" + /> + <Tool + Name="VCCustomBuildTool" + /> + <Tool + Name="VCXMLDataGeneratorTool" + /> + <Tool + Name="VCWebServiceProxyGeneratorTool" + /> + <Tool + Name="VCMIDLTool" + /> + <Tool + Name="VCCLCompilerTool" + AdditionalOptions="/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0 /D_CRT_SECURE_NO_DEPRECATE" + PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;" + RuntimeLibrary="0" + UsePrecompiledHeader="0" + WarningLevel="3" + Detect64BitPortabilityProblems="true" + DebugInformationFormat="3" + /> + <Tool + Name="VCManagedResourceCompilerTool" + /> + <Tool + Name="VCResourceCompilerTool" + /> + <Tool + Name="VCPreLinkEventTool" + /> + <Tool + Name="VCLinkerTool" + LinkIncremental="2" + GenerateDebugInformation="true" + SubSystem="1" + OptimizeReferences="2" + EnableCOMDATFolding="2" + TargetMachine="1" + /> + <Tool + Name="VCALinkTool" + /> + <Tool + Name="VCManifestTool" + /> + <Tool + Name="VCXDCMakeTool" + /> + <Tool + Name="VCBscMakeTool" + /> + <Tool + Name="VCFxCopTool" + /> + <Tool + Name="VCAppVerifierTool" + /> + <Tool + Name="VCWebDeploymentTool" + /> + <Tool + Name="VCPostBuildEventTool" + /> + </Configuration> + </Configurations> + <References> + </References> + <Files> + <Filter + Name="Header Files" + Filter="h;hpp;hxx;hm;inl;inc;xsd" + UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}" + > + 
<File + RelativePath=".\xdelta3-cfgs.h" + > + </File> + <File + RelativePath=".\xdelta3-decode.h" + > + </File> + <File + RelativePath=".\xdelta3-djw.h" + > + </File> + <File + RelativePath=".\xdelta3-fgk.h" + > + </File> + <File + RelativePath=".\xdelta3-list.h" + > + </File> + <File + RelativePath=".\xdelta3-main.h" + > + </File> + <File + RelativePath=".\xdelta3-python.h" + > + </File> + <File + RelativePath=".\xdelta3-second.h" + > + </File> + <File + RelativePath=".\xdelta3-test.h" + > + </File> + <File + RelativePath=".\xdelta3.h" + > + </File> + </Filter> + <Filter + Name="Resource Files" + Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx" + UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}" + > + </Filter> + <Filter + Name="Source Files" + Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" + UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" + > + <File + RelativePath=".\xdelta3.c" + > + </File> + </Filter> + </Files> + <Globals> + </Globals> +</VisualStudioProject> diff --git a/xdelta3.wxi b/xdelta3.wxi new file mode 100644 index 0000000..2ef8426 --- /dev/null +++ b/xdelta3.wxi @@ -0,0 +1,7 @@ +<Include> + <?define PRODUCT_ID=60131be5-be4d-4975-9108-dd0be735890d ?> + <?define PACKAGE_ID=82bf21ca-ee08-4701-ab78-37210dac82ce ?> + <?define COMPONENT_ID=85bc3206-05f8-41f8-b500-6ea32e5d6a8f ?> + <?define MANUAL_ID=07f387bc-a0c5-4af9-88db-1a84443f1fc5 ?> + <?define SOURCE_ID=4e1503a9-3ed1-4e06-b0c0-890462b1a4fd ?> +</Include> diff --git a/xdelta3.wxs b/xdelta3.wxs new file mode 100644 index 0000000..5e2d05c --- /dev/null +++ b/xdelta3.wxs @@ -0,0 +1,131 @@ +<?xml version='1.0'?> +<?include $(sys.SOURCEFILEDIR)\xdelta3.wxi ?> + +<Wix xmlns='http://schemas.microsoft.com/wix/2003/01/wi'> + <Product Id='$(var.PRODUCT_ID)' + Name='Xdelta 3.0u' + Language='1033' + Codepage='1252' + Version='3.0.1.1' + Manufacturer='Josh.MacDonald@Gmail.Com'> + + <Package Id='$(var.PACKAGE_ID)' + Keywords='Installer' + Description='Xdelta 3.0u' + 
Comments='http://xdelta.org' + Manufacturer='Josh.MacDonald@Gmail.Com' + InstallerVersion='300' + Languages='1033' + Compressed='yes' /> + + <Media Id='1' + Cabinet='xdelta30t.cab' + EmbedCab='yes' /> + + <Directory Id='TARGETDIR' Name='SourceDir'> + <Directory Id='ProgramFilesFolder' Name='PFiles'> + <Directory Id='Xdelta' + Name='Xdelta'> + + <Component Id='Main' + Guid='$(var.COMPONENT_ID)'> + <File Id='XdeltaEXE' + Name='xdelt30t' + LongName='xdelta30t.exe' + DiskId='1' + Source='G:\jmacd\svn\xdelta3\Release\xdelta3.exe' + Vital='yes'> + </File> + </Component> + + <Component Id='Readme' + Guid='$(var.MANUAL_ID)'> + <File Id='Readme' + Name='readme.txt' + LongName='readme.txt' + DiskId='1' + Source='G:\jmacd\svn\xdelta3\readme.txt' + Vital='yes'> + <Shortcut Id="startupmenuReadme" + Directory="ProgramMenuDir" + Name="readme.txt" + LongName="Xdelta3 readme.txt" + /> + </File> + </Component> + + <Component Id='Copyright' + Guid='$(var.MANUAL_ID)'> + <File Id='Copyright' + Name='COPYING' + LongName='COPYING' + DiskId='1' + Source='G:\jmacd\svn\xdelta3\COPYING' + Vital='yes'> + <Shortcut Id="startupmenuCopyright" + Directory="ProgramMenuDir" + Name="COPYING" + LongName="GNU Public License" + /> + </File> + </Component> + + <Component Id='Source' + Guid='$(var.SOURCE_ID)'> + <File Id='Source' + Name='xdelt30t.zip' + LongName='xdelta3.0u.zip' + DiskId='1' + Source='G:\jmacd\svn\xdelta3\xdelta3.0u.zip' + Vital='yes'> + <Shortcut Id="startupmenuSource" + Directory="ProgramMenuDir" + Name="xdelt30t.zip" + LongName="xdelta3.0u.zip" + /> + </File> + </Component> + + </Directory> + </Directory> + + <Directory Id="ProgramMenuFolder" Name="PMenu" LongName="Programs"> + <Directory Id="ProgramMenuDir" + Name="xdelt30t" + LongName="Xdelta 3.0u"> + </Directory> + </Directory> + +<!-- <Merge Id='CRT' --> +<!-- Language='0' --> +<!-- DiskId='1' --> +<!-- src='C:\Program Files\Common Files\Merge Modules\microsoft_vc80_crt_x86.msm' --> +<!-- /> --> +<!-- <Merge Id='CRT Policy' --> 
+<!-- Language='0' --> +<!-- DiskId='1' --> +<!-- src='C:\Program Files\Common Files\Merge Modules\policy_8_0_Microsoft_VC80_CRT_x86.msm' --> +<!-- /> --> + </Directory> + + <Feature Id='Complete' + Level='1'> + <ComponentRef Id='Main' /> + <ComponentRef Id='Readme' /> + <ComponentRef Id='Copyright' /> + <ComponentRef Id='Source' /> + </Feature> + +<!-- <Feature Id='CRT_WinSXS' Title='CRT WinSXS' Level='1'> --> +<!-- <MergeRef Id='CRT' /> --> +<!-- <MergeRef Id='CRT Policy' /> --> +<!-- </Feature> --> + + <InstallExecuteSequence> + <RemoveRegistryValues/> + <RemoveFiles/> + <InstallFiles/> + <WriteRegistryValues/> + </InstallExecuteSequence> + </Product> +</Wix> diff --git a/xdelta3_wrap.c b/xdelta3_wrap.c new file mode 100644 index 0000000..1ed9b5d --- /dev/null +++ b/xdelta3_wrap.c @@ -0,0 +1,3615 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 1.3.31 + * + * This file is not intended to be easily readable and contains a number of + * coding conventions designed to improve portability and efficiency. Do not make + * changes to this file unless you know what you are doing--modify the SWIG + * interface file instead. + * ----------------------------------------------------------------------------- */ + +#define SWIGPYTHON +#define SWIG_PYTHON_DIRECTOR_NO_VTABLE +/* ----------------------------------------------------------------------------- + * This section contains generic SWIG labels for method/variable + * declarations/attributes, and other compiler dependent labels. 
+ * ----------------------------------------------------------------------------- */ + +/* template workaround for compilers that cannot correctly implement the C++ standard */ +#ifndef SWIGTEMPLATEDISAMBIGUATOR +# if defined(__SUNPRO_CC) +# if (__SUNPRO_CC <= 0x560) +# define SWIGTEMPLATEDISAMBIGUATOR template +# else +# define SWIGTEMPLATEDISAMBIGUATOR +# endif +# else +# define SWIGTEMPLATEDISAMBIGUATOR +# endif +#endif + +/* inline attribute */ +#ifndef SWIGINLINE +# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__)) +# define SWIGINLINE inline +# else +# define SWIGINLINE +# endif +#endif + +/* attribute recognised by some compilers to avoid 'unused' warnings */ +#ifndef SWIGUNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +# elif defined(__ICC) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +#endif + +#ifndef SWIGUNUSEDPARM +# ifdef __cplusplus +# define SWIGUNUSEDPARM(p) +# else +# define SWIGUNUSEDPARM(p) p SWIGUNUSED +# endif +#endif + +/* internal SWIG method */ +#ifndef SWIGINTERN +# define SWIGINTERN static SWIGUNUSED +#endif + +/* internal inline SWIG method */ +#ifndef SWIGINTERNINLINE +# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE +#endif + +/* exporting methods */ +#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# ifndef GCC_HASCLASSVISIBILITY +# define GCC_HASCLASSVISIBILITY +# endif +#endif + +#ifndef SWIGEXPORT +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# if defined(STATIC_LINKED) +# define SWIGEXPORT +# else +# define SWIGEXPORT __declspec(dllexport) +# endif +# else +# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY) +# define SWIGEXPORT __attribute__ ((visibility("default"))) +# else +# define SWIGEXPORT +# endif +# endif +#endif + +/* calling conventions for Windows */ 
+#ifndef SWIGSTDCALL +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# define SWIGSTDCALL __stdcall +# else +# define SWIGSTDCALL +# endif +#endif + +/* Deal with Microsoft's attempt at deprecating C standard runtime functions */ +#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE) +# define _CRT_SECURE_NO_DEPRECATE +#endif + + +/* Python.h has to appear first */ +#include <Python.h> + +/* ----------------------------------------------------------------------------- + * swigrun.swg + * + * This file contains generic CAPI SWIG runtime support for pointer + * type checking. + * ----------------------------------------------------------------------------- */ + +/* This should only be incremented when either the layout of swig_type_info changes, + or for whatever reason, the runtime changes incompatibly */ +#define SWIG_RUNTIME_VERSION "3" + +/* define SWIG_TYPE_TABLE_NAME as "SWIG_TYPE_TABLE" */ +#ifdef SWIG_TYPE_TABLE +# define SWIG_QUOTE_STRING(x) #x +# define SWIG_EXPAND_AND_QUOTE_STRING(x) SWIG_QUOTE_STRING(x) +# define SWIG_TYPE_TABLE_NAME SWIG_EXPAND_AND_QUOTE_STRING(SWIG_TYPE_TABLE) +#else +# define SWIG_TYPE_TABLE_NAME +#endif + +/* + You can use the SWIGRUNTIME and SWIGRUNTIMEINLINE macros for + creating a static or dynamic library from the swig runtime code. + In 99.9% of the cases, swig just needs to declare them as 'static'. + + But only do this if is strictly necessary, ie, if you have problems + with your compiler or so. +*/ + +#ifndef SWIGRUNTIME +# define SWIGRUNTIME SWIGINTERN +#endif + +#ifndef SWIGRUNTIMEINLINE +# define SWIGRUNTIMEINLINE SWIGRUNTIME SWIGINLINE +#endif + +/* Generic buffer size */ +#ifndef SWIG_BUFFER_SIZE +# define SWIG_BUFFER_SIZE 1024 +#endif + +/* Flags for pointer conversions */ +#define SWIG_POINTER_DISOWN 0x1 + +/* Flags for new pointer objects */ +#define SWIG_POINTER_OWN 0x1 + + +/* + Flags/methods for returning states. 
+ + The swig conversion methods, as ConvertPtr, return and integer + that tells if the conversion was successful or not. And if not, + an error code can be returned (see swigerrors.swg for the codes). + + Use the following macros/flags to set or process the returning + states. + + In old swig versions, you usually write code as: + + if (SWIG_ConvertPtr(obj,vptr,ty.flags) != -1) { + // success code + } else { + //fail code + } + + Now you can be more explicit as: + + int res = SWIG_ConvertPtr(obj,vptr,ty.flags); + if (SWIG_IsOK(res)) { + // success code + } else { + // fail code + } + + that seems to be the same, but now you can also do + + Type *ptr; + int res = SWIG_ConvertPtr(obj,(void **)(&ptr),ty.flags); + if (SWIG_IsOK(res)) { + // success code + if (SWIG_IsNewObj(res) { + ... + delete *ptr; + } else { + ... + } + } else { + // fail code + } + + I.e., now SWIG_ConvertPtr can return new objects and you can + identify the case and take care of the deallocation. Of course that + requires also to SWIG_ConvertPtr to return new result values, as + + int SWIG_ConvertPtr(obj, ptr,...) { + if (<obj is ok>) { + if (<need new object>) { + *ptr = <ptr to new allocated object>; + return SWIG_NEWOBJ; + } else { + *ptr = <ptr to old object>; + return SWIG_OLDOBJ; + } + } else { + return SWIG_BADOBJ; + } + } + + Of course, returning the plain '0(success)/-1(fail)' still works, but you can be + more explicit by returning SWIG_BADOBJ, SWIG_ERROR or any of the + swig errors code. + + Finally, if the SWIG_CASTRANK_MODE is enabled, the result code + allows to return the 'cast rank', for example, if you have this + + int food(double) + int fooi(int); + + and you call + + food(1) // cast rank '1' (1 -> 1.0) + fooi(1) // cast rank '0' + + just use the SWIG_AddCast()/SWIG_CheckState() + + + */ +#define SWIG_OK (0) +#define SWIG_ERROR (-1) +#define SWIG_IsOK(r) (r >= 0) +#define SWIG_ArgError(r) ((r != SWIG_ERROR) ? 
r : SWIG_TypeError) + +/* The CastRankLimit says how many bits are used for the cast rank */ +#define SWIG_CASTRANKLIMIT (1 << 8) +/* The NewMask denotes the object was created (using new/malloc) */ +#define SWIG_NEWOBJMASK (SWIG_CASTRANKLIMIT << 1) +/* The TmpMask is for in/out typemaps that use temporal objects */ +#define SWIG_TMPOBJMASK (SWIG_NEWOBJMASK << 1) +/* Simple returning values */ +#define SWIG_BADOBJ (SWIG_ERROR) +#define SWIG_OLDOBJ (SWIG_OK) +#define SWIG_NEWOBJ (SWIG_OK | SWIG_NEWOBJMASK) +#define SWIG_TMPOBJ (SWIG_OK | SWIG_TMPOBJMASK) +/* Check, add and del mask methods */ +#define SWIG_AddNewMask(r) (SWIG_IsOK(r) ? (r | SWIG_NEWOBJMASK) : r) +#define SWIG_DelNewMask(r) (SWIG_IsOK(r) ? (r & ~SWIG_NEWOBJMASK) : r) +#define SWIG_IsNewObj(r) (SWIG_IsOK(r) && (r & SWIG_NEWOBJMASK)) +#define SWIG_AddTmpMask(r) (SWIG_IsOK(r) ? (r | SWIG_TMPOBJMASK) : r) +#define SWIG_DelTmpMask(r) (SWIG_IsOK(r) ? (r & ~SWIG_TMPOBJMASK) : r) +#define SWIG_IsTmpObj(r) (SWIG_IsOK(r) && (r & SWIG_TMPOBJMASK)) + + +/* Cast-Rank Mode */ +#if defined(SWIG_CASTRANK_MODE) +# ifndef SWIG_TypeRank +# define SWIG_TypeRank unsigned long +# endif +# ifndef SWIG_MAXCASTRANK /* Default cast allowed */ +# define SWIG_MAXCASTRANK (2) +# endif +# define SWIG_CASTRANKMASK ((SWIG_CASTRANKLIMIT) -1) +# define SWIG_CastRank(r) (r & SWIG_CASTRANKMASK) +SWIGINTERNINLINE int SWIG_AddCast(int r) { + return SWIG_IsOK(r) ? ((SWIG_CastRank(r) < SWIG_MAXCASTRANK) ? (r + 1) : SWIG_ERROR) : r; +} +SWIGINTERNINLINE int SWIG_CheckState(int r) { + return SWIG_IsOK(r) ? SWIG_CastRank(r) + 1 : 0; +} +#else /* no cast-rank mode */ +# define SWIG_AddCast +# define SWIG_CheckState(r) (SWIG_IsOK(r) ? 
1 : 0) +#endif + + + + +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void *(*swig_converter_func)(void *); +typedef struct swig_type_info *(*swig_dycast_func)(void **); + +/* Structure to store inforomation on one type */ +typedef struct swig_type_info { + const char *name; /* mangled name of this type */ + const char *str; /* human readable name of this type */ + swig_dycast_func dcast; /* dynamic cast function down a hierarchy */ + struct swig_cast_info *cast; /* linked list of types that can cast into this type */ + void *clientdata; /* language specific type data */ + int owndata; /* flag if the structure owns the clientdata */ +} swig_type_info; + +/* Structure to store a type and conversion function used for casting */ +typedef struct swig_cast_info { + swig_type_info *type; /* pointer to type that is equivalent to this type */ + swig_converter_func converter; /* function to cast the void pointers */ + struct swig_cast_info *next; /* pointer to next cast in linked list */ + struct swig_cast_info *prev; /* pointer to the previous cast */ +} swig_cast_info; + +/* Structure used to store module information + * Each module generates one structure like this, and the runtime collects + * all of these structures and stores them in a circularly linked list.*/ +typedef struct swig_module_info { + swig_type_info **types; /* Array of pointers to swig_type_info structures that are in this module */ + size_t size; /* Number of types in this module */ + struct swig_module_info *next; /* Pointer to next element in circularly linked list */ + swig_type_info **type_initial; /* Array of initially generated type structures */ + swig_cast_info **cast_initial; /* Array of initially generated casting structures */ + void *clientdata; /* Language specific module data */ +} swig_module_info; + +/* + Compare two type names skipping the space characters, therefore + "char*" == "char *" and "Class<int>" == "Class<int >", etc. 
+ + Return 0 when the two name types are equivalent, as in + strncmp, but skipping ' '. +*/ +SWIGRUNTIME int +SWIG_TypeNameComp(const char *f1, const char *l1, + const char *f2, const char *l2) { + for (;(f1 != l1) && (f2 != l2); ++f1, ++f2) { + while ((*f1 == ' ') && (f1 != l1)) ++f1; + while ((*f2 == ' ') && (f2 != l2)) ++f2; + if (*f1 != *f2) return (*f1 > *f2) ? 1 : -1; + } + return (l1 - f1) - (l2 - f2); +} + +/* + Check type equivalence in a name list like <name1>|<name2>|... + Return 0 if not equal, 1 if equal +*/ +SWIGRUNTIME int +SWIG_TypeEquiv(const char *nb, const char *tb) { + int equiv = 0; + const char* te = tb + strlen(tb); + const char* ne = nb; + while (!equiv && *ne) { + for (nb = ne; *ne; ++ne) { + if (*ne == '|') break; + } + equiv = (SWIG_TypeNameComp(nb, ne, tb, te) == 0) ? 1 : 0; + if (*ne) ++ne; + } + return equiv; +} + +/* + Check type equivalence in a name list like <name1>|<name2>|... + Return 0 if equal, -1 if nb < tb, 1 if nb > tb +*/ +SWIGRUNTIME int +SWIG_TypeCompare(const char *nb, const char *tb) { + int equiv = 0; + const char* te = tb + strlen(tb); + const char* ne = nb; + while (!equiv && *ne) { + for (nb = ne; *ne; ++ne) { + if (*ne == '|') break; + } + equiv = (SWIG_TypeNameComp(nb, ne, tb, te) == 0) ? 
1 : 0; + if (*ne) ++ne; + } + return equiv; +} + + +/* think of this as a c++ template<> or a scheme macro */ +#define SWIG_TypeCheck_Template(comparison, ty) \ + if (ty) { \ + swig_cast_info *iter = ty->cast; \ + while (iter) { \ + if (comparison) { \ + if (iter == ty->cast) return iter; \ + /* Move iter to the top of the linked list */ \ + iter->prev->next = iter->next; \ + if (iter->next) \ + iter->next->prev = iter->prev; \ + iter->next = ty->cast; \ + iter->prev = 0; \ + if (ty->cast) ty->cast->prev = iter; \ + ty->cast = iter; \ + return iter; \ + } \ + iter = iter->next; \ + } \ + } \ + return 0 + +/* + Check the typename +*/ +SWIGRUNTIME swig_cast_info * +SWIG_TypeCheck(const char *c, swig_type_info *ty) { + SWIG_TypeCheck_Template(strcmp(iter->type->name, c) == 0, ty); +} + +/* Same as previous function, except strcmp is replaced with a pointer comparison */ +SWIGRUNTIME swig_cast_info * +SWIG_TypeCheckStruct(swig_type_info *from, swig_type_info *into) { + SWIG_TypeCheck_Template(iter->type == from, into); +} + +/* + Cast a pointer up an inheritance hierarchy +*/ +SWIGRUNTIMEINLINE void * +SWIG_TypeCast(swig_cast_info *ty, void *ptr) { + return ((!ty) || (!ty->converter)) ? ptr : (*ty->converter)(ptr); +} + +/* + Dynamic pointer casting. Down an inheritance hierarchy +*/ +SWIGRUNTIME swig_type_info * +SWIG_TypeDynamicCast(swig_type_info *ty, void **ptr) { + swig_type_info *lastty = ty; + if (!ty || !ty->dcast) return ty; + while (ty && (ty->dcast)) { + ty = (*ty->dcast)(ptr); + if (ty) lastty = ty; + } + return lastty; +} + +/* + Return the name associated with this type +*/ +SWIGRUNTIMEINLINE const char * +SWIG_TypeName(const swig_type_info *ty) { + return ty->name; +} + +/* + Return the pretty name associated with this type, + that is an unmangled type name in a form presentable to the user. 
+*/ +SWIGRUNTIME const char * +SWIG_TypePrettyName(const swig_type_info *type) { + /* The "str" field contains the equivalent pretty names of the + type, separated by vertical-bar characters. We choose + to print the last name, as it is often (?) the most + specific. */ + if (!type) return NULL; + if (type->str != NULL) { + const char *last_name = type->str; + const char *s; + for (s = type->str; *s; s++) + if (*s == '|') last_name = s+1; + return last_name; + } + else + return type->name; +} + +/* + Set the clientdata field for a type +*/ +SWIGRUNTIME void +SWIG_TypeClientData(swig_type_info *ti, void *clientdata) { + swig_cast_info *cast = ti->cast; + /* if (ti->clientdata == clientdata) return; */ + ti->clientdata = clientdata; + + while (cast) { + if (!cast->converter) { + swig_type_info *tc = cast->type; + if (!tc->clientdata) { + SWIG_TypeClientData(tc, clientdata); + } + } + cast = cast->next; + } +} +SWIGRUNTIME void +SWIG_TypeNewClientData(swig_type_info *ti, void *clientdata) { + SWIG_TypeClientData(ti, clientdata); + ti->owndata = 1; +} + +/* + Search for a swig_type_info structure only by mangled name + Search is a O(log #types) + + We start searching at module start, and finish searching when start == end. + Note: if start == end at the beginning of the function, we go all the way around + the circular list. 
+*/ +SWIGRUNTIME swig_type_info * +SWIG_MangledTypeQueryModule(swig_module_info *start, + swig_module_info *end, + const char *name) { + swig_module_info *iter = start; + do { + if (iter->size) { + register size_t l = 0; + register size_t r = iter->size - 1; + do { + /* since l+r >= 0, we can (>> 1) instead (/ 2) */ + register size_t i = (l + r) >> 1; + const char *iname = iter->types[i]->name; + if (iname) { + register int compare = strcmp(name, iname); + if (compare == 0) { + return iter->types[i]; + } else if (compare < 0) { + if (i) { + r = i - 1; + } else { + break; + } + } else if (compare > 0) { + l = i + 1; + } + } else { + break; /* should never happen */ + } + } while (l <= r); + } + iter = iter->next; + } while (iter != end); + return 0; +} + +/* + Search for a swig_type_info structure for either a mangled name or a human readable name. + It first searches the mangled names of the types, which is a O(log #types) + If a type is not found it then searches the human readable names, which is O(#types). + + We start searching at module start, and finish searching when start == end. + Note: if start == end at the beginning of the function, we go all the way around + the circular list. 
+*/ +SWIGRUNTIME swig_type_info * +SWIG_TypeQueryModule(swig_module_info *start, + swig_module_info *end, + const char *name) { + /* STEP 1: Search the name field using binary search */ + swig_type_info *ret = SWIG_MangledTypeQueryModule(start, end, name); + if (ret) { + return ret; + } else { + /* STEP 2: If the type hasn't been found, do a complete search + of the str field (the human readable name) */ + swig_module_info *iter = start; + do { + register size_t i = 0; + for (; i < iter->size; ++i) { + if (iter->types[i]->str && (SWIG_TypeEquiv(iter->types[i]->str, name))) + return iter->types[i]; + } + iter = iter->next; + } while (iter != end); + } + + /* neither found a match */ + return 0; +} + +/* + Pack binary data into a string +*/ +SWIGRUNTIME char * +SWIG_PackData(char *c, void *ptr, size_t sz) { + static const char hex[17] = "0123456789abcdef"; + register const unsigned char *u = (unsigned char *) ptr; + register const unsigned char *eu = u + sz; + for (; u != eu; ++u) { + register unsigned char uu = *u; + *(c++) = hex[(uu & 0xf0) >> 4]; + *(c++) = hex[uu & 0xf]; + } + return c; +} + +/* + Unpack binary data from a string +*/ +SWIGRUNTIME const char * +SWIG_UnpackData(const char *c, void *ptr, size_t sz) { + register unsigned char *u = (unsigned char *) ptr; + register const unsigned char *eu = u + sz; + for (; u != eu; ++u) { + register char d = *(c++); + register unsigned char uu; + if ((d >= '0') && (d <= '9')) + uu = ((d - '0') << 4); + else if ((d >= 'a') && (d <= 'f')) + uu = ((d - ('a'-10)) << 4); + else + return (char *) 0; + d = *(c++); + if ((d >= '0') && (d <= '9')) + uu |= (d - '0'); + else if ((d >= 'a') && (d <= 'f')) + uu |= (d - ('a'-10)); + else + return (char *) 0; + *u = uu; + } + return c; +} + +/* + Pack 'void *' into a string buffer. 
+*/ +SWIGRUNTIME char * +SWIG_PackVoidPtr(char *buff, void *ptr, const char *name, size_t bsz) { + char *r = buff; + if ((2*sizeof(void *) + 2) > bsz) return 0; + *(r++) = '_'; + r = SWIG_PackData(r,&ptr,sizeof(void *)); + if (strlen(name) + 1 > (bsz - (r - buff))) return 0; + strcpy(r,name); + return buff; +} + +SWIGRUNTIME const char * +SWIG_UnpackVoidPtr(const char *c, void **ptr, const char *name) { + if (*c != '_') { + if (strcmp(c,"NULL") == 0) { + *ptr = (void *) 0; + return name; + } else { + return 0; + } + } + return SWIG_UnpackData(++c,ptr,sizeof(void *)); +} + +SWIGRUNTIME char * +SWIG_PackDataName(char *buff, void *ptr, size_t sz, const char *name, size_t bsz) { + char *r = buff; + size_t lname = (name ? strlen(name) : 0); + if ((2*sz + 2 + lname) > bsz) return 0; + *(r++) = '_'; + r = SWIG_PackData(r,ptr,sz); + if (lname) { + strncpy(r,name,lname+1); + } else { + *r = 0; + } + return buff; +} + +SWIGRUNTIME const char * +SWIG_UnpackDataName(const char *c, void *ptr, size_t sz, const char *name) { + if (*c != '_') { + if (strcmp(c,"NULL") == 0) { + memset(ptr,0,sz); + return name; + } else { + return 0; + } + } + return SWIG_UnpackData(++c,ptr,sz); +} + +#ifdef __cplusplus +} +#endif + +/* Errors in SWIG */ +#define SWIG_UnknownError -1 +#define SWIG_IOError -2 +#define SWIG_RuntimeError -3 +#define SWIG_IndexError -4 +#define SWIG_TypeError -5 +#define SWIG_DivisionByZero -6 +#define SWIG_OverflowError -7 +#define SWIG_SyntaxError -8 +#define SWIG_ValueError -9 +#define SWIG_SystemError -10 +#define SWIG_AttributeError -11 +#define SWIG_MemoryError -12 +#define SWIG_NullReferenceError -13 + + + + +/* Add PyOS_snprintf for old Pythons */ +#if PY_VERSION_HEX < 0x02020000 +# if defined(_MSC_VER) || defined(__BORLANDC__) || defined(_WATCOM) +# define PyOS_snprintf _snprintf +# else +# define PyOS_snprintf snprintf +# endif +#endif + +/* A crude PyString_FromFormat implementation for old Pythons */ +#if PY_VERSION_HEX < 0x02020000 + +#ifndef 
SWIG_PYBUFFER_SIZE +# define SWIG_PYBUFFER_SIZE 1024 +#endif + +static PyObject * +PyString_FromFormat(const char *fmt, ...) { + va_list ap; + char buf[SWIG_PYBUFFER_SIZE * 2]; + int res; + va_start(ap, fmt); + res = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + return (res < 0 || res >= (int)sizeof(buf)) ? 0 : PyString_FromString(buf); +} +#endif + +/* Add PyObject_Del for old Pythons */ +#if PY_VERSION_HEX < 0x01060000 +# define PyObject_Del(op) PyMem_DEL((op)) +#endif +#ifndef PyObject_DEL +# define PyObject_DEL PyObject_Del +#endif + +/* A crude PyExc_StopIteration exception for old Pythons */ +#if PY_VERSION_HEX < 0x02020000 +# ifndef PyExc_StopIteration +# define PyExc_StopIteration PyExc_RuntimeError +# endif +# ifndef PyObject_GenericGetAttr +# define PyObject_GenericGetAttr 0 +# endif +#endif +/* Py_NotImplemented is defined in 2.1 and up. */ +#if PY_VERSION_HEX < 0x02010000 +# ifndef Py_NotImplemented +# define Py_NotImplemented PyExc_RuntimeError +# endif +#endif + + +/* A crude PyString_AsStringAndSize implementation for old Pythons */ +#if PY_VERSION_HEX < 0x02010000 +# ifndef PyString_AsStringAndSize +# define PyString_AsStringAndSize(obj, s, len) {*s = PyString_AsString(obj); *len = *s ? strlen(*s) : 0;} +# endif +#endif + +/* PySequence_Size for old Pythons */ +#if PY_VERSION_HEX < 0x02000000 +# ifndef PySequence_Size +# define PySequence_Size PySequence_Length +# endif +#endif + + +/* PyBool_FromLong for old Pythons */ +#if PY_VERSION_HEX < 0x02030000 +static +PyObject *PyBool_FromLong(long ok) +{ + PyObject *result = ok ? 
Py_True : Py_False; + Py_INCREF(result); + return result; +} +#endif + +/* Py_ssize_t for old Pythons */ +/* This code is as recommended by: */ +/* http://www.python.org/dev/peps/pep-0353/#conversion-guidelines */ +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +# define PY_SSIZE_T_MAX INT_MAX +# define PY_SSIZE_T_MIN INT_MIN +#endif + +/* ----------------------------------------------------------------------------- + * error manipulation + * ----------------------------------------------------------------------------- */ + +SWIGRUNTIME PyObject* +SWIG_Python_ErrorType(int code) { + PyObject* type = 0; + switch(code) { + case SWIG_MemoryError: + type = PyExc_MemoryError; + break; + case SWIG_IOError: + type = PyExc_IOError; + break; + case SWIG_RuntimeError: + type = PyExc_RuntimeError; + break; + case SWIG_IndexError: + type = PyExc_IndexError; + break; + case SWIG_TypeError: + type = PyExc_TypeError; + break; + case SWIG_DivisionByZero: + type = PyExc_ZeroDivisionError; + break; + case SWIG_OverflowError: + type = PyExc_OverflowError; + break; + case SWIG_SyntaxError: + type = PyExc_SyntaxError; + break; + case SWIG_ValueError: + type = PyExc_ValueError; + break; + case SWIG_SystemError: + type = PyExc_SystemError; + break; + case SWIG_AttributeError: + type = PyExc_AttributeError; + break; + default: + type = PyExc_RuntimeError; + } + return type; +} + + +SWIGRUNTIME void +SWIG_Python_AddErrorMsg(const char* mesg) +{ + PyObject *type = 0; + PyObject *value = 0; + PyObject *traceback = 0; + + if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback); + if (value) { + PyObject *old_str = PyObject_Str(value); + PyErr_Clear(); + Py_XINCREF(type); + PyErr_Format(type, "%s %s", PyString_AsString(old_str), mesg); + Py_DECREF(old_str); + Py_DECREF(value); + } else { + PyErr_Format(PyExc_RuntimeError, mesg); + } +} + + + +#if defined(SWIG_PYTHON_NO_THREADS) +# if defined(SWIG_PYTHON_THREADS) +# undef SWIG_PYTHON_THREADS +# endif 
+#endif +#if defined(SWIG_PYTHON_THREADS) /* Threading support is enabled */ +# if !defined(SWIG_PYTHON_USE_GIL) && !defined(SWIG_PYTHON_NO_USE_GIL) +# if (PY_VERSION_HEX >= 0x02030000) /* For 2.3 or later, use the PyGILState calls */ +# define SWIG_PYTHON_USE_GIL +# endif +# endif +# if defined(SWIG_PYTHON_USE_GIL) /* Use PyGILState threads calls */ +# ifndef SWIG_PYTHON_INITIALIZE_THREADS +# define SWIG_PYTHON_INITIALIZE_THREADS PyEval_InitThreads() +# endif +# ifdef __cplusplus /* C++ code */ + class SWIG_Python_Thread_Block { + bool status; + PyGILState_STATE state; + public: + void end() { if (status) { PyGILState_Release(state); status = false;} } + SWIG_Python_Thread_Block() : status(true), state(PyGILState_Ensure()) {} + ~SWIG_Python_Thread_Block() { end(); } + }; + class SWIG_Python_Thread_Allow { + bool status; + PyThreadState *save; + public: + void end() { if (status) { PyEval_RestoreThread(save); status = false; }} + SWIG_Python_Thread_Allow() : status(true), save(PyEval_SaveThread()) {} + ~SWIG_Python_Thread_Allow() { end(); } + }; +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK SWIG_Python_Thread_Block _swig_thread_block +# define SWIG_PYTHON_THREAD_END_BLOCK _swig_thread_block.end() +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW SWIG_Python_Thread_Allow _swig_thread_allow +# define SWIG_PYTHON_THREAD_END_ALLOW _swig_thread_allow.end() +# else /* C code */ +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK PyGILState_STATE _swig_thread_block = PyGILState_Ensure() +# define SWIG_PYTHON_THREAD_END_BLOCK PyGILState_Release(_swig_thread_block) +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW PyThreadState *_swig_thread_allow = PyEval_SaveThread() +# define SWIG_PYTHON_THREAD_END_ALLOW PyEval_RestoreThread(_swig_thread_allow) +# endif +# else /* Old thread way, not implemented, user must provide it */ +# if !defined(SWIG_PYTHON_INITIALIZE_THREADS) +# define SWIG_PYTHON_INITIALIZE_THREADS +# endif +# if !defined(SWIG_PYTHON_THREAD_BEGIN_BLOCK) +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK +# 
endif +# if !defined(SWIG_PYTHON_THREAD_END_BLOCK) +# define SWIG_PYTHON_THREAD_END_BLOCK +# endif +# if !defined(SWIG_PYTHON_THREAD_BEGIN_ALLOW) +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW +# endif +# if !defined(SWIG_PYTHON_THREAD_END_ALLOW) +# define SWIG_PYTHON_THREAD_END_ALLOW +# endif +# endif +#else /* No thread support */ +# define SWIG_PYTHON_INITIALIZE_THREADS +# define SWIG_PYTHON_THREAD_BEGIN_BLOCK +# define SWIG_PYTHON_THREAD_END_BLOCK +# define SWIG_PYTHON_THREAD_BEGIN_ALLOW +# define SWIG_PYTHON_THREAD_END_ALLOW +#endif + +/* ----------------------------------------------------------------------------- + * Python API portion that goes into the runtime + * ----------------------------------------------------------------------------- */ + +#ifdef __cplusplus +extern "C" { +#if 0 +} /* cc-mode */ +#endif +#endif + +/* ----------------------------------------------------------------------------- + * Constant declarations + * ----------------------------------------------------------------------------- */ + +/* Constant Types */ +#define SWIG_PY_POINTER 4 +#define SWIG_PY_BINARY 5 + +/* Constant information structure */ +typedef struct swig_const_info { + int type; + char *name; + long lvalue; + double dvalue; + void *pvalue; + swig_type_info **ptype; +} swig_const_info; + +#ifdef __cplusplus +#if 0 +{ /* cc-mode */ +#endif +} +#endif + + +/* ----------------------------------------------------------------------------- + * See the LICENSE file for information on copyright, usage and redistribution + * of SWIG, and the README file for authors - http://www.swig.org/release.html. + * + * pyrun.swg + * + * This file contains the runtime support for Python modules + * and includes code for managing global variables and pointer + * type checking. 
+ * + * ----------------------------------------------------------------------------- */ + +/* Common SWIG API */ + +/* for raw pointers */ +#define SWIG_Python_ConvertPtr(obj, pptr, type, flags) SWIG_Python_ConvertPtrAndOwn(obj, pptr, type, flags, 0) +#define SWIG_ConvertPtr(obj, pptr, type, flags) SWIG_Python_ConvertPtr(obj, pptr, type, flags) +#define SWIG_ConvertPtrAndOwn(obj,pptr,type,flags,own) SWIG_Python_ConvertPtrAndOwn(obj, pptr, type, flags, own) +#define SWIG_NewPointerObj(ptr, type, flags) SWIG_Python_NewPointerObj(ptr, type, flags) +#define SWIG_CheckImplicit(ty) SWIG_Python_CheckImplicit(ty) +#define SWIG_AcquirePtr(ptr, src) SWIG_Python_AcquirePtr(ptr, src) +#define swig_owntype int + +/* for raw packed data */ +#define SWIG_ConvertPacked(obj, ptr, sz, ty) SWIG_Python_ConvertPacked(obj, ptr, sz, ty) +#define SWIG_NewPackedObj(ptr, sz, type) SWIG_Python_NewPackedObj(ptr, sz, type) + +/* for class or struct pointers */ +#define SWIG_ConvertInstance(obj, pptr, type, flags) SWIG_ConvertPtr(obj, pptr, type, flags) +#define SWIG_NewInstanceObj(ptr, type, flags) SWIG_NewPointerObj(ptr, type, flags) + +/* for C or C++ function pointers */ +#define SWIG_ConvertFunctionPtr(obj, pptr, type) SWIG_Python_ConvertFunctionPtr(obj, pptr, type) +#define SWIG_NewFunctionPtrObj(ptr, type) SWIG_Python_NewPointerObj(ptr, type, 0) + +/* for C++ member pointers, ie, member methods */ +#define SWIG_ConvertMember(obj, ptr, sz, ty) SWIG_Python_ConvertPacked(obj, ptr, sz, ty) +#define SWIG_NewMemberObj(ptr, sz, type) SWIG_Python_NewPackedObj(ptr, sz, type) + + +/* Runtime API */ + +#define SWIG_GetModule(clientdata) SWIG_Python_GetModule() +#define SWIG_SetModule(clientdata, pointer) SWIG_Python_SetModule(pointer) +#define SWIG_NewClientData(obj) PySwigClientData_New(obj) + +#define SWIG_SetErrorObj SWIG_Python_SetErrorObj +#define SWIG_SetErrorMsg SWIG_Python_SetErrorMsg +#define SWIG_ErrorType(code) SWIG_Python_ErrorType(code) +#define SWIG_Error(code, msg) 
SWIG_Python_SetErrorMsg(SWIG_ErrorType(code), msg) +#define SWIG_fail goto fail + + +/* Runtime API implementation */ + +/* Error manipulation */ + +SWIGINTERN void +SWIG_Python_SetErrorObj(PyObject *errtype, PyObject *obj) { + SWIG_PYTHON_THREAD_BEGIN_BLOCK; + PyErr_SetObject(errtype, obj); + Py_DECREF(obj); + SWIG_PYTHON_THREAD_END_BLOCK; +} + +SWIGINTERN void +SWIG_Python_SetErrorMsg(PyObject *errtype, const char *msg) { + SWIG_PYTHON_THREAD_BEGIN_BLOCK; + PyErr_SetString(errtype, (char *) msg); + SWIG_PYTHON_THREAD_END_BLOCK; +} + +#define SWIG_Python_Raise(obj, type, desc) SWIG_Python_SetErrorObj(SWIG_Python_ExceptionType(desc), obj) + +/* Set a constant value */ + +SWIGINTERN void +SWIG_Python_SetConstant(PyObject *d, const char *name, PyObject *obj) { + PyDict_SetItemString(d, (char*) name, obj); + Py_DECREF(obj); +} + +/* Append a value to the result obj */ + +SWIGINTERN PyObject* +SWIG_Python_AppendOutput(PyObject* result, PyObject* obj) { +#if !defined(SWIG_PYTHON_OUTPUT_TUPLE) + if (!result) { + result = obj; + } else if (result == Py_None) { + Py_DECREF(result); + result = obj; + } else { + if (!PyList_Check(result)) { + PyObject *o2 = result; + result = PyList_New(1); + PyList_SetItem(result, 0, o2); + } + PyList_Append(result,obj); + Py_DECREF(obj); + } + return result; +#else + PyObject* o2; + PyObject* o3; + if (!result) { + result = obj; + } else if (result == Py_None) { + Py_DECREF(result); + result = obj; + } else { + if (!PyTuple_Check(result)) { + o2 = result; + result = PyTuple_New(1); + PyTuple_SET_ITEM(result, 0, o2); + } + o3 = PyTuple_New(1); + PyTuple_SET_ITEM(o3, 0, obj); + o2 = result; + result = PySequence_Concat(o2, o3); + Py_DECREF(o2); + Py_DECREF(o3); + } + return result; +#endif +} + +/* Unpack the argument tuple */ + +SWIGINTERN int +SWIG_Python_UnpackTuple(PyObject *args, const char *name, int min, int max, PyObject **objs) +{ + if (!args) { + if (!min && !max) { + return 1; + } else { + PyErr_Format(PyExc_TypeError, "%s 
expected %s%d arguments, got none", + name, (min == max ? "" : "at least "), min); + return 0; + } + } + if (!PyTuple_Check(args)) { + PyErr_SetString(PyExc_SystemError, "UnpackTuple() argument list is not a tuple"); + return 0; + } else { + register int l = PyTuple_GET_SIZE(args); + if (l < min) { + PyErr_Format(PyExc_TypeError, "%s expected %s%d arguments, got %d", + name, (min == max ? "" : "at least "), min, l); + return 0; + } else if (l > max) { + PyErr_Format(PyExc_TypeError, "%s expected %s%d arguments, got %d", + name, (min == max ? "" : "at most "), max, l); + return 0; + } else { + register int i; + for (i = 0; i < l; ++i) { + objs[i] = PyTuple_GET_ITEM(args, i); + } + for (; l < max; ++l) { + objs[l] = 0; + } + return i + 1; + } + } +} + +/* A functor is a function object with one single object argument */ +#if PY_VERSION_HEX >= 0x02020000 +#define SWIG_Python_CallFunctor(functor, obj) PyObject_CallFunctionObjArgs(functor, obj, NULL); +#else +#define SWIG_Python_CallFunctor(functor, obj) PyObject_CallFunction(functor, "O", obj); +#endif + +/* + Helper for static pointer initialization for both C and C++ code, for example + static PyObject *SWIG_STATIC_POINTER(MyVar) = NewSomething(...); +*/ +#ifdef __cplusplus +#define SWIG_STATIC_POINTER(var) var +#else +#define SWIG_STATIC_POINTER(var) var = 0; if (!var) var +#endif + +/* ----------------------------------------------------------------------------- + * Pointer declarations + * ----------------------------------------------------------------------------- */ + +/* Flags for new pointer objects */ +#define SWIG_POINTER_NOSHADOW (SWIG_POINTER_OWN << 1) +#define SWIG_POINTER_NEW (SWIG_POINTER_NOSHADOW | SWIG_POINTER_OWN) + +#define SWIG_POINTER_IMPLICIT_CONV (SWIG_POINTER_DISOWN << 1) + +#ifdef __cplusplus +extern "C" { +#if 0 +} /* cc-mode */ +#endif +#endif + +/* How to access Py_None */ +#if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# ifndef SWIG_PYTHON_NO_BUILD_NONE +# ifndef 
SWIG_PYTHON_BUILD_NONE +# define SWIG_PYTHON_BUILD_NONE +# endif +# endif +#endif + +#ifdef SWIG_PYTHON_BUILD_NONE +# ifdef Py_None +# undef Py_None +# define Py_None SWIG_Py_None() +# endif +SWIGRUNTIMEINLINE PyObject * +_SWIG_Py_None(void) +{ + PyObject *none = Py_BuildValue((char*)""); + Py_DECREF(none); + return none; +} +SWIGRUNTIME PyObject * +SWIG_Py_None(void) +{ + static PyObject *SWIG_STATIC_POINTER(none) = _SWIG_Py_None(); + return none; +} +#endif + +/* The python void return value */ + +SWIGRUNTIMEINLINE PyObject * +SWIG_Py_Void(void) +{ + PyObject *none = Py_None; + Py_INCREF(none); + return none; +} + +/* PySwigClientData */ + +typedef struct { + PyObject *klass; + PyObject *newraw; + PyObject *newargs; + PyObject *destroy; + int delargs; + int implicitconv; +} PySwigClientData; + +SWIGRUNTIMEINLINE int +SWIG_Python_CheckImplicit(swig_type_info *ty) +{ + PySwigClientData *data = (PySwigClientData *)ty->clientdata; + return data ? data->implicitconv : 0; +} + +SWIGRUNTIMEINLINE PyObject * +SWIG_Python_ExceptionType(swig_type_info *desc) { + PySwigClientData *data = desc ? (PySwigClientData *) desc->clientdata : 0; + PyObject *klass = data ? data->klass : 0; + return (klass ? 
klass : PyExc_RuntimeError); +} + + +SWIGRUNTIME PySwigClientData * +PySwigClientData_New(PyObject* obj) +{ + if (!obj) { + return 0; + } else { + PySwigClientData *data = (PySwigClientData *)malloc(sizeof(PySwigClientData)); + /* the klass element */ + data->klass = obj; + Py_INCREF(data->klass); + /* the newraw method and newargs arguments used to create a new raw instance */ + if (PyClass_Check(obj)) { + data->newraw = 0; + data->newargs = obj; + Py_INCREF(obj); + } else { +#if (PY_VERSION_HEX < 0x02020000) + data->newraw = 0; +#else + data->newraw = PyObject_GetAttrString(data->klass, (char *)"__new__"); +#endif + if (data->newraw) { + Py_INCREF(data->newraw); + data->newargs = PyTuple_New(1); + PyTuple_SetItem(data->newargs, 0, obj); + } else { + data->newargs = obj; + } + Py_INCREF(data->newargs); + } + /* the destroy method, aka as the C++ delete method */ + data->destroy = PyObject_GetAttrString(data->klass, (char *)"__swig_destroy__"); + if (PyErr_Occurred()) { + PyErr_Clear(); + data->destroy = 0; + } + if (data->destroy) { + int flags; + Py_INCREF(data->destroy); + flags = PyCFunction_GET_FLAGS(data->destroy); +#ifdef METH_O + data->delargs = !(flags & (METH_O)); +#else + data->delargs = 0; +#endif + } else { + data->delargs = 0; + } + data->implicitconv = 0; + return data; + } +} + +SWIGRUNTIME void +PySwigClientData_Del(PySwigClientData* data) +{ + Py_XDECREF(data->newraw); + Py_XDECREF(data->newargs); + Py_XDECREF(data->destroy); +} + +/* =============== PySwigObject =====================*/ + +typedef struct { + PyObject_HEAD + void *ptr; + swig_type_info *ty; + int own; + PyObject *next; +} PySwigObject; + +SWIGRUNTIME PyObject * +PySwigObject_long(PySwigObject *v) +{ + return PyLong_FromVoidPtr(v->ptr); +} + +SWIGRUNTIME PyObject * +PySwigObject_format(const char* fmt, PySwigObject *v) +{ + PyObject *res = NULL; + PyObject *args = PyTuple_New(1); + if (args) { + if (PyTuple_SetItem(args, 0, PySwigObject_long(v)) == 0) { + PyObject *ofmt = 
PyString_FromString(fmt); + if (ofmt) { + res = PyString_Format(ofmt,args); + Py_DECREF(ofmt); + } + Py_DECREF(args); + } + } + return res; +} + +SWIGRUNTIME PyObject * +PySwigObject_oct(PySwigObject *v) +{ + return PySwigObject_format("%o",v); +} + +SWIGRUNTIME PyObject * +PySwigObject_hex(PySwigObject *v) +{ + return PySwigObject_format("%x",v); +} + +SWIGRUNTIME PyObject * +#ifdef METH_NOARGS +PySwigObject_repr(PySwigObject *v) +#else +PySwigObject_repr(PySwigObject *v, PyObject *args) +#endif +{ + const char *name = SWIG_TypePrettyName(v->ty); + PyObject *hex = PySwigObject_hex(v); + PyObject *repr = PyString_FromFormat("<Swig Object of type '%s' at 0x%s>", name, PyString_AsString(hex)); + Py_DECREF(hex); + if (v->next) { +#ifdef METH_NOARGS + PyObject *nrep = PySwigObject_repr((PySwigObject *)v->next); +#else + PyObject *nrep = PySwigObject_repr((PySwigObject *)v->next, args); +#endif + PyString_ConcatAndDel(&repr,nrep); + } + return repr; +} + +SWIGRUNTIME int +PySwigObject_print(PySwigObject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) +{ +#ifdef METH_NOARGS + PyObject *repr = PySwigObject_repr(v); +#else + PyObject *repr = PySwigObject_repr(v, NULL); +#endif + if (repr) { + fputs(PyString_AsString(repr), fp); + Py_DECREF(repr); + return 0; + } else { + return 1; + } +} + +SWIGRUNTIME PyObject * +PySwigObject_str(PySwigObject *v) +{ + char result[SWIG_BUFFER_SIZE]; + return SWIG_PackVoidPtr(result, v->ptr, v->ty->name, sizeof(result)) ? + PyString_FromString(result) : 0; +} + +SWIGRUNTIME int +PySwigObject_compare(PySwigObject *v, PySwigObject *w) +{ + void *i = v->ptr; + void *j = w->ptr; + return (i < j) ? -1 : ((i > j) ? 
1 : 0); +} + +SWIGRUNTIME PyTypeObject* _PySwigObject_type(void); + +SWIGRUNTIME PyTypeObject* +PySwigObject_type(void) { + static PyTypeObject *SWIG_STATIC_POINTER(type) = _PySwigObject_type(); + return type; +} + +SWIGRUNTIMEINLINE int +PySwigObject_Check(PyObject *op) { + return ((op)->ob_type == PySwigObject_type()) + || (strcmp((op)->ob_type->tp_name,"PySwigObject") == 0); +} + +SWIGRUNTIME PyObject * +PySwigObject_New(void *ptr, swig_type_info *ty, int own); + +SWIGRUNTIME void +PySwigObject_dealloc(PyObject *v) +{ + PySwigObject *sobj = (PySwigObject *) v; + PyObject *next = sobj->next; + if (sobj->own) { + swig_type_info *ty = sobj->ty; + PySwigClientData *data = ty ? (PySwigClientData *) ty->clientdata : 0; + PyObject *destroy = data ? data->destroy : 0; + if (destroy) { + /* destroy is always a VARARGS method */ + PyObject *res; + if (data->delargs) { + /* we need to create a temporal object to carry the destroy operation */ + PyObject *tmp = PySwigObject_New(sobj->ptr, ty, 0); + res = SWIG_Python_CallFunctor(destroy, tmp); + Py_DECREF(tmp); + } else { + PyCFunction meth = PyCFunction_GET_FUNCTION(destroy); + PyObject *mself = PyCFunction_GET_SELF(destroy); + res = ((*meth)(mself, v)); + } + Py_XDECREF(res); + } else { + const char *name = SWIG_TypePrettyName(ty); +#if !defined(SWIG_PYTHON_SILENT_MEMLEAK) + printf("swig/python detected a memory leak of type '%s', no destructor found.\n", name); +#endif + } + } + Py_XDECREF(next); + PyObject_DEL(v); +} + +SWIGRUNTIME PyObject* +PySwigObject_append(PyObject* v, PyObject* next) +{ + PySwigObject *sobj = (PySwigObject *) v; +#ifndef METH_O + PyObject *tmp = 0; + if (!PyArg_ParseTuple(next,(char *)"O:append", &tmp)) return NULL; + next = tmp; +#endif + if (!PySwigObject_Check(next)) { + return NULL; + } + sobj->next = next; + Py_INCREF(next); + return SWIG_Py_Void(); +} + +SWIGRUNTIME PyObject* +#ifdef METH_NOARGS +PySwigObject_next(PyObject* v) +#else +PySwigObject_next(PyObject* v, PyObject 
*SWIGUNUSEDPARM(args)) +#endif +{ + PySwigObject *sobj = (PySwigObject *) v; + if (sobj->next) { + Py_INCREF(sobj->next); + return sobj->next; + } else { + return SWIG_Py_Void(); + } +} + +SWIGINTERN PyObject* +#ifdef METH_NOARGS +PySwigObject_disown(PyObject *v) +#else +PySwigObject_disown(PyObject* v, PyObject *SWIGUNUSEDPARM(args)) +#endif +{ + PySwigObject *sobj = (PySwigObject *)v; + sobj->own = 0; + return SWIG_Py_Void(); +} + +SWIGINTERN PyObject* +#ifdef METH_NOARGS +PySwigObject_acquire(PyObject *v) +#else +PySwigObject_acquire(PyObject* v, PyObject *SWIGUNUSEDPARM(args)) +#endif +{ + PySwigObject *sobj = (PySwigObject *)v; + sobj->own = SWIG_POINTER_OWN; + return SWIG_Py_Void(); +} + +SWIGINTERN PyObject* +PySwigObject_own(PyObject *v, PyObject *args) +{ + PyObject *val = 0; +#if (PY_VERSION_HEX < 0x02020000) + if (!PyArg_ParseTuple(args,(char *)"|O:own",&val)) +#else + if (!PyArg_UnpackTuple(args, (char *)"own", 0, 1, &val)) +#endif + { + return NULL; + } + else + { + PySwigObject *sobj = (PySwigObject *)v; + PyObject *obj = PyBool_FromLong(sobj->own); + if (val) { +#ifdef METH_NOARGS + if (PyObject_IsTrue(val)) { + PySwigObject_acquire(v); + } else { + PySwigObject_disown(v); + } +#else + if (PyObject_IsTrue(val)) { + PySwigObject_acquire(v,args); + } else { + PySwigObject_disown(v,args); + } +#endif + } + return obj; + } +} + +#ifdef METH_O +static PyMethodDef +swigobject_methods[] = { + {(char *)"disown", (PyCFunction)PySwigObject_disown, METH_NOARGS, (char *)"releases ownership of the pointer"}, + {(char *)"acquire", (PyCFunction)PySwigObject_acquire, METH_NOARGS, (char *)"aquires ownership of the pointer"}, + {(char *)"own", (PyCFunction)PySwigObject_own, METH_VARARGS, (char *)"returns/sets ownership of the pointer"}, + {(char *)"append", (PyCFunction)PySwigObject_append, METH_O, (char *)"appends another 'this' object"}, + {(char *)"next", (PyCFunction)PySwigObject_next, METH_NOARGS, (char *)"returns the next 'this' object"}, + {(char 
*)"__repr__",(PyCFunction)PySwigObject_repr, METH_NOARGS, (char *)"returns object representation"}, + {0, 0, 0, 0} +}; +#else +static PyMethodDef +swigobject_methods[] = { + {(char *)"disown", (PyCFunction)PySwigObject_disown, METH_VARARGS, (char *)"releases ownership of the pointer"}, + {(char *)"acquire", (PyCFunction)PySwigObject_acquire, METH_VARARGS, (char *)"aquires ownership of the pointer"}, + {(char *)"own", (PyCFunction)PySwigObject_own, METH_VARARGS, (char *)"returns/sets ownership of the pointer"}, + {(char *)"append", (PyCFunction)PySwigObject_append, METH_VARARGS, (char *)"appends another 'this' object"}, + {(char *)"next", (PyCFunction)PySwigObject_next, METH_VARARGS, (char *)"returns the next 'this' object"}, + {(char *)"__repr__",(PyCFunction)PySwigObject_repr, METH_VARARGS, (char *)"returns object representation"}, + {0, 0, 0, 0} +}; +#endif + +#if PY_VERSION_HEX < 0x02020000 +SWIGINTERN PyObject * +PySwigObject_getattr(PySwigObject *sobj,char *name) +{ + return Py_FindMethod(swigobject_methods, (PyObject *)sobj, name); +} +#endif + +SWIGRUNTIME PyTypeObject* +_PySwigObject_type(void) { + static char swigobject_doc[] = "Swig object carries a C/C++ instance pointer"; + + static PyNumberMethods PySwigObject_as_number = { + (binaryfunc)0, /*nb_add*/ + (binaryfunc)0, /*nb_subtract*/ + (binaryfunc)0, /*nb_multiply*/ + (binaryfunc)0, /*nb_divide*/ + (binaryfunc)0, /*nb_remainder*/ + (binaryfunc)0, /*nb_divmod*/ + (ternaryfunc)0,/*nb_power*/ + (unaryfunc)0, /*nb_negative*/ + (unaryfunc)0, /*nb_positive*/ + (unaryfunc)0, /*nb_absolute*/ + (inquiry)0, /*nb_nonzero*/ + 0, /*nb_invert*/ + 0, /*nb_lshift*/ + 0, /*nb_rshift*/ + 0, /*nb_and*/ + 0, /*nb_xor*/ + 0, /*nb_or*/ + (coercion)0, /*nb_coerce*/ + (unaryfunc)PySwigObject_long, /*nb_int*/ + (unaryfunc)PySwigObject_long, /*nb_long*/ + (unaryfunc)0, /*nb_float*/ + (unaryfunc)PySwigObject_oct, /*nb_oct*/ + (unaryfunc)PySwigObject_hex, /*nb_hex*/ +#if PY_VERSION_HEX >= 0x02020000 + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* nb_inplace_add -> nb_inplace_true_divide */ +#elif PY_VERSION_HEX >= 0x02000000 + 0,0,0,0,0,0,0,0,0,0,0 /* nb_inplace_add -> nb_inplace_or */ +#endif + }; + + static PyTypeObject pyswigobject_type; + static int type_init = 0; + if (!type_init) { + const PyTypeObject tmp + = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + (char *)"PySwigObject", /* tp_name */ + sizeof(PySwigObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)PySwigObject_dealloc, /* tp_dealloc */ + (printfunc)PySwigObject_print, /* tp_print */ +#if PY_VERSION_HEX < 0x02020000 + (getattrfunc)PySwigObject_getattr, /* tp_getattr */ +#else + (getattrfunc)0, /* tp_getattr */ +#endif + (setattrfunc)0, /* tp_setattr */ + (cmpfunc)PySwigObject_compare, /* tp_compare */ + (reprfunc)PySwigObject_repr, /* tp_repr */ + &PySwigObject_as_number, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + (reprfunc)PySwigObject_str, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + swigobject_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ +#if PY_VERSION_HEX >= 0x02020000 + 0, /* tp_iter */ + 0, /* tp_iternext */ + swigobject_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ +#endif +#if PY_VERSION_HEX >= 0x02030000 + 0, /* tp_del */ +#endif +#ifdef COUNT_ALLOCS + 0,0,0,0 /* tp_alloc -> tp_next */ +#endif + }; + pyswigobject_type = tmp; + pyswigobject_type.ob_type = &PyType_Type; + type_init = 1; + } + return 
&pyswigobject_type; +} + +SWIGRUNTIME PyObject * +PySwigObject_New(void *ptr, swig_type_info *ty, int own) +{ + PySwigObject *sobj = PyObject_NEW(PySwigObject, PySwigObject_type()); + if (sobj) { + sobj->ptr = ptr; + sobj->ty = ty; + sobj->own = own; + sobj->next = 0; + } + return (PyObject *)sobj; +} + +/* ----------------------------------------------------------------------------- + * Implements a simple Swig Packed type, and use it instead of string + * ----------------------------------------------------------------------------- */ + +typedef struct { + PyObject_HEAD + void *pack; + swig_type_info *ty; + size_t size; +} PySwigPacked; + +SWIGRUNTIME int +PySwigPacked_print(PySwigPacked *v, FILE *fp, int SWIGUNUSEDPARM(flags)) +{ + char result[SWIG_BUFFER_SIZE]; + fputs("<Swig Packed ", fp); + if (SWIG_PackDataName(result, v->pack, v->size, 0, sizeof(result))) { + fputs("at ", fp); + fputs(result, fp); + } + fputs(v->ty->name,fp); + fputs(">", fp); + return 0; +} + +SWIGRUNTIME PyObject * +PySwigPacked_repr(PySwigPacked *v) +{ + char result[SWIG_BUFFER_SIZE]; + if (SWIG_PackDataName(result, v->pack, v->size, 0, sizeof(result))) { + return PyString_FromFormat("<Swig Packed at %s%s>", result, v->ty->name); + } else { + return PyString_FromFormat("<Swig Packed %s>", v->ty->name); + } +} + +SWIGRUNTIME PyObject * +PySwigPacked_str(PySwigPacked *v) +{ + char result[SWIG_BUFFER_SIZE]; + if (SWIG_PackDataName(result, v->pack, v->size, 0, sizeof(result))){ + return PyString_FromFormat("%s%s", result, v->ty->name); + } else { + return PyString_FromString(v->ty->name); + } +} + +SWIGRUNTIME int +PySwigPacked_compare(PySwigPacked *v, PySwigPacked *w) +{ + size_t i = v->size; + size_t j = w->size; + int s = (i < j) ? -1 : ((i > j) ? 1 : 0); + return s ? 
s : strncmp((char *)v->pack, (char *)w->pack, 2*v->size); +} + +SWIGRUNTIME PyTypeObject* _PySwigPacked_type(void); + +SWIGRUNTIME PyTypeObject* +PySwigPacked_type(void) { + static PyTypeObject *SWIG_STATIC_POINTER(type) = _PySwigPacked_type(); + return type; +} + +SWIGRUNTIMEINLINE int +PySwigPacked_Check(PyObject *op) { + return ((op)->ob_type == _PySwigPacked_type()) + || (strcmp((op)->ob_type->tp_name,"PySwigPacked") == 0); +} + +SWIGRUNTIME void +PySwigPacked_dealloc(PyObject *v) +{ + if (PySwigPacked_Check(v)) { + PySwigPacked *sobj = (PySwigPacked *) v; + free(sobj->pack); + } + PyObject_DEL(v); +} + +SWIGRUNTIME PyTypeObject* +_PySwigPacked_type(void) { + static char swigpacked_doc[] = "Swig object carries a C/C++ instance pointer"; + static PyTypeObject pyswigpacked_type; + static int type_init = 0; + if (!type_init) { + const PyTypeObject tmp + = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + (char *)"PySwigPacked", /* tp_name */ + sizeof(PySwigPacked), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)PySwigPacked_dealloc, /* tp_dealloc */ + (printfunc)PySwigPacked_print, /* tp_print */ + (getattrfunc)0, /* tp_getattr */ + (setattrfunc)0, /* tp_setattr */ + (cmpfunc)PySwigPacked_compare, /* tp_compare */ + (reprfunc)PySwigPacked_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)0, /* tp_hash */ + (ternaryfunc)0, /* tp_call */ + (reprfunc)PySwigPacked_str, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + swigpacked_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ +#if PY_VERSION_HEX >= 0x02020000 + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 
0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ +#endif +#if PY_VERSION_HEX >= 0x02030000 + 0, /* tp_del */ +#endif +#ifdef COUNT_ALLOCS + 0,0,0,0 /* tp_alloc -> tp_next */ +#endif + }; + pyswigpacked_type = tmp; + pyswigpacked_type.ob_type = &PyType_Type; + type_init = 1; + } + return &pyswigpacked_type; +} + +SWIGRUNTIME PyObject * +PySwigPacked_New(void *ptr, size_t size, swig_type_info *ty) +{ + PySwigPacked *sobj = PyObject_NEW(PySwigPacked, PySwigPacked_type()); + if (sobj) { + void *pack = malloc(size); + if (pack) { + memcpy(pack, ptr, size); + sobj->pack = pack; + sobj->ty = ty; + sobj->size = size; + } else { + PyObject_DEL((PyObject *) sobj); + sobj = 0; + } + } + return (PyObject *) sobj; +} + +SWIGRUNTIME swig_type_info * +PySwigPacked_UnpackData(PyObject *obj, void *ptr, size_t size) +{ + if (PySwigPacked_Check(obj)) { + PySwigPacked *sobj = (PySwigPacked *)obj; + if (sobj->size != size) return 0; + memcpy(ptr, sobj->pack, size); + return sobj->ty; + } else { + return 0; + } +} + +/* ----------------------------------------------------------------------------- + * pointers/data manipulation + * ----------------------------------------------------------------------------- */ + +SWIGRUNTIMEINLINE PyObject * +_SWIG_This(void) +{ + return PyString_FromString("this"); +} + +SWIGRUNTIME PyObject * +SWIG_This(void) +{ + static PyObject *SWIG_STATIC_POINTER(swig_this) = _SWIG_This(); + return swig_this; +} + +/* #define SWIG_PYTHON_SLOW_GETSET_THIS */ + +SWIGRUNTIME PySwigObject * +SWIG_Python_GetSwigThis(PyObject *pyobj) +{ + if (PySwigObject_Check(pyobj)) { + return (PySwigObject *) pyobj; + } else { + PyObject *obj = 0; +#if (!defined(SWIG_PYTHON_SLOW_GETSET_THIS) && (PY_VERSION_HEX >= 0x02030000)) + if (PyInstance_Check(pyobj)) { + obj = _PyInstance_Lookup(pyobj, SWIG_This()); + } else { + PyObject **dictptr = 
_PyObject_GetDictPtr(pyobj); + if (dictptr != NULL) { + PyObject *dict = *dictptr; + obj = dict ? PyDict_GetItem(dict, SWIG_This()) : 0; + } else { +#ifdef PyWeakref_CheckProxy + if (PyWeakref_CheckProxy(pyobj)) { + PyObject *wobj = PyWeakref_GET_OBJECT(pyobj); + return wobj ? SWIG_Python_GetSwigThis(wobj) : 0; + } +#endif + obj = PyObject_GetAttr(pyobj,SWIG_This()); + if (obj) { + Py_DECREF(obj); + } else { + if (PyErr_Occurred()) PyErr_Clear(); + return 0; + } + } + } +#else + obj = PyObject_GetAttr(pyobj,SWIG_This()); + if (obj) { + Py_DECREF(obj); + } else { + if (PyErr_Occurred()) PyErr_Clear(); + return 0; + } +#endif + if (obj && !PySwigObject_Check(obj)) { + /* a PyObject is called 'this', try to get the 'real this' + PySwigObject from it */ + return SWIG_Python_GetSwigThis(obj); + } + return (PySwigObject *)obj; + } +} + +/* Acquire a pointer value */ + +SWIGRUNTIME int +SWIG_Python_AcquirePtr(PyObject *obj, int own) { + if (own) { + PySwigObject *sobj = SWIG_Python_GetSwigThis(obj); + if (sobj) { + int oldown = sobj->own; + sobj->own = own; + return oldown; + } + } + return 0; +} + +/* Convert a pointer value */ + +SWIGRUNTIME int +SWIG_Python_ConvertPtrAndOwn(PyObject *obj, void **ptr, swig_type_info *ty, int flags, int *own) { + if (!obj) return SWIG_ERROR; + if (obj == Py_None) { + if (ptr) *ptr = 0; + return SWIG_OK; + } else { + PySwigObject *sobj = SWIG_Python_GetSwigThis(obj); + while (sobj) { + void *vptr = sobj->ptr; + if (ty) { + swig_type_info *to = sobj->ty; + if (to == ty) { + /* no type cast needed */ + if (ptr) *ptr = vptr; + break; + } else { + swig_cast_info *tc = SWIG_TypeCheck(to->name,ty); + if (!tc) { + sobj = (PySwigObject *)sobj->next; + } else { + if (ptr) *ptr = SWIG_TypeCast(tc,vptr); + break; + } + } + } else { + if (ptr) *ptr = vptr; + break; + } + } + if (sobj) { + if (own) *own = sobj->own; + if (flags & SWIG_POINTER_DISOWN) { + sobj->own = 0; + } + return SWIG_OK; + } else { + int res = SWIG_ERROR; + if (flags & 
SWIG_POINTER_IMPLICIT_CONV) { + PySwigClientData *data = ty ? (PySwigClientData *) ty->clientdata : 0; + if (data && !data->implicitconv) { + PyObject *klass = data->klass; + if (klass) { + PyObject *impconv; + data->implicitconv = 1; /* avoid recursion and call 'explicit' constructors*/ + impconv = SWIG_Python_CallFunctor(klass, obj); + data->implicitconv = 0; + if (PyErr_Occurred()) { + PyErr_Clear(); + impconv = 0; + } + if (impconv) { + PySwigObject *iobj = SWIG_Python_GetSwigThis(impconv); + if (iobj) { + void *vptr; + res = SWIG_Python_ConvertPtrAndOwn((PyObject*)iobj, &vptr, ty, 0, 0); + if (SWIG_IsOK(res)) { + if (ptr) { + *ptr = vptr; + /* transfer the ownership to 'ptr' */ + iobj->own = 0; + res = SWIG_AddCast(res); + res = SWIG_AddNewMask(res); + } else { + res = SWIG_AddCast(res); + } + } + } + Py_DECREF(impconv); + } + } + } + } + return res; + } + } +} + +/* Convert a function ptr value */ + +SWIGRUNTIME int +SWIG_Python_ConvertFunctionPtr(PyObject *obj, void **ptr, swig_type_info *ty) { + if (!PyCFunction_Check(obj)) { + return SWIG_ConvertPtr(obj, ptr, ty, 0); + } else { + void *vptr = 0; + + /* here we get the method pointer for callbacks */ + const char *doc = (((PyCFunctionObject *)obj) -> m_ml -> ml_doc); + const char *desc = doc ? strstr(doc, "swig_ptr: ") : 0; + if (desc) { + desc = ty ? SWIG_UnpackVoidPtr(desc + 10, &vptr, ty->name) : 0; + if (!desc) return SWIG_ERROR; + } + if (ty) { + swig_cast_info *tc = SWIG_TypeCheck(desc,ty); + if (!tc) return SWIG_ERROR; + *ptr = SWIG_TypeCast(tc,vptr); + } else { + *ptr = vptr; + } + return SWIG_OK; + } +} + +/* Convert a packed value value */ + +SWIGRUNTIME int +SWIG_Python_ConvertPacked(PyObject *obj, void *ptr, size_t sz, swig_type_info *ty) { + swig_type_info *to = PySwigPacked_UnpackData(obj, ptr, sz); + if (!to) return SWIG_ERROR; + if (ty) { + if (to != ty) { + /* check type cast? 
*/ + swig_cast_info *tc = SWIG_TypeCheck(to->name,ty); + if (!tc) return SWIG_ERROR; + } + } + return SWIG_OK; +} + +/* ----------------------------------------------------------------------------- + * Create a new pointer object + * ----------------------------------------------------------------------------- */ + +/* + Create a new instance object, whitout calling __init__, and set the + 'this' attribute. +*/ + +SWIGRUNTIME PyObject* +SWIG_Python_NewShadowInstance(PySwigClientData *data, PyObject *swig_this) +{ +#if (PY_VERSION_HEX >= 0x02020000) + PyObject *inst = 0; + PyObject *newraw = data->newraw; + if (newraw) { + inst = PyObject_Call(newraw, data->newargs, NULL); + if (inst) { +#if !defined(SWIG_PYTHON_SLOW_GETSET_THIS) + PyObject **dictptr = _PyObject_GetDictPtr(inst); + if (dictptr != NULL) { + PyObject *dict = *dictptr; + if (dict == NULL) { + dict = PyDict_New(); + *dictptr = dict; + PyDict_SetItem(dict, SWIG_This(), swig_this); + } + } +#else + PyObject *key = SWIG_This(); + PyObject_SetAttr(inst, key, swig_this); +#endif + } + } else { + PyObject *dict = PyDict_New(); + PyDict_SetItem(dict, SWIG_This(), swig_this); + inst = PyInstance_NewRaw(data->newargs, dict); + Py_DECREF(dict); + } + return inst; +#else +#if (PY_VERSION_HEX >= 0x02010000) + PyObject *inst; + PyObject *dict = PyDict_New(); + PyDict_SetItem(dict, SWIG_This(), swig_this); + inst = PyInstance_NewRaw(data->newargs, dict); + Py_DECREF(dict); + return (PyObject *) inst; +#else + PyInstanceObject *inst = PyObject_NEW(PyInstanceObject, &PyInstance_Type); + if (inst == NULL) { + return NULL; + } + inst->in_class = (PyClassObject *)data->newargs; + Py_INCREF(inst->in_class); + inst->in_dict = PyDict_New(); + if (inst->in_dict == NULL) { + Py_DECREF(inst); + return NULL; + } +#ifdef Py_TPFLAGS_HAVE_WEAKREFS + inst->in_weakreflist = NULL; +#endif +#ifdef Py_TPFLAGS_GC + PyObject_GC_Init(inst); +#endif + PyDict_SetItem(inst->in_dict, SWIG_This(), swig_this); + return (PyObject *) inst; +#endif 
+#endif +} + +SWIGRUNTIME void +SWIG_Python_SetSwigThis(PyObject *inst, PyObject *swig_this) +{ + PyObject *dict; +#if (PY_VERSION_HEX >= 0x02020000) && !defined(SWIG_PYTHON_SLOW_GETSET_THIS) + PyObject **dictptr = _PyObject_GetDictPtr(inst); + if (dictptr != NULL) { + dict = *dictptr; + if (dict == NULL) { + dict = PyDict_New(); + *dictptr = dict; + } + PyDict_SetItem(dict, SWIG_This(), swig_this); + return; + } +#endif + dict = PyObject_GetAttrString(inst, (char*)"__dict__"); + PyDict_SetItem(dict, SWIG_This(), swig_this); + Py_DECREF(dict); +} + + +SWIGINTERN PyObject * +SWIG_Python_InitShadowInstance(PyObject *args) { + PyObject *obj[2]; + if (!SWIG_Python_UnpackTuple(args,(char*)"swiginit", 2, 2, obj)) { + return NULL; + } else { + PySwigObject *sthis = SWIG_Python_GetSwigThis(obj[0]); + if (sthis) { + PySwigObject_append((PyObject*) sthis, obj[1]); + } else { + SWIG_Python_SetSwigThis(obj[0], obj[1]); + } + return SWIG_Py_Void(); + } +} + +/* Create a new pointer object */ + +SWIGRUNTIME PyObject * +SWIG_Python_NewPointerObj(void *ptr, swig_type_info *type, int flags) { + if (!ptr) { + return SWIG_Py_Void(); + } else { + int own = (flags & SWIG_POINTER_OWN) ? SWIG_POINTER_OWN : 0; + PyObject *robj = PySwigObject_New(ptr, type, own); + PySwigClientData *clientdata = type ? (PySwigClientData *)(type->clientdata) : 0; + if (clientdata && !(flags & SWIG_POINTER_NOSHADOW)) { + PyObject *inst = SWIG_Python_NewShadowInstance(clientdata, robj); + if (inst) { + Py_DECREF(robj); + robj = inst; + } + } + return robj; + } +} + +/* Create a new packed object */ + +SWIGRUNTIMEINLINE PyObject * +SWIG_Python_NewPackedObj(void *ptr, size_t sz, swig_type_info *type) { + return ptr ? 
PySwigPacked_New((void *) ptr, sz, type) : SWIG_Py_Void(); +} + +/* -----------------------------------------------------------------------------* + * Get type list + * -----------------------------------------------------------------------------*/ + +#ifdef SWIG_LINK_RUNTIME +void *SWIG_ReturnGlobalTypeList(void *); +#endif + +SWIGRUNTIME swig_module_info * +SWIG_Python_GetModule(void) { + static void *type_pointer = (void *)0; + /* first check if module already created */ + if (!type_pointer) { +#ifdef SWIG_LINK_RUNTIME + type_pointer = SWIG_ReturnGlobalTypeList((void *)0); +#else + type_pointer = PyCObject_Import((char*)"swig_runtime_data" SWIG_RUNTIME_VERSION, + (char*)"type_pointer" SWIG_TYPE_TABLE_NAME); + if (PyErr_Occurred()) { + PyErr_Clear(); + type_pointer = (void *)0; + } +#endif + } + return (swig_module_info *) type_pointer; +} + +#if PY_MAJOR_VERSION < 2 +/* PyModule_AddObject function was introduced in Python 2.0. The following function + is copied out of Python/modsupport.c in python version 2.3.4 */ +SWIGINTERN int +PyModule_AddObject(PyObject *m, char *name, PyObject *o) +{ + PyObject *dict; + if (!PyModule_Check(m)) { + PyErr_SetString(PyExc_TypeError, + "PyModule_AddObject() needs module as first arg"); + return SWIG_ERROR; + } + if (!o) { + PyErr_SetString(PyExc_TypeError, + "PyModule_AddObject() needs non-NULL value"); + return SWIG_ERROR; + } + + dict = PyModule_GetDict(m); + if (dict == NULL) { + /* Internal error -- modules must have a dict! 
*/ + PyErr_Format(PyExc_SystemError, "module '%s' has no __dict__", + PyModule_GetName(m)); + return SWIG_ERROR; + } + if (PyDict_SetItemString(dict, name, o)) + return SWIG_ERROR; + Py_DECREF(o); + return SWIG_OK; +} +#endif + +SWIGRUNTIME void +SWIG_Python_DestroyModule(void *vptr) +{ + swig_module_info *swig_module = (swig_module_info *) vptr; + swig_type_info **types = swig_module->types; + size_t i; + for (i =0; i < swig_module->size; ++i) { + swig_type_info *ty = types[i]; + if (ty->owndata) { + PySwigClientData *data = (PySwigClientData *) ty->clientdata; + if (data) PySwigClientData_Del(data); + } + } + Py_DECREF(SWIG_This()); +} + +SWIGRUNTIME void +SWIG_Python_SetModule(swig_module_info *swig_module) { + static PyMethodDef swig_empty_runtime_method_table[] = { {NULL, NULL, 0, NULL} };/* Sentinel */ + + PyObject *module = Py_InitModule((char*)"swig_runtime_data" SWIG_RUNTIME_VERSION, + swig_empty_runtime_method_table); + PyObject *pointer = PyCObject_FromVoidPtr((void *) swig_module, SWIG_Python_DestroyModule); + if (pointer && module) { + PyModule_AddObject(module, (char*)"type_pointer" SWIG_TYPE_TABLE_NAME, pointer); + } else { + Py_XDECREF(pointer); + } +} + +/* The python cached type query */ +SWIGRUNTIME PyObject * +SWIG_Python_TypeCache(void) { + static PyObject *SWIG_STATIC_POINTER(cache) = PyDict_New(); + return cache; +} + +SWIGRUNTIME swig_type_info * +SWIG_Python_TypeQuery(const char *type) +{ + PyObject *cache = SWIG_Python_TypeCache(); + PyObject *key = PyString_FromString(type); + PyObject *obj = PyDict_GetItem(cache, key); + swig_type_info *descriptor; + if (obj) { + descriptor = (swig_type_info *) PyCObject_AsVoidPtr(obj); + } else { + swig_module_info *swig_module = SWIG_Python_GetModule(); + descriptor = SWIG_TypeQueryModule(swig_module, swig_module, type); + if (descriptor) { + obj = PyCObject_FromVoidPtr(descriptor, NULL); + PyDict_SetItem(cache, key, obj); + Py_DECREF(obj); + } + } + Py_DECREF(key); + return descriptor; +} + +/* + For 
backward compatibility only +*/ +#define SWIG_POINTER_EXCEPTION 0 +#define SWIG_arg_fail(arg) SWIG_Python_ArgFail(arg) +#define SWIG_MustGetPtr(p, type, argnum, flags) SWIG_Python_MustGetPtr(p, type, argnum, flags) + +SWIGRUNTIME int +SWIG_Python_AddErrMesg(const char* mesg, int infront) +{ + if (PyErr_Occurred()) { + PyObject *type = 0; + PyObject *value = 0; + PyObject *traceback = 0; + PyErr_Fetch(&type, &value, &traceback); + if (value) { + PyObject *old_str = PyObject_Str(value); + Py_XINCREF(type); + PyErr_Clear(); + if (infront) { + PyErr_Format(type, "%s %s", mesg, PyString_AsString(old_str)); + } else { + PyErr_Format(type, "%s %s", PyString_AsString(old_str), mesg); + } + Py_DECREF(old_str); + } + return 1; + } else { + return 0; + } +} + +SWIGRUNTIME int +SWIG_Python_ArgFail(int argnum) +{ + if (PyErr_Occurred()) { + /* add information about failing argument */ + char mesg[256]; + PyOS_snprintf(mesg, sizeof(mesg), "argument number %d:", argnum); + return SWIG_Python_AddErrMesg(mesg, 1); + } else { + return 0; + } +} + +SWIGRUNTIMEINLINE const char * +PySwigObject_GetDesc(PyObject *self) +{ + PySwigObject *v = (PySwigObject *)self; + swig_type_info *ty = v ? v->ty : 0; + return ty ? ty->str : (char*)""; +} + +SWIGRUNTIME void +SWIG_Python_TypeError(const char *type, PyObject *obj) +{ + if (type) { +#if defined(SWIG_COBJECT_TYPES) + if (obj && PySwigObject_Check(obj)) { + const char *otype = (const char *) PySwigObject_GetDesc(obj); + if (otype) { + PyErr_Format(PyExc_TypeError, "a '%s' is expected, 'PySwigObject(%s)' is received", + type, otype); + return; + } + } else +#endif + { + const char *otype = (obj ? obj->ob_type->tp_name : 0); + if (otype) { + PyObject *str = PyObject_Str(obj); + const char *cstr = str ? 
PyString_AsString(str) : 0; + if (cstr) { + PyErr_Format(PyExc_TypeError, "a '%s' is expected, '%s(%s)' is received", + type, otype, cstr); + } else { + PyErr_Format(PyExc_TypeError, "a '%s' is expected, '%s' is received", + type, otype); + } + Py_XDECREF(str); + return; + } + } + PyErr_Format(PyExc_TypeError, "a '%s' is expected", type); + } else { + PyErr_Format(PyExc_TypeError, "unexpected type is received"); + } +} + + +/* Convert a pointer value, signal an exception on a type mismatch */ +SWIGRUNTIME void * +SWIG_Python_MustGetPtr(PyObject *obj, swig_type_info *ty, int argnum, int flags) { + void *result; + if (SWIG_Python_ConvertPtr(obj, &result, ty, flags) == -1) { + PyErr_Clear(); + if (flags & SWIG_POINTER_EXCEPTION) { + SWIG_Python_TypeError(SWIG_TypePrettyName(ty), obj); + SWIG_Python_ArgFail(argnum); + } + } + return result; +} + + +#ifdef __cplusplus +#if 0 +{ /* cc-mode */ +#endif +} +#endif + + + +#define SWIG_exception_fail(code, msg) do { SWIG_Error(code, msg); SWIG_fail; } while(0) + +#define SWIG_contract_assert(expr, msg) if (!(expr)) { SWIG_Error(SWIG_RuntimeError, msg); SWIG_fail; } else + + + +/* -------- TYPES TABLE (BEGIN) -------- */ + +#define SWIGTYPE_p_char swig_types[0] +#define SWIGTYPE_p_p_char swig_types[1] +#define SWIGTYPE_p_unsigned_int swig_types[2] +static swig_type_info *swig_types[4]; +static swig_module_info swig_module = {swig_types, 3, 0, 0, 0, 0}; +#define SWIG_TypeQuery(name) SWIG_TypeQueryModule(&swig_module, &swig_module, name) +#define SWIG_MangledTypeQuery(name) SWIG_MangledTypeQueryModule(&swig_module, &swig_module, name) + +/* -------- TYPES TABLE (END) -------- */ + +#if (PY_VERSION_HEX <= 0x02000000) +# if !defined(SWIG_PYTHON_CLASSIC) +# error "This python version requires swig to be run with the '-classic' option" +# endif +#endif + +/*----------------------------------------------- + @(target):= _xdelta3.so + ------------------------------------------------*/ +#define SWIG_init init_xdelta3 + +#define 
SWIG_name "_xdelta3" + +#define SWIGVERSION 0x010331 +#define SWIG_VERSION SWIGVERSION + + +#define SWIG_as_voidptr(a) (void *)((const void *)(a)) +#define SWIG_as_voidptrptr(a) ((void)SWIG_as_voidptr(*a),(void**)(a)) + + +#include "xdelta3.h" + +int xd3_main_cmdline (int ARGC, char **ARGV); + +#undef SWIG_init +#undef SWIG_name + +#define SWIG_init initxdelta3 +#define SWIG_name "xdelta3" + + + +SWIGINTERN swig_type_info* +SWIG_pchar_descriptor(void) +{ + static int init = 0; + static swig_type_info* info = 0; + if (!init) { + info = SWIG_TypeQuery("_p_char"); + init = 1; + } + return info; +} + + +SWIGINTERN int +SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) +{ + if (PyString_Check(obj)) { + char *cstr; Py_ssize_t len; + PyString_AsStringAndSize(obj, &cstr, &len); + if (cptr) { + if (alloc) { + /* + In python the user should not be able to modify the inner + string representation. To warranty that, if you define + SWIG_PYTHON_SAFE_CSTRINGS, a new/copy of the python string + buffer is always returned. + + The default behavior is just to return the pointer value, + so, be careful. + */ +#if defined(SWIG_PYTHON_SAFE_CSTRINGS) + if (*alloc != SWIG_OLDOBJ) +#else + if (*alloc == SWIG_NEWOBJ) +#endif + { + *cptr = (char *)memcpy((char *)malloc((len + 1)*sizeof(char)), cstr, sizeof(char)*(len + 1)); + *alloc = SWIG_NEWOBJ; + } + else { + *cptr = cstr; + *alloc = SWIG_OLDOBJ; + } + } else { + *cptr = PyString_AsString(obj); + } + } + if (psize) *psize = len + 1; + return SWIG_OK; + } else { + swig_type_info* pchar_descriptor = SWIG_pchar_descriptor(); + if (pchar_descriptor) { + void* vptr = 0; + if (SWIG_ConvertPtr(obj, &vptr, pchar_descriptor, 0) == SWIG_OK) { + if (cptr) *cptr = (char *) vptr; + if (psize) *psize = vptr ? 
(strlen((char *)vptr) + 1) : 0; + if (alloc) *alloc = SWIG_OLDOBJ; + return SWIG_OK; + } + } + } + return SWIG_TypeError; +} + + +#include <limits.h> +#ifndef LLONG_MIN +# define LLONG_MIN LONG_LONG_MIN +#endif +#ifndef LLONG_MAX +# define LLONG_MAX LONG_LONG_MAX +#endif +#ifndef ULLONG_MAX +# define ULLONG_MAX ULONG_LONG_MAX +#endif + + +SWIGINTERN int +SWIG_AsVal_double (PyObject *obj, double *val) +{ + int res = SWIG_TypeError; + if (PyFloat_Check(obj)) { + if (val) *val = PyFloat_AsDouble(obj); + return SWIG_OK; + } else if (PyInt_Check(obj)) { + if (val) *val = PyInt_AsLong(obj); + return SWIG_OK; + } else if (PyLong_Check(obj)) { + double v = PyLong_AsDouble(obj); + if (!PyErr_Occurred()) { + if (val) *val = v; + return SWIG_OK; + } else { + PyErr_Clear(); + } + } +#ifdef SWIG_PYTHON_CAST_MODE + { + int dispatch = 0; + double d = PyFloat_AsDouble(obj); + if (!PyErr_Occurred()) { + if (val) *val = d; + return SWIG_AddCast(SWIG_OK); + } else { + PyErr_Clear(); + } + if (!dispatch) { + long v = PyLong_AsLong(obj); + if (!PyErr_Occurred()) { + if (val) *val = v; + return SWIG_AddCast(SWIG_AddCast(SWIG_OK)); + } else { + PyErr_Clear(); + } + } + } +#endif + return res; +} + + +#include <float.h> + + +#include <math.h> + + +SWIGINTERNINLINE int +SWIG_CanCastAsInteger(double *d, double min, double max) { + double x = *d; + if ((min <= x && x <= max)) { + double fx = floor(x); + double cx = ceil(x); + double rd = ((x - fx) < 0.5) ? 
fx : cx; /* simple rint */ + if ((errno == EDOM) || (errno == ERANGE)) { + errno = 0; + } else { + double summ, reps, diff; + if (rd < x) { + diff = x - rd; + } else if (rd > x) { + diff = rd - x; + } else { + return 1; + } + summ = rd + x; + reps = diff/summ; + if (reps < 8*DBL_EPSILON) { + *d = rd; + return 1; + } + } + } + return 0; +} + + +SWIGINTERN int +SWIG_AsVal_long (PyObject *obj, long* val) +{ + if (PyInt_Check(obj)) { + if (val) *val = PyInt_AsLong(obj); + return SWIG_OK; + } else if (PyLong_Check(obj)) { + long v = PyLong_AsLong(obj); + if (!PyErr_Occurred()) { + if (val) *val = v; + return SWIG_OK; + } else { + PyErr_Clear(); + } + } +#ifdef SWIG_PYTHON_CAST_MODE + { + int dispatch = 0; + long v = PyInt_AsLong(obj); + if (!PyErr_Occurred()) { + if (val) *val = v; + return SWIG_AddCast(SWIG_OK); + } else { + PyErr_Clear(); + } + if (!dispatch) { + double d; + int res = SWIG_AddCast(SWIG_AsVal_double (obj,&d)); + if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, LONG_MIN, LONG_MAX)) { + if (val) *val = (long)(d); + return res; + } + } + } +#endif + return SWIG_TypeError; +} + + +SWIGINTERN int +SWIG_AsVal_int (PyObject * obj, int *val) +{ + long v; + int res = SWIG_AsVal_long (obj, &v); + if (SWIG_IsOK(res)) { + if ((v < INT_MIN || v > INT_MAX)) { + return SWIG_OverflowError; + } else { + if (val) *val = (int)(v); + } + } + return res; +} + + + + + +SWIGINTERN int +SWIG_AsVal_unsigned_SS_long (PyObject *obj, unsigned long *val) +{ + if (PyInt_Check(obj)) { + long v = PyInt_AsLong(obj); + if (v >= 0) { + if (val) *val = v; + return SWIG_OK; + } else { + return SWIG_OverflowError; + } + } else if (PyLong_Check(obj)) { + unsigned long v = PyLong_AsUnsignedLong(obj); + if (!PyErr_Occurred()) { + if (val) *val = v; + return SWIG_OK; + } else { + PyErr_Clear(); + } + } +#ifdef SWIG_PYTHON_CAST_MODE + { + int dispatch = 0; + unsigned long v = PyLong_AsUnsignedLong(obj); + if (!PyErr_Occurred()) { + if (val) *val = v; + return SWIG_AddCast(SWIG_OK); + } else { + 
PyErr_Clear(); + } + if (!dispatch) { + double d; + int res = SWIG_AddCast(SWIG_AsVal_double (obj,&d)); + if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, 0, ULONG_MAX)) { + if (val) *val = (unsigned long)(d); + return res; + } + } + } +#endif + return SWIG_TypeError; +} + + +SWIGINTERN int +SWIG_AsVal_unsigned_SS_int (PyObject * obj, unsigned int *val) +{ + unsigned long v; + int res = SWIG_AsVal_unsigned_SS_long (obj, &v); + if (SWIG_IsOK(res)) { + if ((v > UINT_MAX)) { + return SWIG_OverflowError; + } else { + if (val) *val = (unsigned int)(v); + } + } + return res; +} + + +#define t_output_helper SWIG_Python_AppendOutput + + + #define SWIG_From_long PyInt_FromLong + + +SWIGINTERNINLINE PyObject * +SWIG_From_int (int value) +{ + return SWIG_From_long (value); +} + + +SWIGINTERN int +SWIG_AsArgcArgv(PyObject *input, + swig_type_info *ppchar_info, + size_t *argc, char ***argv, int *owner) +{ + void *vptr; + int res = SWIG_ConvertPtr(input, &vptr, ppchar_info, 0); + if (!SWIG_IsOK(res)) { + int list = 0; + PyErr_Clear(); + list = PyList_Check(input); + if (list || PyTuple_Check(input)) { + size_t i = 0; + size_t size = list ? PyList_Size(input) : PyTuple_Size(input); + if (argc) *argc = size; + if (argv) { + *argv = (char* *)malloc((size + 1)*sizeof(char*)); + for (; i < size; ++i) { + PyObject *obj = list ? PyList_GetItem(input,i) : PyTuple_GetItem(input,i); + char *cptr = 0; size_t sz = 0; int alloc = 0; + res = SWIG_AsCharPtrAndSize(obj, &cptr, &sz, &alloc); + if (SWIG_IsOK(res)) { + if (cptr && sz) { + (*argv)[i] = (alloc == SWIG_NEWOBJ) ? cptr : (char *)memcpy((char *)malloc((sz)*sizeof(char)), cptr, sizeof(char)*(sz)); + } else { + (*argv)[i] = 0; + } + } else { + return SWIG_TypeError; + } + } + (*argv)[i] = 0; + if (owner) *owner = 1; + } else { + for (; i < size; ++i) { + PyObject *obj = list ? 
PyList_GetItem(input,i) : PyTuple_GetItem(input,i); + res = SWIG_AsCharPtrAndSize(obj, 0, 0, 0); + if (!SWIG_IsOK(res)) return SWIG_TypeError; + } + if (owner) *owner = 0; + } + return SWIG_OK; + } else { + return SWIG_TypeError; + } + } else { + /* seems dangerous, but the user asked for it... */ + size_t i = 0; + if (argv) { while (*argv[i] != 0) ++i;} + if (argc) *argc = i; + if (owner) *owner = 0; + return SWIG_OK; + } +} + +#ifdef __cplusplus +extern "C" { +#endif +SWIGINTERN PyObject *_wrap_xd3_encode_memory(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + char *arg1 = (char *) 0 ; + unsigned int arg2 ; + char *arg3 = (char *) 0 ; + unsigned int arg4 ; + char *arg5 = (char *) 0 ; + unsigned int *arg6 = (unsigned int *) 0 ; + unsigned int arg7 ; + int arg8 ; + int result; + int res1 ; + char *buf1 = 0 ; + size_t size1 = 0 ; + int alloc1 = 0 ; + int res3 ; + char *buf3 = 0 ; + size_t size3 = 0 ; + int alloc3 = 0 ; + unsigned int alloc_size7 ; + int val8 ; + int ecode8 = 0 ; + PyObject * obj0 = 0 ; + PyObject * obj1 = 0 ; + PyObject * obj2 = 0 ; + PyObject * obj3 = 0 ; + + { + arg8 = 0; + } + { + + } + if (!PyArg_ParseTuple(args,(char *)"OOO|O:xd3_encode_memory",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; + res1 = SWIG_AsCharPtrAndSize(obj0, &buf1, &size1, &alloc1); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "xd3_encode_memory" "', argument " "1"" of type '" "(const char *input, unsigned int input_size)""'"); + } + arg1 = (char *) buf1; + arg2 = (unsigned int) size1 - 1; + res3 = SWIG_AsCharPtrAndSize(obj1, &buf3, &size3, &alloc3); + if (!SWIG_IsOK(res3)) { + SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "xd3_encode_memory" "', argument " "3"" of type '" "(const char *source, unsigned int source_size)""'"); + } + arg3 = (char *) buf3; + arg4 = (unsigned int) size3 - 1; + { + arg7 = alloc_size7 = PyInt_AsLong(obj2); + } + if (obj3) { + ecode8 = SWIG_AsVal_int(obj3, &val8); + if 
(!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "xd3_encode_memory" "', argument " "8"" of type '" "int""'"); + } + arg8 = (int)(val8); + } + { + // alloc_size input is #7th position in xd3_xxcode_memory() + arg5 = malloc(alloc_size7); + arg6 = &alloc_size7; + } + result = (int)xd3_encode_memory((char const *)arg1,arg2,(char const *)arg3,arg4,arg5,arg6,arg7,arg8); + resultobj = SWIG_From_int((int)(result)); + { + if (result == 0) { + PyObject *o; + // alloc_size7 now carries actual size + o = PyString_FromStringAndSize(arg5,alloc_size7); + resultobj = t_output_helper(resultobj,o); + } else { + resultobj = t_output_helper(resultobj,Py_None); + } + free(arg5); + } + if (alloc1 == SWIG_NEWOBJ) free((char*)buf1); + if (alloc3 == SWIG_NEWOBJ) free((char*)buf3); + return resultobj; +fail: + if (alloc1 == SWIG_NEWOBJ) free((char*)buf1); + if (alloc3 == SWIG_NEWOBJ) free((char*)buf3); + return NULL; +} + + +SWIGINTERN PyObject *_wrap_xd3_decode_memory(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + char *arg1 = (char *) 0 ; + unsigned int arg2 ; + char *arg3 = (char *) 0 ; + unsigned int arg4 ; + char *arg5 = (char *) 0 ; + unsigned int *arg6 = (unsigned int *) 0 ; + unsigned int arg7 ; + int arg8 ; + int result; + int res1 ; + char *buf1 = 0 ; + size_t size1 = 0 ; + int alloc1 = 0 ; + int res3 ; + char *buf3 = 0 ; + size_t size3 = 0 ; + int alloc3 = 0 ; + unsigned int alloc_size7 ; + int val8 ; + int ecode8 = 0 ; + PyObject * obj0 = 0 ; + PyObject * obj1 = 0 ; + PyObject * obj2 = 0 ; + PyObject * obj3 = 0 ; + + { + arg8 = 0; + } + { + + } + if (!PyArg_ParseTuple(args,(char *)"OOO|O:xd3_decode_memory",&obj0,&obj1,&obj2,&obj3)) SWIG_fail; + res1 = SWIG_AsCharPtrAndSize(obj0, &buf1, &size1, &alloc1); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "xd3_decode_memory" "', argument " "1"" of type '" "(const char *input, unsigned int input_size)""'"); + } + arg1 = (char *) buf1; 
+ arg2 = (unsigned int) size1 - 1; + res3 = SWIG_AsCharPtrAndSize(obj1, &buf3, &size3, &alloc3); + if (!SWIG_IsOK(res3)) { + SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "xd3_decode_memory" "', argument " "3"" of type '" "(const char *source, unsigned int source_size)""'"); + } + arg3 = (char *) buf3; + arg4 = (unsigned int) size3 - 1; + { + arg7 = alloc_size7 = PyInt_AsLong(obj2); + } + if (obj3) { + ecode8 = SWIG_AsVal_int(obj3, &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "xd3_decode_memory" "', argument " "8"" of type '" "int""'"); + } + arg8 = (int)(val8); + } + { + // alloc_size input is #7th position in xd3_xxcode_memory() + arg5 = malloc(alloc_size7); + arg6 = &alloc_size7; + } + result = (int)xd3_decode_memory((char const *)arg1,arg2,(char const *)arg3,arg4,arg5,arg6,arg7,arg8); + resultobj = SWIG_From_int((int)(result)); + { + if (result == 0) { + PyObject *o; + // alloc_size7 now carries actual size + o = PyString_FromStringAndSize(arg5,alloc_size7); + resultobj = t_output_helper(resultobj,o); + } else { + resultobj = t_output_helper(resultobj,Py_None); + } + free(arg5); + } + if (alloc1 == SWIG_NEWOBJ) free((char*)buf1); + if (alloc3 == SWIG_NEWOBJ) free((char*)buf3); + return resultobj; +fail: + if (alloc1 == SWIG_NEWOBJ) free((char*)buf1); + if (alloc3 == SWIG_NEWOBJ) free((char*)buf3); + return NULL; +} + + +SWIGINTERN PyObject *_wrap_xd3_main_cmdline(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + int arg1 ; + char **arg2 = (char **) 0 ; + int result; + int res1 ; + char **argv1 = 0 ; + size_t argc1 = 0 ; + int owner1 = 0 ; + PyObject * obj0 = 0 ; + + if (!PyArg_ParseTuple(args,(char *)"O:xd3_main_cmdline",&obj0)) SWIG_fail; + res1 = SWIG_AsArgcArgv(obj0, SWIGTYPE_p_p_char, &argc1, &argv1, &owner1); + if (!SWIG_IsOK(res1)) { + arg1 = 0; arg2 = 0; + SWIG_exception_fail(SWIG_ArgError(SWIG_TypeError), "in method '" "xd3_main_cmdline" "', argument " "1"" of type 
'" "int ARGC, char **ARGV""'"); + } else { + arg1 = (int)(argc1); + arg2 = (char **)(argv1); + } + result = (int)xd3_main_cmdline(arg1,arg2); + resultobj = SWIG_From_int((int)(result)); + if (owner1) { + size_t i = argc1; + while (i) { + free((char*)argv1[--i]); + } + free((char*)argv1); + } + return resultobj; +fail: + if (owner1) { + size_t i = argc1; + while (i) { + free((char*)argv1[--i]); + } + free((char*)argv1); + } + return NULL; +} + + +static PyMethodDef SwigMethods[] = { + { (char *)"xd3_encode_memory", _wrap_xd3_encode_memory, METH_VARARGS, NULL}, + { (char *)"xd3_decode_memory", _wrap_xd3_decode_memory, METH_VARARGS, NULL}, + { (char *)"xd3_main_cmdline", _wrap_xd3_main_cmdline, METH_VARARGS, NULL}, + { NULL, NULL, 0, NULL } +}; + + +/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (BEGIN) -------- */ + +static swig_type_info _swigt__p_char = {"_p_char", "char *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_p_char = {"_p_p_char", "char **", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_unsigned_int = {"_p_unsigned_int", "unsigned int *", 0, 0, (void*)0, 0}; + +static swig_type_info *swig_type_initial[] = { + &_swigt__p_char, + &_swigt__p_p_char, + &_swigt__p_unsigned_int, +}; + +static swig_cast_info _swigc__p_char[] = { {&_swigt__p_char, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_p_char[] = { {&_swigt__p_p_char, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_unsigned_int[] = { {&_swigt__p_unsigned_int, 0, 0, 0},{0, 0, 0, 0}}; + +static swig_cast_info *swig_cast_initial[] = { + _swigc__p_char, + _swigc__p_p_char, + _swigc__p_unsigned_int, +}; + + +/* -------- TYPE CONVERSION AND EQUIVALENCE RULES (END) -------- */ + +static swig_const_info swig_const_table[] = { +{0, 0, 0, 0.0, 0, 0}}; + +#ifdef __cplusplus +} +#endif +/* ----------------------------------------------------------------------------- + * Type initialization: + * This problem is tough by the requirement that no dynamic + * memory is used. 
Also, since swig_type_info structures store pointers to + * swig_cast_info structures and swig_cast_info structures store pointers back + * to swig_type_info structures, we need some lookup code at initialization. + * The idea is that swig generates all the structures that are needed. + * The runtime then collects these partially filled structures. + * The SWIG_InitializeModule function takes these initial arrays out of + * swig_module, and does all the lookup, filling in the swig_module.types + * array with the correct data and linking the correct swig_cast_info + * structures together. + * + * The generated swig_type_info structures are assigned statically to an initial + * array. We just loop through that array, and handle each type individually. + * First we look up whether this type has already been loaded, and if so, use the + * loaded structure instead of the generated one. Then we have to fill in the + * cast linked list. The cast data is initially stored in something like a + * two-dimensional array. Each row corresponds to a type (there are the same + * number of rows as there are in the swig_type_initial array). Each entry in + * a column is one of the swig_cast_info structures for that type. + * The cast_initial array is actually an array of arrays, because each row has + * a variable number of columns. So to actually build the cast linked list, + * we find the array of casts associated with the type, and loop through it + * adding the casts to the list. The one last trick we need to do is making + * sure the type pointer in the swig_cast_info struct is correct. + * + * First off, we look up the cast->type name to see if it is already loaded. + * There are three cases to handle: + * 1) If the cast->type has already been loaded AND the type we are adding + * casting info to has not been loaded (it is in this module), THEN we + * replace the cast->type pointer with the type pointer that has already + * been loaded. 
+ * 2) If BOTH types (the one we are adding casting info to, and the + * cast->type) are loaded, THEN the cast info has already been loaded by + * the previous module so we just ignore it. + * 3) Finally, if cast->type has not already been loaded, then we add that + * swig_cast_info to the linked list (because the cast->type) pointer will + * be correct. + * ----------------------------------------------------------------------------- */ + +#ifdef __cplusplus +extern "C" { +#if 0 +} /* c-mode */ +#endif +#endif + +#if 0 +#define SWIGRUNTIME_DEBUG +#endif + + +SWIGRUNTIME void +SWIG_InitializeModule(void *clientdata) { + size_t i; + swig_module_info *module_head, *iter; + int found; + + clientdata = clientdata; + + /* check to see if the circular list has been setup, if not, set it up */ + if (swig_module.next==0) { + /* Initialize the swig_module */ + swig_module.type_initial = swig_type_initial; + swig_module.cast_initial = swig_cast_initial; + swig_module.next = &swig_module; + } + + /* Try and load any already created modules */ + module_head = SWIG_GetModule(clientdata); + if (!module_head) { + /* This is the first module loaded for this interpreter */ + /* so set the swig module into the interpreter */ + SWIG_SetModule(clientdata, &swig_module); + module_head = &swig_module; + } else { + /* the interpreter has loaded a SWIG module, but has it loaded this one? 
*/ + found=0; + iter=module_head; + do { + if (iter==&swig_module) { + found=1; + break; + } + iter=iter->next; + } while (iter!= module_head); + + /* if the is found in the list, then all is done and we may leave */ + if (found) return; + /* otherwise we must add out module into the list */ + swig_module.next = module_head->next; + module_head->next = &swig_module; + } + + /* Now work on filling in swig_module.types */ +#ifdef SWIGRUNTIME_DEBUG + printf("SWIG_InitializeModule: size %d\n", swig_module.size); +#endif + for (i = 0; i < swig_module.size; ++i) { + swig_type_info *type = 0; + swig_type_info *ret; + swig_cast_info *cast; + +#ifdef SWIGRUNTIME_DEBUG + printf("SWIG_InitializeModule: type %d %s\n", i, swig_module.type_initial[i]->name); +#endif + + /* if there is another module already loaded */ + if (swig_module.next != &swig_module) { + type = SWIG_MangledTypeQueryModule(swig_module.next, &swig_module, swig_module.type_initial[i]->name); + } + if (type) { + /* Overwrite clientdata field */ +#ifdef SWIGRUNTIME_DEBUG + printf("SWIG_InitializeModule: found type %s\n", type->name); +#endif + if (swig_module.type_initial[i]->clientdata) { + type->clientdata = swig_module.type_initial[i]->clientdata; +#ifdef SWIGRUNTIME_DEBUG + printf("SWIG_InitializeModule: found and overwrite type %s \n", type->name); +#endif + } + } else { + type = swig_module.type_initial[i]; + } + + /* Insert casting types */ + cast = swig_module.cast_initial[i]; + while (cast->type) { + /* Don't need to add information already in the list */ + ret = 0; +#ifdef SWIGRUNTIME_DEBUG + printf("SWIG_InitializeModule: look cast %s\n", cast->type->name); +#endif + if (swig_module.next != &swig_module) { + ret = SWIG_MangledTypeQueryModule(swig_module.next, &swig_module, cast->type->name); +#ifdef SWIGRUNTIME_DEBUG + if (ret) printf("SWIG_InitializeModule: found cast %s\n", ret->name); +#endif + } + if (ret) { + if (type == swig_module.type_initial[i]) { +#ifdef SWIGRUNTIME_DEBUG + 
printf("SWIG_InitializeModule: skip old type %s\n", ret->name); +#endif + cast->type = ret; + ret = 0; + } else { + /* Check for casting already in the list */ + swig_cast_info *ocast = SWIG_TypeCheck(ret->name, type); +#ifdef SWIGRUNTIME_DEBUG + if (ocast) printf("SWIG_InitializeModule: skip old cast %s\n", ret->name); +#endif + if (!ocast) ret = 0; + } + } + + if (!ret) { +#ifdef SWIGRUNTIME_DEBUG + printf("SWIG_InitializeModule: adding cast %s\n", cast->type->name); +#endif + if (type->cast) { + type->cast->prev = cast; + cast->next = type->cast; + } + type->cast = cast; + } + cast++; + } + /* Set entry in modules->types array equal to the type */ + swig_module.types[i] = type; + } + swig_module.types[i] = 0; + +#ifdef SWIGRUNTIME_DEBUG + printf("**** SWIG_InitializeModule: Cast List ******\n"); + for (i = 0; i < swig_module.size; ++i) { + int j = 0; + swig_cast_info *cast = swig_module.cast_initial[i]; + printf("SWIG_InitializeModule: type %d %s\n", i, swig_module.type_initial[i]->name); + while (cast->type) { + printf("SWIG_InitializeModule: cast type %s\n", cast->type->name); + cast++; + ++j; + } + printf("---- Total casts: %d\n",j); + } + printf("**** SWIG_InitializeModule: Cast List ******\n"); +#endif +} + +/* This function will propagate the clientdata field of type to +* any new swig_type_info structures that have been added into the list +* of equivalent types. It is like calling +* SWIG_TypeClientData(type, clientdata) a second time. 
+*/ +SWIGRUNTIME void +SWIG_PropagateClientData(void) { + size_t i; + swig_cast_info *equiv; + static int init_run = 0; + + if (init_run) return; + init_run = 1; + + for (i = 0; i < swig_module.size; i++) { + if (swig_module.types[i]->clientdata) { + equiv = swig_module.types[i]->cast; + while (equiv) { + if (!equiv->converter) { + if (equiv->type && !equiv->type->clientdata) + SWIG_TypeClientData(equiv->type, swig_module.types[i]->clientdata); + } + equiv = equiv->next; + } + } + } +} + +#ifdef __cplusplus +#if 0 +{ + /* c-mode */ +#endif +} +#endif + + + +#ifdef __cplusplus +extern "C" { +#endif + + /* Python-specific SWIG API */ +#define SWIG_newvarlink() SWIG_Python_newvarlink() +#define SWIG_addvarlink(p, name, get_attr, set_attr) SWIG_Python_addvarlink(p, name, get_attr, set_attr) +#define SWIG_InstallConstants(d, constants) SWIG_Python_InstallConstants(d, constants) + + /* ----------------------------------------------------------------------------- + * global variable support code. 
+ * ----------------------------------------------------------------------------- */ + + typedef struct swig_globalvar { + char *name; /* Name of global variable */ + PyObject *(*get_attr)(void); /* Return the current value */ + int (*set_attr)(PyObject *); /* Set the value */ + struct swig_globalvar *next; + } swig_globalvar; + + typedef struct swig_varlinkobject { + PyObject_HEAD + swig_globalvar *vars; + } swig_varlinkobject; + + SWIGINTERN PyObject * + swig_varlink_repr(swig_varlinkobject *SWIGUNUSEDPARM(v)) { + return PyString_FromString("<Swig global variables>"); + } + + SWIGINTERN PyObject * + swig_varlink_str(swig_varlinkobject *v) { + PyObject *str = PyString_FromString("("); + swig_globalvar *var; + for (var = v->vars; var; var=var->next) { + PyString_ConcatAndDel(&str,PyString_FromString(var->name)); + if (var->next) PyString_ConcatAndDel(&str,PyString_FromString(", ")); + } + PyString_ConcatAndDel(&str,PyString_FromString(")")); + return str; + } + + SWIGINTERN int + swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) { + PyObject *str = swig_varlink_str(v); + fprintf(fp,"Swig global variables "); + fprintf(fp,"%s\n", PyString_AsString(str)); + Py_DECREF(str); + return 0; + } + + SWIGINTERN void + swig_varlink_dealloc(swig_varlinkobject *v) { + swig_globalvar *var = v->vars; + while (var) { + swig_globalvar *n = var->next; + free(var->name); + free(var); + var = n; + } + } + + SWIGINTERN PyObject * + swig_varlink_getattr(swig_varlinkobject *v, char *n) { + PyObject *res = NULL; + swig_globalvar *var = v->vars; + while (var) { + if (strcmp(var->name,n) == 0) { + res = (*var->get_attr)(); + break; + } + var = var->next; + } + if (res == NULL && !PyErr_Occurred()) { + PyErr_SetString(PyExc_NameError,"Unknown C global variable"); + } + return res; + } + + SWIGINTERN int + swig_varlink_setattr(swig_varlinkobject *v, char *n, PyObject *p) { + int res = 1; + swig_globalvar *var = v->vars; + while (var) { + if (strcmp(var->name,n) == 
0) { + res = (*var->set_attr)(p); + break; + } + var = var->next; + } + if (res == 1 && !PyErr_Occurred()) { + PyErr_SetString(PyExc_NameError,"Unknown C global variable"); + } + return res; + } + + SWIGINTERN PyTypeObject* + swig_varlink_type(void) { + static char varlink__doc__[] = "Swig var link object"; + static PyTypeObject varlink_type; + static int type_init = 0; + if (!type_init) { + const PyTypeObject tmp + = { + PyObject_HEAD_INIT(NULL) + 0, /* Number of items in variable part (ob_size) */ + (char *)"swigvarlink", /* Type name (tp_name) */ + sizeof(swig_varlinkobject), /* Basic size (tp_basicsize) */ + 0, /* Itemsize (tp_itemsize) */ + (destructor) swig_varlink_dealloc, /* Deallocator (tp_dealloc) */ + (printfunc) swig_varlink_print, /* Print (tp_print) */ + (getattrfunc) swig_varlink_getattr, /* get attr (tp_getattr) */ + (setattrfunc) swig_varlink_setattr, /* Set attr (tp_setattr) */ + 0, /* tp_compare */ + (reprfunc) swig_varlink_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc)swig_varlink_str, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + varlink__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ +#if PY_VERSION_HEX >= 0x02020000 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* tp_iter -> tp_weaklist */ +#endif +#if PY_VERSION_HEX >= 0x02030000 + 0, /* tp_del */ +#endif +#ifdef COUNT_ALLOCS + 0,0,0,0 /* tp_alloc -> tp_next */ +#endif + }; + varlink_type = tmp; + varlink_type.ob_type = &PyType_Type; + type_init = 1; + } + return &varlink_type; + } + + /* Create a variable linking object for use later */ + SWIGINTERN PyObject * + SWIG_Python_newvarlink(void) { + swig_varlinkobject *result = PyObject_NEW(swig_varlinkobject, swig_varlink_type()); + if (result) { + result->vars = 0; + } + return ((PyObject*) result); + } + + SWIGINTERN 
void + SWIG_Python_addvarlink(PyObject *p, char *name, PyObject *(*get_attr)(void), int (*set_attr)(PyObject *p)) { + swig_varlinkobject *v = (swig_varlinkobject *) p; + swig_globalvar *gv = (swig_globalvar *) malloc(sizeof(swig_globalvar)); + if (gv) { + size_t size = strlen(name)+1; + gv->name = (char *)malloc(size); + if (gv->name) { + strncpy(gv->name,name,size); + gv->get_attr = get_attr; + gv->set_attr = set_attr; + gv->next = v->vars; + } + } + v->vars = gv; + } + + SWIGINTERN PyObject * + SWIG_globals(void) { + static PyObject *_SWIG_globals = 0; + if (!_SWIG_globals) _SWIG_globals = SWIG_newvarlink(); + return _SWIG_globals; + } + + /* ----------------------------------------------------------------------------- + * constants/methods manipulation + * ----------------------------------------------------------------------------- */ + + /* Install Constants */ + SWIGINTERN void + SWIG_Python_InstallConstants(PyObject *d, swig_const_info constants[]) { + PyObject *obj = 0; + size_t i; + for (i = 0; constants[i].type; ++i) { + switch(constants[i].type) { + case SWIG_PY_POINTER: + obj = SWIG_NewPointerObj(constants[i].pvalue, *(constants[i]).ptype,0); + break; + case SWIG_PY_BINARY: + obj = SWIG_NewPackedObj(constants[i].pvalue, constants[i].lvalue, *(constants[i].ptype)); + break; + default: + obj = 0; + break; + } + if (obj) { + PyDict_SetItemString(d, constants[i].name, obj); + Py_DECREF(obj); + } + } + } + + /* -----------------------------------------------------------------------------*/ + /* Fix SwigMethods to carry the callback ptrs when needed */ + /* -----------------------------------------------------------------------------*/ + + SWIGINTERN void + SWIG_Python_FixMethods(PyMethodDef *methods, + swig_const_info *const_table, + swig_type_info **types, + swig_type_info **types_initial) { + size_t i; + for (i = 0; methods[i].ml_name; ++i) { + const char *c = methods[i].ml_doc; + if (c && (c = strstr(c, "swig_ptr: "))) { + int j; + swig_const_info *ci = 
0; + const char *name = c + 10; + for (j = 0; const_table[j].type; ++j) { + if (strncmp(const_table[j].name, name, + strlen(const_table[j].name)) == 0) { + ci = &(const_table[j]); + break; + } + } + if (ci) { + size_t shift = (ci->ptype) - types; + swig_type_info *ty = types_initial[shift]; + size_t ldoc = (c - methods[i].ml_doc); + size_t lptr = strlen(ty->name)+2*sizeof(void*)+2; + char *ndoc = (char*)malloc(ldoc + lptr + 10); + if (ndoc) { + char *buff = ndoc; + void *ptr = (ci->type == SWIG_PY_POINTER) ? ci->pvalue : 0; + if (ptr) { + strncpy(buff, methods[i].ml_doc, ldoc); + buff += ldoc; + strncpy(buff, "swig_ptr: ", 10); + buff += 10; + SWIG_PackVoidPtr(buff, ptr, ty->name, lptr); + methods[i].ml_doc = ndoc; + } + } + } + } + } + } + +#ifdef __cplusplus +} +#endif + +/* -----------------------------------------------------------------------------* + * Partial Init method + * -----------------------------------------------------------------------------*/ + +#ifdef __cplusplus +extern "C" +#endif +SWIGEXPORT void SWIG_init(void) { + PyObject *m, *d; + + /* Fix SwigMethods to carry the callback ptrs when needed */ + SWIG_Python_FixMethods(SwigMethods, swig_const_table, swig_types, swig_type_initial); + + m = Py_InitModule((char *) SWIG_name, SwigMethods); + d = PyModule_GetDict(m); + + SWIG_InitializeModule(0); + SWIG_InstallConstants(d,swig_const_table); + + + SWIG_Python_SetConstant(d, "XD3_SEC_DJW",SWIG_From_int((int)(XD3_SEC_DJW))); + SWIG_Python_SetConstant(d, "XD3_SEC_FGK",SWIG_From_int((int)(XD3_SEC_FGK))); + SWIG_Python_SetConstant(d, "XD3_SEC_NODATA",SWIG_From_int((int)(XD3_SEC_NODATA))); + SWIG_Python_SetConstant(d, "XD3_SEC_NOINST",SWIG_From_int((int)(XD3_SEC_NOINST))); + SWIG_Python_SetConstant(d, "XD3_SEC_NOADDR",SWIG_From_int((int)(XD3_SEC_NOADDR))); + SWIG_Python_SetConstant(d, "XD3_ADLER32",SWIG_From_int((int)(XD3_ADLER32))); + SWIG_Python_SetConstant(d, "XD3_ADLER32_NOVER",SWIG_From_int((int)(XD3_ADLER32_NOVER))); + SWIG_Python_SetConstant(d, 
"XD3_ALT_CODE_TABLE",SWIG_From_int((int)(XD3_ALT_CODE_TABLE))); + SWIG_Python_SetConstant(d, "XD3_NOCOMPRESS",SWIG_From_int((int)(XD3_NOCOMPRESS))); + SWIG_Python_SetConstant(d, "XD3_BEGREEDY",SWIG_From_int((int)(XD3_BEGREEDY))); + SWIG_Python_SetConstant(d, "XD3_COMPLEVEL_SHIFT",SWIG_From_int((int)(XD3_COMPLEVEL_SHIFT))); + SWIG_Python_SetConstant(d, "XD3_COMPLEVEL_MASK",SWIG_From_int((int)(XD3_COMPLEVEL_MASK))); + SWIG_Python_SetConstant(d, "XD3_COMPLEVEL_1",SWIG_From_int((int)(XD3_COMPLEVEL_1))); + SWIG_Python_SetConstant(d, "XD3_COMPLEVEL_3",SWIG_From_int((int)(XD3_COMPLEVEL_3))); + SWIG_Python_SetConstant(d, "XD3_COMPLEVEL_6",SWIG_From_int((int)(XD3_COMPLEVEL_6))); + SWIG_Python_SetConstant(d, "XD3_COMPLEVEL_9",SWIG_From_int((int)(XD3_COMPLEVEL_9))); +} + |