diff options
author | Julian Seward <jseward@acm.org> | 2000-06-24 22:13:13 +0200 |
---|---|---|
committer | Julian Seward <jseward@acm.org> | 2000-06-24 22:13:13 +0200 |
commit | 795b859eee96c700e8f3c3fe68e6a9a39d95797c (patch) | |
tree | 48f8a731cd5ec2f5f15c6d99f2207ebf4a1f35f6 | |
parent | f93cd82a9a7094ad90fd19bbc6ccf6f4627f8060 (diff) | |
download | bzip2-795b859eee96c700e8f3c3fe68e6a9a39d95797c.tar.gz |
bzip2-1.0.1
-rw-r--r-- | CHANGES | 67 | ||||
-rw-r--r-- | LICENSE | 4 | ||||
-rw-r--r-- | Makefile | 58 | ||||
-rw-r--r-- | Makefile-libbz2_so | 43 | ||||
-rw-r--r-- | README | 43 | ||||
-rw-r--r-- | README.COMPILATION.PROBLEMS | 130 | ||||
-rw-r--r-- | blocksort.c | 335 | ||||
-rw-r--r-- | bzip2.1 | 8 | ||||
-rw-r--r-- | bzip2.1.preformatted | 113 | ||||
-rw-r--r-- | bzip2.c | 508 | ||||
-rw-r--r-- | bzip2.txt | 15 | ||||
-rw-r--r-- | bzip2recover.c | 8 | ||||
-rw-r--r-- | bzlib.c | 248 | ||||
-rw-r--r-- | bzlib.h | 67 | ||||
-rw-r--r-- | bzlib_private.h | 52 | ||||
-rw-r--r-- | compress.c | 193 | ||||
-rw-r--r-- | crctable.c | 6 | ||||
-rw-r--r-- | decompress.c | 44 | ||||
-rw-r--r-- | dlltest.c | 341 | ||||
-rw-r--r-- | huffman.c | 36 | ||||
-rw-r--r-- | libbz2.def | 46 | ||||
-rw-r--r-- | makefile.msc | 18 | ||||
-rw-r--r-- | manual.texi | 516 | ||||
-rw-r--r-- | randtable.c | 6 | ||||
-rw-r--r-- | spewG.c | 39 | ||||
-rw-r--r-- | unzcrash.c | 126 | ||||
-rw-r--r-- | words0 | 5 |
27 files changed, 2160 insertions, 915 deletions
@@ -98,3 +98,70 @@ functioning of the bzip2 program or library. Added a couple of casts so the library compiles without warnings at level 3 in MS Visual Studio 6.0. Included a Y2K statement in the file Y2K_INFO. All other changes are minor documentation changes. + +1.0 +~~~ +Several minor bugfixes and enhancements: + +* Large file support. The library uses 64-bit counters to + count the volume of data passing through it. bzip2.c + is now compiled with -D_FILE_OFFSET_BITS=64 to get large + file support from the C library. -v correctly prints out + file sizes greater than 4 gigabytes. All these changes have + been made without assuming a 64-bit platform or a C compiler + which supports 64-bit ints, so, except for the C library + aspect, they are fully portable. + +* Decompression robustness. The library/program should be + robust to any corruption of compressed data, detecting and + handling _all_ corruption, instead of merely relying on + the CRCs. What this means is that the program should + never crash, given corrupted data, and the library should + always return BZ_DATA_ERROR. + +* Fixed an obscure race-condition bug only ever observed on + Solaris, in which, if you were very unlucky and issued + control-C at exactly the wrong time, both input and output + files would be deleted. + +* Don't run out of file handles on test/decompression when + large numbers of files have invalid magic numbers. + +* Avoid library namespace pollution. Prefix all exported + symbols with BZ2_. + +* Minor sorting enhancements from my DCC2000 paper. + +* Advance the version number to 1.0, so as to counteract the + (false-in-this-case) impression some people have that programs + with version numbers less than 1.0 are in some way experimental, + pre-release versions. + +* Create an initial Makefile-libbz2_so to build a shared library. + Yes, I know I should really use libtool et al ...
+ +* Make the program exit with 2 instead of 0 when decompression + fails due to a bad magic number (ie, an invalid bzip2 header). + Also exit with 1 (as the manual claims :-) whenever a diagnostic + message would have been printed AND the corresponding operation + is aborted, for example + bzip2: Output file xx already exists. + When a diagnostic message is printed but the operation is not + aborted, for example + bzip2: Can't guess original name for wurble -- using wurble.out + then the exit value 0 is returned, unless some other problem is + also detected. + + I think it corresponds more closely to what the manual claims now. + + +1.0.1 +~~~~~ +* Modified dlltest.c so it uses the new BZ2_ naming scheme. +* Modified makefile-msc to fix minor build probs on Win2k. +* Updated README.COMPILATION.PROBLEMS. + +There are no functionality changes or bug fixes relative to version +1.0.0. This is just a documentation update + a fix for minor Win32 +build problems. For almost everyone, upgrading from 1.0.0 to 1.0.1 is +utterly pointless. Don't bother. @@ -1,6 +1,6 @@ This program, "bzip2" and associated library "libbzip2", are -copyright (C) 1996-1999 Julian R Seward. All rights reserved. +copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -35,5 +35,5 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Julian Seward, Cambridge, UK. 
jseward@acm.org -bzip2/libbzip2 version 0.9.5 of 24 May 1999 +bzip2/libbzip2 version 1.0 of 21 March 2000 @@ -1,7 +1,8 @@ SHELL=/bin/sh CC=gcc -CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce +BIGFILES=-D_FILE_OFFSET_BITS=64 +CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) OBJS= blocksort.o \ huffman.o \ @@ -73,6 +74,7 @@ clean: sample1.tst sample2.tst sample3.tst blocksort.o: blocksort.c + @cat words0 $(CC) $(CFLAGS) -c blocksort.c huffman.o: huffman.c $(CC) $(CFLAGS) -c huffman.c @@ -91,13 +93,49 @@ bzip2.o: bzip2.c bzip2recover.o: bzip2recover.c $(CC) $(CFLAGS) -c bzip2recover.c +DISTNAME=bzip2-1.0.1 tarfile: - tar cvf interim.tar blocksort.c huffman.c crctable.c \ - randtable.c compress.c decompress.c bzlib.c bzip2.c \ - bzip2recover.c bzlib.h bzlib_private.h Makefile manual.texi \ - manual.ps LICENSE bzip2.1 bzip2.1.preformatted bzip2.txt \ - words1 words2 words3 sample1.ref sample2.ref sample3.ref \ - sample1.bz2 sample2.bz2 sample3.bz2 dlltest.c \ - *.html README CHANGES libbz2.def libbz2.dsp \ - dlltest.dsp makefile.msc Y2K_INFO - + rm -f $(DISTNAME) + ln -sf . 
$(DISTNAME) + tar cvf $(DISTNAME).tar \ + $(DISTNAME)/blocksort.c \ + $(DISTNAME)/huffman.c \ + $(DISTNAME)/crctable.c \ + $(DISTNAME)/randtable.c \ + $(DISTNAME)/compress.c \ + $(DISTNAME)/decompress.c \ + $(DISTNAME)/bzlib.c \ + $(DISTNAME)/bzip2.c \ + $(DISTNAME)/bzip2recover.c \ + $(DISTNAME)/bzlib.h \ + $(DISTNAME)/bzlib_private.h \ + $(DISTNAME)/Makefile \ + $(DISTNAME)/manual.texi \ + $(DISTNAME)/manual.ps \ + $(DISTNAME)/LICENSE \ + $(DISTNAME)/bzip2.1 \ + $(DISTNAME)/bzip2.1.preformatted \ + $(DISTNAME)/bzip2.txt \ + $(DISTNAME)/words0 \ + $(DISTNAME)/words1 \ + $(DISTNAME)/words2 \ + $(DISTNAME)/words3 \ + $(DISTNAME)/sample1.ref \ + $(DISTNAME)/sample2.ref \ + $(DISTNAME)/sample3.ref \ + $(DISTNAME)/sample1.bz2 \ + $(DISTNAME)/sample2.bz2 \ + $(DISTNAME)/sample3.bz2 \ + $(DISTNAME)/dlltest.c \ + $(DISTNAME)/*.html \ + $(DISTNAME)/README \ + $(DISTNAME)/README.COMPILATION.PROBLEMS \ + $(DISTNAME)/CHANGES \ + $(DISTNAME)/libbz2.def \ + $(DISTNAME)/libbz2.dsp \ + $(DISTNAME)/dlltest.dsp \ + $(DISTNAME)/makefile.msc \ + $(DISTNAME)/Y2K_INFO \ + $(DISTNAME)/unzcrash.c \ + $(DISTNAME)/spewG.c \ + $(DISTNAME)/Makefile-libbz2_so diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so new file mode 100644 index 0000000..a347c50 --- /dev/null +++ b/Makefile-libbz2_so @@ -0,0 +1,43 @@ + +# This Makefile builds a shared version of the library, +# libbz2.so.1.0.1, with soname libbz2.so.1.0, +# at least on x86-Linux (RedHat 5.2), +# with gcc-2.7.2.3. Please see the README file for some +# important info about building the library like this. 
+ +SHELL=/bin/sh +CC=gcc +BIGFILES=-D_FILE_OFFSET_BITS=64 +CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES) + +OBJS= blocksort.o \ + huffman.o \ + crctable.o \ + randtable.o \ + compress.o \ + decompress.o \ + bzlib.o + +all: $(OBJS) + $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS) + $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.1 + rm -f libbz2.so.1.0 + ln -s libbz2.so.1.0.1 libbz2.so.1.0 + +clean: + rm -f $(OBJS) bzip2.o libbz2.so.1.0.1 libbz2.so.1.0 bzip2-shared + +blocksort.o: blocksort.c + $(CC) $(CFLAGS) -c blocksort.c +huffman.o: huffman.c + $(CC) $(CFLAGS) -c huffman.c +crctable.o: crctable.c + $(CC) $(CFLAGS) -c crctable.c +randtable.o: randtable.c + $(CC) $(CFLAGS) -c randtable.c +compress.o: compress.c + $(CC) $(CFLAGS) -c compress.c +decompress.o: decompress.c + $(CC) $(CFLAGS) -c decompress.c +bzlib.o: bzlib.c + $(CC) $(CFLAGS) -c bzlib.c @@ -1,9 +1,9 @@ This is the README for bzip2, a block-sorting file compressor, version -0.9.5d. This version is fully compatible with the previous public -releases, bzip2-0.1pl2 and bzip2-0.9.0. +1.0. This version is fully compatible with the previous public +releases, bzip2-0.1pl2, bzip2-0.9.0 and bzip2-0.9.5. -bzip2-0.9.5 is distributed under a BSD-style license. For details, +bzip2-1.0 is distributed under a BSD-style license. For details, see the file LICENSE. Complete documentation is available in Postscript form (manual.ps) or @@ -30,15 +30,37 @@ The -n instructs make to show the commands it would execute, but not actually execute them. +HOW TO BUILD -- UNIX, shared library libbz2.so. + +Do 'make -f Makefile-libbz2_so'. This Makefile seems to work for +Linux-ELF (RedHat 5.2 on an x86 box), with gcc. I make no claims +that it works for any other platform, though I suspect it probably +will work for most platforms employing both ELF and gcc. + +bzip2-shared, a client of the shared library, is also built, but +not self-tested.
So I suggest you also build using the normal +Makefile, since that conducts a self-test. + +Important note for people upgrading .so's from 0.9.0/0.9.5 to +version 1.0. All the functions in the library have been renamed, +from (eg) bzCompress to BZ2_bzCompress, to avoid namespace pollution. +Unfortunately this means that the libbz2.so created by +Makefile-libbz2_so will not work with any program which used an +older version of the library. Sorry. I do encourage library +clients to make the effort to upgrade to use version 1.0, since +it is both faster and more robust than previous versions. + + HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc. It's difficult for me to support compilation on all these platforms. My approach is to collect binaries for these platforms, and put them -on my web page (http://www.muraroa.demon.co.uk). Look there. However -(FWIW), bzip2-0.9.5 is very standard ANSI C and should compile -unmodified with MS Visual C. For Win32, there is one important -caveat: in bzip2.c, you must set BZ_UNIX to 0 and BZ_LCCWIN32 to 1 -before building. +on the master web page (http://sourceware.cygnus.com/bzip2). Look +there. However (FWIW), bzip2-1.0 is very standard ANSI C and should +compile unmodified with MS Visual C. For Win32, there is one +important caveat: in bzip2.c, you must set BZ_UNIX to 0 and +BZ_LCCWIN32 to 1 before building. If you have difficulties building, +you might want to read README.COMPILATION.PROBLEMS. VALIDATION @@ -116,6 +138,10 @@ WHAT'S NEW IN 0.9.5 ? * Many small improvements in file and flag handling. * A Y2K statement. +WHAT'S NEW IN 1.0 + + See the CHANGES file. + I hope you find bzip2 useful. Feel free to contact me at jseward@acm.org if you have any suggestions or queries. 
Many people mailed me with @@ -137,3 +163,4 @@ Cambridge, UK 23 August 1998 (bzip2, version 0.9.0) 8 June 1999 (bzip2, version 0.9.5) 4 Sept 1999 (bzip2, version 0.9.5d) + 5 May 2000 (bzip2, version 1.0pre8) diff --git a/README.COMPILATION.PROBLEMS b/README.COMPILATION.PROBLEMS new file mode 100644 index 0000000..d621ad5 --- /dev/null +++ b/README.COMPILATION.PROBLEMS @@ -0,0 +1,130 @@ + +bzip2-1.0 should compile without problems on the vast majority of +platforms. Using the supplied Makefile, I've built and tested it +myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and +alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can +build a native Win32 version too. Large file support seems to work +correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows +2000). + +When I say "large file" I mean a file of size 2,147,483,648 (2^31) +bytes or above. Many older OSs can't handle files above this size, +but many newer ones can. Large files are pretty huge -- most files +you'll encounter are not Large Files. + +Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide +variety of platforms without difficulty, and I hope this version will +continue in that tradition. However, in order to support large files, +I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile. +This can cause problems. + +The technique of adding -D_FILE_OFFSET_BITS=64 to get large file +support is, as far as I know, the Recommended Way to get correct large +file support. For more details, see the Large File Support +Specification, published by the Large File Summit, at + http://www.sas.com/standard/large.file/ + +As a general comment, if you get compilation errors which you think +are related to large file support, try removing the above define from +the Makefile, ie, delete the line + BIGFILES=-D_FILE_OFFSET_BITS=64 +from the Makefile, and do 'make clean ; make'. 
This will give you a +version of bzip2 without large file support, which, for most +applications, is probably not a problem. + +Alternatively, try some of the platform-specific hints listed below. + +You can use the spewG.c program to generate huge files to test bzip2's +large file support, if you are feeling paranoid. Be aware though that +any compilation problems which affect bzip2 will also affect spewG.c, +alas. + + +Known problems as of 1.0pre8: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large + number of warnings appear, including the following: + + /usr/include/sys/resource.h: In function `getrlimit': + /usr/include/sys/resource.h:168: + warning: implicit declaration of function `__getrlimit64' + /usr/include/sys/resource.h: In function `setrlimit': + /usr/include/sys/resource.h:170: + warning: implicit declaration of function `__setrlimit64' + + This would appear to be a problem with large file support, header + files and gcc. gcc may or may not give up at this point. If it + fails, you might be able to improve matters by adding + -D__STDC_EXT__=1 + to the BIGFILES variable in the Makefile (ie, change its definition + to + BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1 + + Even if gcc does produce a binary which appears to work (ie passes + its self-tests), you might want to test it to see if it works properly + on large files. + + +* HP/UX 10.20 and 11.00, using HP's cc compiler. + + No specific problems for this combination, except that you'll need to + specify the -Ae flag, and zap the gcc-specific stuff + -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce. + You should retain -D_FILE_OFFSET_BITS=64 in order to get large + file support -- which is reported to work ok for this HP/UX + cc + combination. + + +* SunOS 4.1.X. + + Amazingly, there are still people out there using this venerable old + banger. 
I shouldn't be too rude -- I started life on SunOS, and + it was a pretty darn good OS, way back then. Anyway: + + SunOS doesn't seem to have strerror(), so you'll have to use + perror(), perhaps by adding this (warning: UNTESTED CODE): + + char* strerror ( int errnum ) + { + if (errnum < 0 || errnum >= sys_nerr) + return "Unknown error"; + else + return sys_errlist[errnum]; + } + + Or you could comment out the relevant calls to strerror; they're + not mission-critical. Or you could upgrade to Solaris. Ha ha ha! + (what?? you think I've got Bad Attitude?) + + +* Making a shared library on Solaris. (Not really a compilation + problem, but many people ask ...) + + Firstly, if you have Solaris 8, either you have libbz2.so already + on your system, or you can install it from the Solaris CD. + + Secondly, be aware that there are potential naming conflicts + between the .so file supplied with Solaris 8, and the .so file + which Makefile-libbz2_so will make. Makefile-libbz2_so creates + a .so which has the names which I intend to be "official" as + of version 1.0.0 and onwards. Unfortunately, the .so in + Solaris 8 appeared before I decided on the final names, so + the two libraries are incompatible. We have since communicated + and I hope that the problems will have been solved in the next + version of Solaris, whenever that might appear. + + All that said: you might be able to get somewhere + by finding the line in Makefile-libbz2_so which says + + $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS) + + and replacing with + + $(CC) -G -shared -o libbz2.so.1.0.1 -h libbz2.so.1.0 $(OBJS) + + If gcc objects to the combination -fpic -fPIC, get rid of + the second one, leaving just "-fpic". + + +That's the end of the currently known compilation problems.
diff --git a/blocksort.c b/blocksort.c index 85a02de..ec42672 100644 --- a/blocksort.c +++ b/blocksort.c @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -56,6 +56,13 @@ Jon L. Bentley For more information on these sources, see the manual. + + To get some idea how the block sorting algorithms in this file + work, read my paper + On the Performance of BWT Sorting Algorithms + in Proceedings of the IEEE Data Compression Conference 2000, + Snowbird, Utah, USA, 27-30 March 2000. The main sort in this + file implements the algorithm called cache in the paper. --*/ @@ -232,11 +239,11 @@ void fallbackQSort3 ( UInt32* fmap, /* Pre: nblock > 0 eclass exists for [0 .. nblock-1] - ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block + ((UChar*)eclass) [0 .. nblock-1] holds block ptr exists for [0 .. nblock-1] Post: - ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block + ((UChar*)eclass) [0 .. nblock-1] holds block All other areas of eclass destroyed fmap [0 .. nblock-1] holds sorted order bhtab [ 0 .. 
2+(nblock/32) ] destroyed @@ -260,7 +267,7 @@ void fallbackSort ( UInt32* fmap, Int32 H, i, j, k, l, r, cc, cc1; Int32 nNotDone; Int32 nBhtab; - UInt16* eclass16 = (UInt16*)eclass; + UChar* eclass8 = (UChar*)eclass; /*-- Initial 1-char radix sort to generate @@ -269,12 +276,12 @@ void fallbackSort ( UInt32* fmap, if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); for (i = 0; i < 257; i++) ftab[i] = 0; - for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++; + for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; for (i = 0; i < nblock; i++) { - j = eclass16[i] >> 8; + j = eclass8[i]; k = ftab[j] - 1; ftab[j] = k; fmap[k] = i; @@ -354,7 +361,7 @@ void fallbackSort ( UInt32* fmap, /*-- Reconstruct the original block in - eclass16 [0 .. nblock-1] [15:8], since the + eclass8 [0 .. nblock-1], since the previous phase destroyed it. --*/ if (verb >= 4) @@ -363,7 +370,7 @@ void fallbackSort ( UInt32* fmap, for (i = 0; i < nblock; i++) { while (ftabCopy[j] == 0) j++; ftabCopy[j]--; - eclass16[fmap[i]] = j << 8; + eclass8[fmap[i]] = (UChar)j; } AssertH ( j < 256, 1005 ); } @@ -386,67 +393,116 @@ static __inline__ Bool mainGtU ( UInt32 i1, UInt32 i2, - UInt16* block, + UChar* block, UInt16* quadrant, UInt32 nblock, Int32* budget ) { - Int32 k; + Int32 k; + UChar c1, c2; UInt16 s1, s2; AssertD ( i1 != i2, "mainGtU" ); - - s1 = block[i1]; s2 = block[i2]; - if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; - if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; - if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; - if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; - if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; - if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; + /* 1 */ + c1 = block[i1]; c2 = 
block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 9 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 10 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 11 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 12 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; k = nblock + 8; do { - - s1 = block[i1]; s2 = block[i2]; + /* 1 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); s1 = quadrant[i1]; s2 = quadrant[i2]; if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); s1 = quadrant[i1]; s2 = quadrant[i2]; if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = 
quadrant[i2]; if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); s1 = quadrant[i1]; s2 = quadrant[i2]; if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; - - s1 = block[i1]; s2 = block[i2]; + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); s1 = quadrant[i1]; s2 = quadrant[i2]; if (s1 != s2) return (s1 > s2); - i1 += 2; i2 += 2; + i1++; i2++; if (i1 >= nblock) i1 -= nblock; if (i2 >= nblock) i2 -= nblock; @@ -467,13 +523,14 @@ Bool mainGtU ( UInt32 i1, because the number of elems to sort is usually small, typically <= 20. --*/ +static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484 }; static void mainSimpleSort ( UInt32* ptr, - UInt16* block, + UChar* block, UInt16* quadrant, Int32 nblock, Int32 lo, @@ -568,19 +625,19 @@ void mainSimpleSort ( UInt32* ptr, } \ } - static __inline__ -UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c ) +UChar mmed3 ( UChar a, UChar b, UChar c ) { - UInt16 t; + UChar t; if (a > b) { t = a; a = b; b = t; }; - if (b > c) { t = b; b = c; c = t; }; - if (a > b) b = a; + if (b > c) { + b = c; + if (a > b) b = a; + } return b; } - #define mmin(a,b) ((a) < (b)) ? 
(a) : (b) #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ @@ -609,7 +666,7 @@ UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c ) static void mainQSort3 ( UInt32* ptr, - UInt16* block, + UChar* block, UInt16* quadrant, Int32 nblock, Int32 loSt, @@ -679,7 +736,7 @@ void mainQSort3 ( UInt32* ptr, AssertD ( unHi == unLo-1, "mainQSort3(2)" ); if (gtHi < ltLo) { - mpush(lo, hi, d+2 ); + mpush(lo, hi, d+1 ); continue; } @@ -691,7 +748,7 @@ void mainQSort3 ( UInt32* ptr, nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; - nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); @@ -722,11 +779,11 @@ void mainQSort3 ( UInt32* ptr, /* Pre: nblock > N_OVERSHOOT block32 exists for [0 .. nblock-1 +N_OVERSHOOT] - ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block + ((UChar*)block32) [0 .. nblock-1] holds block ptr exists for [0 .. nblock-1] Post: - ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block + ((UChar*)block32) [0 .. nblock-1] holds block All other areas of block32 destroyed ftab [0 .. 65536 ] destroyed ptr [0 .. 
nblock-1] holds sorted order @@ -739,40 +796,47 @@ void mainQSort3 ( UInt32* ptr, static void mainSort ( UInt32* ptr, - UInt16* block, + UChar* block, UInt16* quadrant, UInt32* ftab, Int32 nblock, Int32 verb, Int32* budget ) { - Int32 i, j, k, m, ss, sb; + Int32 i, j, k, ss, sb; Int32 runningOrder[256]; - Int32 copy[256]; Bool bigDone[256]; + Int32 copyStart[256]; + Int32 copyEnd [256]; UChar c1; Int32 numQSorted; - Int32 biggestSoFar; UInt16 s; - if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); - /*-- Stripe the block data into 16 bits, and at the - same time set up the 2-byte frequency table - --*/ + /*-- set up the 2-byte frequency table --*/ for (i = 65536; i >= 0; i--) ftab[i] = 0; - s = block[0]; - for (i = 1; i < nblock; i++) { + j = block[0] << 8; + i = nblock-1; + for (; i >= 3; i -= 4) { + quadrant[i] = 0; + j = (j >> 8) | ( ((UInt16)block[i]) << 8); + ftab[j]++; + quadrant[i-1] = 0; + j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); + ftab[j]++; + quadrant[i-2] = 0; + j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); + ftab[j]++; + quadrant[i-3] = 0; + j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); + ftab[j]++; + } + for (; i >= 0; i--) { quadrant[i] = 0; - s = (s << 8) | block[i]; - block[i-1] = s; - ftab[s]++; + j = (j >> 8) | ( ((UInt16)block[i]) << 8); + ftab[j]++; } - quadrant[0] = 0; - s = (s << 8) | (block[0] >> 8); - block[nblock-1] = s; - ftab[s]++; /*-- (emphasises close relationship of block & quadrant) --*/ for (i = 0; i < BZ_N_OVERSHOOT; i++) { @@ -785,9 +849,29 @@ void mainSort ( UInt32* ptr, /*-- Complete the initial radix sort --*/ for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; - for (i = 0; i < nblock; i++) { - s = block[i]; - j = ftab[s] - 1; + s = block[0] << 8; + i = nblock-1; + for (; i >= 3; i -= 4) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i; + s = (s >> 8) | (block[i-1] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-1; + s = (s >> 8) | (block[i-2] << 8); + j = ftab[s] -1; + ftab[s] = 
j; + ptr[j] = i-2; + s = (s >> 8) | (block[i-3] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-3; + } + for (; i >= 0; i--) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; ftab[s] = j; ptr[j] = i; } @@ -826,13 +910,13 @@ void mainSort ( UInt32* ptr, The main sorting loop. --*/ - biggestSoFar = numQSorted = 0; + numQSorted = 0; for (i = 0; i <= 255; i++) { /*-- Process big buckets, starting with the least full. - Basically this is a 4-step process in which we call + Basically this is a 3-step process in which we call mainQSort3 to sort the small buckets [ss, j], but also make a big effort to avoid the calls if we can. --*/ @@ -869,39 +953,38 @@ void mainSort ( UInt32* ptr, } } + AssertH ( !bigDone[ss], 1006 ); + /*-- Step 2: - Deal specially with case [ss, ss]. This establishes the - sorted order for [ss, ss] without any comparisons. - A clever trick, cryptically described as steps Q6b and Q6c - in SRC-124 (aka BW94). Compared to bzip2, this makes it - practical not to use a preliminary run-length coder. + Now scan this big bucket [ss] so as to synthesise the + sorted order for small buckets [t, ss] for all t, + including, magically, the bucket [ss,ss] too. + This will avoid doing Real Work in subsequent Step 1's. 
--*/ { - Int32 put0, get0, put1, get1; - Int32 sbn = (ss << 8) + ss; - Int32 lo = ftab[sbn] & CLEARMASK; - Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1; - UChar ssc = (UChar)ss; - put0 = lo; - get0 = ftab[ss << 8] & CLEARMASK; - put1 = hi; - get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1; - while (get0 < put0) { - j = ptr[get0]-1; if (j < 0) j += nblock; - c1 = (UChar)(block[j] >> 8); - if (c1 == ssc) { ptr[put0] = j; put0++; }; - get0++; + for (j = 0; j <= 255; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = ptr[j]-1; if (k < 0) k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[ copyStart[c1]++ ] = k; } - while (get1 > put1) { - j = ptr[get1]-1; if (j < 0) j += nblock; - c1 = (UChar)(block[j] >> 8); - if (c1 == ssc) { ptr[put1] = j; put1--; }; - get1--; + for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = ptr[j]-1; if (k < 0) k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[ copyEnd[c1]-- ] = k; } - ftab[sbn] |= SETMASK; } + AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 ); + + for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; + /*-- Step 3: The [ss] big bucket is now done. Record this fact, @@ -950,7 +1033,7 @@ void mainSort ( UInt32* ptr, while ((bbSize >> shifts) > 65534) shifts++; - for (j = 0; j < bbSize; j++) { + for (j = bbSize-1; j >= 0; j--) { Int32 a2update = ptr[bbStart + j]; UInt16 qVal = (UInt16)(j >> shifts); quadrant[a2update] = qVal; @@ -960,26 +1043,6 @@ void mainSort ( UInt32* ptr, AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); } - /*-- - Step 4: - Now scan this big bucket [ss] so as to synthesise the - sorted order for small buckets [t, ss] for all t != ss. - This will avoid doing Real Work in subsequent Step 1's. 
- --*/ - for (j = 0; j <= 255; j++) - copy[j] = ftab[(j << 8) + ss] & CLEARMASK; - - m = ftab[(ss+1) << 8] & CLEARMASK; - for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) { - k = ptr[j] - 1; if (k < 0) k += nblock; - c1 = (UChar)(block[k] >> 8); - if ( ! bigDone[c1] ) { - ptr[copy[c1]] = k; - copy[c1] ++; - } - } - - for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; } if (verb >= 4) @@ -996,19 +1059,19 @@ void mainSort ( UInt32* ptr, /* Pre: nblock > 0 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] - ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block + ((UChar*)arr2) [0 .. nblock-1] holds block arr1 exists for [0 .. nblock-1] Post: - ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block + ((UChar*)arr2) [0 .. nblock-1] holds block All other areas of block destroyed ftab [ 0 .. 65536 ] destroyed arr1 [0 .. nblock-1] holds sorted order */ -void blockSort ( EState* s ) +void BZ2_blockSort ( EState* s ) { UInt32* ptr = s->ptr; - UInt16* block = s->block; + UChar* block = s->block; UInt32* ftab = s->ftab; Int32 nblock = s->nblock; Int32 verb = s->verbosity; @@ -1019,10 +1082,16 @@ void blockSort ( EState* s ) Int32 i; if (nblock < 10000) { - for (i = 0; i < nblock; i++) block[i] <<= 8; fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); } else { - quadrant = &(block[nblock+BZ_N_OVERSHOOT]); + /* Calculate the location for quadrant, remembering to get + the alignment right. Assumes that &(block[0]) is at least + 2-byte aligned -- this should be ok since block is really + the first section of arr2. + */ + i = nblock+BZ_N_OVERSHOOT; + if (i & 1) i++; + quadrant = (UInt16*)(&(block[i])); /* (wfact-1) / 3 puts the default-factor-30 transition point at very roughly the same place as @@ -1,7 +1,7 @@ .PU .TH bzip2 1 .SH NAME -bzip2, bunzip2 \- a block-sorting file compressor, v0.9.5 +bzip2, bunzip2 \- a block-sorting file compressor, v1.0 .br bzcat \- decompresses files to stdout .br @@ -397,11 +397,12 @@ I/O error messages are not as helpful as they could be. 
tries hard to detect I/O errors and exit cleanly, but the details of what the problem is sometimes seem rather misleading. -This manual page pertains to version 0.9.5 of +This manual page pertains to version 1.0 of .I bzip2. Compressed data created by this version is entirely forwards and backwards -compatible with the previous public releases, versions 0.1pl2 and 0.9.0, +compatible with the previous public releases, versions 0.1pl2, 0.9.0 +and 0.9.5, but with the following exception: 0.9.0 and above can correctly decompress multiple concatenated compressed files. 0.1pl2 cannot do this; it will stop after decompressing just the first file in the @@ -415,6 +416,7 @@ megabytes long. This could easily be fixed. .SH AUTHOR Julian Seward, jseward@acm.org. +http://sourceware.cygnus.com/bzip2 http://www.muraroa.demon.co.uk The ideas embodied in diff --git a/bzip2.1.preformatted b/bzip2.1.preformatted index 96b44be..9f18339 100644 --- a/bzip2.1.preformatted +++ b/bzip2.1.preformatted @@ -1,7 +1,11 @@ + +bzip2(1) bzip2(1) + + NNAAMMEE - bzip2, bunzip2 - a block-sorting file compressor, v0.9.5 + bzip2, bunzip2 - a block-sorting file compressor, v1.0 bzcat - decompresses files to stdout bzip2recover - recovers data from damaged bzip2 files @@ -54,6 +58,18 @@ DDEESSCCRRIIPPTTIIOONN filename.bz2 becomes filename filename.bz becomes filename filename.tbz2 becomes filename.tar + + + + 1 + + + + + +bzip2(1) bzip2(1) + + filename.tbz becomes filename.tar anyothername becomes anyothername.out @@ -109,6 +125,17 @@ DDEESSCCRRIIPPTTIIOONN you recover the original uncompressed data. You can use _b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files. + + + 2 + + + + + +bzip2(1) bzip2(1) + + Return values: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, &c), 2 to indicate a corrupt compressed file, 3 for an internal @@ -163,6 +190,18 @@ OOPPTTIIOONNSS --qq ----qquuiieett Suppress non-essential warning messages. 
Messages pertaining to I/O errors and other critical events + + + + 3 + + + + + +bzip2(1) bzip2(1) + + will not be suppressed. --vv ----vveerrbboossee @@ -217,6 +256,18 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT Larger block sizes give rapidly diminishing marginal returns. Most of the compression comes from the first two + + + + 4 + + + + + +bzip2(1) bzip2(1) + + or three hundred k of block size, a fact worth bearing in mind when using _b_z_i_p_2 on small machines. It is also important to appreciate that the decompression memory @@ -270,6 +321,19 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT -9 7600k 3700k 2350k 828642 + + + + + 5 + + + + + +bzip2(1) bzip2(1) + + RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS _b_z_i_p_2 compresses files in blocks, usually 900kbytes long. Each block is handled independently. If a media or trans- @@ -324,6 +388,18 @@ PPEERRFFOORRMMAANNCCEE NNOOTTEESS operate in, and then charges all over it in a fairly ran- dom fashion. This means that performance, both for com- pressing and decompressing, is largely determined by the + + + + 6 + + + + + +bzip2(1) bzip2(1) + + speed at which your machine can service cache misses. Because of this, small changes to the code to reduce the miss rate have been observed to give disproportionately @@ -337,14 +413,14 @@ CCAAVVEEAATTSS but the details of what the problem is sometimes seem rather misleading. - This manual page pertains to version 0.9.5 of _b_z_i_p_2_. Com- + This manual page pertains to version 1.0 of _b_z_i_p_2_. Com- pressed data created by this version is entirely forwards and backwards compatible with the previous public - releases, versions 0.1pl2 and 0.9.0, but with the follow- - ing exception: 0.9.0 and above can correctly decompress - multiple concatenated compressed files. 0.1pl2 cannot do - this; it will stop after decompressing just the first file - in the stream. 
+ releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the + following exception: 0.9.0 and above can correctly decom- + press multiple concatenated compressed files. 0.1pl2 can- + not do this; it will stop after decompressing just the + first file in the stream. _b_z_i_p_2_r_e_c_o_v_e_r uses 32-bit integers to represent bit posi- tions in compressed files, so it cannot handle compressed @@ -355,21 +431,32 @@ CCAAVVEEAATTSS AAUUTTHHOORR Julian Seward, jseward@acm.org. + http://sourceware.cygnus.com/bzip2 http://www.muraroa.demon.co.uk The ideas embodied in _b_z_i_p_2 are due to (at least) the fol- - lowing people: Michael Burrows and David Wheeler (for the - block sorting transformation), David Wheeler (again, for + lowing people: Michael Burrows and David Wheeler (for the + block sorting transformation), David Wheeler (again, for the Huffman coder), Peter Fenwick (for the structured cod- ing model in the original _b_z_i_p_, and many refinements), and - Alistair Moffat, Radford Neal and Ian Witten (for the + Alistair Moffat, Radford Neal and Ian Witten (for the arithmetic coder in the original _b_z_i_p_)_. I am much indebted for their help, support and advice. See the man- - ual in the source distribution for pointers to sources of + ual in the source distribution for pointers to sources of documentation. Christian von Roques encouraged me to look - for faster sorting algorithms, so as to speed up compres- + for faster sorting algorithms, so as to speed up compres- sion. Bela Lubkin encouraged me to improve the worst-case compression performance. Many people sent patches, helped - with portability problems, lent machines, gave advice and + with portability problems, lent machines, gave advice and were generally helpful. + + + + + + + + 7 + + @@ -7,7 +7,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. 
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -42,7 +42,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -123,10 +123,10 @@ --*/ #define BZ_LCCWIN32 0 -#if defined(_WIN32) && !defined(__CYGWIN32__) -#undef BZ_LCCWIN32 +#if defined(_WIN32) && !defined(__CYGWIN__) +#undef BZ_LCCWIN32 #define BZ_LCCWIN32 1 -#undef BZ_UNIX +#undef BZ_UNIX #define BZ_UNIX 0 #endif @@ -193,6 +193,17 @@ ERROR_IF_MINUS_ONE ( retVal ); \ } while ( 0 ) # endif +# ifdef __CYGWIN__ +# include <io.h> +# include <fcntl.h> +# undef SET_BINARY_MODE +# define SET_BINARY_MODE(fd) \ + do { \ + int retVal = setmode ( fileno ( fd ), \ + O_BINARY ); \ + ERROR_IF_MINUS_ONE ( retVal ); \ + } while ( 0 ) +# endif #endif @@ -276,10 +287,10 @@ typedef int IntNative; /*---------------------------------------------------*/ Int32 verbosity; -Bool keepInputFiles, smallMode; -Bool forceOverwrite, testFailsExist, noisy; +Bool keepInputFiles, smallMode, deleteOutputOnInterrupt; +Bool forceOverwrite, testFailsExist, unzFailsExist, noisy; Int32 numFileNames, numFilesProcessed, blockSize100k; - +Int32 exitValue; /*-- source modes; F==file, I==stdin, O==stdout --*/ #define SM_I2O 1 @@ -305,27 +316,204 @@ Char progNameReally[FILE_NAME_LEN]; FILE *outputHandleJustInCase; Int32 workFactor; -void panic ( Char* ) NORETURN; -void ioError ( void ) NORETURN; -void outOfMemory ( void ) NORETURN; -void blockOverrun ( void ) NORETURN; -void badBlockHeader ( void ) NORETURN; -void badBGLengths ( void ) NORETURN; -void crcError ( void ) NORETURN; -void bitStreamEOF ( void ) NORETURN; -void cleanUpAndFail ( Int32 ) NORETURN; -void compressedStreamEOF ( void ) NORETURN; +static void panic ( Char* ) NORETURN; +static void 
ioError ( void ) NORETURN; +static void outOfMemory ( void ) NORETURN; +static void configError ( void ) NORETURN; +static void crcError ( void ) NORETURN; +static void cleanUpAndFail ( Int32 ) NORETURN; +static void compressedStreamEOF ( void ) NORETURN; -void copyFileName ( Char*, Char* ); -void* myMalloc ( Int32 ); +static void copyFileName ( Char*, Char* ); +static void* myMalloc ( Int32 ); /*---------------------------------------------------*/ +/*--- An implementation of 64-bit ints. Sigh. ---*/ +/*--- Roll on widespread deployment of ANSI C9X ! ---*/ +/*---------------------------------------------------*/ + +typedef + struct { UChar b[8]; } + UInt64; + +static +void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 ) +{ + n->b[7] = (UChar)((hi32 >> 24) & 0xFF); + n->b[6] = (UChar)((hi32 >> 16) & 0xFF); + n->b[5] = (UChar)((hi32 >> 8) & 0xFF); + n->b[4] = (UChar) (hi32 & 0xFF); + n->b[3] = (UChar)((lo32 >> 24) & 0xFF); + n->b[2] = (UChar)((lo32 >> 16) & 0xFF); + n->b[1] = (UChar)((lo32 >> 8) & 0xFF); + n->b[0] = (UChar) (lo32 & 0xFF); +} + +static +double uInt64_to_double ( UInt64* n ) +{ + Int32 i; + double base = 1.0; + double sum = 0.0; + for (i = 0; i < 8; i++) { + sum += base * (double)(n->b[i]); + base *= 256.0; + } + return sum; +} + +static +void uInt64_add ( UInt64* src, UInt64* dst ) +{ + Int32 i; + Int32 carry = 0; + for (i = 0; i < 8; i++) { + carry += ( ((Int32)src->b[i]) + ((Int32)dst->b[i]) ); + dst->b[i] = (UChar)(carry & 0xFF); + carry >>= 8; + } +} + +static +void uInt64_sub ( UInt64* src, UInt64* dst ) +{ + Int32 t, i; + Int32 borrow = 0; + for (i = 0; i < 8; i++) { + t = ((Int32)dst->b[i]) - ((Int32)src->b[i]) - borrow; + if (t < 0) { + dst->b[i] = (UChar)(t + 256); + borrow = 1; + } else { + dst->b[i] = (UChar)t; + borrow = 0; + } + } +} + +static +void uInt64_mul ( UInt64* a, UInt64* b, UInt64* r_hi, UInt64* r_lo ) +{ + UChar sum[16]; + Int32 ia, ib, carry; + for (ia = 0; ia < 16; ia++) sum[ia] = 0; + for (ia = 0; ia < 8; ia++) 
{ + carry = 0; + for (ib = 0; ib < 8; ib++) { + carry += ( ((Int32)sum[ia+ib]) + + ((Int32)a->b[ia]) * ((Int32)b->b[ib]) ); + sum[ia+ib] = (UChar)(carry & 0xFF); + carry >>= 8; + } + sum[ia+8] = (UChar)(carry & 0xFF); + if ((carry >>= 8) != 0) panic ( "uInt64_mul" ); + } + + for (ia = 0; ia < 8; ia++) r_hi->b[ia] = sum[ia+8]; + for (ia = 0; ia < 8; ia++) r_lo->b[ia] = sum[ia]; +} + + +static +void uInt64_shr1 ( UInt64* n ) +{ + Int32 i; + for (i = 0; i < 8; i++) { + n->b[i] >>= 1; + if (i < 7 && (n->b[i+1] & 1)) n->b[i] |= 0x80; + } +} + +static +void uInt64_shl1 ( UInt64* n ) +{ + Int32 i; + for (i = 7; i >= 0; i--) { + n->b[i] <<= 1; + if (i > 0 && (n->b[i-1] & 0x80)) n->b[i]++; + } +} + +static +Bool uInt64_isZero ( UInt64* n ) +{ + Int32 i; + for (i = 0; i < 8; i++) + if (n->b[i] != 0) return 0; + return 1; +} + +static +Int32 uInt64_qrm10 ( UInt64* n ) +{ + /* Divide *n by 10, and return the remainder. Long division + is difficult, so we cheat and instead multiply by + 0xCCCC CCCC CCCC CCCD, which is 0.8 (viz, 0.1 << 3). + */ + Int32 i; + UInt64 tmp1, tmp2, n_orig, zero_point_eight; + + zero_point_eight.b[1] = zero_point_eight.b[2] = + zero_point_eight.b[3] = zero_point_eight.b[4] = + zero_point_eight.b[5] = zero_point_eight.b[6] = + zero_point_eight.b[7] = 0xCC; + zero_point_eight.b[0] = 0xCD; + + n_orig = *n; + + /* divide n by 10, + by multiplying by 0.8 and then shifting right 3 times */ + uInt64_mul ( n, &zero_point_eight, &tmp1, &tmp2 ); + uInt64_shr1(&tmp1); uInt64_shr1(&tmp1); uInt64_shr1(&tmp1); + *n = tmp1; + + /* tmp1 = 8*n, tmp2 = 2*n */ + uInt64_shl1(&tmp1); uInt64_shl1(&tmp1); uInt64_shl1(&tmp1); + tmp2 = *n; uInt64_shl1(&tmp2); + + /* tmp1 = 10*n */ + uInt64_add ( &tmp2, &tmp1 ); + + /* n_orig = n_orig - 10*n */ + uInt64_sub ( &tmp1, &n_orig ); + + /* n_orig should now hold quotient, in range 0 .. 
9 */ + for (i = 7; i >= 1; i--) + if (n_orig.b[i] != 0) panic ( "uInt64_qrm10(1)" ); + if (n_orig.b[0] > 9) + panic ( "uInt64_qrm10(2)" ); + + return (int)n_orig.b[0]; +} + +/* ... and the Whole Entire Point of all this UInt64 stuff is + so that we can supply the following function. +*/ +static +void uInt64_toAscii ( char* outbuf, UInt64* n ) +{ + Int32 i, q; + UChar buf[32]; + Int32 nBuf = 0; + UInt64 n_copy = *n; + do { + q = uInt64_qrm10 ( &n_copy ); + buf[nBuf] = q + '0'; + nBuf++; + } while (!uInt64_isZero(&n_copy)); + outbuf[nBuf] = 0; + for (i = 0; i < nBuf; i++) outbuf[i] = buf[nBuf-i-1]; +} + + +/*---------------------------------------------------*/ /*--- Processing of complete files and streams ---*/ /*---------------------------------------------------*/ /*---------------------------------------------*/ +static Bool myfeof ( FILE* f ) { Int32 c = fgetc ( f ); @@ -336,12 +524,14 @@ Bool myfeof ( FILE* f ) /*---------------------------------------------*/ +static void compressStream ( FILE *stream, FILE *zStream ) { BZFILE* bzf = NULL; UChar ibuf[5000]; Int32 nIbuf; - UInt32 nbytes_in, nbytes_out; + UInt32 nbytes_in_lo32, nbytes_in_hi32; + UInt32 nbytes_out_lo32, nbytes_out_hi32; Int32 bzerr, bzerr_dummy, ret; SET_BINARY_MODE(stream); @@ -350,8 +540,8 @@ void compressStream ( FILE *stream, FILE *zStream ) if (ferror(stream)) goto errhandler_io; if (ferror(zStream)) goto errhandler_io; - bzf = bzWriteOpen ( &bzerr, zStream, - blockSize100k, verbosity, workFactor ); + bzf = BZ2_bzWriteOpen ( &bzerr, zStream, + blockSize100k, verbosity, workFactor ); if (bzerr != BZ_OK) goto errhandler; if (verbosity >= 2) fprintf ( stderr, "\n" ); @@ -361,12 +551,14 @@ void compressStream ( FILE *stream, FILE *zStream ) if (myfeof(stream)) break; nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream ); if (ferror(stream)) goto errhandler_io; - if (nIbuf > 0) bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf ); + if (nIbuf > 0) BZ2_bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf ); if (bzerr 
!= BZ_OK) goto errhandler; } - bzWriteClose ( &bzerr, bzf, 0, &nbytes_in, &nbytes_out ); + BZ2_bzWriteClose64 ( &bzerr, bzf, 0, + &nbytes_in_lo32, &nbytes_in_hi32, + &nbytes_out_lo32, &nbytes_out_hi32 ); if (bzerr != BZ_OK) goto errhandler; if (ferror(zStream)) goto errhandler_io; @@ -380,25 +572,42 @@ void compressStream ( FILE *stream, FILE *zStream ) ret = fclose ( stream ); if (ret == EOF) goto errhandler_io; - if (nbytes_in == 0) nbytes_in = 1; + if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0) + nbytes_in_lo32 = 1; - if (verbosity >= 1) + if (verbosity >= 1) { + Char buf_nin[32], buf_nout[32]; + UInt64 nbytes_in, nbytes_out; + double nbytes_in_d, nbytes_out_d; + uInt64_from_UInt32s ( &nbytes_in, + nbytes_in_lo32, nbytes_in_hi32 ); + uInt64_from_UInt32s ( &nbytes_out, + nbytes_out_lo32, nbytes_out_hi32 ); + nbytes_in_d = uInt64_to_double ( &nbytes_in ); + nbytes_out_d = uInt64_to_double ( &nbytes_out ); + uInt64_toAscii ( buf_nin, &nbytes_in ); + uInt64_toAscii ( buf_nout, &nbytes_out ); fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, " - "%5.2f%% saved, %d in, %d out.\n", - (float)nbytes_in / (float)nbytes_out, - (8.0 * (float)nbytes_out) / (float)nbytes_in, - 100.0 * (1.0 - (float)nbytes_out / (float)nbytes_in), - nbytes_in, - nbytes_out + "%5.2f%% saved, %s in, %s out.\n", + nbytes_in_d / nbytes_out_d, + (8.0 * nbytes_out_d) / nbytes_in_d, + 100.0 * (1.0 - nbytes_out_d / nbytes_in_d), + buf_nin, + buf_nout ); + } return; errhandler: - bzWriteClose ( &bzerr_dummy, bzf, 1, &nbytes_in, &nbytes_out ); + BZ2_bzWriteClose64 ( &bzerr_dummy, bzf, 1, + &nbytes_in_lo32, &nbytes_in_hi32, + &nbytes_out_lo32, &nbytes_out_hi32 ); switch (bzerr) { + case BZ_CONFIG_ERROR: + configError(); break; case BZ_MEM_ERROR: - outOfMemory (); + outOfMemory (); break; case BZ_IO_ERROR: errhandler_io: ioError(); break; @@ -413,6 +622,7 @@ void compressStream ( FILE *stream, FILE *zStream ) /*---------------------------------------------*/ +static Bool uncompressStream ( FILE *zStream, FILE 
*stream ) { BZFILE* bzf = NULL; @@ -433,7 +643,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) while (True) { - bzf = bzReadOpen ( + bzf = BZ2_bzReadOpen ( &bzerr, zStream, verbosity, (int)smallMode, unused, nUnused ); @@ -441,7 +651,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) streamNo++; while (bzerr == BZ_OK) { - nread = bzRead ( &bzerr, bzf, obuf, 5000 ); + nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0) fwrite ( obuf, sizeof(UChar), nread, stream ); @@ -449,12 +659,12 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) } if (bzerr != BZ_STREAM_END) goto errhandler; - bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); + BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; - bzReadClose ( &bzerr, bzf ); + BZ2_bzReadClose ( &bzerr, bzf ); if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); if (nUnused == 0 && myfeof(zStream)) break; @@ -476,8 +686,10 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) return True; errhandler: - bzReadClose ( &bzerr_dummy, bzf ); + BZ2_bzReadClose ( &bzerr_dummy, bzf ); switch (bzerr) { + case BZ_CONFIG_ERROR: + configError(); break; case BZ_IO_ERROR: errhandler_io: ioError(); break; @@ -488,6 +700,8 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) case BZ_UNEXPECTED_EOF: compressedStreamEOF(); case BZ_DATA_ERROR_MAGIC: + if (zStream != stdin) fclose(zStream); + if (stream != stdout) fclose(stream); if (streamNo == 1) { return False; } else { @@ -507,6 +721,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream ) /*---------------------------------------------*/ +static Bool testStream ( FILE *zStream ) { BZFILE* bzf = NULL; @@ -524,7 +739,7 @@ Bool testStream ( FILE *zStream ) while (True) { - bzf = bzReadOpen ( + bzf = 
BZ2_bzReadOpen ( &bzerr, zStream, verbosity, (int)smallMode, unused, nUnused ); @@ -532,17 +747,17 @@ Bool testStream ( FILE *zStream ) streamNo++; while (bzerr == BZ_OK) { - nread = bzRead ( &bzerr, bzf, obuf, 5000 ); + nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 ); if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler; } if (bzerr != BZ_STREAM_END) goto errhandler; - bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); + BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; - bzReadClose ( &bzerr, bzf ); + BZ2_bzReadClose ( &bzerr, bzf ); if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); if (nUnused == 0 && myfeof(zStream)) break; @@ -556,10 +771,12 @@ Bool testStream ( FILE *zStream ) return True; errhandler: - bzReadClose ( &bzerr_dummy, bzf ); + BZ2_bzReadClose ( &bzerr_dummy, bzf ); if (verbosity == 0) fprintf ( stderr, "%s: %s: ", progName, inName ); switch (bzerr) { + case BZ_CONFIG_ERROR: + configError(); break; case BZ_IO_ERROR: errhandler_io: ioError(); break; @@ -574,6 +791,7 @@ Bool testStream ( FILE *zStream ) "file ends unexpectedly\n" ); return False; case BZ_DATA_ERROR_MAGIC: + if (zStream != stdin) fclose(zStream); if (streamNo == 1) { fprintf ( stderr, "bad magic number (file not created by bzip2)\n" ); @@ -598,6 +816,15 @@ Bool testStream ( FILE *zStream ) /*---------------------------------------------------*/ /*---------------------------------------------*/ +static +void setExit ( Int32 v ) +{ + if (v > exitValue) exitValue = v; +} + + +/*---------------------------------------------*/ +static void cadvise ( void ) { if (noisy) @@ -612,6 +839,7 @@ void cadvise ( void ) /*---------------------------------------------*/ +static void showFileNames ( void ) { if (noisy) @@ -624,11 +852,14 @@ void showFileNames ( void ) /*---------------------------------------------*/ +static void cleanUpAndFail ( Int32 ec ) { 
IntNative retVal; - if ( srcMode == SM_F2F && opMode != OM_TEST ) { + if ( srcMode == SM_F2F + && opMode != OM_TEST + && deleteOutputOnInterrupt ) { if (noisy) fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n", progName, outName ); @@ -647,11 +878,13 @@ void cleanUpAndFail ( Int32 ec ) progName, numFileNames, numFileNames - numFilesProcessed ); } - exit ( ec ); + setExit(ec); + exit(exitValue); } /*---------------------------------------------*/ +static void panic ( Char* s ) { fprintf ( stderr, @@ -666,6 +899,7 @@ void panic ( Char* s ) /*---------------------------------------------*/ +static void crcError ( void ) { fprintf ( stderr, @@ -678,6 +912,7 @@ void crcError ( void ) /*---------------------------------------------*/ +static void compressedStreamEOF ( void ) { fprintf ( stderr, @@ -692,10 +927,12 @@ void compressedStreamEOF ( void ) /*---------------------------------------------*/ +static void ioError ( void ) { fprintf ( stderr, - "\n%s: I/O or other error, bailing out. Possible reason follows.\n", + "\n%s: I/O or other error, bailing out. " + "Possible reason follows.\n", progName ); perror ( progName ); showFileNames(); @@ -704,6 +941,7 @@ void ioError ( void ) /*---------------------------------------------*/ +static void mySignalCatcher ( IntNative n ) { fprintf ( stderr, @@ -714,20 +952,53 @@ void mySignalCatcher ( IntNative n ) /*---------------------------------------------*/ +static void mySIGSEGVorSIGBUScatcher ( IntNative n ) { if (opMode == OM_Z) - fprintf ( stderr, - "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n" - "\twhich probably indicates a bug in bzip2. 
Please\n" - "\treport it to me at: jseward@acm.org\n", - progName ); + fprintf ( + stderr, + "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing.\n" + "\n" + " Possible causes are (most likely first):\n" + " (1) This computer has unreliable memory or cache hardware\n" + " (a surprisingly common problem; try a different machine.)\n" + " (2) A bug in the compiler used to create this executable\n" + " (unlikely, if you didn't compile bzip2 yourself.)\n" + " (3) A real bug in bzip2 -- I hope this should never be the case.\n" + " The user's manual, Section 4.3, has more info on (1) and (2).\n" + " \n" + " If you suspect this is a bug in bzip2, or are unsure about (1)\n" + " or (2), feel free to report it to me at: jseward@acm.org.\n" + " Section 4.3 of the user's manual describes the info a useful\n" + " bug report should have. If the manual is available on your\n" + " system, please try and read it before mailing me. If you don't\n" + " have the manual or can't be bothered to read it, mail me anyway.\n" + "\n", + progName ); else - fprintf ( stderr, - "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n" - "\twhich probably indicates that the compressed data\n" - "\tis corrupted.\n", - progName ); + fprintf ( + stderr, + "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n" + "\n" + " Possible causes are (most likely first):\n" + " (1) The compressed data is corrupted, and bzip2's usual checks\n" + " failed to detect this. 
Try bzip2 -tvv my_file.bz2.\n" + " (2) This computer has unreliable memory or cache hardware\n" + " (a surprisingly common problem; try a different machine.)\n" + " (3) A bug in the compiler used to create this executable\n" + " (unlikely, if you didn't compile bzip2 yourself.)\n" + " (4) A real bug in bzip2 -- I hope this should never be the case.\n" + " The user's manual, Section 4.3, has more info on (2) and (3).\n" + " \n" + " If you suspect this is a bug in bzip2, or are unsure about (2)\n" + " or (3), feel free to report it to me at: jseward@acm.org.\n" + " Section 4.3 of the user's manual describes the info a useful\n" + " bug report should have. If the manual is available on your\n" + " system, please try and read it before mailing me. If you don't\n" + " have the manual or can't be bothered to read it, mail me anyway.\n" + "\n", + progName ); showFileNames(); if (opMode == OM_Z) @@ -737,6 +1008,7 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n ) /*---------------------------------------------*/ +static void outOfMemory ( void ) { fprintf ( stderr, @@ -747,11 +1019,27 @@ void outOfMemory ( void ) } +/*---------------------------------------------*/ +static +void configError ( void ) +{ + fprintf ( stderr, + "bzip2: I'm not configured correctly for this platform!\n" + "\tI require Int32, Int16 and Char to have sizes\n" + "\tof 4, 2 and 1 bytes to run properly, and they don't.\n" + "\tProbably you can fix this by defining them correctly,\n" + "\tand recompiling. 
Bye!\n" ); + setExit(3); + exit(exitValue); +} + + /*---------------------------------------------------*/ /*--- The main driver machinery ---*/ /*---------------------------------------------------*/ /*---------------------------------------------*/ +static void pad ( Char *s ) { Int32 i; @@ -762,6 +1050,7 @@ void pad ( Char *s ) /*---------------------------------------------*/ +static void copyFileName ( Char* to, Char* from ) { if ( strlen(from) > FILE_NAME_LEN-10 ) { @@ -772,7 +1061,8 @@ void copyFileName ( Char* to, Char* from ) "Try using a reasonable file name instead. Sorry! :-)\n", from, FILE_NAME_LEN-10 ); - exit(1); + setExit(1); + exit(exitValue); } strncpy(to,from,FILE_NAME_LEN-10); @@ -781,6 +1071,7 @@ void copyFileName ( Char* to, Char* from ) /*---------------------------------------------*/ +static Bool fileExists ( Char* name ) { FILE *tmp = fopen ( name, "rb" ); @@ -794,6 +1085,7 @@ Bool fileExists ( Char* name ) /*-- if in doubt, return True --*/ +static Bool notAStandardFile ( Char* name ) { IntNative i; @@ -810,6 +1102,7 @@ Bool notAStandardFile ( Char* name ) /*-- rac 11/21/98 see if file has hard links to it --*/ +static Int32 countHardLinks ( Char* name ) { IntNative i; @@ -822,6 +1115,7 @@ Int32 countHardLinks ( Char* name ) /*---------------------------------------------*/ +static void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName ) { #if BZ_UNIX @@ -849,6 +1143,7 @@ void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName ) /*---------------------------------------------*/ +static void setInterimPermissions ( Char *dstName ) { #if BZ_UNIX @@ -860,6 +1155,7 @@ void setInterimPermissions ( Char *dstName ) /*---------------------------------------------*/ +static Bool containsDubiousChars ( Char* name ) { Bool cdc = False; @@ -877,6 +1173,7 @@ Char* zSuffix[BZ_N_SUFFIX_PAIRS] Char* unzSuffix[BZ_N_SUFFIX_PAIRS] = { "", "", ".tar", ".tar" }; +static Bool hasSuffix ( Char* s, Char* suffix ) { Int32 ns = strlen(s); @@ 
-886,6 +1183,7 @@ Bool hasSuffix ( Char* s, Char* suffix ) return False; } +static Bool mapSuffix ( Char* name, Char* oldSuffix, Char* newSuffix ) { @@ -897,11 +1195,15 @@ Bool mapSuffix ( Char* name, /*---------------------------------------------*/ +static void compress ( Char *name ) { FILE *inStr; FILE *outStr; Int32 n, i; + + deleteOutputOnInterrupt = False; + if (name == NULL && srcMode != SM_I2O) panic ( "compress: bad modes\n" ); @@ -924,12 +1226,14 @@ void compress ( Char *name ) if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) { if (noisy) fprintf ( stderr, "%s: There are no files matching `%s'.\n", - progName, inName ); + progName, inName ); + setExit(1); return; } if ( srcMode != SM_I2O && !fileExists ( inName ) ) { fprintf ( stderr, "%s: Can't open input file %s: %s.\n", progName, inName, strerror(errno) ); + setExit(1); return; } for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++) { @@ -938,6 +1242,7 @@ void compress ( Char *name ) fprintf ( stderr, "%s: Input file %s already has %s suffix.\n", progName, inName, zSuffix[i] ); + setExit(1); return; } } @@ -945,17 +1250,20 @@ void compress ( Char *name ) if (noisy) fprintf ( stderr, "%s: Input file %s is not a normal file.\n", progName, inName ); + setExit(1); return; } if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { fprintf ( stderr, "%s: Output file %s already exists.\n", progName, outName ); + setExit(1); return; } if ( srcMode == SM_F2F && !forceOverwrite && (n=countHardLinks ( inName )) > 0) { fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", progName, inName, n, n > 1 ? 
"s" : "" ); + setExit(1); return; } @@ -970,6 +1278,7 @@ void compress ( Char *name ) progName ); fprintf ( stderr, "%s: For help, type: `%s --help'.\n", progName, progName ); + setExit(1); return; }; break; @@ -984,11 +1293,13 @@ void compress ( Char *name ) fprintf ( stderr, "%s: For help, type: `%s --help'.\n", progName, progName ); if ( inStr != NULL ) fclose ( inStr ); + setExit(1); return; }; if ( inStr == NULL ) { fprintf ( stderr, "%s: Can't open input file %s: %s.\n", progName, inName, strerror(errno) ); + setExit(1); return; }; break; @@ -1000,12 +1311,14 @@ void compress ( Char *name ) fprintf ( stderr, "%s: Can't create output file %s: %s.\n", progName, outName, strerror(errno) ); if ( inStr != NULL ) fclose ( inStr ); + setExit(1); return; } if ( inStr == NULL ) { fprintf ( stderr, "%s: Can't open input file %s: %s.\n", progName, inName, strerror(errno) ); if ( outStr != NULL ) fclose ( outStr ); + setExit(1); return; }; setInterimPermissions ( outName ); @@ -1024,21 +1337,26 @@ void compress ( Char *name ) /*--- Now the input and output handles are sane. Do the Biz. ---*/ outputHandleJustInCase = outStr; + deleteOutputOnInterrupt = True; compressStream ( inStr, outStr ); outputHandleJustInCase = NULL; /*--- If there was an I/O error, we won't get here. 
---*/ if ( srcMode == SM_F2F ) { copyDatePermissionsAndOwner ( inName, outName ); + deleteOutputOnInterrupt = False; if ( !keepInputFiles ) { IntNative retVal = remove ( inName ); ERROR_IF_NOT_ZERO ( retVal ); } } + + deleteOutputOnInterrupt = False; } /*---------------------------------------------*/ +static void uncompress ( Char *name ) { FILE *inStr; @@ -1047,6 +1365,8 @@ void uncompress ( Char *name ) Bool magicNumberOK; Bool cantGuess; + deleteOutputOnInterrupt = False; + if (name == NULL && srcMode != SM_I2O) panic ( "uncompress: bad modes\n" ); @@ -1076,17 +1396,20 @@ void uncompress ( Char *name ) if (noisy) fprintf ( stderr, "%s: There are no files matching `%s'.\n", progName, inName ); + setExit(1); return; } if ( srcMode != SM_I2O && !fileExists ( inName ) ) { fprintf ( stderr, "%s: Can't open input file %s: %s.\n", progName, inName, strerror(errno) ); + setExit(1); return; } if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) { if (noisy) fprintf ( stderr, "%s: Input file %s is not a normal file.\n", progName, inName ); + setExit(1); return; } if ( /* srcMode == SM_F2F implied && */ cantGuess ) { @@ -1099,12 +1422,14 @@ void uncompress ( Char *name ) if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) { fprintf ( stderr, "%s: Output file %s already exists.\n", progName, outName ); + setExit(1); return; } if ( srcMode == SM_F2F && !forceOverwrite && (n=countHardLinks ( inName ) ) > 0) { fprintf ( stderr, "%s: Input file %s has %d other link%s.\n", progName, inName, n, n > 1 ? 
"s" : "" ); + setExit(1); return; } @@ -1119,6 +1444,7 @@ void uncompress ( Char *name ) progName ); fprintf ( stderr, "%s: For help, type: `%s --help'.\n", progName, progName ); + setExit(1); return; }; break; @@ -1130,6 +1456,7 @@ void uncompress ( Char *name ) fprintf ( stderr, "%s: Can't open input file %s:%s.\n", progName, inName, strerror(errno) ); if ( inStr != NULL ) fclose ( inStr ); + setExit(1); return; }; break; @@ -1141,12 +1468,14 @@ void uncompress ( Char *name ) fprintf ( stderr, "%s: Can't create output file %s: %s.\n", progName, outName, strerror(errno) ); if ( inStr != NULL ) fclose ( inStr ); + setExit(1); return; } if ( inStr == NULL ) { fprintf ( stderr, "%s: Can't open input file %s: %s.\n", progName, inName, strerror(errno) ); if ( outStr != NULL ) fclose ( outStr ); + setExit(1); return; }; setInterimPermissions ( outName ); @@ -1165,6 +1494,7 @@ void uncompress ( Char *name ) /*--- Now the input and output handles are sane. Do the Biz. ---*/ outputHandleJustInCase = outStr; + deleteOutputOnInterrupt = True; magicNumberOK = uncompressStream ( inStr, outStr ); outputHandleJustInCase = NULL; @@ -1172,22 +1502,27 @@ void uncompress ( Char *name ) if ( magicNumberOK ) { if ( srcMode == SM_F2F ) { copyDatePermissionsAndOwner ( inName, outName ); + deleteOutputOnInterrupt = False; if ( !keepInputFiles ) { IntNative retVal = remove ( inName ); ERROR_IF_NOT_ZERO ( retVal ); } } } else { + unzFailsExist = True; + deleteOutputOnInterrupt = False; if ( srcMode == SM_F2F ) { IntNative retVal = remove ( outName ); ERROR_IF_NOT_ZERO ( retVal ); } } + deleteOutputOnInterrupt = False; if ( magicNumberOK ) { if (verbosity >= 1) fprintf ( stderr, "done\n" ); } else { + setExit(2); if (verbosity >= 1) fprintf ( stderr, "not a bzip2 file.\n" ); else fprintf ( stderr, @@ -1199,11 +1534,14 @@ void uncompress ( Char *name ) /*---------------------------------------------*/ +static void testf ( Char *name ) { FILE *inStr; Bool allOK; + deleteOutputOnInterrupt = 
False; + if (name == NULL && srcMode != SM_I2O) panic ( "testf: bad modes\n" ); @@ -1218,11 +1556,13 @@ void testf ( Char *name ) if (noisy) fprintf ( stderr, "%s: There are no files matching `%s'.\n", progName, inName ); + setExit(1); return; } if ( srcMode != SM_I2O && !fileExists ( inName ) ) { fprintf ( stderr, "%s: Can't open input %s: %s.\n", progName, inName, strerror(errno) ); + setExit(1); return; } @@ -1235,6 +1575,7 @@ void testf ( Char *name ) progName ); fprintf ( stderr, "%s: For help, type: `%s --help'.\n", progName, progName ); + setExit(1); return; }; inStr = stdin; @@ -1245,6 +1586,7 @@ void testf ( Char *name ) if ( inStr == NULL ) { fprintf ( stderr, "%s: Can't open input file %s:%s.\n", progName, inName, strerror(errno) ); + setExit(1); return; }; break; @@ -1269,35 +1611,38 @@ void testf ( Char *name ) /*---------------------------------------------*/ +static void license ( void ) { fprintf ( stderr, "bzip2, a block-sorting file compressor. " - "Version 0.9.5d, 4-Sept-99.\n" + "Version %s.\n" " \n" - " Copyright (C) 1996, 1997, 1998, 1999 by Julian Seward.\n" + " Copyright (C) 1996-2000 by Julian Seward.\n" " \n" " This program is free software; you can redistribute it and/or modify\n" " it under the terms set out in the LICENSE file, which is included\n" - " in the bzip2-0.9.5 source distribution.\n" + " in the bzip2-1.0 source distribution.\n" " \n" " This program is distributed in the hope that it will be useful,\n" " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" " LICENSE file for more details.\n" - " \n" + " \n", + BZ2_bzlibVersion() ); } /*---------------------------------------------*/ +static void usage ( Char *fullProgName ) { fprintf ( stderr, "bzip2, a block-sorting file compressor. 
" - "Version 0.9.5d, 4-Sept-99.\n" + "Version %s.\n" "\n usage: %s [flags and input files in any order]\n" "\n" " -h --help print this message\n" @@ -1326,12 +1671,14 @@ void usage ( Char *fullProgName ) #endif , + BZ2_bzlibVersion(), fullProgName ); } /*---------------------------------------------*/ +static void redundant ( Char* flag ) { fprintf ( @@ -1365,6 +1712,7 @@ typedef /*---------------------------------------------*/ +static void *myMalloc ( Int32 n ) { void* p; @@ -1376,6 +1724,7 @@ void *myMalloc ( Int32 n ) /*---------------------------------------------*/ +static Cell *mkCell ( void ) { Cell *c; @@ -1388,6 +1737,7 @@ Cell *mkCell ( void ) /*---------------------------------------------*/ +static Cell *snocString ( Cell *root, Char *name ) { if (root == NULL) { @@ -1405,6 +1755,7 @@ Cell *snocString ( Cell *root, Char *name ) /*---------------------------------------------*/ +static void addFlagsFromEnvVar ( Cell** argList, Char* varName ) { Int32 i, j, k; @@ -1445,16 +1796,8 @@ IntNative main ( IntNative argc, Char *argv[] ) /*-- Be really really really paranoid :-) --*/ if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 || sizeof(Int16) != 2 || sizeof(UInt16) != 2 || - sizeof(Char) != 1 || sizeof(UChar) != 1) { - fprintf ( stderr, - "bzip2: I'm not configured correctly for this platform!\n" - "\tI require Int32, Int16 and Char to have sizes\n" - "\tof 4, 2 and 1 bytes to run properly, and they don't.\n" - "\tProbably you can fix this by defining them correctly,\n" - "\tand recompiling. 
Bye!\n" ); - exit(3); - } - + sizeof(Char) != 1 || sizeof(UChar) != 1) + configError(); /*-- Initialise --*/ outputHandleJustInCase = NULL; @@ -1465,9 +1808,12 @@ IntNative main ( IntNative argc, Char *argv[] ) verbosity = 0; blockSize100k = 9; testFailsExist = False; + unzFailsExist = False; numFileNames = 0; numFilesProcessed = 0; workFactor = 30; + deleteOutputOnInterrupt = False; + exitValue = 0; i = j = 0; /* avoid bogus warning from egcs-1.1.X */ /*-- Set up signal handlers for mem access errors --*/ @@ -1636,6 +1982,7 @@ IntNative main ( IntNative argc, Char *argv[] ) else if (opMode == OM_UNZ) { + unzFailsExist = False; if (srcMode == SM_I2O) { uncompress ( NULL ); } else { @@ -1647,6 +1994,10 @@ IntNative main ( IntNative argc, Char *argv[] ) uncompress ( aa->name ); } } + if (unzFailsExist) { + setExit(2); + exit(exitValue); + } } else { @@ -1668,7 +2019,8 @@ IntNative main ( IntNative argc, Char *argv[] ) "You can use the `bzip2recover' program to attempt to recover\n" "data from undamaged sections of corrupted files.\n\n" ); - exit(2); + setExit(2); + exit(exitValue); } } @@ -1678,12 +2030,12 @@ IntNative main ( IntNative argc, Char *argv[] ) aa = argList; while (aa != NULL) { Cell* aa2 = aa->link; - if (aa->name) free(aa->name); + if (aa->name != NULL) free(aa->name); free(aa); aa = aa2; } - return 0; + return exitValue; } @@ -1,7 +1,7 @@ NAME - bzip2, bunzip2 - a block-sorting file compressor, v0.9.5 + bzip2, bunzip2 - a block-sorting file compressor, v1.0 bzcat - decompresses files to stdout bzip2recover - recovers data from damaged bzip2 files @@ -337,14 +337,14 @@ CAVEATS but the details of what the problem is sometimes seem rather misleading. - This manual page pertains to version 0.9.5 of bzip2. Com- + This manual page pertains to version 1.0 of bzip2. 
Com- pressed data created by this version is entirely forwards and backwards compatible with the previous public - releases, versions 0.1pl2 and 0.9.0, but with the follow- - ing exception: 0.9.0 and above can correctly decompress - multiple concatenated compressed files. 0.1pl2 cannot do - this; it will stop after decompressing just the first file - in the stream. + releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the + following exception: 0.9.0 and above can correctly decom- + press multiple concatenated compressed files. 0.1pl2 can- + not do this; it will stop after decompressing just the + first file in the stream. bzip2recover uses 32-bit integers to represent bit posi- tions in compressed files, so it cannot handle compressed @@ -355,6 +355,7 @@ CAVEATS AUTHOR Julian Seward, jseward@acm.org. + http://sourceware.cygnus.com/bzip2 http://www.muraroa.demon.co.uk The ideas embodied in bzip2 are due to (at least) the fol- diff --git a/bzip2recover.c b/bzip2recover.c index 1323b36..ba3d175 100644 --- a/bzip2recover.c +++ b/bzip2recover.c @@ -7,9 +7,9 @@ /*-- This program is bzip2recover, a program to attempt data salvage from damaged files created by the accompanying - bzip2-0.9.5 program. + bzip2-1.0 program. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -44,7 +44,7 @@ Julian Seward, Cambridge, UK. 
jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 --*/ /*-- @@ -282,7 +282,7 @@ Int32 main ( Int32 argc, Char** argv ) strcpy ( progName, argv[0] ); inFileName[0] = outFileName[0] = 0; - fprintf ( stderr, "bzip2recover 0.9.5d: extracts blocks from damaged .bz2 files.\n" ); + fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" ); if (argc != 2) { fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n", @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -83,18 +83,19 @@ /*---------------------------------------------------*/ #ifndef BZ_NO_STDIO -void bz__AssertH__fail ( int errcode ) +void BZ2_bz__AssertH__fail ( int errcode ) { fprintf(stderr, - "\n\nbzip2/libbzip2, v0.9.5d: internal error number %d.\n" - "This is a bug in bzip2/libbzip2, v0.9.5d. Please report\n" - "it to me at: jseward@acm.org. If this happened when\n" - "you were using some program which uses libbzip2 as a\n" + "\n\nbzip2/libbzip2: internal error number %d.\n" + "This is a bug in bzip2/libbzip2, %s.\n" + "Please report it to me at: jseward@acm.org. If this happened\n" + "when you were using some program which uses libbzip2 as a\n" "component, you should also report this bug to the author(s)\n" "of that program. Please make an effort to report this bug;\n" "timely and accurate bug reports eventually lead to higher\n" - "quality software. Thanks. 
Julian Seward, 4 Sept 1999.\n\n", - errcode + "quality software. Thanks. Julian Seward, 21 March 2000.\n\n", + errcode, + BZ2_bzlibVersion() ); exit(3); } @@ -103,6 +104,17 @@ void bz__AssertH__fail ( int errcode ) /*---------------------------------------------------*/ static +int bz_config_ok ( void ) +{ + if (sizeof(int) != 4) return 0; + if (sizeof(short) != 2) return 0; + if (sizeof(char) != 1) return 0; + return 1; +} + + +/*---------------------------------------------------*/ +static void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) { void* v = malloc ( items * size ); @@ -149,7 +161,7 @@ Bool isempty_RL ( EState* s ) /*---------------------------------------------------*/ -int BZ_API(bzCompressInit) +int BZ_API(BZ2_bzCompressInit) ( bz_stream* strm, int blockSize100k, int verbosity, @@ -158,6 +170,8 @@ int BZ_API(bzCompressInit) Int32 n; EState* s; + if (!bz_config_ok()) return BZ_CONFIG_ERROR; + if (strm == NULL || blockSize100k < 1 || blockSize100k > 9 || workFactor < 0 || workFactor > 250) @@ -197,14 +211,16 @@ int BZ_API(bzCompressInit) s->verbosity = verbosity; s->workFactor = workFactor; - s->block = (UInt16*)s->arr2; + s->block = (UChar*)s->arr2; s->mtfv = (UInt16*)s->arr1; s->zbits = NULL; s->ptr = (UInt32*)s->arr1; strm->state = s; - strm->total_in = 0; - strm->total_out = 0; + strm->total_in_lo32 = 0; + strm->total_in_hi32 = 0; + strm->total_out_lo32 = 0; + strm->total_out_hi32 = 0; init_RL ( s ); prepare_new_block ( s ); return BZ_OK; @@ -223,24 +239,24 @@ void add_pair_to_block ( EState* s ) s->inUse[s->state_in_ch] = True; switch (s->state_in_len) { case 1: - s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; break; case 2: - s->block[s->nblock] = (UInt16)ch; s->nblock++; - s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; break; case 3: - s->block[s->nblock] = (UInt16)ch; s->nblock++; - 
s->block[s->nblock] = (UInt16)ch; s->nblock++; - s->block[s->nblock] = (UInt16)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; break; default: s->inUse[s->state_in_len-4] = True; - s->block[s->nblock] = (UInt16)ch; s->nblock++; - s->block[s->nblock] = (UInt16)ch; s->nblock++; - s->block[s->nblock] = (UInt16)ch; s->nblock++; - s->block[s->nblock] = (UInt16)ch; s->nblock++; - s->block[s->nblock] = ((UInt16)(s->state_in_len-4)); + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = ((UChar)(s->state_in_len-4)); s->nblock++; break; } @@ -266,7 +282,7 @@ void flush_RL ( EState* s ) UChar ch = (UChar)(zs->state_in_ch); \ BZ_UPDATE_CRC( zs->blockCRC, ch ); \ zs->inUse[zs->state_in_ch] = True; \ - zs->block[zs->nblock] = (UInt16)ch; \ + zs->block[zs->nblock] = (UChar)ch; \ zs->nblock++; \ zs->state_in_ch = zchh; \ } \ @@ -302,7 +318,8 @@ Bool copy_input_until_stop ( EState* s ) ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); s->strm->next_in++; s->strm->avail_in--; - s->strm->total_in++; + s->strm->total_in_lo32++; + if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; } } else { @@ -319,7 +336,8 @@ Bool copy_input_until_stop ( EState* s ) ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); s->strm->next_in++; s->strm->avail_in--; - s->strm->total_in++; + s->strm->total_in_lo32++; + if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; s->avail_in_expect--; } } @@ -346,8 +364,8 @@ Bool copy_output_until_stop ( EState* s ) s->state_out_pos++; s->strm->avail_out--; s->strm->next_out++; - s->strm->total_out++; - + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; } return progress_out; @@ -381,12 +399,12 @@ Bool 
handle_compress ( bz_stream* strm ) progress_in |= copy_input_until_stop ( s ); if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { flush_RL ( s ); - compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); + BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); s->state = BZ_S_OUTPUT; } else if (s->nblock >= s->nblockMAX) { - compressBlock ( s, False ); + BZ2_compressBlock ( s, False ); s->state = BZ_S_OUTPUT; } else @@ -402,7 +420,7 @@ Bool handle_compress ( bz_stream* strm ) /*---------------------------------------------------*/ -int BZ_API(bzCompress) ( bz_stream *strm, int action ) +int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) { Bool progress; EState* s; @@ -439,7 +457,8 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action ) case BZ_M_FLUSHING: if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; - if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; progress = handle_compress ( strm ); if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) return BZ_FLUSH_OK; @@ -448,7 +467,8 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action ) case BZ_M_FINISHING: if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; - if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; progress = handle_compress ( strm ); if (!progress) return BZ_SEQUENCE_ERROR; if (s->avail_in_expect > 0 || !isempty_RL(s) || @@ -461,7 +481,7 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action ) /*---------------------------------------------------*/ -int BZ_API(bzCompressEnd) ( bz_stream *strm ) +int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) { EState* s; if (strm == NULL) return BZ_PARAM_ERROR; @@ -485,13 +505,15 @@ int BZ_API(bzCompressEnd) ( bz_stream *strm ) /*---------------------------------------------------*/ 
/*---------------------------------------------------*/ -int BZ_API(bzDecompressInit) +int BZ_API(BZ2_bzDecompressInit) ( bz_stream* strm, int verbosity, int small ) { DState* s; + if (!bz_config_ok()) return BZ_CONFIG_ERROR; + if (strm == NULL) return BZ_PARAM_ERROR; if (small != 0 && small != 1) return BZ_PARAM_ERROR; if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR; @@ -507,8 +529,10 @@ int BZ_API(bzDecompressInit) s->bsLive = 0; s->bsBuff = 0; s->calculatedCombinedCRC = 0; - strm->total_in = 0; - strm->total_out = 0; + strm->total_in_lo32 = 0; + strm->total_in_hi32 = 0; + strm->total_out_lo32 = 0; + strm->total_out_hi32 = 0; s->smallDecompress = (Bool)small; s->ll4 = NULL; s->ll16 = NULL; @@ -538,7 +562,8 @@ void unRLE_obuf_to_output_FAST ( DState* s ) s->state_out_len--; s->strm->next_out++; s->strm->avail_out--; - s->strm->total_out++; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; } /* can a new run be started? */ @@ -585,8 +610,9 @@ void unRLE_obuf_to_output_FAST ( DState* s ) unsigned int cs_avail_out = s->strm->avail_out; /* end restore */ - UInt32 avail_out_INIT = cs_avail_out; - Int32 s_save_nblockPP = s->save_nblock+1; + UInt32 avail_out_INIT = cs_avail_out; + Int32 s_save_nblockPP = s->save_nblock+1; + unsigned int total_out_lo32_old; while (True) { @@ -640,7 +666,10 @@ void unRLE_obuf_to_output_FAST ( DState* s ) } return_notr: - s->strm->total_out += (avail_out_INIT - cs_avail_out); + total_out_lo32_old = s->strm->total_out_lo32; + s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out); + if (s->strm->total_out_lo32 < total_out_lo32_old) + s->strm->total_out_hi32++; /* save */ s->calculatedBlockCRC = c_calculatedBlockCRC; @@ -659,7 +688,7 @@ void unRLE_obuf_to_output_FAST ( DState* s ) /*---------------------------------------------------*/ -__inline__ Int32 indexIntoF ( Int32 indx, Int32 *cftab ) +__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab ) { Int32 nb, na, mid; nb = 0; @@ 
-691,7 +720,8 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) s->state_out_len--; s->strm->next_out++; s->strm->avail_out--; - s->strm->total_out++; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; } /* can a new run be started? */ @@ -736,7 +766,8 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) s->state_out_len--; s->strm->next_out++; s->strm->avail_out--; - s->strm->total_out++; + s->strm->total_out_lo32++; + if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; } /* can a new run be started? */ @@ -768,7 +799,7 @@ void unRLE_obuf_to_output_SMALL ( DState* s ) /*---------------------------------------------------*/ -int BZ_API(bzDecompress) ( bz_stream *strm ) +int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) { DState* s; if (strm == NULL) return BZ_PARAM_ERROR; @@ -800,7 +831,7 @@ int BZ_API(bzDecompress) ( bz_stream *strm ) } } if (s->state >= BZ_X_MAGIC_1) { - Int32 r = decompress ( s ); + Int32 r = BZ2_decompress ( s ); if (r == BZ_STREAM_END) { if (s->verbosity >= 3) VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x", @@ -820,7 +851,7 @@ int BZ_API(bzDecompress) ( bz_stream *strm ) /*---------------------------------------------------*/ -int BZ_API(bzDecompressEnd) ( bz_stream *strm ) +int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm ) { DState* s; if (strm == NULL) return BZ_PARAM_ERROR; @@ -874,7 +905,7 @@ static Bool myfeof ( FILE* f ) /*---------------------------------------------------*/ -BZFILE* BZ_API(bzWriteOpen) +BZFILE* BZ_API(BZ2_bzWriteOpen) ( int* bzerror, FILE* f, int blockSize100k, @@ -909,8 +940,8 @@ BZFILE* BZ_API(bzWriteOpen) bzf->strm.opaque = NULL; if (workFactor == 0) workFactor = 30; - ret = bzCompressInit ( &(bzf->strm), blockSize100k, - verbosity, workFactor ); + ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, + verbosity, workFactor ); if (ret != BZ_OK) { BZ_SETERR(ret); free(bzf); return NULL; }; @@ -922,7 +953,7 @@ BZFILE* BZ_API(bzWriteOpen) 
/*---------------------------------------------------*/ -void BZ_API(bzWrite) +void BZ_API(BZ2_bzWrite) ( int* bzerror, BZFILE* b, void* buf, @@ -948,7 +979,7 @@ void BZ_API(bzWrite) while (True) { bzf->strm.avail_out = BZ_MAX_UNUSED; bzf->strm.next_out = bzf->buf; - ret = bzCompress ( &(bzf->strm), BZ_RUN ); + ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN ); if (ret != BZ_RUN_OK) { BZ_SETERR(ret); return; }; @@ -967,13 +998,27 @@ void BZ_API(bzWrite) /*---------------------------------------------------*/ -void BZ_API(bzWriteClose) +void BZ_API(BZ2_bzWriteClose) ( int* bzerror, BZFILE* b, int abandon, unsigned int* nbytes_in, unsigned int* nbytes_out ) { + BZ2_bzWriteClose64 ( bzerror, b, abandon, + nbytes_in, NULL, nbytes_out, NULL ); +} + + +void BZ_API(BZ2_bzWriteClose64) + ( int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in_lo32, + unsigned int* nbytes_in_hi32, + unsigned int* nbytes_out_lo32, + unsigned int* nbytes_out_hi32 ) +{ Int32 n, n2, ret; bzFile* bzf = (bzFile*)b; @@ -984,14 +1029,16 @@ void BZ_API(bzWriteClose) if (ferror(bzf->handle)) { BZ_SETERR(BZ_IO_ERROR); return; }; - if (nbytes_in != NULL) *nbytes_in = 0; - if (nbytes_out != NULL) *nbytes_out = 0; + if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0; + if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0; + if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0; + if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0; if ((!abandon) && bzf->lastErr == BZ_OK) { while (True) { bzf->strm.avail_out = BZ_MAX_UNUSED; bzf->strm.next_out = bzf->buf; - ret = bzCompress ( &(bzf->strm), BZ_FINISH ); + ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH ); if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) { BZ_SETERR(ret); return; }; @@ -1013,17 +1060,23 @@ void BZ_API(bzWriteClose) { BZ_SETERR(BZ_IO_ERROR); return; }; } - if (nbytes_in != NULL) *nbytes_in = bzf->strm.total_in; - if (nbytes_out != NULL) *nbytes_out = bzf->strm.total_out; + if (nbytes_in_lo32 != NULL) + *nbytes_in_lo32 = bzf->strm.total_in_lo32; 
+ if (nbytes_in_hi32 != NULL) + *nbytes_in_hi32 = bzf->strm.total_in_hi32; + if (nbytes_out_lo32 != NULL) + *nbytes_out_lo32 = bzf->strm.total_out_lo32; + if (nbytes_out_hi32 != NULL) + *nbytes_out_hi32 = bzf->strm.total_out_hi32; BZ_SETERR(BZ_OK); - bzCompressEnd ( &(bzf->strm) ); + BZ2_bzCompressEnd ( &(bzf->strm) ); free ( bzf ); } /*---------------------------------------------------*/ -BZFILE* BZ_API(bzReadOpen) +BZFILE* BZ_API(BZ2_bzReadOpen) ( int* bzerror, FILE* f, int verbosity, @@ -1066,7 +1119,7 @@ BZFILE* BZ_API(bzReadOpen) nUnused--; } - ret = bzDecompressInit ( &(bzf->strm), verbosity, small ); + ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small ); if (ret != BZ_OK) { BZ_SETERR(ret); free(bzf); return NULL; }; @@ -1079,7 +1132,7 @@ BZFILE* BZ_API(bzReadOpen) /*---------------------------------------------------*/ -void BZ_API(bzReadClose) ( int *bzerror, BZFILE *b ) +void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b ) { bzFile* bzf = (bzFile*)b; @@ -1091,13 +1144,13 @@ void BZ_API(bzReadClose) ( int *bzerror, BZFILE *b ) { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; if (bzf->initialisedOk) - (void)bzDecompressEnd ( &(bzf->strm) ); + (void)BZ2_bzDecompressEnd ( &(bzf->strm) ); free ( bzf ); } /*---------------------------------------------------*/ -int BZ_API(bzRead) +int BZ_API(BZ2_bzRead) ( int* bzerror, BZFILE* b, void* buf, @@ -1135,7 +1188,7 @@ int BZ_API(bzRead) bzf->strm.next_in = bzf->buf; } - ret = bzDecompress ( &(bzf->strm) ); + ret = BZ2_bzDecompress ( &(bzf->strm) ); if (ret != BZ_OK && ret != BZ_STREAM_END) { BZ_SETERR(ret); return 0; }; @@ -1157,7 +1210,7 @@ int BZ_API(bzRead) /*---------------------------------------------------*/ -void BZ_API(bzReadGetUnused) +void BZ_API(BZ2_bzReadGetUnused) ( int* bzerror, BZFILE* b, void** unused, @@ -1183,7 +1236,7 @@ void BZ_API(bzReadGetUnused) /*---------------------------------------------------*/ /*---------------------------------------------------*/ -int 
BZ_API(bzBuffToBuffCompress) +int BZ_API(BZ2_bzBuffToBuffCompress) ( char* dest, unsigned int* destLen, char* source, @@ -1206,8 +1259,8 @@ int BZ_API(bzBuffToBuffCompress) strm.bzalloc = NULL; strm.bzfree = NULL; strm.opaque = NULL; - ret = bzCompressInit ( &strm, blockSize100k, - verbosity, workFactor ); + ret = BZ2_bzCompressInit ( &strm, blockSize100k, + verbosity, workFactor ); if (ret != BZ_OK) return ret; strm.next_in = source; @@ -1215,27 +1268,27 @@ int BZ_API(bzBuffToBuffCompress) strm.avail_in = sourceLen; strm.avail_out = *destLen; - ret = bzCompress ( &strm, BZ_FINISH ); + ret = BZ2_bzCompress ( &strm, BZ_FINISH ); if (ret == BZ_FINISH_OK) goto output_overflow; if (ret != BZ_STREAM_END) goto errhandler; /* normal termination */ *destLen -= strm.avail_out; - bzCompressEnd ( &strm ); + BZ2_bzCompressEnd ( &strm ); return BZ_OK; output_overflow: - bzCompressEnd ( &strm ); + BZ2_bzCompressEnd ( &strm ); return BZ_OUTBUFF_FULL; errhandler: - bzCompressEnd ( &strm ); + BZ2_bzCompressEnd ( &strm ); return ret; } /*---------------------------------------------------*/ -int BZ_API(bzBuffToBuffDecompress) +int BZ_API(BZ2_bzBuffToBuffDecompress) ( char* dest, unsigned int* destLen, char* source, @@ -1255,7 +1308,7 @@ int BZ_API(bzBuffToBuffDecompress) strm.bzalloc = NULL; strm.bzfree = NULL; strm.opaque = NULL; - ret = bzDecompressInit ( &strm, verbosity, small ); + ret = BZ2_bzDecompressInit ( &strm, verbosity, small ); if (ret != BZ_OK) return ret; strm.next_in = source; @@ -1263,26 +1316,26 @@ int BZ_API(bzBuffToBuffDecompress) strm.avail_in = sourceLen; strm.avail_out = *destLen; - ret = bzDecompress ( &strm ); + ret = BZ2_bzDecompress ( &strm ); if (ret == BZ_OK) goto output_overflow_or_eof; if (ret != BZ_STREAM_END) goto errhandler; /* normal termination */ *destLen -= strm.avail_out; - bzDecompressEnd ( &strm ); + BZ2_bzDecompressEnd ( &strm ); return BZ_OK; output_overflow_or_eof: if (strm.avail_out > 0) { - bzDecompressEnd ( &strm ); + 
BZ2_bzDecompressEnd ( &strm ); return BZ_UNEXPECTED_EOF; } else { - bzDecompressEnd ( &strm ); + BZ2_bzDecompressEnd ( &strm ); return BZ_OUTBUFF_FULL; }; errhandler: - bzDecompressEnd ( &strm ); + BZ2_bzDecompressEnd ( &strm ); return ret; } @@ -1303,7 +1356,7 @@ int BZ_API(bzBuffToBuffDecompress) /*-- return version like "0.9.0c". --*/ -const char * BZ_API(bzlibVersion)(void) +const char * BZ_API(BZ2_bzlibVersion)(void) { return BZ_VERSION; } @@ -1377,9 +1430,11 @@ BZFILE * bzopen_or_bzdopen /* Guard against total chaos and anarchy -- JRS */ if (blockSize100k < 1) blockSize100k = 1; if (blockSize100k > 9) blockSize100k = 9; - bzfp = bzWriteOpen(&bzerr,fp,blockSize100k,verbosity,workFactor); + bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k, + verbosity,workFactor); } else { - bzfp = bzReadOpen(&bzerr,fp,verbosity,smallMode,unused,nUnused); + bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode, + unused,nUnused); } if (bzfp == NULL) { if (fp != stdin && fp != stdout) fclose(fp); @@ -1395,7 +1450,7 @@ BZFILE * bzopen_or_bzdopen ex) bzopen("file","w9") case path="" or NULL => use stdin or stdout. 
--*/ -BZFILE * BZ_API(bzopen) +BZFILE * BZ_API(BZ2_bzopen) ( const char *path, const char *mode ) { @@ -1404,7 +1459,7 @@ BZFILE * BZ_API(bzopen) /*---------------------------------------------------*/ -BZFILE * BZ_API(bzdopen) +BZFILE * BZ_API(BZ2_bzdopen) ( int fd, const char *mode ) { @@ -1413,11 +1468,11 @@ BZFILE * BZ_API(bzdopen) /*---------------------------------------------------*/ -int BZ_API(bzread) (BZFILE* b, void* buf, int len ) +int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len ) { int bzerr, nread; if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0; - nread = bzRead(&bzerr,b,buf,len); + nread = BZ2_bzRead(&bzerr,b,buf,len); if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) { return nread; } else { @@ -1427,11 +1482,11 @@ int BZ_API(bzread) (BZFILE* b, void* buf, int len ) /*---------------------------------------------------*/ -int BZ_API(bzwrite) (BZFILE* b, void* buf, int len ) +int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len ) { int bzerr; - bzWrite(&bzerr,b,buf,len); + BZ2_bzWrite(&bzerr,b,buf,len); if(bzerr == BZ_OK){ return len; }else{ @@ -1441,7 +1496,7 @@ int BZ_API(bzwrite) (BZFILE* b, void* buf, int len ) /*---------------------------------------------------*/ -int BZ_API(bzflush) (BZFILE *b) +int BZ_API(BZ2_bzflush) (BZFILE *b) { /* do nothing now... 
*/ return 0; @@ -1449,19 +1504,19 @@ int BZ_API(bzflush) (BZFILE *b) /*---------------------------------------------------*/ -void BZ_API(bzclose) (BZFILE* b) +void BZ_API(BZ2_bzclose) (BZFILE* b) { int bzerr; FILE *fp = ((bzFile *)b)->handle; if (b==NULL) {return;} if(((bzFile*)b)->writing){ - bzWriteClose(&bzerr,b,0,NULL,NULL); + BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL); if(bzerr != BZ_OK){ - bzWriteClose(NULL,b,1,NULL,NULL); + BZ2_bzWriteClose(NULL,b,1,NULL,NULL); } }else{ - bzReadClose(&bzerr,b); + BZ2_bzReadClose(&bzerr,b); } if(fp!=stdin && fp!=stdout){ fclose(fp); @@ -1483,6 +1538,7 @@ static char *bzerrorstrings[] = { ,"IO_ERROR" ,"UNEXPECTED_EOF" ,"OUTBUFF_FULL" + ,"CONFIG_ERROR" ,"???" /* for future */ ,"???" /* for future */ ,"???" /* for future */ @@ -1492,7 +1548,7 @@ static char *bzerrorstrings[] = { }; -const char * BZ_API(bzerror) (BZFILE *b, int *errnum) +const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum) { int err = ((bzFile *)b)->lastErr; @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. 
jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -83,16 +83,19 @@ extern "C" { #define BZ_IO_ERROR (-6) #define BZ_UNEXPECTED_EOF (-7) #define BZ_OUTBUFF_FULL (-8) +#define BZ_CONFIG_ERROR (-9) typedef struct { char *next_in; unsigned int avail_in; - unsigned int total_in; + unsigned int total_in_lo32; + unsigned int total_in_hi32; char *next_out; unsigned int avail_out; - unsigned int total_out; + unsigned int total_out_lo32; + unsigned int total_out_hi32; void *state; @@ -130,33 +133,33 @@ typedef /*-- Core (low-level) library functions --*/ -BZ_EXTERN int BZ_API(bzCompressInit) ( +BZ_EXTERN int BZ_API(BZ2_bzCompressInit) ( bz_stream* strm, int blockSize100k, int verbosity, int workFactor ); -BZ_EXTERN int BZ_API(bzCompress) ( +BZ_EXTERN int BZ_API(BZ2_bzCompress) ( bz_stream* strm, int action ); -BZ_EXTERN int BZ_API(bzCompressEnd) ( +BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( bz_stream* strm ); -BZ_EXTERN int BZ_API(bzDecompressInit) ( +BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( bz_stream *strm, int verbosity, int small ); -BZ_EXTERN int BZ_API(bzDecompress) ( +BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( bz_stream* strm ); -BZ_EXTERN int BZ_API(bzDecompressEnd) ( +BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm ); @@ -169,7 +172,7 @@ BZ_EXTERN int BZ_API(bzDecompressEnd) ( typedef void BZFILE; -BZ_EXTERN BZFILE* BZ_API(bzReadOpen) ( +BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( int* bzerror, FILE* f, int verbosity, @@ -178,26 +181,26 @@ BZ_EXTERN BZFILE* BZ_API(bzReadOpen) ( int nUnused ); -BZ_EXTERN void BZ_API(bzReadClose) ( +BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( int* bzerror, BZFILE* b ); -BZ_EXTERN void BZ_API(bzReadGetUnused) ( +BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( int* bzerror, BZFILE* b, void** unused, int* nUnused ); -BZ_EXTERN int BZ_API(bzRead) ( +BZ_EXTERN int BZ_API(BZ2_bzRead) ( int* bzerror, BZFILE* b, 
void* buf, int len ); -BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) ( +BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( int* bzerror, FILE* f, int blockSize100k, @@ -205,26 +208,36 @@ BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) ( int workFactor ); -BZ_EXTERN void BZ_API(bzWrite) ( +BZ_EXTERN void BZ_API(BZ2_bzWrite) ( int* bzerror, BZFILE* b, void* buf, int len ); -BZ_EXTERN void BZ_API(bzWriteClose) ( +BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( int* bzerror, BZFILE* b, int abandon, unsigned int* nbytes_in, unsigned int* nbytes_out ); + +BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( + int* bzerror, + BZFILE* b, + int abandon, + unsigned int* nbytes_in_lo32, + unsigned int* nbytes_in_hi32, + unsigned int* nbytes_out_lo32, + unsigned int* nbytes_out_hi32 + ); #endif /*-- Utility functions --*/ -BZ_EXTERN int BZ_API(bzBuffToBuffCompress) ( +BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( char* dest, unsigned int* destLen, char* source, @@ -234,7 +247,7 @@ BZ_EXTERN int BZ_API(bzBuffToBuffCompress) ( int workFactor ); -BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) ( +BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( char* dest, unsigned int* destLen, char* source, @@ -254,42 +267,42 @@ BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) ( If this code breaks, please contact both Yoshioka and me. 
--*/ -BZ_EXTERN const char * BZ_API(bzlibVersion) ( +BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) ( void ); #ifndef BZ_NO_STDIO -BZ_EXTERN BZFILE * BZ_API(bzopen) ( +BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) ( const char *path, const char *mode ); -BZ_EXTERN BZFILE * BZ_API(bzdopen) ( +BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) ( int fd, const char *mode ); -BZ_EXTERN int BZ_API(bzread) ( +BZ_EXTERN int BZ_API(BZ2_bzread) ( BZFILE* b, void* buf, int len ); -BZ_EXTERN int BZ_API(bzwrite) ( +BZ_EXTERN int BZ_API(BZ2_bzwrite) ( BZFILE* b, void* buf, int len ); -BZ_EXTERN int BZ_API(bzflush) ( +BZ_EXTERN int BZ_API(BZ2_bzflush) ( BZFILE* b ); -BZ_EXTERN void BZ_API(bzclose) ( +BZ_EXTERN void BZ_API(BZ2_bzclose) ( BZFILE* b ); -BZ_EXTERN const char * BZ_API(bzerror) ( +BZ_EXTERN const char * BZ_API(BZ2_bzerror) ( BZFILE *b, int *errnum ); diff --git a/bzlib_private.h b/bzlib_private.h index 8e93480..fb51c7a 100644 --- a/bzlib_private.h +++ b/bzlib_private.h @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -76,7 +76,7 @@ /*-- General stuff. 
--*/ -#define BZ_VERSION "0.9.5d" +#define BZ_VERSION "1.0.1, 23-June-2000" typedef char Char; typedef unsigned char Bool; @@ -94,9 +94,9 @@ typedef unsigned short UInt16; #endif #ifndef BZ_NO_STDIO -extern void bz__AssertH__fail ( int errcode ); +extern void BZ2_bz__AssertH__fail ( int errcode ); #define AssertH(cond,errcode) \ - { if (!(cond)) bz__AssertH__fail ( errcode ); } + { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); } #if BZ_DEBUG #define AssertD(cond,msg) \ { if (!(cond)) { \ @@ -155,7 +155,7 @@ extern void bz_internal_error ( int errcode ); /*-- Stuff for randomising repetitive blocks. --*/ -extern Int32 rNums[512]; +extern Int32 BZ2_rNums[512]; #define BZ_RAND_DECLS \ Int32 rNToGo; \ @@ -169,7 +169,7 @@ extern Int32 rNums[512]; #define BZ_RAND_UPD_MASK \ if (s->rNToGo == 0) { \ - s->rNToGo = rNums[s->rTPos]; \ + s->rNToGo = BZ2_rNums[s->rTPos]; \ s->rTPos++; \ if (s->rTPos == 512) s->rTPos = 0; \ } \ @@ -179,7 +179,7 @@ extern Int32 rNums[512]; /*-- Stuff for doing CRCs. 
--*/ -extern UInt32 crc32Table[256]; +extern UInt32 BZ2_crc32Table[256]; #define BZ_INITIALISE_CRC(crcVar) \ { \ @@ -194,8 +194,8 @@ extern UInt32 crc32Table[256]; #define BZ_UPDATE_CRC(crcVar,cha) \ { \ crcVar = (crcVar << 8) ^ \ - crc32Table[(crcVar >> 24) ^ \ - ((UChar)cha)]; \ + BZ2_crc32Table[(crcVar >> 24) ^ \ + ((UChar)cha)]; \ } @@ -241,7 +241,7 @@ typedef /* aliases for arr1 and arr2 */ UInt32* ptr; - UInt16* block; + UChar* block; UInt16* mtfv; UChar* zbits; @@ -283,9 +283,11 @@ typedef UChar selector [BZ_MAX_SELECTORS]; UChar selectorMtf[BZ_MAX_SELECTORS]; - UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + /* second dimension: only 3 needed; 4 makes index calculations faster */ + UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4]; } EState; @@ -295,19 +297,19 @@ typedef /*-- externs for compression. --*/ extern void -blockSort ( EState* ); +BZ2_blockSort ( EState* ); extern void -compressBlock ( EState*, Bool ); +BZ2_compressBlock ( EState*, Bool ); extern void -bsInitWrite ( EState* ); +BZ2_bsInitWrite ( EState* ); extern void -hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); +BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); extern void -hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); +BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); @@ -493,22 +495,22 @@ typedef #define GET_LL(i) \ (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) -#define BZ_GET_SMALL(cccc) \ - cccc = indexIntoF ( s->tPos, s->cftab ); \ +#define BZ_GET_SMALL(cccc) \ + cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \ s->tPos = GET_LL(s->tPos); /*-- externs for decompression. 
--*/ extern Int32 -indexIntoF ( Int32, Int32* ); +BZ2_indexIntoF ( Int32, Int32* ); extern Int32 -decompress ( DState* ); +BZ2_decompress ( DState* ); extern void -hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, - Int32, Int32, Int32 ); +BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, + Int32, Int32, Int32 ); #endif @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -78,7 +78,7 @@ /*---------------------------------------------------*/ /*---------------------------------------------------*/ -void bsInitWrite ( EState* s ) +void BZ2_bsInitWrite ( EState* s ) { s->bsLive = 0; s->bsBuff = 0; @@ -113,6 +113,7 @@ void bsFinishWrite ( EState* s ) /*---------------------------------------------------*/ static +__inline__ void bsW ( EState* s, Int32 n, UInt32 v ) { bsNEEDW ( n ); @@ -164,8 +165,6 @@ void generateMTFValues ( EState* s ) { UChar yy[256]; Int32 i, j; - UChar tmp; - UChar tmp2; Int32 zPend; Int32 wr; Int32 EOB; @@ -174,7 +173,7 @@ void generateMTFValues ( EState* s ) After sorting (eg, here), s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, and - ((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8] + ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] holds the original block data. 
The first thing to do is generate the MTF values, @@ -186,14 +185,14 @@ void generateMTFValues ( EState* s ) The final compressed bitstream is generated into the area starting at - (UChar*) (&((UInt16)s->arr2)[s->nblock]) + (UChar*) (&((UChar*)s->arr2)[s->nblock]) These storage aliases are set up in bzCompressInit(), except for the last one, which is arranged in compressBlock(). */ UInt32* ptr = s->ptr; - UInt16* block = s->block; + UChar* block = s->block; UInt16* mtfv = s->mtfv; makeMaps_e ( s ); @@ -207,27 +206,14 @@ void generateMTFValues ( EState* s ) for (i = 0; i < s->nblock; i++) { UChar ll_i; - AssertD ( wr <= i, "generateMTFValues(1)" ); j = ptr[i]-1; if (j < 0) j += s->nblock; - ll_i = s->unseqToSeq[block[j] >> 8]; + ll_i = s->unseqToSeq[block[j]]; AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); - tmp = yy[0]; - if (tmp == ll_i) { + if (yy[0] == ll_i) { zPend++; } else { - tmp2 = tmp; - tmp = yy[1]; - yy[1] = tmp2; - j = 1; - while ( ll_i != tmp ) { - j++; - tmp2 = tmp; - tmp = yy[j]; - yy[j] = tmp2; - }; - yy[0] = tmp; if (zPend > 0) { zPend--; @@ -244,7 +230,26 @@ void generateMTFValues ( EState* s ) }; zPend = 0; } - mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; + { + register UChar rtmp; + register UChar* ryy_j; + register UChar rll_i; + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while ( rll_i != rtmp ) { + register UChar rtmp2; + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; + } + } } @@ -261,6 +266,7 @@ void generateMTFValues ( EState* s ) if (zPend < 2) break; zPend = (zPend - 2) / 2; }; + zPend = 0; } mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; @@ -365,6 +371,18 @@ void sendMTFValues ( EState* s ) for (v = 0; v < alphaSize; v++) s->rfreq[t][v] = 0; + /*--- + Set up an auxiliary length table which is used to fast-track + the common case (nGroups == 6). 
+ ---*/ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } + nSelectors = 0; totc = 0; gs = 0; @@ -381,21 +399,37 @@ void sendMTFValues ( EState* s ) --*/ for (t = 0; t < nGroups; t++) cost[t] = 0; - if (nGroups == 6) { - register UInt16 cost0, cost1, cost2, cost3, cost4, cost5; - cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0; - for (i = gs; i <= ge; i++) { - UInt16 icv = mtfv[i]; - cost0 += s->len[0][icv]; - cost1 += s->len[1][icv]; - cost2 += s->len[2][icv]; - cost3 += s->len[3][icv]; - cost4 += s->len[4][icv]; - cost5 += s->len[5][icv]; - } - cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; - cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + register UInt32 cost01, cost23, cost45; + register UInt16 icv; + cost01 = cost23 = cost45 = 0; + +# define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; \ + + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); + +# undef BZ_ITER + + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 
0xffff; cost[5] = cost45 >> 16; + } else { + /*--- slow version which correctly handles all situations ---*/ for (i = gs; i <= ge; i++) { UInt16 icv = mtfv[i]; for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; @@ -417,8 +451,29 @@ void sendMTFValues ( EState* s ) /*-- Increment the symbol frequencies for the selected table. --*/ - for (i = gs; i <= ge; i++) - s->rfreq[bt][ mtfv[i] ]++; + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + +# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ + + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); + +# undef BZ_ITUR + + } else { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) + s->rfreq[bt][ mtfv[i] ]++; + } gs = ge+1; } @@ -434,8 +489,8 @@ void sendMTFValues ( EState* s ) Recompute the tables based on the accumulated frequencies. 
--*/ for (t = 0; t < nGroups; t++) - hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), - alphaSize, 20 ); + BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), + alphaSize, 20 ); } @@ -474,8 +529,8 @@ void sendMTFValues ( EState* s ) } AssertH ( !(maxLen > 20), 3004 ); AssertH ( !(minLen < 1), 3005 ); - hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), - minLen, maxLen, alphaSize ); + BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), + minLen, maxLen, alphaSize ); } /*--- Transmit the mapping table. ---*/ @@ -536,13 +591,45 @@ void sendMTFValues ( EState* s ) if (gs >= s->nMTF) break; ge = gs + BZ_G_SIZE - 1; if (ge >= s->nMTF) ge = s->nMTF-1; - for (i = gs; i <= ge; i++) { - AssertH ( s->selector[selCtr] < nGroups, 3006 ); - bsW ( s, - s->len [s->selector[selCtr]] [mtfv[i]], - s->code [s->selector[selCtr]] [mtfv[i]] ); + AssertH ( s->selector[selCtr] < nGroups, 3006 ); + + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + UInt16 mtfv_i; + UChar* s_len_sel_selCtr + = &(s->len[s->selector[selCtr]][0]); + Int32* s_code_sel_selCtr + = &(s->code[s->selector[selCtr]][0]); + +# define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW ( s, \ + s_len_sel_selCtr[mtfv_i], \ + s_code_sel_selCtr[mtfv_i] ) + + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); + +# undef BZ_ITAH + + } else { + /*--- slow version which correctly handles 
all situations ---*/ + for (i = gs; i <= ge; i++) { + bsW ( s, + s->len [s->selector[selCtr]] [mtfv[i]], + s->code [s->selector[selCtr]] [mtfv[i]] ); + } } + gs = ge+1; selCtr++; } @@ -554,7 +641,7 @@ void sendMTFValues ( EState* s ) /*---------------------------------------------------*/ -void compressBlock ( EState* s, Bool is_last_block ) +void BZ2_compressBlock ( EState* s, Bool is_last_block ) { if (s->nblock > 0) { @@ -568,14 +655,14 @@ void compressBlock ( EState* s, Bool is_last_block ) "combined CRC = 0x%8x, size = %d\n", s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); - blockSort ( s ); + BZ2_blockSort ( s ); } - s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]); + s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); /*-- If this is the first block, create the stream header. --*/ if (s->blockNo == 1) { - bsInitWrite ( s ); + BZ2_bsInitWrite ( s ); bsPutUChar ( s, 'B' ); bsPutUChar ( s, 'Z' ); bsPutUChar ( s, 'h' ); @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -68,7 +68,7 @@ comp.compression FAQ. --*/ -UInt32 crc32Table[256] = { +UInt32 BZ2_crc32Table[256] = { /*-- Ugly, innit? --*/ diff --git a/decompress.c b/decompress.c index 31f8b67..cdced18 100644 --- a/decompress.c +++ b/decompress.c @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. 
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -99,7 +99,9 @@ void makeMaps_d ( DState* s ) s->bsLive += 8; \ s->strm->next_in++; \ s->strm->avail_in--; \ - s->strm->total_in++; \ + s->strm->total_in_lo32++; \ + if (s->strm->total_in_lo32 == 0) \ + s->strm->total_in_hi32++; \ } #define GET_UCHAR(lll,uuu) \ @@ -113,6 +115,8 @@ void makeMaps_d ( DState* s ) { \ if (groupPos == 0) { \ groupNo++; \ + if (groupNo >= nSelectors) \ + RETURN(BZ_DATA_ERROR); \ groupPos = BZ_G_SIZE; \ gSel = s->selector[groupNo]; \ gMinlen = s->minLens[gSel]; \ @@ -123,17 +127,23 @@ void makeMaps_d ( DState* s ) groupPos--; \ zn = gMinlen; \ GET_BITS(label1, zvec, zn); \ - while (zvec > gLimit[zn]) { \ + while (1) { \ + if (zn > 20 /* the longest code */) \ + RETURN(BZ_DATA_ERROR); \ + if (zvec <= gLimit[zn]) break; \ zn++; \ GET_BIT(label2, zj); \ zvec = (zvec << 1) | zj; \ }; \ + if (zvec - gBase[zn] < 0 \ + || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ + RETURN(BZ_DATA_ERROR); \ lval = gPerm[zvec - gBase[zn]]; \ } /*---------------------------------------------------*/ -Int32 decompress ( DState* s ) +Int32 BZ2_decompress ( DState* s ) { UChar uc; Int32 retVal; @@ -288,6 +298,11 @@ Int32 decompress ( DState* s ) GET_UCHAR(BZ_X_ORIGPTR_3, uc); s->origPtr = (s->origPtr << 8) | ((Int32)uc); + if (s->origPtr < 0) + RETURN(BZ_DATA_ERROR); + if (s->origPtr > 10 + 100000*s->blockSize100k) + RETURN(BZ_DATA_ERROR); + /*--- Receive the mapping table ---*/ for (i = 0; i < 16; i++) { GET_BIT(BZ_X_MAPPING_1, uc); @@ -305,18 +320,21 @@ Int32 decompress ( DState* s ) if (uc == 1) s->inUse[i * 16 + j] = True; } makeMaps_d ( s ); + if 
(s->nInUse == 0) RETURN(BZ_DATA_ERROR); alphaSize = s->nInUse+2; /*--- Now the selectors ---*/ GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); + if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR); GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); + if (nSelectors < 1) RETURN(BZ_DATA_ERROR); for (i = 0; i < nSelectors; i++) { j = 0; while (True) { GET_BIT(BZ_X_SELECTOR_3, uc); if (uc == 0) break; j++; - if (j > 5) RETURN(BZ_DATA_ERROR); + if (j >= nGroups) RETURN(BZ_DATA_ERROR); } s->selectorMtf[i] = j; } @@ -358,7 +376,7 @@ Int32 decompress ( DState* s ) if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; if (s->len[t][i] < minLen) minLen = s->len[t][i]; } - hbCreateDecodeTables ( + BZ2_hbCreateDecodeTables ( &(s->limit[t][0]), &(s->base[t][0]), &(s->perm[t][0]), @@ -392,7 +410,6 @@ Int32 decompress ( DState* s ) /*-- end MTF init --*/ nblock = 0; - GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); while (True) { @@ -417,23 +434,24 @@ Int32 decompress ( DState* s ) if (s->smallDecompress) while (es > 0) { + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); s->ll16[nblock] = (UInt16)uc; nblock++; es--; } else while (es > 0) { + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); s->tt[nblock] = (UInt32)uc; nblock++; es--; }; - if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR); continue; } else { - if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR); + if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); /*-- uc = MTF ( nextSym-1 ) --*/ { @@ -500,6 +518,12 @@ Int32 decompress ( DState* s ) } } + /* Now we know what nblock is, we can do a better sanity + check on s->origPtr. + */ + if (s->origPtr < 0 || s->origPtr >= nblock) + RETURN(BZ_DATA_ERROR); + s->state_out_len = 0; s->state_out_ch = 0; BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); @@ -1,165 +1,176 @@ -/* - minibz2 - libbz2.dll test program. - by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp) - This file is Public Domain. - welcome any email to me. 
- - usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename] -*/ - -#define BZ_IMPORT -#include <stdio.h> -#include <stdlib.h> -#include "bzlib.h" -#ifdef _WIN32 -#include <io.h> -#endif - - -#ifdef _WIN32 - -#include <windows.h> -static int BZ2DLLLoaded = 0; -static HINSTANCE BZ2DLLhLib; -int BZ2DLLLoadLibrary(void) -{ - HINSTANCE hLib; - - if(BZ2DLLLoaded==1){return 0;} - hLib=LoadLibrary("libbz2.dll"); - if(hLib == NULL){ - puts("Can't load libbz2.dll"); - return -1; - } - BZ2DLLLoaded=1; - BZ2DLLhLib=hLib; - bzlibVersion=GetProcAddress(hLib,"bzlibVersion"); - bzopen=GetProcAddress(hLib,"bzopen"); - bzdopen=GetProcAddress(hLib,"bzdopen"); - bzread=GetProcAddress(hLib,"bzread"); - bzwrite=GetProcAddress(hLib,"bzwrite"); - bzflush=GetProcAddress(hLib,"bzflush"); - bzclose=GetProcAddress(hLib,"bzclose"); - bzerror=GetProcAddress(hLib,"bzerror"); - return 0; - -} -int BZ2DLLFreeLibrary(void) -{ - if(BZ2DLLLoaded==0){return 0;} - FreeLibrary(BZ2DLLhLib); - BZ2DLLLoaded=0; -} -#endif /* WIN32 */ - -void usage(void) -{ - puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]"); -} - -int main(int argc,char *argv[]) -{ - int decompress = 0; - int level = 9; - char *fn_r = NULL; - char *fn_w = NULL; - -#ifdef _WIN32 - if(BZ2DLLLoadLibrary()<0){ - puts("can't load dll"); - exit(1); - } -#endif - while(++argv,--argc){ - if(**argv =='-' || **argv=='/'){ - char *p; - - for(p=*argv+1;*p;p++){ - if(*p=='d'){ - decompress = 1; - }else if('1'<=*p && *p<='9'){ - level = *p - '0'; - }else{ - usage(); - exit(1); - } - } - }else{ - break; - } - } - if(argc>=1){ - fn_r = *argv; - argc--;argv++; - }else{ - fn_r = NULL; - } - if(argc>=1){ - fn_w = *argv; - argc--;argv++; - }else{ - fn_w = NULL; - } - { - int len; - char buff[0x1000]; - char mode[10]; - - if(decompress){ - BZFILE *BZ2fp_r = NULL; - FILE *fp_w = NULL; - - if(fn_w){ - if((fp_w = fopen(fn_w,"wb"))==NULL){ - printf("can't open [%s]\n",fn_w); - perror("reason:"); - exit(1); - } - }else{ - fp_w = stdout; - } 
- if((BZ2fp_r == NULL && (BZ2fp_r = bzdopen(fileno(stdin),"rb"))==NULL) - || (BZ2fp_r != NULL && (BZ2fp_r = bzopen(fn_r,"rb"))==NULL)){ - printf("can't bz2openstream\n"); - exit(1); - } - while((len=bzread(BZ2fp_r,buff,0x1000))>0){ - fwrite(buff,1,len,fp_w); - } - bzclose(BZ2fp_r); - if(fp_w != stdout) fclose(fp_w); - }else{ - BZFILE *BZ2fp_w = NULL; - FILE *fp_r = NULL; - - if(fn_r){ - if((fp_r = fopen(fn_r,"rb"))==NULL){ - printf("can't open [%s]\n",fn_r); - perror("reason:"); - exit(1); - } - }else{ - fp_r = stdin; - } - mode[0]='w'; - mode[1] = '0' + level; - mode[2] = '\0'; - - if((fn_w == NULL && (BZ2fp_w = bzdopen(fileno(stdout),mode))==NULL) - || (fn_w !=NULL && (BZ2fp_w = bzopen(fn_w,mode))==NULL)){ - printf("can't bz2openstream\n"); - exit(1); - } - while((len=fread(buff,1,0x1000,fp_r))>0){ - bzwrite(BZ2fp_w,buff,len); - } - bzclose(BZ2fp_w); - if(fp_r!=stdin)fclose(fp_r); - } - } -#ifdef _WIN32 - BZ2DLLFreeLibrary(); -#endif - return 0; -} +/*
+ minibz2
+ libbz2.dll test program.
+ by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
+ This file is Public Domain.
+ Any email to me is welcome.
+
+ usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
+*/
+
+#define BZ_IMPORT
+#include <stdio.h>
+#include <stdlib.h>
+#include "bzlib.h"
+#ifdef _WIN32
+#include <io.h>
+#endif
+
+
+#ifdef _WIN32
+
+#define BZ2_LIBNAME "libbz2-1.0.0.DLL"
+
+#include <windows.h>
+static int BZ2DLLLoaded = 0;   /* 1 once BZ2DLLLoadLibrary() has succeeded; reset to 0 by BZ2DLLFreeLibrary() */
+static HINSTANCE BZ2DLLhLib;   /* module handle from LoadLibrary(); only assigned when loading succeeds */
+/*
+   Load BZ2_LIBNAME and resolve the eight BZ2_* entry points this
+   program uses through GetProcAddress.
+
+   Returns 0 if the library is (or already was) loaded and every
+   entry point was found, -1 otherwise.  A diagnostic is written to
+   stderr on failure.
+*/
+int BZ2DLLLoadLibrary(void)
+{
+   HINSTANCE hLib;
+
+   if(BZ2DLLLoaded==1){return 0;}
+   hLib=LoadLibrary(BZ2_LIBNAME);
+   if(hLib == NULL){
+      fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME);
+      return -1;
+   }
+   BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion");
+   BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen");
+   BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen");
+   BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread");
+   BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite");
+   BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush");
+   BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose");
+   BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror");
+
+   if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen
+       || !BZ2_bzread || !BZ2_bzwrite || !BZ2_bzflush
+       || !BZ2_bzclose || !BZ2_bzerror) {
+      fprintf(stderr,"GetProcAddress failed.\n");
+      /* The handle is not recorded on this path, so release it here;
+         otherwise it would leak and a retry would load the DLL again.
+         Any entry points that did resolve must not be called after
+         this point. */
+      FreeLibrary(hLib);
+      return -1;
+   }
+   /* Publish the handle only once every lookup has succeeded. */
+   BZ2DLLLoaded=1;
+   BZ2DLLhLib=hLib;
+   return 0;
+}
+/*
+   Unload the DLL if BZ2DLLLoadLibrary() previously succeeded and
+   clear the loaded flag.  Does nothing when the DLL is not loaded.
+   Always returns 0.
+*/
+int BZ2DLLFreeLibrary(void)
+{
+   if(BZ2DLLLoaded==0){return 0;}
+   FreeLibrary(BZ2DLLhLib);
+   BZ2DLLLoaded=0;
+   /* The function is declared int: return a value on this path too. */
+   return 0;
+}
+#endif /* WIN32 */
+
+/* Write the one-line command synopsis, followed by a newline, to stdout. */
+void usage(void)
+{
+   static const char synopsis[] =
+      "usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]";
+
+   puts(synopsis);
+}
+
+/*
+   minibz2 entry point.
+
+   Leading arguments that start with '-' or '/' are option groups:
+   'd' selects decompression and a digit '1'..'9' sets the level;
+   any other option character prints the usage line and exits with
+   status 1.  The first remaining argument, if any, names the input
+   file and the second, if any, names the output file; a missing
+   name means stdin or stdout respectively.
+
+   Returns 0 on completion; exits with status 1 if the DLL (Win32
+   only), a file or a bzip2 stream cannot be opened, or if writing
+   the decompressed output fails.
+*/
+int main(int argc,char *argv[])
+{
+   int decompress = 0;    /* set by the 'd' option */
+   int level = 9;         /* digit placed after 'w' in the open mode */
+   char *fn_r = NULL;     /* input file name, NULL means stdin */
+   char *fn_w = NULL;     /* output file name, NULL means stdout */
+
+#ifdef _WIN32
+   if(BZ2DLLLoadLibrary()<0){
+      fprintf(stderr,"Loading of %s failed. Giving up.\n", BZ2_LIBNAME);
+      exit(1);
+   }
+   /* Report on stderr: when no output file is named, stdout carries
+      the (de)compressed data and must not be mixed with messages. */
+   fprintf(stderr,"Loading of %s succeeded. Library version is %s.\n",
+           BZ2_LIBNAME, BZ2_bzlibVersion() );
+#endif
+   /* Consume option groups; stop at the first non-option argument. */
+   while(++argv,--argc){
+      if(**argv =='-' || **argv=='/'){
+         char *p;
+
+         for(p=*argv+1;*p;p++){
+            if(*p=='d'){
+               decompress = 1;
+            }else if('1'<=*p && *p<='9'){
+               level = *p - '0';
+            }else{
+               usage();
+               exit(1);
+            }
+         }
+      }else{
+         break;
+      }
+   }
+   /* Up to two positional arguments: source first, then destination.
+      Both names stay NULL when the argument is absent. */
+   if(argc>=1){
+      fn_r = *argv;
+      argc--;argv++;
+   }
+   if(argc>=1){
+      fn_w = *argv;
+      argc--;argv++;
+   }
+   {
+      int len;
+      char buff[0x1000];
+      char mode[10];
+
+      if(decompress){
+         BZFILE *BZ2fp_r = NULL;
+         FILE *fp_w = NULL;
+
+         if(fn_w){
+            if((fp_w = fopen(fn_w,"wb"))==NULL){
+               printf("can't open [%s]\n",fn_w);
+               perror("reason:");
+               exit(1);
+            }
+         }else{
+            fp_w = stdout;
+         }
+         /* Pick the source by whether a file name was given.  Testing
+            the (always NULL) stream pointer here would send every
+            decompression to stdin and ignore srcfilename. */
+         if((fn_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL)
+            || (fn_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){
+            printf("can't bz2openstream\n");
+            exit(1);
+         }
+         while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){
+            /* A short write means the output is incomplete: stop
+               rather than silently producing a truncated file. */
+            if(fwrite(buff,1,len,fp_w) != (size_t)len){
+               perror("write error");
+               exit(1);
+            }
+         }
+         BZ2_bzclose(BZ2fp_r);
+         if(fp_w != stdout) fclose(fp_w);
+      }else{
+         BZFILE *BZ2fp_w = NULL;
+         FILE *fp_r = NULL;
+
+         if(fn_r){
+            if((fp_r = fopen(fn_r,"rb"))==NULL){
+               printf("can't open [%s]\n",fn_r);
+               perror("reason:");
+               exit(1);
+            }
+         }else{
+            fp_r = stdin;
+         }
+         /* Open mode is "w" followed by the level digit, e.g. "w9". */
+         mode[0]='w';
+         mode[1] = '0' + level;
+         mode[2] = '\0';
+
+         if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL)
+            || (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){
+            printf("can't bz2openstream\n");
+            exit(1);
+         }
+         while((len=fread(buff,1,0x1000,fp_r))>0){
+            BZ2_bzwrite(BZ2fp_w,buff,len);
+         }
+         BZ2_bzclose(BZ2fp_w);
+         if(fp_r!=stdin)fclose(fp_r);
+      }
+   }
+#ifdef _WIN32
+   BZ2DLLFreeLibrary();
+#endif
+   return 0;
+}
@@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -100,10 +100,10 @@ /*---------------------------------------------------*/ -void hbMakeCodeLengths ( UChar *len, - Int32 *freq, - Int32 alphaSize, - Int32 maxLen ) +void BZ2_hbMakeCodeLengths ( UChar *len, + Int32 *freq, + Int32 alphaSize, + Int32 maxLen ) { /*-- Nodes and heap entries run from 1. Entry 0 @@ -172,11 +172,11 @@ void hbMakeCodeLengths ( UChar *len, /*---------------------------------------------------*/ -void hbAssignCodes ( Int32 *code, - UChar *length, - Int32 minLen, - Int32 maxLen, - Int32 alphaSize ) +void BZ2_hbAssignCodes ( Int32 *code, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) { Int32 n, vec, i; @@ -190,13 +190,13 @@ void hbAssignCodes ( Int32 *code, /*---------------------------------------------------*/ -void hbCreateDecodeTables ( Int32 *limit, - Int32 *base, - Int32 *perm, - UChar *length, - Int32 minLen, - Int32 maxLen, - Int32 alphaSize ) +void BZ2_hbCreateDecodeTables ( Int32 *limit, + Int32 *base, + Int32 *perm, + UChar *length, + Int32 minLen, + Int32 maxLen, + Int32 alphaSize ) { Int32 pp, i, j, vec; @@ -1,25 +1,27 @@ LIBRARY LIBBZ2
DESCRIPTION "libbzip2: library for data compression"
EXPORTS
- bzCompressInit
- bzCompress
- bzCompressEnd
- bzDecompressInit
- bzDecompress
- bzDecompressEnd
- bzReadOpen
- bzReadClose
- bzReadGetUnused
- bzRead
- bzWriteOpen
- bzWrite
- bzWriteClose
- bzBuffToBuffCompress
- bzBuffToBuffDecompress
- bzlibVersion
- bzopen
- bzdopen
- bzread
- bzwrite
- bzflush
- bzclose
+ BZ2_bzCompressInit
+ BZ2_bzCompress
+ BZ2_bzCompressEnd
+ BZ2_bzDecompressInit
+ BZ2_bzDecompress
+ BZ2_bzDecompressEnd
+ BZ2_bzReadOpen
+ BZ2_bzReadClose
+ BZ2_bzReadGetUnused
+ BZ2_bzRead
+ BZ2_bzWriteOpen
+ BZ2_bzWrite
+ BZ2_bzWriteClose
+ BZ2_bzWriteClose64
+ BZ2_bzBuffToBuffCompress
+ BZ2_bzBuffToBuffDecompress
+ BZ2_bzlibVersion
+ BZ2_bzopen
+ BZ2_bzdopen
+ BZ2_bzread
+ BZ2_bzwrite
+ BZ2_bzflush
+ BZ2_bzclose
+ BZ2_bzerror
diff --git a/makefile.msc b/makefile.msc index 4b49f78..3fe4232 100644 --- a/makefile.msc +++ b/makefile.msc @@ -4,7 +4,7 @@ # Fixed up by JRS for bzip2-0.9.5d release.
CC=cl
-CFLAGS= -DWIN32 -MD -Ox
+CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64
OBJS= blocksort.obj \
huffman.obj \
@@ -21,7 +21,6 @@ bzip2: lib $(CC) $(CFLAGS) -o bzip2recover bzip2recover.c
lib: $(OBJS)
- del libbz2.lib
lib /out:libbz2.lib $(OBJS)
test: bzip2
@@ -32,20 +31,19 @@ test: bzip2 .\\bzip2 -d < sample1.bz2 > sample1.tst
.\\bzip2 -d < sample2.bz2 > sample2.tst
.\\bzip2 -ds < sample3.bz2 > sample3.tst
+ @echo All six of the fc's should find no differences.
+ @echo If fc finds an error on sample3.bz2, this could be
+ @echo because WinZip's 'TAR file smart CR/LF conversion'
+ @echo is too clever for its own good. Disable this option.
+ @echo The correct size for sample3.ref is 120,244. If it
+ @echo is 150,251, WinZip has messed it up.
fc sample1.bz2 sample1.rb2
fc sample2.bz2 sample2.rb2
fc sample3.bz2 sample3.rb2
fc sample1.tst sample1.ref
fc sample2.tst sample2.ref
fc sample3.tst sample3.ref
- @echo All six of the fc's should find no differences.
- @echo If fc finds an error on sample3.tst, this could be
- @echo because WinZips 'TAR file smart CR/LF conversion'
- @echo is too clever for its own good. Disable this option.
- @echo The correct size for sample3.ref is 120,244. If it
- @echo is around 150k, WinZip has stuffed it up.
- @echo Also remember to set BZ_UNIX to 0 and BZ_LCCWIN32
- @echo to 1 in bzip2.c.
+
clean:
diff --git a/manual.texi b/manual.texi index e48e656..336776a 100644 --- a/manual.texi +++ b/manual.texi @@ -2,10 +2,10 @@ @setfilename bzip2.info @ignore -This file documents bzip2 version 0.9.5, and associated library +This file documents bzip2 version 1.0, and associated library libbzip2, written by Julian Seward (jseward@acm.org). -Copyright (C) 1996-1999 Julian R Seward +Copyright (C) 1996-2000 Julian R Seward Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -30,8 +30,8 @@ END-INFO-DIR-ENTRY @titlepage @title bzip2 and libbzip2 @subtitle a program and library for data compression -@subtitle copyright (C) 1996-1999 Julian Seward -@subtitle version 0.9.5d of 4 September 1999 +@subtitle copyright (C) 1996-2000 Julian Seward +@subtitle version 1.0 of 21 March 2000 @author Julian Seward @end titlepage @@ -44,7 +44,7 @@ END-INFO-DIR-ENTRY This program, @code{bzip2}, and associated library @code{libbzip2}, are -Copyright (C) 1996-1999 Julian R Seward. All rights reserved. +Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -82,9 +82,13 @@ Julian Seward, Cambridge, UK. @code{jseward@@acm.org} +@code{http://sourceware.cygnus.com/bzip2} + +@code{http://www.cacheprof.org} + @code{http://www.muraroa.demon.co.uk} -@code{bzip2}/@code{libbzip2} version 0.9.5 of 24 May 1999. +@code{bzip2}/@code{libbzip2} version 1.0 of 21 March 2000. PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented algorithms. However, I do not have the resources available to carry out @@ -130,7 +134,7 @@ and nothing else. 
@unnumberedsubsubsec NAME @itemize @item @code{bzip2}, @code{bunzip2} -- a block-sorting file compressor, v0.9.5 +- a block-sorting file compressor, v1.0 @item @code{bzcat} - decompresses files to stdout @item @code{bzip2recover} @@ -431,10 +435,10 @@ I/O error messages are not as helpful as they could be. @code{bzip2} tries hard to detect I/O errors and exit cleanly, but the details of what the problem is sometimes seem rather misleading. -This manual page pertains to version 0.9.5 of @code{bzip2}. Compressed +This manual page pertains to version 1.0 of @code{bzip2}. Compressed data created by this version is entirely forwards and backwards -compatible with the previous public releases, versions 0.1pl2 and 0.9.0, -but with the following exception: 0.9.0 and above can correctly +compatible with the previous public releases, versions 0.1pl2, 0.9.0 and +0.9.5, but with the following exception: 0.9.0 and above can correctly decompress multiple concatenated compressed files. 0.1pl2 cannot do this; it will stop after decompressing just the first file in the stream. @@ -486,6 +490,10 @@ The structure of @code{libbzip2}'s interfaces is similar to that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib} library. +All externally visible symbols have names beginning @code{BZ2_}. +This is new in version 1.0. The intention is to minimise pollution +of the namespaces of library clients. + @subsection Low-level summary This interface provides services for compressing and decompressing @@ -498,17 +506,17 @@ The low-level part of the library has no global variables and is therefore thread-safe. Six routines make up the low level interface: -@code{bzCompressInit}, @code{bzCompress}, and @* @code{bzCompressEnd} +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd} for compression, -and a corresponding trio @code{bzDecompressInit}, @* @code{bzDecompress} -and @code{bzDecompressEnd} for decompression. 
+and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress} +and @code{BZ2_bzDecompressEnd} for decompression. The @code{*Init} functions allocate memory for compression/decompression and do other initialisations, whilst the @code{*End} functions close down operations and release memory. -The real work is done by @code{bzCompress} and @code{bzDecompress}. -These compress/decompress data from a user-supplied input buffer +The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}. +These compress and decompress data from a user-supplied input buffer to a user-supplied output buffer. These buffers can be any size; arbitrary quantities of data are handled by making repeated calls to these functions. This is a flexible mechanism allowing a @@ -526,10 +534,10 @@ reading files in which the @code{bzip2} data stream is embedded within some larger-scale file structure, or where there are multiple @code{bzip2} data streams concatenated end-to-end. -For reading files, @code{bzReadOpen}, @code{bzRead}, @code{bzReadClose} -and @code{bzReadGetUnused} are supplied. For writing files, -@code{bzWriteOpen}, @code{bzWrite} and @code{bzWriteFinish} are -available. +For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead}, +@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For +writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and +@code{BZ2_bzWriteFinish} are available. As with the low-level library, no global variables are used so the library is per se thread-safe. However, if I/O errors @@ -539,7 +547,7 @@ the error. In that case, you'd need a C library which correctly supports @code{errno} in a multithreaded environment. 
To make the library a little simpler and more portable, -@code{bzReadOpen} and @code{bzWriteOpen} require you to pass them file +@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file handles (@code{FILE*}s) which have previously been opened for reading or writing respectively. That avoids portability problems associated with file operations and file attributes, whilst not being much of an @@ -548,8 +556,8 @@ imposition on the programmer. @subsection Utility functions summary -For very simple needs, @code{bzBuffToBuffCompress} and -@code{bzBuffToBuffDecompress} are provided. These compress +For very simple needs, @code{BZ2_bzBuffToBuffCompress} and +@code{BZ2_bzBuffToBuffDecompress} are provided. These compress data in memory from one buffer to another buffer in a single function call. You should assess whether these functions fulfill your memory-to-memory compression/decompression @@ -559,9 +567,9 @@ general but more complex low-level interface. Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} / @code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to give better @code{zlib} compatibility. These functions are -@code{bzopen}, @code{bzread}, @code{bzwrite}, @code{bzflush}, -@code{bzclose}, -@code{bzerror} and @code{bzlibVersion}. You may find these functions +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, +@code{BZ2_bzclose}, +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions more convenient for simple file reading and writing, than those in the high-level interface. These functions are not (yet) officially part of the library, and are minimally documented here. If they break, you @@ -582,6 +590,15 @@ if you are feeling especially paranoid. I would be interested in hearing more about the robustness of the library to corrupted compressed data. +Version 1.0 is much more robust in this respect than +0.9.0 or 0.9.5. 
Investigations with Checker (a tool for +detecting problems with memory management, similar to Purify) +indicate that, at least for the few files I tested, all single-bit +errors in the decompressed data are caught properly, with no +segmentation faults, no reads of uninitialised data and no +out of range reads or writes. So it's certainly much improved, +although I wouldn't claim it to be totally bombproof. + The file @code{bzlib.h} contains all definitions needed to use the library. In particular, you should definitely not include @code{bzlib_private.h}. @@ -598,7 +615,7 @@ The requested action was completed successfully. @item BZ_RUN_OK @itemx BZ_FLUSH_OK @itemx BZ_FINISH_OK -In @code{bzCompress}, the requested flush/finish/nothing-special action +In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action was completed successfully. @item BZ_STREAM_END Compression of data was completed, or the logical stream end was @@ -607,6 +624,16 @@ detected during decompression. The following return values indicate an error of some kind. @table @code +@item BZ_CONFIG_ERROR +Indicates that the library has been improperly compiled on your +platform -- a major configuration error. Specifically, it means +that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)} +are not 1, 2 and 4 respectively, as they should be. Note that the +library should still work properly on 64-bit platforms which follow +the LP64 programming model -- that is, where @code{sizeof(long)} +and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is +still 4, so @code{libbzip2}, which doesn't use the @code{long} type, +is OK. @item BZ_SEQUENCE_ERROR When using the library, it is important to call the functions in the correct sequence and with data structures (buffers etc) in the correct @@ -624,10 +651,10 @@ making. @item BZ_MEM_ERROR Returned when a request to allocate memory failed. 
Note that the quantity of memory needed to decompress a stream cannot be determined -until the stream's header has been read. So @code{bzDecompress} and -@code{bzRead} may return @code{BZ_MEM_ERROR} even though some of +until the stream's header has been read. So @code{BZ2_bzDecompress} and +@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of the compressed data has been read. The same is not true for -compression; once @code{bzCompressInit} or @code{bzWriteOpen} have +compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have successfully completed, @code{BZ_MEM_ERROR} cannot occur. @item BZ_DATA_ERROR Returned when a data integrity error is detected during decompression. @@ -639,19 +666,19 @@ As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to know when the compressed stream does not start with the correct magic bytes (@code{'B' 'Z' 'h'}). @item BZ_IO_ERROR -Returned by @code{bzRead} and @code{bzRead} when there is an error -reading or writing in the compressed file, and by @code{bzReadOpen} -and @code{bzWriteOpen} for attempts to use a file for which the +Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error +reading or writing in the compressed file, and by @code{BZ2_bzReadOpen} +and @code{BZ2_bzWriteOpen} for attempts to use a file for which the error indicator (viz, @code{ferror(f)}) is set. On receipt of @code{BZ_IO_ERROR}, the caller should consult @code{errno} and/or @code{perror} to acquire operating-system specific information about the problem. @item BZ_UNEXPECTED_EOF -Returned by @code{bzRead} when the compressed file finishes +Returned by @code{BZ2_bzRead} when the compressed file finishes before the logical end of stream is detected. 
@item BZ_OUTBUFF_FULL -Returned by @code{bzBuffToBuffCompress} and -@code{bzBuffToBuffDecompress} to indicate that the output data +Returned by @code{BZ2_bzBuffToBuffCompress} and +@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data will not fit into the output buffer provided. @end table @@ -659,17 +686,19 @@ will not fit into the output buffer provided. @section Low-level interface -@subsection @code{bzCompressInit} +@subsection @code{BZ2_bzCompressInit} @example typedef struct @{ char *next_in; unsigned int avail_in; - unsigned int total_in; + unsigned int total_in_lo32; + unsigned int total_in_hi32; char *next_out; unsigned int avail_out; - unsigned int total_out; + unsigned int total_out_lo32; + unsigned int total_out_hi32; void *state; @@ -679,10 +708,10 @@ typedef @} bz_stream; -int bzCompressInit ( bz_stream *strm, - int blockSize100k, - int verbosity, - int workFactor ); +int BZ2_bzCompressInit ( bz_stream *strm, + int blockSize100k, + int verbosity, + int workFactor ); @end example @@ -712,14 +741,19 @@ If you don't want to use a custom memory allocator, set @code{bzalloc}, and the library will then use the standard @code{malloc}/@code{free} routines. -Before calling @code{bzCompressInit}, fields @code{bzalloc}, +Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be filled appropriately, as just described. Upon return, the internal -state will have been allocated and initialised, and @code{total_in} and -@code{total_out} will have been set to zero. -These last two fields are used by the library +state will have been allocated and initialised, and @code{total_in_lo32}, +@code{total_in_hi32}, @code{total_out_lo32} and +@code{total_out_hi32} will have been set to zero. +These four fields are used by the library to inform the caller of the total amount of data passed into and out of the library, respectively. You should not try to change them. 
+As of version 1.0, 64-bit counts are maintained, even on 32-bit +platforms, using the @code{_hi32} fields to store the upper 32 bits +of the count. So, for example, the total amount of data in +is @code{(total_in_hi32 << 32) + total_in_lo32}. Parameter @code{blockSize100k} specifies the block size to be used for compression. It should be a value between 1 and 9 inclusive, and the @@ -761,6 +795,8 @@ mechanism would render the parameter obsolete. Possible return values: @display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled @code{BZ_PARAM_ERROR} if @code{strm} is @code{NULL} or @code{blockSize} < 1 or @code{blockSize} > 9 @@ -773,86 +809,86 @@ Possible return values: @end display Allowable next actions: @display - @code{bzCompress} + @code{BZ2_bzCompress} if @code{BZ_OK} is returned no specific action needed in case of error @end display -@subsection @code{bzCompress} +@subsection @code{BZ2_bzCompress} @example - int bzCompress ( bz_stream *strm, int action ); + int BZ2_bzCompress ( bz_stream *strm, int action ); @end example Provides more input and/or output buffer space for the library. The -caller maintains input and output buffers, and calls @code{bzCompress} to +caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to transfer data between them. -Before each call to @code{bzCompress}, @code{next_in} should point at +Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at the data to be compressed, and @code{avail_in} should indicate how many -bytes the library may read. @code{bzCompress} updates @code{next_in}, +bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in}, @code{avail_in} and @code{total_in} to reflect the number of bytes it has read. Similarly, @code{next_out} should point to a buffer in which the compressed data is to be placed, with @code{avail_out} indicating how -much output space is available. @code{bzCompress} updates +much output space is available. 
@code{BZ2_bzCompress} updates @code{next_out}, @code{avail_out} and @code{total_out} to reflect the number of bytes output. You may provide and remove as little or as much data as you like on each -call of @code{bzCompress}. In the limit, it is acceptable to supply and +call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and remove data one byte at a time, although this would be terribly inefficient. You should always ensure that at least one byte of output space is available at each call. -A second purpose of @code{bzCompress} is to request a change of mode of the +A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the compressed stream. Conceptually, a compressed stream can be in one of four states: IDLE, RUNNING, FLUSHING and FINISHING. Before initialisation -(@code{bzCompressInit}) and after termination (@code{bzCompressEnd}), a +(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a stream is regarded as IDLE. -Upon initialisation (@code{bzCompressInit}), the stream is placed in the -RUNNING state. Subsequent calls to @code{bzCompress} should pass +Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the +RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass @code{BZ_RUN} as the requested action; other actions are illegal and will result in @code{BZ_SEQUENCE_ERROR}. At some point, the calling program will have provided all the input data it wants to. It will then want to finish up -- in effect, asking the library to process any data it might have buffered internally. In this -state, @code{bzCompress} will no longer attempt to read data from +state, @code{BZ2_bzCompress} will no longer attempt to read data from @code{next_in}, but it will want to write data to @code{next_out}. Because the output buffer supplied by the user can be arbitrarily small, the finishing-up operation cannot necessarily be done with a single call -of @code{bzCompress}. 
+of @code{BZ2_bzCompress}. Instead, the calling program passes @code{BZ_FINISH} as an action to -@code{bzCompress}. This changes the stream's state to FINISHING. Any +@code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and -transferred to the output buffer. To do this, @code{bzCompress} must be +transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be called repeatedly until all the output has been consumed. At that -point, @code{bzCompress} returns @code{BZ_STREAM_END}, and the stream's -state is set back to IDLE. @code{bzCompressEnd} should then be +point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's +state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be called. Just to make sure the calling program does not cheat, the library makes a note of @code{avail_in} at the time of the first call to -@code{bzCompress} which has @code{BZ_FINISH} as an action (ie, at the +@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the time the program has announced its intention to not supply any more input). By comparing this value with that of @code{avail_in} over -subsequent calls to @code{bzCompress}, the library can detect any +subsequent calls to @code{BZ2_bzCompress}, the library can detect any attempts to slip in more data to compress. Any calls for which this is detected will return @code{BZ_SEQUENCE_ERROR}. This indicates a programming mistake which should be corrected. Instead of asking to finish, the calling program may ask -@code{bzCompress} to take all the remaining input, compress it and +@code{BZ2_bzCompress} to take all the remaining input, compress it and terminate the current (Burrows-Wheeler) compression block. This could be useful for error control purposes. 
The mechanism is analogous to -that for finishing: call @code{bzCompress} with an action of +that for finishing: call @code{BZ2_bzCompress} with an action of @code{BZ_FLUSH}, remove output data, and persist with the @code{BZ_FLUSH} action until the value @code{BZ_RUN} is returned. As -with finishing, @code{bzCompress} detects any attempt to provide more +with finishing, @code{BZ2_bzCompress} detects any attempt to provide more input data once the flush has begun. Once the flush is complete, the stream returns to the normal RUNNING @@ -863,11 +899,11 @@ which shows which actions are allowable in each state, what action will be taken, what the next state is, and what the non-error return values are. Note that you can't explicitly ask what state the stream is in, but nor do you need to -- it can be inferred from the -values returned by @code{bzCompress}. +values returned by @code{BZ2_bzCompress}. @display IDLE/@code{any} - Illegal. IDLE state only exists after @code{bzCompressEnd} or - before @code{bzCompressInit}. + Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or + before @code{BZ2_bzCompressInit}. Return value = @code{BZ_SEQUENCE_ERROR} RUNNING/@code{BZ_RUN} @@ -917,21 +953,21 @@ FINISHING/other That still looks complicated? Well, fair enough. The usual sequence of calls for compressing a load of data is: @itemize @bullet -@item Get started with @code{bzCompressInit}. +@item Get started with @code{BZ2_bzCompressInit}. @item Shovel data in and shlurp out its compressed form using zero or more -calls of @code{bzCompress} with action = @code{BZ_RUN}. +calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}. @item Finish up. -Repeatedly call @code{bzCompress} with action = @code{BZ_FINISH}, +Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH}, copying out the compressed output, until @code{BZ_STREAM_END} is returned. -@item Close up and go home. Call @code{bzCompressEnd}. +@item Close up and go home. Call @code{BZ2_bzCompressEnd}. 
@end itemize If the data you want to compress fits into your input buffer all -at once, you can skip the calls of @code{bzCompress ( ..., BZ_RUN )} and -just do the @code{bzCompress ( ..., BZ_FINISH )} calls. +at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and +just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls. -All required memory is allocated by @code{bzCompressInit}. The +All required memory is allocated by @code{BZ2_bzCompressInit}. The compression library can accept any data at all (obviously). So you -shouldn't get any error return values from the @code{bzCompress} calls. +shouldn't get any error return values from the @code{BZ2_bzCompress} calls. If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in your programming. @@ -941,9 +977,9 @@ Trivial other possible return values: if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL} @end display -@subsection @code{bzCompressEnd} +@subsection @code{BZ2_bzCompressEnd} @example -int bzCompressEnd ( bz_stream *strm ); +int BZ2_bzCompressEnd ( bz_stream *strm ); @end example Releases all memory associated with a compression stream. @@ -954,11 +990,11 @@ Possible return values: @end display -@subsection @code{bzDecompressInit} +@subsection @code{BZ2_bzDecompressInit} @example -int bzDecompressInit ( bz_stream *strm, int verbosity, int small ); +int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small ); @end example -Prepares for decompression. As with @code{bzCompressInit}, a +Prepares for decompression. As with @code{BZ2_bzCompressInit}, a @code{bz_stream} record should be allocated and initialised before the call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be set if a custom memory allocator is required, or made @code{NULL} for @@ -966,7 +1002,7 @@ the normal @code{malloc}/@code{free} routines. Upon return, the internal state will have been initialised, and @code{total_in} and @code{total_out} will be zero. 
-For the meaning of parameter @code{verbosity}, see @code{bzCompressInit}. +For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}. If @code{small} is nonzero, the library will use an alternative decompression algorithm which uses less memory but at the cost of @@ -976,11 +1012,13 @@ more information on memory management. Note that the amount of memory needed to decompress a stream cannot be determined until the stream's header has been read, -so even if @code{bzDecompressInit} succeeds, a subsequent -@code{bzDecompress} could fail with @code{BZ_MEM_ERROR}. +so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent +@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}. Possible return values: @display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled @code{BZ_PARAM_ERROR} if @code{(small != 0 && small != 1)} or @code{(verbosity < 0 || verbosity > 4)} @@ -990,54 +1028,54 @@ Possible return values: Allowable next actions: @display - @code{bzDecompress} + @code{BZ2_bzDecompress} if @code{BZ_OK} was returned no specific action required in case of error @end display -@subsection @code{bzDecompress} +@subsection @code{BZ2_bzDecompress} @example -int bzDecompress ( bz_stream *strm ); +int BZ2_bzDecompress ( bz_stream *strm ); @end example Provides more input and/or output buffer space for the library. The -caller maintains input and output buffers, and uses @code{bzDecompress} +caller maintains input and output buffers, and uses @code{BZ2_bzDecompress} to transfer data between them. -Before each call to @code{bzDecompress}, @code{next_in} +Before each call to @code{BZ2_bzDecompress}, @code{next_in} should point at the compressed data, and @code{avail_in} should indicate how many bytes the library -may read. @code{bzDecompress} updates @code{next_in}, @code{avail_in} +may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in} and @code{total_in} to reflect the number of bytes it has read.
Similarly, @code{next_out} should point to a buffer in which the uncompressed output is to be placed, with @code{avail_out} indicating how much output space -is available. @code{bzCompress} updates @code{next_out}, +is available. @code{BZ2_bzDecompress} updates @code{next_out}, @code{avail_out} and @code{total_out} to reflect the number of bytes output. You may provide and remove as little or as much data as you like on -each call of @code{bzDecompress}. +each call of @code{BZ2_bzDecompress}. In the limit, it is acceptable to supply and remove data one byte at a time, although this would be terribly inefficient. You should always ensure that at least one byte of output space is available at each call. -Use of @code{bzDecompress} is simpler than @code{bzCompress}. +Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}. You should provide input and remove output as described above, and -repeatedly call @code{bzDecompress} until @code{BZ_STREAM_END} is +repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is returned. Appearance of @code{BZ_STREAM_END} denotes that -@code{bzDecompress} has detected the logical end of the compressed -stream. @code{bzDecompress} will not produce @code{BZ_STREAM_END} until +@code{BZ2_bzDecompress} has detected the logical end of the compressed +stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until all output data has been placed into the output buffer, so once @code{BZ_STREAM_END} appears, you are guaranteed to have available all -the decompressed output, and @code{bzDecompressEnd} can safely be +the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be called. -If case of an error return value, you should call @code{bzDecompressEnd} +In case of an error return value, you should call @code{BZ2_bzDecompressEnd} to clean up and release memory.
Possible return values: @@ -1059,16 +1097,16 @@ Possible return values: @end display Allowable next actions: @display - @code{bzDecompress} + @code{BZ2_bzDecompress} if @code{BZ_OK} was returned - @code{bzDecompressEnd} + @code{BZ2_bzDecompressEnd} otherwise @end display -@subsection @code{bzDecompressEnd} +@subsection @code{BZ2_bzDecompressEnd} @example -int bzDecompressEnd ( bz_stream *strm ); +int BZ2_bzDecompressEnd ( bz_stream *strm ); @end example Releases all memory associated with a decompression stream. @@ -1107,16 +1145,16 @@ This interface provides functions for reading and writing given on a per-function basis below. @item If @code{bzerror} indicates an error (ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}), - you should immediately call @code{bzReadClose} (or @code{bzWriteClose}, + you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose}, depending on whether you are attempting to read or to write) to free up all resources associated with the stream. Once an error has been indicated, behaviour of all calls - except @code{bzReadClose} (@code{bzWriteClose}) is undefined. + except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined. The implication is that (1) @code{bzerror} should be checked after each call, and (2) if @code{bzerror} indicates an error, - @code{bzReadClose} (@code{bzWriteClose}) should then be called to clean up. + @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up. @item The @code{FILE*} arguments passed to - @code{bzReadOpen}/@code{bzWriteOpen} + @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen} should be set to binary mode. Most Unix systems will do this by default, but other platforms, including Windows and Mac, will not. 
If you omit this, you may @@ -1130,13 +1168,13 @@ This interface provides functions for reading and writing -@subsection @code{bzReadOpen} +@subsection @code{BZ2_bzReadOpen} @example typedef void BZFILE; - BZFILE *bzReadOpen ( int *bzerror, FILE *f, - int small, int verbosity, - void *unused, int nUnused ); + BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f, + int small, int verbosity, + void *unused, int nUnused ); @end example Prepare to read compressed data from file handle @code{f}. @code{f} should refer to a file which has been opened for reading, and for which @@ -1144,7 +1182,7 @@ the error indicator (@code{ferror(f)})is not set. If @code{small} is 1, the library will try to decompress using less memory, at the expense of speed. -For reasons explained below, @code{bzRead} will decompress the +For reasons explained below, @code{BZ2_bzRead} will decompress the @code{nUnused} bytes starting at @code{unused}, before starting to read from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be supplied like this. If this facility is not required, you should pass @@ -1152,15 +1190,17 @@ supplied like this. If this facility is not required, you should pass respectively. For the meaning of parameters @code{small} and @code{verbosity}, -see @code{bzDecompressInit}. +see @code{BZ2_bzDecompressInit}. The amount of memory needed to decompress a file cannot be determined until the file's header has been read. So it is possible that -@code{bzReadOpen} returns @code{BZ_OK} but a subsequent call of -@code{bzRead} will return @code{BZ_MEM_ERROR}. +@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of +@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}. 
Possible assignments to @code{bzerror}: @display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled @code{BZ_PARAM_ERROR} if @code{f} is @code{NULL} or @code{small} is neither @code{0} nor @code{1} @@ -1184,16 +1224,16 @@ Possible return values: Allowable next actions: @display - @code{bzRead} + @code{BZ2_bzRead} if @code{bzerror} is @code{BZ_OK} - @code{bzClose} + @code{BZ2_bzReadClose} otherwise @end display -@subsection @code{bzRead} +@subsection @code{BZ2_bzRead} @example - int bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); + int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len ); @end example Reads up to @code{len} (uncompressed) bytes from the compressed file @code{b} into @@ -1204,7 +1244,7 @@ was detected, @code{bzerror} will be set to @code{BZ_STREAM_END}, and the number of bytes read is returned. All other @code{bzerror} values denote an error. -@code{bzRead} will supply @code{len} bytes, +@code{BZ2_bzRead} will supply @code{len} bytes, unless the logical stream end is detected or an error occurs. Because of this, it is possible to detect the stream end by observing when the number of bytes returned is @@ -1213,20 +1253,20 @@ requested. Nevertheless, this is regarded as inadvisable; you should instead check @code{bzerror} after every call and watch out for @code{BZ_STREAM_END}. -Internally, @code{bzRead} copies data from the compressed file in chunks +Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks of size @code{BZ_MAX_UNUSED} bytes before decompressing it. If the file contains more bytes than strictly -needed to reach the logical end-of-stream, @code{bzRead} will almost certainly +needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly read some of the trailing data before signalling @code{BZ_STREAM_END}. To collect the read but unused data once @code{BZ_STREAM_END} has -appeared, call @code{bzReadGetUnused} immediately before @code{bzReadClose}.
+appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}. Possible assignments to @code{bzerror}: @display @code{BZ_PARAM_ERROR} if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} @code{BZ_SEQUENCE_ERROR} - if @code{b} was opened with @code{bzWriteOpen} + if @code{b} was opened with @code{BZ2_bzWriteOpen} @code{BZ_IO_ERROR} if there is an error reading from the compressed file @code{BZ_UNEXPECTED_EOF} @@ -1254,28 +1294,28 @@ Possible return values: Allowable next actions: @display - collect data from @code{buf}, then @code{bzRead} or @code{bzReadClose} + collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose} if @code{bzerror} is @code{BZ_OK} - collect data from @code{buf}, then @code{bzReadClose} or @code{bzReadGetUnused} + collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused} if @code{bzerror} is @code{BZ_STREAM_END} - @code{bzReadClose} + @code{BZ2_bzReadClose} otherwise @end display -@subsection @code{bzReadGetUnused} +@subsection @code{BZ2_bzReadGetUnused} @example - void bzReadGetUnused ( int* bzerror, BZFILE *b, - void** unused, int* nUnused ); + void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b, + void** unused, int* nUnused ); @end example Returns data which was read from the compressed file but was not needed to get to the logical end-of-stream. @code{*unused} is set to the address of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive. -This function may only be called once @code{bzRead} has signalled -@code{BZ_STREAM_END} but before @code{bzReadClose}. +This function may only be called once @code{BZ2_bzRead} has signalled +@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}.
Possible assignments to @code{bzerror}: @display @@ -1284,31 +1324,31 @@ Possible assignments to @code{bzerror}: or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL} @code{BZ_SEQUENCE_ERROR} if @code{BZ_STREAM_END} has not been signalled - or if @code{b} was opened with @code{bzWriteOpen} + or if @code{b} was opened with @code{BZ2_bzWriteOpen} @code{BZ_OK} otherwise @end display Allowable next actions: @display - @code{bzReadClose} + @code{BZ2_bzReadClose} @end display -@subsection @code{bzReadClose} +@subsection @code{BZ2_bzReadClose} @example - void bzReadClose ( int *bzerror, BZFILE *b ); + void BZ2_bzReadClose ( int *bzerror, BZFILE *b ); @end example Releases all memory pertaining to the compressed file @code{b}. -@code{bzReadClose} does not call @code{fclose} on the underlying file +@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file handle, so you should do that yourself if appropriate. -@code{bzReadClose} should be called to clean up after all error +@code{BZ2_bzReadClose} should be called to clean up after all error situations. Possible assignments to @code{bzerror}: @display @code{BZ_SEQUENCE_ERROR} - if @code{b} was opened with @code{bzOpenWrite} + if @code{b} was opened with @code{BZ2_bzWriteOpen} @code{BZ_OK} otherwise @end display @@ -1320,11 +1360,11 @@ Allowable next actions: -@subsection @code{bzWriteOpen} +@subsection @code{BZ2_bzWriteOpen} @example - BZFILE *bzWriteOpen ( int *bzerror, FILE *f, - int blockSize100k, int verbosity, - int workFactor ); + BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f, + int blockSize100k, int verbosity, + int workFactor ); @end example Prepare to write compressed data to file handle @code{f}. @code{f} should refer to @@ -1333,14 +1373,16 @@ indicator (@code{ferror(f)})is not set. For the meaning of parameters @code{blockSize100k}, @code{verbosity} and @code{workFactor}, see -@* @code{bzCompressInit}. +@* @code{BZ2_bzCompressInit}.
All required memory is allocated at this stage, so if the call completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a -subsequent call to @code{bzWrite}. +subsequent call to @code{BZ2_bzWrite}. Possible assignments to @code{bzerror}: @display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled @code{BZ_PARAM_ERROR} if @code{f} is @code{NULL} or @code{blockSize100k < 1} or @code{blockSize100k > 9} @@ -1362,18 +1404,18 @@ Possible return values: Allowable next actions: @display - @code{bzWrite} + @code{BZ2_bzWrite} if @code{bzerror} is @code{BZ_OK} - (you could go directly to @code{bzWriteClose}, but this would be pretty pointless) - @code{bzWriteClose} + (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless) + @code{BZ2_bzWriteClose} otherwise @end display -@subsection @code{bzWrite} +@subsection @code{BZ2_bzWrite} @example - void bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); + void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len ); @end example Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be compressed and written to the file. @@ -1383,7 +1425,7 @@ Possible assignments to @code{bzerror}: @code{BZ_PARAM_ERROR} if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0} @code{BZ_SEQUENCE_ERROR} - if b was opened with @code{bzReadOpen} + if b was opened with @code{BZ2_bzReadOpen} @code{BZ_IO_ERROR} if there is an error writing the compressed file. 
@code{BZ_OK} @@ -1393,22 +1435,29 @@ Possible assignments to @code{bzerror}: -@subsection @code{bzWriteClose} +@subsection @code{BZ2_bzWriteClose} @example - int bzWriteClose ( int *bzerror, BZFILE* f, - int abandon, - unsigned int* nbytes_in, - unsigned int* nbytes_out ); + void BZ2_bzWriteClose ( int *bzerror, BZFILE* f, + int abandon, + unsigned int* nbytes_in, + unsigned int* nbytes_out ); + + void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* f, + int abandon, + unsigned int* nbytes_in_lo32, + unsigned int* nbytes_in_hi32, + unsigned int* nbytes_out_lo32, + unsigned int* nbytes_out_hi32 ); @end example Compresses and flushes to the compressed file all data so far supplied -by @code{bzWrite}. The logical end-of-stream markers are also written, so -subsequent calls to @code{bzWrite} are illegal. All memory associated +by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so +subsequent calls to @code{BZ2_bzWrite} are illegal. All memory associated with the compressed file @code{b} is released. @code{fflush} is called on the compressed file, but it is not @code{fclose}'d. -If @code{bzWriteClose} is called to clean up after an error, the only +If @code{BZ2_bzWriteClose} is called to clean up after an error, the only action is to release the memory. The library records the error codes issued by previous calls, so this situation will be detected automatically. There is no attempt to complete the compression @@ -1418,12 +1467,17 @@ value to @code{abandon}. If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the total volume of uncompressed data handled. Similarly, @code{nbytes_out} -will be set to the total volume of compressed data written. +will be set to the total volume of compressed data written. For +compatibility with older versions of the library, @code{BZ2_bzWriteClose} +only yields the lower 32 bits of these counts. Use +@code{BZ2_bzWriteClose64} if you want the full 64 bit counts. 
These +two functions are otherwise absolutely identical. + Possible assignments to @code{bzerror}: @display @code{BZ_SEQUENCE_ERROR} - if @code{b} was opened with @code{bzReadOpen} + if @code{b} was opened with @code{BZ2_bzReadOpen} @code{BZ_IO_ERROR} if there is an error writing the compressed file @code{BZ_OK} @@ -1442,26 +1496,26 @@ The calling application can write its own data before and after the compressed data stream, using that same file handle. @item Reading is more complex, and the facilities are not as general as they could be since generality is hard to reconcile with efficiency. -@code{bzRead} reads from the compressed file in blocks of size +@code{BZ2_bzRead} reads from the compressed file in blocks of size @code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot the logical end of compressed stream. To recover this data once decompression has -ended, call @code{bzReadGetUnused} after the last call of @code{bzRead} +ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead} (the one returning @code{BZ_STREAM_END}) but before calling -@code{bzReadClose}. +@code{BZ2_bzReadClose}. @end itemize This mechanism makes it easy to decompress multiple @code{bzip2} -streams placed end-to-end. As the end of one stream, when @code{bzRead} -returns @code{BZ_STREAM_END}, call @code{bzReadGetUnused} to collect the +streams placed end-to-end. As the end of one stream, when @code{BZ2_bzRead} +returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the unused data (copy it into your own buffer somewhere). That data forms the start of the next compressed stream. -To start uncompressing that next stream, call @code{bzReadOpen} again, +To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again, feeding in the unused data via the @code{unused}/@code{nUnused} parameters. Keep doing this until @code{BZ_STREAM_END} return coincides with the physical end of file (@code{feof(f)}). 
In this situation -@code{bzReadGetUnused} +@code{BZ2_bzReadGetUnused} will of course return no data. This should give some feel for how the high-level interface can be used. @@ -1482,22 +1536,22 @@ f = fopen ( "myfile.bz2", "w" ); if (!f) @{ /* handle error */ @} -b = bzWriteOpen ( &bzerror, f, 9 ); +b = BZ2_bzWriteOpen ( &bzerror, f, 9 ); if (bzerror != BZ_OK) @{ - bzWriteClose ( b ); + BZ2_bzWriteClose ( b ); /* handle error */ @} while ( /* condition */ ) @{ /* get data to write into buf, and set nBuf appropriately */ - nWritten = bzWrite ( &bzerror, b, buf, nBuf ); + nWritten = BZ2_bzWrite ( &bzerror, b, buf, nBuf ); if (bzerror == BZ_IO_ERROR) @{ - bzWriteClose ( &bzerror, b ); + BZ2_bzWriteClose ( &bzerror, b ); /* handle error */ @} @} -bzWriteClose ( &bzerror, b ); +BZ2_bzWriteClose ( &bzerror, b ); if (bzerror == BZ_IO_ERROR) @{ /* handle error */ @} @@ -1515,39 +1569,39 @@ f = fopen ( "myfile.bz2", "r" ); if (!f) @{ /* handle error */ @} -b = bzReadOpen ( &bzerror, f, 0, NULL, 0 ); +b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 ); if (bzerror != BZ_OK) @{ - bzReadClose ( &bzerror, b ); + BZ2_bzReadClose ( &bzerror, b ); /* handle error */ @} bzerror = BZ_OK; while (bzerror == BZ_OK && /* arbitrary other conditions */) @{ - nBuf = bzRead ( &bzerror, b, buf, /* size of buf */ ); + nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ ); if (bzerror == BZ_OK) @{ /* do something with buf[0 .. 
nBuf-1] */ @} @} if (bzerror != BZ_STREAM_END) @{ - bzReadClose ( &bzerror, b ); + BZ2_bzReadClose ( &bzerror, b ); /* handle error */ @} else @{ - bzReadClose ( &bzerror ); + BZ2_bzReadClose ( &bzerror ); @} @end example @section Utility functions -@subsection @code{bzBuffToBuffCompress} +@subsection @code{BZ2_bzBuffToBuffCompress} @example - int bzBuffToBuffCompress( char* dest, - unsigned int* destLen, - char* source, - unsigned int sourceLen, - int blockSize100k, - int verbosity, - int workFactor ); + int BZ2_bzBuffToBuffCompress( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k, + int verbosity, + int workFactor ); @end example Attempts to compress the data in @code{source[0 .. sourceLen-1]} into the destination buffer, @code{dest[0 .. *destLen-1]}. @@ -1563,17 +1617,19 @@ additional calls to provide extra input data. If you want that kind of mechanism, use the low-level interface. For the meaning of parameters @code{blockSize100k}, @code{verbosity} -and @code{workFactor}, @* see @code{bzCompressInit}. +and @code{workFactor}, @* see @code{BZ2_bzCompressInit}. To guarantee that the compressed data will fit in its buffer, allocate an output buffer of size 1% larger than the uncompressed data, plus six hundred extra bytes. -@code{bzBuffToBuffDecompress} will not write data at or +@code{BZ2_bzBuffToBuffDecompress} will not write data at or beyond @code{dest[*destLen]}, even in case of buffer overflow. 
Possible return values: @display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled @code{BZ_PARAM_ERROR} if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} or @code{blockSize100k < 1} or @code{blockSize100k > 9} @@ -1589,14 +1645,14 @@ Possible return values: -@subsection @code{bzBuffToBuffDecompress} +@subsection @code{BZ2_bzBuffToBuffDecompress} @example - int bzBuffToBuffDecompress ( char* dest, - unsigned int* destLen, - char* source, - unsigned int sourceLen, - int small, - int verbosity ); + int BZ2_bzBuffToBuffDecompress ( char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int small, + int verbosity ); @end example Attempts to decompress the data in @code{source[0 .. sourceLen-1]} into the destination buffer, @code{dest[0 .. *destLen-1]}. @@ -1606,11 +1662,11 @@ returned. If the compressed data won't fit, @code{*destLen} is unchanged, and @code{BZ_OUTBUFF_FULL} is returned. @code{source} is assumed to hold a complete @code{bzip2} format -data stream. @code{bzBuffToBuffDecompress} tries to decompress +data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress the entirety of the stream into the output buffer. For the meaning of parameters @code{small} and @code{verbosity}, -see @code{bzDecompressInit}. +see @code{BZ2_bzDecompressInit}. Because the compression ratio of the compressed data cannot be known in advance, there is no easy way to guarantee that the output buffer will @@ -1618,11 +1674,13 @@ be big enough. You may of course make arrangements in your code to record the size of the uncompressed data, but such a mechanism is beyond the scope of this library. -@code{bzBuffToBuffDecompress} will not write data at or +@code{BZ2_bzBuffToBuffDecompress} will not write data at or beyond @code{dest[*destLen]}, even in case of buffer overflow. 
Possible return values: @display + @code{BZ_CONFIG_ERROR} + if the library has been mis-compiled @code{BZ_PARAM_ERROR} if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL} or @code{small != 0 && small != 1} @@ -1646,40 +1704,40 @@ Possible return values: @section @code{zlib} compatibility functions Yoshioka Tsuneo has contributed some functions to give better @code{zlib} compatibility. These functions are -@code{bzopen}, @code{bzread}, @code{bzwrite}, @code{bzflush}, -@code{bzclose}, -@code{bzerror} and @code{bzlibVersion}. +@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush}, +@code{BZ2_bzclose}, +@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. These functions are not (yet) officially part of the library. If they break, you get to keep all the pieces. Nevertheless, I think they work ok. @example typedef void BZFILE; -const char * bzlibVersion ( void ); +const char * BZ2_bzlibVersion ( void ); @end example Returns a string indicating the library version. @example -BZFILE * bzopen ( const char *path, const char *mode ); -BZFILE * bzdopen ( int fd, const char *mode ); +BZFILE * BZ2_bzopen ( const char *path, const char *mode ); +BZFILE * BZ2_bzdopen ( int fd, const char *mode ); @end example Opens a @code{.bz2} file for reading or writing, using either its name or a pre-existing file descriptor. Analogous to @code{fopen} and @code{fdopen}. @example -int bzread ( BZFILE* b, void* buf, int len ); -int bzwrite ( BZFILE* b, void* buf, int len ); +int BZ2_bzread ( BZFILE* b, void* buf, int len ); +int BZ2_bzwrite ( BZFILE* b, void* buf, int len ); @end example Reads/writes data from/to a previously opened @code{BZFILE}. Analogous to @code{fread} and @code{fwrite}. @example -int bzflush ( BZFILE* b ); -void bzclose ( BZFILE* b ); +int BZ2_bzflush ( BZFILE* b ); +void BZ2_bzclose ( BZFILE* b ); @end example -Flushes/closes a @code{BZFILE}. @code{bzflush} doesn't actually do +Flushes/closes a @code{BZFILE}. 
@code{BZ2_bzflush} doesn't actually do anything. Analogous to @code{fflush} and @code{fclose}. @example -const char * bzerror ( BZFILE *b, int *errnum ) +const char * BZ2_bzerror ( BZFILE *b, int *errnum ) @end example Returns a string describing the more recent error status of @code{b}, and also sets @code{*errnum} to its numerical value. @@ -1695,9 +1753,9 @@ by compiling the library with preprocessor symbol @code{BZ_NO_STDIO} defined. Doing this gives you a library containing only the following eight functions: -@code{bzCompressInit}, @code{bzCompress}, @code{bzCompressEnd} @* -@code{bzDecompressInit}, @code{bzDecompress}, @code{bzDecompressEnd} @* -@code{bzBuffToBuffCompress}, @code{bzBuffToBuffDecompress} +@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @* +@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @* +@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress} When compiled like this, all functions will ignore @code{verbosity} settings. @@ -1710,14 +1768,14 @@ was compiled with @code{BZ_NO_STDIO} set. For a normal compile, an assertion failure yields the message @example - bzip2/libbzip2, v0.9.5: internal error number N. - This is a bug in bzip2/libbzip2, v0.9.5. Please report - it to me at: jseward@@acm.org. If this happened when - you were using some program which uses libbzip2 as a + bzip2/libbzip2: internal error number N. + This is a bug in bzip2/libbzip2, 1.0 of 21-Mar-2000. + Please report it to me at: jseward@@acm.org. If this happened + when you were using some program which uses libbzip2 as a component, you should also report this bug to the author(s) of that program. Please make an effort to report this bug; timely and accurate bug reports eventually lead to higher - quality software. Thanks. Julian Seward, 24 May 1999. + quality software. Thanks. Julian Seward, 21 March 2000. @end example where @code{N} is some error code number. @code{exit(3)} is then called. 
@@ -1781,7 +1839,7 @@ These are just some random thoughts of mine. Your mileage may vary. @section Limitations of the compressed file format -@code{bzip2-0.9.5} and @code{0.9.0} +@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0} use exactly the same file format as the previous version, @code{bzip2-0.1}. This decision was made in the interests of stability. Creating yet another incompatible compressed file format @@ -1860,7 +1918,7 @@ require some careful design of compressed file formats. @section Portability issues After some consideration, I have decided not to use -GNU @code{autoconf} to configure 0.9.5. +GNU @code{autoconf} to configure 0.9.5 or 1.0. @code{autoconf}, admirable and wonderful though it is, mainly assists with portability problems between Unix-like @@ -1925,7 +1983,7 @@ If you get problems, try using the flags @code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}. You should specifically @emph{not} use @code{-funroll-loops}. -You may notice that the Makefile runs four tests as part of +You may notice that the Makefile runs six tests as part of the build process. If the program passes all of these, it's a pretty good (but not 100%) indication that the compiler has done its job correctly. @@ -2000,6 +2058,7 @@ memory but gets pretty good compression, and has minimal latency, consider Jean-loup Gailly's and Mark Adler's work, @code{zlib-1.1.2} and @code{gzip-1.2.4}. Look for them at + @code{http://www.cdrom.com/pub/infozip/zlib} and @code{http://www.gzip.org} respectively. @@ -2140,7 +2199,14 @@ available from: @example http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps @end example - +Finally, the following paper documents some recent investigations +I made into the performance of sorting algorithms: +@example +Julian Seward: + On the Performance of BWT Sorting Algorithms + Proceedings of the IEEE Data Compression Conference 2000 + Snowbird, Utah. 28-30 March 2000. 
+@end example @contents diff --git a/randtable.c b/randtable.c index 8f6266f..983089d 100644 --- a/randtable.c +++ b/randtable.c @@ -8,7 +8,7 @@ This file is a part of bzip2 and/or libbzip2, a program and library for lossless, block-sorting data compression. - Copyright (C) 1996-1999 Julian R Seward. All rights reserved. + Copyright (C) 1996-2000 Julian R Seward. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -43,7 +43,7 @@ Julian Seward, Cambridge, UK. jseward@acm.org - bzip2/libbzip2 version 0.9.5 of 24 May 1999 + bzip2/libbzip2 version 1.0 of 21 March 2000 This program is based on (at least) the work of: Mike Burrows @@ -63,7 +63,7 @@ /*---------------------------------------------*/ -Int32 rNums[512] = { +Int32 BZ2_rNums[512] = { 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, @@ -0,0 +1,39 @@ + +/* spew out a thoroughly gigantic file designed so that bzip2 + can compress it reasonably rapidly. This is to help test + support for large files (> 2GB) in a reasonable amount of time. + I suggest you use the undocumented --exponential option to + bzip2 when compressing the resulting file; this saves a bit of + time. Note: *don't* bother with --exponential when compressing + Real Files; it'll just waste a lot of CPU time :-) + (but is otherwise harmless). 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <stdio.h> +#include <stdlib.h> + +/* The number of megabytes of junk to spew out (roughly) */ +#define MEGABYTES 5000 + +#define N_BUF 1000000 +char buf[N_BUF]; + +int main ( int argc, char** argv ) +{ + int ii, kk, p; + srandom(1); + setbuffer ( stdout, buf, N_BUF ); + for (kk = 0; kk < MEGABYTES * 515; kk+=3) { + p = 25+random()%50; + for (ii = 0; ii < p; ii++) + printf ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ); + for (ii = 0; ii < p-1; ii++) + printf ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" ); + for (ii = 0; ii < p+1; ii++) + printf ( "ccccccccccccccccccccccccccccccccccccc" ); + } + fflush(stdout); + return 0; +} diff --git a/unzcrash.c b/unzcrash.c new file mode 100644 index 0000000..f0f17fc --- /dev/null +++ b/unzcrash.c @@ -0,0 +1,126 @@ + +/* A test program written to test robustness to decompression of + corrupted data. Usage is + unzcrash filename + and the program will read the specified file, compress it (in memory), + and then repeatedly decompress it, each time with a different bit of + the compressed data inverted, so as to test all possible one-bit errors. + This should not cause any invalid memory accesses. If it does, + I want to know about it! + + p.s. As you can see from the above description, the process is + incredibly slow. A file of size eg 5KB will cause it to run for + many hours. +*/ + +#include <stdio.h> +#include <assert.h> +#include "bzlib.h" + +#define M_BLOCK 1000000 + +typedef unsigned char uchar; + +#define M_BLOCK_OUT (M_BLOCK + 1000000) +uchar inbuf[M_BLOCK]; +uchar outbuf[M_BLOCK_OUT]; +uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)]; + +int nIn, nOut, nZ; + +static char *bzerrorstrings[] = { + "OK" + ,"SEQUENCE_ERROR" + ,"PARAM_ERROR" + ,"MEM_ERROR" + ,"DATA_ERROR" + ,"DATA_ERROR_MAGIC" + ,"IO_ERROR" + ,"UNEXPECTED_EOF" + ,"OUTBUFF_FULL" + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" /* for future */ + ,"???" 
/* for future */ +}; + +void flip_bit ( int bit ) +{ + int byteno = bit / 8; + int bitno = bit % 8; + uchar mask = 1 << bitno; + //fprintf ( stderr, "(byte %d bit %d mask %d)", + // byteno, bitno, (int)mask ); + zbuf[byteno] ^= mask; +} + +int main ( int argc, char** argv ) +{ + FILE* f; + int r; + int bit; + int i; + + if (argc != 2) { + fprintf ( stderr, "usage: unzcrash filename\n" ); + return 1; + } + + f = fopen ( argv[1], "r" ); + if (!f) { + fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] ); + return 1; + } + + nIn = fread ( inbuf, 1, M_BLOCK, f ); + fprintf ( stderr, "%d bytes read\n", nIn ); + + nZ = M_BLOCK; + r = BZ2_bzBuffToBuffCompress ( + zbuf, &nZ, inbuf, nIn, 9, 0, 30 ); + + assert (r == BZ_OK); + fprintf ( stderr, "%d after compression\n", nZ ); + + for (bit = 0; bit < nZ*8; bit++) { + fprintf ( stderr, "bit %d ", bit ); + flip_bit ( bit ); + nOut = M_BLOCK_OUT; + r = BZ2_bzBuffToBuffDecompress ( + outbuf, &nOut, zbuf, nZ, 0, 0 ); + fprintf ( stderr, " %d %s ", r, bzerrorstrings[-r] ); + + if (r != BZ_OK) { + fprintf ( stderr, "\n" ); + } else { + if (nOut != nIn) { + fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut ); + return 1; + } else { + for (i = 0; i < nOut; i++) + if (inbuf[i] != outbuf[i]) { + fprintf(stderr, "mismatch at %d\n", i ); + return 1; + } + if (i == nOut) fprintf(stderr, "really ok!\n" ); + } + } + + flip_bit ( bit ); + } + +#if 0 + assert (nOut == nIn); + for (i = 0; i < nOut; i++) { + if (inbuf[i] != outbuf[i]) { + fprintf ( stderr, "difference at %d !\n", i ); + return 1; + } + } +#endif + + fprintf ( stderr, "all ok\n" ); + return 0; +} @@ -0,0 +1,5 @@ + +If compilation produces errors, or a large number of warnings, +please read README.COMPILATION.PROBLEMS -- you might be able to +adjust the flags in this Makefile to improve matters. + |