aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Seward <jseward@acm.org>2000-06-24 22:13:13 +0200
committerJulian Seward <jseward@acm.org>2000-06-24 22:13:13 +0200
commit795b859eee96c700e8f3c3fe68e6a9a39d95797c (patch)
tree48f8a731cd5ec2f5f15c6d99f2207ebf4a1f35f6
parentf93cd82a9a7094ad90fd19bbc6ccf6f4627f8060 (diff)
downloadbzip2-795b859eee96c700e8f3c3fe68e6a9a39d95797c.tar.gz
bzip2-1.0.1
-rw-r--r--CHANGES67
-rw-r--r--LICENSE4
-rw-r--r--Makefile58
-rw-r--r--Makefile-libbz2_so43
-rw-r--r--README43
-rw-r--r--README.COMPILATION.PROBLEMS130
-rw-r--r--blocksort.c335
-rw-r--r--bzip2.18
-rw-r--r--bzip2.1.preformatted113
-rw-r--r--bzip2.c508
-rw-r--r--bzip2.txt15
-rw-r--r--bzip2recover.c8
-rw-r--r--bzlib.c248
-rw-r--r--bzlib.h67
-rw-r--r--bzlib_private.h52
-rw-r--r--compress.c193
-rw-r--r--crctable.c6
-rw-r--r--decompress.c44
-rw-r--r--dlltest.c341
-rw-r--r--huffman.c36
-rw-r--r--libbz2.def46
-rw-r--r--makefile.msc18
-rw-r--r--manual.texi516
-rw-r--r--randtable.c6
-rw-r--r--spewG.c39
-rw-r--r--unzcrash.c126
-rw-r--r--words05
27 files changed, 2160 insertions, 915 deletions
diff --git a/CHANGES b/CHANGES
index 0acb1c2..ecaf417 100644
--- a/CHANGES
+++ b/CHANGES
@@ -98,3 +98,70 @@ functioning of the bzip2 program or library. Added a couple of casts
so the library compiles without warnings at level 3 in MS Visual
Studio 6.0. Included a Y2K statement in the file Y2K_INFO. All other
changes are minor documentation changes.
+
+1.0
+~~~
+Several minor bugfixes and enhancements:
+
+* Large file support. The library uses 64-bit counters to
+ count the volume of data passing through it. bzip2.c
+ is now compiled with -D_FILE_OFFSET_BITS=64 to get large
+ file support from the C library. -v correctly prints out
+ file sizes greater than 4 gigabytes. All these changes have
+ been made without assuming a 64-bit platform or a C compiler
+ which supports 64-bit ints, so, except for the C library
+ aspect, they are fully portable.
+
+* Decompression robustness. The library/program should be
+ robust to any corruption of compressed data, detecting and
+ handling _all_ corruption, instead of merely relying on
+ the CRCs. What this means is that the program should
+ never crash, given corrupted data, and the library should
+ always return BZ_DATA_ERROR.
+
+* Fixed an obscure race-condition bug only ever observed on
+ Solaris, in which, if you were very unlucky and issued
+ control-C at exactly the wrong time, both input and output
+ files would be deleted.
+
+* Don't run out of file handles on test/decompression when
+ large numbers of files have invalid magic numbers.
+
+* Avoid library namespace pollution. Prefix all exported
+ symbols with BZ2_.
+
+* Minor sorting enhancements from my DCC2000 paper.
+
+* Advance the version number to 1.0, so as to counteract the
+ (false-in-this-case) impression some people have that programs
+ with version numbers less than 1.0 are, in some way, experimental,
+ pre-release versions.
+
+* Create an initial Makefile-libbz2_so to build a shared library.
+ Yes, I know I should really use libtool et al ...
+
+* Make the program exit with 2 instead of 0 when decompression
+ fails due to a bad magic number (ie, an invalid bzip2 header).
+ Also exit with 1 (as the manual claims :-) whenever a diagnostic
+ message would have been printed AND the corresponding operation
+ is aborted, for example
+ bzip2: Output file xx already exists.
+ When a diagnostic message is printed but the operation is not
+ aborted, for example
+ bzip2: Can't guess original name for wurble -- using wurble.out
+ then the exit value 0 is returned, unless some other problem is
+ also detected.
+
+ I think it corresponds more closely to what the manual claims now.
+
+
+1.0.1
+~~~~~
+* Modified dlltest.c so it uses the new BZ2_ naming scheme.
+* Modified makefile.msc to fix minor build probs on Win2k.
+* Updated README.COMPILATION.PROBLEMS.
+
+There are no functionality changes or bug fixes relative to version
+1.0.0. This is just a documentation update + a fix for minor Win32
+build problems. For almost everyone, upgrading from 1.0.0 to 1.0.1 is
+utterly pointless. Don't bother.
diff --git a/LICENSE b/LICENSE
index bc0069a..88fa6d8 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
This program, "bzip2" and associated library "libbzip2", are
-copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -35,5 +35,5 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Julian Seward, Cambridge, UK.
jseward@acm.org
-bzip2/libbzip2 version 0.9.5 of 24 May 1999
+bzip2/libbzip2 version 1.0 of 21 March 2000
diff --git a/Makefile b/Makefile
index 8a1235d..ab17f49 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,8 @@
SHELL=/bin/sh
CC=gcc
-CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce
+BIGFILES=-D_FILE_OFFSET_BITS=64
+CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
OBJS= blocksort.o \
huffman.o \
@@ -73,6 +74,7 @@ clean:
sample1.tst sample2.tst sample3.tst
blocksort.o: blocksort.c
+ @cat words0
$(CC) $(CFLAGS) -c blocksort.c
huffman.o: huffman.c
$(CC) $(CFLAGS) -c huffman.c
@@ -91,13 +93,49 @@ bzip2.o: bzip2.c
bzip2recover.o: bzip2recover.c
$(CC) $(CFLAGS) -c bzip2recover.c
+DISTNAME=bzip2-1.0.1
tarfile:
- tar cvf interim.tar blocksort.c huffman.c crctable.c \
- randtable.c compress.c decompress.c bzlib.c bzip2.c \
- bzip2recover.c bzlib.h bzlib_private.h Makefile manual.texi \
- manual.ps LICENSE bzip2.1 bzip2.1.preformatted bzip2.txt \
- words1 words2 words3 sample1.ref sample2.ref sample3.ref \
- sample1.bz2 sample2.bz2 sample3.bz2 dlltest.c \
- *.html README CHANGES libbz2.def libbz2.dsp \
- dlltest.dsp makefile.msc Y2K_INFO
-
+ rm -f $(DISTNAME)
+ ln -sf . $(DISTNAME)
+ tar cvf $(DISTNAME).tar \
+ $(DISTNAME)/blocksort.c \
+ $(DISTNAME)/huffman.c \
+ $(DISTNAME)/crctable.c \
+ $(DISTNAME)/randtable.c \
+ $(DISTNAME)/compress.c \
+ $(DISTNAME)/decompress.c \
+ $(DISTNAME)/bzlib.c \
+ $(DISTNAME)/bzip2.c \
+ $(DISTNAME)/bzip2recover.c \
+ $(DISTNAME)/bzlib.h \
+ $(DISTNAME)/bzlib_private.h \
+ $(DISTNAME)/Makefile \
+ $(DISTNAME)/manual.texi \
+ $(DISTNAME)/manual.ps \
+ $(DISTNAME)/LICENSE \
+ $(DISTNAME)/bzip2.1 \
+ $(DISTNAME)/bzip2.1.preformatted \
+ $(DISTNAME)/bzip2.txt \
+ $(DISTNAME)/words0 \
+ $(DISTNAME)/words1 \
+ $(DISTNAME)/words2 \
+ $(DISTNAME)/words3 \
+ $(DISTNAME)/sample1.ref \
+ $(DISTNAME)/sample2.ref \
+ $(DISTNAME)/sample3.ref \
+ $(DISTNAME)/sample1.bz2 \
+ $(DISTNAME)/sample2.bz2 \
+ $(DISTNAME)/sample3.bz2 \
+ $(DISTNAME)/dlltest.c \
+ $(DISTNAME)/*.html \
+ $(DISTNAME)/README \
+ $(DISTNAME)/README.COMPILATION.PROBLEMS \
+ $(DISTNAME)/CHANGES \
+ $(DISTNAME)/libbz2.def \
+ $(DISTNAME)/libbz2.dsp \
+ $(DISTNAME)/dlltest.dsp \
+ $(DISTNAME)/makefile.msc \
+ $(DISTNAME)/Y2K_INFO \
+ $(DISTNAME)/unzcrash.c \
+ $(DISTNAME)/spewG.c \
+ $(DISTNAME)/Makefile-libbz2_so
diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so
new file mode 100644
index 0000000..a347c50
--- /dev/null
+++ b/Makefile-libbz2_so
@@ -0,0 +1,43 @@
+
+# This Makefile builds a shared version of the library,
+# libbz2.so.1.0.1, with soname libbz2.so.1.0,
+# at least on x86-Linux (RedHat 5.2),
+# with gcc-2.7.2.3. Please see the README file for some
+# important info about building the library like this.
+
+SHELL=/bin/sh
+CC=gcc
+BIGFILES=-D_FILE_OFFSET_BITS=64
+CFLAGS=-fpic -fPIC -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce $(BIGFILES)
+
+OBJS= blocksort.o \
+ huffman.o \
+ crctable.o \
+ randtable.o \
+ compress.o \
+ decompress.o \
+ bzlib.o
+
+all: $(OBJS)
+ $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS)
+ $(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.1
+ rm -f libbz2.so.1.0
+ ln -s libbz2.so.1.0.1 libbz2.so.1.0
+
+clean:
+ rm -f $(OBJS) bzip2.o libbz2.so.1.0.1 libbz2.so.1.0 bzip2-shared
+
+blocksort.o: blocksort.c
+ $(CC) $(CFLAGS) -c blocksort.c
+huffman.o: huffman.c
+ $(CC) $(CFLAGS) -c huffman.c
+crctable.o: crctable.c
+ $(CC) $(CFLAGS) -c crctable.c
+randtable.o: randtable.c
+ $(CC) $(CFLAGS) -c randtable.c
+compress.o: compress.c
+ $(CC) $(CFLAGS) -c compress.c
+decompress.o: decompress.c
+ $(CC) $(CFLAGS) -c decompress.c
+bzlib.o: bzlib.c
+ $(CC) $(CFLAGS) -c bzlib.c
diff --git a/README b/README
index ee70649..22945a2 100644
--- a/README
+++ b/README
@@ -1,9 +1,9 @@
This is the README for bzip2, a block-sorting file compressor, version
-0.9.5d. This version is fully compatible with the previous public
-releases, bzip2-0.1pl2 and bzip2-0.9.0.
+1.0. This version is fully compatible with the previous public
+releases, bzip2-0.1pl2, bzip2-0.9.0 and bzip2-0.9.5.
-bzip2-0.9.5 is distributed under a BSD-style license. For details,
+bzip2-1.0 is distributed under a BSD-style license. For details,
see the file LICENSE.
Complete documentation is available in Postscript form (manual.ps) or
@@ -30,15 +30,37 @@ The -n instructs make to show the commands it would execute, but
not actually execute them.
+HOW TO BUILD -- UNIX, shared library libbz2.so.
+
+Do 'make -f Makefile-libbz2_so'. This Makefile seems to work for
+Linux-ELF (RedHat 5.2 on an x86 box), with gcc. I make no claims
+that it works for any other platform, though I suspect it probably
+will work for most platforms employing both ELF and gcc.
+
+bzip2-shared, a client of the shared library, is also built, but
+not self-tested. So I suggest you also build using the normal
+Makefile, since that conducts a self-test.
+
+Important note for people upgrading .so's from 0.9.0/0.9.5 to
+version 1.0. All the functions in the library have been renamed,
+from (eg) bzCompress to BZ2_bzCompress, to avoid namespace pollution.
+Unfortunately this means that the libbz2.so created by
+Makefile-libbz2_so will not work with any program which used an
+older version of the library. Sorry. I do encourage library
+clients to make the effort to upgrade to use version 1.0, since
+it is both faster and more robust than previous versions.
+
+
HOW TO BUILD -- Windows 95, NT, DOS, Mac, etc.
It's difficult for me to support compilation on all these platforms.
My approach is to collect binaries for these platforms, and put them
-on my web page (http://www.muraroa.demon.co.uk). Look there. However
-(FWIW), bzip2-0.9.5 is very standard ANSI C and should compile
-unmodified with MS Visual C. For Win32, there is one important
-caveat: in bzip2.c, you must set BZ_UNIX to 0 and BZ_LCCWIN32 to 1
-before building.
+on the master web page (http://sourceware.cygnus.com/bzip2). Look
+there. However (FWIW), bzip2-1.0 is very standard ANSI C and should
+compile unmodified with MS Visual C. For Win32, there is one
+important caveat: in bzip2.c, you must set BZ_UNIX to 0 and
+BZ_LCCWIN32 to 1 before building. If you have difficulties building,
+you might want to read README.COMPILATION.PROBLEMS.
VALIDATION
@@ -116,6 +138,10 @@ WHAT'S NEW IN 0.9.5 ?
* Many small improvements in file and flag handling.
* A Y2K statement.
+WHAT'S NEW IN 1.0
+
+ See the CHANGES file.
+
I hope you find bzip2 useful. Feel free to contact me at
jseward@acm.org
if you have any suggestions or queries. Many people mailed me with
@@ -137,3 +163,4 @@ Cambridge, UK
23 August 1998 (bzip2, version 0.9.0)
8 June 1999 (bzip2, version 0.9.5)
4 Sept 1999 (bzip2, version 0.9.5d)
+ 5 May 2000 (bzip2, version 1.0pre8)
diff --git a/README.COMPILATION.PROBLEMS b/README.COMPILATION.PROBLEMS
new file mode 100644
index 0000000..d621ad5
--- /dev/null
+++ b/README.COMPILATION.PROBLEMS
@@ -0,0 +1,130 @@
+
+bzip2-1.0 should compile without problems on the vast majority of
+platforms. Using the supplied Makefile, I've built and tested it
+myself for x86-linux, sparc-solaris, alpha-linux, x86-cygwin32 and
+alpha-tru64unix. With makefile.msc, Visual C++ 6.0 and nmake, you can
+build a native Win32 version too. Large file support seems to work
+correctly on at least alpha-tru64unix and x86-cygwin32 (on Windows
+2000).
+
+When I say "large file" I mean a file of size 2,147,483,648 (2^31)
+bytes or above. Many older OSs can't handle files above this size,
+but many newer ones can. Large files are pretty huge -- most files
+you'll encounter are not Large Files.
+
+Earlier versions of bzip2 (0.1, 0.9.0, 0.9.5) compiled on a wide
+variety of platforms without difficulty, and I hope this version will
+continue in that tradition. However, in order to support large files,
+I've had to include the define -D_FILE_OFFSET_BITS=64 in the Makefile.
+This can cause problems.
+
+The technique of adding -D_FILE_OFFSET_BITS=64 to get large file
+support is, as far as I know, the Recommended Way to get correct large
+file support. For more details, see the Large File Support
+Specification, published by the Large File Summit, at
+ http://www.sas.com/standard/large.file/
+
+As a general comment, if you get compilation errors which you think
+are related to large file support, try removing the above define from
+the Makefile, ie, delete the line
+ BIGFILES=-D_FILE_OFFSET_BITS=64
+from the Makefile, and do 'make clean ; make'. This will give you a
+version of bzip2 without large file support, which, for most
+applications, is probably not a problem.
+
+Alternatively, try some of the platform-specific hints listed below.
+
+You can use the spewG.c program to generate huge files to test bzip2's
+large file support, if you are feeling paranoid. Be aware though that
+any compilation problems which affect bzip2 will also affect spewG.c,
+alas.
+
+
+Known problems as of 1.0pre8:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* HP/UX 10.20 and 11.00, using gcc (2.7.2.3 and 2.95.2): A large
+ number of warnings appear, including the following:
+
+ /usr/include/sys/resource.h: In function `getrlimit':
+ /usr/include/sys/resource.h:168:
+ warning: implicit declaration of function `__getrlimit64'
+ /usr/include/sys/resource.h: In function `setrlimit':
+ /usr/include/sys/resource.h:170:
+ warning: implicit declaration of function `__setrlimit64'
+
+ This would appear to be a problem with large file support, header
+ files and gcc. gcc may or may not give up at this point. If it
+ fails, you might be able to improve matters by adding
+ -D__STDC_EXT__=1
+ to the BIGFILES variable in the Makefile, ie, change its definition
+ to
+ BIGFILES=-D_FILE_OFFSET_BITS=64 -D__STDC_EXT__=1
+
+ Even if gcc does produce a binary which appears to work (ie passes
+ its self-tests), you might want to test it to see if it works properly
+ on large files.
+
+
+* HP/UX 10.20 and 11.00, using HP's cc compiler.
+
+ No specific problems for this combination, except that you'll need to
+ specify the -Ae flag, and zap the gcc-specific stuff
+ -Wall -Winline -O2 -fomit-frame-pointer -fno-strength-reduce.
+ You should retain -D_FILE_OFFSET_BITS=64 in order to get large
+ file support -- which is reported to work ok for this HP/UX + cc
+ combination.
+
+
+* SunOS 4.1.X.
+
+ Amazingly, there are still people out there using this venerable old
+ banger. I shouldn't be too rude -- I started life on SunOS, and
+ it was a pretty darn good OS, way back then. Anyway:
+
+ SunOS doesn't seem to have strerror(), so you'll have to use
+ perror(), perhaps by adding this (warning: UNTESTED CODE):
+
+ char* strerror ( int errnum )
+ {
+ if (errnum < 0 || errnum >= sys_nerr)
+ return "Unknown error";
+ else
+ return sys_errlist[errnum];
+ }
+
+ Or you could comment out the relevant calls to strerror; they're
+ not mission-critical. Or you could upgrade to Solaris. Ha ha ha!
+ (what?? you think I've got Bad Attitude?)
+
+
+* Making a shared library on Solaris. (Not really a compilation
+ problem, but many people ask ...)
+
+ Firstly, if you have Solaris 8, either you have libbz2.so already
+ on your system, or you can install it from the Solaris CD.
+
+ Secondly, be aware that there are potential naming conflicts
+ between the .so file supplied with Solaris 8, and the .so file
+ which Makefile-libbz2_so will make. Makefile-libbz2_so creates
+ a .so which has the names which I intend to be "official" as
+ of version 1.0.0 and onwards. Unfortunately, the .so in
+ Solaris 8 appeared before I decided on the final names, so
+ the two libraries are incompatible. We have since communicated
+ and I hope that the problems will have been solved in the next
+ version of Solaris, whenever that might appear.
+
+ All that said: you might be able to get somewhere
+ by finding the line in Makefile-libbz2_so which says
+
+ $(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.1 $(OBJS)
+
+ and replacing with
+
+ $(CC) -G -shared -o libbz2.so.1.0.1 -h libbz2.so.1.0 $(OBJS)
+
+ If gcc objects to the combination -fpic -fPIC, get rid of
+ the second one, leaving just "-fpic".
+
+
+That's the end of the currently known compilation problems.
diff --git a/blocksort.c b/blocksort.c
index 85a02de..ec42672 100644
--- a/blocksort.c
+++ b/blocksort.c
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -56,6 +56,13 @@
Jon L. Bentley
For more information on these sources, see the manual.
+
+ To get some idea how the block sorting algorithms in this file
+ work, read my paper
+ On the Performance of BWT Sorting Algorithms
+ in Proceedings of the IEEE Data Compression Conference 2000,
+ Snowbird, Utah, USA, 27-30 March 2000. The main sort in this
+ file implements the algorithm called cache in the paper.
--*/
@@ -232,11 +239,11 @@ void fallbackQSort3 ( UInt32* fmap,
/* Pre:
nblock > 0
eclass exists for [0 .. nblock-1]
- ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)eclass) [0 .. nblock-1] holds block
ptr exists for [0 .. nblock-1]
Post:
- ((UInt16*)eclass) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)eclass) [0 .. nblock-1] holds block
All other areas of eclass destroyed
fmap [0 .. nblock-1] holds sorted order
bhtab [ 0 .. 2+(nblock/32) ] destroyed
@@ -260,7 +267,7 @@ void fallbackSort ( UInt32* fmap,
Int32 H, i, j, k, l, r, cc, cc1;
Int32 nNotDone;
Int32 nBhtab;
- UInt16* eclass16 = (UInt16*)eclass;
+ UChar* eclass8 = (UChar*)eclass;
/*--
Initial 1-char radix sort to generate
@@ -269,12 +276,12 @@ void fallbackSort ( UInt32* fmap,
if (verb >= 4)
VPrintf0 ( " bucket sorting ...\n" );
for (i = 0; i < 257; i++) ftab[i] = 0;
- for (i = 0; i < nblock; i++) ftab[eclass16[i] >> 8]++;
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
for (i = 0; i < nblock; i++) {
- j = eclass16[i] >> 8;
+ j = eclass8[i];
k = ftab[j] - 1;
ftab[j] = k;
fmap[k] = i;
@@ -354,7 +361,7 @@ void fallbackSort ( UInt32* fmap,
/*--
Reconstruct the original block in
- eclass16 [0 .. nblock-1] [15:8], since the
+ eclass8 [0 .. nblock-1], since the
previous phase destroyed it.
--*/
if (verb >= 4)
@@ -363,7 +370,7 @@ void fallbackSort ( UInt32* fmap,
for (i = 0; i < nblock; i++) {
while (ftabCopy[j] == 0) j++;
ftabCopy[j]--;
- eclass16[fmap[i]] = j << 8;
+ eclass8[fmap[i]] = (UChar)j;
}
AssertH ( j < 256, 1005 );
}
@@ -386,67 +393,116 @@ static
__inline__
Bool mainGtU ( UInt32 i1,
UInt32 i2,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
UInt32 nblock,
Int32* budget )
{
- Int32 k;
+ Int32 k;
+ UChar c1, c2;
UInt16 s1, s2;
AssertD ( i1 != i2, "mainGtU" );
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
- if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 9 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 10 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 11 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 12 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
k = nblock + 8;
do {
-
- s1 = block[i1]; s2 = block[i2];
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
-
- s1 = block[i1]; s2 = block[i2];
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
s1 = quadrant[i1]; s2 = quadrant[i2];
if (s1 != s2) return (s1 > s2);
- i1 += 2; i2 += 2;
+ i1++; i2++;
if (i1 >= nblock) i1 -= nblock;
if (i2 >= nblock) i2 -= nblock;
@@ -467,13 +523,14 @@ Bool mainGtU ( UInt32 i1,
because the number of elems to sort is
usually small, typically <= 20.
--*/
+static
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
9841, 29524, 88573, 265720,
797161, 2391484 };
static
void mainSimpleSort ( UInt32* ptr,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
Int32 nblock,
Int32 lo,
@@ -568,19 +625,19 @@ void mainSimpleSort ( UInt32* ptr,
} \
}
-
static
__inline__
-UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c )
+UChar mmed3 ( UChar a, UChar b, UChar c )
{
- UInt16 t;
+ UChar t;
if (a > b) { t = a; a = b; b = t; };
- if (b > c) { t = b; b = c; c = t; };
- if (a > b) b = a;
+ if (b > c) {
+ b = c;
+ if (a > b) b = a;
+ }
return b;
}
-
#define mmin(a,b) ((a) < (b)) ? (a) : (b)
#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
@@ -609,7 +666,7 @@ UInt16 mmed3 ( UInt16 a, UInt16 b, UInt16 c )
static
void mainQSort3 ( UInt32* ptr,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
Int32 nblock,
Int32 loSt,
@@ -679,7 +736,7 @@ void mainQSort3 ( UInt32* ptr,
AssertD ( unHi == unLo-1, "mainQSort3(2)" );
if (gtHi < ltLo) {
- mpush(lo, hi, d+2 );
+ mpush(lo, hi, d+1 );
continue;
}
@@ -691,7 +748,7 @@ void mainQSort3 ( UInt32* ptr,
nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
- nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+2;
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
@@ -722,11 +779,11 @@ void mainQSort3 ( UInt32* ptr,
/* Pre:
nblock > N_OVERSHOOT
block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)block32) [0 .. nblock-1] holds block
ptr exists for [0 .. nblock-1]
Post:
- ((UInt16*)block32) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)block32) [0 .. nblock-1] holds block
All other areas of block32 destroyed
ftab [0 .. 65536 ] destroyed
ptr [0 .. nblock-1] holds sorted order
@@ -739,40 +796,47 @@ void mainQSort3 ( UInt32* ptr,
static
void mainSort ( UInt32* ptr,
- UInt16* block,
+ UChar* block,
UInt16* quadrant,
UInt32* ftab,
Int32 nblock,
Int32 verb,
Int32* budget )
{
- Int32 i, j, k, m, ss, sb;
+ Int32 i, j, k, ss, sb;
Int32 runningOrder[256];
- Int32 copy[256];
Bool bigDone[256];
+ Int32 copyStart[256];
+ Int32 copyEnd [256];
UChar c1;
Int32 numQSorted;
- Int32 biggestSoFar;
UInt16 s;
-
if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
- /*-- Stripe the block data into 16 bits, and at the
- same time set up the 2-byte frequency table
- --*/
+ /*-- set up the 2-byte frequency table --*/
for (i = 65536; i >= 0; i--) ftab[i] = 0;
- s = block[0];
- for (i = 1; i < nblock; i++) {
+ j = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ quadrant[i-1] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+ ftab[j]++;
+ quadrant[i-2] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+ ftab[j]++;
+ quadrant[i-3] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+ ftab[j]++;
+ }
+ for (; i >= 0; i--) {
quadrant[i] = 0;
- s = (s << 8) | block[i];
- block[i-1] = s;
- ftab[s]++;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
}
- quadrant[0] = 0;
- s = (s << 8) | (block[0] >> 8);
- block[nblock-1] = s;
- ftab[s]++;
/*-- (emphasises close relationship of block & quadrant) --*/
for (i = 0; i < BZ_N_OVERSHOOT; i++) {
@@ -785,9 +849,29 @@ void mainSort ( UInt32* ptr,
/*-- Complete the initial radix sort --*/
for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
- for (i = 0; i < nblock; i++) {
- s = block[i];
- j = ftab[s] - 1;
+ s = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ s = (s >> 8) | (block[i-1] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-1;
+ s = (s >> 8) | (block[i-2] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-2;
+ s = (s >> 8) | (block[i-3] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-3;
+ }
+ for (; i >= 0; i--) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
ftab[s] = j;
ptr[j] = i;
}
@@ -826,13 +910,13 @@ void mainSort ( UInt32* ptr,
The main sorting loop.
--*/
- biggestSoFar = numQSorted = 0;
+ numQSorted = 0;
for (i = 0; i <= 255; i++) {
/*--
Process big buckets, starting with the least full.
- Basically this is a 4-step process in which we call
+ Basically this is a 3-step process in which we call
mainQSort3 to sort the small buckets [ss, j], but
also make a big effort to avoid the calls if we can.
--*/
@@ -869,39 +953,38 @@ void mainSort ( UInt32* ptr,
}
}
+ AssertH ( !bigDone[ss], 1006 );
+
/*--
Step 2:
- Deal specially with case [ss, ss]. This establishes the
- sorted order for [ss, ss] without any comparisons.
- A clever trick, cryptically described as steps Q6b and Q6c
- in SRC-124 (aka BW94). Compared to bzip2, this makes it
- practical not to use a preliminary run-length coder.
+ Now scan this big bucket [ss] so as to synthesise the
+ sorted order for small buckets [t, ss] for all t,
+ including, magically, the bucket [ss,ss] too.
+ This will avoid doing Real Work in subsequent Step 1's.
--*/
{
- Int32 put0, get0, put1, get1;
- Int32 sbn = (ss << 8) + ss;
- Int32 lo = ftab[sbn] & CLEARMASK;
- Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
- UChar ssc = (UChar)ss;
- put0 = lo;
- get0 = ftab[ss << 8] & CLEARMASK;
- put1 = hi;
- get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
- while (get0 < put0) {
- j = ptr[get0]-1; if (j < 0) j += nblock;
- c1 = (UChar)(block[j] >> 8);
- if (c1 == ssc) { ptr[put0] = j; put0++; };
- get0++;
+ for (j = 0; j <= 255; j++) {
+ copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
+ copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+ }
+ for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyStart[c1]++ ] = k;
}
- while (get1 > put1) {
- j = ptr[get1]-1; if (j < 0) j += nblock;
- c1 = (UChar)(block[j] >> 8);
- if (c1 == ssc) { ptr[put1] = j; put1--; };
- get1--;
+ for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyEnd[c1]-- ] = k;
}
- ftab[sbn] |= SETMASK;
}
+ AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
+
+ for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
+
/*--
Step 3:
The [ss] big bucket is now done. Record this fact,
@@ -950,7 +1033,7 @@ void mainSort ( UInt32* ptr,
while ((bbSize >> shifts) > 65534) shifts++;
- for (j = 0; j < bbSize; j++) {
+ for (j = bbSize-1; j >= 0; j--) {
Int32 a2update = ptr[bbStart + j];
UInt16 qVal = (UInt16)(j >> shifts);
quadrant[a2update] = qVal;
@@ -960,26 +1043,6 @@ void mainSort ( UInt32* ptr,
AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
}
- /*--
- Step 4:
- Now scan this big bucket [ss] so as to synthesise the
- sorted order for small buckets [t, ss] for all t != ss.
- This will avoid doing Real Work in subsequent Step 1's.
- --*/
- for (j = 0; j <= 255; j++)
- copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
-
- m = ftab[(ss+1) << 8] & CLEARMASK;
- for (j = ftab[ss << 8] & CLEARMASK; j < m; j++) {
- k = ptr[j] - 1; if (k < 0) k += nblock;
- c1 = (UChar)(block[k] >> 8);
- if ( ! bigDone[c1] ) {
- ptr[copy[c1]] = k;
- copy[c1] ++;
- }
- }
-
- for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
}
if (verb >= 4)
@@ -996,19 +1059,19 @@ void mainSort ( UInt32* ptr,
/* Pre:
nblock > 0
arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)arr2) [0 .. nblock-1] holds block
arr1 exists for [0 .. nblock-1]
Post:
- ((UInt16*)arr2) [0 .. nblock-1] [15:8] holds block
+ ((UChar*)arr2) [0 .. nblock-1] holds block
All other areas of block destroyed
ftab [ 0 .. 65536 ] destroyed
arr1 [0 .. nblock-1] holds sorted order
*/
-void blockSort ( EState* s )
+void BZ2_blockSort ( EState* s )
{
UInt32* ptr = s->ptr;
- UInt16* block = s->block;
+ UChar* block = s->block;
UInt32* ftab = s->ftab;
Int32 nblock = s->nblock;
Int32 verb = s->verbosity;
@@ -1019,10 +1082,16 @@ void blockSort ( EState* s )
Int32 i;
if (nblock < 10000) {
- for (i = 0; i < nblock; i++) block[i] <<= 8;
fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
} else {
- quadrant = &(block[nblock+BZ_N_OVERSHOOT]);
+ /* Calculate the location for quadrant, remembering to get
+ the alignment right. Assumes that &(block[0]) is at least
+ 2-byte aligned -- this should be ok since block is really
+ the first section of arr2.
+ */
+ i = nblock+BZ_N_OVERSHOOT;
+ if (i & 1) i++;
+ quadrant = (UInt16*)(&(block[i]));
/* (wfact-1) / 3 puts the default-factor-30
transition point at very roughly the same place as
diff --git a/bzip2.1 b/bzip2.1
index 99eda9b..7de54a0 100644
--- a/bzip2.1
+++ b/bzip2.1
@@ -1,7 +1,7 @@
.PU
.TH bzip2 1
.SH NAME
-bzip2, bunzip2 \- a block-sorting file compressor, v0.9.5
+bzip2, bunzip2 \- a block-sorting file compressor, v1.0
.br
bzcat \- decompresses files to stdout
.br
@@ -397,11 +397,12 @@ I/O error messages are not as helpful as they could be.
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
-This manual page pertains to version 0.9.5 of
+This manual page pertains to version 1.0 of
.I bzip2.
Compressed
data created by this version is entirely forwards and backwards
-compatible with the previous public releases, versions 0.1pl2 and 0.9.0,
+compatible with the previous public releases, versions 0.1pl2, 0.9.0
+and 0.9.5,
but with the following exception: 0.9.0 and above can correctly
decompress multiple concatenated compressed files. 0.1pl2 cannot do
this; it will stop after decompressing just the first file in the
@@ -415,6 +416,7 @@ megabytes long. This could easily be fixed.
.SH AUTHOR
Julian Seward, jseward@acm.org.
+http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk
The ideas embodied in
diff --git a/bzip2.1.preformatted b/bzip2.1.preformatted
index 96b44be..9f18339 100644
--- a/bzip2.1.preformatted
+++ b/bzip2.1.preformatted
@@ -1,7 +1,11 @@
+
+bzip2(1) bzip2(1)
+
+
NNAAMMEE
- bzip2, bunzip2 - a block-sorting file compressor, v0.9.5
+ bzip2, bunzip2 - a block-sorting file compressor, v1.0
bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files
@@ -54,6 +58,18 @@ DDEESSCCRRIIPPTTIIOONN
filename.bz2 becomes filename
filename.bz becomes filename
filename.tbz2 becomes filename.tar
+
+
+
+ 1
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
filename.tbz becomes filename.tar
anyothername becomes anyothername.out
@@ -109,6 +125,17 @@ DDEESSCCRRIIPPTTIIOONN
you recover the original uncompressed data. You can use
_b_z_i_p_2_r_e_c_o_v_e_r to try to recover data from damaged files.
+
+
+ 2
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
Return values: 0 for a normal exit, 1 for environmental
problems (file not found, invalid flags, I/O errors, &c),
2 to indicate a corrupt compressed file, 3 for an internal
@@ -163,6 +190,18 @@ OOPPTTIIOONNSS
--qq ----qquuiieett
Suppress non-essential warning messages. Messages
pertaining to I/O errors and other critical events
+
+
+
+ 3
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
will not be suppressed.
--vv ----vveerrbboossee
@@ -217,6 +256,18 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
Larger block sizes give rapidly diminishing marginal
returns. Most of the compression comes from the first two
+
+
+
+ 4
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
or three hundred k of block size, a fact worth bearing in
mind when using _b_z_i_p_2 on small machines. It is also
important to appreciate that the decompression memory
@@ -270,6 +321,19 @@ MMEEMMOORRYY MMAANNAAGGEEMMEENNTT
-9 7600k 3700k 2350k 828642
+
+
+
+
+ 5
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
RREECCOOVVEERRIINNGG DDAATTAA FFRROOMM DDAAMMAAGGEEDD FFIILLEESS
_b_z_i_p_2 compresses files in blocks, usually 900kbytes long.
Each block is handled independently. If a media or trans-
@@ -324,6 +388,18 @@ PPEERRFFOORRMMAANNCCEE NNOOTTEESS
operate in, and then charges all over it in a fairly ran-
dom fashion. This means that performance, both for com-
pressing and decompressing, is largely determined by the
+
+
+
+ 6
+
+
+
+
+
+bzip2(1) bzip2(1)
+
+
speed at which your machine can service cache misses.
Because of this, small changes to the code to reduce the
miss rate have been observed to give disproportionately
@@ -337,14 +413,14 @@ CCAAVVEEAATTSS
but the details of what the problem is sometimes seem
rather misleading.
- This manual page pertains to version 0.9.5 of _b_z_i_p_2_. Com-
+ This manual page pertains to version 1.0 of _b_z_i_p_2_. Com-
pressed data created by this version is entirely forwards
and backwards compatible with the previous public
- releases, versions 0.1pl2 and 0.9.0, but with the follow-
- ing exception: 0.9.0 and above can correctly decompress
- multiple concatenated compressed files. 0.1pl2 cannot do
- this; it will stop after decompressing just the first file
- in the stream.
+ releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the
+ following exception: 0.9.0 and above can correctly decom-
+ press multiple concatenated compressed files. 0.1pl2 can-
+ not do this; it will stop after decompressing just the
+ first file in the stream.
_b_z_i_p_2_r_e_c_o_v_e_r uses 32-bit integers to represent bit posi-
tions in compressed files, so it cannot handle compressed
@@ -355,21 +431,32 @@ CCAAVVEEAATTSS
AAUUTTHHOORR
Julian Seward, jseward@acm.org.
+ http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk
The ideas embodied in _b_z_i_p_2 are due to (at least) the fol-
- lowing people: Michael Burrows and David Wheeler (for the
- block sorting transformation), David Wheeler (again, for
+ lowing people: Michael Burrows and David Wheeler (for the
+ block sorting transformation), David Wheeler (again, for
the Huffman coder), Peter Fenwick (for the structured cod-
ing model in the original _b_z_i_p_, and many refinements), and
- Alistair Moffat, Radford Neal and Ian Witten (for the
+ Alistair Moffat, Radford Neal and Ian Witten (for the
arithmetic coder in the original _b_z_i_p_)_. I am much
indebted for their help, support and advice. See the man-
- ual in the source distribution for pointers to sources of
+ ual in the source distribution for pointers to sources of
documentation. Christian von Roques encouraged me to look
- for faster sorting algorithms, so as to speed up compres-
+ for faster sorting algorithms, so as to speed up compres-
sion. Bela Lubkin encouraged me to improve the worst-case
compression performance. Many people sent patches, helped
- with portability problems, lent machines, gave advice and
+ with portability problems, lent machines, gave advice and
were generally helpful.
+
+
+
+
+
+
+
+ 7
+
+
diff --git a/bzip2.c b/bzip2.c
index abb9530..56adfdc 100644
--- a/bzip2.c
+++ b/bzip2.c
@@ -7,7 +7,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -42,7 +42,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -123,10 +123,10 @@
--*/
#define BZ_LCCWIN32 0
-#if defined(_WIN32) && !defined(__CYGWIN32__)
-#undef BZ_LCCWIN32
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#undef BZ_LCCWIN32
#define BZ_LCCWIN32 1
-#undef BZ_UNIX
+#undef BZ_UNIX
#define BZ_UNIX 0
#endif
@@ -193,6 +193,17 @@
ERROR_IF_MINUS_ONE ( retVal ); \
} while ( 0 )
# endif
+# ifdef __CYGWIN__
+# include <io.h>
+# include <fcntl.h>
+# undef SET_BINARY_MODE
+# define SET_BINARY_MODE(fd) \
+ do { \
+ int retVal = setmode ( fileno ( fd ), \
+ O_BINARY ); \
+ ERROR_IF_MINUS_ONE ( retVal ); \
+ } while ( 0 )
+# endif
#endif
@@ -276,10 +287,10 @@ typedef int IntNative;
/*---------------------------------------------------*/
Int32 verbosity;
-Bool keepInputFiles, smallMode;
-Bool forceOverwrite, testFailsExist, noisy;
+Bool keepInputFiles, smallMode, deleteOutputOnInterrupt;
+Bool forceOverwrite, testFailsExist, unzFailsExist, noisy;
Int32 numFileNames, numFilesProcessed, blockSize100k;
-
+Int32 exitValue;
/*-- source modes; F==file, I==stdin, O==stdout --*/
#define SM_I2O 1
@@ -305,27 +316,204 @@ Char progNameReally[FILE_NAME_LEN];
FILE *outputHandleJustInCase;
Int32 workFactor;
-void panic ( Char* ) NORETURN;
-void ioError ( void ) NORETURN;
-void outOfMemory ( void ) NORETURN;
-void blockOverrun ( void ) NORETURN;
-void badBlockHeader ( void ) NORETURN;
-void badBGLengths ( void ) NORETURN;
-void crcError ( void ) NORETURN;
-void bitStreamEOF ( void ) NORETURN;
-void cleanUpAndFail ( Int32 ) NORETURN;
-void compressedStreamEOF ( void ) NORETURN;
+static void panic ( Char* ) NORETURN;
+static void ioError ( void ) NORETURN;
+static void outOfMemory ( void ) NORETURN;
+static void configError ( void ) NORETURN;
+static void crcError ( void ) NORETURN;
+static void cleanUpAndFail ( Int32 ) NORETURN;
+static void compressedStreamEOF ( void ) NORETURN;
-void copyFileName ( Char*, Char* );
-void* myMalloc ( Int32 );
+static void copyFileName ( Char*, Char* );
+static void* myMalloc ( Int32 );
/*---------------------------------------------------*/
+/*--- An implementation of 64-bit ints. Sigh. ---*/
+/*--- Roll on widespread deployment of ANSI C9X ! ---*/
+/*---------------------------------------------------*/
+
+typedef /* 64-bit unsigned value kept as eight separate bytes */
+ struct { UChar b[8]; } /* uInt64_from_UInt32s puts the least significant byte in b[0] */
+ UInt64;
+
+static /* Store the 64-bit value whose halves are hi32 and lo32 into *n, low byte first. */
+void uInt64_from_UInt32s ( UInt64* n, UInt32 lo32, UInt32 hi32 )
+{
+ n->b[7] = (UChar)((hi32 >> 24) & 0xFF); /* most significant byte */
+ n->b[6] = (UChar)((hi32 >> 16) & 0xFF);
+ n->b[5] = (UChar)((hi32 >> 8) & 0xFF);
+ n->b[4] = (UChar) (hi32 & 0xFF);
+ n->b[3] = (UChar)((lo32 >> 24) & 0xFF);
+ n->b[2] = (UChar)((lo32 >> 16) & 0xFF);
+ n->b[1] = (UChar)((lo32 >> 8) & 0xFF);
+ n->b[0] = (UChar) (lo32 & 0xFF); /* least significant byte */
+}
+
+static /* Return the value of *n as a double, computed as the sum of b[i] * 256^i. */
+double uInt64_to_double ( UInt64* n )
+{
+ Int32 i;
+ double base = 1.0; /* weight of byte i, i.e. 256^i */
+ double sum = 0.0;
+ for (i = 0; i < 8; i++) {
+ sum += base * (double)(n->b[i]);
+ base *= 256.0;
+ }
+ return sum;
+}
+
+static /* *dst += *src. Note the argument order: the second argument is the one modified. */
+void uInt64_add ( UInt64* src, UInt64* dst )
+{
+ Int32 i;
+ Int32 carry = 0;
+ for (i = 0; i < 8; i++) { /* low byte first, so the carry ripples upwards */
+ carry += ( ((Int32)src->b[i]) + ((Int32)dst->b[i]) );
+ dst->b[i] = (UChar)(carry & 0xFF);
+ carry >>= 8;
+ }
+} /* a carry out of b[7] is dropped, so the sum wraps at 64 bits */
+
+static /* *dst -= *src. Note the argument order: the second argument is the one modified. */
+void uInt64_sub ( UInt64* src, UInt64* dst )
+{
+ Int32 t, i;
+ Int32 borrow = 0;
+ for (i = 0; i < 8; i++) { /* low byte first, so the borrow ripples upwards */
+ t = ((Int32)dst->b[i]) - ((Int32)src->b[i]) - borrow;
+ if (t < 0) {
+ dst->b[i] = (UChar)(t + 256); /* borrow 256 from the next byte up */
+ borrow = 1;
+ } else {
+ dst->b[i] = (UChar)t;
+ borrow = 0;
+ }
+ }
+} /* a borrow out of b[7] is dropped, so the difference wraps at 64 bits */
+
+static /* Byte-by-byte multiply: the 128-bit product of *a and *b goes to *r_hi (high 8 bytes) and *r_lo (low 8 bytes). */
+void uInt64_mul ( UInt64* a, UInt64* b, UInt64* r_hi, UInt64* r_lo )
+{
+ UChar sum[16]; /* product accumulator, sum[0] least significant */
+ Int32 ia, ib, carry;
+ for (ia = 0; ia < 16; ia++) sum[ia] = 0;
+ for (ia = 0; ia < 8; ia++) { /* add a->b[ia] * (*b), offset by ia bytes, into sum */
+ carry = 0;
+ for (ib = 0; ib < 8; ib++) {
+ carry += ( ((Int32)sum[ia+ib])
+ + ((Int32)a->b[ia]) * ((Int32)b->b[ib]) );
+ sum[ia+ib] = (UChar)(carry & 0xFF);
+ carry >>= 8;
+ }
+ sum[ia+8] = (UChar)(carry & 0xFF); /* sum[ia+8] has not been written by earlier rows */
+ if ((carry >>= 8) != 0) panic ( "uInt64_mul" ); /* leftover carry would not fit in one byte */
+ }
+
+ for (ia = 0; ia < 8; ia++) r_hi->b[ia] = sum[ia+8];
+ for (ia = 0; ia < 8; ia++) r_lo->b[ia] = sum[ia];
+}
+
+
+static /* Shift *n right by one bit in place; bit 0 of b[0] is discarded. */
+void uInt64_shr1 ( UInt64* n )
+{
+ Int32 i;
+ for (i = 0; i < 8; i++) { /* ascending, so b[i+1] is still unshifted when it is examined */
+ n->b[i] >>= 1;
+ if (i < 7 && (n->b[i+1] & 1)) n->b[i] |= 0x80; /* bring down the low bit of the next byte */
+ }
+}
+
+static /* Shift *n left by one bit in place; the top bit of b[7] is discarded. */
+void uInt64_shl1 ( UInt64* n )
+{
+ Int32 i;
+ for (i = 7; i >= 0; i--) { /* descending, so b[i-1] is still unshifted when it is examined */
+ n->b[i] <<= 1;
+ if (i > 0 && (n->b[i-1] & 0x80)) n->b[i]++; /* bit 0 is clear after the shift, so ++ just sets it */
+ }
+}
+
+static /* Return 1 if every byte of *n is zero, otherwise 0. */
+Bool uInt64_isZero ( UInt64* n )
+{
+ Int32 i;
+ for (i = 0; i < 8; i++)
+ if (n->b[i] != 0) return 0; /* found a non-zero byte */
+ return 1;
+}
+
+static /* Replace *n by *n / 10 (integer division) and return the remainder, 0 .. 9. */
+Int32 uInt64_qrm10 ( UInt64* n )
+{
+ /* Divide *n by 10, and return the remainder. Long division
+ is difficult, so we cheat: multiply by 0xCCCC CCCC CCCC CCCD,
+ which is 0.8 (viz, 0.1 << 3) scaled up by 2^64, keep only the
+ high 64 bits of the product, and shift those right 3 places. */
+ Int32 i;
+ UInt64 tmp1, tmp2, n_orig, zero_point_eight;
+
+ zero_point_eight.b[1] = zero_point_eight.b[2] =
+ zero_point_eight.b[3] = zero_point_eight.b[4] =
+ zero_point_eight.b[5] = zero_point_eight.b[6] =
+ zero_point_eight.b[7] = 0xCC;
+ zero_point_eight.b[0] = 0xCD;
+
+ n_orig = *n;
+
+ /* divide n by 10,
+ by multiplying by 0.8 and then shifting right 3 times */
+ uInt64_mul ( n, &zero_point_eight, &tmp1, &tmp2 ); /* tmp1 = high half; the low half in tmp2 is not used */
+ uInt64_shr1(&tmp1); uInt64_shr1(&tmp1); uInt64_shr1(&tmp1);
+ *n = tmp1; /* *n now holds the quotient */
+
+ /* with n now the quotient: tmp1 = 8*n, tmp2 = 2*n */
+ uInt64_shl1(&tmp1); uInt64_shl1(&tmp1); uInt64_shl1(&tmp1);
+ tmp2 = *n; uInt64_shl1(&tmp2);
+
+ /* tmp1 = 10*n */
+ uInt64_add ( &tmp2, &tmp1 );
+
+ /* n_orig = n_orig - 10*n */
+ uInt64_sub ( &tmp1, &n_orig );
+
+ /* n_orig should now hold the remainder, in range 0 .. 9 */
+ for (i = 7; i >= 1; i--)
+ if (n_orig.b[i] != 0) panic ( "uInt64_qrm10(1)" );
+ if (n_orig.b[0] > 9)
+ panic ( "uInt64_qrm10(2)" );
+
+ return (int)n_orig.b[0];
+}
+
+/* ... and the Whole Entire Point of all this UInt64 stuff is
+ so that we can supply the following function, which writes *n
+ into outbuf as a NUL-terminated decimal string. *n is not changed. */
+static
+void uInt64_toAscii ( char* outbuf, UInt64* n )
+{
+ Int32 i, q;
+ UChar buf[32]; /* digits as produced, least significant first */
+ Int32 nBuf = 0;
+ UInt64 n_copy = *n; /* work on a copy; uInt64_qrm10 divides its argument in place */
+ do { /* do-while, so a value of zero still yields one digit */
+ q = uInt64_qrm10 ( &n_copy );
+ buf[nBuf] = q + '0';
+ nBuf++;
+ } while (!uInt64_isZero(&n_copy));
+ outbuf[nBuf] = 0; /* caller must supply room for nBuf digits plus this terminator */
+ for (i = 0; i < nBuf; i++) outbuf[i] = buf[nBuf-i-1]; /* reverse into most-significant-first order */
+}
+
+
+/*---------------------------------------------------*/
/*--- Processing of complete files and streams ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
+static
Bool myfeof ( FILE* f )
{
Int32 c = fgetc ( f );
@@ -336,12 +524,14 @@ Bool myfeof ( FILE* f )
/*---------------------------------------------*/
+static
void compressStream ( FILE *stream, FILE *zStream )
{
BZFILE* bzf = NULL;
UChar ibuf[5000];
Int32 nIbuf;
- UInt32 nbytes_in, nbytes_out;
+ UInt32 nbytes_in_lo32, nbytes_in_hi32;
+ UInt32 nbytes_out_lo32, nbytes_out_hi32;
Int32 bzerr, bzerr_dummy, ret;
SET_BINARY_MODE(stream);
@@ -350,8 +540,8 @@ void compressStream ( FILE *stream, FILE *zStream )
if (ferror(stream)) goto errhandler_io;
if (ferror(zStream)) goto errhandler_io;
- bzf = bzWriteOpen ( &bzerr, zStream,
- blockSize100k, verbosity, workFactor );
+ bzf = BZ2_bzWriteOpen ( &bzerr, zStream,
+ blockSize100k, verbosity, workFactor );
if (bzerr != BZ_OK) goto errhandler;
if (verbosity >= 2) fprintf ( stderr, "\n" );
@@ -361,12 +551,14 @@ void compressStream ( FILE *stream, FILE *zStream )
if (myfeof(stream)) break;
nIbuf = fread ( ibuf, sizeof(UChar), 5000, stream );
if (ferror(stream)) goto errhandler_io;
- if (nIbuf > 0) bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf );
+ if (nIbuf > 0) BZ2_bzWrite ( &bzerr, bzf, (void*)ibuf, nIbuf );
if (bzerr != BZ_OK) goto errhandler;
}
- bzWriteClose ( &bzerr, bzf, 0, &nbytes_in, &nbytes_out );
+ BZ2_bzWriteClose64 ( &bzerr, bzf, 0,
+ &nbytes_in_lo32, &nbytes_in_hi32,
+ &nbytes_out_lo32, &nbytes_out_hi32 );
if (bzerr != BZ_OK) goto errhandler;
if (ferror(zStream)) goto errhandler_io;
@@ -380,25 +572,42 @@ void compressStream ( FILE *stream, FILE *zStream )
ret = fclose ( stream );
if (ret == EOF) goto errhandler_io;
- if (nbytes_in == 0) nbytes_in = 1;
+ if (nbytes_in_lo32 == 0 && nbytes_in_hi32 == 0)
+ nbytes_in_lo32 = 1;
- if (verbosity >= 1)
+ if (verbosity >= 1) {
+ Char buf_nin[32], buf_nout[32];
+ UInt64 nbytes_in, nbytes_out;
+ double nbytes_in_d, nbytes_out_d;
+ uInt64_from_UInt32s ( &nbytes_in,
+ nbytes_in_lo32, nbytes_in_hi32 );
+ uInt64_from_UInt32s ( &nbytes_out,
+ nbytes_out_lo32, nbytes_out_hi32 );
+ nbytes_in_d = uInt64_to_double ( &nbytes_in );
+ nbytes_out_d = uInt64_to_double ( &nbytes_out );
+ uInt64_toAscii ( buf_nin, &nbytes_in );
+ uInt64_toAscii ( buf_nout, &nbytes_out );
fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
- "%5.2f%% saved, %d in, %d out.\n",
- (float)nbytes_in / (float)nbytes_out,
- (8.0 * (float)nbytes_out) / (float)nbytes_in,
- 100.0 * (1.0 - (float)nbytes_out / (float)nbytes_in),
- nbytes_in,
- nbytes_out
+ "%5.2f%% saved, %s in, %s out.\n",
+ nbytes_in_d / nbytes_out_d,
+ (8.0 * nbytes_out_d) / nbytes_in_d,
+ 100.0 * (1.0 - nbytes_out_d / nbytes_in_d),
+ buf_nin,
+ buf_nout
);
+ }
return;
errhandler:
- bzWriteClose ( &bzerr_dummy, bzf, 1, &nbytes_in, &nbytes_out );
+ BZ2_bzWriteClose64 ( &bzerr_dummy, bzf, 1,
+ &nbytes_in_lo32, &nbytes_in_hi32,
+ &nbytes_out_lo32, &nbytes_out_hi32 );
switch (bzerr) {
+ case BZ_CONFIG_ERROR:
+ configError(); break;
case BZ_MEM_ERROR:
- outOfMemory ();
+ outOfMemory (); break;
case BZ_IO_ERROR:
errhandler_io:
ioError(); break;
@@ -413,6 +622,7 @@ void compressStream ( FILE *stream, FILE *zStream )
/*---------------------------------------------*/
+static
Bool uncompressStream ( FILE *zStream, FILE *stream )
{
BZFILE* bzf = NULL;
@@ -433,7 +643,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
while (True) {
- bzf = bzReadOpen (
+ bzf = BZ2_bzReadOpen (
&bzerr, zStream, verbosity,
(int)smallMode, unused, nUnused
);
@@ -441,7 +651,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
streamNo++;
while (bzerr == BZ_OK) {
- nread = bzRead ( &bzerr, bzf, obuf, 5000 );
+ nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 );
if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler;
if ((bzerr == BZ_OK || bzerr == BZ_STREAM_END) && nread > 0)
fwrite ( obuf, sizeof(UChar), nread, stream );
@@ -449,12 +659,12 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
}
if (bzerr != BZ_STREAM_END) goto errhandler;
- bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
+ BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" );
for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i];
- bzReadClose ( &bzerr, bzf );
+ BZ2_bzReadClose ( &bzerr, bzf );
if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" );
if (nUnused == 0 && myfeof(zStream)) break;
@@ -476,8 +686,10 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
return True;
errhandler:
- bzReadClose ( &bzerr_dummy, bzf );
+ BZ2_bzReadClose ( &bzerr_dummy, bzf );
switch (bzerr) {
+ case BZ_CONFIG_ERROR:
+ configError(); break;
case BZ_IO_ERROR:
errhandler_io:
ioError(); break;
@@ -488,6 +700,8 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
case BZ_UNEXPECTED_EOF:
compressedStreamEOF();
case BZ_DATA_ERROR_MAGIC:
+ if (zStream != stdin) fclose(zStream);
+ if (stream != stdout) fclose(stream);
if (streamNo == 1) {
return False;
} else {
@@ -507,6 +721,7 @@ Bool uncompressStream ( FILE *zStream, FILE *stream )
/*---------------------------------------------*/
+static
Bool testStream ( FILE *zStream )
{
BZFILE* bzf = NULL;
@@ -524,7 +739,7 @@ Bool testStream ( FILE *zStream )
while (True) {
- bzf = bzReadOpen (
+ bzf = BZ2_bzReadOpen (
&bzerr, zStream, verbosity,
(int)smallMode, unused, nUnused
);
@@ -532,17 +747,17 @@ Bool testStream ( FILE *zStream )
streamNo++;
while (bzerr == BZ_OK) {
- nread = bzRead ( &bzerr, bzf, obuf, 5000 );
+ nread = BZ2_bzRead ( &bzerr, bzf, obuf, 5000 );
if (bzerr == BZ_DATA_ERROR_MAGIC) goto errhandler;
}
if (bzerr != BZ_STREAM_END) goto errhandler;
- bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
+ BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused );
if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" );
for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i];
- bzReadClose ( &bzerr, bzf );
+ BZ2_bzReadClose ( &bzerr, bzf );
if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" );
if (nUnused == 0 && myfeof(zStream)) break;
@@ -556,10 +771,12 @@ Bool testStream ( FILE *zStream )
return True;
errhandler:
- bzReadClose ( &bzerr_dummy, bzf );
+ BZ2_bzReadClose ( &bzerr_dummy, bzf );
if (verbosity == 0)
fprintf ( stderr, "%s: %s: ", progName, inName );
switch (bzerr) {
+ case BZ_CONFIG_ERROR:
+ configError(); break;
case BZ_IO_ERROR:
errhandler_io:
ioError(); break;
@@ -574,6 +791,7 @@ Bool testStream ( FILE *zStream )
"file ends unexpectedly\n" );
return False;
case BZ_DATA_ERROR_MAGIC:
+ if (zStream != stdin) fclose(zStream);
if (streamNo == 1) {
fprintf ( stderr,
"bad magic number (file not created by bzip2)\n" );
@@ -598,6 +816,15 @@ Bool testStream ( FILE *zStream )
/*---------------------------------------------------*/
/*---------------------------------------------*/
+static /* Raise the recorded exit status to v if v is larger; it is never lowered here. */
+void setExit ( Int32 v )
+{
+ if (v > exitValue) exitValue = v;
+}
+
+
+/*---------------------------------------------*/
+static
void cadvise ( void )
{
if (noisy)
@@ -612,6 +839,7 @@ void cadvise ( void )
/*---------------------------------------------*/
+static
void showFileNames ( void )
{
if (noisy)
@@ -624,11 +852,14 @@ void showFileNames ( void )
/*---------------------------------------------*/
+static
void cleanUpAndFail ( Int32 ec )
{
IntNative retVal;
- if ( srcMode == SM_F2F && opMode != OM_TEST ) {
+ if ( srcMode == SM_F2F
+ && opMode != OM_TEST
+ && deleteOutputOnInterrupt ) {
if (noisy)
fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
progName, outName );
@@ -647,11 +878,13 @@ void cleanUpAndFail ( Int32 ec )
progName, numFileNames,
numFileNames - numFilesProcessed );
}
- exit ( ec );
+ setExit(ec);
+ exit(exitValue);
}
/*---------------------------------------------*/
+static
void panic ( Char* s )
{
fprintf ( stderr,
@@ -666,6 +899,7 @@ void panic ( Char* s )
/*---------------------------------------------*/
+static
void crcError ( void )
{
fprintf ( stderr,
@@ -678,6 +912,7 @@ void crcError ( void )
/*---------------------------------------------*/
+static
void compressedStreamEOF ( void )
{
fprintf ( stderr,
@@ -692,10 +927,12 @@ void compressedStreamEOF ( void )
/*---------------------------------------------*/
+static
void ioError ( void )
{
fprintf ( stderr,
- "\n%s: I/O or other error, bailing out. Possible reason follows.\n",
+ "\n%s: I/O or other error, bailing out. "
+ "Possible reason follows.\n",
progName );
perror ( progName );
showFileNames();
@@ -704,6 +941,7 @@ void ioError ( void )
/*---------------------------------------------*/
+static
void mySignalCatcher ( IntNative n )
{
fprintf ( stderr,
@@ -714,20 +952,53 @@ void mySignalCatcher ( IntNative n )
/*---------------------------------------------*/
+static
void mySIGSEGVorSIGBUScatcher ( IntNative n )
{
if (opMode == OM_Z)
- fprintf ( stderr,
- "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
- "\twhich probably indicates a bug in bzip2. Please\n"
- "\treport it to me at: jseward@acm.org\n",
- progName );
+ fprintf (
+ stderr,
+ "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing.\n"
+ "\n"
+ " Possible causes are (most likely first):\n"
+ " (1) This computer has unreliable memory or cache hardware\n"
+ " (a surprisingly common problem; try a different machine.)\n"
+ " (2) A bug in the compiler used to create this executable\n"
+ " (unlikely, if you didn't compile bzip2 yourself.)\n"
+ " (3) A real bug in bzip2 -- I hope this should never be the case.\n"
+ " The user's manual, Section 4.3, has more info on (1) and (2).\n"
+ " \n"
+ " If you suspect this is a bug in bzip2, or are unsure about (1)\n"
+ " or (2), feel free to report it to me at: jseward@acm.org.\n"
+ " Section 4.3 of the user's manual describes the info a useful\n"
+ " bug report should have. If the manual is available on your\n"
+ " system, please try and read it before mailing me. If you don't\n"
+ " have the manual or can't be bothered to read it, mail me anyway.\n"
+ "\n",
+ progName );
else
- fprintf ( stderr,
- "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
- "\twhich probably indicates that the compressed data\n"
- "\tis corrupted.\n",
- progName );
+ fprintf (
+ stderr,
+ "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing.\n"
+ "\n"
+ " Possible causes are (most likely first):\n"
+ " (1) The compressed data is corrupted, and bzip2's usual checks\n"
+ " failed to detect this. Try bzip2 -tvv my_file.bz2.\n"
+ " (2) This computer has unreliable memory or cache hardware\n"
+ " (a surprisingly common problem; try a different machine.)\n"
+ " (3) A bug in the compiler used to create this executable\n"
+ " (unlikely, if you didn't compile bzip2 yourself.)\n"
+ " (4) A real bug in bzip2 -- I hope this should never be the case.\n"
+ " The user's manual, Section 4.3, has more info on (2) and (3).\n"
+ " \n"
+ " If you suspect this is a bug in bzip2, or are unsure about (2)\n"
+ " or (3), feel free to report it to me at: jseward@acm.org.\n"
+ " Section 4.3 of the user's manual describes the info a useful\n"
+ " bug report should have. If the manual is available on your\n"
+ " system, please try and read it before mailing me. If you don't\n"
+ " have the manual or can't be bothered to read it, mail me anyway.\n"
+ "\n",
+ progName );
showFileNames();
if (opMode == OM_Z)
@@ -737,6 +1008,7 @@ void mySIGSEGVorSIGBUScatcher ( IntNative n )
/*---------------------------------------------*/
+static
void outOfMemory ( void )
{
fprintf ( stderr,
@@ -747,11 +1019,27 @@ void outOfMemory ( void )
}
+/*---------------------------------------------*/
+static /* Print the "not configured correctly" message to stderr and exit; does not return. */
+void configError ( void )
+{
+ fprintf ( stderr,
+ "bzip2: I'm not configured correctly for this platform!\n"
+ "\tI require Int32, Int16 and Char to have sizes\n"
+ "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
+ "\tProbably you can fix this by defining them correctly,\n"
+ "\tand recompiling. Bye!\n" ); /* NOTE(review): prefix is the literal "bzip2:", not progName */
+ setExit(3); /* ensures the exit status is at least 3 */
+ exit(exitValue);
+}
+
+
/*---------------------------------------------------*/
/*--- The main driver machinery ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
+static
void pad ( Char *s )
{
Int32 i;
@@ -762,6 +1050,7 @@ void pad ( Char *s )
/*---------------------------------------------*/
+static
void copyFileName ( Char* to, Char* from )
{
if ( strlen(from) > FILE_NAME_LEN-10 ) {
@@ -772,7 +1061,8 @@ void copyFileName ( Char* to, Char* from )
"Try using a reasonable file name instead. Sorry! :-)\n",
from, FILE_NAME_LEN-10
);
- exit(1);
+ setExit(1);
+ exit(exitValue);
}
strncpy(to,from,FILE_NAME_LEN-10);
@@ -781,6 +1071,7 @@ void copyFileName ( Char* to, Char* from )
/*---------------------------------------------*/
+static
Bool fileExists ( Char* name )
{
FILE *tmp = fopen ( name, "rb" );
@@ -794,6 +1085,7 @@ Bool fileExists ( Char* name )
/*--
if in doubt, return True
--*/
+static
Bool notAStandardFile ( Char* name )
{
IntNative i;
@@ -810,6 +1102,7 @@ Bool notAStandardFile ( Char* name )
/*--
rac 11/21/98 see if file has hard links to it
--*/
+static
Int32 countHardLinks ( Char* name )
{
IntNative i;
@@ -822,6 +1115,7 @@ Int32 countHardLinks ( Char* name )
/*---------------------------------------------*/
+static
void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName )
{
#if BZ_UNIX
@@ -849,6 +1143,7 @@ void copyDatePermissionsAndOwner ( Char *srcName, Char *dstName )
/*---------------------------------------------*/
+static
void setInterimPermissions ( Char *dstName )
{
#if BZ_UNIX
@@ -860,6 +1155,7 @@ void setInterimPermissions ( Char *dstName )
/*---------------------------------------------*/
+static
Bool containsDubiousChars ( Char* name )
{
Bool cdc = False;
@@ -877,6 +1173,7 @@ Char* zSuffix[BZ_N_SUFFIX_PAIRS]
Char* unzSuffix[BZ_N_SUFFIX_PAIRS]
= { "", "", ".tar", ".tar" };
+static
Bool hasSuffix ( Char* s, Char* suffix )
{
Int32 ns = strlen(s);
@@ -886,6 +1183,7 @@ Bool hasSuffix ( Char* s, Char* suffix )
return False;
}
+static
Bool mapSuffix ( Char* name,
Char* oldSuffix, Char* newSuffix )
{
@@ -897,11 +1195,15 @@ Bool mapSuffix ( Char* name,
/*---------------------------------------------*/
+static
void compress ( Char *name )
{
FILE *inStr;
FILE *outStr;
Int32 n, i;
+
+ deleteOutputOnInterrupt = False;
+
if (name == NULL && srcMode != SM_I2O)
panic ( "compress: bad modes\n" );
@@ -924,12 +1226,14 @@ void compress ( Char *name )
if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
if (noisy)
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
- progName, inName );
+ progName, inName );
+ setExit(1);
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
}
for (i = 0; i < BZ_N_SUFFIX_PAIRS; i++) {
@@ -938,6 +1242,7 @@ void compress ( Char *name )
fprintf ( stderr,
"%s: Input file %s already has %s suffix.\n",
progName, inName, zSuffix[i] );
+ setExit(1);
return;
}
}
@@ -945,17 +1250,20 @@ void compress ( Char *name )
if (noisy)
fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
progName, inName );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
fprintf ( stderr, "%s: Output file %s already exists.\n",
progName, outName );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite &&
(n=countHardLinks ( inName )) > 0) {
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
progName, inName, n, n > 1 ? "s" : "" );
+ setExit(1);
return;
}
@@ -970,6 +1278,7 @@ void compress ( Char *name )
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
+ setExit(1);
return;
};
break;
@@ -984,11 +1293,13 @@ void compress ( Char *name )
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
};
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
};
break;
@@ -1000,12 +1311,14 @@ void compress ( Char *name )
fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
progName, outName, strerror(errno) );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
}
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
if ( outStr != NULL ) fclose ( outStr );
+ setExit(1);
return;
};
setInterimPermissions ( outName );
@@ -1024,21 +1337,26 @@ void compress ( Char *name )
/*--- Now the input and output handles are sane. Do the Biz. ---*/
outputHandleJustInCase = outStr;
+ deleteOutputOnInterrupt = True;
compressStream ( inStr, outStr );
outputHandleJustInCase = NULL;
/*--- If there was an I/O error, we won't get here. ---*/
if ( srcMode == SM_F2F ) {
copyDatePermissionsAndOwner ( inName, outName );
+ deleteOutputOnInterrupt = False;
if ( !keepInputFiles ) {
IntNative retVal = remove ( inName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
+
+ deleteOutputOnInterrupt = False;
}
/*---------------------------------------------*/
+static
void uncompress ( Char *name )
{
FILE *inStr;
@@ -1047,6 +1365,8 @@ void uncompress ( Char *name )
Bool magicNumberOK;
Bool cantGuess;
+ deleteOutputOnInterrupt = False;
+
if (name == NULL && srcMode != SM_I2O)
panic ( "uncompress: bad modes\n" );
@@ -1076,17 +1396,20 @@ void uncompress ( Char *name )
if (noisy)
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
progName, inName );
+ setExit(1);
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite && notAStandardFile ( inName )) {
if (noisy)
fprintf ( stderr, "%s: Input file %s is not a normal file.\n",
progName, inName );
+ setExit(1);
return;
}
if ( /* srcMode == SM_F2F implied && */ cantGuess ) {
@@ -1099,12 +1422,14 @@ void uncompress ( Char *name )
if ( srcMode == SM_F2F && !forceOverwrite && fileExists ( outName ) ) {
fprintf ( stderr, "%s: Output file %s already exists.\n",
progName, outName );
+ setExit(1);
return;
}
if ( srcMode == SM_F2F && !forceOverwrite &&
(n=countHardLinks ( inName ) ) > 0) {
fprintf ( stderr, "%s: Input file %s has %d other link%s.\n",
progName, inName, n, n > 1 ? "s" : "" );
+ setExit(1);
return;
}
@@ -1119,6 +1444,7 @@ void uncompress ( Char *name )
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
+ setExit(1);
return;
};
break;
@@ -1130,6 +1456,7 @@ void uncompress ( Char *name )
fprintf ( stderr, "%s: Can't open input file %s:%s.\n",
progName, inName, strerror(errno) );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
};
break;
@@ -1141,12 +1468,14 @@ void uncompress ( Char *name )
fprintf ( stderr, "%s: Can't create output file %s: %s.\n",
progName, outName, strerror(errno) );
if ( inStr != NULL ) fclose ( inStr );
+ setExit(1);
return;
}
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s: %s.\n",
progName, inName, strerror(errno) );
if ( outStr != NULL ) fclose ( outStr );
+ setExit(1);
return;
};
setInterimPermissions ( outName );
@@ -1165,6 +1494,7 @@ void uncompress ( Char *name )
/*--- Now the input and output handles are sane. Do the Biz. ---*/
outputHandleJustInCase = outStr;
+ deleteOutputOnInterrupt = True;
magicNumberOK = uncompressStream ( inStr, outStr );
outputHandleJustInCase = NULL;
@@ -1172,22 +1502,27 @@ void uncompress ( Char *name )
if ( magicNumberOK ) {
if ( srcMode == SM_F2F ) {
copyDatePermissionsAndOwner ( inName, outName );
+ deleteOutputOnInterrupt = False;
if ( !keepInputFiles ) {
IntNative retVal = remove ( inName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
} else {
+ unzFailsExist = True;
+ deleteOutputOnInterrupt = False;
if ( srcMode == SM_F2F ) {
IntNative retVal = remove ( outName );
ERROR_IF_NOT_ZERO ( retVal );
}
}
+ deleteOutputOnInterrupt = False;
if ( magicNumberOK ) {
if (verbosity >= 1)
fprintf ( stderr, "done\n" );
} else {
+ setExit(2);
if (verbosity >= 1)
fprintf ( stderr, "not a bzip2 file.\n" ); else
fprintf ( stderr,
@@ -1199,11 +1534,14 @@ void uncompress ( Char *name )
/*---------------------------------------------*/
+static
void testf ( Char *name )
{
FILE *inStr;
Bool allOK;
+ deleteOutputOnInterrupt = False;
+
if (name == NULL && srcMode != SM_I2O)
panic ( "testf: bad modes\n" );
@@ -1218,11 +1556,13 @@ void testf ( Char *name )
if (noisy)
fprintf ( stderr, "%s: There are no files matching `%s'.\n",
progName, inName );
+ setExit(1);
return;
}
if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
fprintf ( stderr, "%s: Can't open input %s: %s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
}
@@ -1235,6 +1575,7 @@ void testf ( Char *name )
progName );
fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
progName, progName );
+ setExit(1);
return;
};
inStr = stdin;
@@ -1245,6 +1586,7 @@ void testf ( Char *name )
if ( inStr == NULL ) {
fprintf ( stderr, "%s: Can't open input file %s:%s.\n",
progName, inName, strerror(errno) );
+ setExit(1);
return;
};
break;
@@ -1269,35 +1611,38 @@ void testf ( Char *name )
/*---------------------------------------------*/
+static /* license(): writes the version / copyright / licence banner to stderr */
void license ( void )
{
fprintf ( stderr,
"bzip2, a block-sorting file compressor. "
- "Version 0.9.5d, 4-Sept-99.\n"
+ "Version %s.\n"
" \n"
- " Copyright (C) 1996, 1997, 1998, 1999 by Julian Seward.\n"
+ " Copyright (C) 1996-2000 by Julian Seward.\n"
" \n"
" This program is free software; you can redistribute it and/or modify\n"
" it under the terms set out in the LICENSE file, which is included\n"
- " in the bzip2-0.9.5 source distribution.\n"
+ " in the bzip2-1.0 source distribution.\n"
" \n"
" This program is distributed in the hope that it will be useful,\n"
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" LICENSE file for more details.\n"
- " \n"
+ " \n",
+ BZ2_bzlibVersion() /* argument for the single %s in "Version %s." above */
);
}
/*---------------------------------------------*/
+static
void usage ( Char *fullProgName )
{
fprintf (
stderr,
"bzip2, a block-sorting file compressor. "
- "Version 0.9.5d, 4-Sept-99.\n"
+ "Version %s.\n"
"\n usage: %s [flags and input files in any order]\n"
"\n"
" -h --help print this message\n"
@@ -1326,12 +1671,14 @@ void usage ( Char *fullProgName )
#endif
,
+ BZ2_bzlibVersion(),
fullProgName
);
}
/*---------------------------------------------*/
+static
void redundant ( Char* flag )
{
fprintf (
@@ -1365,6 +1712,7 @@ typedef
/*---------------------------------------------*/
+static
void *myMalloc ( Int32 n )
{
void* p;
@@ -1376,6 +1724,7 @@ void *myMalloc ( Int32 n )
/*---------------------------------------------*/
+static
Cell *mkCell ( void )
{
Cell *c;
@@ -1388,6 +1737,7 @@ Cell *mkCell ( void )
/*---------------------------------------------*/
+static
Cell *snocString ( Cell *root, Char *name )
{
if (root == NULL) {
@@ -1405,6 +1755,7 @@ Cell *snocString ( Cell *root, Char *name )
/*---------------------------------------------*/
+static
void addFlagsFromEnvVar ( Cell** argList, Char* varName )
{
Int32 i, j, k;
@@ -1445,16 +1796,8 @@ IntNative main ( IntNative argc, Char *argv[] )
/*-- Be really really really paranoid :-) --*/
if (sizeof(Int32) != 4 || sizeof(UInt32) != 4 ||
sizeof(Int16) != 2 || sizeof(UInt16) != 2 ||
- sizeof(Char) != 1 || sizeof(UChar) != 1) {
- fprintf ( stderr,
- "bzip2: I'm not configured correctly for this platform!\n"
- "\tI require Int32, Int16 and Char to have sizes\n"
- "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
- "\tProbably you can fix this by defining them correctly,\n"
- "\tand recompiling. Bye!\n" );
- exit(3);
- }
-
+ sizeof(Char) != 1 || sizeof(UChar) != 1)
+ configError();
/*-- Initialise --*/
outputHandleJustInCase = NULL;
@@ -1465,9 +1808,12 @@ IntNative main ( IntNative argc, Char *argv[] )
verbosity = 0;
blockSize100k = 9;
testFailsExist = False;
+ unzFailsExist = False;
numFileNames = 0;
numFilesProcessed = 0;
workFactor = 30;
+ deleteOutputOnInterrupt = False;
+ exitValue = 0;
i = j = 0; /* avoid bogus warning from egcs-1.1.X */
/*-- Set up signal handlers for mem access errors --*/
@@ -1636,6 +1982,7 @@ IntNative main ( IntNative argc, Char *argv[] )
else
if (opMode == OM_UNZ) {
+ unzFailsExist = False;
if (srcMode == SM_I2O) {
uncompress ( NULL );
} else {
@@ -1647,6 +1994,10 @@ IntNative main ( IntNative argc, Char *argv[] )
uncompress ( aa->name );
}
}
+ if (unzFailsExist) {
+ setExit(2);
+ exit(exitValue);
+ }
}
else {
@@ -1668,7 +2019,8 @@ IntNative main ( IntNative argc, Char *argv[] )
"You can use the `bzip2recover' program to attempt to recover\n"
"data from undamaged sections of corrupted files.\n\n"
);
- exit(2);
+ setExit(2);
+ exit(exitValue);
}
}
@@ -1678,12 +2030,12 @@ IntNative main ( IntNative argc, Char *argv[] )
aa = argList;
while (aa != NULL) {
Cell* aa2 = aa->link;
- if (aa->name) free(aa->name);
+ if (aa->name != NULL) free(aa->name);
free(aa);
aa = aa2;
}
- return 0;
+ return exitValue;
}
diff --git a/bzip2.txt b/bzip2.txt
index da23c64..4f1ae86 100644
--- a/bzip2.txt
+++ b/bzip2.txt
@@ -1,7 +1,7 @@
NAME
- bzip2, bunzip2 - a block-sorting file compressor, v0.9.5
+ bzip2, bunzip2 - a block-sorting file compressor, v1.0
bzcat - decompresses files to stdout
bzip2recover - recovers data from damaged bzip2 files
@@ -337,14 +337,14 @@ CAVEATS
but the details of what the problem is sometimes seem
rather misleading.
- This manual page pertains to version 0.9.5 of bzip2. Com-
+ This manual page pertains to version 1.0 of bzip2. Com-
pressed data created by this version is entirely forwards
and backwards compatible with the previous public
- releases, versions 0.1pl2 and 0.9.0, but with the follow-
- ing exception: 0.9.0 and above can correctly decompress
- multiple concatenated compressed files. 0.1pl2 cannot do
- this; it will stop after decompressing just the first file
- in the stream.
+ releases, versions 0.1pl2, 0.9.0 and 0.9.5, but with the
+ following exception: 0.9.0 and above can correctly decom-
+ press multiple concatenated compressed files. 0.1pl2 can-
+ not do this; it will stop after decompressing just the
+ first file in the stream.
bzip2recover uses 32-bit integers to represent bit posi-
tions in compressed files, so it cannot handle compressed
@@ -355,6 +355,7 @@ CAVEATS
AUTHOR
Julian Seward, jseward@acm.org.
+ http://sourceware.cygnus.com/bzip2
http://www.muraroa.demon.co.uk
The ideas embodied in bzip2 are due to (at least) the fol-
diff --git a/bzip2recover.c b/bzip2recover.c
index 1323b36..ba3d175 100644
--- a/bzip2recover.c
+++ b/bzip2recover.c
@@ -7,9 +7,9 @@
/*--
This program is bzip2recover, a program to attempt data
salvage from damaged files created by the accompanying
- bzip2-0.9.5 program.
+ bzip2-1.0 program.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -44,7 +44,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
--*/
/*--
@@ -282,7 +282,7 @@ Int32 main ( Int32 argc, Char** argv )
strcpy ( progName, argv[0] );
inFileName[0] = outFileName[0] = 0;
- fprintf ( stderr, "bzip2recover 0.9.5d: extracts blocks from damaged .bz2 files.\n" );
+ fprintf ( stderr, "bzip2recover 1.0: extracts blocks from damaged .bz2 files.\n" );
if (argc != 2) {
fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
diff --git a/bzlib.c b/bzlib.c
index 24e8bd5..4a06d9f 100644
--- a/bzlib.c
+++ b/bzlib.c
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -83,18 +83,19 @@
/*---------------------------------------------------*/
#ifndef BZ_NO_STDIO
-void bz__AssertH__fail ( int errcode )
+void BZ2_bz__AssertH__fail ( int errcode ) /* prints an internal-error report to stderr, then calls exit(3); does not return */
{
fprintf(stderr,
- "\n\nbzip2/libbzip2, v0.9.5d: internal error number %d.\n"
- "This is a bug in bzip2/libbzip2, v0.9.5d. Please report\n"
- "it to me at: jseward@acm.org. If this happened when\n"
- "you were using some program which uses libbzip2 as a\n"
+ "\n\nbzip2/libbzip2: internal error number %d.\n"
+ "This is a bug in bzip2/libbzip2, %s.\n"
+ "Please report it to me at: jseward@acm.org. If this happened\n"
+ "when you were using some program which uses libbzip2 as a\n"
"component, you should also report this bug to the author(s)\n"
"of that program. Please make an effort to report this bug;\n"
"timely and accurate bug reports eventually lead to higher\n"
- "quality software. Thanks. Julian Seward, 4 Sept 1999.\n\n",
- errcode
+ "quality software. Thanks. Julian Seward, 21 March 2000.\n\n",
+ errcode, /* fills the %d (internal error number) */
+ BZ2_bzlibVersion() /* fills the %s, replacing the previously hard-coded version text */
);
exit(3);
}
@@ -103,6 +104,17 @@ void bz__AssertH__fail ( int errcode )
/*---------------------------------------------------*/
static
+int bz_config_ok ( void ) /* returns 1 if int/short/char have the sizes tested below, otherwise 0 */
+{ /* BZ2_bzCompressInit and BZ2_bzDecompressInit return BZ_CONFIG_ERROR when this yields 0 */
+ if (sizeof(int) != 4) return 0; /* int must be exactly 4 bytes */
+ if (sizeof(short) != 2) return 0; /* short must be exactly 2 bytes */
+ if (sizeof(char) != 1) return 0; /* char must be exactly 1 byte */
+ return 1; /* every size check passed */
+}
+
+
+/*---------------------------------------------------*/
+static
void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
{
void* v = malloc ( items * size );
@@ -149,7 +161,7 @@ Bool isempty_RL ( EState* s )
/*---------------------------------------------------*/
-int BZ_API(bzCompressInit)
+int BZ_API(BZ2_bzCompressInit)
( bz_stream* strm,
int blockSize100k,
int verbosity,
@@ -158,6 +170,8 @@ int BZ_API(bzCompressInit)
Int32 n;
EState* s;
+ if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
if (strm == NULL ||
blockSize100k < 1 || blockSize100k > 9 ||
workFactor < 0 || workFactor > 250)
@@ -197,14 +211,16 @@ int BZ_API(bzCompressInit)
s->verbosity = verbosity;
s->workFactor = workFactor;
- s->block = (UInt16*)s->arr2;
+ s->block = (UChar*)s->arr2;
s->mtfv = (UInt16*)s->arr1;
s->zbits = NULL;
s->ptr = (UInt32*)s->arr1;
strm->state = s;
- strm->total_in = 0;
- strm->total_out = 0;
+ strm->total_in_lo32 = 0;
+ strm->total_in_hi32 = 0;
+ strm->total_out_lo32 = 0;
+ strm->total_out_hi32 = 0;
init_RL ( s );
prepare_new_block ( s );
return BZ_OK;
@@ -223,24 +239,24 @@ void add_pair_to_block ( EState* s )
s->inUse[s->state_in_ch] = True;
switch (s->state_in_len) {
case 1:
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
break;
case 2:
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
break;
case 3:
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
break;
default:
s->inUse[s->state_in_len-4] = True;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = (UInt16)ch; s->nblock++;
- s->block[s->nblock] = ((UInt16)(s->state_in_len-4));
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = (UChar)ch; s->nblock++;
+ s->block[s->nblock] = ((UChar)(s->state_in_len-4));
s->nblock++;
break;
}
@@ -266,7 +282,7 @@ void flush_RL ( EState* s )
UChar ch = (UChar)(zs->state_in_ch); \
BZ_UPDATE_CRC( zs->blockCRC, ch ); \
zs->inUse[zs->state_in_ch] = True; \
- zs->block[zs->nblock] = (UInt16)ch; \
+ zs->block[zs->nblock] = (UChar)ch; \
zs->nblock++; \
zs->state_in_ch = zchh; \
} \
@@ -302,7 +318,8 @@ Bool copy_input_until_stop ( EState* s )
ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
s->strm->next_in++;
s->strm->avail_in--;
- s->strm->total_in++;
+ s->strm->total_in_lo32++;
+ if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
}
} else {
@@ -319,7 +336,8 @@ Bool copy_input_until_stop ( EState* s )
ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) );
s->strm->next_in++;
s->strm->avail_in--;
- s->strm->total_in++;
+ s->strm->total_in_lo32++;
+ if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
s->avail_in_expect--;
}
}
@@ -346,8 +364,8 @@ Bool copy_output_until_stop ( EState* s )
s->state_out_pos++;
s->strm->avail_out--;
s->strm->next_out++;
- s->strm->total_out++;
-
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
return progress_out;
@@ -381,12 +399,12 @@ Bool handle_compress ( bz_stream* strm )
progress_in |= copy_input_until_stop ( s );
if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
flush_RL ( s );
- compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
+ BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
s->state = BZ_S_OUTPUT;
}
else
if (s->nblock >= s->nblockMAX) {
- compressBlock ( s, False );
+ BZ2_compressBlock ( s, False );
s->state = BZ_S_OUTPUT;
}
else
@@ -402,7 +420,7 @@ Bool handle_compress ( bz_stream* strm )
/*---------------------------------------------------*/
-int BZ_API(bzCompress) ( bz_stream *strm, int action )
+int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
{
Bool progress;
EState* s;
@@ -439,7 +457,8 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action )
case BZ_M_FLUSHING:
if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
- if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
progress = handle_compress ( strm );
if (s->avail_in_expect > 0 || !isempty_RL(s) ||
s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
@@ -448,7 +467,8 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action )
case BZ_M_FINISHING:
if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
- if (s->avail_in_expect != s->strm->avail_in) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
progress = handle_compress ( strm );
if (!progress) return BZ_SEQUENCE_ERROR;
if (s->avail_in_expect > 0 || !isempty_RL(s) ||
@@ -461,7 +481,7 @@ int BZ_API(bzCompress) ( bz_stream *strm, int action )
/*---------------------------------------------------*/
-int BZ_API(bzCompressEnd) ( bz_stream *strm )
+int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
{
EState* s;
if (strm == NULL) return BZ_PARAM_ERROR;
@@ -485,13 +505,15 @@ int BZ_API(bzCompressEnd) ( bz_stream *strm )
/*---------------------------------------------------*/
/*---------------------------------------------------*/
-int BZ_API(bzDecompressInit)
+int BZ_API(BZ2_bzDecompressInit)
( bz_stream* strm,
int verbosity,
int small )
{
DState* s;
+ if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
if (strm == NULL) return BZ_PARAM_ERROR;
if (small != 0 && small != 1) return BZ_PARAM_ERROR;
if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
@@ -507,8 +529,10 @@ int BZ_API(bzDecompressInit)
s->bsLive = 0;
s->bsBuff = 0;
s->calculatedCombinedCRC = 0;
- strm->total_in = 0;
- strm->total_out = 0;
+ strm->total_in_lo32 = 0;
+ strm->total_in_hi32 = 0;
+ strm->total_out_lo32 = 0;
+ strm->total_out_hi32 = 0;
s->smallDecompress = (Bool)small;
s->ll4 = NULL;
s->ll16 = NULL;
@@ -538,7 +562,8 @@ void unRLE_obuf_to_output_FAST ( DState* s )
s->state_out_len--;
s->strm->next_out++;
s->strm->avail_out--;
- s->strm->total_out++;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
/* can a new run be started? */
@@ -585,8 +610,9 @@ void unRLE_obuf_to_output_FAST ( DState* s )
unsigned int cs_avail_out = s->strm->avail_out;
/* end restore */
- UInt32 avail_out_INIT = cs_avail_out;
- Int32 s_save_nblockPP = s->save_nblock+1;
+ UInt32 avail_out_INIT = cs_avail_out;
+ Int32 s_save_nblockPP = s->save_nblock+1;
+ unsigned int total_out_lo32_old;
while (True) {
@@ -640,7 +666,10 @@ void unRLE_obuf_to_output_FAST ( DState* s )
}
return_notr:
- s->strm->total_out += (avail_out_INIT - cs_avail_out);
+ total_out_lo32_old = s->strm->total_out_lo32;
+ s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+ if (s->strm->total_out_lo32 < total_out_lo32_old)
+ s->strm->total_out_hi32++;
/* save */
s->calculatedBlockCRC = c_calculatedBlockCRC;
@@ -659,7 +688,7 @@ void unRLE_obuf_to_output_FAST ( DState* s )
/*---------------------------------------------------*/
-__inline__ Int32 indexIntoF ( Int32 indx, Int32 *cftab )
+__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
{
Int32 nb, na, mid;
nb = 0;
@@ -691,7 +720,8 @@ void unRLE_obuf_to_output_SMALL ( DState* s )
s->state_out_len--;
s->strm->next_out++;
s->strm->avail_out--;
- s->strm->total_out++;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
/* can a new run be started? */
@@ -736,7 +766,8 @@ void unRLE_obuf_to_output_SMALL ( DState* s )
s->state_out_len--;
s->strm->next_out++;
s->strm->avail_out--;
- s->strm->total_out++;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
}
/* can a new run be started? */
@@ -768,7 +799,7 @@ void unRLE_obuf_to_output_SMALL ( DState* s )
/*---------------------------------------------------*/
-int BZ_API(bzDecompress) ( bz_stream *strm )
+int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
{
DState* s;
if (strm == NULL) return BZ_PARAM_ERROR;
@@ -800,7 +831,7 @@ int BZ_API(bzDecompress) ( bz_stream *strm )
}
}
if (s->state >= BZ_X_MAGIC_1) {
- Int32 r = decompress ( s );
+ Int32 r = BZ2_decompress ( s );
if (r == BZ_STREAM_END) {
if (s->verbosity >= 3)
VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x",
@@ -820,7 +851,7 @@ int BZ_API(bzDecompress) ( bz_stream *strm )
/*---------------------------------------------------*/
-int BZ_API(bzDecompressEnd) ( bz_stream *strm )
+int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm )
{
DState* s;
if (strm == NULL) return BZ_PARAM_ERROR;
@@ -874,7 +905,7 @@ static Bool myfeof ( FILE* f )
/*---------------------------------------------------*/
-BZFILE* BZ_API(bzWriteOpen)
+BZFILE* BZ_API(BZ2_bzWriteOpen)
( int* bzerror,
FILE* f,
int blockSize100k,
@@ -909,8 +940,8 @@ BZFILE* BZ_API(bzWriteOpen)
bzf->strm.opaque = NULL;
if (workFactor == 0) workFactor = 30;
- ret = bzCompressInit ( &(bzf->strm), blockSize100k,
- verbosity, workFactor );
+ ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k,
+ verbosity, workFactor );
if (ret != BZ_OK)
{ BZ_SETERR(ret); free(bzf); return NULL; };
@@ -922,7 +953,7 @@ BZFILE* BZ_API(bzWriteOpen)
/*---------------------------------------------------*/
-void BZ_API(bzWrite)
+void BZ_API(BZ2_bzWrite)
( int* bzerror,
BZFILE* b,
void* buf,
@@ -948,7 +979,7 @@ void BZ_API(bzWrite)
while (True) {
bzf->strm.avail_out = BZ_MAX_UNUSED;
bzf->strm.next_out = bzf->buf;
- ret = bzCompress ( &(bzf->strm), BZ_RUN );
+ ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
if (ret != BZ_RUN_OK)
{ BZ_SETERR(ret); return; };
@@ -967,13 +998,27 @@ void BZ_API(bzWrite)
/*---------------------------------------------------*/
-void BZ_API(bzWriteClose)
+void BZ_API(BZ2_bzWriteClose)
( int* bzerror,
BZFILE* b,
int abandon,
unsigned int* nbytes_in,
unsigned int* nbytes_out )
{
+ BZ2_bzWriteClose64 ( bzerror, b, abandon,
+ nbytes_in, NULL, nbytes_out, NULL );
+}
+
+
+void BZ_API(BZ2_bzWriteClose64)
+ ( int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32 )
+{
Int32 n, n2, ret;
bzFile* bzf = (bzFile*)b;
@@ -984,14 +1029,16 @@ void BZ_API(bzWriteClose)
if (ferror(bzf->handle))
{ BZ_SETERR(BZ_IO_ERROR); return; };
- if (nbytes_in != NULL) *nbytes_in = 0;
- if (nbytes_out != NULL) *nbytes_out = 0;
+ if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
+ if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
+ if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
+ if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
if ((!abandon) && bzf->lastErr == BZ_OK) {
while (True) {
bzf->strm.avail_out = BZ_MAX_UNUSED;
bzf->strm.next_out = bzf->buf;
- ret = bzCompress ( &(bzf->strm), BZ_FINISH );
+ ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
{ BZ_SETERR(ret); return; };
@@ -1013,17 +1060,23 @@ void BZ_API(bzWriteClose)
{ BZ_SETERR(BZ_IO_ERROR); return; };
}
- if (nbytes_in != NULL) *nbytes_in = bzf->strm.total_in;
- if (nbytes_out != NULL) *nbytes_out = bzf->strm.total_out;
+ if (nbytes_in_lo32 != NULL)
+ *nbytes_in_lo32 = bzf->strm.total_in_lo32;
+ if (nbytes_in_hi32 != NULL)
+ *nbytes_in_hi32 = bzf->strm.total_in_hi32;
+ if (nbytes_out_lo32 != NULL)
+ *nbytes_out_lo32 = bzf->strm.total_out_lo32;
+ if (nbytes_out_hi32 != NULL)
+ *nbytes_out_hi32 = bzf->strm.total_out_hi32;
BZ_SETERR(BZ_OK);
- bzCompressEnd ( &(bzf->strm) );
+ BZ2_bzCompressEnd ( &(bzf->strm) );
free ( bzf );
}
/*---------------------------------------------------*/
-BZFILE* BZ_API(bzReadOpen)
+BZFILE* BZ_API(BZ2_bzReadOpen)
( int* bzerror,
FILE* f,
int verbosity,
@@ -1066,7 +1119,7 @@ BZFILE* BZ_API(bzReadOpen)
nUnused--;
}
- ret = bzDecompressInit ( &(bzf->strm), verbosity, small );
+ ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
if (ret != BZ_OK)
{ BZ_SETERR(ret); free(bzf); return NULL; };
@@ -1079,7 +1132,7 @@ BZFILE* BZ_API(bzReadOpen)
/*---------------------------------------------------*/
-void BZ_API(bzReadClose) ( int *bzerror, BZFILE *b )
+void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
{
bzFile* bzf = (bzFile*)b;
@@ -1091,13 +1144,13 @@ void BZ_API(bzReadClose) ( int *bzerror, BZFILE *b )
{ BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
if (bzf->initialisedOk)
- (void)bzDecompressEnd ( &(bzf->strm) );
+ (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
free ( bzf );
}
/*---------------------------------------------------*/
-int BZ_API(bzRead)
+int BZ_API(BZ2_bzRead)
( int* bzerror,
BZFILE* b,
void* buf,
@@ -1135,7 +1188,7 @@ int BZ_API(bzRead)
bzf->strm.next_in = bzf->buf;
}
- ret = bzDecompress ( &(bzf->strm) );
+ ret = BZ2_bzDecompress ( &(bzf->strm) );
if (ret != BZ_OK && ret != BZ_STREAM_END)
{ BZ_SETERR(ret); return 0; };
@@ -1157,7 +1210,7 @@ int BZ_API(bzRead)
/*---------------------------------------------------*/
-void BZ_API(bzReadGetUnused)
+void BZ_API(BZ2_bzReadGetUnused)
( int* bzerror,
BZFILE* b,
void** unused,
@@ -1183,7 +1236,7 @@ void BZ_API(bzReadGetUnused)
/*---------------------------------------------------*/
/*---------------------------------------------------*/
-int BZ_API(bzBuffToBuffCompress)
+int BZ_API(BZ2_bzBuffToBuffCompress)
( char* dest,
unsigned int* destLen,
char* source,
@@ -1206,8 +1259,8 @@ int BZ_API(bzBuffToBuffCompress)
strm.bzalloc = NULL;
strm.bzfree = NULL;
strm.opaque = NULL;
- ret = bzCompressInit ( &strm, blockSize100k,
- verbosity, workFactor );
+ ret = BZ2_bzCompressInit ( &strm, blockSize100k,
+ verbosity, workFactor );
if (ret != BZ_OK) return ret;
strm.next_in = source;
@@ -1215,27 +1268,27 @@ int BZ_API(bzBuffToBuffCompress)
strm.avail_in = sourceLen;
strm.avail_out = *destLen;
- ret = bzCompress ( &strm, BZ_FINISH );
+ ret = BZ2_bzCompress ( &strm, BZ_FINISH );
if (ret == BZ_FINISH_OK) goto output_overflow;
if (ret != BZ_STREAM_END) goto errhandler;
/* normal termination */
*destLen -= strm.avail_out;
- bzCompressEnd ( &strm );
+ BZ2_bzCompressEnd ( &strm );
return BZ_OK;
output_overflow:
- bzCompressEnd ( &strm );
+ BZ2_bzCompressEnd ( &strm );
return BZ_OUTBUFF_FULL;
errhandler:
- bzCompressEnd ( &strm );
+ BZ2_bzCompressEnd ( &strm );
return ret;
}
/*---------------------------------------------------*/
-int BZ_API(bzBuffToBuffDecompress)
+int BZ_API(BZ2_bzBuffToBuffDecompress)
( char* dest,
unsigned int* destLen,
char* source,
@@ -1255,7 +1308,7 @@ int BZ_API(bzBuffToBuffDecompress)
strm.bzalloc = NULL;
strm.bzfree = NULL;
strm.opaque = NULL;
- ret = bzDecompressInit ( &strm, verbosity, small );
+ ret = BZ2_bzDecompressInit ( &strm, verbosity, small );
if (ret != BZ_OK) return ret;
strm.next_in = source;
@@ -1263,26 +1316,26 @@ int BZ_API(bzBuffToBuffDecompress)
strm.avail_in = sourceLen;
strm.avail_out = *destLen;
- ret = bzDecompress ( &strm );
+ ret = BZ2_bzDecompress ( &strm );
if (ret == BZ_OK) goto output_overflow_or_eof;
if (ret != BZ_STREAM_END) goto errhandler;
/* normal termination */
*destLen -= strm.avail_out;
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return BZ_OK;
output_overflow_or_eof:
if (strm.avail_out > 0) {
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return BZ_UNEXPECTED_EOF;
} else {
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return BZ_OUTBUFF_FULL;
};
errhandler:
- bzDecompressEnd ( &strm );
+ BZ2_bzDecompressEnd ( &strm );
return ret;
}
@@ -1303,7 +1356,7 @@ int BZ_API(bzBuffToBuffDecompress)
/*--
return version like "0.9.0c".
--*/
-const char * BZ_API(bzlibVersion)(void)
+const char * BZ_API(BZ2_bzlibVersion)(void) /* returns BZ_VERSION as a const char*; takes no arguments */
{
return BZ_VERSION;
}
@@ -1377,9 +1430,11 @@ BZFILE * bzopen_or_bzdopen
/* Guard against total chaos and anarchy -- JRS */
if (blockSize100k < 1) blockSize100k = 1;
if (blockSize100k > 9) blockSize100k = 9;
- bzfp = bzWriteOpen(&bzerr,fp,blockSize100k,verbosity,workFactor);
+ bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
+ verbosity,workFactor);
} else {
- bzfp = bzReadOpen(&bzerr,fp,verbosity,smallMode,unused,nUnused);
+ bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
+ unused,nUnused);
}
if (bzfp == NULL) {
if (fp != stdin && fp != stdout) fclose(fp);
@@ -1395,7 +1450,7 @@ BZFILE * bzopen_or_bzdopen
ex) bzopen("file","w9")
case path="" or NULL => use stdin or stdout.
--*/
-BZFILE * BZ_API(bzopen)
+BZFILE * BZ_API(BZ2_bzopen)
( const char *path,
const char *mode )
{
@@ -1404,7 +1459,7 @@ BZFILE * BZ_API(bzopen)
/*---------------------------------------------------*/
-BZFILE * BZ_API(bzdopen)
+BZFILE * BZ_API(BZ2_bzdopen)
( int fd,
const char *mode )
{
@@ -1413,11 +1468,11 @@ BZFILE * BZ_API(bzdopen)
/*---------------------------------------------------*/
-int BZ_API(bzread) (BZFILE* b, void* buf, int len )
+int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
{
int bzerr, nread;
if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
- nread = bzRead(&bzerr,b,buf,len);
+ nread = BZ2_bzRead(&bzerr,b,buf,len);
if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
return nread;
} else {
@@ -1427,11 +1482,11 @@ int BZ_API(bzread) (BZFILE* b, void* buf, int len )
/*---------------------------------------------------*/
-int BZ_API(bzwrite) (BZFILE* b, void* buf, int len )
+int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
{
int bzerr;
- bzWrite(&bzerr,b,buf,len);
+ BZ2_bzWrite(&bzerr,b,buf,len);
if(bzerr == BZ_OK){
return len;
}else{
@@ -1441,7 +1496,7 @@ int BZ_API(bzwrite) (BZFILE* b, void* buf, int len )
/*---------------------------------------------------*/
-int BZ_API(bzflush) (BZFILE *b)
+int BZ_API(BZ2_bzflush) (BZFILE *b)
{
/* do nothing now... */
return 0;
@@ -1449,19 +1504,19 @@ int BZ_API(bzflush) (BZFILE *b)
/*---------------------------------------------------*/
-void BZ_API(bzclose) (BZFILE* b)
+void BZ_API(BZ2_bzclose) (BZFILE* b) /* finishes the write or read side of b, then fcloses its FILE unless it is stdin/stdout; b == NULL is a no-op */
{
int bzerr;
- FILE *fp = ((bzFile *)b)->handle;
+ FILE *fp; /* not initialised from b here: b may be NULL and must be checked first */
if (b==NULL) {return;}
+ fp = ((bzFile *)b)->handle; /* saved now because the close calls below free b */
if(((bzFile*)b)->writing){
- bzWriteClose(&bzerr,b,0,NULL,NULL);
+ BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL);
if(bzerr != BZ_OK){
- bzWriteClose(NULL,b,1,NULL,NULL);
+ BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
}
}else{
- bzReadClose(&bzerr,b);
+ BZ2_bzReadClose(&bzerr,b);
}
if(fp!=stdin && fp!=stdout){
fclose(fp);
@@ -1483,6 +1538,7 @@ static char *bzerrorstrings[] = {
,"IO_ERROR"
,"UNEXPECTED_EOF"
,"OUTBUFF_FULL"
+ ,"CONFIG_ERROR"
,"???" /* for future */
,"???" /* for future */
,"???" /* for future */
@@ -1492,7 +1548,7 @@ static char *bzerrorstrings[] = {
};
-const char * BZ_API(bzerror) (BZFILE *b, int *errnum)
+const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
{
int err = ((bzFile *)b)->lastErr;
diff --git a/bzlib.h b/bzlib.h
index d74938d..c9447a2 100644
--- a/bzlib.h
+++ b/bzlib.h
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -83,16 +83,19 @@ extern "C" {
#define BZ_IO_ERROR (-6)
#define BZ_UNEXPECTED_EOF (-7)
#define BZ_OUTBUFF_FULL (-8)
+#define BZ_CONFIG_ERROR (-9)
typedef
struct {
char *next_in;
unsigned int avail_in;
- unsigned int total_in;
+ unsigned int total_in_lo32;
+ unsigned int total_in_hi32;
char *next_out;
unsigned int avail_out;
- unsigned int total_out;
+ unsigned int total_out_lo32;
+ unsigned int total_out_hi32;
void *state;
@@ -130,33 +133,33 @@ typedef
/*-- Core (low-level) library functions --*/
-BZ_EXTERN int BZ_API(bzCompressInit) (
+BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
bz_stream* strm,
int blockSize100k,
int verbosity,
int workFactor
);
-BZ_EXTERN int BZ_API(bzCompress) (
+BZ_EXTERN int BZ_API(BZ2_bzCompress) (
bz_stream* strm,
int action
);
-BZ_EXTERN int BZ_API(bzCompressEnd) (
+BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
bz_stream* strm
);
-BZ_EXTERN int BZ_API(bzDecompressInit) (
+BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
bz_stream *strm,
int verbosity,
int small
);
-BZ_EXTERN int BZ_API(bzDecompress) (
+BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
bz_stream* strm
);
-BZ_EXTERN int BZ_API(bzDecompressEnd) (
+BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
bz_stream *strm
);
@@ -169,7 +172,7 @@ BZ_EXTERN int BZ_API(bzDecompressEnd) (
typedef void BZFILE;
-BZ_EXTERN BZFILE* BZ_API(bzReadOpen) (
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
int* bzerror,
FILE* f,
int verbosity,
@@ -178,26 +181,26 @@ BZ_EXTERN BZFILE* BZ_API(bzReadOpen) (
int nUnused
);
-BZ_EXTERN void BZ_API(bzReadClose) (
+BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
int* bzerror,
BZFILE* b
);
-BZ_EXTERN void BZ_API(bzReadGetUnused) (
+BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
int* bzerror,
BZFILE* b,
void** unused,
int* nUnused
);
-BZ_EXTERN int BZ_API(bzRead) (
+BZ_EXTERN int BZ_API(BZ2_bzRead) (
int* bzerror,
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) (
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
int* bzerror,
FILE* f,
int blockSize100k,
@@ -205,26 +208,36 @@ BZ_EXTERN BZFILE* BZ_API(bzWriteOpen) (
int workFactor
);
-BZ_EXTERN void BZ_API(bzWrite) (
+BZ_EXTERN void BZ_API(BZ2_bzWrite) (
int* bzerror,
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN void BZ_API(bzWriteClose) (
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
int* bzerror,
BZFILE* b,
int abandon,
unsigned int* nbytes_in,
unsigned int* nbytes_out
);
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
+ int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32
+ );
#endif
/*-- Utility functions --*/
-BZ_EXTERN int BZ_API(bzBuffToBuffCompress) (
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
char* dest,
unsigned int* destLen,
char* source,
@@ -234,7 +247,7 @@ BZ_EXTERN int BZ_API(bzBuffToBuffCompress) (
int workFactor
);
-BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) (
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
char* dest,
unsigned int* destLen,
char* source,
@@ -254,42 +267,42 @@ BZ_EXTERN int BZ_API(bzBuffToBuffDecompress) (
If this code breaks, please contact both Yoshioka and me.
--*/
-BZ_EXTERN const char * BZ_API(bzlibVersion) (
+BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
void
);
#ifndef BZ_NO_STDIO
-BZ_EXTERN BZFILE * BZ_API(bzopen) (
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
const char *path,
const char *mode
);
-BZ_EXTERN BZFILE * BZ_API(bzdopen) (
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
int fd,
const char *mode
);
-BZ_EXTERN int BZ_API(bzread) (
+BZ_EXTERN int BZ_API(BZ2_bzread) (
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN int BZ_API(bzwrite) (
+BZ_EXTERN int BZ_API(BZ2_bzwrite) (
BZFILE* b,
void* buf,
int len
);
-BZ_EXTERN int BZ_API(bzflush) (
+BZ_EXTERN int BZ_API(BZ2_bzflush) (
BZFILE* b
);
-BZ_EXTERN void BZ_API(bzclose) (
+BZ_EXTERN void BZ_API(BZ2_bzclose) (
BZFILE* b
);
-BZ_EXTERN const char * BZ_API(bzerror) (
+BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
BZFILE *b,
int *errnum
);
diff --git a/bzlib_private.h b/bzlib_private.h
index 8e93480..fb51c7a 100644
--- a/bzlib_private.h
+++ b/bzlib_private.h
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -76,7 +76,7 @@
/*-- General stuff. --*/
-#define BZ_VERSION "0.9.5d"
+#define BZ_VERSION "1.0.1, 23-June-2000"
typedef char Char;
typedef unsigned char Bool;
@@ -94,9 +94,9 @@ typedef unsigned short UInt16;
#endif
#ifndef BZ_NO_STDIO
-extern void bz__AssertH__fail ( int errcode );
+extern void BZ2_bz__AssertH__fail ( int errcode );
#define AssertH(cond,errcode) \
- { if (!(cond)) bz__AssertH__fail ( errcode ); }
+ { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
#if BZ_DEBUG
#define AssertD(cond,msg) \
{ if (!(cond)) { \
@@ -155,7 +155,7 @@ extern void bz_internal_error ( int errcode );
/*-- Stuff for randomising repetitive blocks. --*/
-extern Int32 rNums[512];
+extern Int32 BZ2_rNums[512];
#define BZ_RAND_DECLS \
Int32 rNToGo; \
@@ -169,7 +169,7 @@ extern Int32 rNums[512];
#define BZ_RAND_UPD_MASK \
if (s->rNToGo == 0) { \
- s->rNToGo = rNums[s->rTPos]; \
+ s->rNToGo = BZ2_rNums[s->rTPos]; \
s->rTPos++; \
if (s->rTPos == 512) s->rTPos = 0; \
} \
@@ -179,7 +179,7 @@ extern Int32 rNums[512];
/*-- Stuff for doing CRCs. --*/
-extern UInt32 crc32Table[256];
+extern UInt32 BZ2_crc32Table[256];
#define BZ_INITIALISE_CRC(crcVar) \
{ \
@@ -194,8 +194,8 @@ extern UInt32 crc32Table[256];
#define BZ_UPDATE_CRC(crcVar,cha) \
{ \
crcVar = (crcVar << 8) ^ \
- crc32Table[(crcVar >> 24) ^ \
- ((UChar)cha)]; \
+ BZ2_crc32Table[(crcVar >> 24) ^ \
+ ((UChar)cha)]; \
}
@@ -241,7 +241,7 @@ typedef
/* aliases for arr1 and arr2 */
UInt32* ptr;
- UInt16* block;
+ UChar* block;
UInt16* mtfv;
UChar* zbits;
@@ -283,9 +283,11 @@ typedef
UChar selector [BZ_MAX_SELECTORS];
UChar selectorMtf[BZ_MAX_SELECTORS];
- UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ /* second dimension: only 3 needed; 4 makes index calculations faster */
+ UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
}
EState;
@@ -295,19 +297,19 @@ typedef
/*-- externs for compression. --*/
extern void
-blockSort ( EState* );
+BZ2_blockSort ( EState* );
extern void
-compressBlock ( EState*, Bool );
+BZ2_compressBlock ( EState*, Bool );
extern void
-bsInitWrite ( EState* );
+BZ2_bsInitWrite ( EState* );
extern void
-hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
+BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
extern void
-hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
+BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
@@ -493,22 +495,22 @@ typedef
#define GET_LL(i) \
(((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))
-#define BZ_GET_SMALL(cccc) \
- cccc = indexIntoF ( s->tPos, s->cftab ); \
+#define BZ_GET_SMALL(cccc) \
+ cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
s->tPos = GET_LL(s->tPos);
/*-- externs for decompression. --*/
extern Int32
-indexIntoF ( Int32, Int32* );
+BZ2_indexIntoF ( Int32, Int32* );
extern Int32
-decompress ( DState* );
+BZ2_decompress ( DState* );
extern void
-hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
- Int32, Int32, Int32 );
+BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
+ Int32, Int32, Int32 );
#endif
diff --git a/compress.c b/compress.c
index 7b192c3..cc5e31d 100644
--- a/compress.c
+++ b/compress.c
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -78,7 +78,7 @@
/*---------------------------------------------------*/
/*---------------------------------------------------*/
-void bsInitWrite ( EState* s )
+void BZ2_bsInitWrite ( EState* s )
{
s->bsLive = 0;
s->bsBuff = 0;
@@ -113,6 +113,7 @@ void bsFinishWrite ( EState* s )
/*---------------------------------------------------*/
static
+__inline__
void bsW ( EState* s, Int32 n, UInt32 v )
{
bsNEEDW ( n );
@@ -164,8 +165,6 @@ void generateMTFValues ( EState* s )
{
UChar yy[256];
Int32 i, j;
- UChar tmp;
- UChar tmp2;
Int32 zPend;
Int32 wr;
Int32 EOB;
@@ -174,7 +173,7 @@ void generateMTFValues ( EState* s )
After sorting (eg, here),
s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
and
- ((UInt16*)s->arr2) [ 0 .. s->nblock-1 ] [15:8]
+ ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
holds the original block data.
The first thing to do is generate the MTF values,
@@ -186,14 +185,14 @@ void generateMTFValues ( EState* s )
The final compressed bitstream is generated into the
area starting at
- (UChar*) (&((UInt16)s->arr2)[s->nblock])
+ (UChar*) (&((UChar*)s->arr2)[s->nblock])
These storage aliases are set up in bzCompressInit(),
except for the last one, which is arranged in
compressBlock().
*/
UInt32* ptr = s->ptr;
- UInt16* block = s->block;
+ UChar* block = s->block;
UInt16* mtfv = s->mtfv;
makeMaps_e ( s );
@@ -207,27 +206,14 @@ void generateMTFValues ( EState* s )
for (i = 0; i < s->nblock; i++) {
UChar ll_i;
-
AssertD ( wr <= i, "generateMTFValues(1)" );
j = ptr[i]-1; if (j < 0) j += s->nblock;
- ll_i = s->unseqToSeq[block[j] >> 8];
+ ll_i = s->unseqToSeq[block[j]];
AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
- tmp = yy[0];
- if (tmp == ll_i) {
+ if (yy[0] == ll_i) {
zPend++;
} else {
- tmp2 = tmp;
- tmp = yy[1];
- yy[1] = tmp2;
- j = 1;
- while ( ll_i != tmp ) {
- j++;
- tmp2 = tmp;
- tmp = yy[j];
- yy[j] = tmp2;
- };
- yy[0] = tmp;
if (zPend > 0) {
zPend--;
@@ -244,7 +230,26 @@ void generateMTFValues ( EState* s )
};
zPend = 0;
}
- mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+ {
+ register UChar rtmp;
+ register UChar* ryy_j;
+ register UChar rll_i;
+ rtmp = yy[1];
+ yy[1] = yy[0];
+ ryy_j = &(yy[1]);
+ rll_i = ll_i;
+ while ( rll_i != rtmp ) {
+ register UChar rtmp2;
+ ryy_j++;
+ rtmp2 = rtmp;
+ rtmp = *ryy_j;
+ *ryy_j = rtmp2;
+ };
+ yy[0] = rtmp;
+ j = ryy_j - &(yy[0]);
+ mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+ }
+
}
}
@@ -261,6 +266,7 @@ void generateMTFValues ( EState* s )
if (zPend < 2) break;
zPend = (zPend - 2) / 2;
};
+ zPend = 0;
}
mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
@@ -365,6 +371,18 @@ void sendMTFValues ( EState* s )
for (v = 0; v < alphaSize; v++)
s->rfreq[t][v] = 0;
+ /*---
+ Set up an auxiliary length table which is used to fast-track
+ the common case (nGroups == 6).
+ ---*/
+ if (nGroups == 6) {
+ for (v = 0; v < alphaSize; v++) {
+ s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
+ s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
+ s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
+ }
+ }
+
nSelectors = 0;
totc = 0;
gs = 0;
@@ -381,21 +399,37 @@ void sendMTFValues ( EState* s )
--*/
for (t = 0; t < nGroups; t++) cost[t] = 0;
- if (nGroups == 6) {
- register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
- cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
- for (i = gs; i <= ge; i++) {
- UInt16 icv = mtfv[i];
- cost0 += s->len[0][icv];
- cost1 += s->len[1][icv];
- cost2 += s->len[2][icv];
- cost3 += s->len[3][icv];
- cost4 += s->len[4][icv];
- cost5 += s->len[5][icv];
- }
- cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
- cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ register UInt32 cost01, cost23, cost45;
+ register UInt16 icv;
+ cost01 = cost23 = cost45 = 0;
+
+# define BZ_ITER(nn) \
+ icv = mtfv[gs+(nn)]; \
+ cost01 += s->len_pack[icv][0]; \
+ cost23 += s->len_pack[icv][1]; \
+ cost45 += s->len_pack[icv][2]; \
+
+ BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
+ BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
+ BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
+ BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
+ BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
+ BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
+ BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
+ BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
+ BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
+ BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
+
+# undef BZ_ITER
+
+ cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
+ cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
+ cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
+
} else {
+ /*--- slow version which correctly handles all situations ---*/
for (i = gs; i <= ge; i++) {
UInt16 icv = mtfv[i];
for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
@@ -417,8 +451,29 @@ void sendMTFValues ( EState* s )
/*--
Increment the symbol frequencies for the selected table.
--*/
- for (i = gs; i <= ge; i++)
- s->rfreq[bt][ mtfv[i] ]++;
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+
+# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
+
+ BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
+ BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
+ BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
+ BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
+ BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
+ BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
+ BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
+ BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
+ BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
+ BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
+
+# undef BZ_ITUR
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++)
+ s->rfreq[bt][ mtfv[i] ]++;
+ }
gs = ge+1;
}
@@ -434,8 +489,8 @@ void sendMTFValues ( EState* s )
Recompute the tables based on the accumulated frequencies.
--*/
for (t = 0; t < nGroups; t++)
- hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
- alphaSize, 20 );
+ BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
+ alphaSize, 20 );
}
@@ -474,8 +529,8 @@ void sendMTFValues ( EState* s )
}
AssertH ( !(maxLen > 20), 3004 );
AssertH ( !(minLen < 1), 3005 );
- hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
- minLen, maxLen, alphaSize );
+ BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
+ minLen, maxLen, alphaSize );
}
/*--- Transmit the mapping table. ---*/
@@ -536,13 +591,45 @@ void sendMTFValues ( EState* s )
if (gs >= s->nMTF) break;
ge = gs + BZ_G_SIZE - 1;
if (ge >= s->nMTF) ge = s->nMTF-1;
- for (i = gs; i <= ge; i++) {
- AssertH ( s->selector[selCtr] < nGroups, 3006 );
- bsW ( s,
- s->len [s->selector[selCtr]] [mtfv[i]],
- s->code [s->selector[selCtr]] [mtfv[i]] );
+ AssertH ( s->selector[selCtr] < nGroups, 3006 );
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ UInt16 mtfv_i;
+ UChar* s_len_sel_selCtr
+ = &(s->len[s->selector[selCtr]][0]);
+ Int32* s_code_sel_selCtr
+ = &(s->code[s->selector[selCtr]][0]);
+
+# define BZ_ITAH(nn) \
+ mtfv_i = mtfv[gs+(nn)]; \
+ bsW ( s, \
+ s_len_sel_selCtr[mtfv_i], \
+ s_code_sel_selCtr[mtfv_i] )
+
+ BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
+ BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
+ BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
+ BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
+ BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
+ BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
+ BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
+ BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
+ BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
+ BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
+
+# undef BZ_ITAH
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ bsW ( s,
+ s->len [s->selector[selCtr]] [mtfv[i]],
+ s->code [s->selector[selCtr]] [mtfv[i]] );
+ }
}
+
gs = ge+1;
selCtr++;
}
@@ -554,7 +641,7 @@ void sendMTFValues ( EState* s )
/*---------------------------------------------------*/
-void compressBlock ( EState* s, Bool is_last_block )
+void BZ2_compressBlock ( EState* s, Bool is_last_block )
{
if (s->nblock > 0) {
@@ -568,14 +655,14 @@ void compressBlock ( EState* s, Bool is_last_block )
"combined CRC = 0x%8x, size = %d\n",
s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
- blockSort ( s );
+ BZ2_blockSort ( s );
}
- s->zbits = (UChar*) (&((UInt16*)s->arr2)[s->nblock]);
+ s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
/*-- If this is the first block, create the stream header. --*/
if (s->blockNo == 1) {
- bsInitWrite ( s );
+ BZ2_bsInitWrite ( s );
bsPutUChar ( s, 'B' );
bsPutUChar ( s, 'Z' );
bsPutUChar ( s, 'h' );
diff --git a/crctable.c b/crctable.c
index ab53df6..61c040c 100644
--- a/crctable.c
+++ b/crctable.c
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -68,7 +68,7 @@
comp.compression FAQ.
--*/
-UInt32 crc32Table[256] = {
+UInt32 BZ2_crc32Table[256] = {
/*-- Ugly, innit? --*/
diff --git a/decompress.c b/decompress.c
index 31f8b67..cdced18 100644
--- a/decompress.c
+++ b/decompress.c
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -99,7 +99,9 @@ void makeMaps_d ( DState* s )
s->bsLive += 8; \
s->strm->next_in++; \
s->strm->avail_in--; \
- s->strm->total_in++; \
+ s->strm->total_in_lo32++; \
+ if (s->strm->total_in_lo32 == 0) \
+ s->strm->total_in_hi32++; \
}
#define GET_UCHAR(lll,uuu) \
@@ -113,6 +115,8 @@ void makeMaps_d ( DState* s )
{ \
if (groupPos == 0) { \
groupNo++; \
+ if (groupNo >= nSelectors) \
+ RETURN(BZ_DATA_ERROR); \
groupPos = BZ_G_SIZE; \
gSel = s->selector[groupNo]; \
gMinlen = s->minLens[gSel]; \
@@ -123,17 +127,23 @@ void makeMaps_d ( DState* s )
groupPos--; \
zn = gMinlen; \
GET_BITS(label1, zvec, zn); \
- while (zvec > gLimit[zn]) { \
+ while (1) { \
+ if (zn > 20 /* the longest code */) \
+ RETURN(BZ_DATA_ERROR); \
+ if (zvec <= gLimit[zn]) break; \
zn++; \
GET_BIT(label2, zj); \
zvec = (zvec << 1) | zj; \
}; \
+ if (zvec - gBase[zn] < 0 \
+ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
+ RETURN(BZ_DATA_ERROR); \
lval = gPerm[zvec - gBase[zn]]; \
}
/*---------------------------------------------------*/
-Int32 decompress ( DState* s )
+Int32 BZ2_decompress ( DState* s )
{
UChar uc;
Int32 retVal;
@@ -288,6 +298,11 @@ Int32 decompress ( DState* s )
GET_UCHAR(BZ_X_ORIGPTR_3, uc);
s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+ if (s->origPtr < 0)
+ RETURN(BZ_DATA_ERROR);
+ if (s->origPtr > 10 + 100000*s->blockSize100k)
+ RETURN(BZ_DATA_ERROR);
+
/*--- Receive the mapping table ---*/
for (i = 0; i < 16; i++) {
GET_BIT(BZ_X_MAPPING_1, uc);
@@ -305,18 +320,21 @@ Int32 decompress ( DState* s )
if (uc == 1) s->inUse[i * 16 + j] = True;
}
makeMaps_d ( s );
+ if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
alphaSize = s->nInUse+2;
/*--- Now the selectors ---*/
GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+ if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+ if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
for (i = 0; i < nSelectors; i++) {
j = 0;
while (True) {
GET_BIT(BZ_X_SELECTOR_3, uc);
if (uc == 0) break;
j++;
- if (j > 5) RETURN(BZ_DATA_ERROR);
+ if (j >= nGroups) RETURN(BZ_DATA_ERROR);
}
s->selectorMtf[i] = j;
}
@@ -358,7 +376,7 @@ Int32 decompress ( DState* s )
if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
if (s->len[t][i] < minLen) minLen = s->len[t][i];
}
- hbCreateDecodeTables (
+ BZ2_hbCreateDecodeTables (
&(s->limit[t][0]),
&(s->base[t][0]),
&(s->perm[t][0]),
@@ -392,7 +410,6 @@ Int32 decompress ( DState* s )
/*-- end MTF init --*/
nblock = 0;
-
GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
while (True) {
@@ -417,23 +434,24 @@ Int32 decompress ( DState* s )
if (s->smallDecompress)
while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->ll16[nblock] = (UInt16)uc;
nblock++;
es--;
}
else
while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
s->tt[nblock] = (UInt32)uc;
nblock++;
es--;
};
- if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR);
continue;
} else {
- if (nblock > nblockMAX) RETURN(BZ_DATA_ERROR);
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
/*-- uc = MTF ( nextSym-1 ) --*/
{
@@ -500,6 +518,12 @@ Int32 decompress ( DState* s )
}
}
+ /* Now we know what nblock is, we can do a better sanity
+ check on s->origPtr.
+ */
+ if (s->origPtr < 0 || s->origPtr >= nblock)
+ RETURN(BZ_DATA_ERROR);
+
s->state_out_len = 0;
s->state_out_ch = 0;
BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
diff --git a/dlltest.c b/dlltest.c
index e5639f1..f79279c 100644
--- a/dlltest.c
+++ b/dlltest.c
@@ -1,165 +1,176 @@
-/*
- minibz2
- libbz2.dll test program.
- by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
- This file is Public Domain.
- welcome any email to me.
-
- usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
-*/
-
-#define BZ_IMPORT
-#include <stdio.h>
-#include <stdlib.h>
-#include "bzlib.h"
-#ifdef _WIN32
-#include <io.h>
-#endif
-
-
-#ifdef _WIN32
-
-#include <windows.h>
-static int BZ2DLLLoaded = 0;
-static HINSTANCE BZ2DLLhLib;
-int BZ2DLLLoadLibrary(void)
-{
- HINSTANCE hLib;
-
- if(BZ2DLLLoaded==1){return 0;}
- hLib=LoadLibrary("libbz2.dll");
- if(hLib == NULL){
- puts("Can't load libbz2.dll");
- return -1;
- }
- BZ2DLLLoaded=1;
- BZ2DLLhLib=hLib;
- bzlibVersion=GetProcAddress(hLib,"bzlibVersion");
- bzopen=GetProcAddress(hLib,"bzopen");
- bzdopen=GetProcAddress(hLib,"bzdopen");
- bzread=GetProcAddress(hLib,"bzread");
- bzwrite=GetProcAddress(hLib,"bzwrite");
- bzflush=GetProcAddress(hLib,"bzflush");
- bzclose=GetProcAddress(hLib,"bzclose");
- bzerror=GetProcAddress(hLib,"bzerror");
- return 0;
-
-}
-int BZ2DLLFreeLibrary(void)
-{
- if(BZ2DLLLoaded==0){return 0;}
- FreeLibrary(BZ2DLLhLib);
- BZ2DLLLoaded=0;
-}
-#endif /* WIN32 */
-
-void usage(void)
-{
- puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]");
-}
-
-int main(int argc,char *argv[])
-{
- int decompress = 0;
- int level = 9;
- char *fn_r = NULL;
- char *fn_w = NULL;
-
-#ifdef _WIN32
- if(BZ2DLLLoadLibrary()<0){
- puts("can't load dll");
- exit(1);
- }
-#endif
- while(++argv,--argc){
- if(**argv =='-' || **argv=='/'){
- char *p;
-
- for(p=*argv+1;*p;p++){
- if(*p=='d'){
- decompress = 1;
- }else if('1'<=*p && *p<='9'){
- level = *p - '0';
- }else{
- usage();
- exit(1);
- }
- }
- }else{
- break;
- }
- }
- if(argc>=1){
- fn_r = *argv;
- argc--;argv++;
- }else{
- fn_r = NULL;
- }
- if(argc>=1){
- fn_w = *argv;
- argc--;argv++;
- }else{
- fn_w = NULL;
- }
- {
- int len;
- char buff[0x1000];
- char mode[10];
-
- if(decompress){
- BZFILE *BZ2fp_r = NULL;
- FILE *fp_w = NULL;
-
- if(fn_w){
- if((fp_w = fopen(fn_w,"wb"))==NULL){
- printf("can't open [%s]\n",fn_w);
- perror("reason:");
- exit(1);
- }
- }else{
- fp_w = stdout;
- }
- if((BZ2fp_r == NULL && (BZ2fp_r = bzdopen(fileno(stdin),"rb"))==NULL)
- || (BZ2fp_r != NULL && (BZ2fp_r = bzopen(fn_r,"rb"))==NULL)){
- printf("can't bz2openstream\n");
- exit(1);
- }
- while((len=bzread(BZ2fp_r,buff,0x1000))>0){
- fwrite(buff,1,len,fp_w);
- }
- bzclose(BZ2fp_r);
- if(fp_w != stdout) fclose(fp_w);
- }else{
- BZFILE *BZ2fp_w = NULL;
- FILE *fp_r = NULL;
-
- if(fn_r){
- if((fp_r = fopen(fn_r,"rb"))==NULL){
- printf("can't open [%s]\n",fn_r);
- perror("reason:");
- exit(1);
- }
- }else{
- fp_r = stdin;
- }
- mode[0]='w';
- mode[1] = '0' + level;
- mode[2] = '\0';
-
- if((fn_w == NULL && (BZ2fp_w = bzdopen(fileno(stdout),mode))==NULL)
- || (fn_w !=NULL && (BZ2fp_w = bzopen(fn_w,mode))==NULL)){
- printf("can't bz2openstream\n");
- exit(1);
- }
- while((len=fread(buff,1,0x1000,fp_r))>0){
- bzwrite(BZ2fp_w,buff,len);
- }
- bzclose(BZ2fp_w);
- if(fp_r!=stdin)fclose(fp_r);
- }
- }
-#ifdef _WIN32
- BZ2DLLFreeLibrary();
-#endif
- return 0;
-}
+/*
+ minibz2
+ libbz2.dll test program.
+ by Yoshioka Tsuneo(QWF00133@nifty.ne.jp/tsuneo-y@is.aist-nara.ac.jp)
+ This file is Public Domain.
+ welcome any email to me.
+
+ usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]
+*/
+
+#define BZ_IMPORT
+#include <stdio.h>
+#include <stdlib.h>
+#include "bzlib.h"
+#ifdef _WIN32
+#include <io.h>
+#endif
+
+
+#ifdef _WIN32
+
+#define BZ2_LIBNAME "libbz2-1.0.0.DLL"
+
+#include <windows.h>
+static int BZ2DLLLoaded = 0;
+static HINSTANCE BZ2DLLhLib;
+int BZ2DLLLoadLibrary(void)
+{
+ HINSTANCE hLib;
+
+ if(BZ2DLLLoaded==1){return 0;}
+ hLib=LoadLibrary(BZ2_LIBNAME);
+ if(hLib == NULL){
+ fprintf(stderr,"Can't load %s\n",BZ2_LIBNAME);
+ return -1;
+ }
+ BZ2_bzlibVersion=GetProcAddress(hLib,"BZ2_bzlibVersion");
+ BZ2_bzopen=GetProcAddress(hLib,"BZ2_bzopen");
+ BZ2_bzdopen=GetProcAddress(hLib,"BZ2_bzdopen");
+ BZ2_bzread=GetProcAddress(hLib,"BZ2_bzread");
+ BZ2_bzwrite=GetProcAddress(hLib,"BZ2_bzwrite");
+ BZ2_bzflush=GetProcAddress(hLib,"BZ2_bzflush");
+ BZ2_bzclose=GetProcAddress(hLib,"BZ2_bzclose");
+ BZ2_bzerror=GetProcAddress(hLib,"BZ2_bzerror");
+
+ if (!BZ2_bzlibVersion || !BZ2_bzopen || !BZ2_bzdopen
+ || !BZ2_bzread || !BZ2_bzwrite || !BZ2_bzflush
+ || !BZ2_bzclose || !BZ2_bzerror) {
+ fprintf(stderr,"GetProcAddress failed.\n");
+ return -1;
+ }
+ BZ2DLLLoaded=1;
+ BZ2DLLhLib=hLib;
+ return 0;
+
+}
+int BZ2DLLFreeLibrary(void)
+{
+ if(BZ2DLLLoaded==0){return 0;}
+ FreeLibrary(BZ2DLLhLib);
+ BZ2DLLLoaded=0;
+}
+#endif /* WIN32 */
+
+void usage(void)
+{
+ puts("usage: minibz2 [-d] [-{1,2,..9}] [[srcfilename] destfilename]");
+}
+
+int main(int argc,char *argv[])
+{
+ int decompress = 0;
+ int level = 9;
+ char *fn_r = NULL;
+ char *fn_w = NULL;
+
+#ifdef _WIN32
+ if(BZ2DLLLoadLibrary()<0){
+ fprintf(stderr,"Loading of %s failed. Giving up.\n", BZ2_LIBNAME);
+ exit(1);
+ }
+ printf("Loading of %s succeeded. Library version is %s.\n",
+ BZ2_LIBNAME, BZ2_bzlibVersion() );
+#endif
+ while(++argv,--argc){
+ if(**argv =='-' || **argv=='/'){
+ char *p;
+
+ for(p=*argv+1;*p;p++){
+ if(*p=='d'){
+ decompress = 1;
+ }else if('1'<=*p && *p<='9'){
+ level = *p - '0';
+ }else{
+ usage();
+ exit(1);
+ }
+ }
+ }else{
+ break;
+ }
+ }
+ if(argc>=1){
+ fn_r = *argv;
+ argc--;argv++;
+ }else{
+ fn_r = NULL;
+ }
+ if(argc>=1){
+ fn_w = *argv;
+ argc--;argv++;
+ }else{
+ fn_w = NULL;
+ }
+ {
+ int len;
+ char buff[0x1000];
+ char mode[10];
+
+ if(decompress){
+ BZFILE *BZ2fp_r = NULL;
+ FILE *fp_w = NULL;
+
+ if(fn_w){
+ if((fp_w = fopen(fn_w,"wb"))==NULL){
+ printf("can't open [%s]\n",fn_w);
+ perror("reason:");
+ exit(1);
+ }
+ }else{
+ fp_w = stdout;
+ }
+ if((BZ2fp_r == NULL && (BZ2fp_r = BZ2_bzdopen(fileno(stdin),"rb"))==NULL)
+ || (BZ2fp_r != NULL && (BZ2fp_r = BZ2_bzopen(fn_r,"rb"))==NULL)){
+ printf("can't bz2openstream\n");
+ exit(1);
+ }
+ while((len=BZ2_bzread(BZ2fp_r,buff,0x1000))>0){
+ fwrite(buff,1,len,fp_w);
+ }
+ BZ2_bzclose(BZ2fp_r);
+ if(fp_w != stdout) fclose(fp_w);
+ }else{
+ BZFILE *BZ2fp_w = NULL;
+ FILE *fp_r = NULL;
+
+ if(fn_r){
+ if((fp_r = fopen(fn_r,"rb"))==NULL){
+ printf("can't open [%s]\n",fn_r);
+ perror("reason:");
+ exit(1);
+ }
+ }else{
+ fp_r = stdin;
+ }
+ mode[0]='w';
+ mode[1] = '0' + level;
+ mode[2] = '\0';
+
+ if((fn_w == NULL && (BZ2fp_w = BZ2_bzdopen(fileno(stdout),mode))==NULL)
+ || (fn_w !=NULL && (BZ2fp_w = BZ2_bzopen(fn_w,mode))==NULL)){
+ printf("can't bz2openstream\n");
+ exit(1);
+ }
+ while((len=fread(buff,1,0x1000,fp_r))>0){
+ BZ2_bzwrite(BZ2fp_w,buff,len);
+ }
+ BZ2_bzclose(BZ2fp_w);
+ if(fp_r!=stdin)fclose(fp_r);
+ }
+ }
+#ifdef _WIN32
+ BZ2DLLFreeLibrary();
+#endif
+ return 0;
+}
diff --git a/huffman.c b/huffman.c
index bb2a6cc..9b446c4 100644
--- a/huffman.c
+++ b/huffman.c
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -100,10 +100,10 @@
/*---------------------------------------------------*/
-void hbMakeCodeLengths ( UChar *len,
- Int32 *freq,
- Int32 alphaSize,
- Int32 maxLen )
+void BZ2_hbMakeCodeLengths ( UChar *len,
+ Int32 *freq,
+ Int32 alphaSize,
+ Int32 maxLen )
{
/*--
Nodes and heap entries run from 1. Entry 0
@@ -172,11 +172,11 @@ void hbMakeCodeLengths ( UChar *len,
/*---------------------------------------------------*/
-void hbAssignCodes ( Int32 *code,
- UChar *length,
- Int32 minLen,
- Int32 maxLen,
- Int32 alphaSize )
+void BZ2_hbAssignCodes ( Int32 *code,
+ UChar *length,
+ Int32 minLen,
+ Int32 maxLen,
+ Int32 alphaSize )
{
Int32 n, vec, i;
@@ -190,13 +190,13 @@ void hbAssignCodes ( Int32 *code,
/*---------------------------------------------------*/
-void hbCreateDecodeTables ( Int32 *limit,
- Int32 *base,
- Int32 *perm,
- UChar *length,
- Int32 minLen,
- Int32 maxLen,
- Int32 alphaSize )
+void BZ2_hbCreateDecodeTables ( Int32 *limit,
+ Int32 *base,
+ Int32 *perm,
+ UChar *length,
+ Int32 minLen,
+ Int32 maxLen,
+ Int32 alphaSize )
{
Int32 pp, i, j, vec;
diff --git a/libbz2.def b/libbz2.def
index ba0f54e..2dc0dd8 100644
--- a/libbz2.def
+++ b/libbz2.def
@@ -1,25 +1,27 @@
LIBRARY LIBBZ2
DESCRIPTION "libbzip2: library for data compression"
EXPORTS
- bzCompressInit
- bzCompress
- bzCompressEnd
- bzDecompressInit
- bzDecompress
- bzDecompressEnd
- bzReadOpen
- bzReadClose
- bzReadGetUnused
- bzRead
- bzWriteOpen
- bzWrite
- bzWriteClose
- bzBuffToBuffCompress
- bzBuffToBuffDecompress
- bzlibVersion
- bzopen
- bzdopen
- bzread
- bzwrite
- bzflush
- bzclose
+ BZ2_bzCompressInit
+ BZ2_bzCompress
+ BZ2_bzCompressEnd
+ BZ2_bzDecompressInit
+ BZ2_bzDecompress
+ BZ2_bzDecompressEnd
+ BZ2_bzReadOpen
+ BZ2_bzReadClose
+ BZ2_bzReadGetUnused
+ BZ2_bzRead
+ BZ2_bzWriteOpen
+ BZ2_bzWrite
+ BZ2_bzWriteClose
+ BZ2_bzWriteClose64
+ BZ2_bzBuffToBuffCompress
+ BZ2_bzBuffToBuffDecompress
+ BZ2_bzlibVersion
+ BZ2_bzopen
+ BZ2_bzdopen
+ BZ2_bzread
+ BZ2_bzwrite
+ BZ2_bzflush
+ BZ2_bzclose
+ BZ2_bzerror
diff --git a/makefile.msc b/makefile.msc
index 4b49f78..3fe4232 100644
--- a/makefile.msc
+++ b/makefile.msc
@@ -4,7 +4,7 @@
# Fixed up by JRS for bzip2-0.9.5d release.
CC=cl
-CFLAGS= -DWIN32 -MD -Ox
+CFLAGS= -DWIN32 -MD -Ox -D_FILE_OFFSET_BITS=64
OBJS= blocksort.obj \
huffman.obj \
@@ -21,7 +21,6 @@ bzip2: lib
$(CC) $(CFLAGS) -o bzip2recover bzip2recover.c
lib: $(OBJS)
- del libbz2.lib
lib /out:libbz2.lib $(OBJS)
test: bzip2
@@ -32,20 +31,19 @@ test: bzip2
.\\bzip2 -d < sample1.bz2 > sample1.tst
.\\bzip2 -d < sample2.bz2 > sample2.tst
.\\bzip2 -ds < sample3.bz2 > sample3.tst
+ @echo All six of the fc's should find no differences.
+ @echo If fc finds an error on sample3.bz2, this could be
+ @echo because WinZip's 'TAR file smart CR/LF conversion'
+ @echo is too clever for its own good. Disable this option.
+ @echo The correct size for sample3.ref is 120,244. If it
+ @echo is 150,251, WinZip has messed it up.
fc sample1.bz2 sample1.rb2
fc sample2.bz2 sample2.rb2
fc sample3.bz2 sample3.rb2
fc sample1.tst sample1.ref
fc sample2.tst sample2.ref
fc sample3.tst sample3.ref
- @echo All six of the fc's should find no differences.
- @echo If fc finds an error on sample3.tst, this could be
- @echo because WinZips 'TAR file smart CR/LF conversion'
- @echo is too clever for its own good. Disable this option.
- @echo The correct size for sample3.ref is 120,244. If it
- @echo is around 150k, WinZip has stuffed it up.
- @echo Also remember to set BZ_UNIX to 0 and BZ_LCCWIN32
- @echo to 1 in bzip2.c.
+
clean:
diff --git a/manual.texi b/manual.texi
index e48e656..336776a 100644
--- a/manual.texi
+++ b/manual.texi
@@ -2,10 +2,10 @@
@setfilename bzip2.info
@ignore
-This file documents bzip2 version 0.9.5, and associated library
+This file documents bzip2 version 1.0, and associated library
libbzip2, written by Julian Seward (jseward@acm.org).
-Copyright (C) 1996-1999 Julian R Seward
+Copyright (C) 1996-2000 Julian R Seward
Permission is granted to make and distribute verbatim copies of
this manual provided the copyright notice and this permission notice
@@ -30,8 +30,8 @@ END-INFO-DIR-ENTRY
@titlepage
@title bzip2 and libbzip2
@subtitle a program and library for data compression
-@subtitle copyright (C) 1996-1999 Julian Seward
-@subtitle version 0.9.5d of 4 September 1999
+@subtitle copyright (C) 1996-2000 Julian Seward
+@subtitle version 1.0 of 21 March 2000
@author Julian Seward
@end titlepage
@@ -44,7 +44,7 @@ END-INFO-DIR-ENTRY
This program, @code{bzip2},
and associated library @code{libbzip2}, are
-Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -82,9 +82,13 @@ Julian Seward, Cambridge, UK.
@code{jseward@@acm.org}
+@code{http://sourceware.cygnus.com/bzip2}
+
+@code{http://www.cacheprof.org}
+
@code{http://www.muraroa.demon.co.uk}
-@code{bzip2}/@code{libbzip2} version 0.9.5 of 24 May 1999.
+@code{bzip2}/@code{libbzip2} version 1.0 of 21 March 2000.
PATENTS: To the best of my knowledge, @code{bzip2} does not use any patented
algorithms. However, I do not have the resources available to carry out
@@ -130,7 +134,7 @@ and nothing else.
@unnumberedsubsubsec NAME
@itemize
@item @code{bzip2}, @code{bunzip2}
-- a block-sorting file compressor, v0.9.5
+- a block-sorting file compressor, v1.0
@item @code{bzcat}
- decompresses files to stdout
@item @code{bzip2recover}
@@ -431,10 +435,10 @@ I/O error messages are not as helpful as they could be. @code{bzip2}
tries hard to detect I/O errors and exit cleanly, but the details of
what the problem is sometimes seem rather misleading.
-This manual page pertains to version 0.9.5 of @code{bzip2}. Compressed
+This manual page pertains to version 1.0 of @code{bzip2}. Compressed
data created by this version is entirely forwards and backwards
-compatible with the previous public releases, versions 0.1pl2 and 0.9.0,
-but with the following exception: 0.9.0 and above can correctly
+compatible with the previous public releases, versions 0.1pl2, 0.9.0 and
+0.9.5, but with the following exception: 0.9.0 and above can correctly
decompress multiple concatenated compressed files. 0.1pl2 cannot do
this; it will stop after decompressing just the first file in the
stream.
@@ -486,6 +490,10 @@ The structure of @code{libbzip2}'s interfaces is similar to
that of Jean-loup Gailly's and Mark Adler's excellent @code{zlib}
library.
+All externally visible symbols have names beginning @code{BZ2_}.
+This is new in version 1.0. The intention is to minimise pollution
+of the namespaces of library clients.
+
@subsection Low-level summary
This interface provides services for compressing and decompressing
@@ -498,17 +506,17 @@ The low-level part of the library has no global variables and
is therefore thread-safe.
Six routines make up the low level interface:
-@code{bzCompressInit}, @code{bzCompress}, and @* @code{bzCompressEnd}
+@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, and @* @code{BZ2_bzCompressEnd}
for compression,
-and a corresponding trio @code{bzDecompressInit}, @* @code{bzDecompress}
-and @code{bzDecompressEnd} for decompression.
+and a corresponding trio @code{BZ2_bzDecompressInit}, @* @code{BZ2_bzDecompress}
+and @code{BZ2_bzDecompressEnd} for decompression.
The @code{*Init} functions allocate
memory for compression/decompression and do other
initialisations, whilst the @code{*End} functions close down operations
and release memory.
-The real work is done by @code{bzCompress} and @code{bzDecompress}.
-These compress/decompress data from a user-supplied input buffer
+The real work is done by @code{BZ2_bzCompress} and @code{BZ2_bzDecompress}.
+These compress and decompress data from a user-supplied input buffer
to a user-supplied output buffer. These buffers can be any size;
arbitrary quantities of data are handled by making repeated calls
to these functions. This is a flexible mechanism allowing a
@@ -526,10 +534,10 @@ reading files in which the @code{bzip2} data stream is embedded
within some larger-scale file structure, or where there are
multiple @code{bzip2} data streams concatenated end-to-end.
-For reading files, @code{bzReadOpen}, @code{bzRead}, @code{bzReadClose}
-and @code{bzReadGetUnused} are supplied. For writing files,
-@code{bzWriteOpen}, @code{bzWrite} and @code{bzWriteFinish} are
-available.
+For reading files, @code{BZ2_bzReadOpen}, @code{BZ2_bzRead},
+@code{BZ2_bzReadClose} and @* @code{BZ2_bzReadGetUnused} are supplied. For
+writing files, @code{BZ2_bzWriteOpen}, @code{BZ2_bzWrite} and
+@code{BZ2_bzWriteClose} are available.
As with the low-level library, no global variables are used
so the library is per se thread-safe. However, if I/O errors
@@ -539,7 +547,7 @@ the error. In that case, you'd need a C library which correctly
supports @code{errno} in a multithreaded environment.
To make the library a little simpler and more portable,
-@code{bzReadOpen} and @code{bzWriteOpen} require you to pass them file
+@code{BZ2_bzReadOpen} and @code{BZ2_bzWriteOpen} require you to pass them file
handles (@code{FILE*}s) which have previously been opened for reading or
writing respectively. That avoids portability problems associated with
file operations and file attributes, whilst not being much of an
@@ -548,8 +556,8 @@ imposition on the programmer.
@subsection Utility functions summary
-For very simple needs, @code{bzBuffToBuffCompress} and
-@code{bzBuffToBuffDecompress} are provided. These compress
+For very simple needs, @code{BZ2_bzBuffToBuffCompress} and
+@code{BZ2_bzBuffToBuffDecompress} are provided. These compress
data in memory from one buffer to another buffer in a single
function call. You should assess whether these functions
fulfill your memory-to-memory compression/decompression
@@ -559,9 +567,9 @@ general but more complex low-level interface.
Yoshioka Tsuneo (@code{QWF00133@@niftyserve.or.jp} /
@code{tsuneo-y@@is.aist-nara.ac.jp}) has contributed some functions to
give better @code{zlib} compatibility. These functions are
-@code{bzopen}, @code{bzread}, @code{bzwrite}, @code{bzflush},
-@code{bzclose},
-@code{bzerror} and @code{bzlibVersion}. You may find these functions
+@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush},
+@code{BZ2_bzclose},
+@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}. You may find these functions
more convenient for simple file reading and writing, than those in the
high-level interface. These functions are not (yet) officially part of
the library, and are minimally documented here. If they break, you
@@ -582,6 +590,15 @@ if you are feeling especially paranoid. I would be interested in
hearing more about the robustness of the library to corrupted
compressed data.
+Version 1.0 is much more robust in this respect than
+0.9.0 or 0.9.5. Investigations with Checker (a tool for
+detecting problems with memory management, similar to Purify)
+indicate that, at least for the few files I tested, all single-bit
+errors in the compressed data are caught properly, with no
+segmentation faults, no reads of uninitialised data and no
+out of range reads or writes. So it's certainly much improved,
+although I wouldn't claim it to be totally bombproof.
+
The file @code{bzlib.h} contains all definitions needed to use
the library. In particular, you should definitely not include
@code{bzlib_private.h}.
@@ -598,7 +615,7 @@ The requested action was completed successfully.
@item BZ_RUN_OK
@itemx BZ_FLUSH_OK
@itemx BZ_FINISH_OK
-In @code{bzCompress}, the requested flush/finish/nothing-special action
+In @code{BZ2_bzCompress}, the requested flush/finish/nothing-special action
was completed successfully.
@item BZ_STREAM_END
Compression of data was completed, or the logical stream end was
@@ -607,6 +624,16 @@ detected during decompression.
The following return values indicate an error of some kind.
@table @code
+@item BZ_CONFIG_ERROR
+Indicates that the library has been improperly compiled on your
+platform -- a major configuration error. Specifically, it means
+that @code{sizeof(char)}, @code{sizeof(short)} and @code{sizeof(int)}
+are not 1, 2 and 4 respectively, as they should be. Note that the
+library should still work properly on 64-bit platforms which follow
+the LP64 programming model -- that is, where @code{sizeof(long)}
+and @code{sizeof(void*)} are 8. Under LP64, @code{sizeof(int)} is
+still 4, so @code{libbzip2}, which doesn't use the @code{long} type,
+is OK.
@item BZ_SEQUENCE_ERROR
When using the library, it is important to call the functions in the
correct sequence and with data structures (buffers etc) in the correct
@@ -624,10 +651,10 @@ making.
@item BZ_MEM_ERROR
Returned when a request to allocate memory failed. Note that the
quantity of memory needed to decompress a stream cannot be determined
-until the stream's header has been read. So @code{bzDecompress} and
-@code{bzRead} may return @code{BZ_MEM_ERROR} even though some of
+until the stream's header has been read. So @code{BZ2_bzDecompress} and
+@code{BZ2_bzRead} may return @code{BZ_MEM_ERROR} even though some of
the compressed data has been read. The same is not true for
-compression; once @code{bzCompressInit} or @code{bzWriteOpen} have
+compression; once @code{BZ2_bzCompressInit} or @code{BZ2_bzWriteOpen} have
successfully completed, @code{BZ_MEM_ERROR} cannot occur.
@item BZ_DATA_ERROR
Returned when a data integrity error is detected during decompression.
@@ -639,19 +666,19 @@ As a special case of @code{BZ_DATA_ERROR}, it is sometimes useful to
know when the compressed stream does not start with the correct
magic bytes (@code{'B' 'Z' 'h'}).
@item BZ_IO_ERROR
-Returned by @code{bzRead} and @code{bzRead} when there is an error
-reading or writing in the compressed file, and by @code{bzReadOpen}
-and @code{bzWriteOpen} for attempts to use a file for which the
+Returned by @code{BZ2_bzRead} and @code{BZ2_bzWrite} when there is an error
+reading or writing in the compressed file, and by @code{BZ2_bzReadOpen}
+and @code{BZ2_bzWriteOpen} for attempts to use a file for which the
error indicator (viz, @code{ferror(f)}) is set.
On receipt of @code{BZ_IO_ERROR}, the caller should consult
@code{errno} and/or @code{perror} to acquire operating-system
specific information about the problem.
@item BZ_UNEXPECTED_EOF
-Returned by @code{bzRead} when the compressed file finishes
+Returned by @code{BZ2_bzRead} when the compressed file finishes
before the logical end of stream is detected.
@item BZ_OUTBUFF_FULL
-Returned by @code{bzBuffToBuffCompress} and
-@code{bzBuffToBuffDecompress} to indicate that the output data
+Returned by @code{BZ2_bzBuffToBuffCompress} and
+@code{BZ2_bzBuffToBuffDecompress} to indicate that the output data
will not fit into the output buffer provided.
@end table
@@ -659,17 +686,19 @@ will not fit into the output buffer provided.
@section Low-level interface
-@subsection @code{bzCompressInit}
+@subsection @code{BZ2_bzCompressInit}
@example
typedef
struct @{
char *next_in;
unsigned int avail_in;
- unsigned int total_in;
+ unsigned int total_in_lo32;
+ unsigned int total_in_hi32;
char *next_out;
unsigned int avail_out;
- unsigned int total_out;
+ unsigned int total_out_lo32;
+ unsigned int total_out_hi32;
void *state;
@@ -679,10 +708,10 @@ typedef
@}
bz_stream;
-int bzCompressInit ( bz_stream *strm,
- int blockSize100k,
- int verbosity,
- int workFactor );
+int BZ2_bzCompressInit ( bz_stream *strm,
+ int blockSize100k,
+ int verbosity,
+ int workFactor );
@end example
@@ -712,14 +741,19 @@ If you don't want to use a custom memory allocator, set @code{bzalloc},
and the library will then use the standard @code{malloc}/@code{free}
routines.
-Before calling @code{bzCompressInit}, fields @code{bzalloc},
+Before calling @code{BZ2_bzCompressInit}, fields @code{bzalloc},
@code{bzfree} and @code{opaque} should
be filled appropriately, as just described. Upon return, the internal
-state will have been allocated and initialised, and @code{total_in} and
-@code{total_out} will have been set to zero.
-These last two fields are used by the library
+state will have been allocated and initialised, and @code{total_in_lo32},
+@code{total_in_hi32}, @code{total_out_lo32} and
+@code{total_out_hi32} will have been set to zero.
+These four fields are used by the library
to inform the caller of the total amount of data passed into and out of
the library, respectively. You should not try to change them.
+As of version 1.0, 64-bit counts are maintained, even on 32-bit
+platforms, using the @code{_hi32} fields to store the upper 32 bits
+of the count. So, for example, the total amount of data in
+is @code{(total_in_hi32 << 32) + total_in_lo32}.
Parameter @code{blockSize100k} specifies the block size to be used for
compression. It should be a value between 1 and 9 inclusive, and the
@@ -761,6 +795,8 @@ mechanism would render the parameter obsolete.
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{strm} is @code{NULL}
or @code{blockSize100k} < 1 or @code{blockSize100k} > 9
@@ -773,86 +809,86 @@ Possible return values:
@end display
Allowable next actions:
@display
- @code{bzCompress}
+ @code{BZ2_bzCompress}
if @code{BZ_OK} is returned
no specific action needed in case of error
@end display
-@subsection @code{bzCompress}
+@subsection @code{BZ2_bzCompress}
@example
- int bzCompress ( bz_stream *strm, int action );
+ int BZ2_bzCompress ( bz_stream *strm, int action );
@end example
Provides more input and/or output buffer space for the library. The
-caller maintains input and output buffers, and calls @code{bzCompress} to
+caller maintains input and output buffers, and calls @code{BZ2_bzCompress} to
transfer data between them.
-Before each call to @code{bzCompress}, @code{next_in} should point at
+Before each call to @code{BZ2_bzCompress}, @code{next_in} should point at
the data to be compressed, and @code{avail_in} should indicate how many
-bytes the library may read. @code{bzCompress} updates @code{next_in},
+bytes the library may read. @code{BZ2_bzCompress} updates @code{next_in},
@code{avail_in} and @code{total_in} to reflect the number of bytes it
has read.
Similarly, @code{next_out} should point to a buffer in which the
compressed data is to be placed, with @code{avail_out} indicating how
-much output space is available. @code{bzCompress} updates
+much output space is available. @code{BZ2_bzCompress} updates
@code{next_out}, @code{avail_out} and @code{total_out} to reflect the
number of bytes output.
You may provide and remove as little or as much data as you like on each
-call of @code{bzCompress}. In the limit, it is acceptable to supply and
+call of @code{BZ2_bzCompress}. In the limit, it is acceptable to supply and
remove data one byte at a time, although this would be terribly
inefficient. You should always ensure that at least one byte of output
space is available at each call.
-A second purpose of @code{bzCompress} is to request a change of mode of the
+A second purpose of @code{BZ2_bzCompress} is to request a change of mode of the
compressed stream.
Conceptually, a compressed stream can be in one of four states: IDLE,
RUNNING, FLUSHING and FINISHING. Before initialisation
-(@code{bzCompressInit}) and after termination (@code{bzCompressEnd}), a
+(@code{BZ2_bzCompressInit}) and after termination (@code{BZ2_bzCompressEnd}), a
stream is regarded as IDLE.
-Upon initialisation (@code{bzCompressInit}), the stream is placed in the
-RUNNING state. Subsequent calls to @code{bzCompress} should pass
+Upon initialisation (@code{BZ2_bzCompressInit}), the stream is placed in the
+RUNNING state. Subsequent calls to @code{BZ2_bzCompress} should pass
@code{BZ_RUN} as the requested action; other actions are illegal and
will result in @code{BZ_SEQUENCE_ERROR}.
At some point, the calling program will have provided all the input data
it wants to. It will then want to finish up -- in effect, asking the
library to process any data it might have buffered internally. In this
-state, @code{bzCompress} will no longer attempt to read data from
+state, @code{BZ2_bzCompress} will no longer attempt to read data from
@code{next_in}, but it will want to write data to @code{next_out}.
Because the output buffer supplied by the user can be arbitrarily small,
the finishing-up operation cannot necessarily be done with a single call
-of @code{bzCompress}.
+of @code{BZ2_bzCompress}.
Instead, the calling program passes @code{BZ_FINISH} as an action to
-@code{bzCompress}. This changes the stream's state to FINISHING. Any
+@code{BZ2_bzCompress}. This changes the stream's state to FINISHING. Any
remaining input (ie, @code{next_in[0 .. avail_in-1]}) is compressed and
-transferred to the output buffer. To do this, @code{bzCompress} must be
+transferred to the output buffer. To do this, @code{BZ2_bzCompress} must be
called repeatedly until all the output has been consumed. At that
-point, @code{bzCompress} returns @code{BZ_STREAM_END}, and the stream's
-state is set back to IDLE. @code{bzCompressEnd} should then be
+point, @code{BZ2_bzCompress} returns @code{BZ_STREAM_END}, and the stream's
+state is set back to IDLE. @code{BZ2_bzCompressEnd} should then be
called.
Just to make sure the calling program does not cheat, the library makes
a note of @code{avail_in} at the time of the first call to
-@code{bzCompress} which has @code{BZ_FINISH} as an action (ie, at the
+@code{BZ2_bzCompress} which has @code{BZ_FINISH} as an action (ie, at the
time the program has announced its intention to not supply any more
input). By comparing this value with that of @code{avail_in} over
-subsequent calls to @code{bzCompress}, the library can detect any
+subsequent calls to @code{BZ2_bzCompress}, the library can detect any
attempts to slip in more data to compress. Any calls for which this is
detected will return @code{BZ_SEQUENCE_ERROR}. This indicates a
programming mistake which should be corrected.
Instead of asking to finish, the calling program may ask
-@code{bzCompress} to take all the remaining input, compress it and
+@code{BZ2_bzCompress} to take all the remaining input, compress it and
terminate the current (Burrows-Wheeler) compression block. This could
be useful for error control purposes. The mechanism is analogous to
-that for finishing: call @code{bzCompress} with an action of
+that for finishing: call @code{BZ2_bzCompress} with an action of
@code{BZ_FLUSH}, remove output data, and persist with the
@code{BZ_FLUSH} action until the value @code{BZ_RUN_OK} is returned. As
-with finishing, @code{bzCompress} detects any attempt to provide more
+with finishing, @code{BZ2_bzCompress} detects any attempt to provide more
input data once the flush has begun.
Once the flush is complete, the stream returns to the normal RUNNING
@@ -863,11 +899,11 @@ which shows which actions are allowable in each state, what action
will be taken, what the next state is, and what the non-error return
values are. Note that you can't explicitly ask what state the
stream is in, but nor do you need to -- it can be inferred from the
-values returned by @code{bzCompress}.
+values returned by @code{BZ2_bzCompress}.
@display
IDLE/@code{any}
- Illegal. IDLE state only exists after @code{bzCompressEnd} or
- before @code{bzCompressInit}.
+ Illegal. IDLE state only exists after @code{BZ2_bzCompressEnd} or
+ before @code{BZ2_bzCompressInit}.
Return value = @code{BZ_SEQUENCE_ERROR}
RUNNING/@code{BZ_RUN}
@@ -917,21 +953,21 @@ FINISHING/other
That still looks complicated? Well, fair enough. The usual sequence
of calls for compressing a load of data is:
@itemize @bullet
-@item Get started with @code{bzCompressInit}.
+@item Get started with @code{BZ2_bzCompressInit}.
@item Shovel data in and shlurp out its compressed form using zero or more
-calls of @code{bzCompress} with action = @code{BZ_RUN}.
+calls of @code{BZ2_bzCompress} with action = @code{BZ_RUN}.
@item Finish up.
-Repeatedly call @code{bzCompress} with action = @code{BZ_FINISH},
+Repeatedly call @code{BZ2_bzCompress} with action = @code{BZ_FINISH},
copying out the compressed output, until @code{BZ_STREAM_END} is returned.
-@item Close up and go home. Call @code{bzCompressEnd}.
+@item Close up and go home. Call @code{BZ2_bzCompressEnd}.
@end itemize
If the data you want to compress fits into your input buffer all
-at once, you can skip the calls of @code{bzCompress ( ..., BZ_RUN )} and
-just do the @code{bzCompress ( ..., BZ_FINISH )} calls.
+at once, you can skip the calls of @code{BZ2_bzCompress ( ..., BZ_RUN )} and
+just do the @code{BZ2_bzCompress ( ..., BZ_FINISH )} calls.
-All required memory is allocated by @code{bzCompressInit}. The
+All required memory is allocated by @code{BZ2_bzCompressInit}. The
compression library can accept any data at all (obviously). So you
-shouldn't get any error return values from the @code{bzCompress} calls.
+shouldn't get any error return values from the @code{BZ2_bzCompress} calls.
If you do, they will be @code{BZ_SEQUENCE_ERROR}, and indicate a bug in
your programming.
@@ -941,9 +977,9 @@ Trivial other possible return values:
if @code{strm} is @code{NULL}, or @code{strm->s} is @code{NULL}
@end display
-@subsection @code{bzCompressEnd}
+@subsection @code{BZ2_bzCompressEnd}
@example
-int bzCompressEnd ( bz_stream *strm );
+int BZ2_bzCompressEnd ( bz_stream *strm );
@end example
Releases all memory associated with a compression stream.
@@ -954,11 +990,11 @@ Possible return values:
@end display
-@subsection @code{bzDecompressInit}
+@subsection @code{BZ2_bzDecompressInit}
@example
-int bzDecompressInit ( bz_stream *strm, int verbosity, int small );
+int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small );
@end example
-Prepares for decompression. As with @code{bzCompressInit}, a
+Prepares for decompression. As with @code{BZ2_bzCompressInit}, a
@code{bz_stream} record should be allocated and initialised before the
call. Fields @code{bzalloc}, @code{bzfree} and @code{opaque} should be
set if a custom memory allocator is required, or made @code{NULL} for
@@ -966,7 +1002,7 @@ the normal @code{malloc}/@code{free} routines. Upon return, the internal
state will have been initialised, and @code{total_in} and
@code{total_out} will be zero.
-For the meaning of parameter @code{verbosity}, see @code{bzCompressInit}.
+For the meaning of parameter @code{verbosity}, see @code{BZ2_bzCompressInit}.
If @code{small} is nonzero, the library will use an alternative
decompression algorithm which uses less memory but at the cost of
@@ -976,11 +1012,13 @@ more information on memory management.
Note that the amount of memory needed to decompress
a stream cannot be determined until the stream's header has been read,
-so even if @code{bzDecompressInit} succeeds, a subsequent
-@code{bzDecompress} could fail with @code{BZ_MEM_ERROR}.
+so even if @code{BZ2_bzDecompressInit} succeeds, a subsequent
+@code{BZ2_bzDecompress} could fail with @code{BZ_MEM_ERROR}.
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{(small != 0 && small != 1)}
or @code{(verbosity < 0 || verbosity > 4)}
@@ -990,54 +1028,54 @@ Possible return values:
Allowable next actions:
@display
- @code{bzDecompress}
+ @code{BZ2_bzDecompress}
if @code{BZ_OK} was returned
no specific action required in case of error
@end display
-@subsection @code{bzDecompress}
+@subsection @code{BZ2_bzDecompress}
@example
-int bzDecompress ( bz_stream *strm );
+int BZ2_bzDecompress ( bz_stream *strm );
@end example
Provides more input and/or output buffer space for the library. The
-caller maintains input and output buffers, and uses @code{bzDecompress}
+caller maintains input and output buffers, and uses @code{BZ2_bzDecompress}
to transfer data between them.
-Before each call to @code{bzDecompress}, @code{next_in}
+Before each call to @code{BZ2_bzDecompress}, @code{next_in}
should point at the compressed data,
and @code{avail_in} should indicate how many bytes the library
-may read. @code{bzDecompress} updates @code{next_in}, @code{avail_in}
+may read. @code{BZ2_bzDecompress} updates @code{next_in}, @code{avail_in}
and @code{total_in}
to reflect the number of bytes it has read.
Similarly, @code{next_out} should point to a buffer in which the uncompressed
output is to be placed, with @code{avail_out} indicating how much output space
-is available. @code{bzCompress} updates @code{next_out},
+is available. @code{BZ2_bzDecompress} updates @code{next_out},
@code{avail_out} and @code{total_out} to reflect
the number of bytes output.
You may provide and remove as little or as much data as you like on
-each call of @code{bzDecompress}.
+each call of @code{BZ2_bzDecompress}.
In the limit, it is acceptable to
supply and remove data one byte at a time, although this would be
terribly inefficient. You should always ensure that at least one
byte of output space is available at each call.
-Use of @code{bzDecompress} is simpler than @code{bzCompress}.
+Use of @code{BZ2_bzDecompress} is simpler than @code{BZ2_bzCompress}.
You should provide input and remove output as described above, and
-repeatedly call @code{bzDecompress} until @code{BZ_STREAM_END} is
+repeatedly call @code{BZ2_bzDecompress} until @code{BZ_STREAM_END} is
returned. Appearance of @code{BZ_STREAM_END} denotes that
-@code{bzDecompress} has detected the logical end of the compressed
-stream. @code{bzDecompress} will not produce @code{BZ_STREAM_END} until
+@code{BZ2_bzDecompress} has detected the logical end of the compressed
+stream. @code{BZ2_bzDecompress} will not produce @code{BZ_STREAM_END} until
all output data has been placed into the output buffer, so once
@code{BZ_STREAM_END} appears, you are guaranteed to have available all
-the decompressed output, and @code{bzDecompressEnd} can safely be
+the decompressed output, and @code{BZ2_bzDecompressEnd} can safely be
called.
+In case of an error return value, you should call @code{BZ2_bzDecompressEnd}
+If case of an error return value, you should call @code{BZ2_bzDecompressEnd}
to clean up and release memory.
Possible return values:
@@ -1059,16 +1097,16 @@ Possible return values:
@end display
Allowable next actions:
@display
- @code{bzDecompress}
+ @code{BZ2_bzDecompress}
if @code{BZ_OK} was returned
- @code{bzDecompressEnd}
+ @code{BZ2_bzDecompressEnd}
otherwise
@end display
-@subsection @code{bzDecompressEnd}
+@subsection @code{BZ2_bzDecompressEnd}
@example
-int bzDecompressEnd ( bz_stream *strm );
+int BZ2_bzDecompressEnd ( bz_stream *strm );
@end example
Releases all memory associated with a decompression stream.
@@ -1107,16 +1145,16 @@ This interface provides functions for reading and writing
given on a per-function basis below.
@item If @code{bzerror} indicates an error
(ie, anything except @code{BZ_OK} and @code{BZ_STREAM_END}),
- you should immediately call @code{bzReadClose} (or @code{bzWriteClose},
+ you should immediately call @code{BZ2_bzReadClose} (or @code{BZ2_bzWriteClose},
depending on whether you are attempting to read or to write)
to free up all resources associated
with the stream. Once an error has been indicated, behaviour of all calls
- except @code{bzReadClose} (@code{bzWriteClose}) is undefined.
+ except @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) is undefined.
The implication is that (1) @code{bzerror} should
be checked after each call, and (2) if @code{bzerror} indicates an error,
- @code{bzReadClose} (@code{bzWriteClose}) should then be called to clean up.
+ @code{BZ2_bzReadClose} (@code{BZ2_bzWriteClose}) should then be called to clean up.
@item The @code{FILE*} arguments passed to
- @code{bzReadOpen}/@code{bzWriteOpen}
+ @code{BZ2_bzReadOpen}/@code{BZ2_bzWriteOpen}
should be set to binary mode.
Most Unix systems will do this by default, but other platforms,
including Windows and Mac, will not. If you omit this, you may
@@ -1130,13 +1168,13 @@ This interface provides functions for reading and writing
-@subsection @code{bzReadOpen}
+@subsection @code{BZ2_bzReadOpen}
@example
typedef void BZFILE;
- BZFILE *bzReadOpen ( int *bzerror, FILE *f,
- int small, int verbosity,
- void *unused, int nUnused );
+ BZFILE *BZ2_bzReadOpen ( int *bzerror, FILE *f,
+ int small, int verbosity,
+ void *unused, int nUnused );
@end example
Prepare to read compressed data from file handle @code{f}. @code{f}
should refer to a file which has been opened for reading, and for which
@@ -1144,7 +1182,7 @@ the error indicator (@code{ferror(f)})is not set. If @code{small} is 1,
the library will try to decompress using less memory, at the expense of
speed.
-For reasons explained below, @code{bzRead} will decompress the
+For reasons explained below, @code{BZ2_bzRead} will decompress the
@code{nUnused} bytes starting at @code{unused}, before starting to read
from the file @code{f}. At most @code{BZ_MAX_UNUSED} bytes may be
supplied like this. If this facility is not required, you should pass
@@ -1152,15 +1190,17 @@ supplied like this. If this facility is not required, you should pass
respectively.
For the meaning of parameters @code{small} and @code{verbosity},
-see @code{bzDecompressInit}.
+see @code{BZ2_bzDecompressInit}.
The amount of memory needed to decompress a file cannot be determined
until the file's header has been read. So it is possible that
-@code{bzReadOpen} returns @code{BZ_OK} but a subsequent call of
-@code{bzRead} will return @code{BZ_MEM_ERROR}.
+@code{BZ2_bzReadOpen} returns @code{BZ_OK} but a subsequent call of
+@code{BZ2_bzRead} will return @code{BZ_MEM_ERROR}.
Possible assignments to @code{bzerror}:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{f} is @code{NULL}
or @code{small} is neither @code{0} nor @code{1}
@@ -1184,16 +1224,16 @@ Possible return values:
Allowable next actions:
@display
- @code{bzRead}
+ @code{BZ2_bzRead}
if @code{bzerror} is @code{BZ_OK}
- @code{bzClose}
+ @code{BZ2_bzClose}
otherwise
@end display
-@subsection @code{bzRead}
+@subsection @code{BZ2_bzRead}
@example
- int bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
+ int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len );
@end example
Reads up to @code{len} (uncompressed) bytes from the compressed file
@code{b} into
@@ -1204,7 +1244,7 @@ was detected, @code{bzerror} will be set to @code{BZ_STREAM_END},
and the number
of bytes read is returned. All other @code{bzerror} values denote an error.
-@code{bzRead} will supply @code{len} bytes,
+@code{BZ2_bzRead} will supply @code{len} bytes,
unless the logical stream end is detected
or an error occurs. Because of this, it is possible to detect the
stream end by observing when the number of bytes returned is
@@ -1213,20 +1253,20 @@ requested. Nevertheless, this is regarded as inadvisable; you should
instead check @code{bzerror} after every call and watch out for
@code{BZ_STREAM_END}.
-Internally, @code{bzRead} copies data from the compressed file in chunks
+Internally, @code{BZ2_bzRead} copies data from the compressed file in chunks
of size @code{BZ_MAX_UNUSED} bytes
before decompressing it. If the file contains more bytes than strictly
-needed to reach the logical end-of-stream, @code{bzRead} will almost certainly
+needed to reach the logical end-of-stream, @code{BZ2_bzRead} will almost certainly
read some of the trailing data before signalling @code{BZ_STREAM_END}.
To collect the read but unused data once @code{BZ_STREAM_END} has
-appeared, call @code{bzReadGetUnused} immediately before @code{bzReadClose}.
+appeared, call @code{BZ2_bzReadGetUnused} immediately before @code{BZ2_bzReadClose}.
Possible assignments to @code{bzerror}:
@display
@code{BZ_PARAM_ERROR}
if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0}
@code{BZ_SEQUENCE_ERROR}
- if @code{b} was opened with @code{bzWriteOpen}
+ if @code{b} was opened with @code{BZ2_bzWriteOpen}
@code{BZ_IO_ERROR}
if there is an error reading from the compressed file
@code{BZ_UNEXPECTED_EOF}
@@ -1254,28 +1294,28 @@ Possible return values:
Allowable next actions:
@display
- collect data from @code{buf}, then @code{bzRead} or @code{bzReadClose}
+ collect data from @code{buf}, then @code{BZ2_bzRead} or @code{BZ2_bzReadClose}
if @code{bzerror} is @code{BZ_OK}
- collect data from @code{buf}, then @code{bzReadClose} or @code{bzReadGetUnused}
+ collect data from @code{buf}, then @code{BZ2_bzReadClose} or @code{BZ2_bzReadGetUnused}
if @code{bzerror} is @code{BZ_SEQUENCE_END}
- @code{bzReadClose}
+ @code{BZ2_bzReadClose}
otherwise
@end display
-@subsection @code{bzReadGetUnused}
+@subsection @code{BZ2_bzReadGetUnused}
@example
- void bzReadGetUnused ( int* bzerror, BZFILE *b,
- void** unused, int* nUnused );
+ void BZ2_bzReadGetUnused ( int* bzerror, BZFILE *b,
+ void** unused, int* nUnused );
@end example
Returns data which was read from the compressed file but was not needed
to get to the logical end-of-stream. @code{*unused} is set to the address
of the data, and @code{*nUnused} to the number of bytes. @code{*nUnused} will
be set to a value between @code{0} and @code{BZ_MAX_UNUSED} inclusive.
-This function may only be called once @code{bzRead} has signalled
-@code{BZ_STREAM_END} but before @code{bzReadClose}.
+This function may only be called once @code{BZ2_bzRead} has signalled
+@code{BZ_STREAM_END} but before @code{BZ2_bzReadClose}.
Possible assignments to @code{bzerror}:
@display
@@ -1284,31 +1324,31 @@ Possible assignments to @code{bzerror}:
or @code{unused} is @code{NULL} or @code{nUnused} is @code{NULL}
@code{BZ_SEQUENCE_ERROR}
if @code{BZ_STREAM_END} has not been signalled
- or if @code{b} was opened with @code{bzWriteOpen}
+ or if @code{b} was opened with @code{BZ2_bzWriteOpen}
@code{BZ_OK}
otherwise
@end display
Allowable next actions:
@display
- @code{bzReadClose}
+ @code{BZ2_bzReadClose}
@end display
-@subsection @code{bzReadClose}
+@subsection @code{BZ2_bzReadClose}
@example
- void bzReadClose ( int *bzerror, BZFILE *b );
+ void BZ2_bzReadClose ( int *bzerror, BZFILE *b );
@end example
Releases all memory pertaining to the compressed file @code{b}.
-@code{bzReadClose} does not call @code{fclose} on the underlying file
+@code{BZ2_bzReadClose} does not call @code{fclose} on the underlying file
handle, so you should do that yourself if appropriate.
-@code{bzReadClose} should be called to clean up after all error
+@code{BZ2_bzReadClose} should be called to clean up after all error
situations.
Possible assignments to @code{bzerror}:
@display
@code{BZ_SEQUENCE_ERROR}
- if @code{b} was opened with @code{bzOpenWrite}
+ if @code{b} was opened with @code{BZ2_bzWriteOpen}
@code{BZ_OK}
otherwise
@end display
@@ -1320,11 +1360,11 @@ Allowable next actions:
-@subsection @code{bzWriteOpen}
+@subsection @code{BZ2_bzWriteOpen}
@example
- BZFILE *bzWriteOpen ( int *bzerror, FILE *f,
- int blockSize100k, int verbosity,
- int workFactor );
+ BZFILE *BZ2_bzWriteOpen ( int *bzerror, FILE *f,
+ int blockSize100k, int verbosity,
+ int workFactor );
@end example
Prepare to write compressed data to file handle @code{f}.
@code{f} should refer to
@@ -1333,14 +1373,16 @@ indicator (@code{ferror(f)})is not set.
For the meaning of parameters @code{blockSize100k},
@code{verbosity} and @code{workFactor}, see
-@* @code{bzCompressInit}.
+@* @code{BZ2_bzCompressInit}.
All required memory is allocated at this stage, so if the call
completes successfully, @code{BZ_MEM_ERROR} cannot be signalled by a
-subsequent call to @code{bzWrite}.
+subsequent call to @code{BZ2_bzWrite}.
Possible assignments to @code{bzerror}:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{f} is @code{NULL}
or @code{blockSize100k < 1} or @code{blockSize100k > 9}
@@ -1362,18 +1404,18 @@ Possible return values:
Allowable next actions:
@display
- @code{bzWrite}
+ @code{BZ2_bzWrite}
if @code{bzerror} is @code{BZ_OK}
- (you could go directly to @code{bzWriteClose}, but this would be pretty pointless)
- @code{bzWriteClose}
+ (you could go directly to @code{BZ2_bzWriteClose}, but this would be pretty pointless)
+ @code{BZ2_bzWriteClose}
otherwise
@end display
-@subsection @code{bzWrite}
+@subsection @code{BZ2_bzWrite}
@example
- void bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
+ void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );
@end example
Absorbs @code{len} bytes from the buffer @code{buf}, eventually to be
compressed and written to the file.
@@ -1383,7 +1425,7 @@ Possible assignments to @code{bzerror}:
@code{BZ_PARAM_ERROR}
if @code{b} is @code{NULL} or @code{buf} is @code{NULL} or @code{len < 0}
@code{BZ_SEQUENCE_ERROR}
- if b was opened with @code{bzReadOpen}
+ if b was opened with @code{BZ2_bzReadOpen}
@code{BZ_IO_ERROR}
if there is an error writing the compressed file.
@code{BZ_OK}
@@ -1393,22 +1435,29 @@ Possible assignments to @code{bzerror}:
-@subsection @code{bzWriteClose}
+@subsection @code{BZ2_bzWriteClose}
@example
- int bzWriteClose ( int *bzerror, BZFILE* f,
- int abandon,
- unsigned int* nbytes_in,
- unsigned int* nbytes_out );
+ void BZ2_bzWriteClose ( int *bzerror, BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in,
+ unsigned int* nbytes_out );
+
+ void BZ2_bzWriteClose64 ( int *bzerror, BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32 );
@end example
Compresses and flushes to the compressed file all data so far supplied
-by @code{bzWrite}. The logical end-of-stream markers are also written, so
-subsequent calls to @code{bzWrite} are illegal. All memory associated
+by @code{BZ2_bzWrite}. The logical end-of-stream markers are also written, so
+subsequent calls to @code{BZ2_bzWrite} are illegal. All memory associated
with the compressed file @code{b} is released.
@code{fflush} is called on the
compressed file, but it is not @code{fclose}'d.
-If @code{bzWriteClose} is called to clean up after an error, the only
+If @code{BZ2_bzWriteClose} is called to clean up after an error, the only
action is to release the memory. The library records the error codes
issued by previous calls, so this situation will be detected
automatically. There is no attempt to complete the compression
@@ -1418,12 +1467,17 @@ value to @code{abandon}.
If @code{nbytes_in} is non-null, @code{*nbytes_in} will be set to be the
total volume of uncompressed data handled. Similarly, @code{nbytes_out}
-will be set to the total volume of compressed data written.
+will be set to the total volume of compressed data written. For
+compatibility with older versions of the library, @code{BZ2_bzWriteClose}
+only yields the lower 32 bits of these counts. Use
+@code{BZ2_bzWriteClose64} if you want the full 64 bit counts. These
+two functions are otherwise absolutely identical.
+
Possible assignments to @code{bzerror}:
@display
@code{BZ_SEQUENCE_ERROR}
- if @code{b} was opened with @code{bzReadOpen}
+ if @code{b} was opened with @code{BZ2_bzReadOpen}
@code{BZ_IO_ERROR}
if there is an error writing the compressed file
@code{BZ_OK}
@@ -1442,26 +1496,26 @@ The calling application can write its own data before and after the
compressed data stream, using that same file handle.
@item Reading is more complex, and the facilities are not as general
as they could be since generality is hard to reconcile with efficiency.
-@code{bzRead} reads from the compressed file in blocks of size
+@code{BZ2_bzRead} reads from the compressed file in blocks of size
@code{BZ_MAX_UNUSED} bytes, and in doing so probably will overshoot
the logical end of compressed stream.
To recover this data once decompression has
-ended, call @code{bzReadGetUnused} after the last call of @code{bzRead}
+ended, call @code{BZ2_bzReadGetUnused} after the last call of @code{BZ2_bzRead}
(the one returning @code{BZ_STREAM_END}) but before calling
-@code{bzReadClose}.
+@code{BZ2_bzReadClose}.
@end itemize
This mechanism makes it easy to decompress multiple @code{bzip2}
-streams placed end-to-end. As the end of one stream, when @code{bzRead}
-returns @code{BZ_STREAM_END}, call @code{bzReadGetUnused} to collect the
+streams placed end-to-end. At the end of one stream, when @code{BZ2_bzRead}
+returns @code{BZ_STREAM_END}, call @code{BZ2_bzReadGetUnused} to collect the
unused data (copy it into your own buffer somewhere).
That data forms the start of the next compressed stream.
-To start uncompressing that next stream, call @code{bzReadOpen} again,
+To start uncompressing that next stream, call @code{BZ2_bzReadOpen} again,
feeding in the unused data via the @code{unused}/@code{nUnused}
parameters.
Keep doing this until @code{BZ_STREAM_END} return coincides with the
physical end of file (@code{feof(f)}). In this situation
-@code{bzReadGetUnused}
+@code{BZ2_bzReadGetUnused}
will of course return no data.
This should give some feel for how the high-level interface can be used.
@@ -1482,22 +1536,22 @@ f = fopen ( "myfile.bz2", "w" );
if (!f) @{
/* handle error */
@}
-b = bzWriteOpen ( &bzerror, f, 9 );
+b = BZ2_bzWriteOpen ( &bzerror, f, 9, 0, 30 );
if (bzerror != BZ_OK) @{
- bzWriteClose ( b );
+ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
/* handle error */
@}
while ( /* condition */ ) @{
/* get data to write into buf, and set nBuf appropriately */
- nWritten = bzWrite ( &bzerror, b, buf, nBuf );
+ BZ2_bzWrite ( &bzerror, b, buf, nBuf );
if (bzerror == BZ_IO_ERROR) @{
- bzWriteClose ( &bzerror, b );
+ BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
/* handle error */
@}
@}
-bzWriteClose ( &bzerror, b );
+BZ2_bzWriteClose ( &bzerror, b, 0, NULL, NULL );
if (bzerror == BZ_IO_ERROR) @{
/* handle error */
@}
@@ -1515,39 +1569,39 @@ f = fopen ( "myfile.bz2", "r" );
if (!f) @{
/* handle error */
@}
-b = bzReadOpen ( &bzerror, f, 0, NULL, 0 );
+b = BZ2_bzReadOpen ( &bzerror, f, 0, NULL, 0 );
if (bzerror != BZ_OK) @{
- bzReadClose ( &bzerror, b );
+ BZ2_bzReadClose ( &bzerror, b );
/* handle error */
@}
bzerror = BZ_OK;
while (bzerror == BZ_OK && /* arbitrary other conditions */) @{
- nBuf = bzRead ( &bzerror, b, buf, /* size of buf */ );
+ nBuf = BZ2_bzRead ( &bzerror, b, buf, /* size of buf */ );
if (bzerror == BZ_OK) @{
/* do something with buf[0 .. nBuf-1] */
@}
@}
if (bzerror != BZ_STREAM_END) @{
- bzReadClose ( &bzerror, b );
+ BZ2_bzReadClose ( &bzerror, b );
/* handle error */
@} else @{
- bzReadClose ( &bzerror );
+ BZ2_bzReadClose ( &bzerror, b );
@}
@end example
@section Utility functions
-@subsection @code{bzBuffToBuffCompress}
+@subsection @code{BZ2_bzBuffToBuffCompress}
@example
- int bzBuffToBuffCompress( char* dest,
- unsigned int* destLen,
- char* source,
- unsigned int sourceLen,
- int blockSize100k,
- int verbosity,
- int workFactor );
+ int BZ2_bzBuffToBuffCompress( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int blockSize100k,
+ int verbosity,
+ int workFactor );
@end example
Attempts to compress the data in @code{source[0 .. sourceLen-1]}
into the destination buffer, @code{dest[0 .. *destLen-1]}.
@@ -1563,17 +1617,19 @@ additional calls to provide extra input data. If you want that kind of
mechanism, use the low-level interface.
For the meaning of parameters @code{blockSize100k}, @code{verbosity}
-and @code{workFactor}, @* see @code{bzCompressInit}.
+and @code{workFactor}, @* see @code{BZ2_bzCompressInit}.
To guarantee that the compressed data will fit in its buffer, allocate
an output buffer of size 1% larger than the uncompressed data, plus
six hundred extra bytes.
-@code{bzBuffToBuffDecompress} will not write data at or
+@code{BZ2_bzBuffToBuffCompress} will not write data at or
beyond @code{dest[*destLen]}, even in case of buffer overflow.
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL}
or @code{blockSize100k < 1} or @code{blockSize100k > 9}
@@ -1589,14 +1645,14 @@ Possible return values:
-@subsection @code{bzBuffToBuffDecompress}
+@subsection @code{BZ2_bzBuffToBuffDecompress}
@example
- int bzBuffToBuffDecompress ( char* dest,
- unsigned int* destLen,
- char* source,
- unsigned int sourceLen,
- int small,
- int verbosity );
+ int BZ2_bzBuffToBuffDecompress ( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int small,
+ int verbosity );
@end example
Attempts to decompress the data in @code{source[0 .. sourceLen-1]}
into the destination buffer, @code{dest[0 .. *destLen-1]}.
@@ -1606,11 +1662,11 @@ returned. If the compressed data won't fit, @code{*destLen}
is unchanged, and @code{BZ_OUTBUFF_FULL} is returned.
@code{source} is assumed to hold a complete @code{bzip2} format
-data stream. @code{bzBuffToBuffDecompress} tries to decompress
+data stream. @* @code{BZ2_bzBuffToBuffDecompress} tries to decompress
the entirety of the stream into the output buffer.
For the meaning of parameters @code{small} and @code{verbosity},
-see @code{bzDecompressInit}.
+see @code{BZ2_bzDecompressInit}.
Because the compression ratio of the compressed data cannot be known in
advance, there is no easy way to guarantee that the output buffer will
@@ -1618,11 +1674,13 @@ be big enough. You may of course make arrangements in your code to
record the size of the uncompressed data, but such a mechanism is beyond
the scope of this library.
-@code{bzBuffToBuffDecompress} will not write data at or
+@code{BZ2_bzBuffToBuffDecompress} will not write data at or
beyond @code{dest[*destLen]}, even in case of buffer overflow.
Possible return values:
@display
+ @code{BZ_CONFIG_ERROR}
+ if the library has been mis-compiled
@code{BZ_PARAM_ERROR}
if @code{dest} is @code{NULL} or @code{destLen} is @code{NULL}
or @code{small != 0 && small != 1}
@@ -1646,40 +1704,40 @@ Possible return values:
@section @code{zlib} compatibility functions
Yoshioka Tsuneo has contributed some functions to
give better @code{zlib} compatibility. These functions are
-@code{bzopen}, @code{bzread}, @code{bzwrite}, @code{bzflush},
-@code{bzclose},
-@code{bzerror} and @code{bzlibVersion}.
+@code{BZ2_bzopen}, @code{BZ2_bzread}, @code{BZ2_bzwrite}, @code{BZ2_bzflush},
+@code{BZ2_bzclose},
+@code{BZ2_bzerror} and @code{BZ2_bzlibVersion}.
These functions are not (yet) officially part of
the library. If they break, you get to keep all the pieces.
Nevertheless, I think they work ok.
@example
typedef void BZFILE;
-const char * bzlibVersion ( void );
+const char * BZ2_bzlibVersion ( void );
@end example
Returns a string indicating the library version.
@example
-BZFILE * bzopen ( const char *path, const char *mode );
-BZFILE * bzdopen ( int fd, const char *mode );
+BZFILE * BZ2_bzopen ( const char *path, const char *mode );
+BZFILE * BZ2_bzdopen ( int fd, const char *mode );
@end example
Opens a @code{.bz2} file for reading or writing, using either its name
or a pre-existing file descriptor.
Analogous to @code{fopen} and @code{fdopen}.
@example
-int bzread ( BZFILE* b, void* buf, int len );
-int bzwrite ( BZFILE* b, void* buf, int len );
+int BZ2_bzread ( BZFILE* b, void* buf, int len );
+int BZ2_bzwrite ( BZFILE* b, void* buf, int len );
@end example
Reads/writes data from/to a previously opened @code{BZFILE}.
Analogous to @code{fread} and @code{fwrite}.
@example
-int bzflush ( BZFILE* b );
-void bzclose ( BZFILE* b );
+int BZ2_bzflush ( BZFILE* b );
+void BZ2_bzclose ( BZFILE* b );
@end example
-Flushes/closes a @code{BZFILE}. @code{bzflush} doesn't actually do
+Flushes/closes a @code{BZFILE}. @code{BZ2_bzflush} doesn't actually do
anything. Analogous to @code{fflush} and @code{fclose}.
@example
-const char * bzerror ( BZFILE *b, int *errnum )
+const char * BZ2_bzerror ( BZFILE *b, int *errnum );
@end example
Returns a string describing the more recent error status of
@code{b}, and also sets @code{*errnum} to its numerical value.
@@ -1695,9 +1753,9 @@ by compiling the library with preprocessor symbol @code{BZ_NO_STDIO}
defined. Doing this gives you a library containing only the following
eight functions:
-@code{bzCompressInit}, @code{bzCompress}, @code{bzCompressEnd} @*
-@code{bzDecompressInit}, @code{bzDecompress}, @code{bzDecompressEnd} @*
-@code{bzBuffToBuffCompress}, @code{bzBuffToBuffDecompress}
+@code{BZ2_bzCompressInit}, @code{BZ2_bzCompress}, @code{BZ2_bzCompressEnd} @*
+@code{BZ2_bzDecompressInit}, @code{BZ2_bzDecompress}, @code{BZ2_bzDecompressEnd} @*
+@code{BZ2_bzBuffToBuffCompress}, @code{BZ2_bzBuffToBuffDecompress}
When compiled like this, all functions will ignore @code{verbosity}
settings.
@@ -1710,14 +1768,14 @@ was compiled with @code{BZ_NO_STDIO} set.
For a normal compile, an assertion failure yields the message
@example
- bzip2/libbzip2, v0.9.5: internal error number N.
- This is a bug in bzip2/libbzip2, v0.9.5. Please report
- it to me at: jseward@@acm.org. If this happened when
- you were using some program which uses libbzip2 as a
+ bzip2/libbzip2: internal error number N.
+ This is a bug in bzip2/libbzip2, 1.0 of 21-Mar-2000.
+ Please report it to me at: jseward@@acm.org. If this happened
+ when you were using some program which uses libbzip2 as a
component, you should also report this bug to the author(s)
of that program. Please make an effort to report this bug;
timely and accurate bug reports eventually lead to higher
- quality software. Thanks. Julian Seward, 24 May 1999.
+ quality software. Thanks. Julian Seward, 21 March 2000.
@end example
where @code{N} is some error code number. @code{exit(3)}
is then called.
@@ -1781,7 +1839,7 @@ These are just some random thoughts of mine. Your mileage may
vary.
@section Limitations of the compressed file format
-@code{bzip2-0.9.5} and @code{0.9.0}
+@code{bzip2-1.0}, @code{0.9.5} and @code{0.9.0}
use exactly the same file format as the previous
version, @code{bzip2-0.1}. This decision was made in the interests of
stability. Creating yet another incompatible compressed file format
@@ -1860,7 +1918,7 @@ require some careful design of compressed file formats.
@section Portability issues
After some consideration, I have decided not to use
-GNU @code{autoconf} to configure 0.9.5.
+GNU @code{autoconf} to configure 0.9.5 or 1.0.
@code{autoconf}, admirable and wonderful though it is,
mainly assists with portability problems between Unix-like
@@ -1925,7 +1983,7 @@ If you get problems, try using the flags
@code{-O2} @code{-fomit-frame-pointer} @code{-fno-strength-reduce}.
You should specifically @emph{not} use @code{-funroll-loops}.
-You may notice that the Makefile runs four tests as part of
+You may notice that the Makefile runs six tests as part of
the build process. If the program passes all of these, it's
a pretty good (but not 100%) indication that the compiler has
done its job correctly.
@@ -2000,6 +2058,7 @@ memory but gets pretty good compression, and has minimal latency,
consider Jean-loup
Gailly's and Mark Adler's work, @code{zlib-1.1.2} and
@code{gzip-1.2.4}. Look for them at
+
@code{http://www.cdrom.com/pub/infozip/zlib} and
@code{http://www.gzip.org} respectively.
@@ -2140,7 +2199,14 @@ available from:
@example
http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps
@end example
-
+Finally, the following paper documents some recent investigations
+I made into the performance of sorting algorithms:
+@example
+Julian Seward:
+ On the Performance of BWT Sorting Algorithms
+ Proceedings of the IEEE Data Compression Conference 2000
+ Snowbird, Utah. 28-30 March 2000.
+@end example
@contents
diff --git a/randtable.c b/randtable.c
index 8f6266f..983089d 100644
--- a/randtable.c
+++ b/randtable.c
@@ -8,7 +8,7 @@
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
- Copyright (C) 1996-1999 Julian R Seward. All rights reserved.
+ Copyright (C) 1996-2000 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -43,7 +43,7 @@
Julian Seward, Cambridge, UK.
jseward@acm.org
- bzip2/libbzip2 version 0.9.5 of 24 May 1999
+ bzip2/libbzip2 version 1.0 of 21 March 2000
This program is based on (at least) the work of:
Mike Burrows
@@ -63,7 +63,7 @@
/*---------------------------------------------*/
-Int32 rNums[512] = {
+Int32 BZ2_rNums[512] = {
619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
diff --git a/spewG.c b/spewG.c
new file mode 100644
index 0000000..7934e76
--- /dev/null
+++ b/spewG.c
@@ -0,0 +1,39 @@
+
+/* spew out a thoroughly gigantic file designed so that bzip2
+ can compress it reasonably rapidly. This is to help test
+ support for large files (> 2GB) in a reasonable amount of time.
+ I suggest you use the undocumented --exponential option to
+ bzip2 when compressing the resulting file; this saves a bit of
+ time. Note: *don't* bother with --exponential when compressing
+ Real Files; it'll just waste a lot of CPU time :-)
+ (but is otherwise harmless).
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* The number of megabytes of junk to spew out (roughly) */
+#define MEGABYTES 5000
+
+#define N_BUF 1000000
+char buf[N_BUF];
+
+int main ( int argc, char** argv )
+{
+ int ii, kk, p;
+ srandom(1);
+ setbuffer ( stdout, buf, N_BUF );
+ for (kk = 0; kk < MEGABYTES * 515; kk+=3) {
+ p = 25+random()%50;
+ for (ii = 0; ii < p; ii++)
+ printf ( "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" );
+ for (ii = 0; ii < p-1; ii++)
+ printf ( "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" );
+ for (ii = 0; ii < p+1; ii++)
+ printf ( "ccccccccccccccccccccccccccccccccccccc" );
+ }
+ fflush(stdout);
+ return 0;
+}
diff --git a/unzcrash.c b/unzcrash.c
new file mode 100644
index 0000000..f0f17fc
--- /dev/null
+++ b/unzcrash.c
@@ -0,0 +1,126 @@
+
+/* A test program written to test robustness to decompression of
+ corrupted data. Usage is
+ unzcrash filename
+ and the program will read the specified file, compress it (in memory),
+ and then repeatedly decompress it, each time with a different bit of
+ the compressed data inverted, so as to test all possible one-bit errors.
+ This should not cause any invalid memory accesses. If it does,
+ I want to know about it!
+
+ p.s. As you can see from the above description, the process is
+ incredibly slow. A file of size eg 5KB will cause it to run for
+ many hours.
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#include "bzlib.h"
+
+#define M_BLOCK 1000000
+
+typedef unsigned char uchar;
+
+#define M_BLOCK_OUT (M_BLOCK + 1000000)
+uchar inbuf[M_BLOCK];
+uchar outbuf[M_BLOCK_OUT];
+uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];
+
+int nIn, nOut, nZ;
+
+static char *bzerrorstrings[] = {
+ "OK"
+ ,"SEQUENCE_ERROR"
+ ,"PARAM_ERROR"
+ ,"MEM_ERROR"
+ ,"DATA_ERROR"
+ ,"DATA_ERROR_MAGIC"
+ ,"IO_ERROR"
+ ,"UNEXPECTED_EOF"
+ ,"OUTBUFF_FULL"
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+};
+
+void flip_bit ( int bit )
+{
+ int byteno = bit / 8;
+ int bitno = bit % 8;
+ uchar mask = 1 << bitno;
+ //fprintf ( stderr, "(byte %d bit %d mask %d)",
+ // byteno, bitno, (int)mask );
+ zbuf[byteno] ^= mask;
+}
+
+int main ( int argc, char** argv )
+{
+ FILE* f;
+ int r;
+ int bit;
+ int i;
+
+ if (argc != 2) {
+ fprintf ( stderr, "usage: unzcrash filename\n" );
+ return 1;
+ }
+
+ f = fopen ( argv[1], "r" );
+ if (!f) {
+ fprintf ( stderr, "unzcrash: can't open %s\n", argv[1] );
+ return 1;
+ }
+
+ nIn = fread ( inbuf, 1, M_BLOCK, f );
+ fprintf ( stderr, "%d bytes read\n", nIn );
+
+ nZ = M_BLOCK;
+ r = BZ2_bzBuffToBuffCompress (
+ zbuf, &nZ, inbuf, nIn, 9, 0, 30 );
+
+ assert (r == BZ_OK);
+ fprintf ( stderr, "%d after compression\n", nZ );
+
+ for (bit = 0; bit < nZ*8; bit++) {
+ fprintf ( stderr, "bit %d ", bit );
+ flip_bit ( bit );
+ nOut = M_BLOCK_OUT;
+ r = BZ2_bzBuffToBuffDecompress (
+ outbuf, &nOut, zbuf, nZ, 0, 0 );
+ fprintf ( stderr, " %d %s ", r, bzerrorstrings[-r] );
+
+ if (r != BZ_OK) {
+ fprintf ( stderr, "\n" );
+ } else {
+ if (nOut != nIn) {
+ fprintf(stderr, "nIn/nOut mismatch %d %d\n", nIn, nOut );
+ return 1;
+ } else {
+ for (i = 0; i < nOut; i++)
+ if (inbuf[i] != outbuf[i]) {
+ fprintf(stderr, "mismatch at %d\n", i );
+ return 1;
+ }
+ if (i == nOut) fprintf(stderr, "really ok!\n" );
+ }
+ }
+
+ flip_bit ( bit );
+ }
+
+#if 0
+ assert (nOut == nIn);
+ for (i = 0; i < nOut; i++) {
+ if (inbuf[i] != outbuf[i]) {
+ fprintf ( stderr, "difference at %d !\n", i );
+ return 1;
+ }
+ }
+#endif
+
+ fprintf ( stderr, "all ok\n" );
+ return 0;
+}
diff --git a/words0 b/words0
new file mode 100644
index 0000000..164a8ed
--- /dev/null
+++ b/words0
@@ -0,0 +1,5 @@
+
+If compilation produces errors, or a large number of warnings,
+please read README.COMPILATION.PROBLEMS -- you might be able to
+adjust the flags in this Makefile to improve matters.
+