aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYann Collet <Cyan4973@users.noreply.github.com>2024-03-21 12:33:15 -0700
committerGitHub <noreply@github.com>2024-03-21 12:33:15 -0700
commitc6e52572405e160ef25780cb5cc8244fdddb90ae (patch)
treecbbd27851891300281adb2e5429efae5764cb46e
parentdc1f7b560b23f5bd50a0fcddd677007c9c76ec0b (diff)
parent6f1215b874dbf74b50dcb64915e91e11ba198008 (diff)
downloadzstd-c6e52572405e160ef25780cb5cc8244fdddb90ae.tar.gz
Merge pull request #3977 from facebook/doc_advanced
Doc update
-rw-r--r--doc/zstd_manual.html36
-rw-r--r--lib/compress/zstd_compress.c16
-rw-r--r--lib/compress/zstd_compress_superblock.c4
-rw-r--r--lib/zstd.h49
4 files changed, 72 insertions, 33 deletions
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 58fe958d..bc4a2403 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -190,9 +190,9 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); </b>/* accept NULL pointer */<b>
const void* src, size_t srcSize,
int compressionLevel);
</b><p> Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
- Important : in order to behave similarly to `ZSTD_compress()`,
- this function compresses at requested compression level,
- __ignoring any other parameter__ .
+ Important : in order to mirror `ZSTD_compress()` behavior,
+ this function compresses at the requested compression level,
+ __ignoring any other advanced parameter__ .
If any advanced parameter was set using the advanced API,
they will all be reset. Only `compressionLevel` remains.
@@ -212,7 +212,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </b>/* accept NULL pointer */<b>
const void* src, size_t srcSize);
</b><p> Same as ZSTD_decompress(),
requires an allocated ZSTD_DCtx.
- Compatible with sticky parameters.
+ Compatible with sticky parameters (see below).
</p></pre><BR>
@@ -296,6 +296,19 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </b>/* accept NULL pointer */<b>
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "use default strategy". */
+
+ ZSTD_c_targetCBlockSize=130, </b>/* v1.5.6+<b>
+ * Attempts to fit compressed block size into approximatively targetCBlockSize.
+ * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
+ * Note that it's not a guarantee, just a convergence target (default:0).
+ * No target when targetCBlockSize == 0.
+ * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
+ * when a client can make use of partial documents (a prominent example being Chrome).
+ * Note: this parameter is stable since v1.5.6.
+ * It was present as an experimental parameter in earlier versions,
+ * but it's not recommended using it with earlier library versions
+ * due to massive performance regressions.
+ */
</b>/* LDM mode parameters */<b>
ZSTD_c_enableLongDistanceMatching=160, </b>/* Enable long distance matching.<b>
* This parameter is designed to improve compression ratio
@@ -375,7 +388,6 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </b>/* accept NULL pointer */<b>
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
- * ZSTD_c_targetCBlockSize
* ZSTD_c_srcSizeHint
* ZSTD_c_enableDedicatedDictSearch
* ZSTD_c_stableInBuffer
@@ -396,7 +408,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </b>/* accept NULL pointer */<b>
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
- ZSTD_c_experimentalParam6=1003,
+ </b>/* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */<b>
ZSTD_c_experimentalParam7=1004,
ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006,
@@ -483,6 +495,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); </b>/* accept NULL pointer */<b>
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
</b><p> Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ (note that this entry point doesn't even expose a compression level parameter).
ZSTD_compress2() always starts a new frame.
Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
- Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
@@ -668,6 +681,11 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs); </b>/* accept NULL pointer */<b>
only ZSTD_e_end or ZSTD_e_flush operations are allowed.
Before starting a new compression job, or changing compression parameters,
it is required to fully flush internal buffers.
+ - note: if an operation ends with an error, it may leave @cctx in an undefined state.
+ Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state.
+ In order to be re-employed after an error, a state must be reset,
+ which can be done explicitly (ZSTD_CCtx_reset()),
+ or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
</p></pre><BR>
@@ -753,6 +771,12 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds); </b>/* accept NULL pointer */<b>
@return : 0 when a frame is completely decoded and fully flushed,
or an error code, which can be tested using ZSTD_isError(),
or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
+
+ Note: when an operation returns with an error code, the @zds state may be left in undefined state.
+ It's UB to invoke `ZSTD_decompressStream()` on such a state.
+ In order to re-use such a state, it must be first reset,
+ which can be done explicitly (`ZSTD_DCtx_reset()`),
+ or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
</p></pre><BR>
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f6a84e6b..9284e2a4 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -870,7 +870,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
CCtxParams->nbWorkers = value;
- return CCtxParams->nbWorkers;
+ return (size_t)(CCtxParams->nbWorkers);
#endif
case ZSTD_c_jobSize :
@@ -894,7 +894,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->overlapLog = value;
- return CCtxParams->overlapLog;
+ return (size_t)CCtxParams->overlapLog;
#endif
case ZSTD_c_rsyncable :
@@ -904,7 +904,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
#else
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
CCtxParams->rsyncable = value;
- return CCtxParams->rsyncable;
+ return (size_t)CCtxParams->rsyncable;
#endif
case ZSTD_c_enableDedicatedDictSearch :
@@ -941,8 +941,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
return CCtxParams->ldmParams.hashRateLog;
case ZSTD_c_targetCBlockSize :
- if (value!=0) /* 0 ==> default */
+ if (value!=0) { /* 0 ==> default */
+ value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+ }
CCtxParams->targetCBlockSize = (U32)value;
return CCtxParams->targetCBlockSize;
@@ -970,7 +972,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value;
- return CCtxParams->validateSequences;
+ return (size_t)CCtxParams->validateSequences;
case ZSTD_c_useBlockSplitter:
BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
@@ -985,7 +987,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_deterministicRefPrefix:
BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
CCtxParams->deterministicRefPrefix = !!value;
- return CCtxParams->deterministicRefPrefix;
+ return (size_t)CCtxParams->deterministicRefPrefix;
case ZSTD_c_prefetchCDictTables:
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
@@ -995,7 +997,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_enableSeqProducerFallback:
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
CCtxParams->enableMatchFinderFallback = value;
- return CCtxParams->enableMatchFinderFallback;
+ return (size_t)CCtxParams->enableMatchFinderFallback;
case ZSTD_c_maxBlockSize:
if (value!=0) /* 0 ==> default */
diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 8c466c47..628a2dcc 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -469,8 +469,6 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
return n;
}
-#define CBLOCK_TARGET_SIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
-
/** ZSTD_compressSubBlock_multi() :
* Breaks super-block into multiple sub-blocks and compresses them.
* Entropy will be written into the first block.
@@ -504,7 +502,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
const BYTE* llCodePtr = seqStorePtr->llCode;
const BYTE* mlCodePtr = seqStorePtr->mlCode;
const BYTE* ofCodePtr = seqStorePtr->ofCode;
- size_t const minTarget = CBLOCK_TARGET_SIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
+ size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
int writeSeqEntropy = 1;
diff --git a/lib/zstd.h b/lib/zstd.h
index 9b387612..5d1fef8a 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -262,9 +262,9 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer *
/*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
- * Important : in order to behave similarly to `ZSTD_compress()`,
- * this function compresses at requested compression level,
- * __ignoring any other parameter__ .
+ * Important : in order to mirror `ZSTD_compress()` behavior,
+ * this function compresses at the requested compression level,
+ * __ignoring any other advanced parameter__ .
* If any advanced parameter was set using the advanced API,
* they will all be reset. Only `compressionLevel` remains.
*/
@@ -286,7 +286,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer *
/*! ZSTD_decompressDCtx() :
* Same as ZSTD_decompress(),
* requires an allocated ZSTD_DCtx.
- * Compatible with sticky parameters.
+ * Compatible with sticky parameters (see below).
*/
ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
@@ -302,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
* using ZSTD_CCtx_set*() functions.
* Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
* "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
- * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ .
*
* It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
*
* This API supersedes all other "advanced" API entry points in the experimental section.
- * In the future, we expect to remove from experimental API entry points which are redundant with this API.
+ * In the future, we expect to remove API entry points from experimental which are redundant with this API.
*/
@@ -390,16 +390,19 @@ typedef enum {
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "use default strategy". */
- ZSTD_c_targetCBlockSize=130, /* Tries to fit compressed block size to be
- * around targetCBlockSize. No target when
- * targetCBlockSize == 0. There is no guarantee
- * on compressed block size (default:0).
- * Since the decoder has to buffer a complete
- * block to begin decoding it, in low band-
- * width streaming environments this may
- * improve end-to-end latency. Bound by
- * ZSTD_TARGETCBLOCKSIZE_MIN and
- * ZSTD_TARGETCBLOCKSIZE_MAX. */
+
+ ZSTD_c_targetCBlockSize=130, /* v1.5.6+
+ * Attempts to fit compressed block size into approximatively targetCBlockSize.
+ * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
+ * Note that it's not a guarantee, just a convergence target (default:0).
+ * No target when targetCBlockSize == 0.
+ * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
+ * when a client can make use of partial documents (a prominent example being Chrome).
+ * Note: this parameter is stable since v1.5.6.
+ * It was present as an experimental parameter in earlier versions,
+ * but it's not recommended using it with earlier library versions
+ * due to massive performance regressions.
+ */
/* LDM mode parameters */
ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
* This parameter is designed to improve compression ratio
@@ -584,6 +587,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
/*! ZSTD_compress2() :
* Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ * (note that this entry point doesn't even expose a compression level parameter).
* ZSTD_compress2() always starts a new frame.
* Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
* - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
@@ -787,6 +791,11 @@ typedef enum {
* only ZSTD_e_end or ZSTD_e_flush operations are allowed.
* Before starting a new compression job, or changing compression parameters,
* it is required to fully flush internal buffers.
+ * - note: if an operation ends with an error, it may leave @cctx in an undefined state.
+ * Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state.
+ * In order to be re-employed after an error, a state must be reset,
+ * which can be done explicitly (ZSTD_CCtx_reset()),
+ * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
*/
ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_outBuffer* output,
@@ -900,6 +909,12 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
* @return : 0 when a frame is completely decoded and fully flushed,
* or an error code, which can be tested using ZSTD_isError(),
* or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
+ *
+ * Note: when an operation returns with an error code, the @zds state may be left in undefined state.
+ * It's UB to invoke `ZSTD_decompressStream()` on such a state.
+ * In order to re-use such a state, it must be first reset,
+ * which can be done explicitly (`ZSTD_DCtx_reset()`),
+ * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
*/
ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
@@ -1250,7 +1265,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */
-#define ZSTD_TARGETCBLOCKSIZE_MIN 64
+#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_SRCSIZEHINT_MIN 0
#define ZSTD_SRCSIZEHINT_MAX INT_MAX