aboutsummaryrefslogtreecommitdiff
path: root/lib/compress/zstdmt_compress.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compress/zstdmt_compress.c')
-rw-r--r--lib/compress/zstdmt_compress.c120
1 files changed, 80 insertions, 40 deletions
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 50454a50..f564822d 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -467,29 +467,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
ZSTD_dictContentType_e dictContentType)
{
/* Adjust parameters */
- if (params.ldmParams.enableLdm) {
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
assert(params.ldmParams.hashRateLog < 32);
- serialState->ldmState.hashPower =
- ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
} else {
ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
}
serialState->nextJobID = 0;
if (params.fParams.checksumFlag)
XXH64_reset(&serialState->xxhState, 0);
- if (params.ldmParams.enableLdm) {
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
ZSTD_customMem cMem = params.customMem;
unsigned const hashLog = params.ldmParams.hashLog;
size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
unsigned const bucketLog =
params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
- size_t const bucketSize = (size_t)1 << bucketLog;
unsigned const prevBucketLog =
serialState->params.ldmParams.hashLog -
serialState->params.ldmParams.bucketSizeLog;
+ size_t const numBuckets = (size_t)1 << bucketLog;
/* Size the seq pool tables */
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
/* Reset the window */
@@ -501,20 +499,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
}
if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
- serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
}
if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
return 1;
/* Zero the tables */
ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
- ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+ ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
/* Update window state and fill hash table with dict */
serialState->ldmState.loadedDictEnd = 0;
if (dictSize > 0) {
if (dictContentType == ZSTD_dct_rawContent) {
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
- ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
} else {
@@ -566,12 +564,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
/* A future job may error and skip our job */
if (serialState->nextJobID == jobID) {
/* It is now our turn, do any processing necessary */
- if (serialState->params.ldmParams.enableLdm) {
+ if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
size_t error;
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
seqStore.size == 0 && seqStore.capacity > 0);
assert(src.size <= serialState->params.jobSize);
- ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
error = ZSTD_ldm_generateSequences(
&serialState->ldmState, &seqStore,
&serialState->params.ldmParams, src.start, src.size);
@@ -596,7 +594,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
if (seqStore.size > 0) {
size_t const err = ZSTD_referenceExternalSequences(
jobCCtx, seqStore.seq, seqStore.size);
- assert(serialState->params.ldmParams.enableLdm);
+ assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
assert(!ZSTD_isError(err));
(void)err;
}
@@ -674,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
}
- if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+ if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
JOB_ERROR(ERROR(memory_allocation));
/* Don't compute the checksum for chunks, since we compute it externally,
@@ -682,7 +680,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
*/
if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
/* Don't run LDM for the chunks, since we handle it externally */
- jobParams.ldmParams.enableLdm = 0;
+ jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
+ /* Correct nbWorkers to 0. */
+ jobParams.nbWorkers = 0;
/* init */
@@ -695,6 +695,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
{ size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
}
+ if (!job->firstJob) {
+ size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+ if (ZSTD_isError(err)) JOB_ERROR(err);
+ }
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
ZSTD_dtlm_fast,
@@ -750,6 +754,13 @@ static void ZSTDMT_compressionJob(void* jobDescription)
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
+ if (!job->firstJob) {
+ /* Double check that we don't have an ext-dict, because then our
+ * repcode invalidation doesn't work.
+ */
+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+ }
+ ZSTD_CCtx_trace(cctx, 0);
_endJob:
ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -796,6 +807,15 @@ typedef struct {
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
#define RSYNC_LENGTH 32
+/* Don't create chunks smaller than the zstd block size.
+ * This stops us from regressing compression ratio too much,
+ * and ensures our output fits in ZSTD_compressBound().
+ *
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+ * ZSTD_COMPRESSBOUND() will need to be updated.
+ */
+#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
typedef struct {
U64 hash;
@@ -1124,7 +1144,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
{
unsigned jobLog;
- if (params->ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on cycleLog instead. */
@@ -1168,7 +1188,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
assert(0 <= overlapRLog && overlapRLog <= 8);
- if (params->ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead.
@@ -1239,9 +1259,11 @@ size_t ZSTDMT_initCStream_internal(
if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
- U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
- U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
- assert(jobSizeMB >= 1);
+ U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+ U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+ /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+ * expected job size is at least 4x larger. */
+ assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
mtctx->rsync.hash = 0;
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
@@ -1253,7 +1275,7 @@ size_t ZSTDMT_initCStream_internal(
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
{
/* If ldm is enabled we need windowSize space. */
- size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
/* Two buffers of slack, plus extra space for the overlap
* This is the minimum slack that LDM works with. One extra because
* flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1528,17 +1550,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
{
BYTE const* const bufferStart = (BYTE const*)buffer.start;
- BYTE const* const bufferEnd = bufferStart + buffer.capacity;
BYTE const* const rangeStart = (BYTE const*)range.start;
- BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
if (rangeStart == NULL || bufferStart == NULL)
return 0;
- /* Empty ranges cannot overlap */
- if (bufferStart == bufferEnd || rangeStart == rangeEnd)
- return 0;
- return bufferStart < rangeEnd && rangeStart < bufferEnd;
+ {
+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+ BYTE const* const rangeEnd = rangeStart + range.size;
+
+ /* Empty ranges cannot overlap */
+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+ return 0;
+
+ return bufferStart < rangeEnd && rangeStart < bufferEnd;
+ }
}
static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
@@ -1565,7 +1591,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
{
- if (mtctx->params.ldmParams.enableLdm) {
+ if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
DEBUGLOG(5, "source [0x%zx, 0x%zx)",
@@ -1668,6 +1694,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
if (!mtctx->params.rsyncable)
/* Rsync is disabled. */
return syncPoint;
+ if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
+ /* We don't emit synchronization points if it would produce too small blocks.
+ * We don't have enough input to find a synchronization point, so don't look.
+ */
+ return syncPoint;
if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
/* Not enough to compute the hash.
* We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1678,10 +1709,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
*/
return syncPoint;
/* Initialize the loop variables. */
- if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
- /* We have enough bytes buffered to initialize the hash.
+ if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
+ /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
+ * because they can't possibly be a sync point. So we can start
+ * part way through the input buffer.
+ */
+ pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
+ if (pos >= RSYNC_LENGTH) {
+ prev = istart + pos - RSYNC_LENGTH;
+ hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+ } else {
+ assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
+ prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+ hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
+ hash = ZSTD_rollingHash_append(hash, istart, pos);
+ }
+ } else {
+ /* We have enough bytes buffered to initialize the hash,
+ * and are have processed enough bytes to find a sync point.
* Start scanning at the beginning of the input.
*/
+ assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
+ assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
pos = 0;
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
@@ -1695,16 +1744,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
syncPoint.flush = 1;
return syncPoint;
}
- } else {
- /* We don't have enough bytes buffered to initialize the hash, but
- * we know we have at least RSYNC_LENGTH bytes total.
- * Start scanning after the first RSYNC_LENGTH bytes less the bytes
- * already buffered.
- */
- pos = RSYNC_LENGTH - mtctx->inBuff.filled;
- prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
- hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
- hash = ZSTD_rollingHash_append(hash, istart, pos);
}
/* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
* through the input. If we hit a synchronization point, then cut the
@@ -1716,8 +1755,9 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
*/
for (; pos < syncPoint.toLoad; ++pos) {
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
- /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+ assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
if ((hash & hitMask) == hitMask) {
syncPoint.toLoad = pos + 1;
syncPoint.flush = 1;