aboutsummaryrefslogtreecommitdiff
path: root/src/zlib-ng/arch/s390/dfltcc_deflate.c
blob: 0210ddc17f9432ff5ad2b520789a81f3ec48ce4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
/* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL compression support. */

/*
   Use the following commands to build zlib-ng with DFLTCC compression support:

        $ ./configure --with-dfltcc-deflate
   or

        $ cmake -DWITH_DFLTCC_DEFLATE=1 .

   and then

        $ make
*/

#include "zbuild.h"
#include "deflate.h"
#include "trees_emit.h"
#include "dfltcc_deflate.h"
#include "dfltcc_detail.h"

struct dfltcc_deflate_state {
    struct dfltcc_state common;
    uint16_t level_mask;               /* Levels on which to use DFLTCC */
    uint32_t block_size;               /* New block each X bytes */
    size_t block_threshold;            /* New block after total_in > X */
    uint32_t dht_threshold;            /* New block only if avail_in >= X */
};

#define GET_DFLTCC_DEFLATE_STATE(state) ((struct dfltcc_deflate_state *)GET_DFLTCC_STATE(state))

void Z_INTERNAL *PREFIX(dfltcc_alloc_deflate_state)(PREFIX3(streamp) strm) {
    return dfltcc_alloc_state(strm, sizeof(deflate_state), sizeof(struct dfltcc_deflate_state));
}

void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state);

    dfltcc_reset_state(&dfltcc_state->common);

    /* Initialize tuning parameters */
    dfltcc_state->level_mask = DFLTCC_LEVEL_MASK;
    dfltcc_state->block_size = DFLTCC_BLOCK_SIZE;
    dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE;
    dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE;
}

void Z_INTERNAL PREFIX(dfltcc_copy_deflate_state)(void *dst, const void *src) {
    dfltcc_copy_state(dst, src, sizeof(deflate_state), sizeof(struct dfltcc_deflate_state));
}

static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy,
                                       int reproducible) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state);

    /* Unsupported compression settings */
    if ((dfltcc_state->level_mask & (1 << level)) == 0)
        return 0;
    if (window_bits != HB_BITS)
        return 0;
    if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)
        return 0;
    if (reproducible)
        return 0;

    /* Unsupported hardware */
    if (!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_GDHT) ||
            !is_bit_set(dfltcc_state->common.af.fns, DFLTCC_CMPR) ||
            !is_bit_set(dfltcc_state->common.af.fmts, DFLTCC_FMT0))
        return 0;

    return 1;
}

int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;

    return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible);
}

static inline void dfltcc_gdht(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
    size_t avail_in = strm->avail_in;

    dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL);
}

static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
    size_t avail_in = strm->avail_in;
    size_t avail_out = strm->avail_out;
    dfltcc_cc cc;

    cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR,
                param, &strm->next_out, &avail_out,
                &strm->next_in, &avail_in, state->window);
    strm->total_in += (strm->avail_in - avail_in);
    strm->total_out += (strm->avail_out - avail_out);
    strm->avail_in = avail_in;
    strm->avail_out = avail_out;
    return cc;
}

static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) {
    deflate_state *state = (deflate_state *)strm->state;

    send_bits(state, PREFIX(bi_reverse)(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid);
    PREFIX(flush_pending)(strm);
    if (state->pending != 0) {
        /* The remaining data is located in pending_out[0:pending]. If someone
         * calls put_byte() - this might happen in deflate() - the byte will be
         * placed into pending_buf[pending], which is incorrect. Move the
         * remaining data to the beginning of pending_buf so that put_byte() is
         * usable again.
         */
        memmove(state->pending_buf, state->pending_out, state->pending);
        state->pending_out = state->pending_buf;
    }
#ifdef ZLIB_DEBUG
    state->compressed_len += param->eobl;
#endif
}

int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->common.param;
    uInt masked_avail_in;
    dfltcc_cc cc;
    int need_empty_block;
    int soft_bcc;
    int no_flush;

    if (!PREFIX(dfltcc_can_deflate)(strm)) {
        /* Clear history. */
        if (flush == Z_FULL_FLUSH)
            param->hl = 0;
        return 0;
    }

again:
    masked_avail_in = 0;
    soft_bcc = 0;
    no_flush = flush == Z_NO_FLUSH;

    /* No input data. Return, except when Continuation Flag is set, which means
     * that DFLTCC has buffered some output in the parameter block and needs to
     * be called again in order to flush it.
     */
    if (strm->avail_in == 0 && !param->cf) {
        /* A block is still open, and the hardware does not support closing
         * blocks without adding data. Thus, close it manually.
         */
        if (!no_flush && param->bcf) {
            send_eobs(strm, param);
            param->bcf = 0;
        }
        /* Let one of deflate_* functions write a trailing empty block. */
        if (flush == Z_FINISH)
            return 0;
        /* Clear history. */
        if (flush == Z_FULL_FLUSH)
            param->hl = 0;
        /* Trigger block post-processing if necessary. */
        *result = no_flush ? need_more : block_done;
        return 1;
    }

    /* There is an open non-BFINAL block, we are not going to close it just
     * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see
     * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new
     * DHT in order to adapt to a possibly changed input data distribution.
     */
    if (param->bcf && no_flush &&
            strm->total_in > dfltcc_state->block_threshold &&
            strm->avail_in >= dfltcc_state->dht_threshold) {
        if (param->cf) {
            /* We need to flush the DFLTCC buffer before writing the
             * End-of-block Symbol. Mask the input data and proceed as usual.
             */
            masked_avail_in += strm->avail_in;
            strm->avail_in = 0;
            no_flush = 0;
        } else {
            /* DFLTCC buffer is empty, so we can manually write the
             * End-of-block Symbol right away.
             */
            send_eobs(strm, param);
            param->bcf = 0;
            dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
        }
    }

    /* No space for compressed data. If we proceed, dfltcc_cmpr() will return
     * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still
     * set BCF=1, which is wrong. Avoid complications and return early.
     */
    if (strm->avail_out == 0) {
        *result = need_more;
        return 1;
    }

    /* The caller gave us too much data. Pass only one block worth of
     * uncompressed data to DFLTCC and mask the rest, so that on the next
     * iteration we start a new block.
     */
    if (no_flush && strm->avail_in > dfltcc_state->block_size) {
        masked_avail_in += (strm->avail_in - dfltcc_state->block_size);
        strm->avail_in = dfltcc_state->block_size;
    }

    /* When we have an open non-BFINAL deflate block and caller indicates that
     * the stream is ending, we need to close an open deflate block and open a
     * BFINAL one.
     */
    need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf;

    /* Translate stream to parameter block */
    param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
    if (!no_flush)
        /* We need to close a block. Always do this in software - when there is
         * no input data, the hardware will not honor BCC. */
        soft_bcc = 1;
    if (flush == Z_FINISH && !param->bcf)
        /* We are about to open a BFINAL block, set Block Header Final bit
         * until the stream ends.
         */
        param->bhf = 1;
    /* DFLTCC-CMPR will write to next_out, so make sure that buffers with
     * higher precedence are empty.
     */
    Assert(state->pending == 0, "There must be no pending bytes");
    Assert(state->bi_valid < 8, "There must be less than 8 pending bits");
    param->sbb = (unsigned int)state->bi_valid;
    if (param->sbb > 0)
        *strm->next_out = (unsigned char)state->bi_buf;
    /* Honor history and check value */
    param->nt = 0;
    param->cv = state->wrap == 2 ? ZSWAP32(state->crc_fold.value) : strm->adler;

    /* When opening a block, choose a Huffman-Table Type */
    if (!param->bcf) {
        if (state->strategy == Z_FIXED || (strm->total_in == 0 && dfltcc_state->block_threshold > 0))
            param->htt = HTT_FIXED;
        else {
            param->htt = HTT_DYNAMIC;
            dfltcc_gdht(strm);
        }
    }

    /* Deflate */
    do {
        cc = dfltcc_cmpr(strm);
        if (strm->avail_in < 4096 && masked_avail_in > 0)
            /* We are about to call DFLTCC with a small input buffer, which is
             * inefficient. Since there is masked data, there will be at least
             * one more DFLTCC call, so skip the current one and make the next
             * one handle more data.
             */
            break;
    } while (cc == DFLTCC_CC_AGAIN);

    /* Translate parameter block to stream */
    strm->msg = oesc_msg(dfltcc_state->common.msg, param->oesc);
    state->bi_valid = param->sbb;
    if (state->bi_valid == 0)
        state->bi_buf = 0; /* Avoid accessing next_out */
    else
        state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
    if (state->wrap == 2)
        state->crc_fold.value = ZSWAP32(param->cv);
    else
        strm->adler = param->cv;

    /* Unmask the input data */
    strm->avail_in += masked_avail_in;
    masked_avail_in = 0;

    /* If we encounter an error, it means there is a bug in DFLTCC call */
    Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG");

    /* Update Block-Continuation Flag. It will be used to check whether to call
     * GDHT the next time.
     */
    if (cc == DFLTCC_CC_OK) {
        if (soft_bcc) {
            send_eobs(strm, param);
            param->bcf = 0;
            dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
        } else
            param->bcf = 1;
        if (flush == Z_FINISH) {
            if (need_empty_block)
                /* Make the current deflate() call also close the stream */
                return 0;
            else {
                bi_windup(state);
                *result = finish_done;
            }
        } else {
            if (flush == Z_FULL_FLUSH)
                param->hl = 0; /* Clear history */
            *result = flush == Z_NO_FLUSH ? need_more : block_done;
        }
    } else {
        param->bcf = 1;
        *result = need_more;
    }
    if (strm->avail_in != 0 && strm->avail_out != 0)
        goto again; /* deflate() must use all input or all output */
    return 1;
}

/*
   Switching between hardware and software compression.

   DFLTCC does not support all zlib settings, e.g. generation of non-compressed
   blocks or alternative window sizes. When such settings are applied on the
   fly with deflateParams, we need to convert between hardware and software
   window formats.
*/
static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;

    return strm->total_in > 0 || param->nt == 0 || param->hl > 0;
}

int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush) {
    deflate_state *state = (deflate_state *)strm->state;
    int could_deflate = PREFIX(dfltcc_can_deflate)(strm);
    int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible);

    if (can_deflate == could_deflate)
        /* We continue to work in the same mode - no changes needed */
        return Z_OK;

    if (!dfltcc_was_deflate_used(strm))
        /* DFLTCC was not used yet - no changes needed */
        return Z_OK;

    /* For now, do not convert between window formats - simply get rid of the old data instead */
    *flush = Z_FULL_FLUSH;
    return Z_OK;
}

int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->param;

    /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might
     * close the block without resetting the compression state. Detect this
     * situation and return that deflation is not done.
     */
    if (flush == Z_FULL_FLUSH && strm->avail_out == 0)
        return 0;

    /* Return that deflation is not done if DFLTCC is used and either it
     * buffered some data (Continuation Flag is set), or has not written EOBS
     * yet (Block-Continuation Flag is set).
     */
    return !PREFIX(dfltcc_can_deflate)(strm) || (!param->cf && !param->bcf);
}

int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible) {
    deflate_state *state = (deflate_state *)strm->state;

    return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm);
}

/*
   Preloading history.
*/
static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) {
    size_t offset;
    size_t n;

    /* Do not use more than 32K */
    if (count > HB_SIZE) {
        buf += count - HB_SIZE;
        count = HB_SIZE;
    }
    offset = (param->ho + param->hl) % HB_SIZE;
    if (offset + count <= HB_SIZE)
        /* Circular history buffer does not wrap - copy one chunk */
        memcpy(history + offset, buf, count);
    else {
        /* Circular history buffer wraps - copy two chunks */
        n = HB_SIZE - offset;
        memcpy(history + offset, buf, n);
        memcpy(history, buf + n, count - n);
    }
    n = param->hl + count;
    if (n <= HB_SIZE)
        /* All history fits into buffer - no need to discard anything */
        param->hl = n;
    else {
        /* History does not fit into buffer - discard extra bytes */
        param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE;
        param->hl = HB_SIZE;
    }
}

int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm,
                                                const unsigned char *dictionary, uInt dict_length) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->param;

    append_history(param, state->window, dictionary, dict_length);
    state->strstart = 1; /* Add FDICT to zlib header */
    state->block_start = state->strstart; /* Make deflate_stored happy */
    return Z_OK;
}

int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->param;

    if (dictionary) {
        if (param->ho + param->hl <= HB_SIZE)
            /* Circular history buffer does not wrap - copy one chunk */
            memcpy(dictionary, state->window + param->ho, param->hl);
        else {
            /* Circular history buffer wraps - copy two chunks */
            memcpy(dictionary, state->window + param->ho, HB_SIZE - param->ho);
            memcpy(dictionary + HB_SIZE - param->ho, state->window, param->ho + param->hl - HB_SIZE);
        }
    }
    if (dict_length)
        *dict_length = param->hl;
    return Z_OK;
}