aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2024-02-05 14:24:02 -0500
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2024-02-16 03:00:45 -0500
commit 1f53f1e0a9b1e055222b28a70b2e327787e50d09 (patch)
tree 0b2dcdd926f3dcc2d4fb7ab125ee7bef7c2225ed
parent 183a820212381f6c447b5a7c9b92b34fa01c629b (diff)
download libopus-1f53f1e0a9b1e055222b28a70b2e327787e50d09.tar.gz
Support for extra offset
Allows us to exclude the most recent silence from DRED
-rw-r--r-- silk/dred_config.h 2
-rw-r--r-- silk/dred_decoder.c 14
-rw-r--r-- silk/dred_encoder.c 31
-rw-r--r-- src/opus_decoder.c 4
4 files changed, 32 insertions, 19 deletions
diff --git a/silk/dred_config.h b/silk/dred_config.h
index a729e696..4d31199e 100644
--- a/silk/dred_config.h
+++ b/silk/dred_config.h
@@ -32,7 +32,7 @@
#define DRED_EXTENSION_ID 126
/* Remove these two completely once DRED gets an extension number assigned. */
-#define DRED_EXPERIMENTAL_VERSION 8
+#define DRED_EXPERIMENTAL_VERSION 9
#define DRED_EXPERIMENTAL_BYTES 2
diff --git a/silk/dred_decoder.c b/silk/dred_decoder.c
index 641589a4..fefbf41d 100644
--- a/silk/dred_decoder.c
+++ b/silk/dred_decoder.c
@@ -39,12 +39,6 @@
#include "dred_rdovae_stats_data.h"
#include "dred_rdovae_constants.h"
-/* From http://graphics.stanford.edu/~seander/bithacks.html#FixedSignExtend */
-static int sign_extend(int x, int b) {
- int m = 1U << (b - 1);
- return (x ^ m) - m;
-}
-
static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
int i;
for (i=0;i<dim;i++) {
@@ -64,17 +58,19 @@ int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int mi
int q0;
int dQ;
int state_qoffset;
-
+ int extra_offset;
/* since features are decoded in quadruples, it makes no sense to go with an uneven number of redundancy frames */
celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);
/* decode initial state and initialize RDOVAE decoder */
ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
- /* Compute total offset, including DRED position in a multiframe packet. */
- dec->dred_offset = sign_extend(ec_dec_uint(&ec, 32), 5) + dred_frame_offset;
q0 = ec_dec_uint(&ec, 16);
dQ = ec_dec_uint(&ec, 8);
+ if (ec_dec_uint(&ec, 2)) extra_offset = 32*ec_dec_uint(&ec, 256);
+ else extra_offset = 0;
+ /* Compute total offset, including DRED position in a multiframe packet. */
+ dec->dred_offset = 16 - ec_dec_uint(&ec, 32) - extra_offset + dred_frame_offset;
/*printf("%d %d %d\n", dred_offset, q0, dQ);*/
state_qoffset = q0*DRED_STATE_DIM;
diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c
index 23a69743..d0da8d8f 100644
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -267,17 +267,35 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk
int state_qoffset;
ec_enc ec_bak;
int prev_active=0;
+ int latent_offset;
+ int extra_dred_offset=0;
int dred_encoded=0;
+ int total_offset;
+
+ latent_offset = enc->latent_offset;
+ while (latent_offset < enc->latents_buffer_fill && !dred_voice_active(activity_mem, latent_offset)) {
+ latent_offset++;
+ extra_dred_offset++;
+ }
/* entropy coding of state and latents */
ec_enc_init(&ec_encoder, buf, max_bytes);
- ec_enc_uint(&ec_encoder, enc->dred_offset, 32);
ec_enc_uint(&ec_encoder, q0, 16);
ec_enc_uint(&ec_encoder, dQ, 8);
+ total_offset = 16 - (enc->dred_offset - extra_dred_offset*8);
+ celt_assert(total_offset>=0);
+ if (total_offset > 31) {
+ ec_enc_uint(&ec_encoder, 1, 2);
+ ec_enc_uint(&ec_encoder, total_offset>>5, 256);
+ ec_enc_uint(&ec_encoder, total_offset&31, 32);
+ } else {
+ ec_enc_uint(&ec_encoder, 0, 2);
+ ec_enc_uint(&ec_encoder, total_offset, 32);
+ }
state_qoffset = q0*DRED_STATE_DIM;
dred_encode_latents(
&ec_encoder,
- &enc->state_buffer[enc->latent_offset*DRED_STATE_DIM],
+ &enc->state_buffer[latent_offset*DRED_STATE_DIM],
dred_state_quant_scales_q8 + state_qoffset,
dred_state_dead_zone_q8 + state_qoffset,
dred_state_r_q8 + state_qoffset,
@@ -288,7 +306,7 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk
return 0;
}
ec_bak = ec_encoder;
- for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-enc->latent_offset-1); i += 2)
+ for (i = 0; i < IMIN(2*max_chunks, enc->latents_buffer_fill-latent_offset-1); i += 2)
{
int active;
q_level = compute_quantizer(q0, dQ, i/2);
@@ -296,7 +314,7 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk
dred_encode_latents(
&ec_encoder,
- enc->latents_buffer + (i+enc->latent_offset) * DRED_LATENT_DIM,
+ enc->latents_buffer + (i+latent_offset) * DRED_LATENT_DIM,
dred_latent_quant_scales_q8 + offset,
dred_latent_dead_zone_q8 + offset,
dred_latent_r_q8 + offset,
@@ -309,7 +327,7 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk
if (i==0) return 0;
break;
}
- active = dred_voice_active(activity_mem, i+enc->latent_offset);
+ active = dred_voice_active(activity_mem, i+latent_offset);
if (active || prev_active) {
ec_bak = ec_encoder;
dred_encoded = i+2;
@@ -317,8 +335,7 @@ int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunk
prev_active = active;
}
/* Avoid sending empty DRED packets. */
- if (dred_encoded==0) return 0;
-
+ if (dred_encoded==0 || (dred_encoded<=2 && extra_dred_offset)) return 0;
ec_encoder = ec_bak;
ec_buffer_fill = (ec_tell(&ec_encoder)+7)/8;
diff --git a/src/opus_decoder.c b/src/opus_decoder.c
index 09930020..35d39910 100644
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -703,7 +703,7 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data,
if (feature_offset <= 4*dred->nb_latents-1 && feature_offset >= 0) {
lpcnet_plc_fec_add(&st->lpcnet, dred->fec_features+feature_offset*DRED_NUM_FEATURES);
} else {
- lpcnet_plc_fec_add(&st->lpcnet, NULL);
+ if (feature_offset >= 0) lpcnet_plc_fec_add(&st->lpcnet, NULL);
}
}
@@ -1417,7 +1417,7 @@ int opus_dred_parse(OpusDREDDecoder *dred_dec, OpusDRED *dred, const unsigned ch
dred_ec_decode(dred, payload, payload_len, min_feature_frames, dred_frame_offset);
if (!defer_processing)
opus_dred_process(dred_dec, dred, dred);
- return dred->nb_latents*sampling_rate/25 - sampling_rate/50;
+ return IMAX(0, dred->nb_latents*sampling_rate/25 - dred->dred_offset* sampling_rate/400);
}
return 0;
#else