about | summary | refs | log | tree | commit | diff
path: root/src/opus_encoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/opus_encoder.c')
-rw-r--r-- src/opus_encoder.c 43
1 files changed, 27 insertions, 16 deletions
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 7b5f0abf..8c8db5a5 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -87,6 +87,7 @@ struct OpusEncoder {
int lfe;
int arch;
int use_dtx; /* general DTX for both SILK and CELT */
+ int fec_config;
#ifndef DISABLE_FLOAT_API
TonalityAnalysisState analysis;
#endif
@@ -112,7 +113,7 @@ struct OpusEncoder {
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef DISABLE_FLOAT_API
int detected_bandwidth;
- int nb_no_activity_frames;
+ int nb_no_activity_ms_Q1;
opus_val32 peak_signal_energy;
#endif
int nonfinal_frame; /* current frame is not the final in a packet */
@@ -893,24 +894,28 @@ static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, in
/* Decides if DTX should be turned on (=1) or off (=0) */
static int decide_dtx_mode(opus_int activity, /* indicates if this frame contains speech/music */
- int *nb_no_activity_frames /* number of consecutive frames with no activity */
+ int *nb_no_activity_ms_Q1, /* number of consecutive milliseconds with no activity, in Q1 */
+ int frame_size_ms_Q1 /* number of milliseconds in this update, in Q1 */
)
{
if (!activity)
{
- /* The number of consecutive DTX frames should be within the allowed bounds */
- (*nb_no_activity_frames)++;
- if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
+ /* The number of consecutive DTX frames should be within the allowed bounds.
+ Note that the allowed bound is defined in the SILK headers and assumes 20 ms
+ frames. As this function can be called with any frame length, a conversion to
+ milliseconds is done before the comparisons. */
+ (*nb_no_activity_ms_Q1) += frame_size_ms_Q1;
+ if (*nb_no_activity_ms_Q1 > NB_SPEECH_FRAMES_BEFORE_DTX*20*2)
{
- if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
+ if (*nb_no_activity_ms_Q1 <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX)*20*2)
/* Valid frame for DTX! */
return 1;
else
- (*nb_no_activity_frames) = NB_SPEECH_FRAMES_BEFORE_DTX;
+ (*nb_no_activity_ms_Q1) = NB_SPEECH_FRAMES_BEFORE_DTX*20*2;
}
} else
- (*nb_no_activity_frames) = 0;
+ (*nb_no_activity_ms_Q1) = 0;
return 0;
}
@@ -1310,6 +1315,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->stream_channels = st->force_channels;
} else {
#ifdef FUZZING
+ (void)stereo_music_threshold;
+ (void)stereo_voice_threshold;
/* Random mono/stereo decision */
if (st->channels == 2 && (rand()&0x1F)==0)
st->stream_channels = 3-st->stream_channels;
@@ -1348,6 +1355,8 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
} else if (st->user_forced_mode == OPUS_AUTO)
{
#ifdef FUZZING
+ (void)stereo_width;
+ (void)mode_thresholds;
/* Random mode switching */
if ((rand()&0xF)==0)
{
@@ -1385,8 +1394,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
- /* When FEC is enabled and there's enough packet loss, use SILK */
- if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4)
+ /* When FEC is enabled and there's enough packet loss, use SILK.
+ Unless the FEC is set to 2, in which case we don't switch to SILK if we're confident we have music. */
+ if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4 && (st->fec_config != 2 || voice_est > 25))
st->mode = MODE_SILK_ONLY;
/* When encoding voice and DTX is enabled but the generalized DTX cannot be used,
use SILK in order to make use of its DTX. */
@@ -2132,7 +2142,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
#ifndef DISABLE_FLOAT_API
if (st->use_dtx && (analysis_info.valid || is_silence))
{
- if (decide_dtx_mode(activity, &st->nb_no_activity_frames))
+ if (decide_dtx_mode(activity, &st->nb_no_activity_ms_Q1, 2*1000*frame_size/st->Fs))
{
st->rangeFinal = 0;
data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
@@ -2140,7 +2150,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
return 1;
}
} else {
- st->nb_no_activity_frames = 0;
+ st->nb_no_activity_ms_Q1 = 0;
}
#endif
@@ -2435,11 +2445,12 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
case OPUS_SET_INBAND_FEC_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
- if(value<0 || value>1)
+ if(value<0 || value>2)
{
goto bad_arg;
}
- st->silk_mode.useInBandFEC = value;
+ st->fec_config = value;
+ st->silk_mode.useInBandFEC = (value != 0);
}
break;
case OPUS_GET_INBAND_FEC_REQUEST:
@@ -2449,7 +2460,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
{
goto bad_arg;
}
- *value = st->silk_mode.useInBandFEC;
+ *value = st->fec_config;
}
break;
case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
@@ -2733,7 +2744,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
#ifndef DISABLE_FLOAT_API
else if (st->use_dtx) {
/* DTX determined by Opus. */
- *value = st->nb_no_activity_frames >= NB_SPEECH_FRAMES_BEFORE_DTX;
+ *value = st->nb_no_activity_ms_Q1 >= NB_SPEECH_FRAMES_BEFORE_DTX*20*2;
}
#endif
else {