webrtc/modules/audio_coding/neteq/normal.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/neteq/normal.h"

#include <string.h>  // memset, memcpy

#include <algorithm>  // min

#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/expand.h"

namespace webrtc {

int Normal::Process(const int16_t* input,
                    size_t length,
                    Modes last_mode,
                    int16_t* external_mute_factor_array,
                    AudioMultiVector* output) {
  if (length == 0) {
    // Nothing to process.
    output->Clear();
    return static_cast<int>(length);
  }

  assert(output->Empty());
  // Output should be empty at this point.
  if (length % output->Channels() != 0) {
    // The length does not match the number of channels.
    output->Clear();
    return 0;
  }
  output->PushBackInterleaved(input, length);
  int16_t* signal = &(*output)[0][0];

  const int fs_mult = fs_hz_ / 8000;
  assert(fs_mult > 0);
  // fs_shift = log2(fs_mult), rounded down.
  // Note that |fs_shift| is not "exact" for 48 kHz.
  // TODO(hlundin): Investigate this further.
  const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);

  // Check if last RecOut call resulted in an Expand. If so, we have to take
  // care of some cross-fading and unmuting.
  if (last_mode == kModeExpand) {
    // Generate interpolation data using Expand.
    // First, set Expand parameters to appropriate values.
    expand_->SetParametersForNormalAfterExpand();

    // Call Expand.
    AudioMultiVector expanded(output->Channels());
    expand_->Process(&expanded);
    expand_->Reset();

    for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
      // Adjust muting factor (main muting factor times expand muting factor).
      external_mute_factor_array[channel_ix] = static_cast<int16_t>(
          (external_mute_factor_array[channel_ix] *
          expand_->MuteFactor(channel_ix)) >> 14);

      int16_t* signal = &(*output)[channel_ix][0];
      size_t length_per_channel = length / output->Channels();
      // Find largest absolute value in new data.
      int16_t decoded_max =
          WebRtcSpl_MaxAbsValueW16(signal, length_per_channel);
      // Adjust muting factor if needed (to BGN level).
      size_t energy_length =
          std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
      int scaling = 6 + fs_shift
          - WebRtcSpl_NormW32(decoded_max * decoded_max);
      scaling = std::max(scaling, 0);  // |scaling| should always be >= 0.
      int32_t energy = WebRtcSpl_DotProductWithScale(signal, signal,
                                                     energy_length, scaling);
      int32_t scaled_energy_length =
          static_cast<int32_t>(energy_length >> scaling);
      if (scaled_energy_length > 0) {
        energy = energy / scaled_energy_length;
      } else {
        energy = 0;
      }

      int mute_factor;
      if ((energy != 0) &&
          (energy > background_noise_.Energy(channel_ix))) {
        // Normalize new frame energy to 15 bits.
        scaling = WebRtcSpl_NormW32(energy) - 16;
        // We want background_noise_.energy() / energy in Q14.
        int32_t bgn_energy =
            background_noise_.Energy(channel_ix) << (scaling+14);
        int16_t energy_scaled = static_cast<int16_t>(energy << scaling);
        int32_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
        mute_factor = WebRtcSpl_SqrtFloor(ratio << 14);
      } else {
        mute_factor = 16384;  // 1.0 in Q14.
      }
      if (mute_factor > external_mute_factor_array[channel_ix]) {
        external_mute_factor_array[channel_ix] =
            static_cast<int16_t>(std::min(mute_factor, 16384));
      }

      // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
      int increment = 64 / fs_mult;
      for (size_t i = 0; i < length_per_channel; i++) {
        // Scale with mute factor.
        assert(channel_ix < output->Channels());
        assert(i < output->Size());
        int32_t scaled_signal = (*output)[channel_ix][i] *
            external_mute_factor_array[channel_ix];
        // Shift 14 with proper rounding.
        (*output)[channel_ix][i] =
            static_cast<int16_t>((scaled_signal + 8192) >> 14);
        // Increase mute_factor towards 16384.
        external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
            external_mute_factor_array[channel_ix] + increment, 16384));
      }

      // Interpolate the expanded data into the new vector.
      // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
      assert(fs_shift < 3);  // Will always be 0, 1, or, 2.
      increment = 4 >> fs_shift;
      int fraction = increment;
      for (size_t i = 0; i < static_cast<size_t>(8 * fs_mult); i++) {
        // TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8
        // now for legacy bit-exactness.
        assert(channel_ix < output->Channels());
        assert(i < output->Size());
        (*output)[channel_ix][i] =
            static_cast<int16_t>((fraction * (*output)[channel_ix][i] +
                (32 - fraction) * expanded[channel_ix][i] + 8) >> 5);
        fraction += increment;
      }
    }
  } else if (last_mode == kModeRfc3389Cng) {
    assert(output->Channels() == 1);  // Not adapted for multi-channel yet.
    static const size_t kCngLength = 32;
    int16_t cng_output[kCngLength];
    // Reset mute factor and start up fresh.
    external_mute_factor_array[0] = 16384;
    AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();

    if (cng_decoder) {
      // Generate long enough for 32kHz.
      if (WebRtcCng_Generate(cng_decoder->CngDecoderInstance(), cng_output,
                             kCngLength, 0) < 0) {
        // Error returned; set return vector to all zeros.
        memset(cng_output, 0, sizeof(cng_output));
      }
    } else {
      // If no CNG instance is defined, just copy from the decoded data.
      // (This will result in interpolating the decoded with itself.)
      memcpy(cng_output, signal, fs_mult * 8 * sizeof(int16_t));
    }
    // Interpolate the CNG into the new vector.
    // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
    assert(fs_shift < 3);  // Will always be 0, 1, or, 2.
    int16_t increment = 4 >> fs_shift;
    int16_t fraction = increment;
    for (size_t i = 0; i < static_cast<size_t>(8 * fs_mult); i++) {
      // TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8 now
      // for legacy bit-exactness.
      signal[i] =
          (fraction * signal[i] + (32 - fraction) * cng_output[i] + 8) >> 5;
      fraction += increment;
    }
  } else if (external_mute_factor_array[0] < 16384) {
    // Previous was neither of Expand, FadeToBGN or RFC3389_CNG, but we are
    // still ramping up from previous muting.
    // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
    int increment = 64 / fs_mult;
    size_t length_per_channel = length / output->Channels();
    for (size_t i = 0; i < length_per_channel; i++) {
      for (size_t channel_ix = 0; channel_ix < output->Channels();
          ++channel_ix) {
        // Scale with mute factor.
        assert(channel_ix < output->Channels());
        assert(i < output->Size());
        int32_t scaled_signal = (*output)[channel_ix][i] *
            external_mute_factor_array[channel_ix];
        // Shift 14 with proper rounding.
        (*output)[channel_ix][i] =
            static_cast<int16_t>((scaled_signal + 8192) >> 14);
        // Increase mute_factor towards 16384.
        external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
            16384, external_mute_factor_array[channel_ix] + increment));
      }
    }
  }

  return static_cast<int>(length);
}

}  // namespace webrtc