webrtc/modules/audio_coding/acm2/acm_receiver.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307

/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_
#define WEBRTC_MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_

#include <map>
#include <string>
#include <vector>

#include "webrtc/base/array_view.h"
#include "webrtc/base/optional.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/base/thread_annotations.h"
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/engine_configurations.h"
#include "webrtc/modules/audio_coding/include/audio_coding_module.h"
#include "webrtc/modules/audio_coding/acm2/acm_resampler.h"
#include "webrtc/modules/audio_coding/acm2/call_statistics.h"
#include "webrtc/modules/audio_coding/acm2/initial_delay_manager.h"
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/typedefs.h"

namespace webrtc {

struct CodecInst;
class CriticalSectionWrapper;
class NetEq;

namespace acm2 {

class AcmReceiver {
 public:
  struct Decoder {
    int acm_codec_id;
    uint8_t payload_type;
    // This field is meaningful for codecs where both mono and
    // stereo versions are registered under the same ID.
    size_t channels;
    int sample_rate_hz;
  };

  // Constructor of the class
  explicit AcmReceiver(const AudioCodingModule::Config& config);

  // Destructor of the class.
  ~AcmReceiver();

  //
  // Inserts a payload with its associated RTP-header into NetEq.
  //
  // Input:
  //   - rtp_header           : RTP header for the incoming payload containing
  //                            information about payload type, sequence number,
  //                            timestamp, SSRC and marker bit.
  //   - incoming_payload     : Incoming audio payload.
  //   - length_payload       : Length of incoming audio payload in bytes.
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int InsertPacket(const WebRtcRTPHeader& rtp_header,
                   rtc::ArrayView<const uint8_t> incoming_payload);

  //
  // Asks NetEq for 10 milliseconds of decoded audio.
  //
  // Input:
  //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
  //                            audio. If set -1 indicates to resampling is
  //                            is required and the audio returned at the
  //                            sampling rate of the decoder.
  //
  // Output:
  //   -audio_frame           : an audio frame were output data and
  //                            associated parameters are written to.
  //
  // Return value             : 0 if OK.
  //                           -1 if NetEq returned an error.
  //
  int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);

  //
  // Adds a new codec to the NetEq codec database.
  //
  // Input:
  //   - acm_codec_id        : ACM codec ID; -1 means external decoder.
  //   - payload_type        : payload type.
  //   - sample_rate_hz      : sample rate.
  //   - audio_decoder       : pointer to a decoder object. If it's null, then
  //                           NetEq will internally create a decoder object
  //                           based on the value of |acm_codec_id| (which
  //                           mustn't be -1). Otherwise, NetEq will use the
  //                           given decoder for the given payload type. NetEq
  //                           won't take ownership of the decoder; it's up to
  //                           the caller to delete it when it's no longer
  //                           needed.
  //
  //                           Providing an existing decoder object here is
  //                           necessary for external decoders, but may also be
  //                           used for built-in decoders if NetEq doesn't have
  //                           all the info it needs to construct them properly
  //                           (e.g. iSAC, where the decoder needs to be paired
  //                           with an encoder).
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int AddCodec(int acm_codec_id,
               uint8_t payload_type,
               size_t channels,
               int sample_rate_hz,
               AudioDecoder* audio_decoder,
               const std::string& name);

  //
  // Sets a minimum delay for packet buffer. The given delay is maintained,
  // unless channel condition dictates a higher delay.
  //
  // Input:
  //   - delay_ms             : minimum delay in milliseconds.
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int SetMinimumDelay(int delay_ms);

  //
  // Sets a maximum delay [ms] for the packet buffer. The target delay does not
  // exceed the given value, even if channel condition requires so.
  //
  // Input:
  //   - delay_ms             : maximum delay in milliseconds.
  //
  // Return value             : 0 if OK.
  //                           <0 if NetEq returned an error.
  //
  int SetMaximumDelay(int delay_ms);

  //
  // Get least required delay computed based on channel conditions. Note that
  // this is before applying any user-defined limits (specified by calling
  // (SetMinimumDelay() and/or SetMaximumDelay()).
  //
  int LeastRequiredDelayMs() const;

  //
  // Resets the initial delay to zero.
  //
  void ResetInitialDelay();

  // Returns the sample rate of the decoder associated with the last incoming
  // packet. If no packet of a registered non-CNG codec has been received, the
  // return value is empty. Also, if the decoder was unregistered since the last
  // packet was inserted, the return value is empty.
  rtc::Optional<int> last_packet_sample_rate_hz() const;

  // Returns last_output_sample_rate_hz from the NetEq instance.
  int last_output_sample_rate_hz() const;

  //
  // Get the current network statistics from NetEq.
  //
  // Output:
  //   - statistics           : The current network statistics.
  //
  void GetNetworkStatistics(NetworkStatistics* statistics);

  //
  // Enable post-decoding VAD.
  //
  void EnableVad();

  //
  // Disable post-decoding VAD.
  //
  void DisableVad();

  //
  // Returns whether post-decoding VAD is enabled (true) or disabled (false).
  //
  bool vad_enabled() const { return vad_enabled_; }

  //
  // Flushes the NetEq packet and speech buffers.
  //
  void FlushBuffers();

  //
  // Removes a payload-type from the NetEq codec database.
  //
  // Input:
  //   - payload_type         : the payload-type to be removed.
  //
  // Return value             : 0 if OK.
  //                           -1 if an error occurred.
  //
  int RemoveCodec(uint8_t payload_type);

  //
  // Remove all registered codecs.
  //
  int RemoveAllCodecs();

  //
  // Set ID.
  //
  void set_id(int id);  // TODO(turajs): can be inline.

  //
  // Gets the RTP timestamp of the last sample delivered by GetAudio().
  // Returns true if the RTP timestamp is valid, otherwise false.
  //
  bool GetPlayoutTimestamp(uint32_t* timestamp);

  //
  // Get the audio codec associated with the last non-CNG/non-DTMF received
  // payload. If no non-CNG/non-DTMF packet is received -1 is returned,
  // otherwise return 0.
  //
  int LastAudioCodec(CodecInst* codec) const;

  //
  // Get a decoder given its registered payload-type.
  //
  // Input:
  //    -payload_type         : the payload-type of the codec to be retrieved.
  //
  // Output:
  //    -codec                : codec associated with the given payload-type.
  //
  // Return value             : 0 if succeeded.
  //                           -1 if failed, e.g. given payload-type is not
  //                              registered.
  //
  int DecoderByPayloadType(uint8_t payload_type,
                           CodecInst* codec) const;

  //
  // Enable NACK and set the maximum size of the NACK list. If NACK is already
  // enabled then the maximum NACK list size is modified accordingly.
  //
  // Input:
  //    -max_nack_list_size  : maximum NACK list size
  //                           should be positive (none zero) and less than or
  //                           equal to |Nack::kNackListSizeLimit|
  // Return value
  //                         : 0 if succeeded.
  //                          -1 if failed
  //
  int EnableNack(size_t max_nack_list_size);

  // Disable NACK.
  void DisableNack();

  //
  // Get a list of packets to be retransmitted.
  //
  // Input:
  //    -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
  // Return value           : list of packets to be retransmitted.
  //
  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const;

  //
  // Get statistics of calls to GetAudio().
  void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;

 private:
  const Decoder* RtpHeaderToDecoder(const RTPHeader& rtp_header,
                                    uint8_t payload_type) const
      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  uint32_t NowInTimestamp(int decoder_sampling_rate) const;

  rtc::scoped_ptr<CriticalSectionWrapper> crit_sect_;
  int id_;  // TODO(henrik.lundin) Make const.
  const Decoder* last_audio_decoder_ GUARDED_BY(crit_sect_);
  AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
  ACMResampler resampler_ GUARDED_BY(crit_sect_);
  // Used in GetAudio, declared as member to avoid allocating every 10ms.
  // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
  rtc::scoped_ptr<int16_t[]> audio_buffer_ GUARDED_BY(crit_sect_);
  rtc::scoped_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
  CallStatistics call_stats_ GUARDED_BY(crit_sect_);
  NetEq* neteq_;
  // Decoders map is keyed by payload type
  std::map<uint8_t, Decoder> decoders_ GUARDED_BY(crit_sect_);
  bool vad_enabled_;
  Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
  bool resampled_last_output_frame_ GUARDED_BY(crit_sect_);
  rtc::Optional<int> last_packet_sample_rate_hz_ GUARDED_BY(crit_sect_);
};

}  // namespace acm2

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_CODING_ACM2_ACM_RECEIVER_H_