voice_engine/include/voe_audio_processing.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// This sub-API supports the following functionalities:
//
//  - Noise Suppression (NS).
//  - Automatic Gain Control (AGC).
//  - Echo Control (EC).
//  - Receiving side VAD, NS and AGC.
//  - Measurements of instantaneous speech, noise and echo levels.
//  - Generation of AP debug recordings.
//  - Detection of keyboard typing which can disrupt a voice conversation.
//
// Usage example, omitting error checking:
//
//  using namespace webrtc;
//  VoiceEngine* voe = VoiceEngine::Create();
//  VoEBase* base = VoEBase::GetInterface(voe);
//  VoEAudioProcessing* ap = VoEAudioProcessing::GetInterface(voe);
//  base->Init();
//  ap->SetAgcStatus(true, kAgcAdaptiveAnalog);
//  ...
//  base->Terminate();
//  base->Release();
//  ap->Release();
//  VoiceEngine::Delete(voe);
//
#ifndef WEBRTC_VOICE_ENGINE_VOE_AUDIO_PROCESSING_H
#define WEBRTC_VOICE_ENGINE_VOE_AUDIO_PROCESSING_H

#include "webrtc/common_types.h"

namespace webrtc {

class VoiceEngine;

// Observer interface for receive-side (Rx) voice activity detection (VAD)
// notifications. An implementation is passed to
// VoEAudioProcessing::RegisterRxVadObserver() for the channel of interest.
class WEBRTC_DLLEXPORT VoERxVadCallback {
 public:
  // Delivers a VAD decision for |channel|.
  // NOTE(review): |vadDecision| is presumably 1 for speech and 0 for
  // silence, like VoEAudioProcessing::VoiceActivityIndicator() -- confirm
  // against the implementation.
  virtual void OnRxVad(int channel, int vadDecision) = 0;

 protected:
  // Not public: observers cannot be destroyed through this interface.
  virtual ~VoERxVadCallback() {}
};

// VoiceEngine sub-API controlling the audio processing components: noise
// suppression, automatic gain control, echo control, receive-side
// processing, echo metrics, debug recordings and typing detection.
//
// NOTE(review): the int-returning methods presumably report 0 on success
// and -1 on failure; this header does not state it -- confirm against the
// implementation.
class WEBRTC_DLLEXPORT VoEAudioProcessing {
 public:
  // Obtains the VoEAudioProcessing sub-API for |voiceEngine|. A successful
  // call increments an internal reference counter. NULL is returned when
  // the sub-API is not supported or when construction fails.
  static VoEAudioProcessing* GetInterface(VoiceEngine* voiceEngine);

  // Gives up the VoEAudioProcessing sub-API: decrements the internal
  // reference counter and returns its new value. The count must be zero
  // for every sub-API before the VoiceEngine object can be deleted safely.
  virtual int Release() = 0;

  // Turns Noise Suppression (NS) on or off and selects its |mode|.
  // NS reduces noise in the microphone signal.
  virtual int SetNsStatus(bool enable, NsModes mode = kNsUnchanged) = 0;

  // Retrieves the NS state in |enabled| and the NS mode in |mode|.
  virtual int GetNsStatus(bool& enabled, NsModes& mode) = 0;

  // Turns Automatic Gain Control (AGC) on or off and selects its |mode|.
  // AGC brings the microphone signal to an appropriate level.
  virtual int SetAgcStatus(bool enable, AgcModes mode = kAgcUnchanged) = 0;

  // Retrieves the AGC state in |enabled| and the AGC mode in |mode|.
  virtual int GetAgcStatus(bool& enabled, AgcModes& mode) = 0;

  // Applies the AGC configuration |config|. Only intended for situations
  // where the working environment is well known.
  virtual int SetAgcConfig(AgcConfig config) = 0;

  // Retrieves the AGC configuration in |config|.
  virtual int GetAgcConfig(AgcConfig& config) = 0;

  // Turns Echo Control (EC) on or off and selects its |mode|.
  // EC mitigates acoustic echo, i.e. a user hearing their own speech
  // repeated back because of acoustic coupling between the speaker and
  // the microphone at the remote end.
  virtual int SetEcStatus(bool enable, EcModes mode = kEcUnchanged) = 0;

  // Retrieves the EC state in |enabled| and the EC mode in |mode|.
  virtual int GetEcStatus(bool& enabled, EcModes& mode) = 0;

  // Controls whether the echo canceller compensates for clock drift
  // between the capture and render streams (i.e. only when using
  // EcMode==kEcAec). Compensation is only enabled on platforms that
  // support it; on other platforms an error is returned. Use
  // |DriftCompensationSupported()| to find out whether the platform is
  // supported.
  virtual int EnableDriftCompensation(bool enable) = 0;
  virtual bool DriftCompensationEnabled() = 0;
  static bool DriftCompensationSupported();

  // Sets a delay |offset| in ms that is added to the system delay reported
  // by the OS; the AEC uses that delay to synchronize the far- and
  // near-end streams. A system may introduce delay that the OS does not
  // report but that the user knows about; this method makes it possible to
  // compensate for such unreported delay.
  virtual void SetDelayOffsetMs(int offset) = 0;
  virtual int DelayOffsetMs() = 0;

  // Changes the settings of the AEC designed for mobile devices (AECM).
  virtual int SetAecmMode(AecmModes mode = kAecmSpeakerphone,
                          bool enableCNG = true) = 0;

  // Retrieves the AECM settings in |mode| and |enabledCNG|.
  virtual int GetAecmMode(AecmModes& mode, bool& enabledCNG) = 0;

  // Controls a high pass filter on the capture signal, which removes DC
  // bias and low-frequency noise. Enabling it is recommended.
  virtual int EnableHighPassFilter(bool enable) = 0;
  virtual bool IsHighPassFilterEnabled() = 0;

  // Turns the receiving-side (Rx) NS on or off and selects its |mode|.
  // The Rx NS reduces noise in the signal received on the given |channel|.
  // For advanced usage only.
  virtual int SetRxNsStatus(int channel,
                            bool enable,
                            NsModes mode = kNsUnchanged) = 0;

  // Retrieves the state and mode of the receiving-side NS.
  virtual int GetRxNsStatus(int channel, bool& enabled, NsModes& mode) = 0;

  // Turns the receiving-side (Rx) AGC on or off and selects its |mode|.
  // The Rx AGC brings the signal received on the given |channel| to an
  // appropriate level. For advanced usage only.
  virtual int SetRxAgcStatus(int channel,
                             bool enable,
                             AgcModes mode = kAgcUnchanged) = 0;

  // Retrieves the state and mode of the receiving-side AGC.
  virtual int GetRxAgcStatus(int channel, bool& enabled, AgcModes& mode) = 0;

  // Changes the receiving-side AGC configuration of the given |channel|.
  virtual int SetRxAgcConfig(int channel, AgcConfig config) = 0;

  // Retrieves the receiving-side AGC configuration.
  virtual int GetRxAgcConfig(int channel, AgcConfig& config) = 0;

  // Installs the VoERxVadCallback |observer| and turns on Rx VAD
  // notifications for the given |channel|.
  virtual int RegisterRxVadObserver(int channel,
                                    VoERxVadCallback& observer) = 0;

  // Removes the VoERxVadCallback observer and turns off Rx VAD
  // notifications for the given |channel|.
  virtual int DeRegisterRxVadObserver(int channel) = 0;

  // Reports the VAD/DTX activity of the given |channel|: 1 when the audio
  // frames contain speech, 0 when they are silent. With VAD disabled the
  // result is always 1.
  virtual int VoiceActivityIndicator(int channel) = 0;

  // Turns on or off the ability to retrieve echo metrics and delay logging
  // values during an active call. The metrics are only supported in AEC.
  virtual int SetEcMetricsStatus(bool enable) = 0;

  // Retrieves the current EC metric state in |enabled|.
  virtual int GetEcMetricsStatus(bool& enabled) = 0;

  // Retrieves the instantaneous echo level metrics.
  virtual int GetEchoMetrics(int& ERL, int& ERLE, int& RERL, int& A_NLP) = 0;

  // Retrieves the EC internal |delay_median| and |delay_std|, in ms,
  // between near-end and far-end. Both are computed over the period that
  // has passed since the previous GetEcDelayMetrics() call.
  virtual int GetEcDelayMetrics(int& delay_median, int& delay_std) = 0;

  // Starts recording Audio Processing (AP) debugging information to the
  // file |fileNameUTF8|. The file can be used afterwards for off-line
  // analysis of the AP performance.
  virtual int StartDebugRecording(const char* fileNameUTF8) = 0;

  // Stops recording AP debugging information.
  virtual int StopDebugRecording() = 0;

  // Turns detection of disturbing keyboard typing on or off. A detection
  // is signalled as an error notification through a callback.
  virtual int SetTypingDetectionStatus(bool enable) = 0;

  // Retrieves the current typing detection state in |enabled|.
  virtual int GetTypingDetectionStatus(bool& enabled) = 0;

  // Writes to |seconds| the smaller of:
  // * the time in seconds since the last typing event, and
  // * the time in seconds since typing detection was enabled.
  // An error is returned when typing detection is disabled.
  virtual int TimeSinceLastTyping(int& seconds) = 0;

  // Optionally tunes the typing detection parameters. A parameter whose
  // value == 0 is ignored and keeps its default configuration.
  // TODO(niklase) Remove default argument as soon as libJingle is updated!
  virtual int SetTypingDetectionParameters(int timeWindow,
                                           int costPerTyping,
                                           int reportingThreshold,
                                           int penaltyDecay,
                                           int typeEventDelay = 0) = 0;

  // When enabled, swaps the left and right audio channels on the capture
  // side. This only has an effect with a stereo send codec. The setting
  // is persistent: it is applied whenever a stereo send codec is enabled.
  //
  // Only the captured audio is swapped, not mixed files. The swap shows up
  // in file recordings and in audio accessed through the external media
  // interface.
  virtual void EnableStereoChannelSwapping(bool enable) = 0;
  virtual bool IsStereoChannelSwappingEnabled() = 0;

 protected:
  // Construction and destruction are reserved for derived classes; clients
  // use GetInterface() and Release() instead.
  VoEAudioProcessing() {}
  virtual ~VoEAudioProcessing() {}
};

}  // namespace webrtc

#endif  // WEBRTC_VOICE_ENGINE_VOE_AUDIO_PROCESSING_H