diff options
Diffstat (limited to 'src/modules/audio_processing/main/test/process_test/process_test.cc')
-rw-r--r-- | src/modules/audio_processing/main/test/process_test/process_test.cc | 628 |
1 file changed, 628 insertions, 0 deletions
diff --git a/src/modules/audio_processing/main/test/process_test/process_test.cc b/src/modules/audio_processing/main/test/process_test/process_test.cc new file mode 100644 index 0000000000..c62345fcf0 --- /dev/null +++ b/src/modules/audio_processing/main/test/process_test/process_test.cc @@ -0,0 +1,628 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> +#include <string.h> +#ifdef WEBRTC_ANDROID +#include <sys/stat.h> +#endif + +#include "tick_util.h" +#include "gtest/gtest.h" +#include "module_common_types.h" + +#include "audio_processing.h" + +#include "cpu_features_wrapper.h" + +using webrtc::AudioFrame; +using webrtc::TickInterval; +using webrtc::TickTime; + +using webrtc::AudioProcessing; +using webrtc::GainControl; +using webrtc::NoiseSuppression; + +void usage() { + printf( + "Usage: process_test [options] [-ir REVERSE_FILE] [-i PRIMARY_FILE]\n"); + printf( + " [-o OUT_FILE]\n"); + printf( + "process_test is a test application for AudioProcessing.\n\n" + "When -ir or -i is specified the files will be processed directly in a\n" + "simulation mode. Otherwise the full set of test files is expected to be\n" + "present in the working directory.\n"); + printf("\n"); + printf("Options\n"); + printf("General configuration:\n"); + printf(" -fs SAMPLE_RATE_HZ\n"); + printf(" -ch CHANNELS_IN CHANNELS_OUT\n"); + printf(" -rch REVERSE_CHANNELS\n"); + printf("\n"); + printf("Component configuration:\n"); + printf( + "All components are disabled by default. Each block below begins with a\n" + "flag to enable the component with default settings. 
The subsequent flags\n" + "in the block are used to provide configuration settings.\n"); + printf("\n -aec Echo cancellation\n"); + printf(" --drift_compensation\n"); + printf(" --no_drift_compensation\n"); + printf("\n -aecm Echo control mobile\n"); + printf("\n -agc Gain control\n"); + printf(" --analog\n"); + printf(" --adaptive_digital\n"); + printf(" --fixed_digital\n"); + printf(" --target_level LEVEL\n"); + printf(" --compression_gain GAIN\n"); + printf(" --limiter\n"); + printf(" --no_limiter\n"); + printf("\n -hpf High pass filter\n"); + printf("\n -ns Noise suppression\n"); + printf(" --ns_low\n"); + printf(" --ns_moderate\n"); + printf(" --ns_high\n"); + printf(" --ns_very_high\n"); + printf("\n -vad Voice activity detection\n"); + printf(" --vad_out_file FILE"); + printf("\n"); + printf("Modifiers:\n"); + printf(" --perf Measure performance.\n"); + printf(" --quiet Suppress text output.\n"); + printf(" --no_progress Suppress progress.\n"); + printf(" --version Print version information and exit.\n"); +} + +// void function for gtest. 
+void void_main(int argc, char* argv[]) { + if (argc > 1 && strcmp(argv[1], "--help") == 0) { + usage(); + return; + } + + if (argc < 2) { + printf("Did you mean to run without arguments?\n"); + printf("Try `process_test --help' for more information.\n\n"); + } + + AudioProcessing* apm = AudioProcessing::Create(0); + ASSERT_TRUE(apm != NULL); + + WebRtc_Word8 version[1024]; + WebRtc_UWord32 version_bytes_remaining = sizeof(version); + WebRtc_UWord32 version_position = 0; + + const char* far_filename = NULL; + const char* near_filename = NULL; + const char* out_filename = NULL; + const char* vad_out_filename = NULL; + + int32_t sample_rate_hz = 16000; + int32_t device_sample_rate_hz = 16000; + + int num_capture_input_channels = 1; + int num_capture_output_channels = 1; + int num_render_channels = 1; + + int samples_per_channel = sample_rate_hz / 100; + + bool simulating = false; + bool perf_testing = false; + bool verbose = true; + bool progress = true; + //bool interleaved = true; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-ir") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after -ir"; + far_filename = argv[i]; + simulating = true; + + } else if (strcmp(argv[i], "-i") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after -i"; + near_filename = argv[i]; + simulating = true; + + } else if (strcmp(argv[i], "-o") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after -o"; + out_filename = argv[i]; + + } else if (strcmp(argv[i], "-fs") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify sample rate after -fs"; + ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz)); + samples_per_channel = sample_rate_hz / 100; + + ASSERT_EQ(apm->kNoError, + apm->set_sample_rate_hz(sample_rate_hz)); + + } else if (strcmp(argv[i], "-ch") == 0) { + i++; + ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch"; + ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_input_channels)); + i++; + ASSERT_EQ(1, sscanf(argv[i], "%d", 
&num_capture_output_channels)); + + ASSERT_EQ(apm->kNoError, + apm->set_num_channels(num_capture_input_channels, + num_capture_output_channels)); + + } else if (strcmp(argv[i], "-rch") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify number of channels after -rch"; + ASSERT_EQ(1, sscanf(argv[i], "%d", &num_render_channels)); + + ASSERT_EQ(apm->kNoError, + apm->set_num_reverse_channels(num_render_channels)); + + } else if (strcmp(argv[i], "-aec") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + + } else if (strcmp(argv[i], "-noasm") == 0) { + WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM; + + } else if (strcmp(argv[i], "--drift_compensation") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + // TODO(ajm): this is enabled in the VQE test app by default. Investigate + // why it can give better performance despite passing zeros. + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_drift_compensation(true)); + } else if (strcmp(argv[i], "--no_drift_compensation") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_drift_compensation(false)); + + } else if (strcmp(argv[i], "-aecm") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); + + } else if (strcmp(argv[i], "-agc") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + + } else if (strcmp(argv[i], "--analog") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); + + } else if (strcmp(argv[i], "--adaptive_digital") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + + } else if (strcmp(argv[i], "--fixed_digital") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + 
apm->gain_control()->set_mode(GainControl::kFixedDigital)); + + } else if (strcmp(argv[i], "--target_level") == 0) { + i++; + int level; + ASSERT_EQ(1, sscanf(argv[i], "%d", &level)); + + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_target_level_dbfs(level)); + + } else if (strcmp(argv[i], "--compression_gain") == 0) { + i++; + int gain; + ASSERT_EQ(1, sscanf(argv[i], "%d", &gain)); + + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_compression_gain_db(gain)); + + } else if (strcmp(argv[i], "--limiter") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->enable_limiter(true)); + + } else if (strcmp(argv[i], "--no_limiter") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->enable_limiter(false)); + + } else if (strcmp(argv[i], "-hpf") == 0) { + ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true)); + + } else if (strcmp(argv[i], "-ns") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + + } else if (strcmp(argv[i], "--ns_low") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kLow)); + + } else if (strcmp(argv[i], "--ns_moderate") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kModerate)); + + } else if (strcmp(argv[i], "--ns_high") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kHigh)); + + } else if (strcmp(argv[i], "--ns_very_high") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + 
ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh)); + + } else if (strcmp(argv[i], "-vad") == 0) { + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); + + } else if (strcmp(argv[i], "--vad_out_file") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after --vad_out_file"; + vad_out_filename = argv[i]; + + } else if (strcmp(argv[i], "--perf") == 0) { + perf_testing = true; + + } else if (strcmp(argv[i], "--quiet") == 0) { + verbose = false; + progress = false; + + } else if (strcmp(argv[i], "--no_progress") == 0) { + progress = false; + + } else if (strcmp(argv[i], "--version") == 0) { + ASSERT_EQ(apm->kNoError, apm->Version(version, + version_bytes_remaining, + version_position)); + printf("%s\n", version); + return; + + } else { + FAIL() << "Unrecognized argument " << argv[i]; + } + } + + if (verbose) { + printf("Sample rate: %d Hz\n", sample_rate_hz); + printf("Primary channels: %d (in), %d (out)\n", + num_capture_input_channels, + num_capture_output_channels); + printf("Reverse channels: %d \n", num_render_channels); + } + + const char far_file_default[] = "apm_far.pcm"; + const char near_file_default[] = "apm_near.pcm"; + const char out_file_default[] = "out.pcm"; + const char event_filename[] = "apm_event.dat"; + const char delay_filename[] = "apm_delay.dat"; + const char drift_filename[] = "apm_drift.dat"; + const char vad_file_default[] = "vad_out.dat"; + + if (!simulating) { + far_filename = far_file_default; + near_filename = near_file_default; + } + + if (out_filename == NULL) { + out_filename = out_file_default; + } + + if (vad_out_filename == NULL) { + vad_out_filename = vad_file_default; + } + + FILE* far_file = NULL; + FILE* near_file = NULL; + FILE* out_file = NULL; + FILE* event_file = NULL; + FILE* delay_file = NULL; + FILE* drift_file = NULL; + FILE* vad_out_file = NULL; + + if (far_filename != NULL) { + far_file = fopen(far_filename, "rb"); + ASSERT_TRUE(NULL != far_file) << "Unable 
to open far-end audio file " + << far_filename; + } + + near_file = fopen(near_filename, "rb"); + ASSERT_TRUE(NULL != near_file) << "Unable to open near-end audio file " + << near_filename; + struct stat st; + stat(near_filename, &st); + int near_size_samples = st.st_size / sizeof(int16_t); + + out_file = fopen(out_filename, "wb"); + ASSERT_TRUE(NULL != out_file) << "Unable to open output audio file " + << out_filename; + + if (!simulating) { + event_file = fopen(event_filename, "rb"); + ASSERT_TRUE(NULL != event_file) << "Unable to open event file " + << event_filename; + + delay_file = fopen(delay_filename, "rb"); + ASSERT_TRUE(NULL != delay_file) << "Unable to open buffer file " + << delay_filename; + + drift_file = fopen(drift_filename, "rb"); + ASSERT_TRUE(NULL != drift_file) << "Unable to open drift file " + << drift_filename; + } + + if (apm->voice_detection()->is_enabled()) { + vad_out_file = fopen(vad_out_filename, "wb"); + ASSERT_TRUE(NULL != vad_out_file) << "Unable to open VAD output file " + << vad_out_file; + } + + enum Events { + kInitializeEvent, + kRenderEvent, + kCaptureEvent, + kResetEventDeprecated + }; + int16_t event = 0; + size_t read_count = 0; + int reverse_count = 0; + int primary_count = 0; + int near_read_samples = 0; + TickInterval acc_ticks; + + AudioFrame far_frame; + far_frame._frequencyInHz = sample_rate_hz; + + AudioFrame near_frame; + near_frame._frequencyInHz = sample_rate_hz; + + int delay_ms = 0; + int drift_samples = 0; + int capture_level = 127; + int8_t stream_has_voice = 0; + + TickTime t0 = TickTime::Now(); + TickTime t1 = t0; + WebRtc_Word64 max_time_us = 0; + WebRtc_Word64 max_time_reverse_us = 0; + WebRtc_Word64 min_time_us = 1e6; + WebRtc_Word64 min_time_reverse_us = 1e6; + + while (simulating || feof(event_file) == 0) { + std::ostringstream trace_stream; + trace_stream << "Processed frames: " << reverse_count << " (reverse), " + << primary_count << " (primary)"; + SCOPED_TRACE(trace_stream.str()); + + + if 
(simulating) { + if (far_file == NULL) { + event = kCaptureEvent; + } else { + if (event == kRenderEvent) { + event = kCaptureEvent; + } else { + event = kRenderEvent; + } + } + } else { + read_count = fread(&event, sizeof(event), 1, event_file); + if (read_count != 1) { + break; + } + //if (fread(&event, sizeof(event), 1, event_file) != 1) { + // break; // This is expected. + //} + } + + if (event == kInitializeEvent || event == kResetEventDeprecated) { + ASSERT_EQ(1u, + fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file)); + samples_per_channel = sample_rate_hz / 100; + + ASSERT_EQ(1u, + fread(&device_sample_rate_hz, + sizeof(device_sample_rate_hz), + 1, + event_file)); + + ASSERT_EQ(apm->kNoError, + apm->set_sample_rate_hz(sample_rate_hz)); + + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->set_device_sample_rate_hz( + device_sample_rate_hz)); + + far_frame._frequencyInHz = sample_rate_hz; + near_frame._frequencyInHz = sample_rate_hz; + + if (verbose) { + printf("Init at frame: %d (primary), %d (reverse)\n", + primary_count, reverse_count); + printf(" Sample rate: %d Hz\n", sample_rate_hz); + } + + } else if (event == kRenderEvent) { + reverse_count++; + far_frame._audioChannel = num_render_channels; + far_frame._payloadDataLengthInSamples = + num_render_channels * samples_per_channel; + + read_count = fread(far_frame._payloadData, + sizeof(WebRtc_Word16), + far_frame._payloadDataLengthInSamples, + far_file); + + if (simulating) { + if (read_count != far_frame._payloadDataLengthInSamples) { + break; // This is expected. 
+ } + } else { + ASSERT_EQ(read_count, + far_frame._payloadDataLengthInSamples); + } + + if (perf_testing) { + t0 = TickTime::Now(); + } + + ASSERT_EQ(apm->kNoError, + apm->AnalyzeReverseStream(&far_frame)); + + if (perf_testing) { + t1 = TickTime::Now(); + TickInterval tick_diff = t1 - t0; + acc_ticks += tick_diff; + if (tick_diff.Microseconds() > max_time_reverse_us) { + max_time_reverse_us = tick_diff.Microseconds(); + } + if (tick_diff.Microseconds() < min_time_reverse_us) { + min_time_reverse_us = tick_diff.Microseconds(); + } + } + + } else if (event == kCaptureEvent) { + primary_count++; + near_frame._audioChannel = num_capture_input_channels; + near_frame._payloadDataLengthInSamples = + num_capture_input_channels * samples_per_channel; + + read_count = fread(near_frame._payloadData, + sizeof(WebRtc_Word16), + near_frame._payloadDataLengthInSamples, + near_file); + + near_read_samples += read_count; + if (progress && primary_count % 100 == 0) { + printf("%.0f%% complete\r", + (near_read_samples * 100.0) / near_size_samples); + fflush(stdout); + } + if (simulating) { + if (read_count != near_frame._payloadDataLengthInSamples) { + break; // This is expected. + } + + delay_ms = 0; + drift_samples = 0; + } else { + ASSERT_EQ(read_count, + near_frame._payloadDataLengthInSamples); + + // TODO(ajm): sizeof(delay_ms) for current files? + ASSERT_EQ(1u, + fread(&delay_ms, 2, 1, delay_file)); + ASSERT_EQ(1u, + fread(&drift_samples, sizeof(drift_samples), 1, drift_file)); + } + + if (perf_testing) { + t0 = TickTime::Now(); + } + + // TODO(ajm): fake an analog gain while simulating. 
+ + int capture_level_in = capture_level; + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_stream_analog_level(capture_level)); + ASSERT_EQ(apm->kNoError, + apm->set_stream_delay_ms(delay_ms)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->set_stream_drift_samples(drift_samples)); + + int err = apm->ProcessStream(&near_frame); + if (err == apm->kBadStreamParameterWarning) { + printf("Bad parameter warning. %s\n", trace_stream.str().c_str()); + } + ASSERT_TRUE(err == apm->kNoError || + err == apm->kBadStreamParameterWarning); + + capture_level = apm->gain_control()->stream_analog_level(); + + stream_has_voice = + static_cast<int8_t>(apm->voice_detection()->stream_has_voice()); + if (vad_out_file != NULL) { + ASSERT_EQ(1u, fwrite(&stream_has_voice, + sizeof(stream_has_voice), + 1, + vad_out_file)); + } + + if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { + ASSERT_EQ(capture_level_in, capture_level); + } + + if (perf_testing) { + t1 = TickTime::Now(); + TickInterval tick_diff = t1 - t0; + acc_ticks += tick_diff; + if (tick_diff.Microseconds() > max_time_us) { + max_time_us = tick_diff.Microseconds(); + } + if (tick_diff.Microseconds() < min_time_us) { + min_time_us = tick_diff.Microseconds(); + } + } + + ASSERT_EQ(near_frame._payloadDataLengthInSamples, + fwrite(near_frame._payloadData, + sizeof(WebRtc_Word16), + near_frame._payloadDataLengthInSamples, + out_file)); + } + else { + FAIL() << "Event " << event << " is unrecognized"; + } + } + + if (verbose) { + printf("\nProcessed frames: %d (primary), %d (reverse)\n", + primary_count, reverse_count); + } + + int8_t temp_int8; + if (far_file != NULL) { + read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file); + EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed"; + } + read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file); + EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed"; + + if (!simulating) { + read_count = fread(&temp_int8, 
sizeof(temp_int8), 1, event_file); + EXPECT_NE(0, feof(event_file)) << "Event file not fully processed"; + read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file); + EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed"; + read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file); + EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed"; + } + + if (perf_testing) { + if (primary_count > 0) { + WebRtc_Word64 exec_time = acc_ticks.Milliseconds(); + printf("\nTotal time: %.3f s, file time: %.2f s\n", + exec_time * 0.001, primary_count * 0.01); + printf("Time per frame: %.3f ms (average), %.3f ms (max)," + " %.3f ms (min)\n", + (exec_time * 1.0) / primary_count, + (max_time_us + max_time_reverse_us) / 1000.0, + (min_time_us + min_time_reverse_us) / 1000.0); + } else { + printf("Warning: no capture frames\n"); + } + } + + AudioProcessing::Destroy(apm); + apm = NULL; +} + +int main(int argc, char* argv[]) +{ + void_main(argc, argv); + + return 0; +} |