diff options
author | andrew@webrtc.org <andrew@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2013-02-21 20:12:21 +0000 |
---|---|---|
committer | andrew@webrtc.org <andrew@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2013-02-21 20:12:21 +0000 |
commit | 89bd726f4fd6e8fe612b29e5727a0ba0092e63a6 (patch) | |
tree | c1df598f111247e63e3110ffd6f35a931d909347 /common_audio | |
parent | d5b822759f779a39e0c75d01a9dcc0094d26e861 (diff) | |
download | webrtc-89bd726f4fd6e8fe612b29e5727a0ba0092e63a6.tar.gz |
MIPS optimizations for Signal Processing Library patch01
Review URL: https://webrtc-codereview.appspot.com/1028004
Patch from Ljubomir Papuga <lpapuga@mips.com>.
git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@3557 4adac7df-926f-26a2-2b94-8c16560cd09d
Diffstat (limited to 'common_audio')
6 files changed, 730 insertions, 3 deletions
diff --git a/common_audio/signal_processing/include/signal_processing_library.h b/common_audio/signal_processing/include/signal_processing_library.h index 0fe97c27..b8f88c05 100644 --- a/common_audio/signal_processing/include/signal_processing_library.h +++ b/common_audio/signal_processing/include/signal_processing_library.h @@ -147,8 +147,7 @@ ((WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT((a), 18816, 7) & 0x00007fff)) #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif #define WEBRTC_SPL_MEMCPY_W8(v1, v2, length) \ @@ -223,6 +222,9 @@ int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length); #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length); #endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, int length); +#endif // Returns the largest absolute value in a signed 32-bit vector. // @@ -238,6 +240,9 @@ int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length); #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length); #endif +#if defined(MIPS_DSP_R1_LE) +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, int length); +#endif // Returns the maximum value of a 16-bit vector. // @@ -255,6 +260,9 @@ int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length); #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length); #endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, int length); +#endif // Returns the maximum value of a 32-bit vector. // @@ -272,6 +280,9 @@ int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length); #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length); #endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, int length); +#endif // Returns the minimum value of a 16-bit vector. // @@ -289,6 +300,9 @@ int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length); #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length); #endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, int length); +#endif // Returns the minimum value of a 32-bit vector. // @@ -306,6 +320,9 @@ int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length); #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length); #endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, int length); +#endif // Returns the vector index to the largest absolute value of a 16-bit vector. // diff --git a/common_audio/signal_processing/min_max_operations_mips.c b/common_audio/signal_processing/min_max_operations_mips.c new file mode 100644 index 00000000..da3615a5 --- /dev/null +++ b/common_audio/signal_processing/min_max_operations_mips.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the implementation of function + * WebRtcSpl_MaxAbsValueW16() + * + * The description header can be found in signal_processing_library.h. + * + */ + +#include "signal_processing_library.h" + +// Maximum absolute value of word16 vector. +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, int length) { + WebRtc_Word32 totMax = 0; + WebRtc_Word32 tmp32_0, tmp32_1, tmp32_2, tmp32_3; + int i, loop_size; + + if (vector == NULL || length <= 0) { + return -1; + } +#if defined(MIPS_DSP_R1) + const WebRtc_Word32* tmpvec32 = (WebRtc_Word32*)vector; + loop_size = length >> 4; + + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lw %[tmp32_0], 0(%[tmpvec32]) \n\t" + "lw %[tmp32_1], 4(%[tmpvec32]) \n\t" + "lw %[tmp32_2], 8(%[tmpvec32]) \n\t" + "lw %[tmp32_3], 12(%[tmpvec32]) \n\t" + + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + + "lw %[tmp32_0], 16(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + + "lw %[tmp32_1], 20(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + + "lw %[tmp32_2], 24(%[tmpvec32]) \n\t" + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + + "lw %[tmp32_3], 28(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + + "addiu %[tmpvec32], %[tmpvec32], 32 \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), + [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32) + : + : "memory" + ); + } + __asm__ volatile ( + "rotr %[tmp32_0], %[totMax], 16 \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + "packrl.ph %[totMax], $0, %[totMax] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax) + : + ); + loop_size = length & 0xf; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvec32]) \n\t" + "addiu %[tmpvec32], %[tmpvec32], 2 \n\t" + "absq_s.w %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax) + : + : "memory" + ); + } +#else // #if defined(MIPS_DSP_R1) + WebRtc_Word32 v16MaxMax = WEBRTC_SPL_WORD16_MAX; + WebRtc_Word32 r, r1, r2, r3; + const WebRtc_Word16* tmpvector = vector; + loop_size = length >> 4; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "lh %[tmp32_1], 2(%[tmpvector]) \n\t" + "lh %[tmp32_2], 4(%[tmpvector]) \n\t" + "lh %[tmp32_3], 6(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 8(%[tmpvector]) \n\t" + "lh %[tmp32_1], 10(%[tmpvector]) \n\t" + "lh %[tmp32_2], 12(%[tmpvector]) \n\t" + "lh %[tmp32_3], 14(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 16(%[tmpvector]) \n\t" + "lh %[tmp32_1], 18(%[tmpvector]) \n\t" + "lh %[tmp32_2], 20(%[tmpvector]) \n\t" + "lh %[tmp32_3], 22(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 24(%[tmpvector]) \n\t" + "lh %[tmp32_1], 26(%[tmpvector]) \n\t" + "lh %[tmp32_2], 28(%[tmpvector]) \n\t" + "lh %[tmp32_3], 30(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "addiu %[tmpvector], %[tmpvector], 32 \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), + [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector), + [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3) + : + : "memory" + ); + } + loop_size = length & 0xf; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "addiu %[tmpvector], %[tmpvector], 2 \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax) + : + : "memory" + ); + } + + __asm__ volatile ( + "slt %[r], %[v16MaxMax], %[totMax] \n\t" + "movn %[totMax], %[v16MaxMax], %[r] \n\t" + : [totMax] "+r" (totMax), [r] "=&r" (r) + : [v16MaxMax] "r" (v16MaxMax) + ); +#endif // #if defined(MIPS_DSP_R1) + return (int16_t)totMax; +} + +#if defined(MIPS_DSP_R1_LE) +// Maximum absolute value of word32 vector. Version for MIPS platform. +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, int length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + int tmp1 = 0, max_value = 0x7fffffff; + + if (vector == NULL || length <= 0) { + return -1; + } + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[absolute], 0(%[vector]) \n\t" + "absq_s.w %[absolute], %[absolute] \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[absolute] \n\t" + "movn %[maximum], %[absolute], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + "slt %[tmp1], %[max_value], %[maximum] \n\t" + "movn %[maximum], %[max_value], %[tmp1] \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute) + : [vector] "r" (vector), [length] "r" (length), [max_value] "r" (max_value) + : "memory" + ); + + return (int32_t)maximum; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Maximum value of word16 vector. Version for MIPS platform. +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, int length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + int tmp1; + int16_t value; + + if (vector == NULL || length <= 0) { + return maximum; + } + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return maximum; +} + +// Maximum value of word32 vector. Version for MIPS platform. +int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, int length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + int tmp1, value; + + if (vector == NULL || length <= 0) { + return maximum; + } + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return maximum; +} + +// Minimum value of word16 vector. Version for MIPS platform. +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, int length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + int tmp1; + int16_t value; + + if (vector == NULL || length <= 0) { + return minimum; + } + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return minimum; +} + +// Minimum value of word32 vector. Version for MIPS platform. +int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, int length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + int tmp1, value; + + if (vector == NULL || length <= 0) { + return minimum; + } + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return minimum; +} + diff --git a/common_audio/signal_processing/resample_by_2.c b/common_audio/signal_processing/resample_by_2.c index e6692e8a..2e33cafa 100644 --- a/common_audio/signal_processing/resample_by_2.c +++ b/common_audio/signal_processing/resample_by_2.c @@ -66,6 +66,7 @@ static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255}; // decimator +#if !defined(MIPS32_LE) void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len, WebRtc_Word16* out, WebRtc_Word32* filtState) { WebRtc_Word32 tmp1, tmp2, diff, in32, out32; @@ -121,6 +122,7 @@ void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len, filtState[6] = state6; filtState[7] = state7; } +#endif // #if defined(MIPS32_LE) void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, diff --git a/common_audio/signal_processing/resample_by_2_mips.c b/common_audio/signal_processing/resample_by_2_mips.c new file mode 100644 index 00000000..24d99606 --- /dev/null +++ b/common_audio/signal_processing/resample_by_2_mips.c @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling by two functions. + * The description header can be found in signal_processing_library.h + * + */ + +#if defined(MIPS32_LE) + +#include "signal_processing_library.h" + +// allpass filter coefficients. +static const WebRtc_UWord16 kResampleAllpass1[3] = {3284, 24441, 49528}; +static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255}; + +// Multiply a 32-bit value with a 16-bit value and accumulate to another input: +#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) +#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) + +// decimator +void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, + const WebRtc_Word16 len, + WebRtc_Word16* out, + WebRtc_Word32* filtState) { + WebRtc_Word32 out32; + WebRtc_Word16 i, len1; + + register WebRtc_Word32 state0 = filtState[0]; + register WebRtc_Word32 state1 = filtState[1]; + register WebRtc_Word32 state2 = filtState[2]; + register WebRtc_Word32 state3 = filtState[3]; + register WebRtc_Word32 state4 = filtState[4]; + register WebRtc_Word32 state5 = filtState[5]; + register WebRtc_Word32 state6 = filtState[6]; + register WebRtc_Word32 state7 = filtState[7]; + +#if defined(MIPS_DSP_R2_LE) + WebRtc_Word32 k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2; + + k1Res0= 3284; + k1Res1= 24441; + k1Res2= 49528; + k2Res0= 12199; + k2Res1= 37471; + k2Res2= 60255; + len1 = (len >> 1); + + const WebRtc_Word32* inw = (WebRtc_Word32*)in; + WebRtc_Word32 tmp11, tmp12, tmp21, tmp22; + WebRtc_Word32 in322, in321; + WebRtc_Word32 diff1, diff2; + for (i = len1; i > 0; i--) { + __asm__ volatile ( + "lh %[in321], 0(%[inw]) \n\t" + "lh %[in322], 2(%[inw]) \n\t" + + "sll %[in321], %[in321], 10 \n\t" + "sll %[in322], %[in322], 10 \n\t" + + "addiu %[inw], %[inw], 4 \n\t" + + "subu %[diff1], %[in321], %[state1] \n\t" + "subu %[diff2], %[in322], %[state5] \n\t" + + : [in322] "=&r" (in322), [in321] "=&r" (in321), + [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw) + : [state1] "r" (state1), [state5] "r" (state5) + : "memory" + ); + + __asm__ volatile ( + "mult $ac0, %[diff1], %[k2Res0] \n\t" + "mult $ac1, %[diff2], %[k1Res0] \n\t" + + "extr.w %[tmp11], $ac0, 16 \n\t" + "extr.w %[tmp12], $ac1, 16 \n\t" + + "addu %[tmp11], %[state0], %[tmp11] \n\t" + "addu %[tmp12], %[state4], %[tmp12] \n\t" + + "addiu %[state0], %[in321], 0 \n\t" + "addiu %[state4], %[in322], 0 \n\t" + + "subu %[diff1], %[tmp11], %[state2] \n\t" + "subu %[diff2], %[tmp12], %[state6] \n\t" + + "mult $ac0, %[diff1], %[k2Res1] \n\t" + "mult $ac1, %[diff2], %[k1Res1] \n\t" + + "extr.w %[tmp21], $ac0, 16 \n\t" + "extr.w %[tmp22], $ac1, 16 \n\t" + + "addu %[tmp21], %[state1], %[tmp21] \n\t" + "addu %[tmp22], %[state5], %[tmp22] \n\t" + + "addiu %[state1], %[tmp11], 0 \n\t" + "addiu %[state5], %[tmp12], 0 \n\t" + : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21), + [tmp11] "=&r" (tmp11), [state0] "+r" (state0), + [state1] "+r" (state1), + [state2] "+r" (state2), + [state4] "+r" (state4), [tmp12] "=&r" (tmp12), + [state6] "+r" (state6), [state5] "+r" (state5) + : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0), + [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322), + [in321] "r" (in321), [k1Res0] "r" (k1Res0) + : "hi", "lo", "$ac1hi", "$ac1lo" + ); + + // upper allpass filter + __asm__ volatile ( + "subu %[diff1], %[tmp21], %[state3] \n\t" + "subu %[diff2], %[tmp22], %[state7] \n\t" + + "mult $ac0, %[diff1], %[k2Res2] \n\t" + "mult $ac1, %[diff2], %[k1Res2] \n\t" + "extr.w %[state3], $ac0, 16 \n\t" + "extr.w %[state7], $ac1, 16 \n\t" + "addu %[state3], %[state2], %[state3] \n\t" + "addu %[state7], %[state6], %[state7] \n\t" + + "addiu %[state2], %[tmp21], 0 \n\t" + "addiu %[state6], %[tmp22], 0 \n\t" + + // add two allpass outputs, divide by two and round + "addu %[out32], %[state3], %[state7] \n\t" + "addiu %[out32], %[out32], 1024 \n\t" + "sra %[out32], %[out32], 11 \n\t" + : [state3] "+r" (state3), [state6] "+r" (state6), + [state2] "+r" (state2), [diff2] "=&r" (diff2), + [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7) + : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21), + [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2) + : "hi", "lo", "$ac1hi", "$ac1lo" + ); + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } +#else // #if defined(MIPS_DSP_R2_LE) + WebRtc_Word32 tmp1, tmp2, diff; + WebRtc_Word32 in32; + len1 = (len >> 1)/4; + for (i = len1; i > 0; i--) { + // lower allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (WebRtc_Word32)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } +#endif // #if defined(MIPS_DSP_R2_LE) + __asm__ volatile ( + "sw %[state0], 0(%[filtState]) \n\t" + "sw %[state1], 4(%[filtState]) \n\t" + "sw %[state2], 8(%[filtState]) \n\t" + "sw %[state3], 12(%[filtState]) \n\t" + "sw %[state4], 16(%[filtState]) \n\t" + "sw %[state5], 20(%[filtState]) \n\t" + "sw %[state6], 24(%[filtState]) \n\t" + "sw %[state7], 28(%[filtState]) \n\t" + : + : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2), + [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5), + [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState) + : "memory" + ); +} + +#endif // #if defined(MIPS32_LE) + diff --git a/common_audio/signal_processing/signal_processing.gypi b/common_audio/signal_processing/signal_processing.gypi index f6c6e735..b3483542 100644 --- a/common_audio/signal_processing/signal_processing.gypi +++ b/common_audio/signal_processing/signal_processing.gypi @@ -84,6 +84,12 @@ }], ], }], + ['target_arch=="mipsel"', { + 'sources': [ + 'min_max_operations_mips.c', + 'resample_by_2_mips.c', + ], + }], ], # Ignore warning on shift operator promotion. 'msvs_disabled_warnings': [ 4334, ], diff --git a/common_audio/signal_processing/spl_init.c b/common_audio/signal_processing/spl_init.c index 5d243bb8..f5279259 100644 --- a/common_audio/signal_processing/spl_init.c +++ b/common_audio/signal_processing/spl_init.c @@ -31,7 +31,8 @@ ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; RealForwardFFT WebRtcSpl_RealForwardFFT; RealInverseFFT WebRtcSpl_RealInverseFFT; -#if defined(WEBRTC_DETECT_ARM_NEON) || !defined(WEBRTC_ARCH_ARM_NEON) +#if (defined(WEBRTC_DETECT_ARM_NEON) || !defined(WEBRTC_ARCH_ARM_NEON)) && \ + !defined(MIPS32_LE) /* Initialize function pointers to the generic C version. */ static void InitPointersToC() { WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; @@ -67,6 +68,28 @@ static void InitPointersToNeon() { } #endif +#if defined(MIPS32_LE) +/* Initialize function pointers to the MIPS version. */ +static void InitPointersToMIPS() { + WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; + WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; + WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; + WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; + WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; + WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; + WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC; + WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC; +#if defined(MIPS_DSP_R1_LE) + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; +#else + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; +#endif +} +#endif + static void InitFunctionPointers(void) { #if defined(WEBRTC_DETECT_ARM_NEON) if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { @@ -76,6 +99,8 @@ static void InitFunctionPointers(void) { } #elif defined(WEBRTC_ARCH_ARM_NEON) InitPointersToNeon(); +#elif defined(MIPS32_LE) + InitPointersToMIPS(); #else InitPointersToC(); #endif /* WEBRTC_DETECT_ARM_NEON */ |