aboutsummaryrefslogtreecommitdiff
path: root/webrtc/common_audio
diff options
context:
space:
mode:
authorkma@webrtc.org <kma@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>2012-11-17 00:22:46 +0000
committerkma@webrtc.org <kma@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>2012-11-17 00:22:46 +0000
commit55cd78cfc25f135149b780dcf527d147d5621ba2 (patch)
tree7ec0407789d6e6dc54b45a8bb71a31144afcb2f1 /webrtc/common_audio
parent2ec58dc4d16c982a7b14da37510d0e4ab78e10e3 (diff)
downloadwebrtc-55cd78cfc25f135149b780dcf527d147d5621ba2.tar.gz
Porting ARM optimization from Android to iOS.
Tested APM and iSAC in Android. Bit-exact with original versions. Changes include removing or changing some GCC directives (e.g. .fnstart, .hword), instruction syntax, etc. Review URL: https://webrtc-codereview.appspot.com/934009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3124 4adac7df-926f-26a2-2b94-8c16560cd09d
Diffstat (limited to 'webrtc/common_audio')
-rw-r--r--webrtc/common_audio/signal_processing/Android.mk14
-rw-r--r--webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S (renamed from webrtc/common_audio/signal_processing/complex_bit_reverse_arm.s)63
-rw-r--r--webrtc/common_audio/signal_processing/cross_correlation_neon.S (renamed from webrtc/common_audio/signal_processing/cross_correlation_neon.s)29
-rw-r--r--webrtc/common_audio/signal_processing/downsample_fast_neon.S (renamed from webrtc/common_audio/signal_processing/downsample_fast_neon.s)21
-rw-r--r--webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S (renamed from webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.s)14
-rw-r--r--webrtc/common_audio/signal_processing/include/spl_inl_armv7.h2
-rw-r--r--webrtc/common_audio/signal_processing/min_max_operations_neon.S (renamed from webrtc/common_audio/signal_processing/min_max_operations_neon.s)76
-rw-r--r--webrtc/common_audio/signal_processing/resample_by_2.c8
-rw-r--r--webrtc/common_audio/signal_processing/signal_processing.gypi14
-rw-r--r--webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S (renamed from webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.s)5
-rw-r--r--webrtc/common_audio/signal_processing/vector_scaling_operations_neon.S (renamed from webrtc/common_audio/signal_processing/vector_scaling_operations_neon.s)12
11 files changed, 100 insertions, 158 deletions
diff --git a/webrtc/common_audio/signal_processing/Android.mk b/webrtc/common_audio/signal_processing/Android.mk
index ecbc5ddf4e..aed7e73b5d 100644
--- a/webrtc/common_audio/signal_processing/Android.mk
+++ b/webrtc/common_audio/signal_processing/Android.mk
@@ -60,7 +60,7 @@ LOCAL_C_INCLUDES := \
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
LOCAL_SRC_FILES += \
- filter_ar_fast_q12_armv7.s
+ filter_ar_fast_q12_armv7.S
else
LOCAL_SRC_FILES += \
filter_ar_fast_q12.c
@@ -68,8 +68,8 @@ endif
ifeq ($(TARGET_ARCH),arm)
LOCAL_SRC_FILES += \
- complex_bit_reverse_arm.s \
- spl_sqrt_floor_arm.s
+ complex_bit_reverse_arm.S \
+ spl_sqrt_floor_arm.S
else
LOCAL_SRC_FILES += \
complex_bit_reverse.c \
@@ -102,10 +102,10 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_MODULE := libwebrtc_spl_neon
LOCAL_MODULE_TAGS := optional
LOCAL_SRC_FILES := \
- cross_correlation_neon.s \
- downsample_fast_neon.s \
- min_max_operations_neon.s \
- vector_scaling_operations_neon.s
+ cross_correlation_neon.S \
+ downsample_fast_neon.S \
+ min_max_operations_neon.S \
+ vector_scaling_operations_neon.S
# Flags passed to both C and C++ files.
LOCAL_CFLAGS := \
diff --git a/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.s b/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
index 4828077800..e7f8a819bd 100644
--- a/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.s
+++ b/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
@@ -12,15 +12,11 @@
@ for ARMv5 platforms.
@ Reference C code is in file complex_bit_reverse.c. Bit-exact.
-.arch armv5
-
-.global WebRtcSpl_ComplexBitReverse
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
.align 2
-
-WebRtcSpl_ComplexBitReverse:
-.fnstart
-
+DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
push {r4-r7}
cmp r1, #7
@@ -88,39 +84,36 @@ END:
pop {r4-r7}
bx lr
-.fnend
-
-
@ The index tables. Note that each value is double the actual index for 16-bit
@ elements, unlike in the generic C code. In effect, the tables provide byte
@ offsets rather than element indexes.
.align 2
index_7: @ Indexes for stages == 7.
- .hword 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
- .hword 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
- .hword 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
- .hword 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
- .hword 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
- .hword 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
- .hword 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
- .hword 468, 364, 436, 380, 500, 412, 460, 444, 492
+ .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
+ .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
+ .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
+ .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
+ .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
+ .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
+ .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
+ .short 468, 364, 436, 380, 500, 412, 460, 444, 492
index_8: @ Indexes for stages == 8.
- .hword 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
- .hword 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
- .hword 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
- .hword 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
- .hword 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
- .hword 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
- .hword 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
- .hword 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
- .hword 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
- .hword 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
- .hword 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
- .hword 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
- .hword 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
- .hword 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
- .hword 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
- .hword 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
- .hword 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988
+ .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
+ .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
+ .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
+ .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
+ .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
+ .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
+ .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
+ .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
+ .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
+ .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
+ .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
+ .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
+ .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
+ .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
+ .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
+ .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
+ .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988
diff --git a/webrtc/common_audio/signal_processing/cross_correlation_neon.s b/webrtc/common_audio/signal_processing/cross_correlation_neon.S
index a18f67223f..580440c81f 100644
--- a/webrtc/common_audio/signal_processing/cross_correlation_neon.s
+++ b/webrtc/common_audio/signal_processing/cross_correlation_neon.S
@@ -29,24 +29,18 @@
@ r7: Total iteration of LOOP_DIM_SEQ_RESIDUAL
@ r8, r9, r10, r11, r12: scratch
-.arch armv7-a
-.fpu neon
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+GLOBAL_FUNCTION WebRtcSpl_CrossCorrelationNeon
.align 2
-.global WebRtcSpl_CrossCorrelationNeon
-
-WebRtcSpl_CrossCorrelationNeon:
-
-.fnstart
-
-.save {r4-r11}
+DEFINE_FUNCTION WebRtcSpl_CrossCorrelationNeon
push {r4-r11}
@ Put the shift value (-right_shifts) into a Neon register.
ldrsh r10, [sp, #36]
rsb r10, r10, #0
mov r8, r10, asr #31
- vmov.32 d16, r10, r8
+ vmov d16, r10, r8
@ Initialize loop counters.
and r7, r3, #7 @ inner_loop_len2 = dim_seq % 8;
@@ -63,7 +57,7 @@ LOOP_DIM_CROSS_CORRELATION:
LOOP_DIM_SEQ:
vld1.16 {d20, d21}, [r6]! @ seq1_ptr
- vld1.16 {d22, d23}, [r5]! @ seq2_ptr
+ vld1.16 {d22, d23}, [r5]! @ seq2_ptr
subs r8, r8, #1
vmull.s16 q12, d20, d22
vmull.s16 q13, d21, d23
@@ -105,9 +99,6 @@ POST_LOOP_DIM_SEQ_RESIDUAL: @ Sum the results up and do the shift.
pop {r4-r11}
bx lr
-.fnend
-
-
@ TODO(kma): Place this piece of reference code into a C code file.
@ void WebRtcSpl_CrossCorrelationNeon(WebRtc_Word32* cross_correlation,
@ WebRtc_Word16* seq1,
@@ -120,15 +111,15 @@ POST_LOOP_DIM_SEQ_RESIDUAL: @ Sum the results up and do the shift.
@ int j = 0;
@ int inner_loop_len1 = dim_seq >> 3;
@ int inner_loop_len2 = dim_seq - (inner_loop_len1 << 3);
-@
+@
@ assert(dim_cross_correlation > 0);
@ assert(dim_seq > 0);
-@
+@
@ for (i = 0; i < dim_cross_correlation; i++) {
@ int16_t *seq1_ptr = seq1;
@ int16_t *seq2_ptr = seq2 + (step_seq2 * i);
@ int64_t sum = 0;
-@
+@
@ for (j = inner_loop_len1; j > 0; j -= 1) {
@ sum += WEBRTC_SPL_MUL_16_16(*seq1_ptr, *seq2_ptr);
@ seq1_ptr++;
@@ -155,14 +146,14 @@ POST_LOOP_DIM_SEQ_RESIDUAL: @ Sum the results up and do the shift.
@ seq1_ptr++;
@ seq2_ptr++;
@ }
-@
+@
@ // Calculate the rest of the samples.
@ for (j = inner_loop_len2; j > 0; j -= 1) {
@ sum += WEBRTC_SPL_MUL_16_16(*seq1_ptr, *seq2_ptr);
@ seq1_ptr++;
@ seq2_ptr++;
@ }
-@
+@
@ *cross_correlation++ = (int32_t)(sum >> right_shifts);
@ }
@ }
diff --git a/webrtc/common_audio/signal_processing/downsample_fast_neon.s b/webrtc/common_audio/signal_processing/downsample_fast_neon.S
index 13a825d797..4e348ec646 100644
--- a/webrtc/common_audio/signal_processing/downsample_fast_neon.s
+++ b/webrtc/common_audio/signal_processing/downsample_fast_neon.S
@@ -14,17 +14,11 @@
@
@ The reference C code is in file downsample_fast.c. Bit-exact.
-.arch armv7-a
-.fpu neon
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+GLOBAL_FUNCTION WebRtcSpl_DownsampleFastNeon
.align 2
-.global WebRtcSpl_DownsampleFastNeon
-
-WebRtcSpl_DownsampleFastNeon:
-
-.fnstart
-
-.save {r4-r11}
+DEFINE_FUNCTION WebRtcSpl_DownsampleFastNeon
push {r4-r11}
cmp r3, #0 @ data_out_length <= 0?
@@ -168,14 +162,15 @@ LOOP_COEFF_LENGTH_FACTOR4:
vmlal.s16 q3, d18, d17
bge LOOP_COEFF_LENGTH_FACTOR4
+ add r11, r5, asl #4 @ r11 -> &data_in[i + factor * 8]
+ add r9, r5, asl #3 @ Counter i = delay + factor * 8.
+
@ Shift, saturate, and store the result.
vqshrn.s32 d0, q2, #12
vqshrn.s32 d1, q3, #12
+ cmp r9, r3 @ i < endpos - factor * 7 ?
vst1.16 {d0, d1}, [r2]!
- add r11, r5, asl #4 @ r11 -> &data_in[i + factor * 8]
- add r9, r5, asl #3 @ Counter i = delay + factor * 8.
- cmp r9, r3 @ i < endpos - factor * 7 ?
blt LOOP_ENDPOS_FACTOR4
@
@@ -218,5 +213,3 @@ LOOP2_COEFF_LENGTH:
END:
pop {r4-r11}
bx lr
-
-.fnend
diff --git a/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.s b/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
index 5591bb83cd..ff60cc6198 100644
--- a/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.s
+++ b/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
@@ -35,16 +35,11 @@
@ r11: Scratch
@ r12: &coefficients[j]
-.arch armv7-a
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12
.align 2
-.global WebRtcSpl_FilterARFastQ12
-
-WebRtcSpl_FilterARFastQ12:
-
-.fnstart
-
-.save {r4-r11}
+DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12
push {r4-r11}
ldrsh r12, [sp, #32] @ data_length
@@ -155,9 +150,6 @@ END:
pop {r4-r11}
bx lr
-.fnend
-
-
@Reference C code:
@
@void WebRtcSpl_FilterARFastQ12(int16_t* data_in,
diff --git a/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h b/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
index 8461474fb3..c9bcc1c23f 100644
--- a/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
+++ b/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
@@ -166,7 +166,7 @@ static __inline int WebRtcSpl_NormW16(WebRtc_Word16 a) {
static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) {
WebRtc_Word16 out16 = 0;
- __asm __volatile ("ssat %r0, #16, %r1" : "=r"(out16) : "r"(value32));
+ __asm __volatile ("ssat %0, #16, %1" : "=r"(out16) : "r"(value32));
return out16;
}
diff --git a/webrtc/common_audio/signal_processing/min_max_operations_neon.s b/webrtc/common_audio/signal_processing/min_max_operations_neon.S
index 85dd2fb9df..c84307f5e4 100644
--- a/webrtc/common_audio/signal_processing/min_max_operations_neon.s
+++ b/webrtc/common_audio/signal_processing/min_max_operations_neon.S
@@ -15,20 +15,18 @@
@ The reference C code is in file min_max_operations.c. Code here is basically
@ a loop unrolling by 8 with Neon instructions. Bit-exact.
-.arch armv7-a
-.fpu neon
-.global WebRtcSpl_MaxAbsValueW16Neon
-.global WebRtcSpl_MaxAbsValueW32Neon
-.global WebRtcSpl_MaxValueW16Neon
-.global WebRtcSpl_MaxValueW32Neon
-.global WebRtcSpl_MinValueW16Neon
-.global WebRtcSpl_MinValueW32Neon
-.align 2
+#include "webrtc/system_wrappers/interface/asm_defines.h"
-@ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
-WebRtcSpl_MaxAbsValueW16Neon:
-.fnstart
+GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
+GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
+GLOBAL_FUNCTION WebRtcSpl_MaxValueW16Neon
+GLOBAL_FUNCTION WebRtcSpl_MaxValueW32Neon
+GLOBAL_FUNCTION WebRtcSpl_MinValueW16Neon
+GLOBAL_FUNCTION WebRtcSpl_MinValueW32Neon
+.align 2
+@ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
+DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
mov r2, #-1 @ Initialize the return value.
cmp r0, #0
beq END_MAX_ABS_VALUE_W16
@@ -50,8 +48,8 @@ LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
@ Find the maximum value in the Neon registers and move it to r2.
vmax.u16 d24, d25
- vpmax.u16 d24, d24
- vpmax.u16 d24, d24
+ vpmax.u16 d24, d24, d24
+ vpmax.u16 d24, d24, d24
adds r1, #8
vmov.u16 r2, d24[0]
beq END_MAX_ABS_VALUE_W16
@@ -71,12 +69,10 @@ END_MAX_ABS_VALUE_W16:
mov r0, r2
bx lr
-.fnend
-@ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
-WebRtcSpl_MaxAbsValueW32Neon:
-.fnstart
+@ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
+DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
cmp r0, #0
moveq r0, #-1
beq EXIT @ Return -1 for a NULL pointer.
@@ -103,7 +99,7 @@ LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
@ Find the maximum value in the Neon registers and move it to r2.
vmax.u32 q12, q11
vmax.u32 d24, d25
- vpmax.u32 d24, d24
+ vpmax.u32 d24, d24, d24
adds r1, #8
vmov.u32 r2, d24[0]
beq END_MAX_ABS_VALUE_W32
@@ -125,12 +121,8 @@ END_MAX_ABS_VALUE_W32:
EXIT:
bx lr
-.fnend
-
@ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
-WebRtcSpl_MaxValueW16Neon:
-.fnstart
-
+DEFINE_FUNCTION WebRtcSpl_MaxValueW16Neon
mov r2, #0x8000 @ Initialize the return value.
cmp r0, #0
beq END_MAX_VALUE_W16
@@ -151,8 +143,8 @@ LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
@ Find the maximum value in the Neon registers and move it to r2.
vmax.s16 d24, d25
- vpmax.s16 d24, d24
- vpmax.s16 d24, d24
+ vpmax.s16 d24, d24, d24
+ vpmax.s16 d24, d24, d24
adds r1, #8
vmov.u16 r2, d24[0]
beq END_MAX_VALUE_W16
@@ -168,12 +160,8 @@ END_MAX_VALUE_W16:
mov r0, r2
bx lr
-.fnend
-
@ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
-WebRtcSpl_MaxValueW32Neon:
-.fnstart
-
+DEFINE_FUNCTION WebRtcSpl_MaxValueW32Neon
mov r2, #0x80000000 @ Initialize the return value.
cmp r0, #0
beq END_MAX_VALUE_W32
@@ -196,8 +184,8 @@ LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
@ Find the maximum value in the Neon registers and move it to r2.
vmax.s32 q12, q11
- vpmax.s32 d24, d25
- vpmax.s32 d24, d24
+ vpmax.s32 d24, d24, d25
+ vpmax.s32 d24, d24, d24
adds r1, #8
vmov.s32 r2, d24[0]
beq END_MAX_VALUE_W32
@@ -213,12 +201,8 @@ END_MAX_VALUE_W32:
mov r0, r2
bx lr
-.fnend
-
@ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
-WebRtcSpl_MinValueW16Neon:
-.fnstart
-
+DEFINE_FUNCTION WebRtcSpl_MinValueW16Neon
movw r2, #0x7FFF @ Initialize the return value.
cmp r0, #0
beq END_MIN_VALUE_W16
@@ -239,8 +223,8 @@ LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
@ Find the minimum value in the Neon registers and move it to r2.
vmin.s16 d24, d25
- vpmin.s16 d24, d24
- vpmin.s16 d24, d24
+ vpmin.s16 d24, d24, d24
+ vpmin.s16 d24, d24, d24
adds r1, #8
vmov.s16 r2, d24[0]
sxth r2, r2
@@ -257,12 +241,8 @@ END_MIN_VALUE_W16:
mov r0, r2
bx lr
-.fnend
-
@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
-WebRtcSpl_MinValueW32Neon:
-.fnstart
-
+DEFINE_FUNCTION WebRtcSpl_MinValueW32Neon
mov r2, #0x7FFFFFFF @ Initialize the return value.
cmp r0, #0
beq END_MIN_VALUE_W32
@@ -285,8 +265,8 @@ LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
@ Find the minimum value in the Neon registers and move it to r2.
vmin.s32 q12, q11
- vpmin.s32 d24, d25
- vpmin.s32 d24, d24
+ vpmin.s32 d24, d24, d25
+ vpmin.s32 d24, d24, d24
adds r1, #8
vmov.s32 r2, d24[0]
beq END_MIN_VALUE_W32
@@ -301,5 +281,3 @@ LOOP_MIN_VALUE_W32:
END_MIN_VALUE_W32:
mov r0, r2
bx lr
-
-.fnend
diff --git a/webrtc/common_audio/signal_processing/resample_by_2.c b/webrtc/common_audio/signal_processing/resample_by_2.c
index c1d8b37844..e6692e8a79 100644
--- a/webrtc/common_audio/signal_processing/resample_by_2.c
+++ b/webrtc/common_audio/signal_processing/resample_by_2.c
@@ -31,8 +31,8 @@ static __inline WebRtc_Word32 MUL_ACCUM_1(WebRtc_Word32 tbl_value,
WebRtc_Word32 diff,
WebRtc_Word32 state) {
WebRtc_Word32 result;
- __asm__("smlawb %r0, %r1, %r2, %r3": "=r"(result): "r"(diff),
- "r"(tbl_value), "r"(state));
+ __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
+ "r"(tbl_value), "r"(state));
return result;
}
@@ -47,8 +47,8 @@ static __inline WebRtc_Word32 MUL_ACCUM_2(WebRtc_Word32 tbl_value,
WebRtc_Word32 diff,
WebRtc_Word32 state) {
WebRtc_Word32 result;
- __asm__("smmla %r0, %r1, %r2, %r3": "=r"(result): "r"(diff << 1),
- "r"(tbl_value), "r"(state));
+ __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
+ "r"(tbl_value), "r"(state));
return result;
}
diff --git a/webrtc/common_audio/signal_processing/signal_processing.gypi b/webrtc/common_audio/signal_processing/signal_processing.gypi
index b09c767bed..91592ea7c4 100644
--- a/webrtc/common_audio/signal_processing/signal_processing.gypi
+++ b/webrtc/common_audio/signal_processing/signal_processing.gypi
@@ -65,8 +65,8 @@
'conditions': [
['target_arch=="arm"', {
'sources': [
- 'complex_bit_reverse_arm.s',
- 'spl_sqrt_floor_arm.s',
+ 'complex_bit_reverse_arm.S',
+ 'spl_sqrt_floor_arm.S',
],
'sources!': [
'complex_bit_reverse.c',
@@ -76,7 +76,7 @@
['armv7==1', {
'dependencies': ['signal_processing_neon',],
'sources': [
- 'filter_ar_fast_q12_armv7.s',
+ 'filter_ar_fast_q12_armv7.S',
],
'sources!': [
'filter_ar_fast_q12.c',
@@ -112,10 +112,10 @@
'type': '<(library)',
'includes': ['../../build/arm_neon.gypi',],
'sources': [
- 'cross_correlation_neon.s',
- 'downsample_fast_neon.s',
- 'min_max_operations_neon.s',
- 'vector_scaling_operations_neon.s',
+ 'cross_correlation_neon.S',
+ 'downsample_fast_neon.S',
+ 'min_max_operations_neon.S',
+ 'vector_scaling_operations_neon.S',
],
},
],
diff --git a/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.s b/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
index a2c5b7d0d2..c49ef1f3bc 100644
--- a/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.s
+++ b/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
@@ -8,10 +8,11 @@
@ Output: r0 = INT (SQRT (r0)), precision is 16 bits
@ Registers touched: r1, r2
-.global WebRtcSpl_SqrtFloor
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
.align 2
-WebRtcSpl_SqrtFloor:
+DEFINE_FUNCTION WebRtcSpl_SqrtFloor
mov r1, #3 << 30
mov r2, #1 << 30
diff --git a/webrtc/common_audio/signal_processing/vector_scaling_operations_neon.s b/webrtc/common_audio/signal_processing/vector_scaling_operations_neon.S
index 562425bf05..07db741b00 100644
--- a/webrtc/common_audio/signal_processing/vector_scaling_operations_neon.s
+++ b/webrtc/common_audio/signal_processing/vector_scaling_operations_neon.S
@@ -13,15 +13,11 @@
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.
-.arch armv7-a
-.fpu neon
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+GLOBAL_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
.align 2
-.global WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
-
-WebRtcSpl_ScaleAndAddVectorsWithRoundNeon:
-.fnstart
-
+DEFINE_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
push {r4-r9}
ldr r4, [sp, #32] @ length
@@ -84,5 +80,3 @@ LOOP_NO_UNROLLING:
END:
pop {r4-r9}
bx lr
-
-.fnend