From 5870e071aabfbde0d570142acf440ab4dca97e04 Mon Sep 17 00:00:00 2001 From: Eric Laurent Date: Fri, 17 Feb 2012 10:26:11 -0800 Subject: Optimized sqrt() for ARM. Merged optimized sqrt() function from webRTC svn rev r1627 to improve performance on ARM. Change-Id: Ie9c57e2a3b8a0786c0169028c0940184b7c9db5e --- NOTICE | 36 +++++++++ src/common_audio/signal_processing/Android.mk | 9 ++- .../signal_processing/spl_sqrt_floor.c | 67 ++++++++-------- .../signal_processing/spl_sqrt_floor.s | 88 ++++++++++++++++++++++ 4 files changed, 166 insertions(+), 34 deletions(-) create mode 100644 src/common_audio/signal_processing/spl_sqrt_floor.s diff --git a/NOTICE b/NOTICE index f96164a1f7..9176bbed03 100644 --- a/NOTICE +++ b/NOTICE @@ -164,3 +164,39 @@ Scott McMurray // release() added in by Google. Use this to conditionally // transfer ownership of a heap-allocated object to the caller, usually on // method success. + + +=============================================================================== +/* + * Written by Wilco Dijkstra, 1996. + * Refer to NOTICE file at the root of git project. + * + * Minor modifications in code style for WebRTC, 2012. + */ + +// The following email record is related to source files spl_sqrt_floor.c +// and spl_sqrt_floor.s in trunk/src/common_audio/signal_processing/. +// +// +// From: Wilco Dijkstra +// Date: Fri, Jun 24, 2011 at 3:20 AM +// Subject: Re: sqrt routine +// To: Kevin Ma +// Hi Kevin, +// Thanks for asking. Those routines are public domain (originally posted to +// comp.sys.arm a long time ago), so you can use them freely for any purpose. +// Cheers, +// Wilco +// +// ----- Original Message ----- +// From: "Kevin Ma" +// To: +// Sent: Thursday, June 23, 2011 11:44 PM +// Subject: Fwd: sqrt routine +// Hi Wilco, +// I saw your sqrt routine from several web sites, including +// http://www.finesse.demon.co.uk/steven/sqrt.html. 
+// Just wonder if there's any copyright information with your Successive +// approximation routines, or if I can freely use it for any purpose. +// Thanks. +// Kevin diff --git a/src/common_audio/signal_processing/Android.mk b/src/common_audio/signal_processing/Android.mk index 787e5c1400..a056e7e489 100644 --- a/src/common_audio/signal_processing/Android.mk +++ b/src/common_audio/signal_processing/Android.mk @@ -44,7 +44,6 @@ LOCAL_SRC_FILES := \ resample_by_2_internal.c \ resample_fractional.c \ spl_sqrt.c \ - spl_sqrt_floor.c \ spl_version.c \ splitting_filter.c \ sqrt_of_one_minus_x_squared.c \ @@ -65,6 +64,14 @@ LOCAL_CFLAGS += \ $(MY_ARM_CFLAGS_NEON) endif +ifeq ($(TARGET_ARCH),arm) +LOCAL_SRC_FILES += \ + spl_sqrt_floor.s +else +LOCAL_SRC_FILES += \ + spl_sqrt_floor.c +endif + LOCAL_SHARED_LIBRARIES := libstlport ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true) diff --git a/src/common_audio/signal_processing/spl_sqrt_floor.c b/src/common_audio/signal_processing/spl_sqrt_floor.c index aa36459ec4..62041b3dc8 100644 --- a/src/common_audio/signal_processing/spl_sqrt_floor.c +++ b/src/common_audio/signal_processing/spl_sqrt_floor.c @@ -1,21 +1,26 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Written by Wilco Dijkstra, 1996. + * Refer to NOTICE file at the root of git project. * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. + * Minor modifications in code style for WebRTC, 2012. */ +#include "signal_processing_library.h" + /* - * This file contains the function WebRtcSpl_SqrtFloor(). 
- * The description header can be found in signal_processing_library.h + * Algorithm: + * Successive approximation of the equation (root + delta) ^ 2 = N + * until delta < 1. If delta < 1 we have the integer part of SQRT (N). + * Use delta = 2^i for i = 15 .. 0. + * + * Output precision is 16 bits. Note for large input values (close to + * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) + * contains the MSB information (a non-sign value). Do with caution + * if you need to cast the output to int16_t type. * + * If the input value is negative, it returns 0. */ -#include "signal_processing_library.h" - #define WEBRTC_SPL_SQRT_ITER(N) \ try1 = root + (1 << (N)); \ if (value >= try1 << (N)) \ @@ -24,30 +29,26 @@ root |= 2 << (N); \ } -// (out) Square root of input parameter -WebRtc_Word32 WebRtcSpl_SqrtFloor(WebRtc_Word32 value) +int32_t WebRtcSpl_SqrtFloor(int32_t value) { - // new routine for performance, 4 cycles/bit in ARM - // output precision is 16 bits - - WebRtc_Word32 root = 0, try1; + int32_t root = 0, try1; - WEBRTC_SPL_SQRT_ITER (15); - WEBRTC_SPL_SQRT_ITER (14); - WEBRTC_SPL_SQRT_ITER (13); - WEBRTC_SPL_SQRT_ITER (12); - WEBRTC_SPL_SQRT_ITER (11); - WEBRTC_SPL_SQRT_ITER (10); - WEBRTC_SPL_SQRT_ITER ( 9); - WEBRTC_SPL_SQRT_ITER ( 8); - WEBRTC_SPL_SQRT_ITER ( 7); - WEBRTC_SPL_SQRT_ITER ( 6); - WEBRTC_SPL_SQRT_ITER ( 5); - WEBRTC_SPL_SQRT_ITER ( 4); - WEBRTC_SPL_SQRT_ITER ( 3); - WEBRTC_SPL_SQRT_ITER ( 2); - WEBRTC_SPL_SQRT_ITER ( 1); - WEBRTC_SPL_SQRT_ITER ( 0); + WEBRTC_SPL_SQRT_ITER (15); + WEBRTC_SPL_SQRT_ITER (14); + WEBRTC_SPL_SQRT_ITER (13); + WEBRTC_SPL_SQRT_ITER (12); + WEBRTC_SPL_SQRT_ITER (11); + WEBRTC_SPL_SQRT_ITER (10); + WEBRTC_SPL_SQRT_ITER ( 9); + WEBRTC_SPL_SQRT_ITER ( 8); + WEBRTC_SPL_SQRT_ITER ( 7); + WEBRTC_SPL_SQRT_ITER ( 6); + WEBRTC_SPL_SQRT_ITER ( 5); + WEBRTC_SPL_SQRT_ITER ( 4); + WEBRTC_SPL_SQRT_ITER ( 3); + WEBRTC_SPL_SQRT_ITER ( 2); + WEBRTC_SPL_SQRT_ITER ( 1); + WEBRTC_SPL_SQRT_ITER ( 0); - return root >> 1; + 
return root >> 1; } diff --git a/src/common_audio/signal_processing/spl_sqrt_floor.s b/src/common_audio/signal_processing/spl_sqrt_floor.s new file mode 100644 index 0000000000..425993dfa6 --- /dev/null +++ b/src/common_audio/signal_processing/spl_sqrt_floor.s @@ -0,0 +1,88 @@ +@ Written by Wilco Dijkstra, 1996. +@ Refer to NOTICE file at the root of git project. +@ +@ Minor modifications in code style for WebRTC, 2012. +@ Output is bit-exact with the reference C code in spl_sqrt_floor.c for non-negative inputs; the comparisons here are unsigned, so negative int32_t values are not mapped to 0 as in the C version. + +@ Input : r0 32 bit unsigned integer +@ Output: r0 = INT (SQRT (r0)), precision is 16 bits +@ Registers touched: r1, r2 + +.global WebRtcSpl_SqrtFloor + +.align 2 +WebRtcSpl_SqrtFloor: +.fnstart + mov r1, #3 << 30 + mov r2, #1 << 30 + + @ unroll for i = 0 .. 15 + + cmp r0, r2, ror #2 * 0 + subhs r0, r0, r2, ror #2 * 0 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 1 + subhs r0, r0, r2, ror #2 * 1 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 2 + subhs r0, r0, r2, ror #2 * 2 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 3 + subhs r0, r0, r2, ror #2 * 3 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 4 + subhs r0, r0, r2, ror #2 * 4 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 5 + subhs r0, r0, r2, ror #2 * 5 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 6 + subhs r0, r0, r2, ror #2 * 6 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 7 + subhs r0, r0, r2, ror #2 * 7 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 8 + subhs r0, r0, r2, ror #2 * 8 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 9 + subhs r0, r0, r2, ror #2 * 9 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 10 + subhs r0, r0, r2, ror #2 * 10 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 11 + subhs r0, r0, r2, ror #2 * 11 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 12 + subhs r0, r0, r2, ror #2 * 12 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 13 + subhs r0, r0, r2, ror #2 * 13 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 14 + subhs r0, r0, r2, ror #2 * 14 + adc r2, 
r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 15 + subhs r0, r0, r2, ror #2 * 15 + adc r2, r1, r2, lsl #1 + + bic r0, r2, #3 << 30 @ for rounding, add: cmp r0, r2; adc r2, #1 + bx lr + +.fnend -- cgit v1.2.3