3 files changed, 130 insertions, 34 deletions
diff --git a/src/common_audio/signal_processing/Android.mk b/src/common_audio/signal_processing/Android.mk
index 787e5c1400..a056e7e489 100644
--- a/src/common_audio/signal_processing/Android.mk
+++ b/src/common_audio/signal_processing/Android.mk
@@ -44,7 +44,6 @@ LOCAL_SRC_FILES := \
     resample_by_2_internal.c \
     resample_fractional.c \
     spl_sqrt.c \
-    spl_sqrt_floor.c \
     spl_version.c \
     splitting_filter.c \
     sqrt_of_one_minus_x_squared.c \
@@ -65,6 +64,14 @@ LOCAL_CFLAGS += \
     $(MY_ARM_CFLAGS_NEON)
 endif
 
+ifeq ($(TARGET_ARCH),arm)  # ARM targets build the assembly WebRtcSpl_SqrtFloor.
+LOCAL_SRC_FILES += \
+    spl_sqrt_floor.s
+else  # Every other target builds the portable C WebRtcSpl_SqrtFloor.
+LOCAL_SRC_FILES += \
+    spl_sqrt_floor.c
+endif
+
 LOCAL_SHARED_LIBRARIES := libstlport
 
 ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true)
diff --git a/src/common_audio/signal_processing/spl_sqrt_floor.c b/src/common_audio/signal_processing/spl_sqrt_floor.c
index aa36459ec4..62041b3dc8 100644
--- a/src/common_audio/signal_processing/spl_sqrt_floor.c
+++ b/src/common_audio/signal_processing/spl_sqrt_floor.c
@@ -1,21 +1,26 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Written by Wilco Dijkstra, 1996.
+ * Refer to NOTICE file at the root of git project.
  *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
+ * Minor modifications in code style for WebRTC, 2012.
  */
 
+#include "signal_processing_library.h"
+
 /*
- * This file contains the function WebRtcSpl_SqrtFloor().
- * The description header can be found in signal_processing_library.h
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note that for large input values (0x40000000
+ * up to 0x7FFFFFFF) the result is 0x8000 or greater, so bit 15 (the highest
+ * bit of the low 16-bit half word) is a magnitude bit, not a sign bit. Take
+ * care if you need to cast the output to int16_t type.
  *
+ * If the input value is negative, it returns 0.
  */
 
-#include "signal_processing_library.h"
-
 #define WEBRTC_SPL_SQRT_ITER(N)                 \
   try1 = root + (1 << (N));                     \
   if (value >= try1 << (N))                     \
@@ -24,30 +29,26 @@
     root |= 2 << (N);                           \
   }
 
-// (out) Square root of input parameter
-WebRtc_Word32 WebRtcSpl_SqrtFloor(WebRtc_Word32 value)
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
 {
-    // new routine for performance, 4 cycles/bit in ARM
-    // output precision is 16 bits
-
-    WebRtc_Word32 root = 0, try1;
+  int32_t root = 0, try1;  /* root accumulates 2 * result; try1 is the per-bit trial value. */
 
-    WEBRTC_SPL_SQRT_ITER (15);
-    WEBRTC_SPL_SQRT_ITER (14);
-    WEBRTC_SPL_SQRT_ITER (13);
-    WEBRTC_SPL_SQRT_ITER (12);
-    WEBRTC_SPL_SQRT_ITER (11);
-    WEBRTC_SPL_SQRT_ITER (10);
-    WEBRTC_SPL_SQRT_ITER ( 9);
-    WEBRTC_SPL_SQRT_ITER ( 8);
-    WEBRTC_SPL_SQRT_ITER ( 7);
-    WEBRTC_SPL_SQRT_ITER ( 6);
-    WEBRTC_SPL_SQRT_ITER ( 5);
-    WEBRTC_SPL_SQRT_ITER ( 4);
-    WEBRTC_SPL_SQRT_ITER ( 3);
-    WEBRTC_SPL_SQRT_ITER ( 2);
-    WEBRTC_SPL_SQRT_ITER ( 1);
-    WEBRTC_SPL_SQRT_ITER ( 0);
+  WEBRTC_SPL_SQRT_ITER (15);  /* Resolve one result bit per step, from bit 15 down to bit 0. */
+  WEBRTC_SPL_SQRT_ITER (14);
+  WEBRTC_SPL_SQRT_ITER (13);
+  WEBRTC_SPL_SQRT_ITER (12);
+  WEBRTC_SPL_SQRT_ITER (11);
+  WEBRTC_SPL_SQRT_ITER (10);
+  WEBRTC_SPL_SQRT_ITER ( 9);
+  WEBRTC_SPL_SQRT_ITER ( 8);
+  WEBRTC_SPL_SQRT_ITER ( 7);
+  WEBRTC_SPL_SQRT_ITER ( 6);
+  WEBRTC_SPL_SQRT_ITER ( 5);
+  WEBRTC_SPL_SQRT_ITER ( 4);
+  WEBRTC_SPL_SQRT_ITER ( 3);
+  WEBRTC_SPL_SQRT_ITER ( 2);
+  WEBRTC_SPL_SQRT_ITER ( 1);
+  WEBRTC_SPL_SQRT_ITER ( 0);
 
-    return root >> 1;
+  return root >> 1;  /* The macro sets bits with "2 << N", so halve to get the result. */
 }
diff --git a/src/common_audio/signal_processing/spl_sqrt_floor.s b/src/common_audio/signal_processing/spl_sqrt_floor.s
new file mode 100644
index 0000000000..425993dfa6
--- /dev/null
+++ b/src/common_audio/signal_processing/spl_sqrt_floor.s
@@ -0,0 +1,88 @@
+@ Written by Wilco Dijkstra, 1996.
+@ Refer to NOTICE file at the root of git project.
+@
+@ Minor modifications in code style for WebRTC, 2012.
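+@ Output is bit-exact with the reference C code in spl_sqrt_floor.c for non-negative inputs (the C code returns 0 for negative inputs; this code treats r0 as unsigned).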
+
+@ Input :             r0 32 bit unsigned integer
+@ Output:             r0 = INT (SQRT (r0)), precision is 16 bits
+@ Registers touched:  r0 (result), r1, r2 (scratch), condition flags
+
+.global WebRtcSpl_SqrtFloor
+@ NOTE(review): no ".type WebRtcSpl_SqrtFloor, %function" directive is emitted; confirm Thumb callers interwork correctly.
+.align  2
+WebRtcSpl_SqrtFloor:
+.fnstart
+  mov    r1, #3 << 30              @ r1 = 0xC0000000, constant added in every adc below
+  mov    r2, #1 << 30              @ r2 = 0x40000000, the first trial value (2^15 squared)
+  @ r0 holds the running remainder; r2 holds the root bits found so far in its low bits, under a 01 marker in bits 31:30.
+  @ unroll for i = 0 .. 15
+  @ Step i: trial = r2 ror #2*i; C = (r0 >= trial, unsigned); if C then r0 -= trial; r2 = (r2 << 1) + r1 + C.
+  cmp    r0, r2, ror #2 * 0        @ C = 1 when r0 >= trial (unsigned compare)
+  subhs  r0, r0, r2, ror #2 * 0    @ if so, remove the trial value from the remainder
+  adc    r2, r1, r2, lsl #1        @ shift C in as the new lowest root bit; adding r1 restores the 01 marker
+
+  cmp    r0, r2, ror #2 * 1
+  subhs  r0, r0, r2, ror #2 * 1
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 2
+  subhs  r0, r0, r2, ror #2 * 2
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 3
+  subhs  r0, r0, r2, ror #2 * 3
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 4
+  subhs  r0, r0, r2, ror #2 * 4
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 5
+  subhs  r0, r0, r2, ror #2 * 5
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 6
+  subhs  r0, r0, r2, ror #2 * 6
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 7
+  subhs  r0, r0, r2, ror #2 * 7
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 8
+  subhs  r0, r0, r2, ror #2 * 8
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 9
+  subhs  r0, r0, r2, ror #2 * 9
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 10
+  subhs  r0, r0, r2, ror #2 * 10
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 11
+  subhs  r0, r0, r2, ror #2 * 11
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 12
+  subhs  r0, r0, r2, ror #2 * 12
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 13
+  subhs  r0, r0, r2, ror #2 * 13
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 14
+  subhs  r0, r0, r2, ror #2 * 14
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 15
+  subhs  r0, r0, r2, ror #2 * 15
+  adc    r2, r1, r2, lsl #1
+
+  bic    r0, r2, #3 << 30  @ clear the marker bits 31:30 so r0 = root; for rounding add: cmp r0, r2  adc r2, #1
+  bx lr
+
+.fnend